diff --git a/AZUL/rewardBFSPlayer.py b/AZUL/rewardBFSPlayer.py
new file mode 100644
index 0000000000000000000000000000000000000000..f0b18ce343da55619a85010c7065711784a46097
--- /dev/null
+++ b/AZUL/rewardBFSPlayer.py
@@ -0,0 +1,211 @@
import copy
import time
from collections import deque

from Gui import Gui
from reward import Reward  # resolved by name through eval(self.agent.using_reward)
from rewardBasedPlayer import RewardBasedPlayer, get_max_difference
from state_monitor import set_search_time, get_search_time

FIRST_ACTION_NO_PRUNE = True   # keep more candidate moves at the root
FIRST_ACTION_SELECT_NUM = 4    # moves kept at the root when pruning
SELECT_NUM = 2                 # moves kept at every deeper layer
NO_PRUNE_THRESHOLD = 10        # skip pruning when this few moves are available
GAMMA = 0.9                    # discount factor (currently unused)
USING_GUI = False


class BFS_Player(RewardBasedPlayer):
    def __init__(self, _id):
        super().__init__(_id)
        self.search_agent = BFS_search(_id, self)

    def SelectMove(self, moves, game_state):
        print('BFS\n', game_state)
        player_order = self.get_player_order(game_state)
        moves = self.filtering_moves(game_state.players[self.id], moves)
        return self.search_agent.search(moves, game_state, player_order)


class BFS_search:
    def __init__(self, _id, agent):
        self.id = _id
        self.queue = deque()
        self.agent = agent

    def search(self, moves, game_state, player_order):
        """Expand the move tree breadth-first until the end of the round and
        return the root move with the best backed-up value."""
        self.tree = []
        self.init_moves = moves
        self.init_game_state = game_state
        root_node = Node(self.init_game_state, None, None, self.init_moves,
                         self.id, (), 0, 0, self.tree)
        self.queue.append(root_node)
        self.player_order = player_order

        start = time.time()
        while self.queue:
            node = self.queue.popleft()
            for c in self.get_successors(node, player_order):
                if not c.state.TilesRemaining():
                    # the round is over at this leaf: score it and back the
                    # result up the tree
                    self.update_node(c)
                else:
                    self.queue.append(c)

        set_search_time('BFS', round(time.time() - start, 2))
        print(get_search_time())
        print('search nodes:', len(self.tree))
        print('BFS search finished')
        print('search duration:', time.time() - start)
        print()

        # pick the root move whose backed-up value maximises our score difference
        children = sorted(root_node.children,
                          key=lambda x: get_max_difference(x.value, self.id),
                          reverse=True)
        self.get_predict_track(root_node)  # marks the predicted line of play for the GUI
        if USING_GUI:
            Gui(self.tree)
        return children[0].from_move

    def get_successors(self, node, player_order):
        """Expand a node into its most promising children; the other moves are
        discarded by prune()."""
        if not node.state.TilesRemaining():
            return []
        act_id = node.act_id
        children = self.prune(node.state, node.moves, act_id,
                              len(self.init_moves), node.layer, player_order)
        nodes = []
        for move, reward_info in children:
            gs_copy = copy.deepcopy(node.state)
            new_act_id = act_id + 1 if act_id < len(player_order) - 1 else 0
            gs_copy.ExecuteMove(act_id, move)
            new_moves = gs_copy.players[new_act_id].GetAvailableMoves(gs_copy)
            new_moves = self.agent.filtering_moves(gs_copy.players[new_act_id], new_moves)
            nodes.append(Node(gs_copy, node, move, new_moves, new_act_id,
                              move, node.layer + 1, reward_info, self.tree))
        return nodes

    def prune(self, game_state, moves, act_id, init_moves_num, layer, player_order):
        """Select the moves the reward estimator judges most valuable.

        Returns a list of (move, (reward, score_list)) pairs."""
        # Threshold: if the initial position offers few moves, skip pruning
        # and expand every move
        if init_moves_num <= NO_PRUNE_THRESHOLD:
            return [(move, self.get_place_reward(game_state, move, act_id, player_order))
                    for move in moves]

        children = {}
        for move in moves:
            children[move] = self.get_place_reward(game_state, move, act_id, player_order)

        # keep a wider range of candidates at the root so the first action is
        # not pruned too aggressively
        select_num = FIRST_ACTION_SELECT_NUM if (FIRST_ACTION_NO_PRUNE and layer == 0) else SELECT_NUM
        if len(children) > select_num:
            return sorted(children.items(), key=lambda x: x[1][0], reverse=True)[:select_num]
        return list(children.items())

    def get_place_reward(self, game_state, move, act_id, player_order):
        # self.agent.using_reward holds the class name of the reward estimator
        # (e.g. 'Reward'); eval() resolves it back to the class
        reward, score_list = eval(self.agent.using_reward)(game_state, act_id, player_order).estimate(move)
        return reward, score_list

    def get_predict_track(self, root_node):
        """Follow the best child at every layer to build the predicted line of
        play, marking the visited nodes for the GUI."""
        track = []
        node = root_node
        while True:
            act_id = node.act_id
            node.mark = True
            node = max(node.children, key=lambda x: get_max_difference(x.value, act_id))
            track.append((node.from_move[1], str(node.from_move[2]), str(act_id)))
            if len(node.children) == 0:
                break
        return track

    def update_node(self, node):
        """Score a leaf (end-of-round) node and back its value up the tree."""
        node.state.ExecuteEndOfRound()
        reward = [plr.score for plr in node.state.players]

        # a completed row ends the game
        game_continuing = all(plr.GetCompletedRows() == 0 for plr in node.state.players)

        if not game_continuing:
            for i in range(len(node.value)):
                node.state.players[i].EndOfGameScore()
                reward[i] = node.state.players[i].score
        else:
            # the game goes on: add each player's expected score for the next
            # round to their current score
            for i in range(len(node.value)):
                expect_score = eval(self.agent.using_reward)(node.state, i, self.player_order).get_round_expection()
                reward[i] = node.state.players[i].score + expect_score
        node.value = reward

        # back-propagate: every ancestor takes the value of the child that
        # maximises the acting player's score difference; the root itself is
        # skipped because the final move is chosen directly from its children
        update_node = node.parent
        while update_node is not None and update_node.parent is not None:
            best = max((c.value for c in update_node.children),
                       key=lambda v: get_max_difference(v, update_node.act_id))
            update_node.value = list(best)
            update_node = update_node.parent


class Node:
    def __init__(self, game_state, parent, move, moves, _id, edge, layer, instant_reward, tree):
        self.state = game_state
        self.parent = parent
        self.children = []
        if self.parent is not None:
            self.parent.children.append(self)
        self.moves = moves              # moves available to the player to act
        self.from_move = move           # the move that produced this state
        self.act_id = _id               # id of the player to act in this state
        self.value = [0] * len(game_state.players)
        self.edge = edge
        self.layer = layer
        self.instant_reward = instant_reward
        self.mark = False               # set when the node lies on the predicted track
        tree.append(self)

    def get_children(self):
        return self.children

    def is_end(self):
        return not self.state.TilesRemaining()

    def info(self):
        # two-player label used by the GUI: the move plus both backed-up values
        info = '{:2},{}\n{} {}'.format(
            self.from_move[1], self.from_move[2],
            str(round(self.value[0], 2)),
            str(round(self.value[1], 2))) if self.from_move is not None else ''
        return info
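
Note on search width: because prune() checks the number of moves at the root (init_moves_num) for every node, a position offering at most NO_PRUNE_THRESHOLD (10) initial moves is expanded exhaustively at every layer until the round ends. Otherwise the root keeps FIRST_ACTION_SELECT_NUM (4) candidates and every deeper node keeps SELECT_NUM (2), so layer L of the tree holds at most 4 * 2^(L-1) nodes (assuming the move filter leaves enough moves and no branch ends the round early).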
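
For context, a minimal single-round driver sketch, assuming the framework's game state exposes the interface used above (players, TilesRemaining, ExecuteMove, GetAvailableMoves, ExecuteEndOfRound); the `model` import path and the GameState constructor signature are assumptions, not part of this patch:

# driver_sketch.py -- hypothetical single-round driver; the 'model' module and
# the GameState(num_players) constructor are assumptions, not part of this patch
from model import GameState
from rewardBFSPlayer import BFS_Player

players = [BFS_Player(0), BFS_Player(1)]
game_state = GameState(len(players))  # assumed constructor

# players alternate moves until no tiles remain in the round
while game_state.TilesRemaining():
    for plr in players:
        if not game_state.TilesRemaining():
            break
        moves = game_state.players[plr.id].GetAvailableMoves(game_state)
        move = plr.SelectMove(moves, game_state)
        game_state.ExecuteMove(plr.id, move)

game_state.ExecuteEndOfRound()  # wall-tiling and scoring, as in update_node()
for i, plr_state in enumerate(game_state.players):
    print('player', i, 'score:', plr_state.score)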