Skip to content
Snippets Groups Projects
Commit 61ed4534 authored by Xiaofei Wang's avatar Xiaofei Wang
Browse files

上传新文件

parent b1676c4c
No related branches found
No related tags found
No related merge requests found
import copy
import time
from collections import deque
from Gui import Gui
from reward import Reward
from rewardBasedPlayer import RewardBasedPlayer, get_max_difference
from state_monitor import set_search_time, get_search_time
FIRST_ACTION_NO_PRUNE = True
FIRST_ACTION_SELECT_NUM = 4
SELECT_NUM = 2
NO_PRUNE_THRESHOLD = 10
GAMMA = 0.9
USING_GUI = False
class BFS_Player(RewardBasedPlayer):
def __init__(self, _id):
super().__init__(_id)
self.search_agent = BFS_search(_id, self)
def SelectMove(self, moves, game_state):
print('BFS\n',game_state)
player_order = self.get_player_order(game_state)
moves = self.filtering_moves(game_state.players[self.id], moves)
move = self.search_agent.search(moves, game_state, player_order)
return move
class BFS_search:
def __init__(self, _id, agent):
self.max_num = 1
self.id = _id
self.queue = deque()
self.num = 0
self.round = 0
self.agent = agent
def search(self, moves, game_state, player_order):
self.tree = []
self.init_moves = moves
self.init_game_state = game_state
root_node = Node(self.init_game_state, None, None, self.init_moves, self.id, (), 0, 0, self.tree)
self.queue.append(root_node)
self.player_order = player_order
start = time.time()
while len(self.queue) != 0:
node = self.queue.popleft()
children = self.get_successors(node, player_order)
for c in children:
if not c.state.TilesRemaining():
self.update_node(c)
else:
self.queue.append(c)
set_search_time('BFS', round(time.time() - start,2))
print(get_search_time())
print('search nodes:', len(self.tree))
print('BFS search finished')
print('search duration:', time.time() - start)
print()
children = sorted(root_node.children, key=lambda x:get_max_difference(x.value, self.id), reverse=True)
# print('attacking')
# for c in children:
# print(c.from_move[1], c.from_move[2], c.value, c.instant_reward)
track = self.get_predict_track(root_node)
if USING_GUI:
Gui(self.tree)
# print('track:')
# for t in track:
# print(t)
return children[0].from_move
# get all the successors from a state which are regarded as the most valuable choices
# the other successors are pruned by some certain mechanisms
def get_successors(self, node, player_order, max_num = 2):
if not node.state.TilesRemaining():
return []
moves = node.moves
state = node.state
act_id = node.act_id
children = self.prune(state, moves, act_id, len(self.init_moves), node.layer, player_order)
#print(children)
nodes = []
for c in children:
gs_copy = copy.deepcopy(state)
new_act_id = act_id+1 if act_id<len(player_order)-1 else 0
gs_copy.ExecuteMove(act_id, c[0])
new_moves = gs_copy.players[new_act_id].GetAvailableMoves(gs_copy)
new_moves = self.agent.filtering_moves(gs_copy.players[new_act_id], new_moves)
nodes.append(Node(gs_copy, node, c[0], new_moves, new_act_id, c[0], node.layer+1, c[1], self.tree))
return nodes
# the prune function to select the moves that are regarded as more valuable
def prune(self, game_state, moves, act_id, init_moves_num, layer, player_order):
# Threshold
# if the number of the initial moves is less than a threshold, no pruning and expand all the state
if init_moves_num <= NO_PRUNE_THRESHOLD:
moves_data = [(move, self.get_place_reward(game_state, move, act_id, player_order)) for move in moves]
return moves_data
children = {}
for move in moves:
reward, score_list = self.get_place_reward(game_state, move, act_id, player_order)
children[move] = reward, score_list
if (FIRST_ACTION_NO_PRUNE and layer == 0):
#print('init@@@@@@@@@@@@@@')
children = sorted(children.items(), key=lambda x: x[1][0], reverse=True)[:FIRST_ACTION_SELECT_NUM] if len(
children) > SELECT_NUM else children.items()
return children
else:
children = sorted(children.items(), key=lambda x: x[1][0], reverse=True)[:SELECT_NUM] if len(
children) > SELECT_NUM else children.items()
return children
def get_place_reward(self, game_state, move, act_id, player_order):
reward, score_list = eval(self.agent.using_reward)(game_state, act_id, player_order).estimate(move)
return reward, score_list
def get_predict_track(self, root_node):
track = []
node = root_node
while True:
act_id = node.act_id
node.mark = True
node = max(node.children, key=lambda x:get_max_difference(x.value, act_id))
track.append((node.from_move[1], str(node.from_move[2]), str(act_id)))
if len(node.children) == 0:
break
return track
def update_node(self, node):
node.state.ExecuteEndOfRound()
reward = [0] * len(node.value)
for i, plr in enumerate(node.state.players):
reward[i] = node.state.players[i].score
game_continuing = True
for i in range(len(node.value)):
plr_state = node.state.players[i]
completed_rows = plr_state.GetCompletedRows()
if completed_rows > 0:
game_continuing = False
break
if not game_continuing:
for i in range(len(node.value)):
node.state.players[i].EndOfGameScore()
reward[i] = node.state.players[i].score
else:
for i, plr in enumerate(node.state.players):
expect_score = eval(self.agent.using_reward)(node.state, i, self.player_order).get_round_expection()
reward[i] = node.state.players[i].score + expect_score
node.value = reward
update_node = node
while True:
update_node = update_node.parent
if update_node.parent is None:
break
value_list = []
for c in update_node.children:
value = c.value.copy()
value[update_node.act_id] = c.value[update_node.act_id]
value_list.append(value)
value_list = sorted(value_list, key=lambda x: get_max_difference(x, update_node.act_id), reverse=True)
update_node.value = value_list[0]
class Node:
def __init__(self, game_state, parent, move, moves, _id, edge, layer, instant_reward, tree):
self.state = game_state
self.parent = parent
self.children = []
if self.parent is not None:
self.parent.children.append(self)
self.moves = moves
self.from_move = move
self.act_id = _id
self.value = [0]*len(game_state.players)
self.edge = edge
self.layer = layer
self.instant_reward = instant_reward
self.mark = False
tree.append(self)
def get_children(self):
return self.children
def is_end(self):
return not self.state.TilesRemaining()
def info(self):
info = '{:2},{}\n{} {}'.format(
self.from_move[1], self.from_move[2],
str(round(self.value[0],2)),str(round(self.value[1],2))) if self.from_move is not None else ''
return info
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment