Commit 61ed4534, authored Mar 11, 2020 by Xiaofei Wang
Upload new file
Parent: b1676c4c
Showing 1 changed file: AZUL/rewardBFSPlayer.py (new file, mode 100644) with 211 additions and 0 deletions
import copy
import time
from collections import deque
from Gui import Gui
from reward import Reward
from rewardBasedPlayer import RewardBasedPlayer, get_max_difference
from state_monitor import set_search_time, get_search_time

FIRST_ACTION_NO_PRUNE = True
FIRST_ACTION_SELECT_NUM = 4
SELECT_NUM = 2
NO_PRUNE_THRESHOLD = 10
GAMMA = 0.9
USING_GUI = False

class BFS_Player(RewardBasedPlayer):

    def __init__(self, _id):
        super().__init__(_id)
        self.search_agent = BFS_search(_id, self)

    def SelectMove(self, moves, game_state):
        print('BFS\n', game_state)
        player_order = self.get_player_order(game_state)
        moves = self.filtering_moves(game_state.players[self.id], moves)
        move = self.search_agent.search(moves, game_state, player_order)
        return move

class BFS_search:

    def __init__(self, _id, agent):
        self.max_num = 1
        self.id = _id
        self.queue = deque()
        self.num = 0
        self.round = 0
        self.agent = agent

    def search(self, moves, game_state, player_order):
        self.tree = []
        self.init_moves = moves
        self.init_game_state = game_state
        root_node = Node(self.init_game_state, None, None, self.init_moves, self.id, (), 0, 0, self.tree)
        self.queue.append(root_node)
        self.player_order = player_order
        start = time.time()
        # breadth-first expansion: terminal nodes (no tiles left) are evaluated and
        # backed up via update_node, non-terminal nodes are queued for further expansion
        while len(self.queue) != 0:
            node = self.queue.popleft()
            children = self.get_successors(node, player_order)
            for c in children:
                if not c.state.TilesRemaining():
                    self.update_node(c)
                else:
                    self.queue.append(c)
        set_search_time('BFS', round(time.time() - start, 2))
        print(get_search_time())
        print('search nodes:', len(self.tree))
        print('BFS search finished')
        print('search duration:', time.time() - start)
        print()
        # pick the root child whose backed-up value is best for this player
        children = sorted(root_node.children, key=lambda x: get_max_difference(x.value, self.id), reverse=True)
        # print('attacking')
        # for c in children:
        #     print(c.from_move[1], c.from_move[2], c.value, c.instant_reward)
        track = self.get_predict_track(root_node)
        if USING_GUI:
            Gui(self.tree)
        # print('track:')
        # for t in track:
        #     print(t)
        return children[0].from_move

    # get the successors of a state that are regarded as the most valuable choices;
    # the remaining successors are pruned by the mechanisms in prune()
    def get_successors(self, node, player_order, max_num=2):
        if not node.state.TilesRemaining():
            return []
        moves = node.moves
        state = node.state
        act_id = node.act_id
        children = self.prune(state, moves, act_id, len(self.init_moves), node.layer, player_order)
        #print(children)
        nodes = []
        for c in children:
            gs_copy = copy.deepcopy(state)
            new_act_id = act_id + 1 if act_id < len(player_order) - 1 else 0
            gs_copy.ExecuteMove(act_id, c[0])
            new_moves = gs_copy.players[new_act_id].GetAvailableMoves(gs_copy)
            new_moves = self.agent.filtering_moves(gs_copy.players[new_act_id], new_moves)
            nodes.append(Node(gs_copy, node, c[0], new_moves, new_act_id, c[0], node.layer + 1, c[1], self.tree))
        return nodes

    # the prune function selects the moves that are regarded as more valuable
    def prune(self, game_state, moves, act_id, init_moves_num, layer, player_order):
        # Threshold:
        # if the number of initial moves is below the threshold, skip pruning and expand every move
        if init_moves_num <= NO_PRUNE_THRESHOLD:
            moves_data = [(move, self.get_place_reward(game_state, move, act_id, player_order)) for move in moves]
            return moves_data
        children = {}
        for move in moves:
            reward, score_list = self.get_place_reward(game_state, move, act_id, player_order)
            children[move] = reward, score_list
        if (FIRST_ACTION_NO_PRUNE and layer == 0):
            #print('init@@@@@@@@@@@@@@')
            children = sorted(children.items(), key=lambda x: x[1][0], reverse=True)[:FIRST_ACTION_SELECT_NUM] if len(children) > SELECT_NUM else children.items()
            return children
        else:
            children = sorted(children.items(), key=lambda x: x[1][0], reverse=True)[:SELECT_NUM] if len(children) > SELECT_NUM else children.items()
            return children

    def get_place_reward(self, game_state, move, act_id, player_order):
        # instantiate the reward class named by self.agent.using_reward and let it estimate the move
        reward, score_list = eval(self.agent.using_reward)(game_state, act_id, player_order).estimate(move)
        return reward, score_list

    def get_predict_track(self, root_node):
        # follow the best child at each level to build the predicted line of play
        track = []
        node = root_node
        while True:
            act_id = node.act_id
            node.mark = True
            node = max(node.children, key=lambda x: get_max_difference(x.value, act_id))
            track.append((node.from_move[1], str(node.from_move[2]), str(act_id)))
            if len(node.children) == 0:
                break
        return track

    def update_node(self, node):
        # terminal node: apply end-of-round scoring, then work out each player's value
        node.state.ExecuteEndOfRound()
        reward = [0] * len(node.value)
        for i, plr in enumerate(node.state.players):
            reward[i] = node.state.players[i].score
        # the game ends once any player has completed a row
        game_continuing = True
        for i in range(len(node.value)):
            plr_state = node.state.players[i]
            completed_rows = plr_state.GetCompletedRows()
            if completed_rows > 0:
                game_continuing = False
                break
        if not game_continuing:
            # game over: use the final end-of-game scores
            for i in range(len(node.value)):
                node.state.players[i].EndOfGameScore()
                reward[i] = node.state.players[i].score
        else:
            # game continues: add each player's expected score for the next round
            for i, plr in enumerate(node.state.players):
                expect_score = eval(self.agent.using_reward)(node.state, i, self.player_order).get_round_expection()
                reward[i] = node.state.players[i].score + expect_score
        node.value = reward
        # back the value up the tree: each ancestor takes the child value that is
        # best for the player who acts at that ancestor
        update_node = node
        while True:
            update_node = update_node.parent
            if update_node.parent is None:
                break
            value_list = []
            for c in update_node.children:
                value = c.value.copy()
                value[update_node.act_id] = c.value[update_node.act_id]
                value_list.append(value)
            value_list = sorted(value_list, key=lambda x: get_max_difference(x, update_node.act_id), reverse=True)
            update_node.value = value_list[0]

class Node:

    def __init__(self, game_state, parent, move, moves, _id, edge, layer, instant_reward, tree):
        self.state = game_state
        self.parent = parent
        self.children = []
        if self.parent is not None:
            self.parent.children.append(self)
        self.moves = moves
        self.from_move = move
        self.act_id = _id
        self.value = [0] * len(game_state.players)
        self.edge = edge
        self.layer = layer
        self.instant_reward = instant_reward
        self.mark = False
        tree.append(self)

    def get_children(self):
        return self.children

    def is_end(self):
        return not self.state.TilesRemaining()

    def info(self):
        info = '{:2},{}\n{} {}'.format(self.from_move[1], self.from_move[2],
                                       str(round(self.value[0], 2)),
                                       str(round(self.value[1], 2))) if self.from_move is not None else ''
        return info
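
Note: get_max_difference is imported from rewardBasedPlayer.py and is not part of this commit. The sorting keys above appear to treat it as the acting player's score margin over the best opponent; a minimal sketch under that assumption (hypothetical, not the project's implementation) would be:

# Hypothetical sketch of the semantics the sorting keys above seem to assume;
# the real function lives in rewardBasedPlayer.py and may differ.
def get_max_difference(value, player_id):
    # value: list of per-player scores indexed by player id (assumption)
    opponents = [v for i, v in enumerate(value) if i != player_id]
    return value[player_id] - max(opponents)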
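
For context, a minimal sketch of how BFS_Player might be driven by the surrounding AZUL framework. The GameState object and its TilesRemaining, players, GetAvailableMoves and ExecuteMove members are assumptions inferred from the calls made in this file, not an interface confirmed by this commit.

# Hypothetical driver loop; every framework name below is assumed from the
# calls made in rewardBFSPlayer.py rather than taken from this commit.
from rewardBFSPlayer import BFS_Player

def play_one_round(game_state):
    players = [BFS_Player(0), BFS_Player(1)]      # two-player game assumed
    turn = 0
    while game_state.TilesRemaining():
        moves = game_state.players[turn].GetAvailableMoves(game_state)
        move = players[turn].SelectMove(moves, game_state)   # BFS picks a move
        game_state.ExecuteMove(turn, move)
        turn = (turn + 1) % len(players)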