Commit 484373d5 authored Mar 11, 2020 by Xiaofei Wang
Parent: 6dc17896

Upload new file

Showing 1 changed file: AZUL/newMct.py (new file, mode 100644), 512 additions and 0 deletions.
import copy    # used for deepcopy in get_node_count / expand / simulate
import math
import random  # used by Node.get_unexpanded_move when USING_OPPONENT is True
import time
# from graphTree import TreeGraph
from collections import defaultdict
from functools import reduce
from multiprocessing import Process

import numpy as np

from Gui import Gui
from Monitor import Monitor, MonitorsToString
from model import *
from naive_player import NaivePlayer
from randomPlayer import RandomPlayer
from reward import Reward
from rewardBasedPlayer import RewardBasedPlayer, get_max_difference, randomMax
from rewardBFSPlayer import BFS_search
from state_monitor import increase_actions, set_search_time, set_round_recorder, set_pre_r, get_pre_r, \
    get_special_action
SEARCH_TIME = 0.5
GAMMA = 0.9
MAX = 10000
USING_GUI = False
USING_OPPONENT = False
USING_BSET_VALUE = True
# NO_PRUNE_THRESHOLD = 20
PRO_DISTRI = 1.2
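
# Summary of the constants above, inferred from how they are used later in this file:
#   SEARCH_TIME      - per-move time budget (seconds) for the loop in Mcts_search.search
#   GAMMA            - defined here but not referenced elsewhere in this file
#   MAX              - large exploration value given to unvisited children in best_child
#   USING_GUI        - when True, search() renders the finished tree via Gui(self.tree, 'mcts')
#   USING_OPPONENT   - when True, Node.get_unexpanded_move sometimes delegates opponent moves to NaivePlayer
#   USING_BSET_VALUE - when True, best_child normalises Q by the best reward seen so far (self.best_value)
#   PRO_DISTRI       - base of the exponential that turns move rewards into priors in get_pre_prob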


class MI_PlayerNew(RewardBasedPlayer):
    def __init__(self, _id):
        super().__init__(_id)
        self.search_agent = Mcts_search(_id, False, self)

    def SelectMove(self, moves, game_state):
        print('MCTS\n', game_state)
        increase_actions('moves')
        original_moves = moves
        player_order = self.get_player_order(game_state)
        moves = self.filtering_moves(game_state.players[self.id], moves)
        move = self.search_agent.search(moves, game_state, player_order, original_moves)
        return move


class Mcts_search:
    def __init__(self, _id, log, agent):
        self.id = _id
        self.log = log
        self.agent = agent
        self.pre_score_list = []
        self.tree = []
        # set_pre_r(str(self.__class__), self.pre_score_list)

    def search(self, moves, game_state, player_order, original_moves):
        self.last_tree = self.tree
        self.lock = False
        self.tree = []
        self.init_game_state = game_state
        self.init_moves = moves
        self.player_order = player_order
        self.best_value = 0
        state = self.init_game_state
        parent = None
        f_move = None
        act_id = self.id
        root_node = Node(state, parent, f_move, act_id, self.tree)
        self.root_node = root_node
        self.time_monitor = defaultdict(float)
        numbers = self.get_node_count(self.init_game_state, self.init_moves)
        start = time.time()
        n = 0
        # while n <= 4:
        # while True:
        nodes_len = 5 * (2 ** (len(moves) ** (1 / 2)))
        move_len = len(moves)
        W = move_len * 0.1
        D = len(numbers)
        AD = reduce(lambda x, y: x * y, range(1, D + 1))
        all_nodes = W ** D * AD / D ** D
        print('#############################')
        print('all nodes', all_nodes)
        eff = (W ** 3) / 2 * (2 ** max(D - 5, 0))
        print('eff', eff)
        print('time', all_nodes / eff / 350)
        print(D, AD)
        print(AD)
        self.FIRST_SEARCH = (SEARCH_TIME * 350 / 2 * (2 ** max(D - 5, 0)) * D ** D / AD) ** (1 / (max(D - 2, 1)))
        self.FIRST_SEARCH = int(self.FIRST_SEARCH)
        self.FOE_SEARCH = self.FIRST_SEARCH
        # self.FIRST_SEARCH = int((SEARCH_TIME**(1/4) * 200) // move_len)
        # self.FOE_SEARCH = int((SEARCH_TIME**(1/4) * 200) // move_len)
        # self.FIRST_SEARCH = int((SEARCH_TIME**(1/2) * 600) // move_len)
        # self.FOE_SEARCH = int((SEARCH_TIME**(1/2) * 300) // move_len)
        print(self.FIRST_SEARCH, self.FOE_SEARCH)
        print('nodes predict')
        print(numbers)
        # while time.time() - start < SEARCH_TIME * move_len**(1/2) / 5:
        # while True:
        while time.time() - start < SEARCH_TIME:
            # while n < nodes_len:
            # a = input('input')
            n += 1
            self.one_search(root_node)
            if n > len(self.tree):
                break
        print('width: ', self.FIRST_SEARCH)
        print('branches ', self.FOE_SEARCH)
        print('depth: ', len(numbers))
        print('nodes: ', len(self.tree))
        print('search times: ', n)
        a_num = [self.FIRST_SEARCH]
        print('aa')
        for i in range(1, len(numbers)):
            a_num.append(min(self.FOE_SEARCH, numbers[i]))
        print('all: ', a_num)
        a_count = self.get_num(a_num)
        print('all number', a_count)
        print('##############################')
        set_search_time('MCTS', round(time.time() - start, 2))
        print('searched times', n)
        print('nodes: ', len(self.tree))
        print('{} finished'.format(str(self.agent.__class__)))
        print('search duration', time.time() - start)
        print('distribute', self.time_monitor)
        print()
        dict = {}
        for m, (c, p) in root_node.moves.items():
            Q = get_max_difference(c.value, self.id) if c is not None else -1000
            dict[m] = Q
        move = randomMax(dict)
        m_list = [m for m, (c, p) in root_node.moves.items()]
        i = str(m_list.index(move))
        increase_actions(i)
        print(get_special_action())
        pre_value = root_node.moves[move][0].value
        pre_value = [round(pre_value[0], 2), round(pre_value[1], 2)] if self.id == 0 \
            else [round(pre_value[1], 2), round(pre_value[0], 2)]
        # self.pre_score_list.append(pre_value)
        # print(self.pre_score_list)
        m = Monitor(move, root_node.moves[move][0].value, self.id,
                    len(game_state.players[0].player_trace.round_scores) - 1,
                    [self.FIRST_SEARCH, self.FOE_SEARCH], move_len, n, len(self.tree))
        is_decrease = m.decrease
        self.get_predict_track(root_node, move)
        # if is_decrease and len(self.tree) < n and m.action_id != 0 and not self.lock:
        #     self.lock = True
        #     Process(target=Gui, args=(self.last_tree,)).start()
        #     Process(target=Gui, args=(self.tree,)).start()
        MonitorsToString()
        print(self.get_num(numbers))
        round_num_str = str(5 - len(game_state.bag) // 20)
        set_pre_r(str(self.__class__) + round_num_str, pre_value)
        if USING_GUI:
            self.get_predict_track(root_node, move)
            Gui(self.tree, 'mcts')
        return move
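
    # search() above grows the tree until SEARCH_TIME elapses (or the tree stops gaining
    # nodes), then returns the root move whose child has the best score difference.
    # FIRST_SEARCH / FOE_SEARCH are derived branching budgets: get_pre_prob keeps at most
    # FIRST_SEARCH candidate moves at the root and at most FOE_SEARCH at deeper nodes.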

    def get_node_count(self, init_state, init_moves):
        state = copy.deepcopy(init_state)
        state = copy.deepcopy(state)
        player_count = len(self.player_order)
        # players = [Simu_Player(i, self.agent.using_reward) for i in range(player_count)]
        players = [RandomPlayer(i) for i in range(player_count)]
        act_id = self.id
        nodes_num = len(init_moves)
        len_move = nodes_num
        numbers = [len_move]
        while state.TilesRemaining():
            if self.log:
                print(act_id)
                print('id ', act_id)
                print('before')
                print(state.detail_str())
            move = players[act_id].SelectMove(None, state)
            state.ExecuteMove(act_id, move)
            act_id = act_id + 1 if act_id + 1 < player_count else 0
            num = len(self.agent.filtering_moves(state.players[act_id],
                                                 state.players[act_id].GetAvailableMoves(state)))
            if num != 0:
                numbers.append(num)
        return numbers

    def get_num(self, numbers):
        nodes_num = numbers[0]
        len_move = numbers[0]
        if len(numbers) > 1:
            for i in range(1, len(numbers)):
                len_move *= numbers[i]
                nodes_num += len_move
        return nodes_num

    def get_predict_track(self, root_node, move):
        track = [move]
        node = root_node.moves[move][0]
        while True:
            node.mark = True
            id = node.act_id
            children = [c for m, (c, p) in node.moves.items() if c is not None] if node.moves is not None else []
            if len(children) == 0:
                break
            node = max(children, key=lambda x: get_max_difference(x.value, id))
            track.append((node.from_move[1], str(node.from_move[2]), str(id), node, node.value))
        return track

    def one_search(self, root_node):
        start = time.time()
        select_node, move = self.select(root_node)
        self.time_monitor['select'] += (time.time() - start)
        if self.log:
            print('select')
            print(select_node, move)
        start = time.time()
        node = self.expand(select_node, move)
        self.time_monitor['expand'] += (time.time() - start)
        if self.log:
            print('expand')
            print(node)
        start = time.time()
        result = self.simulate(node)
        self.time_monitor['simulate'] += (time.time() - start)
        if self.log:
            print(result)
        start = time.time()
        self.backup(node, result)
        self.time_monitor['back'] += (time.time() - start)
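
    # one_search() above is a single MCTS iteration in the usual four phases:
    # select (descend via best_child until a node with an untried move is found),
    # expand (add one child), simulate (play the round out randomly) and backup
    # (propagate the per-player rewards towards the root). time_monitor accumulates
    # wall-clock time per phase.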

    def select(self, root_node):
        node = root_node
        while not node.is_end():
            if node.moves is None:
                moves = self.get_pre_prob(node.state,
                                          node.state.players[node.act_id].GetAvailableMoves(node.state),
                                          node.act_id, self.player_order)
                node.give_moves(moves)
            if not node.is_fully_expanded():
                return node, node.get_unexpanded_move()
            else:
                node = self.best_child(node)
        return node, None

    def best_child(self, p_node):
        if self.log:
            print('jump')
            print(p_node)
        node_v_para = 2 * math.log(p_node.visited)
        uct_dict = {}
        for m, (c, p) in p_node.moves.items():
            if USING_BSET_VALUE:
                # print(self.best_value)
                Q = get_max_difference(c.value, p_node.act_id) / (self.best_value if self.best_value != 0 else 1)
            else:
                Q = get_max_difference(c.value, p_node.act_id) / 0.001
            # N = 1 / c.visited if c.visited != 0 else MAX
            N = ((node_v_para / c.visited) ** (1 / 2)) if c.visited != 0 else MAX
            uct_value = Q + p + N
            uct_dict[c] = uct_value
        node = randomMax(uct_dict)
        return node
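
    # best_child() above scores each expanded child with a UCT-style value, roughly
    #     uct = Q + p + sqrt(2 * ln(N_parent) / n_child)
    # where Q is the child's score difference (normalised by self.best_value when
    # USING_BSET_VALUE is set), p is the prior from get_pre_prob, and the last term is
    # the exploration bonus (unvisited children get MAX, so they are tried first).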

    def expand(self, p_node, move):
        if move is None:
            return p_node
        state = copy.deepcopy((p_node.state))
        state.ExecuteMove(p_node.act_id, move)
        parent = p_node
        f_move = move
        act_id = p_node.act_id + 1 if p_node.act_id < len(self.player_order) - 1 else 0
        return Node(state, parent, f_move, act_id, self.tree)

    def simulate(self, node):
        start = time.time()
        state = copy.deepcopy(node.state)
        player_count = len(self.player_order)
        # players = [Simu_Player(i, self.agent.using_reward) for i in range(player_count)]
        players = [RandomPlayer(i) for i in range(player_count)]
        act_id = node.act_id
        while state.TilesRemaining():
            if self.log:
                print(act_id)
                print('id ', act_id)
                print('before')
                print(state.detail_str())
            move = players[act_id].SelectMove(None, state)
            state.ExecuteMove(act_id, move)
            act_id = act_id + 1 if act_id + 1 < player_count else 0
        if self.log:
            print('simulate over')
        state.ExecuteEndOfRound()
        reward = [0] * player_count
        for i, plr in enumerate(state.players):
            reward[i] = state.players[i].score
        # print(state.detail_str())
        # print(reward)
        game_continuing = True
        self.time_monitor['simulate w'] += (time.time() - start)
        for i in range(player_count):
            plr_state = state.players[i]
            completed_rows = plr_state.GetCompletedRows()
            if completed_rows > 0:
                game_continuing = False
                break
        if not game_continuing:
            for i in range(player_count):
                state.players[i].EndOfGameScore()
                reward[i] = state.players[i].score
        else:
            for i, plr in enumerate(state.players):
                expect_score = eval(self.agent.using_reward)(state, i, self.player_order).get_round_expection()
                reward[i] = state.players[i].score + expect_score
        self.time_monitor['simulate p'] += (time.time() - start)
        if USING_BSET_VALUE:
            if max(reward) > self.best_value:
                self.best_value = max(reward)
        return reward
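
    # simulate() above plays the rest of the round with RandomPlayer agents. If any
    # player has completed a wall row, the game counts as over and final
    # EndOfGameScore() totals are returned; otherwise each reward is the current score
    # plus a heuristic round expectation from the agent's reward class, which
    # eval(self.agent.using_reward) resolves by name.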

    def backup(self, node, result):
        GuiPool = []
        update_node = node
        update_node.update(self.id, result)
        while True:
            update_node = update_node.parent
            if update_node is None:
                break
            update_node.update(self.id)

    def get_pre_prob(self, game_state, moves, act_id, player_order):
        # threshold_most = self.FOE_SEARCH if act_id != self.id else self.FIRST_SEARCH
        threshold_most = self.FOE_SEARCH if len(self.tree) != 1 else self.FIRST_SEARCH
        # threshold_impo = 4
        ft_moves = self.agent.filtering_moves(game_state.players[act_id], moves)
        move_dict = {}
        for move in ft_moves:
            reward, score_list = self.agent.get_place_reward(game_state, move, act_id, player_order)
            move_dict[move] = reward, score_list
        move_tuple = sorted(move_dict.items(), key=lambda x: x[1][0], reverse=True)[:threshold_most] \
            if len(move_dict) > threshold_most else move_dict.items()
        move_prob_dict = {}
        sum_reward = sum([PRO_DISTRI ** m[1][0] for m in move_tuple])
        for i, m in enumerate(move_tuple):
            move_prob_dict[m[0]] = None, (PRO_DISTRI ** m[1][0]) / sum_reward
        return move_prob_dict
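
    # get_pre_prob() above keeps at most threshold_most of the highest-reward moves and
    # turns their rewards into a softmax-like prior with base PRO_DISTRI:
    #     prob(m) = PRO_DISTRI ** reward(m) / sum_k PRO_DISTRI ** reward(k)
    # For example (hypothetical rewards, PRO_DISTRI = 1.2): rewards [3, 1, 0] give
    # weights [1.728, 1.2, 1.0] and priors of roughly [0.44, 0.31, 0.25].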


class Node:
    def __init__(self, game_state, parent, from_move, act_id, tree):
        self.state = game_state
        self.parent = parent
        self.from_move = from_move
        if self.parent is not None:
            # print(self.parent.moves[from_move])
            self.parent.moves[from_move] = (self, self.parent.moves[from_move][1])
            peers = [c for m, (c, p) in self.parent.moves.items()]
            assert self in peers
        self.act_id = act_id
        self.value = [0] * len(game_state.players)
        tree.append(self)
        self.visited = 0
        self.name = 'n' + str(len(tree))
        self.mark = False
        self.moves = None
        self.tree = tree

    def is_fully_expanded(self):
        for m, (c, p) in self.moves.items():
            if c is None:
                return False
        return True

    def give_moves(self, moves):
        self.moves = moves

    def get_unexpanded_move(self):
        if USING_OPPONENT and self.act_id != self.tree[0].act_id:
            dice = random.random()
            if dice <= 0.7:
                moves = [m for m, _ in self.moves.items()]
                return NaivePlayer(self.act_id).SelectMove(moves, self.state)
        unexp_dict = {}
        for m, (c, p) in self.moves.items():
            if c is None:
                unexp_dict[m] = p
        # unexp_prob = sum(unexp_dict.values())
        # assert len(unexp_dict) > 0
        # # print(unexp_dict.values())
        # for m, p in unexp_dict.items():
        #     unexp_dict[m] = p / unexp_prob
        # # print(sum(unexp_dict.values()))
        # unexp_m_list = [(k, v) for k, v in unexp_dict.items()]
        # p = np.array([v for k, v in unexp_m_list])
        # # print(p)
        # index = np.random.choice([i for i in range(len(p))], p=p.ravel())
        # m, _ = unexp_m_list[index]
        m = max(unexp_dict, key=unexp_dict.get)
        return m

    def is_end(self):
        return not self.state.TilesRemaining()

    def update(self, agent_id, result=None):
        self.visited += 1
        if result is not None:
            for i in range(len(self.value)):
                self.value[i] = (self.value[i] * (self.visited - 1) + result[i]) / self.visited
            return
        value_list = []
        for m, (c, p) in self.moves.items():
            if c is None or c.visited == 0:
                continue
            value = c.value.copy()
            value_list.append(value)
        value_list = sorted(value_list, key=lambda x: get_max_difference(x, self.act_id), reverse=True)
        self.value = value_list[0]
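
    # update() above has two modes: a node that just received a simulation result keeps a
    # running average of per-player rewards, (old * (visits - 1) + result) / visits,
    # while an interior node (no result passed) adopts the value vector of its best
    # visited child, ranked by get_max_difference from the acting player's point of view.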

    def get_children(self):
        return [c for m, (c, p) in self.moves.items()] if self.moves is not None else []

    def info(self):
        info = '{:2},{}\np:{}\nv:{}\n{} {}\nid:{}'.format(
            self.from_move[1], self.from_move[2],
            round(self.parent.moves[self.from_move][1], 2),
            self.visited,
            str(round(self.value[0], 2)), str(round(self.value[1], 2)),
            self.act_id) if self.from_move is not None else ''
        return info


class Simu_Player(RewardBasedPlayer):
    def __init__(self, _id, using_reward):
        super().__init__(_id)
        self.using_reward = using_reward

    def SelectMove(self, moves, game_state):
        player_order = []
        for i in range(self.id + 1, len(game_state.players)):
            player_order.append(i)
        for i in range(0, self.id + 1):
            player_order.append(i)
        i_moves = game_state.players[self.id].GetAvailableMoves(game_state)
        ft_moves = self.filtering_moves(game_state.players[self.id], i_moves)
        move_dict = {}
        for m in ft_moves:
            r = self.get_place_reward(game_state, m, self.id, player_order)
            move_dict[m] = (None, r[0])
        move = max(move_dict.items(), key=lambda x: x[1][1])[0]
        return move
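

# Minimal usage sketch, kept as comments. Everything except MI_PlayerNew comes from the
# surrounding AZUL framework (model.py and the player base classes imported above), so
# the exact setup of game_state here is an assumption:
#
#   player = MI_PlayerNew(0)                                      # this file's MCTS agent
#   moves = game_state.players[0].GetAvailableMoves(game_state)   # game_state: framework GameState
#   move = player.SelectMove(moves, game_state)                   # runs the time-budgeted MCTS
#   game_state.ExecuteMove(0, move)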