Liyao Zhu / Collective risk game / Commits

Commit a5e27101
Authored 6 years ago by Liyao Zhu
Parent: d78a8cca

    all files commented. Figures listed.
Showing 5 changed files with 253 additions and 103 deletions:

    agent.py   +11  −9
    game.py    +34  −29
    main.py    +208 −45
    utilis.py  +0   −18 (file deleted)
    workfile   +0   −2  (file deleted)
agent.py  +11 −9
@@ -35,10 +35,7 @@ class Agent:
     def updateReward(self, round, action, loss):
         """
-        :param round:
-        :param action:
-        :param loss:
-        :return:
+        updates the Q-table by receiving a payoff
         """
         newWealth = self.wealth * (1 - action) * (1 - loss)
         reward = newWealth - self.wealth
@@ -54,8 +51,12 @@ class Agent:
         else:
             print("ERROR: Illegal round number")
             exit(2)
+        """
+        Update function
+        """
         self.qTable[round][index] += self.learnRate * (
                 reward + self.discount * maxNextQ - self.qTable[round][index])
+        # if self.iteration == 999:
         #     print("QTABLE:", self.qTable)
         if round == self.R - 1:
@@ -64,9 +65,10 @@ class Agent:
     def chooseAction(self, roundNumber):
         """
-        Method: Q-learning
+        Choose an action based on current round number
+        :return: an action (float type)
         """
         randomAct = False
         if self.multiArm == 'decrease':
             """
             Epsilon Decrease
             """
...
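The hunk above is the core of agent.py: a tabular temporal-difference update of the form Q <- Q + lr * (reward + discount * maxQ' - Q). A minimal standalone sketch of that step, reusing the learnRate/discount naming from the diff (the repository's Agent class itself is not reproduced here):

    import numpy as np

    def q_update(qTable, round, index, reward, maxNextQ,
                 learnRate=0.1, discount=0.9):
        # One temporal-difference step on a (rounds x actions) table.
        qTable[round][index] += learnRate * (
                reward + discount * maxNextQ - qTable[round][index])
        return qTable

    qTable = np.zeros((2, 5))   # 2 rounds, 5 candidate actions
    q_update(qTable, round=0, index=3, reward=-20.0, maxNextQ=0.0)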
game.py  +34 −29
"""
This file contains an implementation of a 2-player R-round collective-risk game
model. Each player can choose to contribute part of their wealth to a common
pool to reduce the risk of a collective climate catastrophe. N players are
randomly paired with one another in a graph-based model in each iteration, and
play one game. Each player plays at least one game in one iteration. If N is
odd, a player could play multiple games, but the payoffs are averaged.
Author: Liyao Zhu liyao@student.unimelb.edu.au
Date: Apr. 2019
"""
import
numpy
as
np
import
numpy
as
np
import
agent
,
graph
import
agent
,
graph
class
Game
:
class
Game
:
def
__init__
(
self
,
N
=
100
,
R
=
1
,
K
=
99
,
P
=
0
,
Actions
=
[
0
,
0.2
,
0.4
,
0.6
,
0.8
],
I
=
1000
,
RF
=
0
,
alpha
=
1
,
epsilon
=
0.1
,
def
__init__
(
self
,
N
=
100
,
R
=
1
,
K
=
99
,
P
=
0
,
Actions
=
[
0
,
0.2
,
0.4
,
0.6
,
0.8
],
multiArm
=
'
greedy
'
,
threshold
=
0.8
):
I
=
1000
,
RF
=
0
,
alpha
=
1
,
epsilon
=
0.1
,
multiArm
=
'
greedy
'
,
# datamap = utilis.read()
threshold
=
0.8
):
# self.N = datamap['N'] # N-Player Game
# self.M = datamap['M'] # Randomly choose M players to play the game (normally 2)
# self.RF = datamap['RF'] # Parsed number of risk function chosen for the game
# self.alpha = datamap['alpha'] # Loss fraction
# self.R = datamap['R'] # Rounds of a game
self
.
N
=
N
self
.
N
=
N
self
.
M
=
2
self
.
M
=
2
self
.
RF
=
RF
self
.
RF
=
RF
self
.
alpha
=
alpha
self
.
alpha
=
alpha
self
.
R
=
R
self
.
R
=
R
self
.
threshold
=
threshold
# Threshold
self
.
threshold
=
threshold
# self.actions = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
self
.
actions
=
Actions
self
.
actions
=
Actions
self
.
iterations
=
I
self
.
iterations
=
I
@@ -27,11 +33,11 @@ class Game:
         |
         | P: Probability of rewiring each original edge in the graph
         |
-        | K: The number of edges(games) connected to each player. Has to be an even number.
-        |    If 1 is desired, don't use graph model. Max k: n - 2 (for even n) | n - 1 (for odd n)
-        |    * k can be odd as n - 1 (for even n). In cases k = n - 1 (for all n) -> a fully connected graph
+        | K: The number of edges(games) connected to each player. Has to be an
+        |    even number. Max k: n - 2 (for even n) | n - 1 (for odd n)
+        |    k can only be odd as n - 1 (for all n). In cases k = n - 1 -> a
+        |    fully connected graph
         """
-        # self.graph_based = True
         self.rewire_p = P
         self.rewire_k = K
         # assert (self.rewire_k < self.N)
@@ -39,14 +45,17 @@ class Game:
         "Create players"
         self.players = []
-        IW = 100  # Initial Wealth - can be input, can be variable to distinguish population
+        IW = 100  # Initial Wealth
-        self.totalWealth = self.M * IW  # subject to change
+        self.totalWealth = self.M * IW
         for i in range(self.N):
-            self.players.append(agent.Agent(self.R, IW, self.actions, epsilon=epsilon, multiArm=multiArm))
+            self.players.append(agent.Agent(self.R, IW, self.actions,
+                                            epsilon=epsilon, multiArm=multiArm))

     def riskfunc(self, contribution, totalwealth):
         """
-        the probability of collective-risk happening, given contribution
+        Implemented different risk functions here.
+        :return: the probability of disaster happening, given contribution
+                 and total wealth
         """
         proportion = contribution / totalwealth
@@ -73,13 +82,16 @@ class Game:
                 return "error"

     def play(self):
+        """
+        Play a whole trial of I (1000) iterations, N (100) players games
+        :return: a 3d numpy matrix, recording the averaged counted number of
+                 all actions in each round in all iterations.
+        """
-        # lastStrategyTable = np.zeros((self.N, self.R))
-        # sameStrategyRounds = 0
         results = np.zeros((self.iterations, self.R, len(self.actions)))

         """ITERATION"""
         for iter in range(self.iterations):
-            # print("GAME ITERATION", iter)
             actionTable = np.zeros((self.N, self.R))
             strategyTable = np.zeros((self.R, self.N))  # DIFFERENT AXIS R-N
@@ -107,13 +119,6 @@ class Game:
                 for i in range(self.N):
                     self.players[i].updateReward(r, actionTable[i][r], lossTable[i][r])
-            """
-            Strategy Stats
-            """
-            # if np.array_equal(strategyTable, lastStrategyTable):
-            #     sameStrategyRounds += 1
-            # else:
-            #     sameStrategyRounds = 0
-            # lastStrategyTable = strategyTable
             for r in range(self.R):
                 unique, count = np.unique(strategyTable[r], return_counts=True)
                 round_counter = dict(zip(unique, count))
...
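For orientation, a minimal usage sketch of the class as it stands after this commit, assuming the repository's agent and graph modules are importable; the parameter names follow the __init__ signature shown above:

    import game

    g = game.Game(N=100, R=1, RF=2, alpha=1, threshold=0.8,
                  Actions=[0, 0.2, 0.4, 0.6, 0.8], I=1000)
    results = g.play()      # 3D array: (iterations, rounds, actions)
    print(results.shape)    # expected (1000, 1, 5)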
main.py  +208 −45
"""
This file contains graph methods and t-test implementations. The main
function should produce all Figures and t-test results as the thesis.
Author: Liyao Zhu liyao@student.unimelb.edu.au
Date: Apr. 2019
"""
import
matplotlib.pyplot
as
plt
import
matplotlib.pyplot
as
plt
from
matplotlib
import
cm
from
matplotlib
import
cm
from
mpl_toolkits.mplot3d
import
Axes3D
from
mpl_toolkits.mplot3d
import
Axes3D
@@ -6,7 +15,10 @@ from scipy import stats
 import game


-def stackPlot(data, r, Actions, Iterations, titleComment=""):
+def stackPlot(data, r, Actions, Iterations, legendLoc='best', titleComment=""):
+    """
+    Draw a stack plot from averaged data of round r.
+    """
     A = len(Actions)
     x = range(Iterations)
     y = np.zeros((Iterations, A))
@@ -15,12 +27,13 @@ def stackPlot(data, r, Actions, Iterations, titleComment=""):
     y = np.vstack(y.T)

     fig, ax = plt.subplots()
-    ax.stackplot(x, y, labels=Actions, colors=[str(0.9 - 0.9 * x) for x in Actions])
-    ax.legend(loc='best')
-    plt.ylabel('Number of Actions')
+    ax.stackplot(x, y, labels=Actions, colors=[str(0.9 - 0.9 * x) for x in
+                                               Actions])
+    ax.legend(loc=legendLoc)
+    plt.ylabel('Percentage of each action')
     plt.xlabel('Time(iterations)')
-    title = 'Average Number of Actions in Round ' + str(r + 1)
+    title = 'Average Composition of Actions in Round ' + str(r + 1)
     if titleComment:
         title += "\n" + titleComment
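A side note on the colors= argument rewrapped above: Matplotlib accepts a string holding a float in [0, 1] as a grayscale level, so str(0.9 - 0.9 * x) shades higher contributions darker. A self-contained sketch with dummy data (not the thesis data):

    import numpy as np
    import matplotlib.pyplot as plt

    Actions = [0, 0.2, 0.4, 0.6, 0.8]
    x = range(10)
    y = np.vstack([np.full(10, 20.0) for _ in Actions])  # five flat dummy bands
    fig, ax = plt.subplots()
    ax.stackplot(x, y, labels=Actions,
                 colors=[str(0.9 - 0.9 * a) for a in Actions])
    ax.legend(loc='best')
    plt.show()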
@@ -32,6 +45,11 @@ def stackPlot(data, r, Actions, Iterations, titleComment=""):
 def rep(repeat=30, R=1, Actions=[0, 0.2, 0.4, 0.6, 0.8], I=1000, **kwargs):
+    """
+    Repeat the game over (30) trials and retrieve the average data of
+    game.play()
+    :return: Averaged game results, same shape as the return of game.play()
+    """
     data = np.zeros((I, R, len(Actions)))
     Actions.sort()
     for re in range(repeat):
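Per its new docstring, the averaging rep() performs reduces to summing game.play() results over the trials and dividing. A hedged sketch of that reduction, where make_trial is a hypothetical stand-in for game.Game(**kwargs).play():

    import numpy as np

    def rep_sketch(repeat, make_trial):
        # make_trial() stands in for game.Game(**kwargs).play()
        data = sum(make_trial() for _ in range(repeat))
        return data / repeat

    avg = rep_sketch(30, lambda: np.random.rand(1000, 1, 5))
    print(avg.shape)    # (1000, 1, 5), same shape as one trial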
@@ -44,6 +62,11 @@ def rep(repeat=30, R=1, Actions=[0, 0.2, 0.4, 0.6, 0.8], I=1000, **kwargs):
 def averageOfLast(data, Actions, N=100, r=0, lastIterations=100):
+    """
+    Averaged contribution and action counter of last (100) iterations from the
+    data produced by rep()
+    :return: a tuple: (average contribution, a dictionary as action counter)
+    """
     sum = 0
     action_counter = {action: 0 for action in Actions}
@@ -54,16 +77,22 @@ def averageOfLast(data, Actions, N=100, r=0, lastIterations=100):
     return (sum / (lastIterations * N), action_counter)


-def graph_kp3d(Actions, Klist=[2, 4, 8, 10], Plist=[0, 0.3, 0.6, 0.9], repeat=30, N=100):
+def graph_kp3d(Actions, Klist=[2, 4, 8, 10], Plist=[0, 0.3, 0.6, 0.9],
+               repeat=30, N=100, **kwargs):
+    """
+    Draw a 3D graph for graph-based model, showing the effect of K and P on
+    average contributions. (No effect observed)
+    """
     K = Klist
     P = Plist
     meanA = np.zeros((len(K), len(P)))

     for k in range(len(K)):
         for p in range(len(P)):
-            data = rep(repeat, K=K[k], P=P[p], Actions=Actions)  # Specify other params by adding here
-            meanA[k][p] = averageOfLast(data, Actions, lastIterations=100, N=N)[0]  # Doing the first round only -- for now
+            data = rep(repeat, K=K[k], P=P[p], Actions=Actions, **kwargs)
+            meanA[k][p] = averageOfLast(data, Actions, lastIterations=100,
+                                        N=N)[0]
             print("k, p, mean", k, p, meanA[k][p])

     P, K = np.meshgrid(P, K)
@@ -78,19 +107,26 @@ def graph_kp3d(Actions, Klist=[2, 4, 8, 10], Plist=[0, 0.3, 0.6, 0.9], repeat=30
     plt.show()


-def graph3d_alpha_threshold(Actions, repeat=30, AlphaList=np.arange(0, 1.01, 0.05), ThreshList=np.arange(0.1, 1.1, 0.1), N=100, **kwargs):
+def graph3d_alpha_threshold(Actions, repeat=30,
+                            AlphaList=np.arange(0, 1.01, 0.05),
+                            ThreshList=np.arange(0.1, 1.05, 0.05), N=100,
+                            **kwargs):
+    """
+    Draw two 3D graphs showing the average contribution and the average
+    contribution divided by threshold on two parameters: alpha and threshold
+    """
     mean = np.zeros((len(ThreshList), len(AlphaList)))
     ratio_by_threshold = np.zeros((len(ThreshList), len(AlphaList)))

     for t in range(len(ThreshList)):
         for a in range(len(AlphaList)):
             print("Calculating... t, alpha =", t, a)
-            data = rep(repeat=repeat, Actions=Actions, alpha=AlphaList[a], threshold=ThreshList[t], **kwargs)
+            data = rep(repeat=repeat, Actions=Actions, alpha=AlphaList[a],
+                       threshold=ThreshList[t], **kwargs)
             mean[t][a] = averageOfLast(data, Actions, lastIterations=100,
                                        N=N)[0]
         ratio_by_threshold[t] = mean[t] / ThreshList[t]

     A, T = np.meshgrid(AlphaList, ThreshList)
     fig = plt.figure()
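The meshgrid/figure lines above feed Matplotlib's 3D surface API. A minimal sketch with placeholder data (the real mean matrix comes from the repeated games), assuming the same imports main.py already has:

    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib import cm
    from mpl_toolkits.mplot3d import Axes3D  # registers the '3d' projection

    AlphaList = np.arange(0, 1.01, 0.05)
    ThreshList = np.arange(0.1, 1.05, 0.05)
    mean = np.random.rand(len(ThreshList), len(AlphaList))  # placeholder values
    A, T = np.meshgrid(AlphaList, ThreshList)
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(A, T, mean, cmap=cm.coolwarm)
    ax.set_xlabel('alpha')
    ax.set_ylabel('threshold')
    ax.set_zlabel('average contribution')
    plt.show()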
@@ -119,11 +155,12 @@ def graph3d_alpha_threshold(Actions, repeat=30, AlphaList=np.arange(0, 1.01, 0.0
     plt.show()


-def stackBar(r, Actions, repeat=30, multiArm='greedy', **kwargs):  # Plotting the data for round r
+def stackBar(r, Actions, repeat=30, multiArm='greedy', legendLoc='best',
+             **kwargs):
+    """
+    Draw a stack bar graph, to compare the composition of actions on one
+    parameter, specified as a list in **kwargs
+    """
-    # if len(kwargs) != 1:
-    #     print("ERROR, Stack Bar Graph Expects 1 List, gets:", len(kwargs))
-    # key, alist = list(kwargs.items())[0]
     key = -1
     alist = []
@@ -152,14 +189,17 @@ def stackBar(r, Actions, repeat=30, multiArm='greedy', **kwargs):  # Plotting th
             newKwargs['K'] = alist[al] - 1
         elif 'N' not in newKwargs.keys():
             newKwargs['N'] = 100  # default value
-        data = rep(repeat, Actions=Actions, multiArm=multiArm, **newKwargs) / newKwargs['N'] * 100
+        data = rep(repeat, Actions=Actions, multiArm=multiArm, **newKwargs) / \
+               newKwargs['N'] * 100
         action_counter = averageOfLast(data, Actions, r=r, lastIterations=100)[1]
         for a in range(A):
             count[a, al] = action_counter[Actions[a]]
         base = 0
         for a in range(A):
             p.append(plt.bar(ind, count[a], width, bottom=base,
                              color=str(0.9 - 0.9 * Actions[a])))
             base += count[a]

     plt.ylabel('Percentage of Actions')
@@ -167,14 +207,20 @@ def stackBar(r, Actions, repeat=30, multiArm='greedy', **kwargs):  # Plotting th
         plt.xlabel(key + ' (' + multiArm + ')')
     else:
         plt.xlabel(key)
-    plt.title('Average Number of Actions in Round ' + str(r + 1))
+    plt.title('Average Composition of Actions in Round ' + str(r + 1))
     plt.xticks(ind, alist)
     plt.yticks(np.arange(0, 101, 10))
-    plt.legend(tuple([p[x][0] for x in range(A)][::-1]), tuple(Actions[::-1]), loc='best')
+    plt.legend(tuple([p[x][0] for x in range(A)][::-1]), tuple(Actions[::-1]),
+               loc=legendLoc)
     plt.show()


 def t_test(repeat, Actions, r=0, R=1, I=1000, lastIterations=100, N=100,
            byThreshold=False, **kwargs):
+    """
+    Compute the p-value of average contributions on two values of one
+    parameter, specified as a tuple in **kwargs
+    """
     key = -1
     atuple = ()
     for k, v in kwargs.items():
@@ -191,7 +237,8 @@ def t_test(repeat, Actions, r=0, R=1, I=1000, lastIterations=100, N=100, byThres
     for s in (0, 1):
         newArgs = {**{key: atuple[s]}, **kwargs}
-        samples[s] = repHist(repeat, Actions, R, r, I, lastIterations, N, **newArgs)
+        samples[s] = repHist(repeat, Actions, R, r, I, lastIterations, N,
+                             **newArgs)
         if byThreshold:
             samples[s] /= newArgs["threshold"]
         print("Sample", s, samples[s])
@@ -199,7 +246,11 @@ def t_test(repeat, Actions, r=0, R=1, I=1000, lastIterations=100, N=100, byThres
     print(stats.ttest_ind(samples[0], samples[1]))


 def repHist(repeat, Actions, R=1, r=0, I=1000, lastIterations=100, N=100,
             **kwargs):
+    """
+    :return: A list of average contributions of all repetitions
+    """
     hist = np.zeros(repeat)
     for re in range(repeat):
         # print("HistREP", re)
@@ -209,10 +260,8 @@ def repHist(repeat, Actions, R=1, r=0, I=1000, lastIterations=100, N=100, **kwar
...
@@ -209,10 +260,8 @@ def repHist(repeat, Actions, R=1, r=0, I=1000, lastIterations=100, N=100, **kwar
return
hist
return
hist
def
main
():
def
main
():
#
Read-in or Define Parameter
s
#
Default value
s
N
=
100
N
=
100
R
=
1
R
=
1
@@ -222,6 +271,76 @@ def main():
     RF = 0
     alpha = 1
     Actions = [0, 0.2, 0.4, 0.6, 0.8]
+    repeat = 1
+
+    """
+    Fig. 2
+    """
+    # data = rep(repeat=repeat, N=100, alpha=0.8, R=8)
+    # stackPlot(data, r=0, Iterations=I, Actions=Actions,
+    #           legendLoc='lower right')
+
+    """
+    Fig. 3
+    """
+    # stackBar(0, Actions, repeat=repeat,
+    #          alpha=[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
+    #          legendLoc='lower left')
+
+    """
+    Fig. 4
+    """
+    # data = rep(repeat, R=8)
+    # for r in [0, 1, 3]:
+    #     stackPlot(data, r, Actions, I, legendLoc='lower right')
+
+    """
+    Fig. 5
+    """
+    # stackBar(0, Actions, repeat=repeat,
+    #          threshold=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
+    #          legendLoc='upper left', RF=2)
+
+    """
+    Fig. 6
+    """
+    # data = rep(repeat=repeat, Actions=Actions, R=1, I=I, RF=2, threshold=0.2)
+    # stackPlot(data, r=0, Iterations=I, Actions=Actions,
+    #           titleComment="threshold = 0.2")
+
+    """
+    Fig. 7 & 8 - 3D graph comparing alpha and threshold
+    """
+    # graph3d_alpha_threshold(Actions, repeat=repeat, RF=2)
+
+    """
+    Fig. 9 - simple line graph comparing alpha for threshold=0.2
+    """
+    # alphaList = np.arange(0, 1.01, 0.02)
+    # mean = np.zeros(len(alphaList))
+    # for i in range(len(alphaList)):
+    #     data = rep(repeat, alpha=alphaList[i], threshold=0.2, RF=2)
+    #     mean[i] = averageOfLast(data, Actions)[0]
+    #
+    # plt.plot([0.2, 0.2], '--', color='0.5')
+    # plt.plot(alphaList, mean, color='black')
+    #
+    # plt.ylabel('Average Contributions')
+    # plt.xlabel('Alpha, the loss fraction')
+    # plt.show()
+
+    """
+    Fig. 10 - Comparing composition on epsilon (greedy)
+    """
+    # stackBar(0, Actions, repeat=repeat, multiArm='greedy',
+    #          legendLoc='lower right',
+    #          epsilon=[0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8,0.9])
+
+    """
+    Fig. 11 - Extending 0.2-greedy to 5000 iterations
+    """
+    # data = rep(repeat=repeat, Actions=Actions, multiArm='greedy',
+    #            epsilon=0.2, I=5000)
+    # stackPlot(data, r=0, Iterations=5000, Actions=Actions,
+    #           titleComment="0.2 - greedy", legendLoc='lower left')
+
+    """
+    Fig. 12 - Comparing composition on epsilon (decrease)
+    """
+    # stackBar(0, Actions, repeat=repeat, multiArm='decrease',
+    #          legendLoc='lower left',
+    #          epsilon=[0.1, 0.4, 0.8, 0.9, 0.95, 0.98, 0.99, 0.999, 0.9999])
+
+    """
+    Fig. 13 - Extending 0.999-decrease to 5000 iterations
+    """
+    # data = rep(repeat=repeat, Actions=Actions, multiArm='decrease',
+    #            epsilon=0.999, I=5000)
+    # stackPlot(data, r=0, Iterations=5000, Actions=Actions,
+    #           titleComment="0.999 - decrease", legendLoc='lower left')

     """
     Graph1: Number of Actions of Round r (start by 0) by Iteration
@@ -242,35 +361,56 @@ def main():
     # data = rep(repeat=30, N=100, K=k, Actions=Actions, R=1, I=I, P=p)
     # stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment=("K=" + str(k) + ", P=" + str(p)))
-    # data = rep(repeat=30, Actions=Actions, R=1, I=I, RF=2, threshold=0.3)
-    # stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment="threshold = 0.3")

     """
     Graph2: Average contribution by K, P
     """
     # graph_kp3d(Actions)

     """
     Graph3: Comparing a parameter (put in a list)
     """
-    # stackBar(0, Actions, repeat=1, alpha=[0, 0.2, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
     # stackBar(0, Actions, repeat=1, N=[5, 10, 20, 50, 100], threshold=0.6, RF=2)
-    stackBar(0, Actions, repeat=30, RF=2, threshold=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
+    # stackBar(0, Actions, repeat=30, RF=2, threshold=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])

     """
-    Graph4: Actions by different epsilon method + value
+    Graph4: Actions by different epsilon with multi-arm bandit algorithms
     """
-    # stackBar(0, Actions, repeat=1, multiArm='greedy', epsilon=[0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
-    # stackBar(0, Actions, repeat=1, multiArm='decrease', epsilon=[0.8, 0.9, 0.95, 0.98, 0.99, 0.999, 0.9999])
-
-    """
-    Graph5: Average contribution by Alpha and Threshold
-    """
-    # graph3d_alpha_threshold(Actions, repeat=1, RF=2)
+    # stackBar(0, Actions, repeat=30, multiArm='greedy', legendLoc='lower right',
+    #          epsilon=[0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
+    #          threshold=0.3, RF=2)
+
+    # data = rep(repeat=30, Actions=Actions, multiArm='greedy', epsilon=0.05)
+    # stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment="0.05 - greedy",legendLoc='lower left')
+    #
+    # data = rep(repeat=30, Actions=Actions, multiArm='greedy', epsilon=0.1)
+    # stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment="0.1 - greedy",legendLoc='lower left')
+    #
+    # data = rep(repeat=30, Actions=Actions, multiArm='greedy', epsilon=0.2)
+    # stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment="0.2 - greedy",legendLoc='lower left')
+
+    # data = rep(repeat=30, Actions=Actions, multiArm='greedy', epsilon=0.6, I=5000)
+    # stackPlot(data, r=0, Iterations=5000, Actions=Actions, titleComment="0.6 - greedy", legendLoc='lower left')
+
+    # data = rep(repeat=30, Actions=Actions, multiArm='decrease', epsilon=0.9)
+    # stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment="0.9 - decrease",legendLoc='lower left')
+
+    # data = rep(repeat=30, Actions=Actions, multiArm='decrease', epsilon=0.99)
+    # stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment="0.99 - decrease",legendLoc='lower left')
+    #
+    # data = rep(repeat=30, Actions=Actions, multiArm='decrease', epsilon=0.999)
+    # stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment="0.999 - decrease",legendLoc='lower left')

     """
     T-Test
@@ -293,6 +433,29 @@ def main():
     # t_test(30, Actions, alpha=(1, 0.45), RF=2, threshold=0.2)  # pvalue=1.3749e-11
     # t_test(30, Actions, alpha=(1, 0.4), RF=2, threshold=0.2)   # pvalue=3.8352e-19
+    # base = repHist(30, Actions, alpha=1, RF=2, threshold=0.2)
+    # for alpha in np.arange(0.8, 1, 0.01):
+    #     compare = repHist(30, Actions, alpha=alpha, RF=2, threshold=0.2)
+    #     print("Alpha=", alpha, stats.ttest_ind(base, compare))
+
+    """
+    Epsilon-decrease 0.99 with 0.1 and 0.999
+    """
+    # base = repHist(30, Actions, multiArm='decrease', epsilon=0.99)
+    # for epsilon in (0.1, 0.999):
+    #     compare = repHist(30, Actions, multiArm='decrease', epsilon=epsilon)
+    #     print("Epsilon=", epsilon, stats.ttest_ind(base, compare))
+
+    """
+    T-TEST for 0.999-decrease 5000 iterations with 0.9
+    """
+    # base = repHist(30, Actions, multiArm='decrease', epsilon=0.9)
+    # compare = repHist(30, Actions, multiArm='decrease', epsilon=0.999, I=5000)
+    # print(stats.ttest_ind(base, compare))

     """
     T-TEST K,P
     """
     # t_test(30, Actions, K=(2, 99), P=0)  # pvalue=0.4278
...
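The commented T-Test blocks all follow one pattern: build two samples of per-trial average contributions with repHist() and hand them to scipy.stats.ttest_ind. A standalone sketch with synthetic stand-ins for the repHist output:

    import numpy as np
    from scipy import stats

    rng = np.random.default_rng(0)
    sample_a = rng.normal(20.0, 2.0, 30)  # stand-in for repHist(..., alpha=1)
    sample_b = rng.normal(21.5, 2.0, 30)  # stand-in for a second setting
    t, p = stats.ttest_ind(sample_a, sample_b)
    print(t, p)  # a small p rejects the hypothesis of equal mean contributions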
utilis.py  deleted 100644 → 0  +0 −18
-file = open("workfile", "r")
-print(file.readline())
-
-
-# regular expression
-def read():
-    map = {'N': 100, 'M': 2}
-    return map
-
-
-def combine_dict(dict1, dict2):
-    return {k: (dict1[k] + dict2[k]) for k in dict1}
\ No newline at end of file
workfile  deleted 100644 → 0  +0 −2
-N,100
-#Number of players in a game