Skip to content
Snippets Groups Projects
Commit 03431897 authored by Liyao Zhu
Browse files

KWARGS:multiarm, threshold

STACKBAR: N(percentage), Epsilon,
STACKPLOT: Save figs

Graph: debug for odd N
Agent: Allow both epsilon methods
parent 2cbe44dc
No related branches found
No related tags found
No related merge requests found
...@@ -16,13 +16,13 @@ import numpy as np ...@@ -16,13 +16,13 @@ import numpy as np
class Agent: class Agent:
def __init__(self, rounds, initialWealth, availableActions, alpha=0.1, def __init__(self, rounds, initialWealth, availableActions, alpha=0.1,
gamma=0.9, epsilon=0.1): gamma=0.9, epsilon=0.1, multiArm='greedy'):
self.R = rounds self.R = rounds
self.initialWealth = initialWealth self.initialWealth = initialWealth
self.wealth = initialWealth self.wealth = initialWealth
self.availableActions = availableActions self.availableActions = availableActions
# self.iteration = 0 self.iteration = 0
"""initialise Q table to small random numbers""" """initialise Q table to small random numbers"""
self.qTable = np.random.rand(self.R, len(self.availableActions)) * 0.01 self.qTable = np.random.rand(self.R, len(self.availableActions)) * 0.01
...@@ -31,6 +31,7 @@ class Agent: ...@@ -31,6 +31,7 @@ class Agent:
self.learnRate = alpha self.learnRate = alpha
self.discount = gamma self.discount = gamma
self.epsilon = epsilon self.epsilon = epsilon
self.multiArm = multiArm
def updateReward(self, round, action, loss): def updateReward(self, round, action, loss):
""" """
...@@ -57,8 +58,8 @@ class Agent: ...@@ -57,8 +58,8 @@ class Agent:
reward + self.discount * maxNextQ - self.qTable[round][index]) reward + self.discount * maxNextQ - self.qTable[round][index])
# print("QTABLE:", self.qTable) # print("QTABLE:", self.qTable)
# if round == self.R - 1: if round == self.R - 1:
# self.iteration += 1 self.iteration += 1
# print("Player iteration +1 =", self.iteration) # print("Player iteration +1 =", self.iteration)
...@@ -66,14 +67,18 @@ class Agent: ...@@ -66,14 +67,18 @@ class Agent:
"""Method: Q-learning""" """Method: Q-learning"""
randomAct = False
if self.multiArm == 'decrease':
"""Epsilon Decrease""" """Epsilon Decrease"""
if np.random.uniform(0, 1) <= 1 * self.epsilon ** self.iteration:
randomAct = True
# if np.random.uniform(0, 1) <= 1 * self.epsilon ** self.iteration: elif self.multiArm == 'greedy':
"""EPSILON GREEDY""" """EPSILON GREEDY"""
if np.random.uniform(0, 1) <= self.epsilon: if np.random.uniform(0, 1) <= self.epsilon:
randomAct = True
if randomAct:
return np.random.choice(self.availableActions) return np.random.choice(self.availableActions)
else: else:
index = np.argmax(self.qTable[roundNumber]) index = np.argmax(self.qTable[roundNumber])
......
...@@ -6,7 +6,8 @@ import utilis, agent, graph ...@@ -6,7 +6,8 @@ import utilis, agent, graph
class game: class game:
def __init__(self, N=100, R=1, K=99, P=0, Actions=[0, 0.2, 0.4, 0.6, 0.8], I=1000, RF=0, alpha=1): def __init__(self, N=100, R=1, K=99, P=0, Actions=[0, 0.2, 0.4, 0.6, 0.8], I=1000, RF=0, alpha=1, epsilon=0.1,
multiArm='greedy', threshold=0.8):
# datamap = utilis.read() # datamap = utilis.read()
# self.N = datamap['N'] # N-Player Game # self.N = datamap['N'] # N-Player Game
# self.M = datamap['M'] # Randomly choose M players to play the game (normally 2) # self.M = datamap['M'] # Randomly choose M players to play the game (normally 2)
...@@ -14,13 +15,12 @@ class game: ...@@ -14,13 +15,12 @@ class game:
# self.alpha = datamap['alpha'] # Loss fraction # self.alpha = datamap['alpha'] # Loss fraction
# self.R = datamap['R'] # Rounds of a game # self.R = datamap['R'] # Rounds of a game
self.N = N self.N = N
self.M = 2 self.M = 2
self.RF = RF self.RF = RF
self.alpha = alpha self.alpha = alpha
self.R = R self.R = R
self.threshold = 0.5 # Threshold self.threshold = threshold # Threshold
# self.actions = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1] # self.actions = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
self.actions = Actions self.actions = Actions
self.iterations = I self.iterations = I
...@@ -40,15 +40,12 @@ class game: ...@@ -40,15 +40,12 @@ class game:
# assert (self.rewire_k < self.N) # assert (self.rewire_k < self.N)
self.graph = graph.Graph(self.N, self.rewire_k, self.rewire_p) self.graph = graph.Graph(self.N, self.rewire_k, self.rewire_p)
"Create players" "Create players"
self.players = [] self.players = []
IW = 100 # Initial Wealth - can be input, can be variable to distinguish population IW = 100 # Initial Wealth - can be input, can be variable to distinguish population
self.totalWealth = self.M * IW # subject to change self.totalWealth = self.M * IW # subject to change
for i in range(self.N): for i in range(self.N):
self.players.append(agent.Agent(self.R, IW, self.actions)) self.players.append(agent.Agent(self.R, IW, self.actions, epsilon=epsilon, multiArm=multiArm))
def riskfunc(self, contribution, totalwealth): def riskfunc(self, contribution, totalwealth):
""" """
...@@ -63,22 +60,21 @@ class game: ...@@ -63,22 +60,21 @@ class game:
elif self.RF == 1: elif self.RF == 1:
if proportion >= 0.5: if proportion >= self.threshold:
return 0 return 0
else: else:
return 1 return 1
elif self.RF == 2: elif self.RF == 2:
threshold = 0.5
if proportion < threshold: if proportion < self.threshold:
return 1 - proportion / threshold return 1 - proportion / self.threshold
else: else:
return 0 return 0
return "error" return "error"
def play(self): def play(self):
# lastStrategyTable = np.zeros((self.N, self.R)) # lastStrategyTable = np.zeros((self.N, self.R))
...@@ -135,9 +131,7 @@ class game: ...@@ -135,9 +131,7 @@ class game:
return results return results
def stackPlot(data, r, Actions, Iterations, titleComment=""): def stackPlot(data, r, Actions, Iterations, titleComment=""):
A = len(Actions) A = len(Actions)
x = range(Iterations) x = range(Iterations)
y = np.zeros((Iterations, A)) y = np.zeros((Iterations, A))
...@@ -147,25 +141,30 @@ def stackPlot(data, r, Actions, Iterations, titleComment = ""): ...@@ -147,25 +141,30 @@ def stackPlot(data, r, Actions, Iterations, titleComment = ""):
fig, ax = plt.subplots() fig, ax = plt.subplots()
# grays = np.arange(0, 1, (max(Actions) - min(Actions))/A) # grays = np.arange(0, 1, (max(Actions) - min(Actions))/A)
ax.stackplot(x, y, labels=Actions, colors=[str(1 - x) for x in Actions]) ax.stackplot(x, y, labels=Actions, colors=[str(0.9 - 0.9 * x) for x in Actions])
ax.legend(loc='lower right') ax.legend(loc='lower right')
plt.ylabel('Number of Actions') plt.ylabel('Number of Actions')
plt.xlabel('Time(iterations)') plt.xlabel('Time(iterations)')
title = 'Average Number of Actions in Round ' + str(r+1) + ')' title = 'Average Number of Actions in Round ' + str(r + 1)
if not titleComment: if titleComment:
title += "\n" + titleComment title += "\n" + titleComment
plt.title(title) plt.title(title)
plt.savefig(titleComment + " in round " + str(r+1) + ".jpg")
plt.show() plt.show()
def rep(repeat, N=100, R=1, K=99, P=0, Actions=[0, 0.2, 0.4, 0.6, 0.8], I=1000, RF=0, alpha=1): # def rep(repeat, N=100, R=1, K=99, P=0, Actions=[0, 0.2, 0.4, 0.6, 0.8], I=1000, RF=0, alpha=1, epsilon=0.1, multiArm='greedy'):
def rep(repeat=30, R=1, Actions=[0, 0.2, 0.4, 0.6, 0.8], I=1000, **kwargs):
data = np.zeros((I, R, len(Actions))) data = np.zeros((I, R, len(Actions)))
Actions.sort() Actions.sort()
for re in range(repeat): for re in range(repeat):
print("REP", re) print("REP", re)
g = game(N, R, K, P, Actions, I, RF, alpha) g = game(R=R, Actions=Actions, I=I, **kwargs)
result = g.play() result = g.play()
data += result data += result
data /= repeat data /= repeat
...@@ -183,8 +182,7 @@ def averageOfLast(data, Actions, r=0, lastIterations=100 ): ...@@ -183,8 +182,7 @@ def averageOfLast(data, Actions, r=0, lastIterations=100 ):
return (sum / 100, action_counter) return (sum / 100, action_counter)
def graph_kp3d(Actions, Klist=[99], Plist=[0, 0.3, 0.6, 0.9], repeat=30):
def graph_kp3d(Actions, Klist=[2, 4], Plist=[0.2, 0.5, 0.8], repeat=1):
K = Klist K = Klist
P = Plist P = Plist
...@@ -192,7 +190,7 @@ def graph_kp3d(Actions, Klist=[2, 4], Plist=[0.2, 0.5, 0.8], repeat=1): ...@@ -192,7 +190,7 @@ def graph_kp3d(Actions, Klist=[2, 4], Plist=[0.2, 0.5, 0.8], repeat=1):
for k in range(len(K)): for k in range(len(K)):
for p in range(len(P)): for p in range(len(P)):
data = rep(repeat, K=K[k], P=P[p]) # Specify other params by adding here or change default of rep data = rep(repeat, K=K[k], P=P[p], Actions=Actions) # Specify other params by adding here
meanA[k][p] = averageOfLast(data, Actions, lastIterations=100)[0] # Doing the first round only -- for now meanA[k][p] = averageOfLast(data, Actions, lastIterations=100)[0] # Doing the first round only -- for now
print("k, p, mean", k, p, meanA[k][p]) print("k, p, mean", k, p, meanA[k][p])
...@@ -208,16 +206,40 @@ def graph_kp3d(Actions, Klist=[2, 4], Plist=[0.2, 0.5, 0.8], repeat=1): ...@@ -208,16 +206,40 @@ def graph_kp3d(Actions, Klist=[2, 4], Plist=[0.2, 0.5, 0.8], repeat=1):
plt.show() plt.show()
def stackBar_alpha(r, Actions, alphaList, repeat=1): # Plotting the data for round r def stackBar(r, Actions, repeat=30, multiArm='greedy', **kwargs): # Plotting the data for round r
# if len(kwargs) != 1:
# print("ERROR, Stack Bar Graph Expects 1 List, gets:", len(kwargs))
# key, alist = list(kwargs.items())[0]
key = -1
alist = []
for k, v in kwargs.items():
if isinstance(v, list):
if key == -1:
key = k
alist = v
else:
print("ERROR, Stack Bar Graph Expects Only 1 List to Compare")
exit(4)
del kwargs[k]
print("Comparing:", key)
print("On:", alist)
A = len(Actions) A = len(Actions)
p = [] p = []
count = np.zeros((A, len(alphaList))) # of each action in each iter count = np.zeros((A, len(alist))) # of each action in each iter
ind = np.arange(len(alphaList)) ind = np.arange(len(alist))
width = 0.3 width = 0.3
for al in range(len(alphaList)): for al in range(len(alist)):
data = rep(repeat, Actions=Actions, alpha=alphaList[al]) newKwargs = {**{key: alist[al]}, **kwargs}
if key == 'N':
newKwargs['K'] = alist[al] - 1
elif 'N' not in newKwargs.keys():
newKwargs['N'] = 100 # default value
data = rep(repeat, Actions=Actions, multiArm=multiArm, **newKwargs) / newKwargs['N'] * 100
action_counter = averageOfLast(data, Actions, r, 100)[1] action_counter = averageOfLast(data, Actions, r, 100)[1]
for a in range(A): for a in range(A):
count[a, al] = action_counter[Actions[a]] count[a, al] = action_counter[Actions[a]]
...@@ -227,18 +249,19 @@ def stackBar_alpha(r, Actions, alphaList, repeat=1): # Plotting the data for ...@@ -227,18 +249,19 @@ def stackBar_alpha(r, Actions, alphaList, repeat=1): # Plotting the data for
p.append(plt.bar(ind, count[a], width, bottom=base, color=str(0.9 - 0.9 * Actions[a]))) p.append(plt.bar(ind, count[a], width, bottom=base, color=str(0.9 - 0.9 * Actions[a])))
base += count[a] base += count[a]
plt.ylabel('Number of Actions') plt.ylabel('Percentage of Actions')
plt.xlabel('Alpha, the loss fraction') if key == 'epsilon':
plt.xlabel(key + ' (' + multiArm + ')')
else:
plt.xlabel(key)
plt.title('Average Number of Actions in Round ' + str(r + 1)) plt.title('Average Number of Actions in Round ' + str(r + 1))
plt.xticks(ind, alphaList) plt.xticks(ind, alist)
# plt.yticks(np.arange(0, 81, 10)) plt.yticks(np.arange(0, 101, 10))
plt.legend(tuple([p[x][0] for x in range(A)][::-1]), tuple(Actions[::-1]), loc='lower left') plt.legend(tuple([p[x][0] for x in range(A)][::-1]), tuple(Actions[::-1]), loc='best')
plt.show() plt.show()
def main(): def main():
# Read-in or Define Parameters # Read-in or Define Parameters
N = 100 N = 100
...@@ -250,16 +273,23 @@ def main(): ...@@ -250,16 +273,23 @@ def main():
alpha = 1 alpha = 1
Actions = [0, 0.2, 0.4, 0.6, 0.8] Actions = [0, 0.2, 0.4, 0.6, 0.8]
""" """
Graph1: Number of Actions of Round r (start by 0) by Iteration Graph1: Number of Actions of Round r (start by 0) by Iteration
""" """
# Repeat game and get the averaged data
# RepeatTimes = 30 # RepeatTimes = 30
# data = rep(RepeatTimes, N, R, K, P, Actions, I, RF, alpha) # for N in [5, 10, 20, 50, 100]:
# # K = N - 1
# for R in [1, 2, 4]:
# for alpha in [0.2, 0.4, 0.6, 0.8, 1]:
# data = rep(RepeatTimes, R, Actions, I, N=N, K=K, alpha=alpha)
# for r in range(R): # for r in range(R):
# stackPlot(data, r, Actions, I, "Fully-Mixed Graph") # stackPlot(data, r, Actions, I, titleComment="N="+ str(N) + ", R=" + str(R) + ", alpha=" +str(alpha) + ", Well-Mixed graph")
for k in [2, 4, 10, 40, 90, 99]:
data = rep(repeat=30, N=100, K=k, Actions=Actions, R=1, I=I, P=P)
stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment=("K=" + str(k) + ", P=" + str(P)))
""" """
Graph2: Average contribution by K, P Graph2: Average contribution by K, P
...@@ -270,7 +300,16 @@ def main(): ...@@ -270,7 +300,16 @@ def main():
""" """
Graph3: Actions by different alpha value Graph3: Actions by different alpha value
""" """
stackBar_alpha(0, Actions, alphaList=[0, 0.2, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]) # stackBar(0, Actions, repeat=1, alpha=[0, 0.2, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
# stackBar(0, Actions, repeat=1, N=[5, 10, 20, 50, 100])
# stackBar(0, Actions, repeat=1, RF=2, threshold=[0.2, 0.4, 0.6, 0.8, 1])
"""
Graph4: Actions by different epsilon method + value
"""
# stackBar(0, Actions, repeat=1, multiArm='greedy', epsilon=[0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
# stackBar(0, Actions, repeat=30, multiArm='decrease', epsilon=[0.8, 0.9, 0.95, 0.98, 0.99, 0.999, 0.9999])
if __name__ == '__main__': if __name__ == '__main__':
main() main()
\ No newline at end of file
...@@ -72,8 +72,9 @@ class Graph: ...@@ -72,8 +72,9 @@ class Graph:
extraNode = np.random.randint(0, self.N) extraNode = np.random.randint(0, self.N)
while extraNode == permutation[self.N - 1]: while extraNode == permutation[self.N - 1]:
extraNode = np.random.randint(0, self.N) extraNode = np.random.randint(0, self.N)
np.append(permutation, extraNode) permutation = np.append(permutation, extraNode)
selectedNodes[extraNode] += 1 selectedNodes[extraNode] += 1
select = permutation.reshape((int(len(permutation)/2), 2)) select = permutation.reshape((int(len(permutation)/2), 2))
else: else:
while edges: # Loop when edges is not empty while edges: # Loop when edges is not empty
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment