From a5e27101596c47ea1fa8675c06a609680585db2c Mon Sep 17 00:00:00 2001
From: Liyao Zhu <l.zhu34@student.unimelb.edu.au>
Date: Thu, 6 Jun 2019 02:41:04 +1000
Subject: [PATCH] all files commented. Figures listed.

---
 agent.py  |  20 +++-
 game.py   |  63 +++++++-------
 main.py   | 253 ++++++++++++++++++++++++++++++++++++++++++++----------
 utilis.py |  18 ----
 workfile  |   2 -
 5 files changed, 253 insertions(+), 103 deletions(-)
 delete mode 100644 utilis.py
 delete mode 100644 workfile

diff --git a/agent.py b/agent.py
index 4a1fc19..6c6e1d5 100644
--- a/agent.py
+++ b/agent.py
@@ -35,10 +35,7 @@ class Agent:
 
     def updateReward(self, round, action, loss):
         """
-        :param round:
-        :param action:
-        :param loss:
-        :return:
+        Update the Q-table entry for this round and action, using the
+        payoff received.
         """
         newWealth = self.wealth * (1-action) * (1-loss)
         reward = newWealth - self.wealth
@@ -54,9 +51,13 @@ class Agent:
         else:
             print("ERROR: Illegal round number")
             exit(2)
+
+        """Q-learning update rule"""
         self.qTable[round][index] += self.learnRate * (
                 reward + self.discount * maxNextQ - self.qTable[round][index])
-        # print("QTABLE:", self.qTable)
+
+        # if self.iteration == 999:
+        #     print("QTABLE:", self.qTable)
 
         if round == self.R - 1:
             self.iteration += 1
@@ -64,14 +65,15 @@ class Agent:
 
     def chooseAction(self, roundNumber):
-
-        """Method: Q-learning"""
-
+        """
+        Choose an action for the current round, exploring according to the
+        selected multi-armed bandit strategy.
+        :return: an action (float)
+        """
         randomAct = False
         if self.multiArm == 'decrease':
             """Epsilon Decrease"""
             if np.random.uniform(0, 1) <= 1 * self.epsilon ** self.iteration:
-                randomAct = True
+                randomAct = True
 
         elif self.multiArm == 'greedy':
             """EPSILON GREEDY"""
diff --git a/game.py b/game.py
index a91475c..7a93bf8 100644
--- a/game.py
+++ b/game.py
@@ -1,24 +1,30 @@
+"""
+This file implements a 2-player, R-round collective-risk game model. Each
+player may contribute part of their wealth to a common pool to reduce the
+risk of a collective climate catastrophe. In every iteration the N players
+are paired at random on a graph and each pair plays one game, so each player
+plays at least one game per iteration. If N is odd, a player may be drawn
+into several games, in which case the payoffs are averaged.
+
+Author: Liyao Zhu liyao@student.unimelb.edu.au
+Date: Apr. 2019
+"""
+
+
 import numpy as np
 import agent, graph
 
 
 class Game:
-    def __init__(self, N=100, R=1, K=99, P=0, Actions=[0, 0.2, 0.4, 0.6, 0.8], I=1000, RF=0, alpha=1, epsilon=0.1,
-                 multiArm='greedy', threshold=0.8):
-        # datamap = utilis.read()
-        # self.N = datamap['N']  # N-Player Game
-        # self.M = datamap['M']  # Randomly choose M players to play the game (normally 2)
-        # self.RF = datamap['RF']  # Parsed number of risk function chosen for the game
-        # self.alpha = datamap['alpha']  # Loss fraction
-        # self.R = datamap['R']  # Rounds of a game
-
+    def __init__(self, N=100, R=1, K=99, P=0, Actions=[0, 0.2, 0.4, 0.6, 0.8],
+                 I=1000, RF=0, alpha=1, epsilon=0.1, multiArm='greedy',
+                 threshold=0.8):
         self.N = N
         self.M = 2
         self.RF = RF
         self.alpha = alpha
         self.R = R
-        self.threshold = threshold  # Threshold
-        # self.actions = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
+        self.threshold = threshold
         self.actions = Actions
         self.iterations = I
 
@@ -27,11 +33,11 @@ class Game:
         |
         | P: Probability of rewiring each original edge in the graph
         |
-        | K: The number of edges(games) connected to each player. Has to be an even number.
-        |    If 1 is desired, don't use graph model. Max k: n - 2 (for even n) | n - 1 (for odd n)
-        |    * k can be odd as n - 1 (for even n). In cases k = n - 1 (for all n) -> a fully connected graph
+        | K: The number of edges (games) connected to each player. Must be
+        |    an even number. Max K: n - 2 (for even n) | n - 1 (for odd n).
+        |    The only odd value allowed is K = n - 1 (possible when n is
+        |    even); K = n - 1 always yields a fully connected graph.
         """
-        # self.graph_based = True
         self.rewire_p = P
         self.rewire_k = K
         # assert (self.rewire_k < self.N)
 
@@ -39,14 +45,17 @@ class Game:
 
         "Create players"
         self.players = []
-        IW = 100  # Initial Wealth - can be input, can be variable to distinguish population
-        self.totalWealth = self.M * IW  # subject to change
+        IW = 100    # Initial Wealth
+        self.totalWealth = self.M * IW
         for i in range(self.N):
-            self.players.append(agent.Agent(self.R, IW, self.actions, epsilon=epsilon, multiArm=multiArm))
+            self.players.append(agent.Agent(self.R, IW, self.actions,
+                                            epsilon=epsilon, multiArm=multiArm))
 
     def riskfunc(self, contribution, totalwealth):
         """
-        the probability of collective-risk happening, given contribution
+        Implements the available risk functions, selected by self.RF.
+        :return: the probability of the disaster happening, given the pooled
+                 contribution and the total wealth
         """
 
         proportion = contribution / totalwealth
@@ -73,13 +82,16 @@ class Game:
                 return "error"
 
     def play(self):
-
-        # lastStrategyTable = np.zeros((self.N, self.R))
-        # sameStrategyRounds = 0
+        """
+        Play a whole trial of I (default 1000) iterations with N (default
+        100) players.
+        :return: a 3d numpy array recording the number of times each action
+                 was taken in each round of every iteration.
+        """
         results = np.zeros((self.iterations, self.R, len(self.actions)))
 
         """ITERATION"""
         for iter in range(self.iterations):
+            # print("GAME ITERATION", iter)
             actionTable = np.zeros((self.N, self.R))
             strategyTable = np.zeros((self.R, self.N))  # DIFFERENT AXIS R-N
@@ -107,13 +119,6 @@ class Game:
             for i in range(self.N):
                 self.players[i].updateReward(r, actionTable[i][r], lossTable[i][r])
 
-            """Strategy Stats"""
-            # if np.array_equal(strategyTable, lastStrategyTable):
-            #     sameStrategyRounds += 1
-            # else:
-            #     sameStrategyRounds = 0
-            # lastStrategyTable = strategyTable
-
             for r in range(self.R):
                 unique, count = np.unique(strategyTable[r], return_counts=True)
                 round_counter = dict(zip(unique, count))
diff --git a/main.py b/main.py
index cbaf2a5..b2db86b 100644
--- a/main.py
+++ b/main.py
@@ -1,3 +1,12 @@
+"""
+This file contains the plotting methods and t-test implementations. The main
+function reproduces all figures and t-test results reported in the thesis.
+
+Author: Liyao Zhu liyao@student.unimelb.edu.au
+Date: Apr. 2019
+"""
+
+
 import matplotlib.pyplot as plt
 from matplotlib import cm
 from mpl_toolkits.mplot3d import Axes3D
@@ -6,7 +15,10 @@ from scipy import stats
 import game
 
 
-def stackPlot(data, r, Actions, Iterations, titleComment=""):
+def stackPlot(data, r, Actions, Iterations, legendLoc='best', titleComment=""):
+    """
+    Draw a stack plot of the averaged action composition in round r over all
+    iterations.
+ """ A = len(Actions) x = range(Iterations) y = np.zeros((Iterations, A)) @@ -15,12 +27,13 @@ def stackPlot(data, r, Actions, Iterations, titleComment=""): y = np.vstack(y.T) fig, ax = plt.subplots() - ax.stackplot(x, y, labels=Actions, colors=[str(0.9 - 0.9 * x) for x in Actions]) - ax.legend(loc='best') - plt.ylabel('Number of Actions') + ax.stackplot(x, y, labels=Actions, colors=[str(0.9 - 0.9 * x) for x in + Actions]) + ax.legend(loc=legendLoc) + plt.ylabel('Percentage of each action') plt.xlabel('Time(iterations)') - title = 'Average Number of Actions in Round ' + str(r + 1) + title = 'Average Composition of Actions in Round ' + str(r + 1) if titleComment: title += "\n" + titleComment @@ -32,6 +45,11 @@ def stackPlot(data, r, Actions, Iterations, titleComment=""): def rep(repeat=30, R=1, Actions=[0, 0.2, 0.4, 0.6, 0.8], I=1000, **kwargs): + """ + Repeat the game over (30) trials and retrieve the average data of + game.play() + :return: Averaged game results, same shape as the return of game.play() + """ data = np.zeros((I, R, len(Actions))) Actions.sort() for re in range(repeat): @@ -44,6 +62,11 @@ def rep(repeat=30, R=1, Actions=[0, 0.2, 0.4, 0.6, 0.8], I=1000, **kwargs): def averageOfLast(data, Actions, N=100, r=0, lastIterations=100): + """ + Averaged contribution and action counter of last (100) iterations from the + data produced by rep() + :return: a tuple: (average contribution, a dictionary as action counter) + """ sum = 0 action_counter = {action: 0 for action in Actions} @@ -54,16 +77,22 @@ def averageOfLast(data, Actions, N=100, r=0, lastIterations=100): return (sum / (lastIterations * N), action_counter) -def graph_kp3d(Actions, Klist=[2, 4, 8, 10], Plist=[0, 0.3, 0.6, 0.9], repeat=30, N=100): +def graph_kp3d(Actions, Klist=[2, 4, 8, 10], Plist=[0, 0.3, 0.6, 0.9], + repeat=30, N=100, **kwargs): + """ + Draw a 3D graph for graph-based model, showing the effect of K and P on + average contributions. (No effect observed) + """ + K = Klist P = Plist - meanA = np.zeros((len(K), len(P))) for k in range(len(K)): for p in range(len(P)): - data = rep(repeat, K=K[k], P=P[p], Actions=Actions) # Specify other params by adding here - meanA[k][p] = averageOfLast(data, Actions, lastIterations=100, N=N)[0] # Doing the first round only -- for now + data = rep(repeat, K=K[k], P=P[p], Actions=Actions, **kwargs) + meanA[k][p] = averageOfLast(data, Actions, lastIterations=100, + N=N)[0] print("k, p, mean", k, p, meanA[k][p]) P, K = np.meshgrid(P, K) @@ -78,19 +107,26 @@ def graph_kp3d(Actions, Klist=[2, 4, 8, 10], Plist=[0, 0.3, 0.6, 0.9], repeat=30 plt.show() -def graph3d_alpha_threshold(Actions, repeat=30, AlphaList=np.arange(0, 1.01, 0.05), ThreshList=np.arange(0.1, 1.1, 0.1), N=100, **kwargs): - +def graph3d_alpha_threshold(Actions, repeat=30, + AlphaList=np.arange(0, 1.01, 0.05), + ThreshList=np.arange(0.1, 1.05, 0.05), + N=100, **kwargs): + """ + Draw two 3D graphs showing the average contribution and the average + contribution divided by threshold on two parameters: alpha and threshold + """ mean = np.zeros((len(ThreshList), len(AlphaList))) ratio_by_threshold = np.zeros((len(ThreshList), len(AlphaList))) for t in range(len(ThreshList)): for a in range(len(AlphaList)): print("Calculating... 
t, alpha = ", t, a) - data = rep(repeat=repeat, Actions=Actions, alpha=AlphaList[a], threshold=ThreshList[t], **kwargs) - mean[t][a] = averageOfLast(data, Actions, lastIterations=100, N=N)[0] + data = rep(repeat=repeat, Actions=Actions, alpha=AlphaList[a], + threshold=ThreshList[t], **kwargs) + mean[t][a] = averageOfLast(data, Actions, lastIterations=100, + N=N)[0] ratio_by_threshold[t] = mean[t] / ThreshList[t] - A, T = np.meshgrid(AlphaList, ThreshList) fig = plt.figure() @@ -119,11 +155,12 @@ def graph3d_alpha_threshold(Actions, repeat=30, AlphaList=np.arange(0, 1.01, 0.0 plt.show() -def stackBar(r, Actions, repeat=30, multiArm='greedy', **kwargs): # Plotting the data for round r - - # if len(kwargs) != 1: - # print("ERROR, Stack Bar Graph Expects 1 List, gets:", len(kwargs)) - # key, alist = list(kwargs.items())[0] +def stackBar(r, Actions, repeat=30, multiArm='greedy', legendLoc='best', + **kwargs): + """ + Draw a stack bar graph, to compare the composition of actions on one + parameter, specified as a list in **kwargs + """ key = -1 alist = [] @@ -152,14 +189,17 @@ def stackBar(r, Actions, repeat=30, multiArm='greedy', **kwargs): # Plotting th newKwargs['K'] = alist[al] - 1 elif 'N' not in newKwargs.keys(): newKwargs['N'] = 100 # default value - data = rep(repeat, Actions=Actions, multiArm=multiArm, **newKwargs) / newKwargs['N'] * 100 - action_counter = averageOfLast(data, Actions, r=r, lastIterations=100)[1] + data = rep(repeat, Actions=Actions, multiArm=multiArm, **newKwargs) /\ + newKwargs['N'] * 100 + action_counter = averageOfLast(data, Actions, r=r, + lastIterations=100)[1] for a in range(A): count[a, al] = action_counter[Actions[a]] base = 0 for a in range(A): - p.append(plt.bar(ind, count[a], width, bottom=base, color=str(0.9 - 0.9 * Actions[a]))) + p.append(plt.bar(ind, count[a], width, bottom=base, + color=str(0.9 - 0.9 * Actions[a]))) base += count[a] plt.ylabel('Percentage of Actions') @@ -167,14 +207,20 @@ def stackBar(r, Actions, repeat=30, multiArm='greedy', **kwargs): # Plotting th plt.xlabel(key + ' (' + multiArm + ')') else: plt.xlabel(key) - plt.title('Average Number of Actions in Round ' + str(r + 1)) + plt.title('Average Composition of Actions in Round ' + str(r + 1)) plt.xticks(ind, alist) plt.yticks(np.arange(0, 101, 10)) - plt.legend(tuple([p[x][0] for x in range(A)][::-1]), tuple(Actions[::-1]), loc='best') + plt.legend(tuple([p[x][0] for x in range(A)][::-1]), tuple(Actions[::-1]), + loc=legendLoc) plt.show() -def t_test(repeat, Actions, r=0, R=1, I=1000, lastIterations=100, N=100, byThreshold=False, **kwargs): +def t_test(repeat, Actions, r=0, R=1, I=1000, lastIterations=100, N=100, + byThreshold=False, **kwargs): + """ + Compute the p-value of average contributions on two values of one + parameter, specified as a tuple in **kwargs + """ key = -1 atuple = () for k, v in kwargs.items(): @@ -191,7 +237,8 @@ def t_test(repeat, Actions, r=0, R=1, I=1000, lastIterations=100, N=100, byThres for s in (0, 1): newArgs = {**{key: atuple[s]}, **kwargs} - samples[s] = repHist(repeat, Actions, R, r, I, lastIterations, N, **newArgs) + samples[s] = repHist(repeat, Actions, R, r, I, lastIterations, N, + **newArgs) if byThreshold: samples[s] /= newArgs["threshold"] print("Sample", s, samples[s]) @@ -199,7 +246,11 @@ def t_test(repeat, Actions, r=0, R=1, I=1000, lastIterations=100, N=100, byThres print(stats.ttest_ind(samples[0], samples[1])) -def repHist(repeat, Actions, R=1, r=0, I=1000, lastIterations=100, N=100, **kwargs): +def repHist(repeat, Actions, R=1, r=0, I=1000, 
+            **kwargs):
+    """
+    :return: an array of the average contribution from each repetition
+    """
     hist = np.zeros(repeat)
     for re in range(repeat):
         # print("HistREP", re)
@@ -209,10 +260,8 @@ def repHist(repeat, Actions, R=1, r=0, I=1000, lastIterations=100, N=100, **kwargs):
     return hist
 
 
-
-
 def main():
-    # Read-in or Define Parameters
+    # Default values
 
     N = 100
     R = 1
@@ -222,6 +271,76 @@ def main():
     RF = 0
     alpha = 1
     Actions = [0, 0.2, 0.4, 0.6, 0.8]
+    repeat = 1
+
+
+    """Fig. 2"""
+    # data = rep(repeat=repeat, N=100, alpha=0.8, R=8)
+    # stackPlot(data, r=0, Iterations=I, Actions=Actions,
+    #           legendLoc='lower right')
+
+
+    """Fig. 3"""
+    # stackBar(0, Actions, repeat=repeat,
+    #          alpha=[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
+    #          legendLoc='lower left')
+
+    """Fig. 4"""
+    # data = rep(repeat, R=8)
+    # for r in [0, 1, 3]:
+    #     stackPlot(data, r, Actions, I, legendLoc='lower right')
+
+    """Fig. 5"""
+    # stackBar(0, Actions, repeat=repeat,
+    #          threshold=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
+    #          legendLoc='upper left', RF=2)
+
+    """Fig. 6"""
+    # data = rep(repeat=repeat, Actions=Actions, R=1, I=I, RF=2, threshold=0.2)
+    # stackPlot(data, r=0, Iterations=I, Actions=Actions,
+    #           titleComment="threshold = 0.2")
+
+    """Fig. 7 & 8 - 3D graph comparing alpha and threshold"""
+    # graph3d_alpha_threshold(Actions, repeat=repeat, RF=2)
+
+    """Fig. 9 - simple line graph comparing alpha for threshold=0.2"""
+    # alphaList = np.arange(0, 1.01, 0.02)
+    # mean = np.zeros(len(alphaList))
+    # for i in range(len(alphaList)):
+    #     data = rep(repeat, alpha=alphaList[i], threshold=0.2, RF=2)
+    #     mean[i] = averageOfLast(data, Actions)[0]
+    #
+    # plt.plot([0.2, 0.2], '--', color='0.5')
+    # plt.plot(alphaList, mean, color='black')
+    #
+    # plt.ylabel('Average Contributions')
+    # plt.xlabel('Alpha, the loss fraction')
+    # plt.show()
+
+    """Fig. 10 - Comparing composition on epsilon (greedy)"""
+    # stackBar(0, Actions, repeat=repeat, multiArm='greedy',
+    #          legendLoc='lower right',
+    #          epsilon=[0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
+
+    """Fig. 11 - Extending 0.2-greedy to 5000 iterations"""
+    # data = rep(repeat=repeat, Actions=Actions, multiArm='greedy',
+    #            epsilon=0.2, I=5000)
+    # stackPlot(data, r=0, Iterations=5000, Actions=Actions,
+    #           titleComment="0.2 - greedy", legendLoc='lower left')
+
+    """Fig. 12 - Comparing composition on epsilon (decrease)"""
+    # stackBar(0, Actions, repeat=repeat, multiArm='decrease',
+    #          legendLoc='lower left',
+    #          epsilon=[0.1, 0.4, 0.8, 0.9, 0.95, 0.98, 0.99, 0.999, 0.9999])
+
+    """Fig. 13 - Extending 0.999-decrease to 5000 iterations"""
+    # data = rep(repeat=repeat, Actions=Actions, multiArm='decrease',
+    #            epsilon=0.999, I=5000)
+    # stackPlot(data, r=0, Iterations=5000, Actions=Actions,
+    #           titleComment="0.999 - decrease", legendLoc='lower left')
+
+
+
     """
     Graph1:
     Number of Actions of Round r (start by 0) by Iteration
@@ -242,35 +361,56 @@ def main():
     # data = rep(repeat=30, N=100, K=k, Actions=Actions, R=1, I=I, P=p)
     # stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment=("K=" + str(k) + ", P=" + str(p)))
 
-
-    # data = rep(repeat=30, Actions=Actions, R=1, I=I, RF=2, threshold=0.3)
-    # stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment="threshold = 0.3")
-
-
     """
     Graph2: Average contribution by K, P
     """
-
     # graph_kp3d(Actions)
 
     """
     Graph3: Comparing a parameter (put in a list)
     """
-    # stackBar(0, Actions, repeat=1, alpha=[0, 0.2, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
     # stackBar(0, Actions, repeat=1, N=[5, 10, 20, 50, 100], threshold=0.6, RF=2)
-    stackBar(0, Actions, repeat=30, RF=2, threshold=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
+    # stackBar(0, Actions, repeat=30, RF=2, threshold=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
 
     """
-    Graph4: Actions by different epsilon method + value
+    Graph4: Actions by different epsilon with multi-arm bandit algorithms
     """
-    # stackBar(0, Actions, repeat=1, multiArm='greedy', epsilon=[0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
-    # stackBar(0, Actions, repeat=1, multiArm='decrease', epsilon=[0.8, 0.9, 0.95, 0.98, 0.99, 0.999, 0.9999])
 
-    """
-    Graph5: Average contribution by Alpha and Threshold
-    """
-    # graph3d_alpha_threshold(Actions, repeat=1, RF=2)
+    # stackBar(0, Actions, repeat=30, multiArm='greedy', legendLoc='lower right',
+    #          epsilon=[0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
+    #          threshold=0.3, RF=2)
+
+
+    # data = rep(repeat=30, Actions=Actions, multiArm='greedy', epsilon=0.05)
+    # stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment="0.05 - greedy", legendLoc='lower left')
+    #
+    # data = rep(repeat=30, Actions=Actions, multiArm='greedy', epsilon=0.1)
+    # stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment="0.1 - greedy", legendLoc='lower left')
+    #
+    # data = rep(repeat=30, Actions=Actions, multiArm='greedy', epsilon=0.2)
+    # stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment="0.2 - greedy", legendLoc='lower left')
+
+
+
+    # data = rep(repeat=30, Actions=Actions, multiArm='greedy', epsilon=0.6, I=5000)
+    # stackPlot(data, r=0, Iterations=5000, Actions=Actions, titleComment="0.6 - greedy", legendLoc='lower left')
+
+    # data = rep(repeat=30, Actions=Actions, multiArm='decrease', epsilon=0.9)
+    # stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment="0.9 - decrease", legendLoc='lower left')
+
+    # data = rep(repeat=30, Actions=Actions, multiArm='decrease', epsilon=0.99)
+    # stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment="0.99 - decrease", legendLoc='lower left')
+    #
+    # data = rep(repeat=30, Actions=Actions, multiArm='decrease', epsilon=0.999)
+    # stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment="0.999 - decrease", legendLoc='lower left')
+
+
+
+
+
+
 
     """
     T-Test
@@ -293,6 +433,29 @@ def main():
     # t_test(30, Actions, alpha=(1, 0.45), RF=2, threshold=0.2)  #pvalue=1.3749e-11
     # t_test(30, Actions, alpha=(1, 0.4), RF=2, threshold=0.2)   #pvalue=3.8352e-19
 
+    # base = repHist(30, Actions, alpha=1, RF=2, threshold=0.2)
+    # for alpha in np.arange(0.8, 1, 0.01):
+    #     compare = repHist(30, Actions, alpha=alpha, RF=2, threshold=0.2)
+    #     print("Alpha=", alpha, stats.ttest_ind(base, compare))
+
+    """Epsilon-decrease 0.99 with 0.1 and 0.999"""
+
+    # base = repHist(30, Actions, multiArm='decrease', epsilon=0.99)
+    # for epsilon in (0.1, 0.999):
+    #     compare = repHist(30, Actions, multiArm='decrease', epsilon=epsilon)
+    #     print("Epsilon=", epsilon, stats.ttest_ind(base, compare))
+
+    """T-TEST for 0.999-decrease 5000 iterations with 0.9"""
+    # base = repHist(30, Actions, multiArm='decrease', epsilon=0.9)
+    # compare = repHist(30, Actions, multiArm='decrease', epsilon=0.999, I=5000)
+    # print(stats.ttest_ind(base, compare))
+
+
+
+
+
+
     """T-TEST K,P"""
     # t_test(30, Actions, K=(2, 99), P=0)     #pvalue=0.4278
 
diff --git a/utilis.py b/utilis.py
deleted file mode 100644
index 8d46677..0000000
--- a/utilis.py
+++ /dev/null
@@ -1,18 +0,0 @@
-file = open("workfile","r")
-print(file.readline())
-
-
-# regular expression
-
-
-def read():
-
-
-    map = {'N':100, 'M':2}
-
-    return map
-
-
-
-def combine_dict(dict1, dict2):
-    return {k: (dict1[k] + dict2[k]) for k in dict1}
\ No newline at end of file
diff --git a/workfile b/workfile
deleted file mode 100644
index 07cfd59..0000000
--- a/workfile
+++ /dev/null
@@ -1,2 +0,0 @@
-N,100
-#Number of players in a game
--
GitLab
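
Notes (illustrative sketches accompanying the patch; not part of the diff itself):

The update in agent.updateReward is the standard one-step Q-learning rule.
A minimal, self-contained sketch of that rule, with assumed values for
learnRate and discount (both are set elsewhere in agent.py, outside this
diff):

    import numpy as np

    learnRate, discount = 0.1, 0.9   # assumed values, not shown in the diff
    R = 2                            # rounds per game
    qTable = np.zeros((R, 5))        # one row per round, one column per action

    def update(qTable, rnd, index, reward):
        # Bootstrap on the best entry of the next round; the final round has
        # no successor, so its target is the immediate reward alone.
        maxNextQ = qTable[rnd + 1].max() if rnd < R - 1 else 0.0
        qTable[rnd][index] += learnRate * (reward + discount * maxNextQ
                                           - qTable[rnd][index])

    # As in updateReward: wealth 100, action 0.2, loss 0 gives newWealth = 80,
    # i.e. reward = -20.
    update(qTable, rnd=0, index=1, reward=-20.0)
    print(qTable[0])                 # [ 0. -2.  0.  0.  0.]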
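
The two multiArm settings in agent.chooseAction differ only in how the
exploration probability evolves. A compact sketch of both schedules, using
the same epsilon semantics as the diff:

    import numpy as np

    def explores(multiArm, epsilon, iteration):
        # 'greedy': a constant exploration rate epsilon.
        # 'decrease': epsilon ** iteration decays toward zero, so late
        # iterations are almost purely greedy.
        p = epsilon if multiArm == 'greedy' else epsilon ** iteration
        return np.random.uniform(0, 1) <= p

With epsilon = 0.999 the decreasing schedule still explores with probability
0.999 ** 1000 ~= 0.37 at iteration 1000, which is why Fig. 13 extends those
runs to 5000 iterations (0.999 ** 5000 ~= 0.007).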
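
game.riskfunc maps the pooled contribution to a disaster probability; the
exact curves selected by RF are outside this diff's context. Judging by how
`threshold` is passed together with RF=2 in main(), one variant is plausibly
a step risk. A purely illustrative guess at that shape (a hypothetical
helper, not the project's code):

    def step_risk(contribution, totalwealth, threshold=0.8):
        # Disaster is certain while the pooled contribution stays below the
        # threshold share of total wealth, and is averted once it is met.
        proportion = contribution / totalwealth
        return 1.0 if proportion < threshold else 0.0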
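
graph.py is not touched by this patch, so the pairing structure is an
inference: the K (edges per player) and P (rewiring probability) parameters
match a Watts-Strogatz small-world graph in which every edge is one 2-player
game. A sketch under that assumption, using networkx:

    import networkx as nx

    # N players on a ring, each wired to its K nearest neighbours; each edge
    # is rewired to a random endpoint with probability P.
    G = nx.watts_strogatz_graph(n=100, k=4, p=0.3)
    games = list(G.edges())   # each pair (i, j) plays one game per iteration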
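
The significance tests in main() compare two samples of per-trial average
contributions with SciPy's independent two-sample t-test. The shape of that
computation, with random placeholder data standing in for repHist() output:

    import numpy as np
    from scipy import stats

    rng = np.random.default_rng(0)
    base = rng.normal(loc=20.0, scale=2.0, size=30)     # 30 repetitions
    compare = rng.normal(loc=18.5, scale=2.0, size=30)
    print(stats.ttest_ind(base, compare))   # statistic and two-sided p-value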