KWARGS:multiarm, threshold

STACKBAR: N (percentage), epsilon. STACKPLOT: save figs. Graph: debug for odd N. Agent: allow both epsilon methods.

KWARGS:multiarm, threshold
03431897 · Liyao Zhu · 2cbe44dc · 03431897 · 03431897 · 03431897
Commit 03431897 authored Apr 28, 2019 by Liyao Zhu
--- a/agent.py
+++ b/agent.py
@@ -16,13 +16,13 @@ import numpy as np
 class Agent:
    def __init__(self, rounds, initialWealth, availableActions, alpha=0.1,
-                 gamma=0.9, epsilon=0.1):
+                 gamma=0.9, epsilon=0.1, multiArm='greedy'):
        self.R = rounds
        self.initialWealth = initialWealth
        self.wealth = initialWealth
        self.availableActions = availableActions
-        # self.iteration = 0
+        self.iteration = 0
        """initialise Q table to small random numbers"""
        self.qTable = np.random.rand(self.R, len(self.availableActions)) * 0.01
@@ -31,6 +31,7 @@ class Agent:
        self.learnRate = alpha
        self.discount  = gamma
        self.epsilon   = epsilon
+        self.multiArm  = multiArm
    def updateReward(self, round, action, loss):
        """
@@ -57,8 +58,8 @@ class Agent:
                reward + self.discount * maxNextQ - self.qTable[round][index])
        # print("QTABLE:", self.qTable)
-        # if round == self.R - 1:
+        if round == self.R - 1:
-        #     self.iteration += 1
+            self.iteration += 1
            # print("Player iteration +1 =", self.iteration)
@@ -66,14 +67,18 @@ class Agent:
        """Method: Q-learning"""
+        randomAct = False
+        if self.multiArm == 'decrease':
            """Epsilon Decrease"""
+            if np.random.uniform(0, 1) <= 1 * self.epsilon ** self.iteration:
+               randomAct = True
-        # if np.random.uniform(0, 1) <= 1 * self.epsilon ** self.iteration:
+        elif self.multiArm == 'greedy':
            """EPSILON GREEDY"""
            if np.random.uniform(0, 1) <= self.epsilon:
+                randomAct = True
+        if randomAct:
            return np.random.choice(self.availableActions)
        else:
            index = np.argmax(self.qTable[roundNumber])

--- a/game.py
+++ b/game.py
@@ -6,7 +6,8 @@ import utilis, agent, graph
 class game:
-    def __init__(self, N=100, R=1, K=99, P=0, Actions=[0, 0.2, 0.4, 0.6, 0.8], I=1000, RF=0, alpha=1):
+    def __init__(self, N=100, R=1, K=99, P=0, Actions=[0, 0.2, 0.4, 0.6, 0.8], I=1000, RF=0, alpha=1, epsilon=0.1,
+                 multiArm='greedy', threshold=0.8):
        # datamap = utilis.read()
        # self.N = datamap['N']  # N-Player Game
        # self.M = datamap['M']  # Randomly choose M players to play the game (normally 2)
@@ -14,13 +15,12 @@ class game:
        # self.alpha = datamap['alpha']  # Loss fraction
        # self.R = datamap['R']   # Rounds of a game
        self.N = N
        self.M = 2
        self.RF = RF
        self.alpha = alpha
        self.R = R
-        self.threshold = 0.5      # Threshold
+        self.threshold = threshold  # Threshold
        # self.actions = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
        self.actions = Actions
        self.iterations = I
@@ -40,15 +40,12 @@ class game:
        # assert (self.rewire_k < self.N)
        self.graph = graph.Graph(self.N, self.rewire_k, self.rewire_p)
        "Create players"
        self.players = []
        IW = 100  # Initial Wealth - can be input, can be variable to distinguish population
        self.totalWealth = self.M * IW  # subject to change
        for i in range(self.N):
-            self.players.append(agent.Agent(self.R, IW, self.actions))
+            self.players.append(agent.Agent(self.R, IW, self.actions, epsilon=epsilon, multiArm=multiArm))
    def riskfunc(self, contribution, totalwealth):
        """
@@ -63,22 +60,21 @@ class game:
        elif self.RF == 1:
-            if proportion >= 0.5:
+            if proportion >= self.threshold:
                return 0
            else:
                return 1
        elif self.RF == 2:
-            threshold = 0.5
-            if proportion < threshold:
+            if proportion < self.threshold:
-                return 1 - proportion / threshold
+                return 1 - proportion / self.threshold
            else:
                return 0
        return "error"
    def play(self):
        # lastStrategyTable = np.zeros((self.N, self.R))
@@ -135,9 +131,7 @@ class game:
        return results
 def stackPlot(data, r, Actions, Iterations, titleComment=""):
    A = len(Actions)
    x = range(Iterations)
    y = np.zeros((Iterations, A))
@@ -147,25 +141,30 @@ def stackPlot(data, r, Actions, Iterations, titleComment = ""):
    fig, ax = plt.subplots()
    # grays = np.arange(0, 1, (max(Actions) - min(Actions))/A)
-    ax.stackplot(x, y, labels=Actions, colors=[str(1 - x) for x in Actions])
+    ax.stackplot(x, y, labels=Actions, colors=[str(0.9 - 0.9 * x) for x in Actions])
    ax.legend(loc='lower right')
    plt.ylabel('Number of Actions')
    plt.xlabel('Time(iterations)')
-    title = 'Average Number of Actions in Round ' + str(r+1) + ')'
+    title = 'Average Number of Actions in Round ' + str(r + 1)
-    if not titleComment:
+    if titleComment:
        title += "\n" + titleComment
    plt.title(title)
+    plt.savefig(titleComment + " in round " + str(r+1) + ".jpg")
    plt.show()
-def rep(repeat, N=100, R=1, K=99, P=0, Actions=[0, 0.2, 0.4, 0.6, 0.8], I=1000, RF=0, alpha=1):
+# def rep(repeat, N=100, R=1, K=99, P=0, Actions=[0, 0.2, 0.4, 0.6, 0.8], I=1000, RF=0, alpha=1, epsilon=0.1, multiArm='greedy'):
+def rep(repeat=30, R=1, Actions=[0, 0.2, 0.4, 0.6, 0.8], I=1000, **kwargs):
    data = np.zeros((I, R, len(Actions)))
    Actions.sort()
    for re in range(repeat):
        print("REP", re)
-        g = game(N, R, K, P, Actions, I, RF, alpha)
+        g = game(R=R, Actions=Actions, I=I, **kwargs)
        result = g.play()
        data += result
    data /= repeat
@@ -183,8 +182,7 @@ def averageOfLast(data, Actions, r=0, lastIterations=100 ):
    return (sum / 100, action_counter)
+def graph_kp3d(Actions, Klist=[99], Plist=[0, 0.3, 0.6, 0.9], repeat=30):
-def graph_kp3d(Actions, Klist=[2, 4], Plist=[0.2, 0.5, 0.8], repeat=1):
    K = Klist
    P = Plist
@@ -192,7 +190,7 @@ def graph_kp3d(Actions, Klist=[2, 4], Plist=[0.2, 0.5, 0.8], repeat=1):
    for k in range(len(K)):
        for p in range(len(P)):
-            data = rep(repeat, K=K[k], P=P[p])    # Specify other params by adding here or change default of rep
+            data = rep(repeat, K=K[k], P=P[p], Actions=Actions)  # Specify other params by adding here
            meanA[k][p] = averageOfLast(data, Actions, lastIterations=100)[0]  # Doing the first round only -- for now
            print("k, p, mean", k, p, meanA[k][p])
@@ -208,16 +206,40 @@ def graph_kp3d(Actions, Klist=[2, 4], Plist=[0.2, 0.5, 0.8], repeat=1):
    plt.show()
-def stackBar_alpha(r, Actions, alphaList, repeat=1):    # Plotting the data for round r
+def stackBar(r, Actions, repeat=30, multiArm='greedy', **kwargs):  # Plotting the data for round r
+    # if len(kwargs) != 1:
+    #     print("ERROR, Stack Bar Graph Expects 1 List, gets:", len(kwargs))
+    # key, alist = list(kwargs.items())[0]
+    key = -1
+    alist = []
+    for k, v in kwargs.items():
+        if isinstance(v, list):
+            if key == -1:
+                key = k
+                alist = v
+            else:
+                print("ERROR, Stack Bar Graph Expects Only 1 List to Compare")
+                exit(4)
+    del kwargs[k]
+    print("Comparing:", key)
+    print("On:", alist)
    A = len(Actions)
    p = []
-    count = np.zeros((A, len(alphaList)))     # of each action in each iter
+    count = np.zeros((A, len(alist)))  # of each action in each iter
-    ind = np.arange(len(alphaList))
+    ind = np.arange(len(alist))
    width = 0.3
-    for al in range(len(alphaList)):
+    for al in range(len(alist)):
-        data = rep(repeat, Actions=Actions, alpha=alphaList[al])
+        newKwargs = {**{key: alist[al]}, **kwargs}
+        if key == 'N':
+            newKwargs['K'] = alist[al] - 1
+        elif 'N' not in newKwargs.keys():
+            newKwargs['N'] = 100  # default value
+        data = rep(repeat, Actions=Actions, multiArm=multiArm, **newKwargs) / newKwargs['N'] * 100
        action_counter = averageOfLast(data, Actions, r, 100)[1]
        for a in range(A):
            count[a, al] = action_counter[Actions[a]]
@@ -227,18 +249,19 @@ def stackBar_alpha(r, Actions, alphaList, repeat=1):    # Plotting the data for
        p.append(plt.bar(ind, count[a], width, bottom=base, color=str(0.9 - 0.9 * Actions[a])))
        base += count[a]
-    plt.ylabel('Number of Actions')
+    plt.ylabel('Percentage of Actions')
-    plt.xlabel('Alpha, the loss fraction')
+    if key == 'epsilon':
+        plt.xlabel(key + ' (' + multiArm + ')')
+    else:
+        plt.xlabel(key)
    plt.title('Average Number of Actions in Round ' + str(r + 1))
-    plt.xticks(ind, alphaList)
+    plt.xticks(ind, alist)
-    # plt.yticks(np.arange(0, 81, 10))
+    plt.yticks(np.arange(0, 101, 10))
-    plt.legend(tuple([p[x][0] for x in range(A)][::-1]), tuple(Actions[::-1]), loc='lower left')
+    plt.legend(tuple([p[x][0] for x in range(A)][::-1]), tuple(Actions[::-1]), loc='best')
    plt.show()
 def main():
    # Read-in or Define Parameters
    N = 100
@@ -250,16 +273,23 @@ def main():
    alpha = 1
    Actions = [0, 0.2, 0.4, 0.6, 0.8]
    """
    Graph1: Number of Actions of Round r (start by 0) by Iteration
    """
-    # Repeat game and get the averaged data
    # RepeatTimes = 30
-    # data = rep(RepeatTimes, N, R, K, P, Actions, I, RF, alpha)
+    # for N in [5, 10, 20, 50, 100]:
-    #
+    #     K = N - 1
+    #     for R in [1, 2, 4]:
+    #         for alpha in [0.2, 0.4, 0.6, 0.8, 1]:
+    #             data = rep(RepeatTimes, R, Actions, I, N=N, K=K, alpha=alpha)
    #             for r in range(R):
-    #     stackPlot(data, r, Actions, I, "Fully-Mixed Graph")
+    #                 stackPlot(data, r, Actions, I, titleComment="N="+ str(N) + ", R=" + str(R) + ", alpha=" +str(alpha) + ", Well-Mixed graph")
+    for k in [2, 4, 10, 40, 90, 99]:
+        data = rep(repeat=30, N=100, K=k, Actions=Actions, R=1, I=I, P=P)
+        stackPlot(data, r=0, Iterations=I, Actions=Actions, titleComment=("K=" + str(k) + ", P=" + str(P)))
    """
    Graph2: Average contribution by K, P
@@ -270,7 +300,16 @@ def main():
    """
    Graph3: Actions by different alpha value
    """
-    stackBar_alpha(0, Actions, alphaList=[0, 0.2, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
+    # stackBar(0, Actions, repeat=1, alpha=[0, 0.2, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
+    # stackBar(0, Actions, repeat=1, N=[5, 10, 20, 50, 100])
+    # stackBar(0, Actions, repeat=1, RF=2, threshold=[0.2, 0.4, 0.6, 0.8, 1])
+    """
+    Graph4: Actions by different epsilon method + value
+    """
+    # stackBar(0, Actions, repeat=1, multiArm='greedy', epsilon=[0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
+    # stackBar(0, Actions, repeat=30, multiArm='decrease', epsilon=[0.8, 0.9, 0.95, 0.98, 0.99, 0.999, 0.9999])
 if __name__ == '__main__':
    main()
\ No newline at end of file
--- a/graph.py
+++ b/graph.py
@@ -72,8 +72,9 @@ class Graph:
                extraNode = np.random.randint(0, self.N)
                while extraNode == permutation[self.N - 1]:
                    extraNode = np.random.randint(0, self.N)
-                np.append(permutation, extraNode)
+                permutation = np.append(permutation, extraNode)
                selectedNodes[extraNode] += 1
            select = permutation.reshape((int(len(permutation)/2), 2))
        else:
            while edges:       # Loop when edges is not empty