Monte Carlo Tree Search

stevexyz · May 11, 2018 · 45683b7 · 45683b7
1 parent fc768dc
commit 45683b7
Show file tree

Hide file tree

Showing 3 changed files with 245 additions and 253 deletions.
diff --git a/ValueNetwork/MlegoMctsXboard2.py b/ValueNetwork/MlegoMctsXboard2.py
@@ -0,0 +1,234 @@
+#!/usr/bin/python3
+
+# To be verified: "eval" recalculation in backpropagation
+
+
+import time
+import Const
+from queue import *
+import Eval
+import chess.uci
+import sys
+from math import (sqrt, log)
+
+# Weight of the confidence-bound term that traverse() adds to a child's value
+# and that the final "best lower confidence bound" selection subtracts from it.
+balance_constant = 3 # exploration vs exploitation balance
+# Wall-clock budget of the search loop that runs before each engine move.
+move_timeframe = 3  # number of seconds for a move
+# Placeholder only: rebound to an Eval instance while handling "protover 2".
+modeleval = {} # model will be loaded in protover call
+
+
+# references:
+# - http://ccg.doc.gold.ac.uk/ccg_old/teaching/ludic_computing/ludic16.pdf
+# - https://int8.io/monte-carlo-tree-search-beginners-guide/
+
+
+class MctsNode(object):
+    """One position of the search tree.
+
+    Attributes:
+        parent: node this one was created from (None when not given).
+        children, movetochild: parallel lists; movetochild[i] is the move
+            that was pushed on a copy of this node's board to build children[i].
+        visitcount: starts at 1; traverse() increments it on every pass.
+        anneval: evaluator score cached by valuepos() (None until computed).
+        value: current value of the node; set lazily by valuepos() and
+            overwritten by backpropagate() (None until one of them runs).
+        board: the position held by this node.
+        movetoexpand: queue of legal moves that have no child node yet.
+        terminalnode: True when the board has no legal move at all.
+        isroot: marks the node at which backpropagate() stops climbing.
+    """
+
+    def __init__(self, board, parent=None):
+        self.parent = parent
+        self.children = []
+        self.movetochild = []
+        self.visitcount = 1
+        self.anneval = None
+        self.value = None
+        self.board = board
+        self.movetoexpand = Queue()
+        for m in self.board.generate_legal_moves():
+            self.movetoexpand.put(m)
+        # no legal move queued means the position cannot be expanded further
+        self.terminalnode = self.movetoexpand.empty()
+        self.isroot = False # root will be moved during game progress
+
+    def valuepos(self):
+        """Return the node value, asking the evaluator only the first time.
+
+        The evaluator result (element 0 of EvaluatePositionB) is negated when
+        white is to move on this board.
+        NOTE(review): presumably this expresses the score from the viewpoint
+        of the side that just moved - confirm against Eval's sign convention.
+        """
+        # Compare with None explicitly: 0.0 is a legitimate value (cached or
+        # written by backpropagate) but is falsy, so a truthiness test would
+        # discard it and replace it with a fresh evaluation.
+        if self.value is None:
+            sign = -1 if self.board.turn==chess.WHITE else 1
+            self.anneval = modeleval.EvaluatePositionB(self.board)[0] * sign
+            self.value = self.anneval
+        return self.value
+
+    def boardcopy(self):
+        """Return a copy of the board so a move can be pushed without touching this node."""
+        return self.board.copy()
+
+    def setroot(self):
+        """Flag this node as the point where backpropagation stops."""
+        self.isroot = True
+
+    def is_empty_unvisited_children(self):
+        """Return True when every legal move already has a child node."""
+        return self.movetoexpand.empty()
+
+
+def traverse(parent):
+    """Walk down the tree from `parent` and return the node to evaluate next.
+
+    Every node passed through gets its visitcount incremented. The walk ends
+    at the first of:
+      - a terminal node (returned as is);
+      - a node that still has unexpanded moves: the next queued move is
+        turned into a new child, which is returned;
+    otherwise it continues into the child with the highest upper confidence
+    bound.
+    """
+
+    parent.visitcount += 1
+
+    if parent.terminalnode:
+        print("# terminal")
+        return parent
+
+    if not parent.is_empty_unvisited_children():
+        # pick the next unvisited move and create the node for it
+        move = parent.movetoexpand.get()
+        parent.movetochild.append(move)
+        board = parent.boardcopy()
+        board.push(move)
+        node = MctsNode(board, parent=parent)
+        parent.children.append(node)
+        print("# picked unvisited ", move)
+        return node
+
+    # traverse child with best upper confidence bound
+    # UCB1 exploration term is sqrt(ln(parent visits) / child visits): the
+    # bonus shrinks as a child is visited more often relative to its parent.
+    # (The previous form had the two counts swapped, which rewarded the most
+    # visited child and gave no bonus at all to a child visited once.)
+    logparentvisits = log(parent.visitcount)
+    bestscore = -float('inf')
+    bestchild = None
+    for i, child in enumerate(parent.children):
+        score = child.valuepos() + \
+            balance_constant * sqrt( logparentvisits / child.visitcount )
+        if score > bestscore:
+            bestchild = i
+            bestscore = score
+    print("# picked move ", parent.movetochild[bestchild], \
+          " visited already ", parent.children[bestchild].visitcount, " times")
+
+    return traverse(parent.children[bestchild])
+
+
+def backpropagate(node, eval):
+    """Store `eval` in `node`, then carry it up the parent chain.
+
+    At each step the value is negated before being handed to the parent, and
+    the climb ends once a node with isroot set has been updated. For a
+    non-terminal node with more than one child, the stored value is the
+    maximum of the incoming value and the negated valuepos() of every child.
+    NOTE(review): the file header marks this recalculation "to be verified".
+    """
+
+    current = node
+    score = eval
+
+    while True:
+
+        # recalculate max (to be optimized)
+        if len(current.children)>1 and not current.terminalnode:
+            # minus because the children belong to the adversary
+            score = max([score] + [-child.valuepos() for child in current.children])
+            print("# backpropagated recalculation value ", score)
+
+        print("# old node value ", current.value, " new node value ", score)
+        current.value = score
+
+        if current.isroot:
+            return
+
+        print("# continue backpropagation")
+        current = current.parent
+        score = -score
+
+
+#=========================
+# main MlegoMctsXboard2.py
+
+# Command line: at most two optional arguments, [<modelfile>] and a second
+# argument whose mere presence switches the final move selection mode.
+if len(sys.argv) > 3:
+    print('Xboard engine string: "python3 ', sys.argv[0], '[[<modelfile>] <bestlowerconfidenceboundselection>]"')
+    # sys.exit instead of the interactive-only exit() helper installed by site
+    sys.exit(1)
+
+if len(sys.argv) > 2:
+    selection_mode = "best lower confidence bound selection"
+else:
+    selection_mode = "highest number of visits"
+
+# set by the "force" command: the next usermove is applied without a reply
+forcemove = False
+
+# Xboard command loop: one command per stdin line, until "quit" or end of input.
+while True:
+
+    try:
+        line = sys.stdin.readline()
+    except KeyboardInterrupt:
+        # avoid control-c breaks; nothing new was read, so wait again instead
+        # of processing the previous command a second time
+        continue
+    if not line:
+        # readline() returns '' only at end of input (pipe closed): stop
+        # rather than spinning forever on empty reads
+        break
+    line = line.strip(' \t\n'+chr(3))
+    parts = line.split(' ')
+
+    if parts[0]=='xboard':
+        print('tellics say Monte Carlo Tree Search experimental chess engine')
+        print('tellics say based on SlyMlego deep learning platform')
+        print('tellics say by Stefano Marago\' 2018')
+        print('tellics say https://github.com/stevexyz/SlyMlego')
+
+    elif parts[0]=='protover' and parts[1]=='2':
+        print('feature done=0')
+        sys.stdout.flush() # ensure xboard wait to activate network
+        print('feature debug=1')
+        from Eval import Eval
+        if len(sys.argv)<=1:
+            modeleval = Eval(quiet=True)
+        else:
+            modeleval = Eval(modelfile=sys.argv[1], quiet=True)
+        modeleval.EvaluatePositionB(chess.Board()) # just to startup engine
+        print('feature myname="mcts-mlego-v0.1"')
+        print('feature variants="normal"')
+        print('feature setboard=0')
+        print('feature ping=1')
+        print('feature usermove=1')
+        print('feature analyze=0')
+        print('feature pause=0')
+        print('feature nps=0')
+        print('feature memory=0')
+        print('feature sigint=0')
+        print('feature done=1')
+
+    elif parts[0]=='quit':
+        break
+
+    elif parts[0]=='new':
+        # create root node
+        root = MctsNode(chess.Board())
+
+    elif parts[0]=='ping':
+        print('pong '+parts[1])
+
+    elif parts[0]=='undo':
+        # step back one move; ignored at the first position, which has no parent
+        if root.parent is not None:
+            root.isroot = False
+            root = root.parent
+
+    elif parts[0]=='force':
+        forcemove = True
+
+    elif parts[0]=='usermove':
+
+        # expand unvisited children in case move is there
+        while not root.is_empty_unvisited_children():
+            move = root.movetoexpand.get()
+            root.movetochild.append(move)
+            board = root.boardcopy()
+            board.push(move)
+            node = MctsNode(board, parent=root)
+            root.children.append(node)
+            print("# created unvisited child ", move)
+
+        # NOTE(review): a move that matches no child leaves root unchanged
+        for i in range(len(root.children)):
+            if root.movetochild[i].uci()==parts[1]:
+                root = root.children[i]
+                break
+
+    if parts[0]=='go' or (parts[0]=='usermove' and not forcemove): # usermove already processed
+
+        forcemove = False
+
+        if root.terminalnode:
+            # the game is over in this position: there is no child to pick,
+            # and indexing the move list with None would crash the engine
+            print("# no legal moves in current position, nothing to search")
+
+        else:
+
+            # until there is time expand tree with mc ucb selection approach
+            start_time = time.time()
+            root.setroot() # stop backpropagation here
+            while time.time()-start_time < move_timeframe:
+                leaf = traverse(root) # to pick unvisited/best node
+                leafvalue = leaf.valuepos() # simulation
+                backpropagate(leaf.parent, -leafvalue)
+
+            # pick child with highest number of visits
+            # or alternatively best lower confidence bound
+            # and update root
+            bestvalue = -float('inf')
+            bestchild = None
+            for i, child in enumerate(root.children):
+                if selection_mode=="best lower confidence bound selection":
+                    # pick child with best lower confidence bound; the bound
+                    # narrows as the child is visited more often relative to
+                    # the root: sqrt(ln(root visits) / child visits)
+                    e = child.valuepos() - \
+                        balance_constant * sqrt( log( root.visitcount ) / child.visitcount )
+                    if e > bestvalue:
+                        bestchild = i
+                        bestvalue = e
+                elif selection_mode=="highest number of visits":
+                    # pick child with highest number of visits
+                    if child.visitcount > bestvalue:
+                        bestchild = i
+                        bestvalue = child.visitcount
+                else:
+                    raise ValueError('Selection mode "%s" not recognized' % selection_mode)
+
+            print('move %s' % root.movetochild[bestchild])
+            root = root.children[bestchild]
+
+    sys.stdout.flush()
+
+sys.exit(0)
diff --git a/ValueNetwork/TrainModel.py b/ValueNetwork/TrainModel.py
@@ -120,47 +120,18 @@ def rmse(y_true, y_pred):
     #@modelbegin
     #----------
 
-    modelname = "Resnet3-alfa"
+    modelname = "Test-20180501"
 
-    def residual_block(y, nb_channels_in, nb_channels_out, cardinality=4):
-        shortcut = y
-        if cardinality == 1:
-            y = Conv2D(nb_channels_in, kernel_size=(8, 8), strides=(1,1), padding='same', use_bias=False)(y)
-        else:
-            assert not nb_channels_in % cardinality
-            _d = nb_channels_in // cardinality
-            groups = []
-            for j in range(cardinality):
-                group = Lambda(lambda z: z[:, :, :, j * _d:j * _d + _d])(y)
-                groups.append(Conv2D(_d, kernel_size=(8, 8), strides=(1,1), padding='same', use_bias=False)(group))
-            y = concatenate(groups)
-        y = BatchNormalization(axis=-1)(y)
-        y = ELU()(y)
-        y = add([shortcut, y])
-        y = ELU()(y)
-        return y
-
-    input_tensor = Input(shape=(8, 8, Const.NUMFEATURES))
-    net_size = 64
-    network = Conv2D(net_size, kernel_size=(8, 8), strides=(1, 1), padding='same', use_bias=False)(input_tensor)
-    network = BatchNormalization(axis=-1)(network)
-    network = ELU()(network)
-    for i in range(4):
-        network = residual_block(network, net_size, net_size)
-    network = GlobalAveragePooling2D()(network)
-    network = Dense(1)(network)
-    network = Activation("tanh")(network)
-    model = Model(inputs=[input_tensor], outputs=[network])
-
-    # # example of simple model
-    # modelname = "Test-20180430-c"
-    # input = Input(shape=((8, 8, Const.NUMFEATURES) if K.image_dim_ordering()=="tf" \
-    #            else (Const.NUMFEATURES, 8, 8)))
-    # net = Dense(8, use_bias=False, activation='relu') (input)
-    # #net = Dense(64, use_bias=False, activation='relu') (net)
-    # net = Flatten() (net)
-    # net = Dense(1, activation='tanh') (net)
-    # model = Model(inputs=input, outputs=net)
+    input = Input(shape=((8, 8, Const.NUMFEATURES) if K.image_dim_ordering()=="tf" \
+               else (Const.NUMFEATURES, 8, 8)))
+
+    net = Dense(Const.NUMFEATURES * 8, use_bias=False, activation='relu') (input)
+
+    net = Flatten() (net)
+
+    net = Dense(1, activation='tanh') (net)
+
+    model = Model(inputs=input, outputs=net)
 
     model.compile(
         loss='mean_absolute_percentage_error',