-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathnim.py
131 lines (116 loc) · 5.42 KB
/
nim.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
'''
MINIMAX NIM GAME CLASS AND SOLVER
AUTHOR: Dhruv Joshi
---------------------------------
Based on concepts learned in CS221 - Artificial Intelligence at Stanford (Fall 2016)
'''
class NimGame:
# --------------------------------------------------------- #
# The game definition class #
# --------------------------------------------------------- #
# A nim game is a type of zero sum game played by 2 players #
# Read more at https://en.wikipedia.org/wiki/Nim #
# This class models the game and the methods allow the #
# evaluation of state space for different type of solvers #
# --------------------------------------------------------- #
def __init__(self, n):
# initialize the game - only the number 'n' is required
# which tells us how many objects are there in a heap
# this is the variant of the "subtraction game"
self.n = n
def startState(self):
# Starting state - there are n objects in a single heap
return self.n
def isEnd(self, state):
# the game has reached the end state if there are no objects
return True if state == 0 else False
def utility(self, state, player):
# The utility is +inf for the player having won
# and -inf for the adversary having won
if state == 0:
if player == 1:
# you lost
return float('-inf')
else:
# you won!
return float('+inf')
def actions(self, state):
# the possible actions at a particular state
if state >= 3:
return [1,2,3]
return range(1, state+1)
def successor(self, state, action):
# The (distinct) successor state if an action
# is taken at a particular state
if action > state:
return 0
return state - action
def minimaxPolicy(game, state, player):
# --------------------------------------------------------- #
# The minimax solver in state space for a general game #
# --------------------------------------------------------- #
# The minimax solver assumes a zero-sum 2-player game being #
# played, between a human "player" and an AI "adversary" #
# It returns optimal (value, action) at a state. #
# It needs to recursively compute the value, therefore #
# define the recursion function within this function. #
#
# Read more about minimax here: #
# https://en.wikipedia.org/wiki/Minimax #
# --------------------------------------------------------- #
def recurse(state, player):
# ----------------------------------------------------------------- #
# The recursive function checks the sub-tree of a particular state #
# and minimizes expected value for the player while maximizing the #
# expected value of a player's move, i.e. assuming the player is #
# behaving using an optimal strategy
# ----------------------------------------------------------------- #
# start with the base case
if game.isEnd(state) == True:
# return the utility of the state, no more actions that can be taken
return (game.utility(state, player), None)
if cache.has_key((state, player)):
return cache[(state, player)]
# otherwise, look at all possible actions in the game
# toggle the player each time it is called (since it is a 2-player game)
# we thus represent the present player by 1 or -1 to ease toggling
choices = [(recurse(game.successor(state, action), -1*player)[0], action) for action in game.actions(state)]
# return the max of the choice of (utility, optimalAction) if it is the agent and min if it is the opponent
# the min/max function will only be applied to hte first element of the tuple in the list "choices"
# This is because we want to maximize/minimize over the expected utility, NOT the action
# But we want the action as well.
if player == +1:
val = max(choices)
else:
val = min(choices)
cache[(state, player)] = val
return val
# Recurse over the choices for the state, return the second argument of returned tuple
# The solver will go all the way till the end of the tree to choose the optimal next action
#
# Returned values:
# value = the optimal value (utility of the end state reached by both players taking a series of optimal actions)
# action = the optimal action for the adversary to take next to minimize the expected utlity of the player
value, action = recurse(state, player)
return (value, action)
# cache values globally to speed up the minimax recursion
# dynamic programming ftw
cache = {}
if __name__ == "__main__":
# play the nim game on terminal for fun
game = NimGame(25)
state = game.startState()
while (state > 0):
action = 0
print "current state is", state
while (action not in [1,2,3]) and state - action >= 0:
action = int(raw_input("Choose action from 1,2,3:"))
state -= action
if state == 0:
print "You won!"
break
val, act = minimaxPolicy(game, state, 1)
state -= act
print "computer moves state to", state
if state == 0:
print "You Lost!"