Typo in win check
This commit is contained in:
parent
a209d02d8b
commit
acd515bc9a
@ -7,7 +7,7 @@ class TicTacToeEnv(gym.Env):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.action_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(9)))
|
self.action_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(9)))
|
||||||
self.observation_space = spaces.Discrete(3)#Tuple(spaces.Discrete(3), spaces.Discrete(9))
|
self.observation_space = spaces.Discrete(3) # Tuple(spaces.Discrete(3), spaces.Discrete(9))
|
||||||
def _step(self, action):
|
def _step(self, action):
|
||||||
done = False
|
done = False
|
||||||
reward = 0
|
reward = 0
|
||||||
@ -16,9 +16,9 @@ class TicTacToeEnv(gym.Env):
|
|||||||
|
|
||||||
# p = p*2 - 1
|
# p = p*2 - 1
|
||||||
# check move legality
|
# check move legality
|
||||||
proposed = self.state['board'][square]
|
board = self.state['board']
|
||||||
|
proposed = board[square]
|
||||||
om = self.state['on_move']
|
om = self.state['on_move']
|
||||||
print ("on move: ", om)
|
|
||||||
if (proposed != 0): # wrong player, not empty
|
if (proposed != 0): # wrong player, not empty
|
||||||
print("illegal move ", action, ". (square occupied): ", square)
|
print("illegal move ", action, ". (square occupied): ", square)
|
||||||
done = True
|
done = True
|
||||||
@ -28,21 +28,22 @@ class TicTacToeEnv(gym.Env):
|
|||||||
done = True
|
done = True
|
||||||
reward = -2 * om # player who did NOT make the illegal move
|
reward = -2 * om # player who did NOT make the illegal move
|
||||||
else:
|
else:
|
||||||
self.state['board'][square] = p
|
board[square] = p
|
||||||
self.state['on_move'] = -p
|
self.state['on_move'] = -p
|
||||||
|
|
||||||
# check game over
|
# check game over
|
||||||
for i in range(3):
|
for i in range(3):
|
||||||
if (self.state['board'][i * 3] == p and self.state['board'][i*3 + 1] == p and self.state['board'][i*3+2] == 2):
|
# horizontals and verticals
|
||||||
|
if ((board[i * 3] == p and board[i * 3 + 1] == p and board[i * 3 + 2 ] == p)
|
||||||
|
or (board[i + 0] == p and board[i + 3] == p and board[i + 6] == p)):
|
||||||
reward = p
|
reward = p
|
||||||
done = True
|
done = True
|
||||||
break
|
break
|
||||||
#TODO other cases
|
|
||||||
|
|
||||||
return np.array(self.state), reward, done, {}
|
return np.array(self.state), reward, done, {}
|
||||||
def _reset(self):
|
def _reset(self):
|
||||||
self.state = {}
|
self.state = {}
|
||||||
self.state['board'] = [0,0,0,0,0,0,0,0,0]
|
self.state['board'] = [0, 0, 0, 0, 0, 0, 0, 0, 0]
|
||||||
self.state['on_move'] = 1
|
self.state['on_move'] = 1
|
||||||
return self.state
|
return self.state
|
||||||
def _render(self, mode='human', close=False):
|
def _render(self, mode='human', close=False):
|
||||||
@ -56,7 +57,7 @@ class TicTacToeEnv(gym.Env):
|
|||||||
moves = []
|
moves = []
|
||||||
for i in range (9):
|
for i in range (9):
|
||||||
|
|
||||||
if (self.state['board'][i]== 0):
|
if (self.state['board'][i] == 0):
|
||||||
p = self.state['on_move']
|
p = self.state['on_move']
|
||||||
m = [p, i]
|
m = [p, i]
|
||||||
moves.append(m)
|
moves.append(m)
|
||||||
|
Loading…
Reference in New Issue
Block a user