Fix typo in win check
This commit is contained in:
parent
a209d02d8b
commit
acd515bc9a
@ -16,9 +16,9 @@ class TicTacToeEnv(gym.Env):
|
||||
|
||||
# p = p*2 - 1
|
||||
# check move legality
|
||||
proposed = self.state['board'][square]
|
||||
board = self.state['board']
|
||||
proposed = board[square]
|
||||
om = self.state['on_move']
|
||||
print ("on move: ", om)
|
||||
if (proposed != 0): # wrong player, not empty
|
||||
print("illegal move ", action, ". (square occupied): ", square)
|
||||
done = True
|
||||
@ -28,16 +28,17 @@ class TicTacToeEnv(gym.Env):
|
||||
done = True
|
||||
reward = -2 * om # player who did NOT make the illegal move
|
||||
else:
|
||||
self.state['board'][square] = p
|
||||
board[square] = p
|
||||
self.state['on_move'] = -p
|
||||
|
||||
# check game over
|
||||
for i in range(3):
|
||||
if (self.state['board'][i * 3] == p and self.state['board'][i*3 + 1] == p and self.state['board'][i*3+2] == 2):
|
||||
# horizontals and verticals
|
||||
if ((board[i * 3] == p and board[i * 3 + 1] == p and board[i * 3 + 2 ] == p)
|
||||
or (board[i + 0] == p and board[i + 3] == p and board[i + 6] == p)):
|
||||
reward = p
|
||||
done = True
|
||||
break
|
||||
#TODO other cases
|
||||
|
||||
return np.array(self.state), reward, done, {}
|
||||
def _reset(self):
|
||||
|
Loading…
Reference in New Issue
Block a user