increase penalty for illegal moves
This commit is contained in:
parent
46f077ef1f
commit
17b9bb7486
@ -22,11 +22,11 @@ class TicTacToeEnv(gym.Env):
|
|||||||
if (proposed != 0): # wrong player, not empty
|
if (proposed != 0): # wrong player, not empty
|
||||||
print("illegal move ", action, ". (square occupied): ", square)
|
print("illegal move ", action, ". (square occupied): ", square)
|
||||||
done = True
|
done = True
|
||||||
reward = -om # player who did NOT make the illegal move
|
reward = -2 * om # player who did NOT make the illegal move
|
||||||
if (p != om): # wrong player, not empty
|
if (p != om): # wrong player, not empty
|
||||||
print("illegal move ", action, " not on move: ", p)
|
print("illegal move ", action, " not on move: ", p)
|
||||||
done = True
|
done = True
|
||||||
reward = -om # player who did NOT make the illegal move
|
reward = -2 * om # player who did NOT make the illegal move
|
||||||
else:
|
else:
|
||||||
self.state['board'][square] = p
|
self.state['board'][square] = p
|
||||||
self.state['on_move'] = -p
|
self.state['on_move'] = -p
|
||||||
@ -57,8 +57,6 @@ class TicTacToeEnv(gym.Env):
|
|||||||
for i in range (9):
|
for i in range (9):
|
||||||
if (self.state.state['board'][i]== 0):
|
if (self.state.state['board'][i]== 0):
|
||||||
p = self.state.on_move
|
p = self.state.on_move
|
||||||
if (p == 2):
|
|
||||||
p = -1
|
|
||||||
m = [p, i]
|
m = [p, i]
|
||||||
moves.append(m)
|
moves.append(m)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user