cleanup
This commit is contained in:
parent
0962101988
commit
73495ae7e4
@ -7,7 +7,7 @@ class TicTacToeEnv(gym.Env):
|
||||
|
||||
def __init__(self):
|
||||
self.action_space = spaces.Discrete(9)
|
||||
self.observation_space = spaces.Discrete(9 * 3) # flattened
|
||||
self.observation_space = spaces.Discrete(512*512*2) # flattened
|
||||
def _step(self, action):
|
||||
done = False
|
||||
reward = 0
|
||||
@ -22,11 +22,11 @@ class TicTacToeEnv(gym.Env):
|
||||
if (proposed != 0): # wrong player, not empty
|
||||
print("illegal move ", action, ". (square occupied): ", square)
|
||||
done = True
|
||||
reward = -2 * om # player who did NOT make the illegal move
|
||||
reward = -1 * om # player who did NOT make the illegal move
|
||||
if (p != om): # wrong player, not empty
|
||||
print("illegal move ", action, " not on move: ", p)
|
||||
done = True
|
||||
reward = -2 * om # player who did NOT make the illegal move
|
||||
reward = -1 * om # player who did NOT make the illegal move
|
||||
else:
|
||||
board[square] = p
|
||||
self.state['on_move'] = -p
|
||||
@ -40,7 +40,7 @@ class TicTacToeEnv(gym.Env):
|
||||
done = True
|
||||
break
|
||||
|
||||
return np.array(self.state), reward, done, {}
|
||||
return self.state, reward, done, {}
|
||||
def _reset(self):
|
||||
self.state = {}
|
||||
self.state['board'] = [0, 0, 0, 0, 0, 0, 0, 0, 0]
|
||||
@ -53,27 +53,6 @@ class TicTacToeEnv(gym.Env):
|
||||
for i in range (9):
|
||||
print (self.state['board'][i], end=" ")
|
||||
print()
|
||||
def hash_ttt(state):
    """Encode a tic-tac-toe state as one integer in ``range(2 ** 19)``.

    Bit layout of the returned code:

    * bit 0       -- set when ``state['on_move'] == -1``
    * bits 1..9   -- bit ``i + 1`` is set when square ``i`` is non-zero
    * bits 10..18 -- bit ``i + 10`` is set when square ``i`` is negative

    Args:
        state: mapping with a ``'board'`` sequence (the first nine entries
            are read) and an ``'on_move'`` value.

    Returns:
        The integer code. Its upper bound is ``2 ** 19 == 512 * 512 * 2``.

    Raises:
        KeyError: if ``'board'`` or ``'on_move'`` is missing from ``state``.
        IndexError: if the board has fewer than nine squares.
    """
    # NOTE: this encoding is only an upper bound on the state count.
    # Exploiting the board's redundancies would reduce it to 765 board
    # states, and the side to move is implicit in how many squares are
    # occupied.
    board = state['board']
    code = 1 if state['on_move'] == -1 else 0
    for i in range(9):
        piece = board[i]
        if piece != 0:
            code |= 1 << (i + 1)    # occupancy bit for square i
        if piece < 0:
            code |= 1 << (i + 10)   # ownership bit: occupant is negative
    # The original accumulated the code but never returned it.
    return code
|
||||
def move_generator(self):
|
||||
moves = []
|
||||
for i in range (9):
|
||||
|
Loading…
Reference in New Issue
Block a user