cleanup
This commit is contained in:
parent
0962101988
commit
73495ae7e4
@ -7,7 +7,7 @@ class TicTacToeEnv(gym.Env):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.action_space = spaces.Discrete(9)
|
self.action_space = spaces.Discrete(9)
|
||||||
self.observation_space = spaces.Discrete(9 * 3) # flattened
|
self.observation_space = spaces.Discrete(512*512*2) # flattened
|
||||||
def _step(self, action):
|
def _step(self, action):
|
||||||
done = False
|
done = False
|
||||||
reward = 0
|
reward = 0
|
||||||
@ -22,11 +22,11 @@ class TicTacToeEnv(gym.Env):
|
|||||||
if (proposed != 0): # wrong player, not empty
|
if (proposed != 0): # wrong player, not empty
|
||||||
print("illegal move ", action, ". (square occupied): ", square)
|
print("illegal move ", action, ". (square occupied): ", square)
|
||||||
done = True
|
done = True
|
||||||
reward = -2 * om # player who did NOT make the illegal move
|
reward = -1 * om # player who did NOT make the illegal move
|
||||||
if (p != om): # wrong player, not empty
|
if (p != om): # wrong player, not empty
|
||||||
print("illegal move ", action, " not on move: ", p)
|
print("illegal move ", action, " not on move: ", p)
|
||||||
done = True
|
done = True
|
||||||
reward = -2 * om # player who did NOT make the illegal move
|
reward = -1 * om # player who did NOT make the illegal move
|
||||||
else:
|
else:
|
||||||
board[square] = p
|
board[square] = p
|
||||||
self.state['on_move'] = -p
|
self.state['on_move'] = -p
|
||||||
@ -40,7 +40,7 @@ class TicTacToeEnv(gym.Env):
|
|||||||
done = True
|
done = True
|
||||||
break
|
break
|
||||||
|
|
||||||
return np.array(self.state), reward, done, {}
|
return self.state, reward, done, {}
|
||||||
def _reset(self):
|
def _reset(self):
|
||||||
self.state = {}
|
self.state = {}
|
||||||
self.state['board'] = [0, 0, 0, 0, 0, 0, 0, 0, 0]
|
self.state['board'] = [0, 0, 0, 0, 0, 0, 0, 0, 0]
|
||||||
@ -53,27 +53,6 @@ class TicTacToeEnv(gym.Env):
|
|||||||
for i in range (9):
|
for i in range (9):
|
||||||
print (self.state['board'][i], end=" ")
|
print (self.state['board'][i], end=" ")
|
||||||
print()
|
print()
|
||||||
def hash_ttt(state):
|
|
||||||
#of course this is just for the upper bound;
|
|
||||||
#we should really take advantage of the redundancies
|
|
||||||
# to reduce the number of states to 765 for the board
|
|
||||||
# and who is on move really is implicit in how many
|
|
||||||
# squares are occupied
|
|
||||||
retval = 0
|
|
||||||
low9 = 0
|
|
||||||
high9 = 0
|
|
||||||
lowmult = 2
|
|
||||||
highmult = 1024
|
|
||||||
board = state['board']
|
|
||||||
if (state['on_move'] == -1):
|
|
||||||
retval = 1
|
|
||||||
for i in range(9):
|
|
||||||
if (board[i] != 0):
|
|
||||||
retval += lowmult #todo bitwise logic in python how?
|
|
||||||
if (board[i] < 0):
|
|
||||||
retval += highmult
|
|
||||||
lowmult *=2
|
|
||||||
highmult *= 2
|
|
||||||
def move_generator(self):
|
def move_generator(self):
|
||||||
moves = []
|
moves = []
|
||||||
for i in range (9):
|
for i in range (9):
|
||||||
|
Loading…
Reference in New Issue
Block a user