This commit is contained in:
Nicolai Czempin 2017-04-04 23:07:45 +02:00
parent 0962101988
commit 73495ae7e4

View File

@ -7,7 +7,7 @@ class TicTacToeEnv(gym.Env):
def __init__(self): def __init__(self):
self.action_space = spaces.Discrete(9) self.action_space = spaces.Discrete(9)
self.observation_space = spaces.Discrete(9 * 3) # flattened self.observation_space = spaces.Discrete(512*512*2) # flattened
def _step(self, action): def _step(self, action):
done = False done = False
reward = 0 reward = 0
@ -22,11 +22,11 @@ class TicTacToeEnv(gym.Env):
if (proposed != 0): # wrong player, not empty if (proposed != 0): # wrong player, not empty
print("illegal move ", action, ". (square occupied): ", square) print("illegal move ", action, ". (square occupied): ", square)
done = True done = True
reward = -2 * om # player who did NOT make the illegal move reward = -1 * om # player who did NOT make the illegal move
if (p != om): # wrong player, not empty if (p != om): # wrong player, not empty
print("illegal move ", action, " not on move: ", p) print("illegal move ", action, " not on move: ", p)
done = True done = True
reward = -2 * om # player who did NOT make the illegal move reward = -1 * om # player who did NOT make the illegal move
else: else:
board[square] = p board[square] = p
self.state['on_move'] = -p self.state['on_move'] = -p
@ -40,7 +40,7 @@ class TicTacToeEnv(gym.Env):
done = True done = True
break break
return np.array(self.state), reward, done, {} return self.state, reward, done, {}
def _reset(self): def _reset(self):
self.state = {} self.state = {}
self.state['board'] = [0, 0, 0, 0, 0, 0, 0, 0, 0] self.state['board'] = [0, 0, 0, 0, 0, 0, 0, 0, 0]
@ -53,27 +53,6 @@ class TicTacToeEnv(gym.Env):
for i in range (9): for i in range (9):
print (self.state['board'][i], end=" ") print (self.state['board'][i], end=" ")
print() print()
def hash_ttt(state):
    """Pack a tic-tac-toe state into a single non-negative integer.

    Bit layout of the result:
      * bit 0      -- set when ``state['on_move']`` is -1
      * bits 1-9   -- bit ``i + 1`` is set when ``board[i]`` is occupied
      * bits 10-18 -- bit ``i + 10`` is set when ``board[i]`` is negative

    Of course this is just the upper bound (2 * 512 * 512 values); we
    should really take advantage of the redundancies to reduce the number
    of states to 765 for the board, and who is on move really is implicit
    in how many squares are occupied.

    Args:
        state: mapping with a ``'board'`` entry (sequence of 9 ints, 0 for
            an empty square) and an ``'on_move'`` entry.

    Returns:
        The encoded state as an int in ``range(2 ** 19)``.
    """
    board = state['board']
    retval = 1 if state['on_move'] == -1 else 0
    for i in range(9):
        if board[i] != 0:
            # Occupancy bit for square i (low nine bits above the move bit).
            retval |= 1 << (i + 1)
            if board[i] < 0:
                # Ownership bit: marks the square as held by the -1 player.
                retval |= 1 << (i + 10)
    # The original computed the value but never handed it back to the caller.
    return retval
def move_generator(self): def move_generator(self):
moves = [] moves = []
for i in range (9): for i in range (9):