cleanup

2017-04-04 23:07:45 +02:00 · 2017-04-04 23:07:45 +02:00 · 73495ae7e4
commit 73495ae7e4
parent 0962101988
1 changed files with 4 additions and 25 deletions
--- a/gym_tic_tac_toe/envs/tic_tac_toe_env.py
+++ b/gym_tic_tac_toe/envs/tic_tac_toe_env.py
@ -7,7 +7,7 @@ class TicTacToeEnv(gym.Env):
    def __init__(self):
        self.action_space = spaces.Discrete(9)
-        self.observation_space = spaces.Discrete(9 * 3) # flattened
+        self.observation_space = spaces.Discrete(512*512*2) # flattened
    def _step(self, action):
        done = False
        reward = 0
@ -22,11 +22,11 @@ class TicTacToeEnv(gym.Env):
        if (proposed != 0):  # wrong player, not empty
            print("illegal move ", action, ". (square occupied): ", square)
            done = True
-            reward = -2 * om  # player who did NOT make the illegal move
+            reward = -1 * om  # player who did NOT make the illegal move
        if (p != om):  # wrong player, not empty
            print("illegal move  ", action, " not on move: ", p)
            done = True
-            reward = -2 * om  # player who did NOT make the illegal move
+            reward = -1 * om  # player who did NOT make the illegal move
        else:
            board[square] = p
            self.state['on_move'] = -p
@ -40,7 +40,7 @@ class TicTacToeEnv(gym.Env):
                done = True
                break
-        return np.array(self.state), reward, done, {}
+        return self.state, reward, done, {}
    def _reset(self):
        self.state = {}
        self.state['board'] = [0, 0, 0, 0, 0, 0, 0, 0, 0]
@ -53,27 +53,6 @@ class TicTacToeEnv(gym.Env):
        for i in range (9):
            print (self.state['board'][i], end=" ")
        print()
    def hash_ttt(state):
        """Encode a tic-tac-toe state as a single non-negative integer.

        Bit layout of the result:
          * bit 0       -- set when ``state['on_move']`` equals -1
          * bits 1..9   -- bit ``i + 1`` is set when square ``i`` is occupied
          * bits 10..18 -- bit ``i + 10`` is set when square ``i`` holds a
                           negative value

        Args:
            state: mapping with a ``'board'`` sequence of at least nine
                entries (0 means empty) and an ``'on_move'`` entry.

        Returns:
            The integer code, always smaller than ``512 * 512 * 2``.
        """
        # NOTE(review): declared without ``self``; it only works when called
        # through the class (or as a plain function) -- confirm with callers.
        #
        # This encoding is only an upper bound on the number of states.
        # Exploiting the board's redundancies would cut it down to 765
        # positions, and the side to move is implicit in how many squares
        # are occupied.
        board = state['board']
        code = 1 if state['on_move'] == -1 else 0
        for i in range(9):
            if board[i] != 0:
                code |= 1 << (i + 1)  # occupancy bit for square i
                if board[i] < 0:
                    code |= 1 << (i + 10)  # ownership bit for square i
        # The original computed the value but never returned it.
        return code
    def move_generator(self):
        moves = []
        for i in range (9):