Add a hash function for our state

2017-04-04 21:47:54 +02:00 · 2017-04-04 21:47:54 +02:00 · 0962101988
commit 0962101988
parent d5347cafc7
1 changed files with 23 additions and 2 deletions
--- a/gym_tic_tac_toe/envs/tic_tac_toe_env.py
+++ b/gym_tic_tac_toe/envs/tic_tac_toe_env.py
@ -6,8 +6,8 @@ class TicTacToeEnv(gym.Env):
    metadata = {'render.modes': ['human']}
    def __init__(self):
-        self.action_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(9)))
+        self.action_space = spaces.Discrete(9)
-        self.observation_space = spaces.Discrete(3)  # Tuple(spaces.Discrete(3), spaces.Discrete(9))
+        self.observation_space = spaces.Discrete(9 * 3) # flattened
    def _step(self, action):
        done = False
        reward = 0
@ -53,6 +53,27 @@ class TicTacToeEnv(gym.Env):
        for i in range (9):
            print (self.state['board'][i], end=" ")
        print()
    def hash_ttt(state):
        """Pack a tic-tac-toe state into a single integer.

        Bit layout of the result (19 bits in total):

        * bit 0         -- set when ``state['on_move'] == -1``
        * bits 1..9     -- bit ``i + 1`` is set when ``board[i]`` is non-zero
        * bits 10..18   -- bit ``i + 10`` is set when ``board[i]`` is negative

        Args:
            state: mapping with a ``'board'`` entry indexable at 0..8 and an
                ``'on_move'`` entry.

        Returns:
            The packed integer, in ``range(2 ** 19)``.

        NOTE(review): this is defined in the class body without ``self``, so
        it has to be called through the class (``TicTacToeEnv.hash_ttt(s)``);
        consider marking it ``@staticmethod``.
        """
        # Of course this is just an upper bound; we should really take
        # advantage of the redundancies to reduce the number of states to 765
        # for the board, and who is on move is implicit in how many squares
        # are occupied.
        board = state['board']
        retval = 1 if state['on_move'] == -1 else 0
        for i in range(9):
            if board[i] != 0:
                # Low group: the square is occupied.
                retval |= 1 << (i + 1)
                if board[i] < 0:
                    # High group: the occupant is the negative-valued side.
                    retval |= 1 << (i + 10)
        # The value was previously computed but never handed back to the caller.
        return retval
    def move_generator(self):
        moves = []
        for i in range (9):