From 73495ae7e41b568586981167e5cd58238765f399 Mon Sep 17 00:00:00 2001
From: Nicolai Czempin <nicolai.czempin@gmail.com>
Date: Tue, 4 Apr 2017 23:07:45 +0200
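Subject: [PATCH] cleanup: simplify TicTacToeEnv and drop hash_ttt

- Declare observation_space as Discrete(512*512*2) instead of
  Discrete(9 * 3).
- Change the illegal-move reward from -2 * om to -1 * om in both
  illegal-move branches of _step.
- Return self.state directly from _step instead of np.array(self.state).
- Remove hash_ttt, which computed a state hash but had no return
  statement.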

---
 gym_tic_tac_toe/envs/tic_tac_toe_env.py | 29 ++++---------------------
 1 file changed, 4 insertions(+), 25 deletions(-)

diff --git a/gym_tic_tac_toe/envs/tic_tac_toe_env.py b/gym_tic_tac_toe/envs/tic_tac_toe_env.py
index f4be54d..71007b3 100644
--- a/gym_tic_tac_toe/envs/tic_tac_toe_env.py
+++ b/gym_tic_tac_toe/envs/tic_tac_toe_env.py
@@ -7,7 +7,7 @@ class TicTacToeEnv(gym.Env):
 
     def __init__(self):
         self.action_space = spaces.Discrete(9)
-        self.observation_space = spaces.Discrete(9 * 3) # flattened
+        self.observation_space = spaces.Discrete(512*512*2) # flattened
     def _step(self, action):
         done = False
         reward = 0
@@ -22,11 +22,11 @@ class TicTacToeEnv(gym.Env):
         if (proposed != 0):  # wrong player, not empty
             print("illegal move ", action, ". (square occupied): ", square)
             done = True
-            reward = -2 * om  # player who did NOT make the illegal move
+            reward = -1 * om  # player who did NOT make the illegal move
         if (p != om):  # wrong player, not empty
             print("illegal move  ", action, " not on move: ", p)
             done = True
-            reward = -2 * om  # player who did NOT make the illegal move
+            reward = -1 * om  # player who did NOT make the illegal move
         else:
             board[square] = p
             self.state['on_move'] = -p
@@ -40,7 +40,7 @@ class TicTacToeEnv(gym.Env):
                 done = True
                 break
                 
-        return np.array(self.state), reward, done, {}
+        return self.state, reward, done, {}
     def _reset(self):
         self.state = {}
         self.state['board'] = [0, 0, 0, 0, 0, 0, 0, 0, 0]
@@ -53,27 +53,6 @@ class TicTacToeEnv(gym.Env):
         for i in range (9):
             print (self.state['board'][i], end=" ")
         print()
-    def hash_ttt(state):
-        #of course this is just for the upper bound;
-        #we should really take advantage of the redundancies
-        # to reduce the number of states to 765 for the board
-        # and who is on move really is implicit in how many
-        # squares are occupied
-        retval = 0
-        low9 = 0
-        high9 = 0
-        lowmult = 2
-        highmult = 1024
-        board = state['board']
-        if (state['on_move'] == -1):
-            retval = 1
-        for i in range(9):
-            if (board[i] != 0):
-                retval += lowmult #todo bitwise logic in python how?
-                if (board[i] < 0):
-                    retval += highmult
-            lowmult *=2
-            highmult *= 2
     def move_generator(self):
         moves = []
         for i in range (9):