diff --git a/examples/random_tic_tac_toe.py b/examples/random_tic_tac_toe.py deleted file mode 100644 index d6731df..0000000 --- a/examples/random_tic_tac_toe.py +++ /dev/null @@ -1,62 +0,0 @@ -import gym -import numpy as np -import gym_tic_tac_toe -import random - - -def random_plus_middle_move(moves, p): - if ([p, 4] in moves): - m = [p, 4] - else: - m = random_move(moves, p) - return m -def random_move(moves, p): - m = random.choice(moves) - return m - -env = gym.make('tic_tac_toe-v0') - -num_episodes = 2000 -num_steps_per_episode = 10 - -collected_rewards = [] -for i in range(num_episodes): - s = env.reset() - print (s) - print ("starting new episode") - env.render() - print ("started") - total_reward = 0 - done = False - om = 1 - for j in range(num_steps_per_episode): - moves = env.move_generator() - print ("moves: ", moves) - if (not moves): - print ("out of moves") - break - if (len(moves)==1): - m = moves[0] - else: - if (om == 1): - m = random_plus_middle_move(moves, om) - else: - m = random_move(moves, om) - print ("m: ", m) -# a = env.action_space.sample() -# print (a[0]) -# #sm = s['on_move'] -# #print (sm) -# a = tuple((om, a[1])) - s1, reward, done, _ = env.step(m) - om = -om - env.render() - total_reward += reward - s = s1 - if done: - print ("game over: ", reward) - break - collected_rewards.append(total_reward) - print ("total reward ", total_reward, " after episode: ", i, ". steps: ", j+1) -print ("average score: ", sum(collected_rewards) / num_episodes) -print("#########") diff --git a/gym_tic_tac_toe/__init__.py b/gym_tic_tac_toe/__init__.py index 2de5b0b..b341feb 100644 --- a/gym_tic_tac_toe/__init__.py +++ b/gym_tic_tac_toe/__init__.py @@ -1,6 +1,6 @@ from gym.envs.registration import register register( - id='tic_tac_toe-v0', + id='tic_tac_toe-v1', entry_point='gym_tic_tac_toe.envs:TicTacToeEnv', ) diff --git a/gym_tic_tac_toe/envs/tic_tac_toe_env.py b/gym_tic_tac_toe/envs/tic_tac_toe_env.py index 57fa174..b9f148c 100644 --- a/gym_tic_tac_toe/envs/tic_tac_toe_env.py +++ b/gym_tic_tac_toe/envs/tic_tac_toe_env.py @@ -4,17 +4,18 @@ import numpy as np class TicTacToeEnv(gym.Env): metadata = {'render.modes': ['human']} + + symbols = ['O', ' ', 'X']; def __init__(self): self.action_space = spaces.Discrete(9) - self.observation_space = spaces.Discrete(512*512*2) # flattened + self.observation_space = spaces.Discrete(9*3*2) # flattened def step(self, action): done = False reward = 0 p, square = action - # p = p*2 - 1 # check move legality board = self.state['board'] proposed = board[square] @@ -54,14 +55,14 @@ class TicTacToeEnv(gym.Env): def render(self, mode='human', close=False): if close: return - print("on move: " , self.state['on_move']) + print("on move: " , self.symbols[self.state['on_move']+1]) for i in range (9): - print (self.state['board'][i], end=" ") - print() + print (self.symbols[self.state['board'][i]+1], end=" "); + if ((i % 3) == 2): + print(); def move_generator(self): moves = [] for i in range (9): - if (self.state['board'][i] == 0): p = self.state['on_move'] m = [p, i] diff --git a/random_tic_tac_toe.py b/random_tic_tac_toe.py index d6731df..20fa4a5 100644 --- a/random_tic_tac_toe.py +++ b/random_tic_tac_toe.py @@ -1,6 +1,6 @@ import gym -import numpy as np -import gym_tic_tac_toe +#import numpy as np +import gym_tic_tac_toe #noqa import random @@ -14,49 +14,59 @@ def random_move(moves, p): m = random.choice(moves) return m -env = gym.make('tic_tac_toe-v0') + +env = gym.make('tic_tac_toe-v1') num_episodes = 2000 num_steps_per_episode = 10 collected_rewards = [] +oom =1 for i in range(num_episodes): s = env.reset() - print (s) - print ("starting new episode") - env.render() - print ("started") + #print (s) + #print ("starting new episode") + #env.render() + #print ("started") total_reward = 0 done = False - om = 1 + om = oom; + #run one episode + #print("starting player: ", om); + for j in range(num_steps_per_episode): moves = env.move_generator() - print ("moves: ", moves) + #print ("moves: ", moves) if (not moves): - print ("out of moves") + #print ("out of moves") break if (len(moves)==1): + #only a single possible move m = moves[0] else: if (om == 1): m = random_plus_middle_move(moves, om) + #m = random_move(moves, om) else: m = random_move(moves, om) - print ("m: ", m) -# a = env.action_space.sample() -# print (a[0]) -# #sm = s['on_move'] -# #print (sm) -# a = tuple((om, a[1])) + #print ("m: ", m) s1, reward, done, _ = env.step(m) om = -om - env.render() + #env.render() total_reward += reward s = s1 if done: - print ("game over: ", reward) + #print ("game over: ", reward) break + env.render() + total_reward *= oom; collected_rewards.append(total_reward) - print ("total reward ", total_reward, " after episode: ", i, ". steps: ", j+1) -print ("average score: ", sum(collected_rewards) / num_episodes) + #print ("total reward", total_reward, "after episode: ", i+1, ". steps: ", j+1) + oom = -oom + +print ("after "+ str(i+1) + " episodes:"); + +average = sum(collected_rewards) / num_episodes; +print ("average score: ", average); +print("percentage: ", round(100*(average+1)/2,1)); print("#########") diff --git a/setup.py b/setup.py index b7a9fb8..86ea6e9 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup from setuptools import find_packages setup(name='gym_tic_tac_toe', - version='0.0.1', + version='0.0.2', install_requires=['gym'], url="https://github.com/nczempin/gym-tic-tac-toe", packages=find_packages()