Make the board prettier, provide some additional stats

This commit is contained in:
Nicolai Czempin 2018-12-16 21:30:13 +01:00
parent 542dc1bbe9
commit 4f0958a97f
5 changed files with 39 additions and 90 deletions

View File

@ -1,62 +0,0 @@
import gym
import numpy as np
import gym_tic_tac_toe
import random
def random_plus_middle_move(moves, p):
    """Take the middle square for player ``p`` if it is free, else move randomly.

    Args:
        moves: List of candidate moves, each a two-element list
            ``[player, square]``.
        p: Marker of the player to move; used to build the ``[p, 4]`` move
            that is looked up in ``moves``.

    Returns:
        ``[p, 4]`` when that move is among ``moves``; otherwise whatever
        ``random_move(moves, p)`` returns.
    """
    # Square index 4 is the one this strategy always prefers.
    middle = [p, 4]
    if middle in moves:
        return middle
    return random_move(moves, p)
def random_move(moves, p):
    """Return one move picked at random from ``moves``.

    Args:
        moves: Non-empty list of candidate moves.
        p: Player marker. Not used here; it is accepted so this function has
            the same signature as ``random_plus_middle_move``.

    Returns:
        One element of ``moves``.

    Raises:
        IndexError: If ``moves`` is empty (raised by ``random.choice``).
    """
    return random.choice(moves)
# Driver script: plays num_episodes games in the 'tic_tac_toe-v0' environment,
# choosing moves with random_plus_middle_move for player 1 and random_move for
# the other player, and prints progress and reward statistics.
# NOTE(review): indentation is missing in this view, so the nesting of the
# statements below cannot be read off the text — confirm against the real file.
env = gym.make('tic_tac_toe-v0')
num_episodes = 2000
# Upper bound on the number of moves attempted per episode.
num_steps_per_episode = 10
# One total reward per finished episode.
collected_rewards = []
for i in range(num_episodes):
s = env.reset()
print (s)
print ("starting new episode")
env.render()
print ("started")
total_reward = 0
done = False
# om is the player to move; it starts at 1 and is negated after every step.
om = 1
for j in range(num_steps_per_episode):
moves = env.move_generator()
print ("moves: ", moves)
# An empty move list ends the episode early.
if (not moves):
print ("out of moves")
break
# With exactly one move available there is nothing to choose.
if (len(moves)==1):
m = moves[0]
else:
# Player 1 prefers the middle square; the other player moves purely at random.
if (om == 1):
m = random_plus_middle_move(moves, om)
else:
m = random_move(moves, om)
print ("m: ", m)
# a = env.action_space.sample()
# print (a[0])
# #sm = s['on_move']
# #print (sm)
# a = tuple((om, a[1]))
# step() is unpacked as (next state, reward, done flag, ignored fourth value).
s1, reward, done, _ = env.step(m)
# Hand the move over to the other player.
om = -om
env.render()
total_reward += reward
s = s1
if done:
print ("game over: ", reward)
break
collected_rewards.append(total_reward)
# j is the index of the last inner-loop iteration that ran, so j+1 is printed as the step count.
print ("total reward ", total_reward, " after episode: ", i, ". steps: ", j+1)
# NOTE(review): this divides by num_episodes, not len(collected_rewards). If this
# line runs once per episode the value is not a running average until the final
# episode — confirm the intended nesting and denominator.
print ("average score: ", sum(collected_rewards) / num_episodes)
print("#########")

View File

@ -1,6 +1,6 @@
from gym.envs.registration import register
register(
id='tic_tac_toe-v0',
id='tic_tac_toe-v1',
entry_point='gym_tic_tac_toe.envs:TicTacToeEnv',
)

View File

@ -5,16 +5,17 @@ import numpy as np
class TicTacToeEnv(gym.Env):
metadata = {'render.modes': ['human']}
symbols = ['O', ' ', 'X'];
def __init__(self):
self.action_space = spaces.Discrete(9)
self.observation_space = spaces.Discrete(512*512*2) # flattened
self.observation_space = spaces.Discrete(9*3*2) # flattened
def step(self, action):
done = False
reward = 0
p, square = action
# p = p*2 - 1
# check move legality
board = self.state['board']
proposed = board[square]
@ -54,14 +55,14 @@ class TicTacToeEnv(gym.Env):
def render(self, mode='human', close=False):
if close:
return
print("on move: " , self.state['on_move'])
print("on move: " , self.symbols[self.state['on_move']+1])
for i in range (9):
print (self.state['board'][i], end=" ")
print()
print (self.symbols[self.state['board'][i]+1], end=" ");
if ((i % 3) == 2):
print();
def move_generator(self):
moves = []
for i in range (9):
if (self.state['board'][i] == 0):
p = self.state['on_move']
m = [p, i]

View File

@ -1,6 +1,6 @@
import gym
import numpy as np
import gym_tic_tac_toe
#import numpy as np
import gym_tic_tac_toe #noqa
import random
@ -14,49 +14,59 @@ def random_move(moves, p):
m = random.choice(moves)
return m
env = gym.make('tic_tac_toe-v0')
env = gym.make('tic_tac_toe-v1')
num_episodes = 2000
num_steps_per_episode = 10
collected_rewards = []
oom =1
for i in range(num_episodes):
s = env.reset()
print (s)
print ("starting new episode")
env.render()
print ("started")
#print (s)
#print ("starting new episode")
#env.render()
#print ("started")
total_reward = 0
done = False
om = 1
om = oom;
#run one episode
#print("starting player: ", om);
for j in range(num_steps_per_episode):
moves = env.move_generator()
print ("moves: ", moves)
#print ("moves: ", moves)
if (not moves):
print ("out of moves")
#print ("out of moves")
break
if (len(moves)==1):
#only a single possible move
m = moves[0]
else:
if (om == 1):
m = random_plus_middle_move(moves, om)
#m = random_move(moves, om)
else:
m = random_move(moves, om)
print ("m: ", m)
# a = env.action_space.sample()
# print (a[0])
# #sm = s['on_move']
# #print (sm)
# a = tuple((om, a[1]))
#print ("m: ", m)
s1, reward, done, _ = env.step(m)
om = -om
env.render()
#env.render()
total_reward += reward
s = s1
if done:
print ("game over: ", reward)
#print ("game over: ", reward)
break
env.render()
total_reward *= oom;
collected_rewards.append(total_reward)
print ("total reward ", total_reward, " after episode: ", i, ". steps: ", j+1)
print ("average score: ", sum(collected_rewards) / num_episodes)
#print ("total reward", total_reward, "after episode: ", i+1, ". steps: ", j+1)
oom = -oom
print ("after "+ str(i+1) + " episodes:");
average = sum(collected_rewards) / num_episodes;
print ("average score: ", average);
print("percentage: ", round(100*(average+1)/2,1));
print("#########")

View File

@ -2,7 +2,7 @@ from setuptools import setup
from setuptools import find_packages
setup(name='gym_tic_tac_toe',
version='0.0.1',
version='0.0.2',
install_requires=['gym'],
url="https://github.com/nczempin/gym-tic-tac-toe",
packages=find_packages()