Make the board prettier, provide some additional stats
commit 4f0958a97f (parent 542dc1bbe9)
@@ -1,62 +0,0 @@
-import gym
-import numpy as np
-import gym_tic_tac_toe
-import random
-
-
-def random_plus_middle_move(moves, p):
-    if ([p, 4] in moves):
-        m = [p, 4]
-    else:
-        m = random_move(moves, p)
-    return m
-def random_move(moves, p):
-    m = random.choice(moves)
-    return m
-
-env = gym.make('tic_tac_toe-v0')
-
-num_episodes = 2000
-num_steps_per_episode = 10
-
-collected_rewards = []
-for i in range(num_episodes):
-    s = env.reset()
-    print (s)
-    print ("starting new episode")
-    env.render()
-    print ("started")
-    total_reward = 0
-    done = False
-    om = 1
-    for j in range(num_steps_per_episode):
-        moves = env.move_generator()
-        print ("moves: ", moves)
-        if (not moves):
-            print ("out of moves")
-            break
-        if (len(moves)==1):
-            m = moves[0]
-        else:
-            if (om == 1):
-                m = random_plus_middle_move(moves, om)
-            else:
-                m = random_move(moves, om)
-        print ("m: ", m)
-        # a = env.action_space.sample()
-        # print (a[0])
-        # #sm = s['on_move']
-        # #print (sm)
-        # a = tuple((om, a[1]))
-        s1, reward, done, _ = env.step(m)
-        om = -om
-        env.render()
-        total_reward += reward
-        s = s1
-        if done:
-            print ("game over: ", reward)
-            break
-    collected_rewards.append(total_reward)
-    print ("total reward ", total_reward, " after episode: ", i, ". steps: ", j+1)
-print ("average score: ", sum(collected_rewards) / num_episodes)
-print("#########")
@@ -1,6 +1,6 @@
 from gym.envs.registration import register
 
 register(
-    id='tic_tac_toe-v0',
+    id='tic_tac_toe-v1',
     entry_point='gym_tic_tac_toe.envs:TicTacToeEnv',
 )
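Because the registered id changes, any script that still calls gym.make('tic_tac_toe-v0') stops working after this commit. A minimal usage sketch against the renamed environment (assuming the package is installed so the register() call above runs on import):

import gym
import gym_tic_tac_toe  # noqa: importing the package triggers the register() call above

env = gym.make('tic_tac_toe-v1')  # 'tic_tac_toe-v0' no longer exists after this commit
s = env.reset()
env.render()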
@@ -5,16 +5,17 @@ import numpy as np
 class TicTacToeEnv(gym.Env):
     metadata = {'render.modes': ['human']}
 
+    symbols = ['O', ' ', 'X'];
+
     def __init__(self):
         self.action_space = spaces.Discrete(9)
-        self.observation_space = spaces.Discrete(512*512*2) # flattened
+        self.observation_space = spaces.Discrete(9*3*2) # flattened
     def step(self, action):
         done = False
         reward = 0
 
         p, square = action
 
-        # p = p*2 - 1
         # check move legality
         board = self.state['board']
         proposed = board[square]
@@ -54,14 +55,14 @@ class TicTacToeEnv(gym.Env):
     def render(self, mode='human', close=False):
         if close:
             return
-        print("on move: " , self.state['on_move'])
+        print("on move: " , self.symbols[self.state['on_move']+1])
         for i in range (9):
-            print (self.state['board'][i], end=" ")
-        print()
+            print (self.symbols[self.state['board'][i]+1], end=" ");
+            if ((i % 3) == 2):
+                print();
     def move_generator(self):
         moves = []
         for i in range (9):
-
             if (self.state['board'][i] == 0):
                 p = self.state['on_move']
                 m = [p, i]
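For reference, a standalone sketch of what the prettier rendering produces. The helper below is hypothetical (not part of the env); it assumes the board convention the class already uses, nine cells holding -1, 0 or 1, which the new symbols list maps to 'O', ' ' and 'X':

symbols = ['O', ' ', 'X']

def render_board(board):
    # hypothetical helper mirroring the new render() loop:
    # print each cell, and start a new row after every third square
    for i in range(9):
        print(symbols[board[i] + 1], end=" ")
        if (i % 3) == 2:
            print()

render_board([1, 0, -1,
              0, 1, 0,
              -1, 0, 1])
# prints a 3x3 grid, roughly:
# X   O
#   X
# O   X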
@@ -1,6 +1,6 @@
 import gym
-import numpy as np
-import gym_tic_tac_toe
+#import numpy as np
+import gym_tic_tac_toe #noqa
 import random
 
 
@@ -14,49 +14,59 @@ def random_move(moves, p):
     m = random.choice(moves)
     return m
 
-env = gym.make('tic_tac_toe-v0')
+env = gym.make('tic_tac_toe-v1')
 
 num_episodes = 2000
 num_steps_per_episode = 10
 
 collected_rewards = []
+oom =1
 for i in range(num_episodes):
     s = env.reset()
-    print (s)
-    print ("starting new episode")
-    env.render()
-    print ("started")
+    #print (s)
+    #print ("starting new episode")
+    #env.render()
+    #print ("started")
     total_reward = 0
     done = False
-    om = 1
+    om = oom;
+    #run one episode
+    #print("starting player: ", om);
+
     for j in range(num_steps_per_episode):
         moves = env.move_generator()
-        print ("moves: ", moves)
+        #print ("moves: ", moves)
         if (not moves):
-            print ("out of moves")
+            #print ("out of moves")
             break
         if (len(moves)==1):
+            #only a single possible move
             m = moves[0]
         else:
             if (om == 1):
                 m = random_plus_middle_move(moves, om)
+                #m = random_move(moves, om)
             else:
                 m = random_move(moves, om)
-        print ("m: ", m)
-        # a = env.action_space.sample()
-        # print (a[0])
-        # #sm = s['on_move']
-        # #print (sm)
-        # a = tuple((om, a[1]))
+        #print ("m: ", m)
         s1, reward, done, _ = env.step(m)
         om = -om
-        env.render()
+        #env.render()
         total_reward += reward
         s = s1
         if done:
-            print ("game over: ", reward)
+            #print ("game over: ", reward)
             break
+    env.render()
+    total_reward *= oom;
     collected_rewards.append(total_reward)
-    print ("total reward ", total_reward, " after episode: ", i, ". steps: ", j+1)
-print ("average score: ", sum(collected_rewards) / num_episodes)
+    #print ("total reward", total_reward, "after episode: ", i+1, ". steps: ", j+1)
+    oom = -oom
+
+print ("after "+ str(i+1) + " episodes:");
+
+average = sum(collected_rewards) / num_episodes;
+print ("average score: ", average);
+print("percentage: ", round(100*(average+1)/2,1));
 print("#########")
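The new statistics account for the alternating starting player: oom flips sign every episode, and total_reward *= oom flips the sign of the result on episodes where player -1 started, presumably to keep the per-episode scores comparable. The percentage line then maps the average from the implied [-1, 1] range onto 0-100. A minimal sketch of that reporting step (the sample values are made up):

# hypothetical sample of per-episode results, already sign-adjusted by `total_reward *= oom`
collected_rewards = [1, 0, -1, 1, 1]
num_episodes = len(collected_rewards)

average = sum(collected_rewards) / num_episodes   # 0.4 for this sample
percentage = round(100 * (average + 1) / 2, 1)    # maps -1..+1 onto 0..100 -> 70.0

print("average score: ", average)
print("percentage: ", percentage)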
setup.py (2 changed lines)
@@ -2,7 +2,7 @@ from setuptools import setup
 from setuptools import find_packages
 
 setup(name='gym_tic_tac_toe',
-      version='0.0.1',
+      version='0.0.2',
       install_requires=['gym'],
       url="https://github.com/nczempin/gym-tic-tac-toe",
       packages=find_packages()
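Only the version number changes here (0.0.1 to 0.0.2); a local checkout can be refreshed with a standard reinstall, e.g. pip install -e . from the repository root, so the package reporting the new version is the one that registers tic_tac_toe-v1.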