Add basic TTT
This commit is contained in:
parent
d995b315e1
commit
46f077ef1f
@ -1,8 +1,8 @@
|
|||||||
import gym
|
import gym
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import gym_random_walk
|
import gym_tic_tac_toe
|
||||||
|
|
||||||
env = gym.make('random_walk-v0')
|
env = gym.make('tic_tac_toe-v0')
|
||||||
|
|
||||||
num_episodes = 20
|
num_episodes = 20
|
||||||
num_steps_per_episode = 200
|
num_steps_per_episode = 200
|
||||||
@ -10,14 +10,21 @@ num_steps_per_episode = 200
|
|||||||
collected_rewards = []
|
collected_rewards = []
|
||||||
for i in range(num_episodes):
|
for i in range(num_episodes):
|
||||||
s = env.reset()
|
s = env.reset()
|
||||||
|
print (s)
|
||||||
print ("starting new episode")
|
print ("starting new episode")
|
||||||
env.render()
|
env.render()
|
||||||
print ("started")
|
print ("started")
|
||||||
total_reward = 0
|
total_reward = 0
|
||||||
done = False
|
done = False
|
||||||
|
om = 1
|
||||||
for j in range(num_steps_per_episode):
|
for j in range(num_steps_per_episode):
|
||||||
a = np.random.randint(env.action_space.n)
|
a = env.action_space.sample()
|
||||||
|
print (a[0])
|
||||||
|
#sm = s['on_move']
|
||||||
|
#print (sm)
|
||||||
|
a = tuple((om, a[1]))
|
||||||
s1, reward, done, _ = env.step(a)
|
s1, reward, done, _ = env.step(a)
|
||||||
|
om = -om
|
||||||
env.render()
|
env.render()
|
||||||
total_reward += reward
|
total_reward += reward
|
||||||
s = s1
|
s = s1
|
@ -1,10 +0,0 @@
|
|||||||
from gym.envs.registration import register
|
|
||||||
|
|
||||||
register(
|
|
||||||
id='random_walk-v0',
|
|
||||||
entry_point='gym_random_walk.envs:RandomWalkEnv',
|
|
||||||
)
|
|
||||||
#register(
|
|
||||||
# id='foo-extrahard-v0',
|
|
||||||
# entry_point='gym_foo.envs:FooExtraHardEnv',
|
|
||||||
#)
|
|
@ -1 +0,0 @@
|
|||||||
from gym_random_walk.envs.random_walk_env import RandomWalkEnv
|
|
@ -1,36 +0,0 @@
|
|||||||
import gym
|
|
||||||
from gym import error, spaces, utils
|
|
||||||
from gym.utils import seeding
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
class RandomWalkEnv(gym.Env):
|
|
||||||
metadata = {'render.modes': ['human']}
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.action_space = spaces.Discrete(2)
|
|
||||||
self.size = 6
|
|
||||||
#print("init")
|
|
||||||
def _step(self, action):
|
|
||||||
#print("step")
|
|
||||||
reward = 0
|
|
||||||
done = False
|
|
||||||
if (action == 0):
|
|
||||||
self.state -= 1
|
|
||||||
if (action == 1):
|
|
||||||
self.state += 1
|
|
||||||
if (self.state >= self.size):
|
|
||||||
reward = 1
|
|
||||||
done = True
|
|
||||||
if (self.state <= 0):
|
|
||||||
done = True
|
|
||||||
return np.array(self.state), reward, done, {}
|
|
||||||
def _reset(self):
|
|
||||||
#print("reset")
|
|
||||||
print("#self.size:",self.size)
|
|
||||||
self.state = np.random.randint(1,self.size-1)
|
|
||||||
print("starting: ", self.state)
|
|
||||||
def _render(self, mode='human', close=False):
|
|
||||||
if close:
|
|
||||||
return
|
|
||||||
#print("render")
|
|
||||||
print("current state: ",self.state)
|
|
6
gym_tic_tac_toe/__init__.py
Normal file
6
gym_tic_tac_toe/__init__.py
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
from gym.envs.registration import register
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='tic_tac_toe-v0',
|
||||||
|
entry_point='gym_tic_tac_toe.envs:TicTacToeEnv',
|
||||||
|
)
|
1
gym_tic_tac_toe/envs/__init__.py
Normal file
1
gym_tic_tac_toe/envs/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from gym_tic_tac_toe.envs.tic_tac_toe_env import TicTacToeEnv
|
64
gym_tic_tac_toe/envs/tic_tac_toe_env.py
Normal file
64
gym_tic_tac_toe/envs/tic_tac_toe_env.py
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
import gym
|
||||||
|
from gym import spaces
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
class TicTacToeEnv(gym.Env):
|
||||||
|
metadata = {'render.modes': ['human']}
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.action_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(9)))
|
||||||
|
self.observation_space = spaces.Discrete(3)#Tuple(spaces.Discrete(3), spaces.Discrete(9))
|
||||||
|
def _step(self, action):
|
||||||
|
done = False
|
||||||
|
reward = 0
|
||||||
|
|
||||||
|
p, square = action
|
||||||
|
|
||||||
|
# p = p*2 - 1
|
||||||
|
# check move legality
|
||||||
|
proposed = self.state['board'][square]
|
||||||
|
om = self.state['on_move']
|
||||||
|
print ("on move: ", om)
|
||||||
|
if (proposed != 0): # wrong player, not empty
|
||||||
|
print("illegal move ", action, ". (square occupied): ", square)
|
||||||
|
done = True
|
||||||
|
reward = -om # player who did NOT make the illegal move
|
||||||
|
if (p != om): # wrong player, not empty
|
||||||
|
print("illegal move ", action, " not on move: ", p)
|
||||||
|
done = True
|
||||||
|
reward = -om # player who did NOT make the illegal move
|
||||||
|
else:
|
||||||
|
self.state['board'][square] = p
|
||||||
|
self.state['on_move'] = -p
|
||||||
|
|
||||||
|
# check game over
|
||||||
|
for i in range(3):
|
||||||
|
if (self.state['board'][i * 3] == p and self.state['board'][i*3 + 1] == p and self.state['board'][i*3+2] == 2):
|
||||||
|
reward = p
|
||||||
|
done = True
|
||||||
|
break
|
||||||
|
#TODO other cases
|
||||||
|
|
||||||
|
return np.array(self.state), reward, done, {}
|
||||||
|
def _reset(self):
|
||||||
|
self.state = {}
|
||||||
|
self.state['board'] = [0,0,0,0,0,0,0,0,0]
|
||||||
|
self.state['on_move'] = 1
|
||||||
|
return self.state
|
||||||
|
def _render(self, mode='human', close=False):
|
||||||
|
if close:
|
||||||
|
return
|
||||||
|
print("on move: " , self.state['on_move'])
|
||||||
|
for i in range (9):
|
||||||
|
print (self.state['board'][i], end=" ")
|
||||||
|
print()
|
||||||
|
def move_generator(self):
|
||||||
|
moves = []
|
||||||
|
for i in range (9):
|
||||||
|
if (self.state.state['board'][i]== 0):
|
||||||
|
p = self.state.on_move
|
||||||
|
if (p == 2):
|
||||||
|
p = -1
|
||||||
|
m = [p, i]
|
||||||
|
moves.append(m)
|
||||||
|
|
4
setup.py
4
setup.py
@ -1,9 +1,9 @@
|
|||||||
from setuptools import setup
|
from setuptools import setup
|
||||||
from setuptools import find_packages
|
from setuptools import find_packages
|
||||||
|
|
||||||
setup(name='gym_random_walk',
|
setup(name='gym_tic_tac_toe',
|
||||||
version='0.0.1',
|
version='0.0.1',
|
||||||
install_requires=['gym'],
|
install_requires=['gym'],
|
||||||
url="https://github.com/nczempin/gym-random-walk",
|
url="https://github.com/nczempin/gym-tic-tac-toe",
|
||||||
packages=find_packages()
|
packages=find_packages()
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user