Pick a move from the generated ones rather than from the whole action space

This commit is contained in:
Nicolai Czempin 2017-04-04 17:46:38 +02:00
parent 8bed7b4aae
commit a209d02d8b

View File

@@ -1,6 +1,7 @@
import gym import gym
import numpy as np import numpy as np
import gym_tic_tac_toe import gym_tic_tac_toe
import random
env = gym.make('tic_tac_toe-v0') env = gym.make('tic_tac_toe-v0')
@@ -18,12 +19,18 @@ for i in range(num_episodes):
done = False done = False
om = 1 om = 1
for j in range(num_steps_per_episode): for j in range(num_steps_per_episode):
moves = env.move_generator()
print ("moves: ", moves)
if (not moves):
break
m = random.choice(moves)
print ("m: ", m)
a = env.action_space.sample() a = env.action_space.sample()
print (a[0]) print (a[0])
#sm = s['on_move'] #sm = s['on_move']
#print (sm) #print (sm)
a = tuple((om, a[1])) a = tuple((om, a[1]))
s1, reward, done, _ = env.step(a) s1, reward, done, _ = env.step(m)
om = -om om = -om
env.render() env.render()
total_reward += reward total_reward += reward