Pick a move from the generated ones rather than from the whole action space
This commit is contained in:
parent
8bed7b4aae
commit
a209d02d8b
@@ -1,6 +1,7 @@
|
|||||||
import gym
|
import gym
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import gym_tic_tac_toe
|
import gym_tic_tac_toe
|
||||||
|
import random
|
||||||
|
|
||||||
env = gym.make('tic_tac_toe-v0')
|
env = gym.make('tic_tac_toe-v0')
|
||||||
|
|
||||||
@@ -18,12 +19,18 @@ for i in range(num_episodes):
|
|||||||
done = False
|
done = False
|
||||||
om = 1
|
om = 1
|
||||||
for j in range(num_steps_per_episode):
|
for j in range(num_steps_per_episode):
|
||||||
|
moves = env.move_generator()
|
||||||
|
print ("moves: ", moves)
|
||||||
|
if (not moves):
|
||||||
|
break
|
||||||
|
m = random.choice(moves)
|
||||||
|
print ("m: ", m)
|
||||||
a = env.action_space.sample()
|
a = env.action_space.sample()
|
||||||
print (a[0])
|
print (a[0])
|
||||||
#sm = s['on_move']
|
#sm = s['on_move']
|
||||||
#print (sm)
|
#print (sm)
|
||||||
a = tuple((om, a[1]))
|
a = tuple((om, a[1]))
|
||||||
s1, reward, done, _ = env.step(a)
|
s1, reward, done, _ = env.step(m)
|
||||||
om = -om
|
om = -om
|
||||||
env.render()
|
env.render()
|
||||||
total_reward += reward
|
total_reward += reward
|
||||||
|
Loading…
Reference in New Issue
Block a user