Pick a random move from those returned by env.move_generator() rather than sampling from the whole action space

2017-04-04 17:46:38 +02:00 · 2017-04-04 17:46:38 +02:00 · a209d02d8b
commit a209d02d8b
parent 8bed7b4aae
1 changed files with 8 additions and 1 deletions
--- a/examples/random_tic_tac_toe.py
+++ b/examples/random_tic_tac_toe.py
@ -1,6 +1,7 @@
 import gym
 import numpy as np
 import gym_tic_tac_toe
 import random
 env = gym.make('tic_tac_toe-v0')
@ -18,12 +19,18 @@ for i in range(num_episodes):
    done = False
    om = 1
    for j in range(num_steps_per_episode):
        moves = env.move_generator()
        print ("moves: ", moves)
        if (not moves):
            break
        m = random.choice(moves)
        print ("m: ", m)
        a = env.action_space.sample()
        print (a[0])
        #sm = s['on_move']
        #print (sm)
        a = tuple((om, a[1]))
-        s1, reward, done, _ = env.step(a)
+        s1, reward, done, _ = env.step(m)
        om = -om
        env.render()
        total_reward += reward