From a209d02d8b66afaa85e5b62ced0b41c8d46b718d Mon Sep 17 00:00:00 2001 From: Nicolai Czempin Date: Tue, 4 Apr 2017 17:46:38 +0200 Subject: [PATCH] Pick a move from the generated ones rather than from the whole action space --- examples/random_tic_tac_toe.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/examples/random_tic_tac_toe.py b/examples/random_tic_tac_toe.py index 758cd82..0fc8964 100644 --- a/examples/random_tic_tac_toe.py +++ b/examples/random_tic_tac_toe.py @@ -1,6 +1,7 @@ import gym import numpy as np import gym_tic_tac_toe +import random env = gym.make('tic_tac_toe-v0') @@ -18,12 +19,18 @@ for i in range(num_episodes): done = False om = 1 for j in range(num_steps_per_episode): + moves = env.move_generator() + print ("moves: ", moves) + if (not moves): + break + m = random.choice(moves) + print ("m: ", m) a = env.action_space.sample() print (a[0]) #sm = s['on_move'] #print (sm) a = tuple((om, a[1])) - s1, reward, done, _ = env.step(a) + s1, reward, done, _ = env.step(m) om = -om env.render() total_reward += reward