Patch for examples/random_tic_tac_toe.py.

Summary of what the hunks below change:
  * add helpers random_plus_middle_move() and random_move() for choosing a move
    from the list returned by env.move_generator();
  * change num_episodes from 20 to 2000 and num_steps_per_episode from 200 to 10;
  * pick the move via the helpers (the only move directly when exactly one is
    left; random_plus_middle_move when om == 1, random_move otherwise);
  * comment out the old env.action_space.sample() based action code;
  * print "out of moves" / "game over" before breaking out of the step loop, and
    report the episode index i plus the step count j+1 in the per-episode summary.

NOTE(review): this patch was recovered from a copy whose newlines and leading
whitespace had been collapsed. Line boundaries are validated against the hunk
headers; the indentation of the Python lines is inferred from the control-flow
nesting and must be confirmed against the target file before applying.

diff --git a/examples/random_tic_tac_toe.py b/examples/random_tic_tac_toe.py
index 0fc8964..d6731df 100644
--- a/examples/random_tic_tac_toe.py
+++ b/examples/random_tic_tac_toe.py
@@ -3,10 +3,21 @@ import numpy as np
 import gym_tic_tac_toe
 import random
 
+
+def random_plus_middle_move(moves, p):
+    if ([p, 4] in moves):
+        m = [p, 4]
+    else:
+        m = random_move(moves, p)
+    return m
+def random_move(moves, p):
+    m = random.choice(moves)
+    return m
+
 env = gym.make('tic_tac_toe-v0')
 
-num_episodes = 20
-num_steps_per_episode = 200
+num_episodes = 2000
+num_steps_per_episode = 10
 
 collected_rewards = []
 for i in range(num_episodes):
@@ -22,22 +33,30 @@ for i in range(num_episodes):
         moves = env.move_generator()
         print ("moves: ", moves)
         if (not moves):
+            print ("out of moves")
             break
-        m = random.choice(moves)
+        if (len(moves)==1):
+            m = moves[0]
+        else:
+            if (om == 1):
+                m = random_plus_middle_move(moves, om)
+            else:
+                m = random_move(moves, om)
         print ("m: ", m)
-        a = env.action_space.sample()
-        print (a[0])
-        #sm = s['on_move']
-        #print (sm)
-        a = tuple((om, a[1]))
+#        a = env.action_space.sample()
+#        print (a[0])
+#        #sm = s['on_move']
+#        #print (sm)
+#        a = tuple((om, a[1]))
         s1, reward, done, _ = env.step(m)
         om = -om
         env.render()
         total_reward += reward
         s = s1
         if done:
+            print ("game over: ", reward)
             break
     collected_rewards.append(total_reward)
-    print ("total reward ", total_reward, " after episode: ", j)
+    print ("total reward ", total_reward, " after episode: ", i, ". steps: ", j+1)
 print ("average score: ", sum(collected_rewards) / num_episodes)
 print("#########")