"""Play tic-tac-toe episodes with random move selection in a gym environment.

The player whose ``on-move`` marker is ``1`` takes the middle square whenever
it is offered and otherwise moves randomly; the other player (marker ``-1``)
always moves randomly.  Per-episode rewards are summed and the average over
all episodes is printed at the end.
"""
import random

import numpy as np  # kept from the original file; not referenced below


# Square index preferred by ``random_plus_middle_move``.
# NOTE(review): presumably the centre of a 3x3 board numbered 0-8 -- confirm
# against the environment's move encoding.
MIDDLE_SQUARE = 4

num_episodes = 2000
num_steps_per_episode = 10


def random_plus_middle_move(moves, p):
    """Return ``[p, MIDDLE_SQUARE]`` if it is among *moves*, else a random move.

    Args:
        moves: Non-empty sequence of candidate moves.  The membership test
            compares against a two-element list, so entries are expected to
            be ``[player, square]`` lists (TODO confirm with the env).
        p: Marker of the player on move.

    Returns:
        The middle move for *p* when available, otherwise the result of
        ``random_move(moves, p)``.
    """
    middle = [p, MIDDLE_SQUARE]
    if middle in moves:
        return middle
    return random_move(moves, p)


def random_move(moves, p):
    """Return a uniformly random element of *moves*.

    Args:
        moves: Non-empty sequence of candidate moves.
        p: Marker of the player on move.  Unused; kept so both move
            selectors share the same signature.

    Raises:
        IndexError: If *moves* is empty (raised by ``random.choice``).
    """
    return random.choice(moves)


def _play_episode(env, max_steps):
    """Play one episode on *env* and return ``(total_reward, steps_taken)``.

    ``steps_taken`` counts actual ``env.step`` calls, so an episode that ends
    because no moves are left does not report a step that never happened.
    """
    s = env.reset()
    print(s)
    print("starting new episode")
    env.render()
    print("started")

    total_reward = 0
    steps_taken = 0
    om = 1  # marker of the player on move; sign flips after every step
    for _ in range(max_steps):
        moves = env.move_generator()
        print("moves: ", moves)
        if not moves:
            print("out of moves")
            break
        if len(moves) == 1:
            # Forced move: nothing to choose from.
            m = moves[0]
        elif om == 1:
            m = random_plus_middle_move(moves, om)
        else:
            m = random_move(moves, om)
        print("m: ", m)

        _, reward, done, _ = env.step(m)
        steps_taken += 1
        om = -om
        env.render()
        total_reward += reward
        if done:
            print("game over: ", reward)
            break
    return total_reward, steps_taken


def main(episodes=num_episodes, steps_per_episode=num_steps_per_episode):
    """Run *episodes* random games and print per-episode and average rewards.

    Args:
        episodes: Number of episodes to play.
        steps_per_episode: Upper bound on ``env.step`` calls per episode.
    """
    # Imported here so the move helpers above stay importable without the
    # gym stack installed.  ``gym_tic_tac_toe`` is imported only for its
    # import-time effect (presumably registering 'tic_tac_toe-v0').
    import gym
    import gym_tic_tac_toe  # noqa: F401

    env = gym.make('tic_tac_toe-v0')

    collected_rewards = []
    for i in range(episodes):
        total_reward, steps_taken = _play_episode(env, steps_per_episode)
        collected_rewards.append(total_reward)
        print("total reward ", total_reward, " after episode: ", i,
              ". steps: ", steps_taken)

    # Avoid dividing by zero when no episodes were requested.
    if episodes:
        print("average score: ", sum(collected_rewards) / episodes)
    print("#########")


if __name__ == "__main__":
    main()