gym-tic-tac-toe/examples/random_tic_tac_toe.py

63 lines
1.5 KiB
Python
Raw Normal View History

2017-04-01 14:01:16 +04:00
import gym
import numpy as np
2017-04-04 01:51:35 +04:00
import gym_tic_tac_toe
import random
2017-04-01 14:01:16 +04:00
def random_plus_middle_move(moves, p):
    """Choose the centre square (index 4) for player *p* when it is still
    available; otherwise fall back to a uniformly random legal move.

    moves -- list of legal moves, each of the form [player, square]
    p     -- the player on move (+1 or -1)
    """
    centre = [p, 4]
    if centre in moves:
        return centre
    return random_move(moves, p)
def random_move(moves, p):
    """Return a uniformly random move from *moves*.

    The player argument *p* is unused here; it is kept so the signature
    mirrors random_plus_middle_move.
    """
    return random.choice(moves)
2017-04-04 01:51:35 +04:00
# Play random tic-tac-toe episodes: player +1 prefers the centre square,
# player -1 plays uniformly at random. Rewards are accumulated per episode
# and averaged at the end.
env = gym.make('tic_tac_toe-v0')

num_episodes = 2000
num_steps_per_episode = 10  # a 3x3 board never needs more than 9 moves

collected_rewards = []
for i in range(num_episodes):
    s = env.reset()
    print(s)
    print("starting new episode")
    env.render()
    print("started")
    total_reward = 0
    done = False
    om = 1  # player on move: +1 starts, alternates each step
    for j in range(num_steps_per_episode):
        moves = env.move_generator()
        print("moves: ", moves)
        if not moves:
            # Board full (or no legal move) without a terminal signal.
            print("out of moves")
            break
        # Forced move when only one option remains; otherwise +1 favours
        # the centre and -1 plays uniformly at random.
        if len(moves) == 1:
            m = moves[0]
        elif om == 1:
            m = random_plus_middle_move(moves, om)
        else:
            m = random_move(moves, om)
        print("m: ", m)
        s1, reward, done, _ = env.step(m)
        om = -om  # hand the move to the other player
        env.render()
        total_reward += reward
        s = s1
        if done:
            print("game over: ", reward)
            break
    collected_rewards.append(total_reward)
    print("total reward ", total_reward, " after episode: ", i, ". steps: ", j + 1)

print("average score: ", sum(collected_rewards) / num_episodes)
print("#########")