gym-tic-tac-toe/examples/random_tic_tac_toe.py
Nicolai Czempin 46f077ef1f Add basic TTT
2017-04-03 23:51:35 +02:00

37 lines
877 B
Python

import gym
import numpy as np
import gym_tic_tac_toe
# Example driver: run a fixed number of episodes on the 'tic_tac_toe-v0'
# environment using randomly sampled actions, printing the reward collected in
# each episode and the average reward over all episodes at the end.
env = gym.make('tic_tac_toe-v0')
num_episodes = 20
# Upper bound on steps per episode; an episode ends earlier once the
# environment reports `done`.
num_steps_per_episode = 200
collected_rewards = []

for i in range(num_episodes):
    s = env.reset()
    print(s)
    print("starting new episode")
    env.render()
    print("started")
    total_reward = 0
    steps = 0
    # Value forced into the first slot of every action. It flips between 1 and
    # -1 after each step (presumably the side on move; the state appears to
    # expose an 'on_move' entry -- TODO confirm against the environment).
    om = 1
    for j in range(num_steps_per_episode):
        a = env.action_space.sample()
        print(a[0])
        # Keep only the second component of the sampled action and replace
        # the first one with the alternating value tracked above.
        a = (om, a[1])
        s, reward, done, _ = env.step(a)
        om = -om
        env.render()
        total_reward += reward
        steps = j + 1
        if done:
            break
    collected_rewards.append(total_reward)
    # Label the line with the episode index; the number of steps actually
    # played is reported separately.
    print("total reward ", total_reward, " after episode: ", i,
          ". steps: ", steps)

# Average over the episodes that were actually played; skipped when there are
# none so that no division by zero can occur.
if collected_rewards:
    print("average score: ", sum(collected_rewards) / len(collected_rewards))
print("#########")