73 lines
1.7 KiB
Python
73 lines
1.7 KiB
Python
import gym
|
|
#import numpy as np
|
|
import gym_tic_tac_toe #noqa
|
|
import random
|
|
|
|
|
|
def random_plus_middle_move(moves, p):
    """Pick square 4 for player *p* when it is available, else a random move.

    Args:
        moves: list of candidate moves; each is compared against ``[p, 4]``,
            so moves are presumably ``[player, square]`` pairs.
        p: marker of the player about to move.

    Returns:
        A new ``[p, 4]`` list when that move is among *moves* (presumably the
        centre of a 0-8 indexed board -- confirm against the environment),
        otherwise whatever ``random_move(moves, p)`` returns.
    """
    centre = [p, 4]
    if centre in moves:
        return centre
    # Square 4 is not on offer: fall back to the purely random strategy.
    return random_move(moves, p)
|
|
def random_move(moves, p):
    """Return one element of *moves* chosen uniformly at random.

    Args:
        moves: non-empty sequence of candidate moves; ``random.choice``
            raises ``IndexError`` when it is empty.
        p: player marker. Unused here; it keeps the signature parallel to
            ``random_plus_middle_move`` so the two strategies are
            interchangeable at the call site.

    Returns:
        The selected move, taken as-is from *moves*.
    """
    return random.choice(moves)
|
|
|
|
|
|
# Play a batch of tic-tac-toe games between two scripted players and report
# the average outcome.  Player 1 uses random_plus_middle_move, the other
# player uses random_move.
env = gym.make('tic_tac_toe-v1')

num_episodes = 2000
# Upper bound on moves per game; the inner loop also stops early when the
# environment reports no moves or signals `done`.
num_steps_per_episode = 10

collected_rewards = []

# `oom` is the player who opens the current episode; its sign is flipped
# after every episode so that both players open equally often.
oom = 1
for i in range(num_episodes):
    s = env.reset()
    total_reward = 0
    done = False
    # `om` is the player whose turn it is; the opener moves first.
    om = oom

    # Run one episode.
    for j in range(num_steps_per_episode):
        moves = env.move_generator()
        if not moves:
            # Nothing left to play.
            break

        if len(moves) == 1:
            # Only a single possible move: no strategy needed.
            m = moves[0]
        elif om == 1:
            m = random_plus_middle_move(moves, om)
        else:
            m = random_move(moves, om)

        s1, reward, done, _ = env.step(m)
        total_reward += reward
        s = s1
        # Hand the turn to the other player.
        om = -om
        if done:
            break

    # NOTE(review): the board is rendered once per episode, after the game
    # has ended -- confirm this placement matches the intended output.
    env.render()

    # Scale by the opener's sign; presumably this expresses every episode's
    # score from the same player's point of view regardless of who started
    # -- confirm against the environment's reward convention.
    total_reward *= oom
    collected_rewards.append(total_reward)
    oom = -oom

print("after " + str(i + 1) + " episodes:")

average = sum(collected_rewards) / num_episodes
print("average score: ", average)
# Linear rescale of the average: -1 maps to 0 and +1 maps to 100
# (assumes per-episode scores lie in [-1, 1] -- TODO confirm).
print("percentage: ", round(100 * (average + 1) / 2, 1))
print("#########")
|