Play around with SPRT (sequential probability ratio test) for a bit, I'm probably doing it wrong
This commit is contained in:
parent
4f0958a97f
commit
32e17f864e
@ -1,7 +1,8 @@
|
|||||||
import gym
|
import math
|
||||||
#import numpy as np
|
|
||||||
import gym_tic_tac_toe #noqa
|
|
||||||
import random
|
import random
|
||||||
|
import gym_tic_tac_toe
|
||||||
|
|
||||||
|
import gym
|
||||||
|
|
||||||
|
|
||||||
def random_plus_middle_move(moves, p):
|
def random_plus_middle_move(moves, p):
|
||||||
@ -10,6 +11,8 @@ def random_plus_middle_move(moves, p):
|
|||||||
else:
|
else:
|
||||||
m = random_move(moves, p)
|
m = random_move(moves, p)
|
||||||
return m
|
return m
|
||||||
|
|
||||||
|
|
||||||
def random_move(moves, p):
|
def random_move(moves, p):
|
||||||
m = random.choice(moves)
|
m = random.choice(moves)
|
||||||
return m
|
return m
|
||||||
@ -17,31 +20,44 @@ def random_move(moves, p):
|
|||||||
|
|
||||||
env = gym.make('tic_tac_toe-v1')
|
env = gym.make('tic_tac_toe-v1')
|
||||||
|
|
||||||
num_episodes = 2000
|
p1 = 0.48
|
||||||
|
p2 = 0.55
|
||||||
|
alpha = 0.01
|
||||||
|
beta = 0.01
|
||||||
|
# theta = math.log((p2*(1-p1)) / (p1*(1-p2)));  # log-odds ratio; equals the denominator shared by h1, h2 and ss
|
||||||
|
|
||||||
|
h1 = math.log((1 - alpha) / beta) / (math.log(p2 / p1) + math.log((1 - p1) / (1 - p2)))
|
||||||
|
h2 = math.log((1 - beta) / alpha) / (math.log(p2 / p1) + math.log((1 - p1) / (1 - p2)))
|
||||||
|
ss = math.log((1 - p1) / (1 - p2)) / (math.log(p2 / p1) + math.log((1 - p1) / (1 - p2)))
|
||||||
|
print("ss:", ss)
|
||||||
|
print("h1:", h1)
|
||||||
|
print("h2:", h2)
|
||||||
|
|
||||||
|
num_episodes = 300
|
||||||
num_steps_per_episode = 10
|
num_steps_per_episode = 10
|
||||||
|
|
||||||
collected_rewards = []
|
collected_rewards = []
|
||||||
oom =1
|
oom = 1
|
||||||
for i in range(num_episodes):
|
for i in range(num_episodes):
|
||||||
s = env.reset()
|
s = env.reset()
|
||||||
#print (s)
|
# print (s)
|
||||||
#print ("starting new episode")
|
# print ("starting new episode")
|
||||||
#env.render()
|
# env.render()
|
||||||
#print ("started")
|
# print ("started")
|
||||||
total_reward = 0
|
total_reward = 0
|
||||||
done = False
|
done = False
|
||||||
om = oom;
|
om = oom;
|
||||||
#run one episode
|
# run one episode
|
||||||
#print("starting player: ", om);
|
# print("starting player: ", om);
|
||||||
|
|
||||||
for j in range(num_steps_per_episode):
|
for j in range(num_steps_per_episode):
|
||||||
moves = env.move_generator()
|
moves = env.move_generator()
|
||||||
#print ("moves: ", moves)
|
# print ("moves: ", moves)
|
||||||
if (not moves):
|
if (not moves):
|
||||||
#print ("out of moves")
|
# print ("out of moves")
|
||||||
break
|
break
|
||||||
if (len(moves)==1):
|
if (len(moves) == 1):
|
||||||
#only a single possible move
|
# only a single possible move
|
||||||
m = moves[0]
|
m = moves[0]
|
||||||
else:
|
else:
|
||||||
if (om == 1):
|
if (om == 1):
|
||||||
@ -49,24 +65,38 @@ for i in range(num_episodes):
|
|||||||
#m = random_move(moves, om)
|
#m = random_move(moves, om)
|
||||||
else:
|
else:
|
||||||
m = random_move(moves, om)
|
m = random_move(moves, om)
|
||||||
#print ("m: ", m)
|
# print ("m: ", m)
|
||||||
s1, reward, done, _ = env.step(m)
|
s1, reward, done, _ = env.step(m)
|
||||||
om = -om
|
om = -om
|
||||||
#env.render()
|
# env.render()
|
||||||
total_reward += reward
|
total_reward += reward
|
||||||
s = s1
|
s = s1
|
||||||
if done:
|
if done:
|
||||||
#print ("game over: ", reward)
|
# print ("game over: ", reward)
|
||||||
break
|
break
|
||||||
env.render()
|
# env.render()
|
||||||
total_reward *= oom;
|
total_reward *= oom;
|
||||||
collected_rewards.append(total_reward)
|
collected_rewards.append(total_reward)
|
||||||
#print ("total reward", total_reward, "after episode: ", i+1, ". steps: ", j+1)
|
# print ("total reward", total_reward, "after episode: ", i+1, ". steps: ", j+1)
|
||||||
oom = -oom
|
oom = -oom
|
||||||
|
|
||||||
print ("after "+ str(i+1) + " episodes:");
|
print("after " + str(i + 1) + " episodes:")
|
||||||
|
|
||||||
average = sum(collected_rewards) / num_episodes;
|
average = sum(collected_rewards) / num_episodes
|
||||||
print ("average score: ", average);
|
percentage = round(100*(average + 1) / 2, 1)
|
||||||
print("percentage: ", round(100*(average+1)/2,1));
|
score = percentage/100 * (i+1);
|
||||||
|
print("average score: ", average)
|
||||||
|
print("percentage: ", percentage)
|
||||||
|
print("score:", score)
|
||||||
|
print()
|
||||||
|
y1 = ss * (i+1) - h1
|
||||||
|
print ("y1:", y1)
|
||||||
|
y2 = ss * (i+1) + h2
|
||||||
|
print ("y2:", y2)
|
||||||
|
if (score <= y1):
|
||||||
|
print("accept null hypothesis")
|
||||||
|
break
|
||||||
|
if (score >= y2):
|
||||||
|
print("reject null hypothesis")
|
||||||
|
break
|
||||||
print("#########")
|
print("#########")
|
||||||
|
Loading…
Reference in New Issue
Block a user