Play around with SPRT (sequential probability ratio test) for a bit, I'm probably doing it wrong

This commit is contained in:
Nicolai Czempin 2018-12-16 22:58:53 +01:00
parent 4f0958a97f
commit 32e17f864e

View File

@ -1,7 +1,8 @@
import gym import math
#import numpy as np
import gym_tic_tac_toe #noqa
import random import random
import gym_tic_tac_toe
import gym
def random_plus_middle_move(moves, p): def random_plus_middle_move(moves, p):
@ -10,6 +11,8 @@ def random_plus_middle_move(moves, p):
else: else:
m = random_move(moves, p) m = random_move(moves, p)
return m return m
def random_move(moves, p): def random_move(moves, p):
m = random.choice(moves) m = random.choice(moves)
return m return m
@ -17,56 +20,83 @@ def random_move(moves, p):
env = gym.make('tic_tac_toe-v1') env = gym.make('tic_tac_toe-v1')
num_episodes = 2000 p1 = 0.48
p2 = 0.55
alpha = 0.01
beta = 0.01
# theta = math.log((p1*(1-p0)) / (p0*(1-p1)));
h1 = math.log((1 - alpha) / beta) / (math.log(p2 / p1) + math.log((1 - p1) / (1 - p2)))
h2 = math.log((1 - beta) / alpha) / (math.log(p2 / p1) + math.log((1 - p1) / (1 - p2)))
ss = math.log((1 - p1) / (1 - p2)) / (math.log(p2 / p1) + math.log((1 - p1) / (1 - p2)))
print("ss:", ss)
print("h1:", h1)
print("h2:", h2)
num_episodes = 300
num_steps_per_episode = 10 num_steps_per_episode = 10
collected_rewards = [] collected_rewards = []
oom =1 oom = 1
for i in range(num_episodes): for i in range(num_episodes):
s = env.reset() s = env.reset()
#print (s) # print (s)
#print ("starting new episode") # print ("starting new episode")
#env.render() # env.render()
#print ("started") # print ("started")
total_reward = 0 total_reward = 0
done = False done = False
om = oom; om = oom;
#run one episode # run one episode
#print("starting player: ", om); # print("starting player: ", om);
for j in range(num_steps_per_episode): for j in range(num_steps_per_episode):
moves = env.move_generator() moves = env.move_generator()
#print ("moves: ", moves) # print ("moves: ", moves)
if (not moves): if (not moves):
#print ("out of moves") # print ("out of moves")
break break
if (len(moves)==1): if (len(moves) == 1):
#only a single possible move # only a single possible move
m = moves[0] m = moves[0]
else: else:
if (om == 1): if (om == 1):
m = random_plus_middle_move(moves, om) m = random_plus_middle_move(moves, om)
#m = random_move(moves, om) #m = random_move(moves, om)
else: else:
m = random_move(moves, om) m = random_move(moves, om)
#print ("m: ", m) # print ("m: ", m)
s1, reward, done, _ = env.step(m) s1, reward, done, _ = env.step(m)
om = -om om = -om
#env.render() # env.render()
total_reward += reward total_reward += reward
s = s1 s = s1
if done: if done:
#print ("game over: ", reward) # print ("game over: ", reward)
break break
env.render() # env.render()
total_reward *= oom; total_reward *= oom;
collected_rewards.append(total_reward) collected_rewards.append(total_reward)
#print ("total reward", total_reward, "after episode: ", i+1, ". steps: ", j+1) # print ("total reward", total_reward, "after episode: ", i+1, ". steps: ", j+1)
oom = -oom oom = -oom
print ("after "+ str(i+1) + " episodes:"); print("after " + str(i + 1) + " episodes:")
average = sum(collected_rewards) / num_episodes; average = sum(collected_rewards) / num_episodes
print ("average score: ", average); percentage = round(100*(average + 1) / 2, 1)
print("percentage: ", round(100*(average+1)/2,1)); score = percentage/100 * (i+1);
print("average score: ", average)
print("percentage: ", percentage)
print("score:", score)
print()
y1 = ss * (i+1) - h1
print ("y1:", y1)
y2 = ss * (i+1) + h2
print ("y2:", y2)
if (score <= y1):
print("accept null hypothesis")
break
if (score >= y2):
print("reject null hypothesis")
break
print("#########") print("#########")