diff --git a/examples/random_tic_tac_toe.py b/examples/random_tic_tac_toe.py
index 0fc8964..d6731df 100644
--- a/examples/random_tic_tac_toe.py
+++ b/examples/random_tic_tac_toe.py
@@ -3,10 +3,21 @@ import numpy as np
 import gym_tic_tac_toe
 import random
 
+
+def random_plus_middle_move(moves, p):
+    """Return [p, 4] if it is among moves, else a random move."""
+    middle = [p, 4]
+    if middle in moves:
+        return middle
+    return random_move(moves, p)
+def random_move(moves, p):
+    """Return a random element of moves; p is accepted but unused."""
+    return random.choice(moves)
+
 env = gym.make('tic_tac_toe-v0')
 
-num_episodes = 20
-num_steps_per_episode = 200
+num_episodes = 2000
+num_steps_per_episode = 10
 
 collected_rewards = []
 for i in range(num_episodes):
@@ -22,22 +33,26 @@ for i in range(num_episodes):
         moves = env.move_generator()
         print ("moves: ", moves)
         if (not moves):
+            print ("out of moves")
             break
-        m = random.choice(moves)
+        # A forced move needs no choice; otherwise the side with om == 1
+        # prefers [om, 4] when legal and the other side plays randomly.
+        if (len(moves)==1):
+            m = moves[0]
+        elif (om == 1):
+            m = random_plus_middle_move(moves, om)
+        else:
+            m = random_move(moves, om)
         print ("m: ", m)
-        a = env.action_space.sample()
-        print (a[0])
-        #sm = s['on_move']
-        #print (sm)
-        a = tuple((om, a[1]))
         s1, reward, done, _ = env.step(m)
         om = -om
         env.render()
         total_reward += reward
         s = s1
         if done:
+            print ("game over: ", reward)
             break
     collected_rewards.append(total_reward)
-    print ("total reward ", total_reward, " after episode: ", j)
+    print ("total reward ", total_reward, " after episode: ", i, ". steps: ", j+1)
 print ("average score: ", sum(collected_rewards) / num_episodes)
 print("#########")