2017-04-04 01:51:35 +04:00
|
|
|
import gym
|
|
|
|
from gym import spaces
|
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
class TicTacToeEnv(gym.Env):
    """Two-player tic-tac-toe as a Gym environment.

    The state is a dict with two entries:

    * ``'board'``: list of 9 ints in row-major order; ``0`` is an empty
      square, any other value is the player that occupies it.
    * ``'on_move'``: the player whose turn it is; ``1`` after ``reset()``
      and negated after every legal move.

    An action is a pair ``(player, square)`` with ``square`` in ``0..8``.
    ``reset()`` must be called before ``step()``, ``render()`` or
    ``move_generator()``, because ``__init__`` does not create the state.
    """

    metadata = {'render.modes': ['human']}

    # Indexed by ``value + 1``: -1 -> 'O', 0 -> ' ', 1 -> 'X'.
    symbols = ['O', ' ', 'X']

    # Every triple of board indices that forms a winning line.
    _WIN_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows
        (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns
        (0, 4, 8), (2, 4, 6),             # diagonals
    )

    def __init__(self):
        self.action_space = spaces.Discrete(9)
        # NOTE(review): reset()/step() return the state dict itself, not an
        # integer drawn from this space -- confirm how consumers use it.
        self.observation_space = spaces.Discrete(9 * 3 * 2)  # flattened

    @classmethod
    def _has_line(cls, board, player):
        """Return True if ``player`` occupies all three squares of any line."""
        return any(
            board[a] == player and board[b] == player and board[c] == player
            for a, b, c in cls._WIN_LINES
        )

    def step(self, action):
        """Apply ``action`` and report the outcome.

        Args:
            action: pair ``(player, square)``.

        Returns:
            Tuple ``(state, reward, done, info)``. ``state`` is the live
            state dict (not a copy) and ``info`` is an empty dict.

            * Illegal move (square occupied and/or ``player`` is not on
              move): the state is left untouched, ``done`` is True and
              ``reward`` is ``-1 * on_move``.
            * Winning move: ``reward`` is ``player`` and ``done`` is True.
            * Board full without a winner: ``reward`` is 0 and ``done`` is
              True.
            * Otherwise: ``reward`` is 0 and ``done`` is False.
        """
        p, square = action
        board = self.state['board']
        om = self.state['on_move']

        # Check move legality. Both conditions are tested independently so
        # that each one gets its own diagnostic message.
        illegal = False
        if board[square] != 0:  # square not empty
            print("illegal move ", action, ". (square occupied): ", square)
            illegal = True
        if p != om:  # wrong player
            print("illegal move ", action, " not on move: ", p)
            illegal = True
        if illegal:
            # An illegal move ends the episode and must not modify the
            # board or the side to move.
            # NOTE(review): for a wrong-player move this reward equals the
            # offending player when p == -om; confirm that is intended.
            return self.state, -1 * om, True, {}

        board[square] = p
        self.state['on_move'] = -p

        # Check game over: only the player who just moved can have
        # completed a line.
        if self._has_line(board, p):
            return self.state, p, True, {}
        if 0 not in board:
            # No empty square left and nobody won: the game is a draw.
            return self.state, 0, True, {}
        return self.state, 0, False, {}

    def reset(self):
        """Start a new game with an empty board and player 1 on move.

        Returns:
            The freshly created state dict.
        """
        self.state = {}
        self.state['board'] = [0, 0, 0, 0, 0, 0, 0, 0, 0]
        self.state['on_move'] = 1
        return self.state

    def render(self, mode='human', close=False):
        """Print the side to move and the board as three text rows.

        Does nothing when ``close`` is true. ``mode`` is accepted for
        interface compatibility but is not inspected.
        """
        if close:
            return
        print("on move: " , self.symbols[self.state['on_move'] + 1])
        board = self.state['board']
        for row_start in range(0, 9, 3):
            for cell in board[row_start:row_start + 3]:
                print(self.symbols[cell + 1], end=" ")
            print()

    def move_generator(self):
        """Return ``[player, square]`` for every empty square.

        ``player`` is the side currently on move. The list is empty when
        the board is full.
        """
        board = self.state['board']
        if 0 not in board:
            return []
        p = self.state['on_move']
        return [[p, i] for i, cell in enumerate(board) if cell == 0]
|
2017-04-04 01:51:35 +04:00
|
|
|
|