From d6dbdeea7bda25c454da7093e64d15df8e9b48fb Mon Sep 17 00:00:00 2001 From: tsu-nera Date: Tue, 13 Jun 2017 19:04:03 +0900 Subject: [PATCH 1/2] add cross condition --- gym_tic_tac_toe/envs/tic_tac_toe_env.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/gym_tic_tac_toe/envs/tic_tac_toe_env.py b/gym_tic_tac_toe/envs/tic_tac_toe_env.py index 71007b3..8ebaabf 100644 --- a/gym_tic_tac_toe/envs/tic_tac_toe_env.py +++ b/gym_tic_tac_toe/envs/tic_tac_toe_env.py @@ -33,9 +33,11 @@ class TicTacToeEnv(gym.Env): # check game over for i in range(3): - # horizontals and verticals - if ((board[i * 3] == p and board[i * 3 + 1] == p and board[i * 3 + 2 ] == p) - or (board[i + 0] == p and board[i + 3] == p and board[i + 6] == p)): + # horizontals and verticals and cross + if ((board[i * 3] == p and board[i * 3 + 1] == p and board[i * 3 + 2] == p) + or (board[i + 0] == p and board[i + 3] == p and board[i + 6] == p) + or (board[0] == p and board[4] == p and board[8] == p) + or (board[2] == p and board[4] == p and board[6] == p)): reward = p done = True break From a79bcf3241a58b442c4704e9049f308aa2ff643e Mon Sep 17 00:00:00 2001 From: tsu-nera Date: Tue, 13 Jun 2017 19:57:32 +0900 Subject: [PATCH 2/2] checking the diagonals outside the i loop --- gym_tic_tac_toe/envs/tic_tac_toe_env.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/gym_tic_tac_toe/envs/tic_tac_toe_env.py b/gym_tic_tac_toe/envs/tic_tac_toe_env.py index 8ebaabf..817e30c 100644 --- a/gym_tic_tac_toe/envs/tic_tac_toe_env.py +++ b/gym_tic_tac_toe/envs/tic_tac_toe_env.py @@ -33,14 +33,17 @@ class TicTacToeEnv(gym.Env): # check game over for i in range(3): - # horizontals and verticals and cross + # horizontals and verticals if ((board[i * 3] == p and board[i * 3 + 1] == p and board[i * 3 + 2] == p) - or (board[i + 0] == p and board[i + 3] == p and board[i + 6] == p) - or (board[0] == p and board[4] == p and board[8] == p) - or (board[2] == p and board[4] == p and board[6] == p)): + or (board[i + 0] == p and board[i + 3] == p and board[i + 6] == p)): reward = p done = True break + # diagonals + if((board[0] == p and board[4] == p and board[8] == p) + or (board[2] == p and board[4] == p and board[6] == p)): + reward = p + done = True return self.state, reward, done, {} def _reset(self):