gym-tic-tac-toe/gym_random_walk/envs/random_walk_env.py

32 lines
768 B
Python
Raw Normal View History

2017-03-31 02:38:15 +04:00
import gym
from gym import error, spaces, utils
from gym.utils import seeding
2017-03-31 03:14:26 +04:00
import numpy as np
2017-03-31 02:38:15 +04:00
class RandomWalkEnv(gym.Env):
    """One-dimensional random walk on integer positions.

    Action 0 moves the agent one step left and action 1 moves it one step
    right; any other action leaves the position unchanged. An episode ends
    when the position reaches ``GOAL_STATE`` or more (reward 1) or drops to
    0 or less (reward 0). Every non-terminal step yields reward 0.
    """

    metadata = {'render.modes': ['human']}

    # Position the agent occupies after a reset.
    START_STATE = 1
    # Position at (or beyond) which the episode ends with reward 1.
    GOAL_STATE = 6

    def __init__(self):
        self.action_space = spaces.Discrete(2)
        # Positions 0..GOAL_STATE are the only ones reachable inside an
        # episode that starts at START_STATE and moves one step at a time.
        self.observation_space = spaces.Discrete(self.GOAL_STATE + 1)
        # Give the environment a valid position right away so that stepping
        # or rendering before the first reset does not raise AttributeError.
        self.state = self.START_STATE

    def _step(self, action):
        """Apply ``action`` and report the outcome.

        Args:
            action: 0 to move left, 1 to move right.

        Returns:
            A tuple ``(observation, reward, done, info)`` where
            ``observation`` is the new position as a 0-d numpy array,
            ``reward`` is 1 only when the goal was reached, ``done`` tells
            whether the episode ended, and ``info`` is an empty dict.
        """
        if action == 0:
            self.state -= 1
        elif action == 1:
            self.state += 1

        reward = 0
        done = False
        if self.state >= self.GOAL_STATE:
            reward = 1
            done = True
        elif self.state <= 0:
            done = True

        return np.array(self.state), reward, done, {}

    def _reset(self):
        """Move the agent back to the start and return that observation.

        Returns:
            The starting position as a 0-d numpy array, in the same format
            as the observation returned by ``_step``.
        """
        self.state = self.START_STATE  # TODO start in a random position
        return np.array(self.state)

    def _render(self, mode='human', close=False):
        """Print the current position to stdout.

        Args:
            mode: Rendering mode; only 'human' is listed in ``metadata``.
            close: When true, the call is a request to shut rendering down,
                so nothing is printed.
        """
        if close:
            # There is no viewer to tear down; just avoid a stray print.
            return
        print(self.state)