2017-03-31 02:38:15 +04:00
|
|
|
import gym
|
|
|
|
from gym import error, spaces, utils
|
|
|
|
from gym.utils import seeding
|
2017-03-31 03:14:26 +04:00
|
|
|
import numpy as np
|
2017-03-31 02:38:15 +04:00
|
|
|
|
|
|
|
class RandomWalkEnv(gym.Env):
    """One-dimensional random-walk environment.

    The agent holds an integer position ``self.state``.  Each step moves it
    one cell left (action ``0``) or one cell right (action ``1``).  The
    episode ends when the position drops to ``0`` or below (reward ``0``) or
    reaches ``self.size`` or above (reward ``1``).
    """

    metadata = {'render.modes': ['human']}

    def __init__(self):
        """Set up the action/observation spaces and the walk length."""
        self.action_space = spaces.Discrete(2)
        self.size = 6
        # Reachable positions are 0..size inclusive (both ends are terminal).
        self.observation_space = spaces.Discrete(self.size + 1)
        # The position is only meaningful after ``_reset`` has been called.
        self.state = None

    def _step(self, action):
        """Move one cell and report the outcome.

        Args:
            action: ``0`` to move left, ``1`` to move right.  Any other
                value leaves the position unchanged.

        Returns:
            A tuple ``(observation, reward, done, info)`` where
            ``observation`` is ``np.array(self.state)``, ``reward`` is ``1``
            only when the right end is reached, ``done`` flags a terminal
            position and ``info`` is an empty dict.
        """
        reward = 0
        done = False

        if action == 0:
            self.state -= 1
        elif action == 1:
            self.state += 1

        if self.state >= self.size:
            # Right end: the only rewarded outcome.
            reward = 1
            done = True
        elif self.state <= 0:
            # Left end: terminal without reward.
            done = True

        return np.array(self.state), reward, done, {}

    def _reset(self):
        """Pick a random start position and return it as the observation.

        Returns:
            ``np.array(self.state)``, in the same form ``_step`` returns.
        """
        print("#self.size:",self.size)
        # NOTE(review): ``np.random.randint`` excludes the upper bound, so
        # the start position is drawn from 1..size-2 and position size-1 is
        # never a start state -- confirm this is intended.
        self.state = np.random.randint(1,self.size-1)
        print("starting: ", self.state)
        # Hand the initial observation back to the caller, matching the
        # observation format produced by ``_step``.
        return np.array(self.state)

    def _render(self, mode='human', close=False):
        """Print the current position; do nothing when ``close`` is true."""
        if close:
            return
        print("current state: ",self.state)
|