gym-tic-tac-toe/gym_random_walk/envs/random_walk_env.py

import gym
from gym import error, spaces, utils
from gym.utils import seeding
import numpy as np

class RandomWalkEnv(gym.Env):
    """One-dimensional random walk over integer positions.

    The agent moves one position to the left (action 0) or to the right
    (action 1) per step.  An episode ends when the position reaches
    ``size`` or more (reward 1) or drops to 0 or less (reward 0).
    """

    metadata = {'render.modes': ['human']}

    def __init__(self):
        """Declare the action/observation spaces and the walk length."""
        self.action_space = spaces.Discrete(2)
        self.size = 6
        # Starting from 1..size-1 and moving one position per step, an
        # episode visits only positions 0..size (inclusive).
        self.observation_space = spaces.Discrete(self.size + 1)
        # Current position; assigned by _reset(), None until the first reset.
        self.state = None

    def _step(self, action):
        """Move one position and report the outcome.

        Args:
            action: 0 moves left (position - 1), 1 moves right
                (position + 1).  Any other value leaves the position
                unchanged.

        Returns:
            A tuple ``(observation, reward, done, info)`` where
            ``observation`` is the new position as a 0-d numpy array,
            ``reward`` is 1 when the position is at or beyond ``size`` and
            0 otherwise, ``done`` is True at either end of the walk, and
            ``info`` is an empty dict.
        """
        if action == 0:
            self.state -= 1
        elif action == 1:
            self.state += 1

        reward = 0
        done = False
        if self.state >= self.size:
            # Right end of the walk: the only rewarded outcome.
            reward = 1
            done = True
        if self.state <= 0:
            # Left end of the walk: terminal, no reward.
            done = True
        return np.array(self.state), reward, done, {}

    def _reset(self):
        """Start a new episode from a random non-terminal position.

        Returns:
            The starting position as a 0-d numpy array, in the same form
            as the observation returned by ``_step``.
        """
        print("#self.size:", self.size)
        # np.random.randint excludes its upper bound, so (1, size) draws
        # uniformly from the non-terminal positions 1..size-1.
        self.state = np.random.randint(1, self.size)
        print("starting: ", self.state)
        return np.array(self.state)

    def _render(self, mode='human', close=False):
        """Print the current position; do nothing when ``close`` is set."""
        if close:
            return
        print("current state: ", self.state)
add initial Python files 2017-03-31 02:38:15 +04:00			`import gym`
			`from gym import error, spaces, utils`
			`from gym.utils import seeding`
rename to the proper form 2017-03-31 03:14:26 +04:00			`import numpy as np`
add initial Python files 2017-03-31 02:38:15 +04:00
			`class RandomWalkEnv(gym.Env):`
			`metadata = {'render.modes': ['human']}`

			`def __init__(self):`
rename to the proper form 2017-03-31 03:14:26 +04:00			`self.action_space = spaces.Discrete(2)`
Random starting state 2017-04-01 12:53:41 +04:00			`self.size = 6`
Clean up; do the terminal states 2017-04-01 03:54:06 +04:00			`#print("init")`
add initial Python files 2017-03-31 02:38:15 +04:00			`def _step(self, action):`
Clean up; do the terminal states 2017-04-01 03:54:06 +04:00			`#print("step")`
rename to the proper form 2017-03-31 03:14:26 +04:00			`reward = 0`
			`done = False`
Clean up; do the terminal states 2017-04-01 03:54:06 +04:00			`if (action == 0):`
			`self.state -= 1`
			`if (action == 1):`
			`self.state += 1`
Random starting state 2017-04-01 12:53:41 +04:00			`if (self.state >= self.size):`
Clean up; do the terminal states 2017-04-01 03:54:06 +04:00			`reward = 1`
			`done = True`
			`if (self.state <= 0):`
			`done = True`
rename to the proper form 2017-03-31 03:14:26 +04:00			`return np.array(self.state), reward, done, {}`
add initial Python files 2017-03-31 02:38:15 +04:00			`def _reset(self):`
Clean up; do the terminal states 2017-04-01 03:54:06 +04:00			`#print("reset")`
Random starting state 2017-04-01 12:53:41 +04:00			`print("#self.size:",self.size)`
			`self.state = np.random.randint(1,self.size-1)`
			`print("starting: ", self.state)`
add initial Python files 2017-03-31 02:38:15 +04:00			`def _render(self, mode='human', close=False):`
Random starting state 2017-04-01 12:53:41 +04:00			`if close:`
			`return`
Clean up; do the terminal states 2017-04-01 03:54:06 +04:00			`#print("render")`
Random starting state 2017-04-01 12:53:41 +04:00			`print("current state: ",self.state)`