some new stuff

This commit is contained in:
ottofabian 2020-09-19 17:47:20 +02:00
parent cbdd5d1854
commit 8fc1210f1e
3 changed files with 64 additions and 8 deletions

View File

@@ -3,7 +3,58 @@ from gym.envs.registration import register
# Register the base 'ALRReacher-v0' id, backed by alr_envs.mujoco:ALRReacherEnv.
# The episode limit is 200 steps, matching every other ALRReacher registration
# in this file; the superseded max_episode_steps=1000 line is dropped because a
# call may not repeat a keyword argument (SyntaxError).
# steps_before_reward=0 is forwarded to the environment through kwargs.
# NOTE(review): presumably 0 means the reward is given from the first step on;
# confirm against ALRReacherEnv.
register(
    id='ALRReacher-v0',
    entry_point='alr_envs.mujoco:ALRReacherEnv',
    max_episode_steps=200,
    kwargs={
        "steps_before_reward": 0,
    },
)
# Register the 'ALRReacher100-v0' id, backed by alr_envs.mujoco:ALRReacherEnv,
# with a 200-step episode limit. steps_before_reward=100 is forwarded to the
# environment through kwargs.
# NOTE(review): presumably the reward is withheld for the first 100 steps;
# confirm against ALRReacherEnv.
register(
    id='ALRReacher100-v0',
    entry_point='alr_envs.mujoco:ALRReacherEnv',
    max_episode_steps=200,
    kwargs={"steps_before_reward": 100},
)
# Register the 'ALRReacher180-v0' id, backed by alr_envs.mujoco:ALRReacherEnv,
# with a 200-step episode limit. steps_before_reward=180 is forwarded to the
# environment through kwargs.
# NOTE(review): presumably the reward is withheld for the first 180 of the 200
# steps; confirm against ALRReacherEnv.
register(
    id='ALRReacher180-v0',
    entry_point='alr_envs.mujoco:ALRReacherEnv',
    max_episode_steps=200,
    kwargs={"steps_before_reward": 180},
)
# Register the 'ALRReacher7-v0' id, backed by alr_envs.mujoco:ALRReacherEnv,
# with a 200-step episode limit. Both steps_before_reward=0 and n_links=7 are
# forwarded to the environment through kwargs.
# NOTE(review): n_links presumably sets the number of arm links; confirm
# against ALRReacherEnv.
register(
    id='ALRReacher7-v0',
    entry_point='alr_envs.mujoco:ALRReacherEnv',
    max_episode_steps=200,
    kwargs={
        "steps_before_reward": 0,
        "n_links": 7,
    },
)
# Register the 'ALRReacher100_7-v0' id, backed by alr_envs.mujoco:ALRReacherEnv,
# with a 200-step episode limit. Both steps_before_reward=100 and n_links=7 are
# forwarded to the environment through kwargs.
# NOTE(review): presumably a 7-link arm whose reward is withheld for the first
# 100 steps; confirm against ALRReacherEnv.
register(
    id='ALRReacher100_7-v0',
    entry_point='alr_envs.mujoco:ALRReacherEnv',
    max_episode_steps=200,
    kwargs={
        "steps_before_reward": 100,
        "n_links": 7,
    },
)
# Register the 'ALRReacher180_7-v0' id, backed by alr_envs.mujoco:ALRReacherEnv,
# with a 200-step episode limit. Both steps_before_reward=180 and n_links=7 are
# forwarded to the environment through kwargs.
# NOTE(review): presumably a 7-link arm whose reward is withheld for the first
# 180 steps; confirm against ALRReacherEnv.
register(
    id='ALRReacher180_7-v0',
    entry_point='alr_envs.mujoco:ALRReacherEnv',
    max_episode_steps=200,
    kwargs={
        "steps_before_reward": 180,
        "n_links": 7,
    },
)
register(

View File

@@ -31,7 +31,7 @@ class SimpleReacherEnv(gym.Env):
self._angle_velocity = None
self.max_torque = 1 # 10
self.steps_before_reward = 100
self.steps_before_reward = 180
action_bound = np.ones((self.n_links,))
state_bound = np.hstack([
@@ -69,7 +69,7 @@ class SimpleReacherEnv(gym.Env):
def _add_action_noise(self, action: np.ndarray):
"""
add unobserved Gaussian Noise N(0,0.5) to the actions
add unobserved Gaussian Noise N(0,0.01) to the actions
Args:
action:

View File

@@ -1,15 +1,20 @@
import time
import gym
if __name__ == '__main__':
# env = gym.make('alr_envs:ALRReacher-v0')
env = gym.make('alr_envs:SimpleReacher-v0')
env = gym.make('alr_envs:ALRReacher-v0')
# env = gym.make('alr_envs:SimpleReacher-v0')
# env = gym.make('alr_envs:ALRReacher7-v0')
state = env.reset()
for i in range(10000):
state, reward, done, info = env.step(env.action_space.sample())
if i % 5 == 0:
if i % 1 == 0:
env.render()
if done:
state = env.reset()
# if done:
state = env.reset()
time.sleep(0.5)