2020-08-28 16:00:47 +02:00
|
|
|
import gym
|
|
|
|
|
|
|
|
|
2021-03-26 14:05:16 +01:00
|
|
|
def example_mujoco(env_id='alr_envs:ALRReacher-v0', n_steps=10000,
                   render_every=1):
    """Drive a step-based gym environment with randomly sampled actions.

    The reward is accumulated over an episode; whenever the environment
    reports ``done`` the accumulated value is printed, the accumulator is
    cleared and the environment is reset.

    Args:
        env_id: Id handed to ``gym.make``.
        n_steps: Number of environment steps to execute in total.
        render_every: Call ``env.render()`` on every ``render_every``-th
            step. ``0`` or ``None`` turns rendering off.
    """
    env = gym.make(env_id)
    try:
        rewards = 0
        env.reset()

        # number of environment steps
        for i in range(n_steps):
            # Observation and info are not needed for a random policy.
            _, reward, done, _ = env.step(env.action_space.sample())
            rewards += reward

            if render_every and i % render_every == 0:
                env.render()

            if done:
                print(rewards)
                rewards = 0
                env.reset()
    finally:
        # Release the environment's resources even if the rollout is
        # interrupted or a step raises.
        env.close()
|
|
|
|
|
|
|
|
|
|
|
|
def example_dmp(env_id="alr_envs:HoleReacherDMP-v0", n_samples=10,
                render_every=1, render_mode="partial"):
    """Drive a DMP-based gym environment with randomly sampled actions.

    Here a single ``env.step`` call corresponds to one sample, i.e. a full
    trajectory made up of multiple underlying environment steps. The reward
    is accumulated; whenever the environment reports ``done`` the
    accumulated value is printed, the accumulator is cleared and the
    environment is reset.

    Args:
        env_id: Id handed to ``gym.make``
            (``"alr_envs:ViaPointReacherDMP-v0"`` is another DMP id).
        n_samples: Number of samples/full trajectories to execute.
        render_every: Call ``env.render`` after every ``render_every``-th
            sample. ``0`` or ``None`` skips the render calls entirely.
        render_mode: ``mode`` argument forwarded to ``env.render``.
            ``None`` disables rendering.
    """
    env = gym.make(env_id)
    try:
        rewards = 0
        env.reset()

        # number of samples/full trajectories (multiple environment steps)
        for i in range(n_samples):
            # Observation and info are not needed for a random policy.
            _, reward, done, _ = env.step(env.action_space.sample())
            rewards += reward

            if render_every and i % render_every == 0:
                # Render the full DMP trajectory.
                # render() may also be called just once before the loop;
                # that would render every trajectory. Calling it after each
                # trajectory makes it possible to change the mode in
                # between; mode=None disables rendering.
                env.render(mode=render_mode)

            if done:
                print(rewards)
                rewards = 0
                env.reset()
    finally:
        # Release the environment's resources even if the rollout is
        # interrupted or a step raises.
        env.close()
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: only the DMP example is run when this file is
# executed directly.
if __name__ == "__main__":
    example_dmp()
|