commit 37c4ab8a41
Merge remote-tracking branch 'upstream/dmc_integration' into dmc_integration

# Conflicts:
#       alr_envs/__init__.py
@@ -4,9 +4,8 @@ from gym.envs.registration import register
 from alr_envs.classic_control.hole_reacher.hole_reacher_mp_wrapper import HoleReacherMPWrapper
 from alr_envs.classic_control.simple_reacher.simple_reacher_mp_wrapper import SimpleReacherMPWrapper
 from alr_envs.classic_control.viapoint_reacher.viapoint_reacher_mp_wrapper import ViaPointReacherMPWrapper
-from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper
+from alr_envs.dmc.ball_in_cup.ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper
 from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper
-from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_positional_wrapper import BallInACupPositionalWrapper
 from alr_envs.open_ai import reacher_v2, continuous_mountain_car, fetch
 from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock
 
@@ -205,7 +204,7 @@ register(
         "hole_width": None,
         "hole_depth": 1,
         "hole_x": None,
-        "collision_penalty": 1000,
+        "collision_penalty": 100,
     }
 )
 
@@ -237,7 +236,7 @@ register(
         "hole_width": 0.25,
         "hole_depth": 1,
         "hole_x": 2,
-        "collision_penalty": 1000,
+        "collision_penalty": 100,
     }
 )
 
@@ -355,7 +354,7 @@ register(
     entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
     kwargs={
         "name": "alr_envs:ALRBallInACupSimple-v0",
-        "wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper],
+        "wrappers": [BallInACupMPWrapper],
         "mp_kwargs": {
             "num_dof": 3,
             "num_basis": 5,
@@ -380,7 +379,7 @@ register(
     entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
     kwargs={
         "name": "alr_envs:ALRBallInACup-v0",
-        "wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper],
+        "wrappers": [BallInACupMPWrapper],
         "mp_kwargs": {
             "num_dof": 7,
             "num_basis": 5,
@@ -405,7 +404,7 @@ register(
     entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
     kwargs={
         "name": "alr_envs:ALRBallInACupSimple-v0",
-        "wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper],
+        "wrappers": [BallInACupMPWrapper],
         "mp_kwargs": {
             "num_dof": 3,
             "num_basis": 5,
@@ -430,7 +429,7 @@ register(
     entry_point='alr_envs.mujoco.ball_in_a_cup.biac_pd:make_detpmp_env_helper',
     kwargs={
         "name": "alr_envs:ALRBallInACupPDSimple-v0",
-        "wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper],
+        "wrappers": [BallInACupMPWrapper],
         "mp_kwargs": {
             "num_dof": 3,
             "num_basis": 5,
@@ -475,7 +474,7 @@ register(
     entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
     kwargs={
         "name": "alr_envs:ALRBallInACupSimple-v0",
-        "wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper],
+        "wrappers": [BallInACupMPWrapper],
         "mp_kwargs": {
             "num_dof": 7,
             "num_basis": 5,
@@ -487,7 +486,6 @@ register(
             "zero_start": True,
             "zero_goal": True,
             "policy_kwargs": {
-
                 "p_gains": np.array([4. / 3., 2.4, 2.5, 5. / 3., 2., 2., 1.25]),
                 "d_gains": np.array([0.0466, 0.12, 0.125, 0.04166, 0.06, 0.06, 0.025])
             }
@@ -500,7 +498,7 @@ register(
     entry_point='alr_envs.utils.make_env_helpers:make_contextual_env',
     kwargs={
         "name": "alr_envs:ALRBallInACupGoal-v0",
-        "wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper],
+        "wrappers": [BallInACupMPWrapper],
         "mp_kwargs": {
             "num_dof": 7,
             "num_basis": 5,
@@ -523,7 +521,7 @@ register(
 ## DMC
 
 register(
-    id=f'dmc_ball_in_cup_dmp-v0',
+    id=f'dmc_ball_in_cup-catch_dmp-v0',
     entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
     # max_episode_steps=1,
     kwargs={
@@ -532,19 +530,23 @@ register(
         "mp_kwargs": {
             "num_dof": 2,
             "num_basis": 5,
-            "duration": 2,
+            "duration": 20,
             "learn_goal": True,
             "alpha_phase": 2,
             "bandwidth_factor": 2,
-            "policy_type": "velocity",
+            "policy_type": "motor",
             "weights_scale": 50,
-            "goal_scale": 0.1
+            "goal_scale": 0.1,
+            "policy_kwargs": {
+                "p_gains": 50,
+                "d_gains": 1
+            }
         }
     }
 )
 
 register(
-    id=f'dmc_ball_in_cup_detpmp-v0',
+    id=f'dmc_ball_in_cup-catch_detpmp-v0',
     entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
     kwargs={
         "name": f"ball_in_cup-catch",
@@ -552,11 +554,15 @@ register(
         "mp_kwargs": {
             "num_dof": 2,
             "num_basis": 5,
-            "duration": 2,
+            "duration": 20,
             "width": 0.025,
-            "policy_type": "velocity",
+            "policy_type": "motor",
             "weights_scale": 0.2,
-            "zero_start": True
+            "zero_start": True,
+            "policy_kwargs": {
+                "p_gains": 50,
+                "d_gains": 1
+            }
         }
     }
 )
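Note: a quick sanity check for the ids registered above — a minimal sketch, assuming `alr_envs` and its `mp_env_api` dependency are installed. It only uses the `make_env` helper that the examples in this commit rely on; for the motion-primitive ids, each `env.step()` executes one full trajectory, and the new "motor" policy type with `p_gains`/`d_gains` presumably corresponds to a PD controller.

    from alr_envs.utils.make_env_helpers import make_env

    # One of the ids registered above; the action is a sampled set of motion-primitive parameters.
    env = make_env("dmc_ball_in_cup-catch_detpmp-v0", seed=1)
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())
    print(reward)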
@@ -69,9 +69,17 @@ class HoleReacherEnv(gym.Env):
     def dt(self) -> Union[float, int]:
         return self._dt
 
+    # @property
+    # def start_pos(self):
+    #     return self._start_pos
+
     @property
-    def start_pos(self):
-        return self._start_pos
+    def current_pos(self):
+        return self._joint_angles.copy()
+
+    @property
+    def current_vel(self):
+        return self._angle_velocity.copy()
 
     def step(self, action: np.ndarray):
         """
@@ -1,8 +1,8 @@
-from typing import Union
+from typing import Tuple, Union
 
 import numpy as np
 
-from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
 
 
 class HoleReacherMPWrapper(MPEnvWrapper):
@@ -19,11 +19,15 @@ class HoleReacherMPWrapper(MPEnvWrapper):
         ])
 
     @property
-    def start_pos(self) -> Union[float, int, np.ndarray]:
-        return self.env.start_pos
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.env.current_pos
 
     @property
-    def goal_pos(self) -> Union[float, int, np.ndarray]:
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.env.current_vel
+
+    @property
+    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
         raise ValueError("Goal position is not available and has to be learnt based on the environment.")
 
     @property
@@ -59,9 +59,17 @@ class SimpleReacherEnv(gym.Env):
     def dt(self) -> Union[float, int]:
         return self._dt
 
+    # @property
+    # def start_pos(self):
+    #     return self._start_pos
+
     @property
-    def start_pos(self):
-        return self._start_pos
+    def current_pos(self):
+        return self._joint_angles
+
+    @property
+    def current_vel(self):
+        return self._angle_velocity
 
     def step(self, action: np.ndarray):
         """
@@ -1,8 +1,8 @@
-from typing import Union
+from typing import Tuple, Union
 
 import numpy as np
 
-from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
 
 
 class SimpleReacherMPWrapper(MPEnvWrapper):
@@ -17,11 +17,15 @@ class SimpleReacherMPWrapper(MPEnvWrapper):
         ])
 
     @property
-    def start_pos(self):
-        return self.env.start_pos
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.env.current_pos
 
     @property
-    def goal_pos(self):
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.env.current_vel
+
+    @property
+    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
         raise ValueError("Goal position is not available and has to be learnt based on the environment.")
 
     @property
@@ -63,9 +63,17 @@ class ViaPointReacher(gym.Env):
     def dt(self):
         return self._dt
 
+    # @property
+    # def start_pos(self):
+    #     return self._start_pos
+
     @property
-    def start_pos(self):
-        return self._start_pos
+    def current_pos(self):
+        return self._joint_angles.copy()
+
+    @property
+    def current_vel(self):
+        return self._angle_velocity.copy()
 
     def step(self, action: np.ndarray):
         """
@@ -1,8 +1,8 @@
-from typing import Union
+from typing import Tuple, Union
 
 import numpy as np
 
-from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
 
 
 class ViaPointReacherMPWrapper(MPEnvWrapper):
@@ -18,11 +18,15 @@ class ViaPointReacherMPWrapper(MPEnvWrapper):
         ])
 
     @property
-    def start_pos(self) -> Union[float, int, np.ndarray]:
-        return self.env.start_pos
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.env.current_pos
 
     @property
-    def goal_pos(self) -> Union[float, int, np.ndarray]:
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.env.current_vel
+
+    @property
+    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
         raise ValueError("Goal position is not available and has to be learnt based on the environment.")
 
     @property
@@ -1,27 +0,0 @@
-from typing import Union
-
-import numpy as np
-
-from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
-
-
-class DMCBallInCupMPWrapper(MPEnvWrapper):
-
-    @property
-    def active_obs(self):
-        # Besides the ball position, the environment is always set to 0.
-        return np.hstack([
-            [False] * 2,  # cup position
-            [True] * 2,  # ball position
-            [False] * 2,  # cup velocity
-            [False] * 2,  # ball velocity
-        ])
-
-    @property
-    def start_pos(self) -> Union[float, int, np.ndarray]:
-        return np.hstack([self.physics.named.data.qpos['cup_x'], self.physics.named.data.qpos['cup_z']])
-
-    @property
-    def dt(self) -> Union[float, int]:
-        # Taken from: https://github.com/deepmind/dm_control/blob/master/dm_control/suite/ball_in_cup.py#L27
-        return 0.02
alr_envs/dmc/ball_in_cup/__init__.py (new file, 0 lines)
alr_envs/dmc/ball_in_cup/ball_in_the_cup_mp_wrapper.py (new file, 34 lines)
@@ -0,0 +1,34 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class DMCBallInCupMPWrapper(MPEnvWrapper):
+
+    @property
+    def active_obs(self):
+        # Besides the ball position, the environment is always set to 0.
+        return np.hstack([
+            [False] * 2,  # cup position
+            [True] * 2,  # ball position
+            [False] * 2,  # cup velocity
+            [False] * 2,  # ball velocity
+        ])
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray]:
+        return np.hstack([self.physics.named.data.qpos['cup_x'], self.physics.named.data.qpos['cup_z']])
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return np.hstack([self.physics.named.data.qvel['cup_x'], self.physics.named.data.qvel['cup_z']])
+
+    @property
+    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+    @property
+    def dt(self) -> Union[float, int]:
+        return self.env.dt
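Note: `active_obs` masks the flattened step-based observation down to the entries that remain informative in the contextual case. A minimal sketch of how such a boolean mask filters an observation; the 8-entry layout is the one assumed by the wrapper above (cup position, ball position, cup velocity, ball velocity, two entries each).

    import numpy as np

    active_obs = np.hstack([[False] * 2, [True] * 2, [False] * 2, [False] * 2])

    full_obs = np.arange(8.0)           # stand-in for a flattened ball_in_cup observation
    context_obs = full_obs[active_obs]  # only the ball position survives the mask
    print(context_obs)                  # [2. 3.]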
@@ -1,9 +1,23 @@
-from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper
+from alr_envs.dmc.ball_in_cup.ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper
 from alr_envs.utils.make_env_helpers import make_dmp_env, make_env
 
 
-def example_dmc(env_name="fish-swim", seed=1, iterations=1000):
-    env = make_env(env_name, seed)
+def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
+    """
+    Example for running a DMC based env in the step based setting.
+    The env_id has to be specified as `domain_name-task_name` or
+    for manipulation tasks as `manipulation-environment_name`
+
+    Args:
+        env_id: Either `domain_name-task_name` or `manipulation-environment_name`
+        seed: seed for deterministic behaviour
+        iterations: Number of rollout steps to run
+        render: Render the episode
+
+    Returns:
+
+    """
+    env = make_env(env_id, seed)
     rewards = 0
     obs = env.reset()
     print("observation shape:", env.observation_space.shape)
@@ -15,56 +29,72 @@ def example_dmc(env_name="fish-swim", seed=1, iterations=1000):
         obs, reward, done, info = env.step(ac)
         rewards += reward
 
-        env.render("human")
+        if render:
+            env.render("human")
 
         if done:
-            print(env_name, rewards)
+            print(env_id, rewards)
             rewards = 0
             obs = env.reset()
 
     env.close()
 
 
-def example_custom_dmc_and_mp(seed=1):
+def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
     """
-    Example for running a custom motion primitive based environments based off of a dmc task.
-    Our already registered environments follow the same structure, but do not directly allow for modifications.
-    Hence, this also allows to adjust hyperparameters of the motion primitives more easily.
+    Example for running a custom motion primitive based environments.
+    Our already registered environments follow the same structure.
+    Hence, this also allows to adjust hyperparameters of the motion primitives.
+    Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks.
     We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
     for our repo: https://github.com/ALRhub/alr_envs/
     Args:
-        seed: seed
+        seed: seed for deterministic behaviour
+        iterations: Number of rollout steps to run
+        render: Render the episode
 
     Returns:
 
     """
 
+    # Base DMC name, according to structure of above example
    base_env = "ball_in_cup-catch"
 
     # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
     # You can also add other gym.Wrappers in case they are needed.
-    # wrappers = [HoleReacherMPWrapper]
     wrappers = [DMCBallInCupMPWrapper]
     mp_kwargs = {
-        "num_dof": 2,  # env.start_pos
+        "num_dof": 2,
         "num_basis": 5,
-        "duration": 2,
+        "duration": 20,
         "learn_goal": True,
         "alpha_phase": 2,
         "bandwidth_factor": 2,
-        "policy_type": "velocity",
+        "policy_type": "motor",
         "weights_scale": 50,
-        "goal_scale": 0.1
+        "goal_scale": 0.1,
+        "policy_kwargs": {
+            "p_gains": 0.2,
+            "d_gains": 0.05
+        }
     }
     env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
     # OR for a deterministic ProMP:
-    # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, **mp_args)
+    # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_args)
+
+    # This renders the full MP trajectory
+    # It is only required to call render() once in the beginning, which renders every consecutive trajectory.
+    # Resetting to no rendering, can be achieved by render(mode=None).
+    # It is also possible to change them mode multiple times when
+    # e.g. only every nth trajectory should be displayed.
+    if render:
+        env.render(mode="human")
 
     rewards = 0
     obs = env.reset()
-    env.render("human")
 
     # number of samples/full trajectories (multiple environment steps)
-    for i in range(10):
+    for i in range(iterations):
         ac = env.action_space.sample()
         obs, reward, done, info = env.step(ac)
         rewards += reward
@@ -85,14 +115,14 @@ if __name__ == '__main__':
     # export MUJOCO_GL="osmesa"
 
     # Standard DMC Suite tasks
-    example_dmc("fish-swim", seed=10, iterations=100)
+    example_dmc("fish-swim", seed=10, iterations=1000, render=True)
 
     # Manipulation tasks
-    # The vision versions are currently not integrated
-    example_dmc("manipulation-reach_site_features", seed=10, iterations=100)
+    # Disclaimer: The vision versions are currently not integrated and yield an error
+    example_dmc("manipulation-reach_site_features", seed=10, iterations=250, render=True)
 
     # Gym + DMC hybrid task provided in the MP framework
-    example_dmc("dmc_ball_in_cup_dmp-v0", seed=10, iterations=10)
+    example_dmc("dmc_ball_in_cup-catch_detpmp-v0", seed=10, iterations=1, render=True)
 
     # Custom DMC task
-    example_custom_dmc_and_mp()
+    example_custom_dmc_and_mp(seed=10, iterations=1, render=True)
@@ -4,14 +4,23 @@ from collections import defaultdict
 import gym
 import numpy as np
 
-from alr_envs.utils.make_env_helpers import make_env
+from alr_envs.utils.make_env_helpers import make_env, make_env_rank
 from alr_envs.utils.mp_env_async_sampler import AlrContextualMpEnvSampler, AlrMpEnvSampler, DummyDist
 
 
-def example_general(env_id: str, seed=1, iterations=1000):
+def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True):
     """
     Example for running any env in the step based setting.
     This also includes DMC environments when leveraging our custom make_env function.
+
+    Args:
+        env_id: OpenAI/Custom gym task id or either `domain_name-task_name` or `manipulation-environment_name` for DMC tasks
+        seed: seed for deterministic behaviour
+        iterations: Number of rollout steps to run
+        render: Render the episode
+
+    Returns:
+
     """
 
     env = make_env(env_id, seed)
@@ -25,7 +34,7 @@ def example_general(env_id: str, seed=1, iterations=1000):
         obs, reward, done, info = env.step(env.action_space.sample())
         rewards += reward
 
-        if i % 1 == 0:
+        if render:
             env.render()
 
         if done:
@@ -34,36 +43,60 @@ def example_general(env_id: str, seed=1, iterations=1000):
             obs = env.reset()
 
 
-def example_async(env_id="alr_envs:HoleReacherDMP-v0", n_cpu=4, seed=int('533D', 16)):
-    def sample(env: gym.vector.VectorEnv, n_samples=100):
-        # for plotting
-        rewards = np.zeros(n_cpu)
-
-        # this would generate more samples than requested if n_samples % num_envs != 0
-        repeat = int(np.ceil(n_samples / env.num_envs))
-        vals = defaultdict(list)
-        for i in range(repeat):
-            obs, reward, done, info = envs.step(envs.action_space.sample())
-            vals['obs'].append(obs)
-            vals['reward'].append(reward)
-            vals['done'].append(done)
-            vals['info'].append(info)
-            rewards += reward
-            if np.any(done):
-                print(rewards[done])
-                rewards[done] = 0
-
-        # do not return values above threshold
-        return (*map(lambda v: np.stack(v)[:n_samples], vals.values()),)
-
-    from alr_envs.utils.make_env_helpers import make_env_rank
-    envs = gym.vector.AsyncVectorEnv([make_env_rank(env_id, seed, i) for i in range(n_cpu)])
+def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samples=800):
+    """
+    Example for running any env in a vectorized multiprocessing setting to generate more samples faster.
+    This also includes DMC and DMP environments when leveraging our custom make_env function.
+    Be aware, increasing the number of environments reduces the total length of the individual episodes.
+
+    Args:
+        env_id: OpenAI/Custom gym task id or either `domain_name-task_name` or `manipulation-environment_name` for DMC tasks
+        seed: seed for deterministic behaviour
+        n_cpu: Number of cpus cores to use in parallel
+        n_samples: number of samples generated in total by all environments.
+
+    Returns: Tuple of (obs, reward, done, info) with type np.ndarray
+
+    """
+    env = gym.vector.AsyncVectorEnv([make_env_rank(env_id, seed, i) for i in range(n_cpu)])
+    # OR
     # envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)])
 
-    obs = envs.reset()
-    print(sample(envs, 16))
+    # for plotting
+    rewards = np.zeros(n_cpu)
+    buffer = defaultdict(list)
+
+    obs = env.reset()
+
+    # this would generate more samples than requested if n_samples % num_envs != 0
+    repeat = int(np.ceil(n_samples / env.num_envs))
+    for i in range(repeat):
+        obs, reward, done, info = env.step(env.action_space.sample())
+        buffer['obs'].append(obs)
+        buffer['reward'].append(reward)
+        buffer['done'].append(done)
+        buffer['info'].append(info)
+        rewards += reward
+        if np.any(done):
+            print(f"Reward at iteration {i}: {rewards[done]}")
+            rewards[done] = 0
+
+    # do not return values above threshold
+    return *map(lambda v: np.stack(v)[:n_samples], buffer.values()),
 
 
 if __name__ == '__main__':
-    # Mujoco task from framework
-    example_general("alr_envs:ALRReacher-v0")
+    # Basic gym task
+    example_general("Pendulum-v0", seed=10, iterations=200, render=True)
+    #
+    # # Basis task from framework
+    example_general("alr_envs:HoleReacher-v0", seed=10, iterations=200, render=True)
+    #
+    # # OpenAI Mujoco task
+    example_general("HalfCheetah-v2", seed=10, render=True)
+    #
+    # # Mujoco task from framework
+    example_general("alr_envs:ALRReacher-v0", seed=10, iterations=200, render=True)
+
+    # Vectorized multiprocessing environments
+    example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)
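Note: the rewritten `example_async` rounds the number of vectorized iterations up, so it can generate slightly more than `n_samples` transitions and then trims the surplus with `[:n_samples]`. A small worked sketch of that arithmetic with made-up numbers:

    import numpy as np

    n_samples, num_envs = 800, 3
    repeat = int(np.ceil(n_samples / num_envs))  # 267 iterations
    generated = repeat * num_envs                # 801 samples, one more than requested
    print(repeat, generated)                     # the [:n_samples] slice drops the surplus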
@@ -2,12 +2,14 @@ from alr_envs import HoleReacherMPWrapper
 from alr_envs.utils.make_env_helpers import make_dmp_env, make_env
 
 
-def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1):
+def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True):
     """
     Example for running a motion primitive based environment, which is already registered
     Args:
         env_name: DMP env_id
-        seed: seed
+        seed: seed for deterministic behaviour
+        iterations: Number of rollout steps to run
+        render: Render the episode
 
     Returns:
 
@@ -16,44 +18,81 @@ def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1):
     # First, it already takes care of seeding and second enables the use of DMC tasks within the gym interface.
     env = make_env(env_name, seed)
 
-    # Changing the mp_kwargs is possible by providing them to gym.
-    # E.g. here by providing way to many basis functions
-    # mp_kwargs = {
-    #     "num_dof": 5,
-    #     "num_basis": 1000,
-    #     "duration": 2,
-    #     "learn_goal": True,
-    #     "alpha_phase": 2,
-    #     "bandwidth_factor": 2,
-    #     "policy_type": "velocity",
-    #     "weights_scale": 50,
-    #     "goal_scale": 0.1
-    # }
-    # env = make_env(env_name, seed, mp_kwargs=mp_kwargs)
-
     rewards = 0
     # env.render(mode=None)
     obs = env.reset()
 
     # number of samples/full trajectories (multiple environment steps)
-    for i in range(10):
+    for i in range(iterations):
+
+        if render and i % 2 == 0:
+            # This renders the full MP trajectory
+            # It is only required to call render() once in the beginning, which renders every consecutive trajectory.
+            # Resetting to no rendering, can be achieved by render(mode=None).
+            # It is also possible to change the mode multiple times when
+            # e.g. only every second trajectory should be displayed, such as here
+            # Just make sure the correct mode is set before executing the step.
+            env.render(mode="human")
+        else:
+            env.render(mode=None)
+
         ac = env.action_space.sample()
         obs, reward, done, info = env.step(ac)
         rewards += reward
 
-        if i % 1 == 0:
-            # render full DMP trajectory
-            # render can only be called once in the beginning as well. That would render every trajectory
-            # Calling it after every trajectory allows to modify the mode. mode=None, disables rendering.
-            env.render(mode="human")
-
         if done:
             print(rewards)
             rewards = 0
             obs = env.reset()
 
 
-def example_custom_mp(seed=1):
+def example_custom_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True):
+    """
+    Example for running a motion primitive based environment, which is already registered
+    Args:
+        env_name: DMP env_id
+        seed: seed for deterministic behaviour
+        iterations: Number of rollout steps to run
+        render: Render the episode
+
+    Returns:
+
+    """
+    # Changing the mp_kwargs is possible by providing them to gym.
+    # E.g. here by providing way to many basis functions
+    mp_kwargs = {
+        "num_dof": 5,
+        "num_basis": 1000,
+        "duration": 2,
+        "learn_goal": True,
+        "alpha_phase": 2,
+        "bandwidth_factor": 2,
+        "policy_type": "velocity",
+        "weights_scale": 50,
+        "goal_scale": 0.1
+    }
+    env = make_env(env_name, seed, mp_kwargs=mp_kwargs)
+
+    # This time rendering every trajectory
+    if render:
+        env.render(mode="human")
+
+    rewards = 0
+    obs = env.reset()
+
+    # number of samples/full trajectories (multiple environment steps)
+    for i in range(iterations):
+        ac = env.action_space.sample()
+        obs, reward, done, info = env.step(ac)
+        rewards += reward
+
+        if done:
+            print(rewards)
+            rewards = 0
+            obs = env.reset()
+
+
+def example_fully_custom_mp(seed=1, iterations=1, render=True):
     """
     Example for running a custom motion primitive based environments.
     Our already registered environments follow the same structure.
@@ -63,12 +102,15 @@ def example_custom_mp(seed=1):
     for our repo: https://github.com/ALRhub/alr_envs/
     Args:
         seed: seed
+        iterations: Number of rollout steps to run
+        render: Render the episode
 
     Returns:
 
     """
 
     base_env = "alr_envs:HoleReacher-v1"
 
     # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
     # You can also add other gym.Wrappers in case they are needed.
     wrappers = [HoleReacherMPWrapper]
@@ -85,19 +127,16 @@ def example_custom_mp(seed=1):
     }
     env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
     # OR for a deterministic ProMP:
-    # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed)
+    # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
+
+    if render:
+        env.render(mode="human")
 
     rewards = 0
-    # render full DMP trajectory
-    # It is only required to call render() once in the beginning, which renders every consecutive trajectory.
-    # Resetting to no rendering, can be achieved by render(mode=None).
-    # It is also possible to change them mode multiple times when
-    # e.g. only every nth trajectory should be displayed.
-    env.render(mode="human")
     obs = env.reset()
 
     # number of samples/full trajectories (multiple environment steps)
-    for i in range(10):
+    for i in range(iterations):
         ac = env.action_space.sample()
         obs, reward, done, info = env.step(ac)
         rewards += reward
@@ -110,10 +149,13 @@ def example_custom_mp(seed=1):
 
 if __name__ == '__main__':
     # DMP
-    example_mp("alr_envs:HoleReacherDMP-v1")
+    example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=True)
 
     # DetProMP
-    example_mp("alr_envs:HoleReacherDetPMP-v1")
+    example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=True)
 
-    # Custom DMP
-    example_custom_mp()
+    # Altered basis functions
+    example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=True)
+
+    # Custom MP
+    example_fully_custom_mp(seed=10, iterations=1, render=True)
@@ -1,270 +0,0 @@
-from collections import OrderedDict
-import os
-from abc import abstractmethod
-
-
-from gym import error, spaces
-from gym.utils import seeding
-import numpy as np
-from os import path
-
-from alr_envs.utils.mps.alr_env import AlrEnv
-from alr_envs.utils.positional_env import PositionalEnv
-
-try:
-    import mujoco_py
-except ImportError as e:
-    raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
-
-DEFAULT_SIZE = 500
-
-
-def convert_observation_to_space(observation):
-    if isinstance(observation, dict):
-        space = spaces.Dict(OrderedDict([
-            (key, convert_observation_to_space(value))
-            for key, value in observation.items()
-        ]))
-    elif isinstance(observation, np.ndarray):
-        low = np.full(observation.shape, -float('inf'), dtype=np.float32)
-        high = np.full(observation.shape, float('inf'), dtype=np.float32)
-        space = spaces.Box(low, high, dtype=observation.dtype)
-    else:
-        raise NotImplementedError(type(observation), observation)
-
-    return space
-
-
-class AlrMujocoEnv(PositionalEnv, AlrEnv):
-    """
-    Superclass for all MuJoCo environments.
-    """
-
-    def __init__(self, model_path, n_substeps, apply_gravity_comp=True):
-        """
-
-        Args:
-            model_path: path to xml file
-            n_substeps: how many steps mujoco does per call to env.step
-            apply_gravity_comp: Whether gravity compensation should be active
-        """
-        if model_path.startswith("/"):
-            fullpath = model_path
-        else:
-            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
-        if not path.exists(fullpath):
-            raise IOError("File %s does not exist" % fullpath)
-        self.n_substeps = n_substeps
-        self.apply_gravity_comp = apply_gravity_comp
-        self.model = mujoco_py.load_model_from_path(fullpath)
-        self.sim = mujoco_py.MjSim(self.model, nsubsteps=n_substeps)
-        self.data = self.sim.data
-        self.viewer = None
-        self._viewers = {}
-
-        self.metadata = {
-            'render.modes': ['human', 'rgb_array', 'depth_array'],
-            'video.frames_per_second': int(np.round(1.0 / self.dt))
-        }
-
-        self.init_qpos = self.sim.data.qpos.ravel().copy()
-        self.init_qvel = self.sim.data.qvel.ravel().copy()
-
-        self._start_pos = None
-        self._start_vel = None
-
-        self._set_action_space()
-
-        observation = self._get_obs()  # TODO: is calling get_obs enough? should we call reset, or even step?
-
-        self._set_observation_space(observation)
-
-        self.seed()
-
-    @property
-    def current_pos(self):
-        """
-        By default returns the joint positions of all simulated objects. May be overridden in subclass.
-        """
-        return self.sim.data.qpos
-
-    @property
-    def current_vel(self):
-        """
-        By default returns the joint velocities of all simulated objects. May be overridden in subclass.
-        """
-        return self.sim.data.qvel
-
-    @property
-    def start_pos(self):
-        """
-        Start position of the agent, for example joint angles of a Panda robot. Necessary for MP wrapped simple_reacher.
-        """
-        return self._start_pos
-
-    @property
-    def start_vel(self):
-        """
-        Start velocity of the agent. Necessary for MP wrapped simple_reacher.
-        """
-        return self._start_vel
-
-    def extend_des_pos(self, des_pos):
-        """
-        In a simplified environment, the actions may only control a subset of all the joints.
-        Extend the trajectory to match the environments full action space
-        Args:
-            des_pos:
-
-        Returns:
-
-        """
-        pass
-
-    def extend_des_vel(self, des_vel):
-        pass
-
-    def _set_action_space(self):
-        bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
-        low, high = bounds.T
-        self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
-        return self.action_space
-
-    def _set_observation_space(self, observation):
-        self.observation_space = convert_observation_to_space(observation)
-        return self.observation_space
-
-    def seed(self, seed=None):
-        self.np_random, seed = seeding.np_random(seed)
-        return [seed]
-
-    # methods to override:
-    # ----------------------------
-
-    @property
-    @abstractmethod
-    def active_obs(self):
-        """Returns boolean mask for each observation entry
-        whether the observation is returned for the contextual case or not.
-        This effectively allows to filter unwanted or unnecessary observations from the full step-based case.
-        """
-        return np.ones(self.observation_space.shape, dtype=bool)
-
-    def _get_obs(self):
-        """Returns the observation.
-        """
-        raise NotImplementedError()
-
-    def reset_model(self):
-        """
-        Reset the robot degrees of freedom (qpos and qvel).
-        Implement this in each subclass.
-        """
-        raise NotImplementedError
-
-    def viewer_setup(self):
-        """
-        This method is called when the viewer is initialized.
-        Optionally implement this method, if you need to tinker with camera position
-        and so forth.
-        """
-        pass
-
-    # -----------------------------
-
-    def reset(self):
-        self.sim.reset()
-        ob = self.reset_model()
-        return ob
-
-    def set_state(self, qpos, qvel):
-        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
-        old_state = self.sim.get_state()
-        new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
-                                         old_state.act, old_state.udd_state)
-        self.sim.set_state(new_state)
-        self.sim.forward()
-
-    @property
-    def dt(self):
-        return self.model.opt.timestep * self.n_substeps
-
-    def do_simulation(self, ctrl):
-        """
-        Additionally returns whether there was an error while stepping the simulation
-        """
-        error_in_sim = False
-        num_actuations = len(ctrl)
-        if self.apply_gravity_comp:
-            self.sim.data.ctrl[:num_actuations] = ctrl + self.sim.data.qfrc_bias[:num_actuations].copy() / self.model.actuator_gear[:, 0]
-        else:
-            self.sim.data.ctrl[:num_actuations] = ctrl
-
-        try:
-            self.sim.step()
-        except mujoco_py.builder.MujocoException:
-            error_in_sim = True
-
-        return error_in_sim
-
-    def render(self,
-               mode='human',
-               width=DEFAULT_SIZE,
-               height=DEFAULT_SIZE,
-               camera_id=None,
-               camera_name=None):
-        if mode == 'rgb_array' or mode == 'depth_array':
-            if camera_id is not None and camera_name is not None:
-                raise ValueError("Both `camera_id` and `camera_name` cannot be"
-                                 " specified at the same time.")
-
-            no_camera_specified = camera_name is None and camera_id is None
-            if no_camera_specified:
-                camera_name = 'track'
-
-            if camera_id is None and camera_name in self.model._camera_name2id:
-                camera_id = self.model.camera_name2id(camera_name)
-
-            self._get_viewer(mode).render(width, height, camera_id=camera_id)
-
-        if mode == 'rgb_array':
-            # window size used for old mujoco-py:
-            data = self._get_viewer(mode).read_pixels(width, height, depth=False)
-            # original image is upside-down, so flip it
-            return data[::-1, :, :]
-        elif mode == 'depth_array':
-            self._get_viewer(mode).render(width, height)
-            # window size used for old mujoco-py:
-            # Extract depth part of the read_pixels() tuple
-            data = self._get_viewer(mode).read_pixels(width, height, depth=True)[1]
-            # original image is upside-down, so flip it
-            return data[::-1, :]
-        elif mode == 'human':
-            self._get_viewer(mode).render()
-
-    def close(self):
-        if self.viewer is not None:
-            # self.viewer.finish()
-            self.viewer = None
-            self._viewers = {}
-
-    def _get_viewer(self, mode):
-        self.viewer = self._viewers.get(mode)
-        if self.viewer is None:
-            if mode == 'human':
-                self.viewer = mujoco_py.MjViewer(self.sim)
-            elif mode == 'rgb_array' or mode == 'depth_array':
-                self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)
-
-            self.viewer_setup()
-            self._viewers[mode] = self.viewer
-        return self.viewer
-
-    def get_body_com(self, body_name):
-        return self.data.get_body_xpos(body_name)
-
-    def state_vector(self):
-        return np.concatenate([
-            self.sim.data.qpos.flat,
-            self.sim.data.qvel.flat
-        ])
@@ -1,10 +1,11 @@
 from gym import utils
 import os
 import numpy as np
-from alr_envs.mujoco import alr_mujoco_env
+from gym.envs.mujoco import MujocoEnv
 
 
-class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
+class ALRBallInACupEnv(MujocoEnv, utils.EzPickle):
     def __init__(self, n_substeps=4, apply_gravity_comp=True, simplified: bool = False,
                  reward_type: str = None, context: np.ndarray = None):
         utils.EzPickle.__init__(**locals())
@@ -1,8 +1,8 @@
-from typing import Union
+from typing import Tuple, Union
 
 import numpy as np
 
-from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
 
 
 class BallInACupMPWrapper(MPEnvWrapper):
@@ -24,6 +24,14 @@ class BallInACupMPWrapper(MPEnvWrapper):
         else:
             return self._start_pos
 
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.sim.data.qpos[0:7].copy()
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.sim.data.qvel[0:7].copy()
+
     @property
     def goal_pos(self):
         # TODO: @Max I think the default value of returning to the start is reasonable here
@@ -1,15 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
-
-
-class BallInACupPositionalWrapper(PositionalEnvWrapper):
-    @property
-    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.sim.data.qpos[0:7].copy()
-
-    @property
-    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.sim.data.qvel[0:7].copy()
@@ -1,11 +1,15 @@
-from gym import utils
 import os
+
 import numpy as np
-from alr_envs.mujoco import alr_mujoco_env
+from gym import utils
+from gym.envs.mujoco import MujocoEnv
 
 
-class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
-    def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None):
+class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
+    def __init__(self, model_path, frame_skip, n_substeps=4, apply_gravity_comp=True, reward_function=None):
+        utils.EzPickle.__init__(**locals())
+        MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip)
         self._steps = 0
 
         self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
@@ -25,12 +29,10 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
         self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
 
         self.context = None
-
-        utils.EzPickle.__init__(self)
-        alr_mujoco_env.AlrMujocoEnv.__init__(self,
-                                             self.xml_path,
-                                             apply_gravity_comp=apply_gravity_comp,
-                                             n_substeps=n_substeps)
+        # alr_mujoco_env.AlrMujocoEnv.__init__(self,
+        #                                      self.xml_path,
+        #                                      apply_gravity_comp=apply_gravity_comp,
+        #                                      n_substeps=n_substeps)
 
         self.sim_time = 8  # seconds
         self.sim_steps = int(self.sim_time / self.dt)
@@ -1,11 +1,13 @@
 from gym import utils
 import os
 import numpy as np
-from alr_envs.mujoco import alr_mujoco_env
+from gym.envs.mujoco import MujocoEnv
 
 
-class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
+class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
     def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None):
+        utils.EzPickle.__init__(**locals())
+
         self._steps = 0
 
         self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
@@ -26,11 +28,12 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
 
         self.context = None
 
-        utils.EzPickle.__init__(self)
-        alr_mujoco_env.AlrMujocoEnv.__init__(self,
-                                             self.xml_path,
-                                             apply_gravity_comp=apply_gravity_comp,
-                                             n_substeps=n_substeps)
+        MujocoEnv.__init__(self, model_path=self.xml_path, frame_skip=n_substeps)
+
+        # alr_mujoco_env.AlrMujocoEnv.__init__(self,
+        #                                      self.xml_path,
+        #                                      apply_gravity_comp=apply_gravity_comp,
+        #                                      n_substeps=n_substeps)
 
         self.sim_time = 8  # seconds
         self.sim_steps = int(self.sim_time / self.dt)
@@ -1,22 +1,24 @@
+import collections
 import re
+from typing import Union
 
 import gym
 from gym.envs.registration import register
 
 
 def make(
-        id,
-        seed=1,
-        visualize_reward=True,
-        from_pixels=False,
-        height=84,
-        width=84,
-        camera_id=0,
-        frame_skip=1,
-        episode_length=1000,
-        environment_kwargs=None,
-        time_limit=None,
-        channels_first=True
+        id: str,
+        seed: int = 1,
+        visualize_reward: bool = True,
+        from_pixels: bool = False,
+        height: int = 84,
+        width: int = 84,
+        camera_id: int = 0,
+        frame_skip: int = 1,
+        episode_length: Union[None, int] = None,
+        environment_kwargs: dict = {},
+        time_limit: Union[None, float] = None,
+        channels_first: bool = True
 ):
     # Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/__init__.py
     # License: MIT
@@ -31,12 +33,16 @@ def make(
         assert not visualize_reward, 'cannot use visualize reward when learning from pixels'
 
     # shorten episode length
+    if episode_length is None:
+        # Default lengths for benchmarking suite is 1000 and for manipulation tasks 250
+        episode_length = 250 if domain_name == "manipulation" else 1000
+
     max_episode_steps = (episode_length + frame_skip - 1) // frame_skip
 
     if env_id not in gym.envs.registry.env_specs:
-        task_kwargs = {}
+        task_kwargs = {'random': seed}
         # if seed is not None:
-        task_kwargs['random'] = seed
+        #     task_kwargs['random'] = seed
         if time_limit is not None:
             task_kwargs['time_limit'] = time_limit
         register(
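Note: with the changes above, `make()` first picks a default episode length per suite (1000 for the benchmarking suite, 250 for manipulation) and then converts it to gym steps via a ceiling division by `frame_skip`. A small worked sketch of that formula:

    # (episode_length, frame_skip) -> max_episode_steps, using the same ceiling-division formula as above
    for episode_length, frame_skip in [(1000, 1), (1000, 4), (250, 4)]:
        max_episode_steps = (episode_length + frame_skip - 1) // frame_skip
        print(episode_length, frame_skip, max_episode_steps)  # 1000, 250 and 63 gym steps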
@@ -1,6 +1,7 @@
 # Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/wrappers.py
 # License: MIT
 # Copyright (c) 2020 Denis Yarats
+import collections
 from typing import Any, Dict, Tuple
 
 import numpy as np
@@ -31,12 +32,21 @@ def _spec_to_box(spec):
     return spaces.Box(low, high, dtype=np.float32)
 
 
-def _flatten_obs(obs):
-    obs_pieces = []
-    for v in obs.values():
-        flat = np.array([v]) if np.isscalar(v) else v.ravel()
-        obs_pieces.append(flat)
-    return np.concatenate(obs_pieces, axis=0)
+def _flatten_obs(obs: collections.MutableMapping):
+    # obs_pieces = []
+    # for v in obs.values():
+    #     flat = np.array([v]) if np.isscalar(v) else v.ravel()
+    #     obs_pieces.append(flat)
+    # return np.concatenate(obs_pieces, axis=0)
+
+    if not isinstance(obs, collections.MutableMapping):
+        raise ValueError(f'Requires dict-like observations structure. {type(obs)} found.')
+
+    # Keep key order consistent for non OrderedDicts
+    keys = obs.keys() if isinstance(obs, collections.OrderedDict) else sorted(obs.keys())
+
+    obs_vals = [np.array([obs[key]]) if np.isscalar(obs[key]) else obs[key].ravel() for key in keys]
+    return np.concatenate(obs_vals)
 
 
 class DMCWrapper(core.Env):
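Note: the new `_flatten_obs` rejects non-dict observations and keeps the key order stable by sorting the keys of plain dicts (an `OrderedDict` keeps its own order). A minimal sketch of the resulting behaviour with a made-up observation:

    import collections
    import numpy as np

    obs = {"velocity": np.array([0.1, 0.2]), "position": np.array([[1.0, 2.0]]), "touch": 0.5}

    # Same rule as the new _flatten_obs: sorted keys for plain dicts, scalars promoted to 1-element arrays.
    keys = obs.keys() if isinstance(obs, collections.OrderedDict) else sorted(obs.keys())
    flat = np.concatenate([np.array([obs[k]]) if np.isscalar(obs[k]) else obs[k].ravel() for k in keys])
    print(flat)  # position, touch, velocity -> [1.  2.  0.5 0.1 0.2]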
@@ -75,15 +85,19 @@ class DMCWrapper(core.Env):
         self._action_space = _spec_to_box([self._env.action_spec()])
         self._observation_space = _spec_to_box(self._env.observation_spec().values())
 
-        self._last_observation = None
+        self._last_state = None
         self.viewer = None
 
         # set seed
         self.seed(seed=task_kwargs.get('random', 1))
 
-    def __getattr__(self, name):
-        """Delegate attribute access to underlying environment."""
-        return getattr(self._env, name)
+    def __getattr__(self, item):
+        """Propagate only non-existent properties to wrapped env."""
+        if item.startswith('_'):
+            raise AttributeError("attempted to get missing private attribute '{}'".format(item))
+        if item in self.__dict__:
+            return getattr(self, item)
+        return getattr(self._env, item)
 
     def _get_obs(self, time_step):
         if self._from_pixels:
@@ -107,6 +121,10 @@ class DMCWrapper(core.Env):
     def action_space(self):
         return self._action_space
 
+    @property
+    def dt(self):
+        return self._env.control_timestep() * self._frame_skip
+
     def seed(self, seed=None):
         self._action_space.seed(seed)
         self._observation_space.seed(seed)
@@ -123,19 +141,19 @@ class DMCWrapper(core.Env):
             if done:
                 break
 
-        self._last_observation = _flatten_obs(time_step.observation)
+        self._last_state = _flatten_obs(time_step.observation)
         obs = self._get_obs(time_step)
         extra['discount'] = time_step.discount
         return obs, reward, done, extra
 
     def reset(self) -> np.ndarray:
         time_step = self._env.reset()
-        self._last_observation = _flatten_obs(time_step.observation)
+        self._last_state = _flatten_obs(time_step.observation)
         obs = self._get_obs(time_step)
         return obs
 
     def render(self, mode='rgb_array', height=None, width=None, camera_id=0):
-        if self._last_observation is None:
+        if self._last_state is None:
             raise ValueError('Environment not ready to render. Call reset() first.')
 
         # assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode
@@ -3,7 +3,7 @@ from typing import Iterable, List, Type
 
 import gym
 
-from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
 from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper
 from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper
 
@@ -32,7 +32,7 @@ def make_env_rank(env_id: str, seed: int, rank: int = 0):
 def make_env(env_id: str, seed, **kwargs):
     """
     Converts an env_id to an environment with the gym API.
-    This also works for DeepMind Control Suite env_wrappers
+    This also works for DeepMind Control Suite interface_wrappers
     for which domain name and task name are expected to be separated by "-".
     Args:
         env_id: gym name or env_id of the form "domain_name-task_name" for DMC tasks
@@ -42,7 +42,7 @@ def make_env(env_id: str, seed, **kwargs):
 
     """
     try:
-        # Add seed to kwargs in case it is a predefined dmc environment.
+        # Add seed to kwargs in case it is a predefined gym+dmc hybrid environment.
         if env_id.startswith("dmc"):
             kwargs.update({"seed": seed})
 