mp wrapper fixes
This commit is contained in:
parent
eddef33d9a
commit
6704c9d63a
@ -118,33 +118,30 @@ for _dims in [5, 7]:
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
## Hopper Jump random joints and desired position
|
|
||||||
register(
|
register(
|
||||||
id='HopperJumpSparse-v0',
|
id='HopperJumpSparse-v0',
|
||||||
entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnv',
|
entry_point='alr_envs.alr.mujoco:HopperJumpEnv',
|
||||||
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
|
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
|
||||||
kwargs={
|
kwargs={
|
||||||
# "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
|
|
||||||
"sparse": True,
|
"sparse": True,
|
||||||
# "healthy_reward": 1.0
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
## Hopper Jump random joints and desired position step based reward
|
|
||||||
register(
|
register(
|
||||||
id='HopperJump-v0',
|
id='HopperJump-v0',
|
||||||
entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnvStepBased',
|
entry_point='alr_envs.alr.mujoco:HopperJumpEnv',
|
||||||
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
|
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
|
||||||
kwargs={
|
kwargs={
|
||||||
# "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
|
|
||||||
"sparse": False,
|
"sparse": False,
|
||||||
# "healthy_reward": 1.0
|
"healthy_reward": 1.0,
|
||||||
|
"contact_weight": 0.0,
|
||||||
|
"height_weight": 3.0,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
register(
|
register(
|
||||||
id='ALRAntJump-v0',
|
id='ALRAntJump-v0',
|
||||||
entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv',
|
entry_point='alr_envs.alr.mujoco:AntJumpEnv',
|
||||||
max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
|
max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
|
||||||
kwargs={
|
kwargs={
|
||||||
"max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP,
|
"max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP,
|
||||||
@ -266,7 +263,7 @@ for _v in _versions:
|
|||||||
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
|
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
|
||||||
# max_episode_steps=1,
|
# max_episode_steps=1,
|
||||||
kwargs={
|
kwargs={
|
||||||
"name": f"alr_envs:{_v}",
|
"name": f"{_v}",
|
||||||
"wrappers": [classic_control.simple_reacher.MPWrapper],
|
"wrappers": [classic_control.simple_reacher.MPWrapper],
|
||||||
"traj_gen_kwargs": {
|
"traj_gen_kwargs": {
|
||||||
"num_dof": 2 if "long" not in _v.lower() else 5,
|
"num_dof": 2 if "long" not in _v.lower() else 5,
|
||||||
@ -304,7 +301,7 @@ register(
|
|||||||
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
|
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
|
||||||
# max_episode_steps=1,
|
# max_episode_steps=1,
|
||||||
kwargs={
|
kwargs={
|
||||||
"name": "alr_envs:ViaPointReacher-v0",
|
"name": "ViaPointReacher-v0",
|
||||||
"wrappers": [classic_control.viapoint_reacher.MPWrapper],
|
"wrappers": [classic_control.viapoint_reacher.MPWrapper],
|
||||||
"traj_gen_kwargs": {
|
"traj_gen_kwargs": {
|
||||||
"num_dof": 5,
|
"num_dof": 5,
|
||||||
@ -340,7 +337,7 @@ for _v in _versions:
|
|||||||
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
|
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
|
||||||
# max_episode_steps=1,
|
# max_episode_steps=1,
|
||||||
kwargs={
|
kwargs={
|
||||||
"name": f"alr_envs:HoleReacher-{_v}",
|
"name": f"HoleReacher-{_v}",
|
||||||
"wrappers": [classic_control.hole_reacher.MPWrapper],
|
"wrappers": [classic_control.hole_reacher.MPWrapper],
|
||||||
"traj_gen_kwargs": {
|
"traj_gen_kwargs": {
|
||||||
"num_dof": 5,
|
"num_dof": 5,
|
||||||
@ -362,7 +359,7 @@ for _v in _versions:
|
|||||||
kwargs_dict_hole_reacher_promp['wrappers'].append(classic_control.hole_reacher.MPWrapper)
|
kwargs_dict_hole_reacher_promp['wrappers'].append(classic_control.hole_reacher.MPWrapper)
|
||||||
kwargs_dict_hole_reacher_promp['trajectory_generator_kwargs']['weight_scale'] = 2
|
kwargs_dict_hole_reacher_promp['trajectory_generator_kwargs']['weight_scale'] = 2
|
||||||
kwargs_dict_hole_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
|
kwargs_dict_hole_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
|
||||||
kwargs_dict_hole_reacher_promp['name'] = f"alr_envs:{_v}"
|
kwargs_dict_hole_reacher_promp['name'] = f"{_v}"
|
||||||
register(
|
register(
|
||||||
id=_env_id,
|
id=_env_id,
|
||||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||||
@ -370,8 +367,8 @@ for _v in _versions:
|
|||||||
)
|
)
|
||||||
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||||
|
|
||||||
## ALRReacher
|
## ReacherNd
|
||||||
_versions = ["ALRReacher-v0", "ALRLongReacher-v0", "ALRReacherSparse-v0", "ALRLongReacherSparse-v0"]
|
_versions = ["Reacher5d-v0", "Reacher7d-v0", "Reacher5dSparse-v0", "Reacher7dSparse-v0"]
|
||||||
for _v in _versions:
|
for _v in _versions:
|
||||||
_name = _v.split("-")
|
_name = _v.split("-")
|
||||||
_env_id = f'{_name[0]}DMP-{_name[1]}'
|
_env_id = f'{_name[0]}DMP-{_name[1]}'
|
||||||
@ -380,7 +377,7 @@ for _v in _versions:
|
|||||||
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
|
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
|
||||||
# max_episode_steps=1,
|
# max_episode_steps=1,
|
||||||
kwargs={
|
kwargs={
|
||||||
"name": f"alr_envs:{_v}",
|
"name": f"{_v}",
|
||||||
"wrappers": [mujoco.reacher.MPWrapper],
|
"wrappers": [mujoco.reacher.MPWrapper],
|
||||||
"traj_gen_kwargs": {
|
"traj_gen_kwargs": {
|
||||||
"num_dof": 5 if "long" not in _v.lower() else 7,
|
"num_dof": 5 if "long" not in _v.lower() else 7,
|
||||||
@ -404,10 +401,10 @@ for _v in _versions:
|
|||||||
kwargs_dict_alr_reacher_promp['wrappers'].append(mujoco.reacher.MPWrapper)
|
kwargs_dict_alr_reacher_promp['wrappers'].append(mujoco.reacher.MPWrapper)
|
||||||
kwargs_dict_alr_reacher_promp['controller_kwargs']['p_gains'] = 1
|
kwargs_dict_alr_reacher_promp['controller_kwargs']['p_gains'] = 1
|
||||||
kwargs_dict_alr_reacher_promp['controller_kwargs']['d_gains'] = 0.1
|
kwargs_dict_alr_reacher_promp['controller_kwargs']['d_gains'] = 0.1
|
||||||
kwargs_dict_alr_reacher_promp['name'] = f"alr_envs:{_v}"
|
kwargs_dict_alr_reacher_promp['name'] = f"{_v}"
|
||||||
register(
|
register(
|
||||||
id=_env_id,
|
id=_env_id,
|
||||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||||
kwargs=kwargs_dict_alr_reacher_promp
|
kwargs=kwargs_dict_alr_reacher_promp
|
||||||
)
|
)
|
||||||
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||||
@ -425,7 +422,7 @@ for _v in _versions:
|
|||||||
kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
|
kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
|
||||||
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2
|
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2
|
||||||
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2
|
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2
|
||||||
kwargs_dict_bp_promp['name'] = f"alr_envs:{_v}"
|
kwargs_dict_bp_promp['name'] = f"{_v}"
|
||||||
register(
|
register(
|
||||||
id=_env_id,
|
id=_env_id,
|
||||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||||
@ -445,7 +442,7 @@ for _v in _versions:
|
|||||||
kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
|
kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
|
||||||
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2
|
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2
|
||||||
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2
|
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2
|
||||||
kwargs_dict_bp_promp['name'] = f"alr_envs:{_v}"
|
kwargs_dict_bp_promp['name'] = f"{_v}"
|
||||||
register(
|
register(
|
||||||
id=_env_id,
|
id=_env_id,
|
||||||
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
|
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
|
||||||
@ -465,7 +462,7 @@ for _v in _versions:
|
|||||||
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
||||||
kwargs_dict_ant_jump_promp = deepcopy(DEFAULT_BB_DICT)
|
kwargs_dict_ant_jump_promp = deepcopy(DEFAULT_BB_DICT)
|
||||||
kwargs_dict_ant_jump_promp['wrappers'].append(mujoco.ant_jump.MPWrapper)
|
kwargs_dict_ant_jump_promp['wrappers'].append(mujoco.ant_jump.MPWrapper)
|
||||||
kwargs_dict_ant_jump_promp['name'] = f"alr_envs:{_v}"
|
kwargs_dict_ant_jump_promp['name'] = f"{_v}"
|
||||||
register(
|
register(
|
||||||
id=_env_id,
|
id=_env_id,
|
||||||
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
|
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
|
||||||
@ -482,7 +479,7 @@ for _v in _versions:
|
|||||||
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
||||||
kwargs_dict_halfcheetah_jump_promp = deepcopy(DEFAULT_BB_DICT)
|
kwargs_dict_halfcheetah_jump_promp = deepcopy(DEFAULT_BB_DICT)
|
||||||
kwargs_dict_halfcheetah_jump_promp['wrappers'].append(mujoco.half_cheetah_jump.MPWrapper)
|
kwargs_dict_halfcheetah_jump_promp['wrappers'].append(mujoco.half_cheetah_jump.MPWrapper)
|
||||||
kwargs_dict_halfcheetah_jump_promp['name'] = f"alr_envs:{_v}"
|
kwargs_dict_halfcheetah_jump_promp['name'] = f"{_v}"
|
||||||
register(
|
register(
|
||||||
id=_env_id,
|
id=_env_id,
|
||||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||||
@ -502,7 +499,7 @@ for _v in _versions:
|
|||||||
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
||||||
kwargs_dict_hopper_jump_promp = deepcopy(DEFAULT_BB_DICT)
|
kwargs_dict_hopper_jump_promp = deepcopy(DEFAULT_BB_DICT)
|
||||||
kwargs_dict_hopper_jump_promp['wrappers'].append(mujoco.hopper_jump.MPWrapper)
|
kwargs_dict_hopper_jump_promp['wrappers'].append(mujoco.hopper_jump.MPWrapper)
|
||||||
kwargs_dict_hopper_jump_promp['name'] = f"alr_envs:{_v}"
|
kwargs_dict_hopper_jump_promp['name'] = f"{_v}"
|
||||||
register(
|
register(
|
||||||
id=_env_id,
|
id=_env_id,
|
||||||
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
|
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
|
||||||
@ -520,7 +517,7 @@ for _v in _versions:
|
|||||||
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
||||||
kwargs_dict_walker2d_jump_promp = deepcopy(DEFAULT_BB_DICT)
|
kwargs_dict_walker2d_jump_promp = deepcopy(DEFAULT_BB_DICT)
|
||||||
kwargs_dict_walker2d_jump_promp['wrappers'].append(mujoco.walker_2d_jump.MPWrapper)
|
kwargs_dict_walker2d_jump_promp['wrappers'].append(mujoco.walker_2d_jump.MPWrapper)
|
||||||
kwargs_dict_walker2d_jump_promp['name'] = f"alr_envs:{_v}"
|
kwargs_dict_walker2d_jump_promp['name'] = f"{_v}"
|
||||||
register(
|
register(
|
||||||
id=_env_id,
|
id=_env_id,
|
||||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||||
@ -583,7 +580,7 @@ register(
|
|||||||
# CtxtFree are v0, Contextual are v1
|
# CtxtFree are v0, Contextual are v1
|
||||||
register(
|
register(
|
||||||
id='ALRAntJump-v0',
|
id='ALRAntJump-v0',
|
||||||
entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv',
|
entry_point='alr_envs.alr.mujoco:AntJumpEnv',
|
||||||
max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
|
max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
|
||||||
kwargs={
|
kwargs={
|
||||||
"max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP,
|
"max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP,
|
||||||
@ -602,7 +599,7 @@ register(
|
|||||||
)
|
)
|
||||||
register(
|
register(
|
||||||
id='ALRHopperJump-v0',
|
id='ALRHopperJump-v0',
|
||||||
entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv',
|
entry_point='alr_envs.alr.mujoco:HopperJumpEnv',
|
||||||
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
|
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
|
||||||
kwargs={
|
kwargs={
|
||||||
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
|
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
|
||||||
@ -649,7 +646,7 @@ for i in _vs:
|
|||||||
id=_env_id,
|
id=_env_id,
|
||||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||||
kwargs={
|
kwargs={
|
||||||
"name": f"alr_envs:{_env_id.replace('ProMP', '')}",
|
"name": f"{_env_id.replace('ProMP', '')}",
|
||||||
"wrappers": [mujoco.reacher.MPWrapper],
|
"wrappers": [mujoco.reacher.MPWrapper],
|
||||||
"mp_kwargs": {
|
"mp_kwargs": {
|
||||||
"num_dof": 5,
|
"num_dof": 5,
|
||||||
@ -672,7 +669,7 @@ for i in _vs:
|
|||||||
id=_env_id,
|
id=_env_id,
|
||||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||||
kwargs={
|
kwargs={
|
||||||
"name": f"alr_envs:{_env_id.replace('ProMP', '')}",
|
"name": f"{_env_id.replace('ProMP', '')}",
|
||||||
"wrappers": [mujoco.reacher.MPWrapper],
|
"wrappers": [mujoco.reacher.MPWrapper],
|
||||||
"mp_kwargs": {
|
"mp_kwargs": {
|
||||||
"num_dof": 5,
|
"num_dof": 5,
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Union
|
from typing import Union, Tuple, Optional
|
||||||
|
|
||||||
import gym
|
import gym
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from gym import spaces
|
from gym import spaces
|
||||||
|
from gym.core import ObsType
|
||||||
from gym.utils import seeding
|
from gym.utils import seeding
|
||||||
|
|
||||||
from alr_envs.alr.classic_control.utils import intersect
|
from alr_envs.alr.classic_control.utils import intersect
|
||||||
@ -14,8 +15,7 @@ class BaseReacherEnv(gym.Env, ABC):
|
|||||||
Base class for all reaching environments.
|
Base class for all reaching environments.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, n_links: int, random_start: bool = True,
|
def __init__(self, n_links: int, random_start: bool = True, allow_self_collision: bool = False):
|
||||||
allow_self_collision: bool = False):
|
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.link_lengths = np.ones(n_links)
|
self.link_lengths = np.ones(n_links)
|
||||||
self.n_links = n_links
|
self.n_links = n_links
|
||||||
@ -70,7 +70,8 @@ class BaseReacherEnv(gym.Env, ABC):
|
|||||||
def current_vel(self):
|
def current_vel(self):
|
||||||
return self._angle_velocity.copy()
|
return self._angle_velocity.copy()
|
||||||
|
|
||||||
def reset(self):
|
def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
|
||||||
|
options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]:
|
||||||
# Sample only orientation of first link, i.e. the arm is always straight.
|
# Sample only orientation of first link, i.e. the arm is always straight.
|
||||||
if self.random_start:
|
if self.random_start:
|
||||||
first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4)
|
first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4)
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
from typing import Union
|
from typing import Union, Optional, Tuple
|
||||||
|
|
||||||
import gym
|
import gym
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from gym.core import ObsType
|
||||||
from matplotlib import patches
|
from matplotlib import patches
|
||||||
|
|
||||||
from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
|
from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
|
||||||
@ -51,7 +52,8 @@ class HoleReacherEnv(BaseReacherDirectEnv):
|
|||||||
else:
|
else:
|
||||||
raise ValueError("Unknown reward function {}".format(rew_fct))
|
raise ValueError("Unknown reward function {}".format(rew_fct))
|
||||||
|
|
||||||
def reset(self):
|
def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
|
||||||
|
options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]:
|
||||||
self._generate_hole()
|
self._generate_hole()
|
||||||
self._set_patches()
|
self._set_patches()
|
||||||
self.reward_function.reset()
|
self.reward_function.reset()
|
||||||
@ -223,6 +225,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import time
|
import time
|
||||||
|
|
||||||
env = HoleReacherEnv(5)
|
env = HoleReacherEnv(5)
|
||||||
env.reset()
|
env.reset()
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
|||||||
|
|
||||||
class MPWrapper(RawInterfaceWrapper):
|
class MPWrapper(RawInterfaceWrapper):
|
||||||
|
|
||||||
|
@property
|
||||||
def context_mask(self):
|
def context_mask(self):
|
||||||
return np.hstack([
|
return np.hstack([
|
||||||
[self.env.random_start] * self.env.n_links, # cos
|
[self.env.random_start] * self.env.n_links, # cos
|
||||||
@ -25,7 +26,3 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
@property
|
@property
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.env.current_vel
|
return self.env.current_vel
|
||||||
|
|
||||||
@property
|
|
||||||
def dt(self) -> Union[float, int]:
|
|
||||||
return self.env.dt
|
|
||||||
|
@ -7,6 +7,7 @@ from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
|||||||
|
|
||||||
class MPWrapper(RawInterfaceWrapper):
|
class MPWrapper(RawInterfaceWrapper):
|
||||||
|
|
||||||
|
@property
|
||||||
def context_mask(self):
|
def context_mask(self):
|
||||||
return np.hstack([
|
return np.hstack([
|
||||||
[self.env.random_start] * self.env.n_links, # cos
|
[self.env.random_start] * self.env.n_links, # cos
|
||||||
@ -23,7 +24,3 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
@property
|
@property
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.env.current_vel
|
return self.env.current_vel
|
||||||
|
|
||||||
@property
|
|
||||||
def dt(self) -> Union[float, int]:
|
|
||||||
return self.env.dt
|
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
from typing import Iterable, Union
|
from typing import Iterable, Union, Optional, Tuple
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from gym import spaces
|
from gym import spaces
|
||||||
|
from gym.core import ObsType
|
||||||
|
|
||||||
from alr_envs.alr.classic_control.base_reacher.base_reacher_torque import BaseReacherTorqueEnv
|
from alr_envs.alr.classic_control.base_reacher.base_reacher_torque import BaseReacherTorqueEnv
|
||||||
|
|
||||||
@ -15,7 +16,7 @@ class SimpleReacherEnv(BaseReacherTorqueEnv):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, n_links: int, target: Union[None, Iterable] = None, random_start: bool = True,
|
def __init__(self, n_links: int, target: Union[None, Iterable] = None, random_start: bool = True,
|
||||||
allow_self_collision: bool = False,):
|
allow_self_collision: bool = False, ):
|
||||||
super().__init__(n_links, random_start, allow_self_collision)
|
super().__init__(n_links, random_start, allow_self_collision)
|
||||||
|
|
||||||
# provided initial parameters
|
# provided initial parameters
|
||||||
@ -41,7 +42,8 @@ class SimpleReacherEnv(BaseReacherTorqueEnv):
|
|||||||
# def start_pos(self):
|
# def start_pos(self):
|
||||||
# return self._start_pos
|
# return self._start_pos
|
||||||
|
|
||||||
def reset(self):
|
def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
|
||||||
|
options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]:
|
||||||
self._generate_goal()
|
self._generate_goal()
|
||||||
|
|
||||||
return super().reset()
|
return super().reset()
|
||||||
|
@ -7,6 +7,7 @@ from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
|||||||
|
|
||||||
class MPWrapper(RawInterfaceWrapper):
|
class MPWrapper(RawInterfaceWrapper):
|
||||||
|
|
||||||
|
@property
|
||||||
def context_mask(self):
|
def context_mask(self):
|
||||||
return np.hstack([
|
return np.hstack([
|
||||||
[self.env.random_start] * self.env.n_links, # cos
|
[self.env.random_start] * self.env.n_links, # cos
|
||||||
@ -24,7 +25,3 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
@property
|
@property
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.env.current_vel
|
return self.env.current_vel
|
||||||
|
|
||||||
@property
|
|
||||||
def dt(self) -> Union[float, int]:
|
|
||||||
return self.env.dt
|
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
from typing import Iterable, Union
|
from typing import Iterable, Union, Tuple, Optional
|
||||||
|
|
||||||
import gym
|
import gym
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from gym.core import ObsType
|
||||||
from gym.utils import seeding
|
from gym.utils import seeding
|
||||||
|
|
||||||
from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
|
from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
|
||||||
@ -40,7 +41,8 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
|
|||||||
# def start_pos(self):
|
# def start_pos(self):
|
||||||
# return self._start_pos
|
# return self._start_pos
|
||||||
|
|
||||||
def reset(self):
|
def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
|
||||||
|
options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]:
|
||||||
self._generate_goal()
|
self._generate_goal()
|
||||||
return super().reset()
|
return super().reset()
|
||||||
|
|
||||||
@ -183,8 +185,10 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
|
|||||||
|
|
||||||
plt.pause(0.01)
|
plt.pause(0.01)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import time
|
import time
|
||||||
|
|
||||||
env = ViaPointReacherEnv(5)
|
env = ViaPointReacherEnv(5)
|
||||||
env.reset()
|
env.reset()
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from .ant_jump.ant_jump import ALRAntJumpEnv
|
from .ant_jump.ant_jump import AntJumpEnv
|
||||||
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
|
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
|
||||||
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
|
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
|
||||||
from alr_envs.alr.mujoco.beerpong.beerpong import BeerPongEnv
|
from alr_envs.alr.mujoco.beerpong.beerpong import BeerPongEnv
|
||||||
|
@ -1 +1 @@
|
|||||||
from .new_mp_wrapper import MPWrapper
|
from .mp_wrapper import MPWrapper
|
||||||
|
@ -1,14 +1,19 @@
|
|||||||
|
from typing import Tuple, Union, Optional
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from gym.core import ObsType
|
||||||
from gym.envs.mujoco.ant_v3 import AntEnv
|
from gym.envs.mujoco.ant_v3 import AntEnv
|
||||||
|
|
||||||
MAX_EPISODE_STEPS_ANTJUMP = 200
|
MAX_EPISODE_STEPS_ANTJUMP = 200
|
||||||
# TODO: This environment was not testet yet. Do the following todos and test it.
|
|
||||||
|
|
||||||
|
# TODO: This environment was not tested yet. Do the following todos and test it.
|
||||||
# TODO: Right now this environment only considers jumping to a specific height, which is not nice. It should be extended
|
# TODO: Right now this environment only considers jumping to a specific height, which is not nice. It should be extended
|
||||||
# to the same structure as the Hopper, where the angles are randomized (->contexts) and the agent should jump as heigh
|
# to the same structure as the Hopper, where the angles are randomized (->contexts) and the agent should jump as heigh
|
||||||
# as possible, while landing at a specific target position
|
# as possible, while landing at a specific target position
|
||||||
|
|
||||||
|
|
||||||
class ALRAntJumpEnv(AntEnv):
|
class AntJumpEnv(AntEnv):
|
||||||
"""
|
"""
|
||||||
Initialization changes to normal Ant:
|
Initialization changes to normal Ant:
|
||||||
- healthy_reward: 1.0 -> 0.01 -> 0.0 no healthy reward needed - Paul and Marc
|
- healthy_reward: 1.0 -> 0.01 -> 0.0 no healthy reward needed - Paul and Marc
|
||||||
@ -27,17 +32,15 @@ class ALRAntJumpEnv(AntEnv):
|
|||||||
contact_force_range=(-1.0, 1.0),
|
contact_force_range=(-1.0, 1.0),
|
||||||
reset_noise_scale=0.1,
|
reset_noise_scale=0.1,
|
||||||
exclude_current_positions_from_observation=True,
|
exclude_current_positions_from_observation=True,
|
||||||
max_episode_steps=200):
|
):
|
||||||
self.current_step = 0
|
self.current_step = 0
|
||||||
self.max_height = 0
|
self.max_height = 0
|
||||||
self.max_episode_steps = max_episode_steps
|
|
||||||
self.goal = 0
|
self.goal = 0
|
||||||
super().__init__(xml_file, ctrl_cost_weight, contact_cost_weight, healthy_reward, terminate_when_unhealthy,
|
super().__init__(xml_file, ctrl_cost_weight, contact_cost_weight, healthy_reward, terminate_when_unhealthy,
|
||||||
healthy_z_range, contact_force_range, reset_noise_scale,
|
healthy_z_range, contact_force_range, reset_noise_scale,
|
||||||
exclude_current_positions_from_observation)
|
exclude_current_positions_from_observation)
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action):
|
||||||
|
|
||||||
self.current_step += 1
|
self.current_step += 1
|
||||||
self.do_simulation(action, self.frame_skip)
|
self.do_simulation(action, self.frame_skip)
|
||||||
|
|
||||||
@ -54,10 +57,10 @@ class ALRAntJumpEnv(AntEnv):
|
|||||||
|
|
||||||
done = height < 0.3 # fall over -> is the 0.3 value from healthy_z_range? TODO change 0.3 to the value of healthy z angle
|
done = height < 0.3 # fall over -> is the 0.3 value from healthy_z_range? TODO change 0.3 to the value of healthy z angle
|
||||||
|
|
||||||
if self.current_step == self.max_episode_steps or done:
|
if self.current_step == MAX_EPISODE_STEPS_ANTJUMP or done:
|
||||||
# -10 for scaling the value of the distance between the max_height and the goal height; only used when context is enabled
|
# -10 for scaling the value of the distance between the max_height and the goal height; only used when context is enabled
|
||||||
# height_reward = -10 * (np.linalg.norm(self.max_height - self.goal))
|
# height_reward = -10 * (np.linalg.norm(self.max_height - self.goal))
|
||||||
height_reward = -10*np.linalg.norm(self.max_height - self.goal)
|
height_reward = -10 * np.linalg.norm(self.max_height - self.goal)
|
||||||
# no healthy reward when using context, because we optimize a negative value
|
# no healthy reward when using context, because we optimize a negative value
|
||||||
healthy_reward = 0
|
healthy_reward = 0
|
||||||
|
|
||||||
@ -77,7 +80,8 @@ class ALRAntJumpEnv(AntEnv):
|
|||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
return np.append(super()._get_obs(), self.goal)
|
return np.append(super()._get_obs(), self.goal)
|
||||||
|
|
||||||
def reset(self):
|
def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
|
||||||
|
options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]:
|
||||||
self.current_step = 0
|
self.current_step = 0
|
||||||
self.max_height = 0
|
self.max_height = 0
|
||||||
self.goal = np.random.uniform(1.0, 2.5,
|
self.goal = np.random.uniform(1.0, 2.5,
|
||||||
@ -96,19 +100,3 @@ class ALRAntJumpEnv(AntEnv):
|
|||||||
|
|
||||||
observation = self._get_obs()
|
observation = self._get_obs()
|
||||||
return observation
|
return observation
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
render_mode = "human" # "human" or "partial" or "final"
|
|
||||||
env = ALRAntJumpEnv()
|
|
||||||
obs = env.reset()
|
|
||||||
|
|
||||||
for i in range(2000):
|
|
||||||
# test with random actions
|
|
||||||
ac = env.action_space.sample()
|
|
||||||
obs, rew, d, info = env.step(ac)
|
|
||||||
if i % 10 == 0:
|
|
||||||
env.render(mode=render_mode)
|
|
||||||
if d:
|
|
||||||
env.reset()
|
|
||||||
|
|
||||||
env.close()
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from typing import Tuple, Union
|
from typing import Union, Tuple
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
@ -8,7 +8,7 @@ from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
|||||||
class MPWrapper(RawInterfaceWrapper):
|
class MPWrapper(RawInterfaceWrapper):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def context_mask(self) -> np.ndarray:
|
def context_mask(self):
|
||||||
return np.hstack([
|
return np.hstack([
|
||||||
[False] * 111, # ant has 111 dimensional observation space !!
|
[False] * 111, # ant has 111 dimensional observation space !!
|
||||||
[True] # goal height
|
[True] # goal height
|
||||||
@ -21,11 +21,3 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
@property
|
@property
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.env.sim.data.qvel[6:14].copy()
|
return self.env.sim.data.qvel[6:14].copy()
|
||||||
|
|
||||||
@property
|
|
||||||
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
|
||||||
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
|
|
||||||
|
|
||||||
@property
|
|
||||||
def dt(self) -> Union[float, int]:
|
|
||||||
return self.env.dt
|
|
||||||
|
@ -1,22 +0,0 @@
|
|||||||
from alr_envs.black_box.black_box_wrapper import BlackBoxWrapper
|
|
||||||
from typing import Union, Tuple
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
|
||||||
|
|
||||||
|
|
||||||
class MPWrapper(RawInterfaceWrapper):
|
|
||||||
|
|
||||||
def get_context_mask(self):
|
|
||||||
return np.hstack([
|
|
||||||
[False] * 111, # ant has 111 dimensional observation space !!
|
|
||||||
[True] # goal height
|
|
||||||
])
|
|
||||||
|
|
||||||
@property
|
|
||||||
def current_pos(self) -> Union[float, int, np.ndarray]:
|
|
||||||
return self.env.sim.data.qpos[7:15].copy()
|
|
||||||
|
|
||||||
@property
|
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
|
||||||
return self.env.sim.data.qvel[6:14].copy()
|
|
@ -1,5 +1,5 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from alr_envs.alr.mujoco import alr_reward_fct
|
from alr_envs.alr.mujoco.ball_in_a_cup import alr_reward_fct
|
||||||
|
|
||||||
|
|
||||||
class BallInACupReward(alr_reward_fct.AlrReward):
|
class BallInACupReward(alr_reward_fct.AlrReward):
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from alr_envs.alr.mujoco import alr_reward_fct
|
from alr_envs.alr.mujoco.ball_in_a_cup import alr_reward_fct
|
||||||
|
|
||||||
|
|
||||||
class BallInACupReward(alr_reward_fct.AlrReward):
|
class BallInACupReward(alr_reward_fct.AlrReward):
|
||||||
|
@ -6,17 +6,6 @@ import mujoco_py.builder
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from gym import utils
|
from gym import utils
|
||||||
|
|
||||||
from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper
|
|
||||||
from mp_env_api.utils.policies import PDControllerExtend
|
|
||||||
|
|
||||||
|
|
||||||
def make_detpmp_env(**kwargs):
|
|
||||||
name = kwargs.pop("name")
|
|
||||||
_env = gym.make(name)
|
|
||||||
policy = PDControllerExtend(_env, p_gains=kwargs.pop('p_gains'), d_gains=kwargs.pop('d_gains'))
|
|
||||||
kwargs['policy_type'] = policy
|
|
||||||
return DetPMPWrapper(_env, **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
class ALRBallInACupPDEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
class ALRBallInACupPDEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||||
def __init__(self, frame_skip=4, apply_gravity_comp=True, simplified: bool = False,
|
def __init__(self, frame_skip=4, apply_gravity_comp=True, simplified: bool = False,
|
||||||
|
@ -1 +1 @@
|
|||||||
from .new_mp_wrapper import MPWrapper
|
from .mp_wrapper import MPWrapper
|
||||||
|
@ -1,12 +1,11 @@
|
|||||||
import os
|
import os
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
import mujoco_py.builder
|
import mujoco_py.builder
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from gym import utils
|
from gym import utils
|
||||||
from gym.envs.mujoco import MujocoEnv
|
from gym.envs.mujoco import MujocoEnv
|
||||||
|
|
||||||
from alr_envs.alr.mujoco.beerpong.deprecated.beerpong_reward_staged import BeerPongReward
|
|
||||||
|
|
||||||
# XML Variables
|
# XML Variables
|
||||||
ROBOT_COLLISION_OBJ = ["wrist_palm_link_convex_geom",
|
ROBOT_COLLISION_OBJ = ["wrist_palm_link_convex_geom",
|
||||||
"wrist_pitch_link_convex_decomposition_p1_geom",
|
"wrist_pitch_link_convex_decomposition_p1_geom",
|
||||||
@ -76,7 +75,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
|
|||||||
def start_vel(self):
|
def start_vel(self):
|
||||||
return self._start_vel
|
return self._start_vel
|
||||||
|
|
||||||
def reset(self):
|
def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None):
|
||||||
self.dists = []
|
self.dists = []
|
||||||
self.dists_final = []
|
self.dists_final = []
|
||||||
self.action_costs = []
|
self.action_costs = []
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from typing import Tuple, Union
|
from typing import Union, Tuple
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
@ -7,8 +7,7 @@ from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
|||||||
|
|
||||||
class MPWrapper(RawInterfaceWrapper):
|
class MPWrapper(RawInterfaceWrapper):
|
||||||
|
|
||||||
@property
|
def get_context_mask(self):
|
||||||
def context_mask(self) -> np.ndarray:
|
|
||||||
return np.hstack([
|
return np.hstack([
|
||||||
[False] * 7, # cos
|
[False] * 7, # cos
|
||||||
[False] * 7, # sin
|
[False] * 7, # sin
|
||||||
@ -19,22 +18,25 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
[False] # env steps
|
[False] # env steps
|
||||||
])
|
])
|
||||||
|
|
||||||
@property
|
|
||||||
def start_pos(self):
|
|
||||||
return self._start_pos
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.sim.data.qpos[0:7].copy()
|
return self.env.sim.data.qpos[0:7].copy()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.sim.data.qvel[0:7].copy()
|
return self.env.sim.data.qvel[0:7].copy()
|
||||||
|
|
||||||
@property
|
# TODO: Fix this
|
||||||
def goal_pos(self):
|
def _episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
|
||||||
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
|
if mp.learn_tau:
|
||||||
|
self.env.env.release_step = action[0] / self.env.dt # Tau value
|
||||||
|
return action, None
|
||||||
|
else:
|
||||||
|
return action, None
|
||||||
|
|
||||||
@property
|
def set_context(self, context):
|
||||||
def dt(self) -> Union[float, int]:
|
xyz = np.zeros(3)
|
||||||
return self.env.dt
|
xyz[:2] = context
|
||||||
|
xyz[-1] = 0.840
|
||||||
|
self.env.env.model.body_pos[self.env.env.cup_table_id] = xyz
|
||||||
|
return self.get_observation_from_step(self.env.env._get_obs())
|
||||||
|
@ -1,41 +0,0 @@
|
|||||||
from typing import Union, Tuple
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
|
||||||
|
|
||||||
|
|
||||||
class MPWrapper(RawInterfaceWrapper):
|
|
||||||
@property
|
|
||||||
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
|
||||||
return self.env.sim.data.qpos[0:7].copy()
|
|
||||||
|
|
||||||
@property
|
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
|
||||||
return self.env.sim.data.qvel[0:7].copy()
|
|
||||||
|
|
||||||
def get_context_mask(self):
|
|
||||||
return np.hstack([
|
|
||||||
[False] * 7, # cos
|
|
||||||
[False] * 7, # sin
|
|
||||||
[False] * 7, # joint velocities
|
|
||||||
[False] * 3, # cup_goal_diff_final
|
|
||||||
[False] * 3, # cup_goal_diff_top
|
|
||||||
[True] * 2, # xy position of cup
|
|
||||||
[False] # env steps
|
|
||||||
])
|
|
||||||
|
|
||||||
# TODO: Fix this
|
|
||||||
def _episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
|
|
||||||
if mp.learn_tau:
|
|
||||||
self.env.env.release_step = action[0] / self.env.dt # Tau value
|
|
||||||
return action, None
|
|
||||||
else:
|
|
||||||
return action, None
|
|
||||||
|
|
||||||
def set_context(self, context):
|
|
||||||
xyz = np.zeros(3)
|
|
||||||
xyz[:2] = context
|
|
||||||
xyz[-1] = 0.840
|
|
||||||
self.env.env.model.body_pos[self.env.env.cup_table_id] = xyz
|
|
||||||
return self.get_observation_from_step(self.env.env._get_obs())
|
|
@ -1 +1 @@
|
|||||||
from .new_mp_wrapper import MPWrapper
|
from .mp_wrapper import MPWrapper
|
||||||
|
@ -1,4 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
|
from typing import Tuple, Union, Optional
|
||||||
|
|
||||||
|
from gym.core import ObsType
|
||||||
from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
|
from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
@ -20,7 +23,7 @@ class ALRHalfCheetahJumpEnv(HalfCheetahEnv):
|
|||||||
max_episode_steps=100):
|
max_episode_steps=100):
|
||||||
self.current_step = 0
|
self.current_step = 0
|
||||||
self.max_height = 0
|
self.max_height = 0
|
||||||
self.max_episode_steps = max_episode_steps
|
# self.max_episode_steps = max_episode_steps
|
||||||
self.goal = 0
|
self.goal = 0
|
||||||
self.context = context
|
self.context = context
|
||||||
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
||||||
@ -37,14 +40,14 @@ class ALRHalfCheetahJumpEnv(HalfCheetahEnv):
|
|||||||
|
|
||||||
## Didnt use fell_over, because base env also has no done condition - Paul and Marc
|
## Didnt use fell_over, because base env also has no done condition - Paul and Marc
|
||||||
# fell_over = abs(self.sim.data.qpos[2]) > 2.5 # how to figure out if the cheetah fell over? -> 2.5 oke?
|
# fell_over = abs(self.sim.data.qpos[2]) > 2.5 # how to figure out if the cheetah fell over? -> 2.5 oke?
|
||||||
# TODO: Should a fall over be checked herE?
|
# TODO: Should a fall over be checked here?
|
||||||
done = False
|
done = False
|
||||||
|
|
||||||
ctrl_cost = self.control_cost(action)
|
ctrl_cost = self.control_cost(action)
|
||||||
costs = ctrl_cost
|
costs = ctrl_cost
|
||||||
|
|
||||||
if self.current_step == self.max_episode_steps:
|
if self.current_step == MAX_EPISODE_STEPS_HALFCHEETAHJUMP:
|
||||||
height_goal_distance = -10*np.linalg.norm(self.max_height - self.goal) + 1e-8 if self.context \
|
height_goal_distance = -10 * np.linalg.norm(self.max_height - self.goal) + 1e-8 if self.context \
|
||||||
else self.max_height
|
else self.max_height
|
||||||
rewards = self._forward_reward_weight * height_goal_distance
|
rewards = self._forward_reward_weight * height_goal_distance
|
||||||
else:
|
else:
|
||||||
@ -62,7 +65,8 @@ class ALRHalfCheetahJumpEnv(HalfCheetahEnv):
|
|||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
return np.append(super()._get_obs(), self.goal)
|
return np.append(super()._get_obs(), self.goal)
|
||||||
|
|
||||||
def reset(self):
|
def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
|
||||||
|
options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]:
|
||||||
self.max_height = 0
|
self.max_height = 0
|
||||||
self.current_step = 0
|
self.current_step = 0
|
||||||
self.goal = np.random.uniform(1.1, 1.6, 1) # 1.1 1.6
|
self.goal = np.random.uniform(1.1, 1.6, 1) # 1.1 1.6
|
||||||
@ -80,21 +84,3 @@ class ALRHalfCheetahJumpEnv(HalfCheetahEnv):
|
|||||||
|
|
||||||
observation = self._get_obs()
|
observation = self._get_obs()
|
||||||
return observation
|
return observation
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
render_mode = "human" # "human" or "partial" or "final"
|
|
||||||
env = ALRHalfCheetahJumpEnv()
|
|
||||||
obs = env.reset()
|
|
||||||
|
|
||||||
for i in range(2000):
|
|
||||||
# objective.load_result("/tmp/cma")
|
|
||||||
# test with random actions
|
|
||||||
ac = env.action_space.sample()
|
|
||||||
obs, rew, d, info = env.step(ac)
|
|
||||||
if i % 10 == 0:
|
|
||||||
env.render(mode=render_mode)
|
|
||||||
if d:
|
|
||||||
print('After ', i, ' steps, done: ', d)
|
|
||||||
env.reset()
|
|
||||||
|
|
||||||
env.close()
|
|
@ -20,11 +20,3 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
@property
|
@property
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.env.sim.data.qvel[3:9].copy()
|
return self.env.sim.data.qvel[3:9].copy()
|
||||||
|
|
||||||
@property
|
|
||||||
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
|
||||||
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
|
|
||||||
|
|
||||||
@property
|
|
||||||
def dt(self) -> Union[float, int]:
|
|
||||||
return self.env.dt
|
|
||||||
|
@ -1,22 +0,0 @@
|
|||||||
from typing import Tuple, Union
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
|
||||||
|
|
||||||
|
|
||||||
class MPWrapper(RawInterfaceWrapper):
|
|
||||||
def context_mask(self):
|
|
||||||
return np.hstack([
|
|
||||||
[False] * 17,
|
|
||||||
[True] # goal height
|
|
||||||
])
|
|
||||||
|
|
||||||
@property
|
|
||||||
def current_pos(self) -> Union[float, int, np.ndarray]:
|
|
||||||
return self.env.sim.data.qpos[3:9].copy()
|
|
||||||
|
|
||||||
@property
|
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
|
||||||
return self.env.sim.data.qvel[3:9].copy()
|
|
||||||
|
|
@ -1 +1,2 @@
|
|||||||
from .mp_wrapper import MPWrapper
|
from .mp_wrapper import MPWrapper
|
||||||
|
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import copy
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from gym.envs.mujoco.hopper_v3 import HopperEnv
|
from gym.envs.mujoco.hopper_v3 import HopperEnv
|
||||||
@ -7,10 +8,11 @@ import os
|
|||||||
MAX_EPISODE_STEPS_HOPPERJUMP = 250
|
MAX_EPISODE_STEPS_HOPPERJUMP = 250
|
||||||
|
|
||||||
|
|
||||||
class ALRHopperJumpEnv(HopperEnv):
|
class HopperJumpEnv(HopperEnv):
|
||||||
"""
|
"""
|
||||||
Initialization changes to normal Hopper:
|
Initialization changes to normal Hopper:
|
||||||
- terminate_when_unhealthy: True -> False
|
- terminate_when_unhealthy: True -> False
|
||||||
|
- healthy_reward: 1.0 -> 2.0
|
||||||
- healthy_z_range: (0.7, float('inf')) -> (0.5, float('inf'))
|
- healthy_z_range: (0.7, float('inf')) -> (0.5, float('inf'))
|
||||||
- healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
|
- healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
|
||||||
- exclude_current_positions_from_observation: True -> False
|
- exclude_current_positions_from_observation: True -> False
|
||||||
@ -21,24 +23,28 @@ class ALRHopperJumpEnv(HopperEnv):
|
|||||||
xml_file='hopper_jump.xml',
|
xml_file='hopper_jump.xml',
|
||||||
forward_reward_weight=1.0,
|
forward_reward_weight=1.0,
|
||||||
ctrl_cost_weight=1e-3,
|
ctrl_cost_weight=1e-3,
|
||||||
healthy_reward=1.0,
|
healthy_reward=2.0, # 1 step
|
||||||
penalty=0.0,
|
contact_weight=2.0, # 0 step
|
||||||
|
height_weight=10.0, # 3 step
|
||||||
|
dist_weight=3.0, # 3 step
|
||||||
terminate_when_unhealthy=False,
|
terminate_when_unhealthy=False,
|
||||||
healthy_state_range=(-100.0, 100.0),
|
healthy_state_range=(-100.0, 100.0),
|
||||||
healthy_z_range=(0.5, float('inf')),
|
healthy_z_range=(0.5, float('inf')),
|
||||||
healthy_angle_range=(-float('inf'), float('inf')),
|
healthy_angle_range=(-float('inf'), float('inf')),
|
||||||
reset_noise_scale=5e-3,
|
reset_noise_scale=5e-3,
|
||||||
exclude_current_positions_from_observation=False,
|
exclude_current_positions_from_observation=False,
|
||||||
|
sparse=False,
|
||||||
):
|
):
|
||||||
|
|
||||||
self._steps = 0
|
self.sparse = sparse
|
||||||
|
self._height_weight = height_weight
|
||||||
|
self._dist_weight = dist_weight
|
||||||
|
self._contact_weight = contact_weight
|
||||||
|
|
||||||
self.max_height = 0
|
self.max_height = 0
|
||||||
# self.penalty = penalty
|
|
||||||
self.goal = 0
|
self.goal = 0
|
||||||
|
|
||||||
self._floor_geom_id = None
|
self._steps = 0
|
||||||
self._foot_geom_id = None
|
|
||||||
|
|
||||||
self.contact_with_floor = False
|
self.contact_with_floor = False
|
||||||
self.init_floor_contact = False
|
self.init_floor_contact = False
|
||||||
self.has_left_floor = False
|
self.has_left_floor = False
|
||||||
@ -49,12 +55,12 @@ class ALRHopperJumpEnv(HopperEnv):
|
|||||||
healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
|
healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
|
||||||
exclude_current_positions_from_observation)
|
exclude_current_positions_from_observation)
|
||||||
|
|
||||||
|
# increase initial height
|
||||||
|
self.init_qpos[1] = 1.5
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action):
|
||||||
self._steps += 1
|
self._steps += 1
|
||||||
|
|
||||||
self._floor_geom_id = self.model.geom_name2id('floor')
|
|
||||||
self._foot_geom_id = self.model.geom_name2id('foot_geom')
|
|
||||||
|
|
||||||
self.do_simulation(action, self.frame_skip)
|
self.do_simulation(action, self.frame_skip)
|
||||||
|
|
||||||
height_after = self.get_body_com("torso")[2]
|
height_after = self.get_body_com("torso")[2]
|
||||||
@ -73,18 +79,19 @@ class ALRHopperJumpEnv(HopperEnv):
|
|||||||
ctrl_cost = self.control_cost(action)
|
ctrl_cost = self.control_cost(action)
|
||||||
costs = ctrl_cost
|
costs = ctrl_cost
|
||||||
done = False
|
done = False
|
||||||
goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0]))
|
|
||||||
|
|
||||||
|
goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0]))
|
||||||
if self.contact_dist is None and self.contact_with_floor:
|
if self.contact_dist is None and self.contact_with_floor:
|
||||||
self.contact_dist = goal_dist
|
self.contact_dist = goal_dist
|
||||||
|
|
||||||
rewards = 0
|
rewards = 0
|
||||||
if self._steps >= MAX_EPISODE_STEPS_HOPPERJUMP:
|
if not self.sparse or (self.sparse and self._steps >= MAX_EPISODE_STEPS_HOPPERJUMP):
|
||||||
# healthy_reward = 0 if self.context else self.healthy_reward * self._steps
|
healthy_reward = self.healthy_reward
|
||||||
healthy_reward = self.healthy_reward * 2 # * self._steps
|
distance_reward = goal_dist * self._dist_weight
|
||||||
contact_dist = self.contact_dist if self.contact_dist is not None else 5
|
height_reward = (self.max_height if self.sparse else self.get_body_com("torso")[2]) * self._height_weight
|
||||||
dist_reward = self._forward_reward_weight * (-3 * goal_dist + 10 * self.max_height - 2 * contact_dist)
|
contact_reward = (self.contact_dist or 5) * self._contact_weight
|
||||||
rewards = dist_reward + healthy_reward
|
# dist_reward = self._forward_reward_weight * (-3 * goal_dist + 10 * self.max_height - 2 * contact_dist)
|
||||||
|
rewards = self._forward_reward_weight * (distance_reward + height_reward + contact_reward + healthy_reward)
|
||||||
|
|
||||||
observation = self._get_obs()
|
observation = self._get_obs()
|
||||||
reward = rewards - costs
|
reward = rewards - costs
|
||||||
@ -97,24 +104,40 @@ class ALRHopperJumpEnv(HopperEnv):
|
|||||||
height_rew=self.max_height,
|
height_rew=self.max_height,
|
||||||
healthy_reward=self.healthy_reward * 2,
|
healthy_reward=self.healthy_reward * 2,
|
||||||
healthy=self.is_healthy,
|
healthy=self.is_healthy,
|
||||||
contact_dist=self.contact_dist if self.contact_dist is not None else 0
|
contact_dist=self.contact_dist or 0
|
||||||
)
|
)
|
||||||
return observation, reward, done, info
|
return observation, reward, done, info
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
return np.append(super()._get_obs(), self.goal)
|
goal_dist = self.data.get_site_xpos('foot_site') - np.array([self.goal, 0, 0])
|
||||||
|
return np.concatenate((super(HopperJumpEnv, self)._get_obs(), goal_dist.copy(), self.goal.copy()))
|
||||||
|
|
||||||
def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None, ):
|
def reset_model(self):
|
||||||
self.goal = self.np_random.uniform(1.4, 2.16, 1)[0] # 1.3 2.3
|
super(HopperJumpEnv, self).reset_model()
|
||||||
|
|
||||||
|
self.goal = self.np_random.uniform(0.3, 1.35, 1)[0]
|
||||||
|
self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = np.array([self.goal, 0, 0])
|
||||||
self.max_height = 0
|
self.max_height = 0
|
||||||
self._steps = 0
|
self._steps = 0
|
||||||
return super().reset()
|
|
||||||
|
|
||||||
# overwrite reset_model to make it deterministic
|
noise_low = -np.zeros(self.model.nq)
|
||||||
def reset_model(self):
|
noise_low[3] = -0.5
|
||||||
|
noise_low[4] = -0.2
|
||||||
|
noise_low[5] = 0
|
||||||
|
|
||||||
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
noise_high = np.zeros(self.model.nq)
|
||||||
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
|
noise_high[3] = 0
|
||||||
|
noise_high[4] = 0
|
||||||
|
noise_high[5] = 0.785
|
||||||
|
|
||||||
|
qpos = (
|
||||||
|
self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq) +
|
||||||
|
self.init_qpos
|
||||||
|
)
|
||||||
|
qvel = (
|
||||||
|
# self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv) +
|
||||||
|
self.init_qvel
|
||||||
|
)
|
||||||
|
|
||||||
self.set_state(qpos, qvel)
|
self.set_state(qpos, qvel)
|
||||||
|
|
||||||
@ -123,6 +146,7 @@ class ALRHopperJumpEnv(HopperEnv):
|
|||||||
self.contact_with_floor = False
|
self.contact_with_floor = False
|
||||||
self.init_floor_contact = False
|
self.init_floor_contact = False
|
||||||
self.contact_dist = None
|
self.contact_dist = None
|
||||||
|
|
||||||
return observation
|
return observation
|
||||||
|
|
||||||
def _is_floor_foot_contact(self):
|
def _is_floor_foot_contact(self):
|
||||||
@ -137,184 +161,75 @@ class ALRHopperJumpEnv(HopperEnv):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
|
class HopperJumpStepEnv(HopperJumpEnv):
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
xml_file='hopper_jump.xml',
|
||||||
|
forward_reward_weight=1.0,
|
||||||
|
ctrl_cost_weight=1e-3,
|
||||||
|
healthy_reward=1.0,
|
||||||
|
height_weight=3,
|
||||||
|
dist_weight=3,
|
||||||
|
terminate_when_unhealthy=False,
|
||||||
|
healthy_state_range=(-100.0, 100.0),
|
||||||
|
healthy_z_range=(0.5, float('inf')),
|
||||||
|
healthy_angle_range=(-float('inf'), float('inf')),
|
||||||
|
reset_noise_scale=5e-3,
|
||||||
|
exclude_current_positions_from_observation=False
|
||||||
|
):
|
||||||
|
|
||||||
|
self._height_weight = height_weight
|
||||||
|
self._dist_weight = dist_weight
|
||||||
|
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
|
||||||
|
healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
|
||||||
|
exclude_current_positions_from_observation)
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action):
|
||||||
self._floor_geom_id = self.model.geom_name2id('floor')
|
|
||||||
self._foot_geom_id = self.model.geom_name2id('foot_geom')
|
|
||||||
|
|
||||||
self._steps += 1
|
self._steps += 1
|
||||||
|
|
||||||
self.do_simulation(action, self.frame_skip)
|
self.do_simulation(action, self.frame_skip)
|
||||||
|
|
||||||
height_after = self.get_body_com("torso")[2]
|
height_after = self.get_body_com("torso")[2]
|
||||||
site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
|
site_pos_after = self.data.get_site_xpos('foot_site')
|
||||||
self.max_height = max(height_after, self.max_height)
|
self.max_height = max(height_after, self.max_height)
|
||||||
|
|
||||||
# floor_contact = self._contact_checker(self._floor_geom_id, self._foot_geom_id) if not self.contact_with_floor else False
|
|
||||||
# self.init_floor_contact = floor_contact if not self.init_floor_contact else self.init_floor_contact
|
|
||||||
# self.has_left_floor = not floor_contact if self.init_floor_contact and not self.has_left_floor else self.has_left_floor
|
|
||||||
# self.contact_with_floor = floor_contact if not self.contact_with_floor and self.has_left_floor else self.contact_with_floor
|
|
||||||
|
|
||||||
floor_contact = self._is_floor_foot_contact(self._floor_geom_id,
|
|
||||||
self._foot_geom_id) if not self.contact_with_floor else False
|
|
||||||
if not self.init_floor_contact:
|
|
||||||
self.init_floor_contact = floor_contact
|
|
||||||
if self.init_floor_contact and not self.has_left_floor:
|
|
||||||
self.has_left_floor = not floor_contact
|
|
||||||
if not self.contact_with_floor and self.has_left_floor:
|
|
||||||
self.contact_with_floor = floor_contact
|
|
||||||
|
|
||||||
if self.contact_dist is None and self.contact_with_floor:
|
|
||||||
self.contact_dist = np.linalg.norm(self.sim.data.site_xpos[self.model.site_name2id('foot_site')]
|
|
||||||
- np.array([self.goal, 0, 0]))
|
|
||||||
|
|
||||||
ctrl_cost = self.control_cost(action)
|
ctrl_cost = self.control_cost(action)
|
||||||
|
healthy_reward = self.healthy_reward
|
||||||
|
height_reward = self._height_weight * height_after
|
||||||
|
goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0]))
|
||||||
|
goal_dist_reward = -self._dist_weight * goal_dist
|
||||||
|
dist_reward = self._forward_reward_weight * (goal_dist_reward + height_reward)
|
||||||
|
|
||||||
|
rewards = dist_reward + healthy_reward
|
||||||
costs = ctrl_cost
|
costs = ctrl_cost
|
||||||
done = False
|
done = False
|
||||||
goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0]))
|
|
||||||
rewards = 0
|
# This is only for logging the distance to goal when first having the contact
|
||||||
if self._steps >= self.max_episode_steps:
|
has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
|
||||||
# healthy_reward = 0 if self.context else self.healthy_reward * self._steps
|
|
||||||
healthy_reward = self.healthy_reward * 2 # * self._steps
|
if not self.init_floor_contact:
|
||||||
contact_dist = self.contact_dist if self.contact_dist is not None else 5
|
self.init_floor_contact = has_floor_contact
|
||||||
dist_reward = self._forward_reward_weight * (-3 * goal_dist + 10 * self.max_height - 2 * contact_dist)
|
if self.init_floor_contact and not self.has_left_floor:
|
||||||
rewards = dist_reward + healthy_reward
|
self.has_left_floor = not has_floor_contact
|
||||||
|
if not self.contact_with_floor and self.has_left_floor:
|
||||||
|
self.contact_with_floor = has_floor_contact
|
||||||
|
|
||||||
|
if self.contact_dist is None and self.contact_with_floor:
|
||||||
|
self.contact_dist = goal_dist
|
||||||
|
|
||||||
|
##############################################################
|
||||||
|
|
||||||
observation = self._get_obs()
|
observation = self._get_obs()
|
||||||
reward = rewards - costs
|
reward = rewards - costs
|
||||||
info = {
|
info = {
|
||||||
'height': height_after,
|
'height': height_after,
|
||||||
'x_pos': site_pos_after,
|
'x_pos': site_pos_after,
|
||||||
'max_height': self.max_height,
|
'max_height': copy.copy(self.max_height),
|
||||||
'goal': self.goal,
|
'goal': copy.copy(self.goal),
|
||||||
'goal_dist': goal_dist,
|
'goal_dist': goal_dist,
|
||||||
'height_rew': self.max_height,
|
'height_rew': height_reward,
|
||||||
'healthy_reward': self.healthy_reward * 2,
|
'healthy_reward': healthy_reward,
|
||||||
'healthy': self.is_healthy,
|
'healthy': copy.copy(self.is_healthy),
|
||||||
'contact_dist': self.contact_dist if self.contact_dist is not None else 0
|
'contact_dist': copy.copy(self.contact_dist) or 0
|
||||||
}
|
|
||||||
return observation, reward, done, info
|
|
||||||
|
|
||||||
def reset_model(self):
|
|
||||||
self.init_qpos[1] = 1.5
|
|
||||||
self._floor_geom_id = self.model.geom_name2id('floor')
|
|
||||||
self._foot_geom_id = self.model.geom_name2id('foot_geom')
|
|
||||||
noise_low = -np.zeros(self.model.nq)
|
|
||||||
noise_low[3] = -0.5
|
|
||||||
noise_low[4] = -0.2
|
|
||||||
noise_low[5] = 0
|
|
||||||
|
|
||||||
noise_high = np.zeros(self.model.nq)
|
|
||||||
noise_high[3] = 0
|
|
||||||
noise_high[4] = 0
|
|
||||||
noise_high[5] = 0.785
|
|
||||||
|
|
||||||
rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
|
||||||
qpos = self.init_qpos + rnd_vec
|
|
||||||
qvel = self.init_qvel
|
|
||||||
self.set_state(qpos, qvel)
|
|
||||||
|
|
||||||
observation = self._get_obs()
|
|
||||||
self.has_left_floor = False
|
|
||||||
self.contact_with_floor = False
|
|
||||||
self.init_floor_contact = False
|
|
||||||
self.contact_dist = None
|
|
||||||
|
|
||||||
return observation
|
|
||||||
|
|
||||||
def reset(self):
|
|
||||||
super().reset()
|
|
||||||
self.goal = self.np_random.uniform(0.3, 1.35, 1)[0]
|
|
||||||
self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = np.array([self.goal, 0, 0])
|
|
||||||
return self.reset_model()
|
|
||||||
|
|
||||||
def _get_obs(self):
|
|
||||||
goal_diff = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy() \
|
|
||||||
- np.array([self.goal, 0, 0])
|
|
||||||
return np.concatenate((super(ALRHopperXYJumpEnv, self)._get_obs(), goal_diff))
|
|
||||||
|
|
||||||
def set_context(self, context):
|
|
||||||
# context is 4 dimensional
|
|
||||||
qpos = self.init_qpos
|
|
||||||
qvel = self.init_qvel
|
|
||||||
qpos[-3:] = context[:3]
|
|
||||||
self.goal = context[-1]
|
|
||||||
self.set_state(qpos, qvel)
|
|
||||||
self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = np.array([self.goal, 0, 0])
|
|
||||||
return self._get_obs()
|
|
||||||
|
|
||||||
|
|
||||||
class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
xml_file='hopper_jump.xml',
|
|
||||||
forward_reward_weight=1.0,
|
|
||||||
ctrl_cost_weight=1e-3,
|
|
||||||
healthy_reward=0.0,
|
|
||||||
penalty=0.0,
|
|
||||||
context=True,
|
|
||||||
terminate_when_unhealthy=False,
|
|
||||||
healthy_state_range=(-100.0, 100.0),
|
|
||||||
healthy_z_range=(0.5, float('inf')),
|
|
||||||
healthy_angle_range=(-float('inf'), float('inf')),
|
|
||||||
reset_noise_scale=5e-3,
|
|
||||||
exclude_current_positions_from_observation=False,
|
|
||||||
max_episode_steps=250,
|
|
||||||
height_scale=10,
|
|
||||||
dist_scale=3,
|
|
||||||
healthy_scale=2
|
|
||||||
):
|
|
||||||
self.height_scale = height_scale
|
|
||||||
self.dist_scale = dist_scale
|
|
||||||
self.healthy_scale = healthy_scale
|
|
||||||
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, penalty, context,
|
|
||||||
terminate_when_unhealthy, healthy_state_range, healthy_z_range, healthy_angle_range,
|
|
||||||
reset_noise_scale, exclude_current_positions_from_observation, max_episode_steps)
|
|
||||||
|
|
||||||
def step(self, action):
|
|
||||||
self._floor_geom_id = self.model.geom_name2id('floor')
|
|
||||||
self._foot_geom_id = self.model.geom_name2id('foot_geom')
|
|
||||||
|
|
||||||
self._steps += 1
|
|
||||||
self.do_simulation(action, self.frame_skip)
|
|
||||||
height_after = self.get_body_com("torso")[2]
|
|
||||||
site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
|
|
||||||
self.max_height = max(height_after, self.max_height)
|
|
||||||
ctrl_cost = self.control_cost(action)
|
|
||||||
|
|
||||||
healthy_reward = self.healthy_reward * self.healthy_scale
|
|
||||||
height_reward = self.height_scale * height_after
|
|
||||||
goal_dist = np.atleast_1d(np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object)))[0]
|
|
||||||
goal_dist_reward = -self.dist_scale * goal_dist
|
|
||||||
dist_reward = self._forward_reward_weight * (goal_dist_reward + height_reward)
|
|
||||||
reward = -ctrl_cost + healthy_reward + dist_reward
|
|
||||||
done = False
|
|
||||||
observation = self._get_obs()
|
|
||||||
|
|
||||||
###########################################################
|
|
||||||
# This is only for logging the distance to goal when first having the contact
|
|
||||||
##########################################################
|
|
||||||
floor_contact = self._is_floor_foot_contact(self._floor_geom_id,
|
|
||||||
self._foot_geom_id) if not self.contact_with_floor else False
|
|
||||||
if not self.init_floor_contact:
|
|
||||||
self.init_floor_contact = floor_contact
|
|
||||||
if self.init_floor_contact and not self.has_left_floor:
|
|
||||||
self.has_left_floor = not floor_contact
|
|
||||||
if not self.contact_with_floor and self.has_left_floor:
|
|
||||||
self.contact_with_floor = floor_contact
|
|
||||||
|
|
||||||
if self.contact_dist is None and self.contact_with_floor:
|
|
||||||
self.contact_dist = np.linalg.norm(self.sim.data.site_xpos[self.model.site_name2id('foot_site')]
|
|
||||||
- np.array([self.goal, 0, 0]))
|
|
||||||
info = {
|
|
||||||
'height': height_after,
|
|
||||||
'x_pos': site_pos_after,
|
|
||||||
'max_height': self.max_height,
|
|
||||||
'goal': self.goal,
|
|
||||||
'goal_dist': goal_dist,
|
|
||||||
'height_rew': self.max_height,
|
|
||||||
'healthy_reward': self.healthy_reward * self.healthy_reward,
|
|
||||||
'healthy': self.is_healthy,
|
|
||||||
'contact_dist': self.contact_dist if self.contact_dist is not None else 0
|
|
||||||
}
|
}
|
||||||
return observation, reward, done, info
|
return observation, reward, done, info
|
||||||
|
@ -8,6 +8,7 @@ from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
|||||||
class MPWrapper(RawInterfaceWrapper):
|
class MPWrapper(RawInterfaceWrapper):
|
||||||
|
|
||||||
# Random x goal + random init pos
|
# Random x goal + random init pos
|
||||||
|
@property
|
||||||
def context_mask(self):
|
def context_mask(self):
|
||||||
return np.hstack([
|
return np.hstack([
|
||||||
[False] * (2 + int(not self.exclude_current_positions_from_observation)), # position
|
[False] * (2 + int(not self.exclude_current_positions_from_observation)), # position
|
||||||
|
@ -6,8 +6,9 @@ from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
|||||||
|
|
||||||
|
|
||||||
class MPWrapper(RawInterfaceWrapper):
|
class MPWrapper(RawInterfaceWrapper):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def context_mask(self) -> np.ndarray:
|
def context_mask(self):
|
||||||
return np.hstack([
|
return np.hstack([
|
||||||
[False] * 17,
|
[False] * 17,
|
||||||
[True] # goal pos
|
[True] # goal pos
|
||||||
@ -15,16 +16,8 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def current_pos(self) -> Union[float, int, np.ndarray]:
|
def current_pos(self) -> Union[float, int, np.ndarray]:
|
||||||
return self.env.sim.data.qpos[3:6].copy()
|
return self.env.data.qpos[3:6].copy()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.env.sim.data.qvel[3:6].copy()
|
return self.env.data.qvel[3:6].copy()
|
||||||
|
|
||||||
@property
|
|
||||||
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
|
||||||
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
|
|
||||||
|
|
||||||
@property
|
|
||||||
def dt(self) -> Union[float, int]:
|
|
||||||
return self.env.dt
|
|
||||||
|
@ -1,25 +0,0 @@
|
|||||||
from typing import Tuple, Union
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
|
||||||
|
|
||||||
|
|
||||||
class MPWrapper(RawInterfaceWrapper):
|
|
||||||
def context_mask(self):
|
|
||||||
return np.hstack([
|
|
||||||
[False] * 17,
|
|
||||||
[True] # goal pos
|
|
||||||
])
|
|
||||||
|
|
||||||
@property
|
|
||||||
def current_pos(self) -> Union[float, int, np.ndarray]:
|
|
||||||
return self.env.sim.data.qpos[3:6].copy()
|
|
||||||
|
|
||||||
@property
|
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
|
||||||
return self.env.sim.data.qvel[3:6].copy()
|
|
||||||
|
|
||||||
@property
|
|
||||||
def dt(self) -> Union[float, int]:
|
|
||||||
return self.env.dt
|
|
@ -15,14 +15,13 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
[True] * 2, # goal position
|
[True] * 2, # goal position
|
||||||
[False] * self.env.n_links, # angular velocity
|
[False] * self.env.n_links, # angular velocity
|
||||||
[False] * 3, # goal distance
|
[False] * 3, # goal distance
|
||||||
# self.get_body_com("target"), # only return target to make problem harder
|
# [False], # step
|
||||||
[False], # step
|
|
||||||
])
|
])
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.env.sim.data.qpos.flat[:self.env.n_links]
|
return self.env.data.qpos.flat[:self.env.n_links]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.env.sim.data.qvel.flat[:self.env.n_links]
|
return self.env.data.qvel.flat[:self.env.n_links]
|
||||||
|
@ -94,12 +94,12 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
|
|||||||
return self._get_obs()
|
return self._get_obs()
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
theta = self.sim.data.qpos.flat[:self.n_links]
|
theta = self.data.qpos.flat[:self.n_links]
|
||||||
target = self.get_body_com("target")
|
target = self.get_body_com("target")
|
||||||
return np.concatenate([
|
return np.concatenate([
|
||||||
np.cos(theta),
|
np.cos(theta),
|
||||||
np.sin(theta),
|
np.sin(theta),
|
||||||
target[:2], # x-y of goal position
|
target[:2], # x-y of goal position
|
||||||
self.sim.data.qvel.flat[:self.n_links], # angular velocity
|
self.data.qvel.flat[:self.n_links], # angular velocity
|
||||||
self.get_body_com("fingertip") - target, # goal distance
|
self.get_body_com("fingertip") - target, # goal distance
|
||||||
])
|
])
|
||||||
|
@ -1 +1 @@
|
|||||||
from .new_mp_wrapper import MPWrapper
|
from .mp_wrapper import MPWrapper
|
||||||
|
@ -6,8 +6,9 @@ from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
|||||||
|
|
||||||
|
|
||||||
class MPWrapper(RawInterfaceWrapper):
|
class MPWrapper(RawInterfaceWrapper):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def context_mask(self) -> np.ndarray:
|
def context_mask(self):
|
||||||
return np.hstack([
|
return np.hstack([
|
||||||
[False] * 17,
|
[False] * 17,
|
||||||
[True] # goal pos
|
[True] # goal pos
|
||||||
@ -15,16 +16,8 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def current_pos(self) -> Union[float, int, np.ndarray]:
|
def current_pos(self) -> Union[float, int, np.ndarray]:
|
||||||
return self.env.sim.data.qpos[3:9].copy()
|
return self.env.data.qpos[3:9].copy()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.env.sim.data.qvel[3:9].copy()
|
return self.env.data.qvel[3:9].copy()
|
||||||
|
|
||||||
@property
|
|
||||||
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
|
||||||
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
|
|
||||||
|
|
||||||
@property
|
|
||||||
def dt(self) -> Union[float, int]:
|
|
||||||
return self.env.dt
|
|
||||||
|
@ -1,26 +0,0 @@
|
|||||||
from typing import Tuple, Union
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
|
||||||
|
|
||||||
|
|
||||||
class MPWrapper(RawInterfaceWrapper):
|
|
||||||
def context_mask(self):
|
|
||||||
return np.hstack([
|
|
||||||
[False] * 17,
|
|
||||||
[True] # goal pos
|
|
||||||
])
|
|
||||||
|
|
||||||
@property
|
|
||||||
def current_pos(self) -> Union[float, int, np.ndarray]:
|
|
||||||
return self.env.sim.data.qpos[3:9].copy()
|
|
||||||
|
|
||||||
@property
|
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
|
||||||
return self.env.sim.data.qvel[3:9].copy()
|
|
||||||
|
|
||||||
@property
|
|
||||||
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
|
||||||
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
|||||||
from typing import Tuple, Union
|
from typing import Tuple, Union, Optional
|
||||||
|
|
||||||
import gym
|
import gym
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -19,7 +19,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
|||||||
duration: float,
|
duration: float,
|
||||||
verbose: int = 1,
|
verbose: int = 1,
|
||||||
learn_sub_trajectories: bool = False,
|
learn_sub_trajectories: bool = False,
|
||||||
replanning_schedule: Union[None, callable] = None,
|
replanning_schedule: Optional[callable] = None,
|
||||||
reward_aggregation: callable = np.sum
|
reward_aggregation: callable = np.sum
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@ -41,7 +41,8 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
|||||||
|
|
||||||
self.duration = duration
|
self.duration = duration
|
||||||
self.learn_sub_trajectories = learn_sub_trajectories
|
self.learn_sub_trajectories = learn_sub_trajectories
|
||||||
self.replanning_schedule = replanning_schedule
|
self.do_replanning = replanning_schedule is not None
|
||||||
|
self.replanning_schedule = replanning_schedule or (lambda *x: False)
|
||||||
self.current_traj_steps = 0
|
self.current_traj_steps = 0
|
||||||
|
|
||||||
# trajectory generation
|
# trajectory generation
|
||||||
@ -55,12 +56,10 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
|||||||
self.reward_aggregation = reward_aggregation
|
self.reward_aggregation = reward_aggregation
|
||||||
|
|
||||||
# spaces
|
# spaces
|
||||||
self.return_context_observation = not (self.learn_sub_trajectories or replanning_schedule)
|
self.return_context_observation = not (learn_sub_trajectories or self.do_replanning)
|
||||||
self.traj_gen_action_space = self.get_traj_gen_action_space()
|
self.traj_gen_action_space = self._get_traj_gen_action_space()
|
||||||
self.action_space = self.get_action_space()
|
self.action_space = self._get_action_space()
|
||||||
self.observation_space = spaces.Box(low=self.env.observation_space.low[self.env.context_mask],
|
self.observation_space = self._get_observation_space()
|
||||||
high=self.env.observation_space.high[self.env.context_mask],
|
|
||||||
dtype=self.env.observation_space.dtype)
|
|
||||||
|
|
||||||
# rendering
|
# rendering
|
||||||
self.render_kwargs = {}
|
self.render_kwargs = {}
|
||||||
@ -76,23 +75,24 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
|||||||
# TODO: Bruce said DMP, ProMP, ProDMP can have 0 bc_time for sequencing
|
# TODO: Bruce said DMP, ProMP, ProDMP can have 0 bc_time for sequencing
|
||||||
# TODO Check with Bruce for replanning
|
# TODO Check with Bruce for replanning
|
||||||
self.traj_gen.set_boundary_conditions(
|
self.traj_gen.set_boundary_conditions(
|
||||||
bc_time=np.zeros((1,)) if not self.replanning_schedule else self.current_traj_steps * self.dt,
|
bc_time=np.zeros((1,)) if not self.do_replanning else np.array([self.current_traj_steps * self.dt]),
|
||||||
bc_pos=self.current_pos, bc_vel=self.current_vel)
|
bc_pos=self.current_pos, bc_vel=self.current_vel)
|
||||||
# TODO: is this correct for replanning? Do we need to adjust anything here?
|
# TODO: is this correct for replanning? Do we need to adjust anything here?
|
||||||
self.traj_gen.set_duration(None if self.learn_sub_trajectories else np.array([self.duration]), np.array([self.dt]))
|
self.traj_gen.set_duration(None if self.learn_sub_trajectories else np.array([self.duration]),
|
||||||
|
np.array([self.dt]))
|
||||||
traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True)
|
traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True)
|
||||||
trajectory_tensor, velocity_tensor = traj_dict['pos'], traj_dict['vel']
|
trajectory_tensor, velocity_tensor = traj_dict['pos'], traj_dict['vel']
|
||||||
|
|
||||||
return get_numpy(trajectory_tensor), get_numpy(velocity_tensor)
|
return get_numpy(trajectory_tensor), get_numpy(velocity_tensor)
|
||||||
|
|
||||||
def get_traj_gen_action_space(self):
|
def _get_traj_gen_action_space(self):
|
||||||
"""This function can be used to set up an individual space for the parameters of the traj_gen."""
|
"""This function can be used to set up an individual space for the parameters of the traj_gen."""
|
||||||
min_action_bounds, max_action_bounds = self.traj_gen.get_param_bounds()
|
min_action_bounds, max_action_bounds = self.traj_gen.get_param_bounds()
|
||||||
mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
|
action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
|
||||||
dtype=np.float32)
|
dtype=self.env.action_space.dtype)
|
||||||
return mp_action_space
|
return action_space
|
||||||
|
|
||||||
def get_action_space(self):
|
def _get_action_space(self):
|
||||||
"""
|
"""
|
||||||
This function can be used to modify the action space for considering actions which are not learned via motion
|
This function can be used to modify the action space for considering actions which are not learned via motion
|
||||||
primitives. E.g. ball releasing time for the beer pong task. By default, it is the parameter space of the
|
primitives. E.g. ball releasing time for the beer pong task. By default, it is the parameter space of the
|
||||||
@ -102,12 +102,21 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
|||||||
try:
|
try:
|
||||||
return self.traj_gen_action_space
|
return self.traj_gen_action_space
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
return self.get_traj_gen_action_space()
|
return self._get_traj_gen_action_space()
|
||||||
|
|
||||||
|
def _get_observation_space(self):
|
||||||
|
mask = self.env.context_mask
|
||||||
|
if not self.return_context_observation:
|
||||||
|
# return full observation
|
||||||
|
mask = np.ones_like(mask, dtype=bool)
|
||||||
|
min_obs_bound = self.env.observation_space.low[mask]
|
||||||
|
max_obs_bound = self.env.observation_space.high[mask]
|
||||||
|
return spaces.Box(low=min_obs_bound, high=max_obs_bound, dtype=self.env.observation_space.dtype)
|
||||||
|
|
||||||
def step(self, action: np.ndarray):
|
def step(self, action: np.ndarray):
|
||||||
""" This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
|
""" This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
|
||||||
|
|
||||||
# agent to learn when to release the ball
|
# TODO remove this part, right now only needed for beer pong
|
||||||
mp_params, env_spec_params = self.env._episode_callback(action, self.traj_gen)
|
mp_params, env_spec_params = self.env._episode_callback(action, self.traj_gen)
|
||||||
trajectory, velocity = self.get_trajectory(mp_params)
|
trajectory, velocity = self.get_trajectory(mp_params)
|
||||||
|
|
||||||
@ -121,10 +130,8 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
|||||||
infos = dict()
|
infos = dict()
|
||||||
done = False
|
done = False
|
||||||
|
|
||||||
for t, pos_vel in enumerate(zip(trajectory, velocity)):
|
for t, (pos, vel) in enumerate(zip(trajectory, velocity)):
|
||||||
step_action = self.tracking_controller.get_action(pos_vel[0], pos_vel[1], self.current_pos,
|
step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel)
|
||||||
self.current_vel)
|
|
||||||
step_action = self._step_callback(t, env_spec_params, step_action) # include possible callback info
|
|
||||||
c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
|
c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
|
||||||
# print('step/clipped action ratio: ', step_action/c_action)
|
# print('step/clipped action ratio: ', step_action/c_action)
|
||||||
obs, c_reward, done, info = self.env.step(c_action)
|
obs, c_reward, done, info = self.env.step(c_action)
|
||||||
@ -142,10 +149,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
|||||||
if self.render_kwargs:
|
if self.render_kwargs:
|
||||||
self.render(**self.render_kwargs)
|
self.render(**self.render_kwargs)
|
||||||
|
|
||||||
if done:
|
if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
|
||||||
break
|
|
||||||
|
|
||||||
if self.replanning_schedule and self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
|
|
||||||
t + 1 + self.current_traj_steps):
|
t + 1 + self.current_traj_steps):
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -153,25 +157,26 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
|||||||
self.current_traj_steps += t + 1
|
self.current_traj_steps += t + 1
|
||||||
|
|
||||||
if self.verbose >= 2:
|
if self.verbose >= 2:
|
||||||
infos['trajectory'] = trajectory
|
infos['positions'] = trajectory
|
||||||
|
infos['velocities'] = velocity
|
||||||
infos['step_actions'] = actions[:t + 1]
|
infos['step_actions'] = actions[:t + 1]
|
||||||
infos['step_observations'] = observations[:t + 1]
|
infos['step_observations'] = observations[:t + 1]
|
||||||
infos['step_rewards'] = rewards[:t + 1]
|
infos['step_rewards'] = rewards[:t + 1]
|
||||||
|
|
||||||
infos['trajectory_length'] = t + 1
|
infos['trajectory_length'] = t + 1
|
||||||
trajectory_return = self.reward_aggregation(rewards[:t + 1])
|
trajectory_return = self.reward_aggregation(rewards[:t + 1])
|
||||||
return obs, trajectory_return, done, infos
|
return self.observation(obs), trajectory_return, done, infos
|
||||||
|
|
||||||
def render(self, **kwargs):
|
def render(self, **kwargs):
|
||||||
"""Only set render options here, such that they can be used during the rollout.
|
"""Only set render options here, such that they can be used during the rollout.
|
||||||
This only needs to be called once"""
|
This only needs to be called once"""
|
||||||
self.render_kwargs = kwargs
|
self.render_kwargs = kwargs or self.render_kwargs
|
||||||
# self.env.render(mode=self.render_mode, **self.render_kwargs)
|
# self.env.render(mode=self.render_mode, **self.render_kwargs)
|
||||||
self.env.render(**kwargs)
|
self.env.render(**self.render_kwargs)
|
||||||
|
|
||||||
def reset(self, **kwargs):
|
def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None):
|
||||||
self.current_traj_steps = 0
|
self.current_traj_steps = 0
|
||||||
super(BlackBoxWrapper, self).reset(**kwargs)
|
return super(BlackBoxWrapper, self).reset(seed=seed, return_info=return_info, options=options)
|
||||||
|
|
||||||
def plot_trajs(self, des_trajs, des_vels):
|
def plot_trajs(self, des_trajs, des_vels):
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
@ -57,7 +57,8 @@ class RawInterfaceWrapper(gym.Wrapper):
|
|||||||
# return bool(self.replanning_model(s))
|
# return bool(self.replanning_model(s))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
|
def _episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[
|
||||||
|
np.ndarray, Union[np.ndarray, None]]:
|
||||||
"""
|
"""
|
||||||
Used to extract the parameters for the motion primitive and other parameters from an action array which might
|
Used to extract the parameters for the motion primitive and other parameters from an action array which might
|
||||||
include other actions like ball releasing time for the beer pong environment.
|
include other actions like ball releasing time for the beer pong environment.
|
||||||
@ -70,20 +71,3 @@ class RawInterfaceWrapper(gym.Wrapper):
|
|||||||
Tuple: mp_arguments and other arguments
|
Tuple: mp_arguments and other arguments
|
||||||
"""
|
"""
|
||||||
return action, None
|
return action, None
|
||||||
|
|
||||||
def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[
|
|
||||||
np.ndarray]:
|
|
||||||
"""
|
|
||||||
This function can be used to modify the step_action with additional parameters e.g. releasing the ball in the
|
|
||||||
Beerpong env. The parameters used should not be part of the motion primitive parameters.
|
|
||||||
Returns step_action by default, can be overwritten in individual mp_wrappers.
|
|
||||||
Args:
|
|
||||||
t: the current time step of the episode
|
|
||||||
env_spec_params: the environment specific parameter, as defined in function _episode_callback
|
|
||||||
(e.g. ball release time in Beer Pong)
|
|
||||||
step_action: the current step-based action
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
modified step action
|
|
||||||
"""
|
|
||||||
return step_action
|
|
||||||
|
@ -87,10 +87,10 @@ if __name__ == '__main__':
|
|||||||
render = True
|
render = True
|
||||||
|
|
||||||
# Basic gym task
|
# Basic gym task
|
||||||
example_general("Pendulum-v0", seed=10, iterations=200, render=render)
|
example_general("Reacher5d-v0", seed=10, iterations=200, render=render)
|
||||||
|
|
||||||
# # Basis task from framework
|
# # Basis task from framework
|
||||||
example_general("alr_envs:HoleReacher-v0", seed=10, iterations=200, render=render)
|
example_general("Reacher-v0", seed=10, iterations=200, render=render)
|
||||||
|
|
||||||
# # OpenAI Mujoco task
|
# # OpenAI Mujoco task
|
||||||
example_general("HalfCheetah-v2", seed=10, render=render)
|
example_general("HalfCheetah-v2", seed=10, render=render)
|
||||||
|
@ -45,7 +45,7 @@ def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, rend
|
|||||||
obs = env.reset()
|
obs = env.reset()
|
||||||
|
|
||||||
|
|
||||||
def example_custom_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True):
|
def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render=True):
|
||||||
"""
|
"""
|
||||||
Example for running a motion primitive based environment, which is already registered
|
Example for running a motion primitive based environment, which is already registered
|
||||||
Args:
|
Args:
|
||||||
@ -57,32 +57,12 @@ def example_custom_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=
|
|||||||
Returns:
|
Returns:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
# Changing the traj_gen_kwargs is possible by providing them to gym.
|
# Changing the arguments of the black box env is possible by providing them to gym as with all kwargs.
|
||||||
# E.g. here by providing way to many basis functions
|
# E.g. here for way to many basis functions
|
||||||
# mp_dict = alr_envs.from_default_config('ALRReacher-v0', {'basis_generator_kwargs': {'num_basis': 10}})
|
env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000})
|
||||||
# mp_dict.update({'basis_generator_kwargs': {'num_basis': 10}})
|
|
||||||
# mp_dict.update({'black_box_kwargs': {'learn_sub_trajectories': True}})
|
# mp_dict.update({'black_box_kwargs': {'learn_sub_trajectories': True}})
|
||||||
# mp_dict.update({'black_box_kwargs': {'do_replanning': lambda pos, vel, t: lambda t: t % 100}})
|
# mp_dict.update({'black_box_kwargs': {'do_replanning': lambda pos, vel, t: lambda t: t % 100}})
|
||||||
|
|
||||||
# default env with promp and no learn_sub_trajectories and replanning
|
|
||||||
# env = alr_envs.make('ALRReacherProMP-v0', 1, n_links=7)
|
|
||||||
env = alr_envs.make('ALRReacherProMP-v0', 1, basis_generator_kwargs={'num_basis': 10}, n_links=7)
|
|
||||||
# env = alr_envs.make('ALRReacher-v0', seed=1, bb_kwargs=mp_dict, n_links=1)
|
|
||||||
# env = alr_envs.make_bb('ALRReacher-v0', **mp_dict)
|
|
||||||
|
|
||||||
mp_kwargs = {
|
|
||||||
"num_dof": 5,
|
|
||||||
"num_basis": 1000,
|
|
||||||
"duration": 2,
|
|
||||||
"learn_goal": True,
|
|
||||||
"alpha_phase": 2,
|
|
||||||
"bandwidth_factor": 2,
|
|
||||||
"policy_type": "velocity",
|
|
||||||
"weights_scale": 50,
|
|
||||||
"goal_scale": 0.1
|
|
||||||
}
|
|
||||||
env = alr_envs.make(env_name, seed, mp_kwargs=mp_kwargs)
|
|
||||||
|
|
||||||
# This time rendering every trajectory
|
# This time rendering every trajectory
|
||||||
if render:
|
if render:
|
||||||
env.render(mode="human")
|
env.render(mode="human")
|
||||||
@ -100,6 +80,7 @@ def example_custom_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=
|
|||||||
print(rewards)
|
print(rewards)
|
||||||
rewards = 0
|
rewards = 0
|
||||||
obs = env.reset()
|
obs = env.reset()
|
||||||
|
print(obs)
|
||||||
|
|
||||||
|
|
||||||
def example_fully_custom_mp(seed=1, iterations=1, render=True):
|
def example_fully_custom_mp(seed=1, iterations=1, render=True):
|
||||||
@ -169,7 +150,7 @@ if __name__ == '__main__':
|
|||||||
# example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render)
|
# example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render)
|
||||||
|
|
||||||
# Altered basis functions
|
# Altered basis functions
|
||||||
example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
|
example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=10, render=render)
|
||||||
|
|
||||||
# Custom MP
|
# Custom MP
|
||||||
example_fully_custom_mp(seed=10, iterations=1, render=render)
|
# example_fully_custom_mp(seed=10, iterations=1, render=render)
|
||||||
|
@ -43,10 +43,10 @@ def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwa
|
|||||||
|
|
||||||
|
|
||||||
def make(env_id, seed, **kwargs):
|
def make(env_id, seed, **kwargs):
|
||||||
# spec = registry.get(env_id) # TODO: This doesn't work with gym ==0.21.0
|
# TODO: This doesn't work with gym ==0.21.0
|
||||||
spec = registry.spec(env_id)
|
|
||||||
# This access is required to allow for nested dict updates
|
# This access is required to allow for nested dict updates
|
||||||
all_kwargs = deepcopy(spec._kwargs)
|
spec = registry.get(env_id)
|
||||||
|
all_kwargs = deepcopy(spec.kwargs)
|
||||||
nested_update(all_kwargs, kwargs)
|
nested_update(all_kwargs, kwargs)
|
||||||
return _make(env_id, seed, **all_kwargs)
|
return _make(env_id, seed, **all_kwargs)
|
||||||
|
|
||||||
@ -148,7 +148,7 @@ def make_bb(
|
|||||||
phase_kwargs: kwargs for the phase generator
|
phase_kwargs: kwargs for the phase generator
|
||||||
controller_kwargs: kwargs for the tracking controller
|
controller_kwargs: kwargs for the tracking controller
|
||||||
env_id: base_env_name,
|
env_id: base_env_name,
|
||||||
wrappers: list of wrappers (at least an BlackBoxWrapper),
|
wrappers: list of wrappers (at least an RawInterfaceWrapper),
|
||||||
seed: seed of environment
|
seed: seed of environment
|
||||||
traj_gen_kwargs: dict of at least {num_dof: int, num_basis: int} for DMP
|
traj_gen_kwargs: dict of at least {num_dof: int, num_basis: int} for DMP
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from collections import Mapping, MutableMapping
|
from collections.abc import Mapping, MutableMapping
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch as ch
|
import torch as ch
|
||||||
@ -46,4 +46,5 @@ def nested_update(base: MutableMapping, update):
|
|||||||
"""
|
"""
|
||||||
for k, v in update.items():
|
for k, v in update.items():
|
||||||
base[k] = nested_update(base.get(k, {}), v) if isinstance(v, Mapping) else v
|
base[k] = nested_update(base.get(k, {}), v) if isinstance(v, Mapping) else v
|
||||||
|
return base
|
||||||
|
|
||||||
|
29
setup.py
29
setup.py
@ -1,24 +1,35 @@
|
|||||||
|
import itertools
|
||||||
|
|
||||||
from setuptools import setup
|
from setuptools import setup
|
||||||
|
|
||||||
|
# Environment-specific dependencies for dmc and metaworld
|
||||||
|
extras = {
|
||||||
|
"dmc": ["dm_control"],
|
||||||
|
"meta": ["mujoco_py<2.2,>=2.1, git+https://github.com/rlworkgroup/metaworld.git@master#egg=metaworld"],
|
||||||
|
"mujoco": ["mujoco==2.2.0", "imageio>=2.14.1"],
|
||||||
|
}
|
||||||
|
|
||||||
|
# All dependencies
|
||||||
|
all_groups = set(extras.keys())
|
||||||
|
extras["all"] = list(set(itertools.chain.from_iterable(map(lambda group: extras[group], all_groups))))
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='alr_envs',
|
author='Fabian Otto, Onur Celik, Marcel Sandermann, Maximilian Huettenrauch',
|
||||||
|
name='simple_gym',
|
||||||
version='0.0.1',
|
version='0.0.1',
|
||||||
packages=['alr_envs', 'alr_envs.alr', 'alr_envs.open_ai', 'alr_envs.dmc', 'alr_envs.meta', 'alr_envs.utils'],
|
packages=['alr_envs', 'alr_envs.alr', 'alr_envs.open_ai', 'alr_envs.dmc', 'alr_envs.meta', 'alr_envs.utils'],
|
||||||
install_requires=[
|
install_requires=[
|
||||||
'gym',
|
'gym',
|
||||||
'PyQt5',
|
'PyQt5',
|
||||||
#'matplotlib',
|
# 'matplotlib',
|
||||||
#'mp_env_api @ git+https://github.com/ALRhub/motion_primitive_env_api.git',
|
# 'mp_env_api @ git+https://github.com/ALRhub/motion_primitive_env_api.git',
|
||||||
# 'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git',
|
# 'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git',
|
||||||
'mujoco-py<2.1,>=2.0',
|
'mujoco-py<2.1,>=2.0',
|
||||||
'dm_control',
|
'dm_control',
|
||||||
'metaworld @ git+https://github.com/rlworkgroup/metaworld.git@master#egg=metaworld',
|
'metaworld @ git+https://github.com/rlworkgroup/metaworld.git@master#egg=metaworld',
|
||||||
],
|
],
|
||||||
|
|
||||||
url='https://github.com/ALRhub/alr_envs/',
|
url='https://github.com/ALRhub/alr_envs/',
|
||||||
license='MIT',
|
# license='AGPL-3.0 license',
|
||||||
author='Fabian Otto, Marcel Sandermann, Maximilian Huettenrauch',
|
|
||||||
author_email='',
|
author_email='',
|
||||||
description='Custom Gym environments for various (robotics) tasks. integration of DMC environments into the'
|
description='Simple Gym: Aggregating interface for various RL environments with support for Black Box approaches.'
|
||||||
'gym interface, and support for using motion primitives with gym environments.'
|
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user