Added environments from Paul + Marc

This commit is contained in:
Onur 2022-04-13 17:28:25 +02:00
parent 7ffe94dcfd
commit 092525889b
26 changed files with 1795 additions and 2 deletions

View File

@ -11,6 +11,13 @@ from .mujoco.reacher.alr_reacher import ALRReacherEnv
from .mujoco.reacher.balancing import BalancingEnv
from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP
from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP
from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP
from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX
from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
@ -185,6 +192,69 @@ register(
}
)
register(
id='ALRAntJump-v0',
entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP
}
)
register(
id='ALRHalfCheetahJump-v0',
entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP
}
)
register(
id='ALRHopperJump-v0',
entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP
}
)
register(
id='ALRHopperJumpOnBox-v0',
entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX
}
)
register(
id='ALRHopperThrow-v0',
entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW
}
)
register(
id='ALRHopperThrowInBasket-v0',
entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
}
)
register(
id='ALRWalker2DJump-v0',
entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP
}
)
## Balancing Reacher
register(
@ -431,3 +501,204 @@ for _v, cd in enumerate(ctxt_dim):
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
## AntJump
for _v, cd in enumerate(ctxt_dim):
_env_id = f'TableTennisProMP-v{_v}'
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": "alr_envs:TableTennis{}DCtxt-v0".format(cd),
"wrappers": [mujoco.table_tennis.MPWrapper],
"mp_kwargs": {
"num_dof": 7,
"num_basis": 2,
"duration": 1.25,
"post_traj_time": 1.5,
"policy_type": "motor",
"weights_scale": 1.0,
"zero_start": True,
"zero_goal": False,
"policy_kwargs": {
"p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
"d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
}
}
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
register(
id='ALRAntJumpProMP-v0',
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": "alr_envs:ALRAntJump-v0",
"wrappers": [mujoco.ant_jump.MPWrapper],
"mp_kwargs": {
"num_dof": 8,
"num_basis": 5,
"duration": 10,
"post_traj_time": 0,
"policy_type": "motor",
"weights_scale": 1.0,
"zero_start": True,
"zero_goal": False,
"policy_kwargs": {
"p_gains": np.ones(8),
"d_gains": 0.1*np.ones(8)
}
}
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRAntJumpProMP-v0')
register(
id='ALRHalfCheetahJumpProMP-v0',
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": "alr_envs:ALRHalfCheetahJump-v0",
"wrappers": [mujoco.half_cheetah_jump.MPWrapper],
"mp_kwargs": {
"num_dof": 6,
"num_basis": 5,
"duration": 5,
"post_traj_time": 0,
"policy_type": "motor",
"weights_scale": 1.0,
"zero_start": True,
"zero_goal": False,
"policy_kwargs": {
"p_gains": np.ones(6),
"d_gains": 0.1*np.ones(6)
}
}
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHalfCheetahJumpProMP-v0')
register(
id='ALRHopperJumpProMP-v0',
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": "alr_envs:ALRHopperJump-v0",
"wrappers": [mujoco.hopper_jump.MPWrapper],
"mp_kwargs": {
"num_dof": 3,
"num_basis": 5,
"duration": 2,
"post_traj_time": 0,
"policy_type": "motor",
"weights_scale": 1.0,
"zero_start": True,
"zero_goal": False,
"policy_kwargs": {
"p_gains": np.ones(3),
"d_gains": 0.1*np.ones(3)
}
}
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperJumpProMP-v0')
register(
id='ALRHopperJumpOnBoxProMP-v0',
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": "alr_envs:ALRHopperJumpOnBox-v0",
"wrappers": [mujoco.hopper_jump.MPWrapper],
"mp_kwargs": {
"num_dof": 3,
"num_basis": 5,
"duration": 2,
"post_traj_time": 0,
"policy_type": "motor",
"weights_scale": 1.0,
"zero_start": True,
"zero_goal": False,
"policy_kwargs": {
"p_gains": np.ones(3),
"d_gains": 0.1*np.ones(3)
}
}
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperJumpOnBoxProMP-v0')
register(
id='ALRHopperThrowProMP-v0',
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": "alr_envs:ALRHopperThrow-v0",
"wrappers": [mujoco.hopper_throw.MPWrapper],
"mp_kwargs": {
"num_dof": 3,
"num_basis": 5,
"duration": 2,
"post_traj_time": 0,
"policy_type": "motor",
"weights_scale": 1.0,
"zero_start": True,
"zero_goal": False,
"policy_kwargs": {
"p_gains": np.ones(3),
"d_gains": 0.1*np.ones(3)
}
}
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperThrowProMP-v0')
register(
id='ALRHopperThrowInBasketProMP-v0',
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": "alr_envs:ALRHopperThrowInBasket-v0",
"wrappers": [mujoco.hopper_throw.MPWrapper],
"mp_kwargs": {
"num_dof": 3,
"num_basis": 5,
"duration": 2,
"post_traj_time": 0,
"policy_type": "motor",
"weights_scale": 1.0,
"zero_start": True,
"zero_goal": False,
"policy_kwargs": {
"p_gains": np.ones(3),
"d_gains": 0.1*np.ones(3)
}
}
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperThrowInBasketProMP-v0')
register(
id='ALRWalker2DJumpProMP-v0',
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": "alr_envs:ALRWalker2DJump-v0",
"wrappers": [mujoco.walker_2d_jump.MPWrapper],
"mp_kwargs": {
"num_dof": 6,
"num_basis": 5,
"duration": 2.4,
"post_traj_time": 0,
"policy_type": "motor",
"weights_scale": 1.0,
"zero_start": True,
"zero_goal": False,
"policy_kwargs": {
"p_gains": np.ones(6),
"d_gains": 0.1*np.ones(6)
}
}
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRWalker2DJumpProMP-v0')

View File

@ -4,3 +4,10 @@ from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
from .table_tennis.tt_gym import TTEnvGym
from .beerpong.beerpong import ALRBeerBongEnv
from .ant_jump.ant_jump import ALRAntJumpEnv
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
from .hopper_jump.hopper_jump import ALRHopperJumpEnv
from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import ALRHopperThrowEnv
from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv

View File

@ -0,0 +1 @@
from .mp_wrapper import MPWrapper

View File

@ -0,0 +1,117 @@
import numpy as np
from gym.envs.mujoco.ant_v3 import AntEnv
MAX_EPISODE_STEPS_ANTJUMP = 200
class ALRAntJumpEnv(AntEnv):
"""
Initialization changes to normal Ant:
- healthy_reward: 1.0 -> 0.01 -> 0.0 no healthy reward needed - Paul and Marc
- ctrl_cost_weight 0.5 -> 0.0
- contact_cost_weight: 5e-4 -> 0.0
- healthy_z_range: (0.2, 1.0) -> (0.3, float('inf')) !!!!! Does that make sense, limiting height?
"""
def __init__(self,
xml_file='ant.xml',
ctrl_cost_weight=0.0,
contact_cost_weight=0.0,
healthy_reward=0.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.3, float('inf')),
contact_force_range=(-1.0, 1.0),
reset_noise_scale=0.1,
context=True, # variable to decide if context is used or not
exclude_current_positions_from_observation=True,
max_episode_steps=200):
self.current_step = 0
self.max_height = 0
self.context = context
self.max_episode_steps = max_episode_steps
self.goal = 0 # goal when training with context
super().__init__(xml_file, ctrl_cost_weight, contact_cost_weight, healthy_reward, terminate_when_unhealthy,
healthy_z_range, contact_force_range, reset_noise_scale,
exclude_current_positions_from_observation)
def step(self, action):
self.current_step += 1
self.do_simulation(action, self.frame_skip)
height = self.get_body_com("torso")[2].copy()
self.max_height = max(height, self.max_height)
rewards = 0
ctrl_cost = self.control_cost(action)
contact_cost = self.contact_cost
costs = ctrl_cost + contact_cost
done = height < 0.3 # fall over -> is the 0.3 value from healthy_z_range? TODO change 0.3 to the value of healthy z angle
if self.current_step == self.max_episode_steps or done:
if self.context:
# -10 for scaling the value of the distance between the max_height and the goal height; only used when context is enabled
# height_reward = -10 * (np.linalg.norm(self.max_height - self.goal))
height_reward = -10*np.linalg.norm(self.max_height - self.goal)
# no healthy reward when using context, because we optimize a negative value
healthy_reward = 0
else:
height_reward = self.max_height - 0.7
healthy_reward = self.healthy_reward * self.current_step
rewards = height_reward + healthy_reward
obs = self._get_obs()
reward = rewards - costs
info = {
'height': height,
'max_height': self.max_height,
'goal': self.goal
}
return obs, reward, done, info
def _get_obs(self):
return np.append(super()._get_obs(), self.goal)
def reset(self):
self.current_step = 0
self.max_height = 0
self.goal = np.random.uniform(1.0, 2.5,
1) # goal heights from 1.0 to 2.5; can be increased, but didnt work well with CMORE
return super().reset()
# reset_model had to be implemented in every env to make it deterministic
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
if __name__ == '__main__':
render_mode = "human" # "human" or "partial" or "final"
env = ALRAntJumpEnv()
obs = env.reset()
for i in range(2000):
# objective.load_result("/tmp/cma")
# test with random actions
ac = env.action_space.sample()
obs, rew, d, info = env.step(ac)
if i % 10 == 0:
env.render(mode=render_mode)
if d:
env.reset()
env.close()

View File

@ -0,0 +1,81 @@
<mujoco model="ant">
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
<option integrator="RK4" timestep="0.01"/>
<custom>
<numeric data="0.0 0.0 0.55 1.0 0.0 0.0 0.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0 1.0" name="init_qpos"/>
</custom>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="0" condim="3" density="5.0" friction="1 0.5 0.5" margin="0.01" rgba="0.8 0.6 0.4 1"/>
</default>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<body name="torso" pos="0 0 0.75">
<camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
<geom name="torso_geom" pos="0 0 0" size="0.25" type="sphere"/>
<joint armature="0" damping="0" limited="false" margin="0.01" name="root" pos="0 0 0" type="free"/>
<body name="front_left_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="aux_1_geom" size="0.08" type="capsule"/>
<body name="aux_1" pos="0.2 0.2 0">
<joint axis="0 0 1" name="hip_1" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="left_leg_geom" size="0.08" type="capsule"/>
<body pos="0.2 0.2 0">
<joint axis="-1 1 0" name="ankle_1" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.4 0.4 0.0" name="left_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="front_right_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="aux_2_geom" size="0.08" type="capsule"/>
<body name="aux_2" pos="-0.2 0.2 0">
<joint axis="0 0 1" name="hip_2" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="right_leg_geom" size="0.08" type="capsule"/>
<body pos="-0.2 0.2 0">
<joint axis="1 1 0" name="ankle_2" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.4 0.4 0.0" name="right_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="back_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="aux_3_geom" size="0.08" type="capsule"/>
<body name="aux_3" pos="-0.2 -0.2 0">
<joint axis="0 0 1" name="hip_3" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="back_leg_geom" size="0.08" type="capsule"/>
<body pos="-0.2 -0.2 0">
<joint axis="-1 1 0" name="ankle_3" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.4 -0.4 0.0" name="third_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="right_back_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="aux_4_geom" size="0.08" type="capsule"/>
<body name="aux_4" pos="0.2 -0.2 0">
<joint axis="0 0 1" name="hip_4" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="rightback_leg_geom" size="0.08" type="capsule"/>
<body pos="0.2 -0.2 0">
<joint axis="1 1 0" name="ankle_4" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.4 -0.4 0.0" name="fourth_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_4" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_4" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_1" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_1" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_2" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_2" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_3" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_3" gear="150"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,31 @@
from typing import Tuple, Union
import numpy as np
from mp_env_api import MPEnvWrapper
class MPWrapper(MPEnvWrapper):
@property
def active_obs(self):
return np.hstack([
[False] * 111, # ant has 111 dimensional observation space !!
[True] # goal height
])
@property
def current_pos(self) -> Union[float, int, np.ndarray]:
return self.env.sim.data.qpos[7:15].copy()
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qvel[6:14].copy()
@property
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property
def dt(self) -> Union[float, int]:
return self.env.dt

View File

@ -0,0 +1 @@
from .mp_wrapper import MPWrapper

View File

@ -0,0 +1,62 @@
<mujoco model="cheetah">
<compiler angle="radian" coordinate="local" inertiafromgeom="true" settotalmass="14"/>
<default>
<joint armature=".1" damping=".01" limited="true" solimplimit="0 .8 .03" solreflimit=".02 1" stiffness="8"/>
<geom conaffinity="0" condim="3" contype="1" friction=".4 .1 .1" rgba="0.8 0.6 .4 1" solimp="0.0 0.8 0.01" solref="0.02 1"/>
<motor ctrllimited="true" ctrlrange="-1 1"/>
</default>
<size nstack="300000" nuser_geom="1"/>
<option gravity="0 0 -9.81" timestep="0.01"/>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<body name="torso" pos="0 0 .7">
<camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 0" stiffness="0" type="hinge"/>
<geom fromto="-.5 0 0 .5 0 0" name="torso" size="0.046" type="capsule"/>
<geom axisangle="0 1 0 .87" name="head" pos=".6 0 .1" size="0.046 .15" type="capsule"/>
<!-- <site name='tip' pos='.15 0 .11'/>-->
<body name="bthigh" pos="-.5 0 0">
<joint axis="0 1 0" damping="6" name="bthigh" pos="0 0 0" range="-.52 1.05" stiffness="240" type="hinge"/>
<geom axisangle="0 1 0 -3.8" name="bthigh" pos=".1 0 -.13" size="0.046 .145" type="capsule"/>
<body name="bshin" pos=".16 0 -.25">
<joint axis="0 1 0" damping="4.5" name="bshin" pos="0 0 0" range="-.785 .785" stiffness="180" type="hinge"/>
<geom axisangle="0 1 0 -2.03" name="bshin" pos="-.14 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .15" type="capsule"/>
<body name="bfoot" pos="-.28 0 -.14">
<joint axis="0 1 0" damping="3" name="bfoot" pos="0 0 0" range="-.4 .785" stiffness="120" type="hinge"/>
<geom axisangle="0 1 0 -.27" name="bfoot" pos=".03 0 -.097" rgba="0.9 0.6 0.6 1" size="0.046 .094" type="capsule"/>
</body>
</body>
</body>
<body name="fthigh" pos=".5 0 0">
<joint axis="0 1 0" damping="4.5" name="fthigh" pos="0 0 0" range="-1 .7" stiffness="180" type="hinge"/>
<geom axisangle="0 1 0 .52" name="fthigh" pos="-.07 0 -.12" size="0.046 .133" type="capsule"/>
<body name="fshin" pos="-.14 0 -.24">
<joint axis="0 1 0" damping="3" name="fshin" pos="0 0 0" range="-1.2 .87" stiffness="120" type="hinge"/>
<geom axisangle="0 1 0 -.6" name="fshin" pos=".065 0 -.09" rgba="0.9 0.6 0.6 1" size="0.046 .106" type="capsule"/>
<body name="ffoot" pos=".13 0 -.18">
<joint axis="0 1 0" damping="1.5" name="ffoot" pos="0 0 0" range="-.5 .5" stiffness="60" type="hinge"/>
<geom axisangle="0 1 0 -.6" name="ffoot" pos=".045 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .07" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor gear="120" joint="bthigh" name="bthigh"/>
<motor gear="90" joint="bshin" name="bshin"/>
<motor gear="60" joint="bfoot" name="bfoot"/>
<motor gear="120" joint="fthigh" name="fthigh"/>
<motor gear="60" joint="fshin" name="fshin"/>
<motor gear="30" joint="ffoot" name="ffoot"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,100 @@
import os
from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
import numpy as np
MAX_EPISODE_STEPS_HALFCHEETAHJUMP = 100
class ALRHalfCheetahJumpEnv(HalfCheetahEnv):
"""
ctrl_cost_weight 0.1 -> 0.0
"""
def __init__(self,
xml_file='cheetah.xml',
forward_reward_weight=1.0,
ctrl_cost_weight=0.0,
reset_noise_scale=0.1,
context=True,
exclude_current_positions_from_observation=True,
max_episode_steps=100):
self.current_step = 0
self.max_height = 0
self.max_episode_steps = max_episode_steps
self.goal = 0
self.context = context
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, reset_noise_scale,
exclude_current_positions_from_observation)
def step(self, action):
self.current_step += 1
self.do_simulation(action, self.frame_skip)
height_after = self.get_body_com("torso")[2]
self.max_height = max(height_after, self.max_height)
## Didnt use fell_over, because base env also has no done condition - Paul and Marc
# fell_over = abs(self.sim.data.qpos[2]) > 2.5 # how to figure out if the cheetah fell over? -> 2.5 oke?
# TODO: Should a fall over be checked herE?
done = False
ctrl_cost = self.control_cost(action)
costs = ctrl_cost
if self.current_step == self.max_episode_steps:
height_goal_distance = -10*np.linalg.norm(self.max_height - self.goal) + 1e-8 if self.context \
else self.max_height
rewards = self._forward_reward_weight * height_goal_distance
else:
rewards = 0
observation = self._get_obs()
reward = rewards - costs
info = {
'height': height_after,
'max_height': self.max_height
}
return observation, reward, done, info
def _get_obs(self):
return np.append(super()._get_obs(), self.goal)
def reset(self):
self.max_height = 0
self.current_step = 0
self.goal = np.random.uniform(1.1, 1.6, 1) # 1.1 1.6
return super().reset()
# overwrite reset_model to make it deterministic
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
if __name__ == '__main__':
render_mode = "human" # "human" or "partial" or "final"
env = ALRHalfCheetahJumpEnv()
obs = env.reset()
for i in range(2000):
# objective.load_result("/tmp/cma")
# test with random actions
ac = env.action_space.sample()
obs, rew, d, info = env.step(ac)
if i % 10 == 0:
env.render(mode=render_mode)
if d:
print('After ', i, ' steps, done: ', d)
env.reset()
env.close()

View File

@ -0,0 +1,30 @@
from typing import Tuple, Union
import numpy as np
from mp_env_api import MPEnvWrapper
class MPWrapper(MPEnvWrapper):
@property
def active_obs(self):
return np.hstack([
[False] * 17,
[True] # goal height
])
@property
def current_pos(self) -> Union[float, int, np.ndarray]:
return self.env.sim.data.qpos[3:9].copy()
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qvel[3:9].copy()
@property
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property
def dt(self) -> Union[float, int]:
return self.env.dt

View File

@ -0,0 +1 @@
from .mp_wrapper import MPWrapper

View File

@ -0,0 +1,48 @@
<mujoco model="hopper">
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<option integrator="RK4" timestep="0.002"/>
<visual>
<map znear="0.02"/>
</visual>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
<body name="torso" pos="0 0 1.25">
<camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
<body name="thigh" pos="0 0 1.05">
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
<body name="leg" pos="0 0 0.35">
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
<body name="foot" pos="0.13/2 0 0.1">
<joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
<geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
</actuator>
<asset>
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
width="100" height="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
</mujoco>

View File

@ -0,0 +1,51 @@
<mujoco model="hopper">
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<option integrator="RK4" timestep="0.002"/>
<visual>
<map znear="0.02"/>
</visual>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
<body name="torso" pos="0 0 1.25">
<camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
<body name="thigh" pos="0 0 1.05">
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
<body name="leg" pos="0 0 0.35">
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
<body name="foot" pos="0.13/2 0 0.1">
<joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
<geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
</body>
</body>
</body>
</body>
<body name="box" pos="1 0 0">
<geom friction="1.0" fromto="0.48 0 0 1 0 0" name="basket_ground_geom" size="0.3" type="box" rgba="1 0 0 1"/>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
</actuator>
<asset>
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
width="100" height="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
</mujoco>

View File

@ -0,0 +1,110 @@
from gym.envs.mujoco.hopper_v3 import HopperEnv
import numpy as np
MAX_EPISODE_STEPS_HOPPERJUMP = 250
class ALRHopperJumpEnv(HopperEnv):
"""
Initialization changes to normal Hopper:
- healthy_reward: 1.0 -> 0.1 -> 0
- healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
- healthy_z_range: (0.7, float('inf')) -> (0.5, float('inf'))
"""
def __init__(self,
xml_file='hopper.xml',
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=0.0,
penalty=0.0,
context=True,
terminate_when_unhealthy=True,
healthy_state_range=(-100.0, 100.0),
healthy_z_range=(0.5, float('inf')),
healthy_angle_range=(-float('inf'), float('inf')),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True,
max_episode_steps=250):
self.current_step = 0
self.max_height = 0
self.max_episode_steps = max_episode_steps
self.penalty = penalty
self.goal = 0
self.context = context
self.exclude_current_positions_from_observation = exclude_current_positions_from_observation
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
exclude_current_positions_from_observation)
def step(self, action):
self.current_step += 1
self.do_simulation(action, self.frame_skip)
height_after = self.get_body_com("torso")[2]
self.max_height = max(height_after, self.max_height)
ctrl_cost = self.control_cost(action)
costs = ctrl_cost
done = False
if self.current_step >= self.max_episode_steps:
hight_goal_distance = -10*np.linalg.norm(self.max_height - self.goal) if self.context else self.max_height
healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
height_reward = self._forward_reward_weight * hight_goal_distance # maybe move reward calculation into if structure and define two different _forward_reward_weight variables for context and episodic seperatley
rewards = height_reward + healthy_reward
else:
# penalty for wrong start direction of first two joints; not needed, could be removed
rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0
observation = self._get_obs()
reward = rewards - costs
info = {
'height' : height_after,
'max_height': self.max_height,
'goal' : self.goal
}
return observation, reward, done, info
def _get_obs(self):
return np.append(super()._get_obs(), self.goal)
def reset(self):
self.goal = np.random.uniform(1.4, 2.3, 1) # 1.3 2.3
self.max_height = 0
self.current_step = 0
return super().reset()
# overwrite reset_model to make it deterministic
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
if __name__ == '__main__':
render_mode = "human" # "human" or "partial" or "final"
env = ALRHopperJumpEnv()
obs = env.reset()
for i in range(2000):
# objective.load_result("/tmp/cma")
# test with random actions
ac = env.action_space.sample()
obs, rew, d, info = env.step(ac)
if i % 10 == 0:
env.render(mode=render_mode)
if d:
print('After ', i, ' steps, done: ', d)
env.reset()
env.close()

View File

@ -0,0 +1,170 @@
from gym.envs.mujoco.hopper_v3 import HopperEnv
import numpy as np
import os
MAX_EPISODE_STEPS_HOPPERJUMPONBOX = 250
class ALRHopperJumpOnBoxEnv(HopperEnv):
"""
Initialization changes to normal Hopper:
- healthy_reward: 1.0 -> 0.01 -> 0.001
- healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
"""
def __init__(self,
xml_file='hopper_jump_on_box.xml',
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=0.001,
terminate_when_unhealthy=True,
healthy_state_range=(-100.0, 100.0),
healthy_z_range=(0.7, float('inf')),
healthy_angle_range=(-float('inf'), float('inf')),
reset_noise_scale=5e-3,
context=True,
exclude_current_positions_from_observation=True,
max_episode_steps=250):
self.current_step = 0
self.max_height = 0
self.max_episode_steps = max_episode_steps
self.min_distance = 5000 # what value?
self.hopper_on_box = False
self.context = context
self.box_x = 1
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
exclude_current_positions_from_observation)
def step(self, action):
self.current_step += 1
self.do_simulation(action, self.frame_skip)
height_after = self.get_body_com("torso")[2]
foot_pos = self.get_body_com("foot")
self.max_height = max(height_after, self.max_height)
vx, vz, vangle = self.sim.data.qvel[0:3]
s = self.state_vector()
fell_over = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and (height_after > .7))
box_pos = self.get_body_com("box")
box_size = 0.3
box_height = 0.3
box_center = (box_pos[0] + (box_size / 2), box_pos[1], box_height)
foot_length = 0.3
foot_center = foot_pos[0] - (foot_length / 2)
dist = np.linalg.norm(foot_pos - box_center)
self.min_distance = min(dist, self.min_distance)
# check if foot is on box
is_on_box_x = box_pos[0] <= foot_center <= box_pos[0] + box_size
is_on_box_y = True # is y always true because he can only move in x and z direction?
is_on_box_z = box_height - 0.02 <= foot_pos[2] <= box_height + 0.02
is_on_box = is_on_box_x and is_on_box_y and is_on_box_z
if is_on_box: self.hopper_on_box = True
ctrl_cost = self.control_cost(action)
costs = ctrl_cost
done = fell_over or self.hopper_on_box
if self.current_step >= self.max_episode_steps or done:
done = False
max_height = self.max_height.copy()
min_distance = self.min_distance.copy()
alive_bonus = self._healthy_reward * self.current_step
box_bonus = 0
rewards = 0
# TODO explain what we did here for the calculation of the reward
if is_on_box:
if self.context:
rewards -= 100 * vx ** 2 if 100 * vx ** 2 < 1 else 1
else:
box_bonus = 10
rewards += box_bonus
# rewards -= dist * dist ???? why when already on box?
# reward -= 90 - abs(angle)
rewards -= 100 * vx ** 2 if 100 * vx ** 2 < 1 else 1
rewards += max_height * 3
rewards += alive_bonus
else:
if self.context:
rewards = -10 - min_distance
rewards += max_height * 3
else:
# reward -= (dist*dist)
rewards -= min_distance * min_distance
# rewards -= dist / self.max_distance
rewards += max_height
rewards += alive_bonus
else:
rewards = 0
observation = self._get_obs()
reward = rewards - costs
info = {
'height': height_after,
'max_height': self.max_height.copy(),
'min_distance': self.min_distance,
'goal': self.box_x,
}
return observation, reward, done, info
def _get_obs(self):
return np.append(super()._get_obs(), self.box_x)
def reset(self):
self.max_height = 0
self.min_distance = 5000
self.current_step = 0
self.hopper_on_box = False
if self.context:
box_id = self.sim.model.body_name2id("box")
self.box_x = np.random.uniform(1, 3, 1)
self.sim.model.body_pos[box_id] = [self.box_x, 0, 0]
return super().reset()
# overwrite reset_model to make it deterministic
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
if __name__ == '__main__':
render_mode = "human" # "human" or "partial" or "final"
env = ALRHopperJumpOnBoxEnv()
obs = env.reset()
for i in range(2000):
# objective.load_result("/tmp/cma")
# test with random actions
ac = env.action_space.sample()
obs, rew, d, info = env.step(ac)
if i % 10 == 0:
env.render(mode=render_mode)
if d:
print('After ', i, ' steps, done: ', d)
env.reset()
env.close()

View File

@ -0,0 +1,31 @@
from typing import Tuple, Union
import numpy as np
from mp_env_api import MPEnvWrapper
class MPWrapper(MPEnvWrapper):
@property
def active_obs(self):
return np.hstack([
[False] * (5 + int(not self.exclude_current_positions_from_observation)), # position
[False] * 6, # velocity
[True]
])
@property
def current_pos(self) -> Union[float, int, np.ndarray]:
return self.env.sim.data.qpos[3:6].copy()
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qvel[3:6].copy()
@property
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property
def dt(self) -> Union[float, int]:
return self.env.dt

View File

@ -0,0 +1 @@
from .mp_wrapper import MPWrapper

View File

@ -0,0 +1,56 @@
<mujoco model="hopper">
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<option integrator="RK4" timestep="0.002"/>
<visual>
<map znear="0.02"/>
</visual>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
<body name="torso" pos="0 0 1.25">
<camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
<body name="thigh" pos="0 0 1.05">
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
<body name="leg" pos="0 0 0.35">
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
<body name="foot" pos="0.13/2 0 0.1">
<joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
<geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
</body>
</body>
</body>
</body>
<body name="ball" pos="0 0 1.53">
<joint armature="0" axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<joint armature="0" axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<joint armature="0" axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<geom pos="0 0 1.53" priority= "1" size="0.025 0.025 0.025" type="sphere" condim="4" name="ball_geom" rgba="0.8 0.2 0.1 1" mass="0.1"
friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="-10000 -10"/>
<site name="target_ball" pos="0 0 1.53" size="0.04 0.04 0.04" rgba="1 0 0 1" type="sphere"/>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
</actuator>
<asset>
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
width="100" height="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
</mujoco>

View File

@ -0,0 +1,132 @@
<mujoco model="hopper">
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<option integrator="RK4" timestep="0.002"/>
<visual>
<map znear="0.02"/>
</visual>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
<body name="torso" pos="0 0 1.25">
<camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
<body name="thigh" pos="0 0 1.05">
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
<body name="leg" pos="0 0 0.35">
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
<body name="foot" pos="0.13/2 0 0.1">
<joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
<geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
</body>
</body>
</body>
</body>
<body name="ball" pos="0 0 1.53">
<joint armature="0" axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<joint armature="0" axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<joint armature="0" axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<geom pos="0 0 1.53" priority= "1" size="0.025 0.025 0.025" type="sphere" condim="4" name="ball_geom" rgba="0.8 0.2 0.1 1" mass="0.1"
friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="-10000 -10"/>
<site name="target_ball" pos="0 0 1.53" size="0.04 0.04 0.04" rgba="1 0 0 1" type="sphere"/>
</body>
<body name="basket_ground" pos="5 0 0">
<geom friction="0.9" fromto="5 0 0 5.3 0 0" name="basket_ground_geom" size="0.1 0.4 0.3" type="box"/>
<body name="edge1" pos="5 0 0">
<geom friction="2.0" fromto="5 0 0 5 0 0.2" name="edge1_geom" size="0.04" type="capsule"/>
</body>
<body name="edge2" pos="5 0 0.05">
<geom friction="2.0" fromto="5 0.05 0 5 0.05 0.2" name="edge2_geom" size="0.04" type="capsule"/>
</body>
<body name="edge3" pos="5 0 0.1">
<geom friction="2.0" fromto="5 0.1 0 5 0.1 0.2" name="edge3_geom" size="0.04" type="capsule"/>
</body>
<body name="edge4" pos="5 0 0.15">
<geom friction="2.0" fromto="5 0.15 0 5 0.15 0.2" name="edge4_geom" size="0.04" type="capsule"/>
</body>
<body name="edge5" pos="5.05 0 0.15">
<geom friction="2.0" fromto="5.05 0.15 0 5.05 0.15 0.2" name="edge5_geom" size="0.04" type="capsule"/>
</body>
<body name="edge6" pos="5.1 0 0.15">
<geom friction="2.0" fromto="5.1 0.15 0 5.1 0.15 0.2" name="edge6_geom" size="0.04" type="capsule"/>
</body>
<body name="edge7" pos="5.15 0 0.15">
<geom friction="2.0" fromto="5.15 0.15 0 5.15 0.15 0.2" name="edge7_geom" size="0.04" type="capsule"/>
</body>
<body name="edge8" pos="5.2 0 0.15">
<geom friction="2.0" fromto="5.2 0.15 0 5.2 0.15 0.2" name="edge8_geom" size="0.04" type="capsule"/>
</body>
<body name="edge9" pos="5.25 0 0.15">
<geom friction="2.0" fromto="5.25 0.15 0 5.25 0.15 0.2" name="edge9_geom" size="0.04" type="capsule"/>
</body>
<body name="edge10" pos="5.3 0 0.15">
<geom friction="2.0" fromto="5.3 0.15 0 5.3 0.15 0.2" name="edge10_geom" size="0.04" type="capsule"/>
</body>
<body name="edge11" pos="5.3 0 0.1">
<geom friction="2.0" fromto="5.3 0.1 0 5.3 0.1 0.2" name="edge11_geom" size="0.04" type="capsule"/>
</body>
<body name="edge12" pos="5.3 0 0.05">
<geom friction="2.0" fromto="5.3 0.05 0 5.3 0.05 0.2" name="edge12_geom" size="0.04" type="capsule"/>
</body>
<body name="edge13" pos="5.3 0 0.0">
<geom friction="2.0" fromto="5.3 0 0 5.3 0 0.2" name="edge13_geom" size="0.04" type="capsule"/>
</body>
<body name="edge14" pos="5.3 0 -0.05">
<geom friction="2.0" fromto="5.3 -0.05 0 5.3 -0.05 0.2" name="edge14_geom" size="0.04" type="capsule"/>
</body>
<body name="edge15" pos="5.3 0 -0.1">
<geom friction="2.0" fromto="5.3 -0.1 0 5.3 -0.1 0.2" name="edge15_geom" size="0.04" type="capsule"/>
</body>
<body name="edge16" pos="5.3 0 -0.15">
<geom friction="2.0" fromto="5.3 -0.15 0 5.3 -0.15 0.2" name="edge16_geom" size="0.04" type="capsule"/>
</body>
<body name="edge20" pos="5.25 0 -0.15">
<geom friction="2.0" fromto="5.25 -0.15 0 5.25 -0.15 0.2" name="edge20_geom" size="0.04" type="capsule"/>
</body>
<body name="edge21" pos="5.2 0 -0.15">
<geom friction="2.0" fromto="5.2 -0.15 0 5.2 -0.15 0.2" name="edge21_geom" size="0.04" type="capsule"/>
</body>
<body name="edge22" pos="5.15 0 -0.15">
<geom friction="2.0" fromto="5.15 -0.15 0 5.15 -0.15 0.2" name="edge22_geom" size="0.04" type="capsule"/>
</body>
<body name="edge23" pos="5.1 0 -0.15">
<geom friction="2.0" fromto="5.1 -0.15 0 5.1 -0.15 0.2" name="edge23_geom" size="0.04" type="capsule"/>
</body>
<body name="edge24" pos="5.05 0 -0.15">
<geom friction="2.0" fromto="5.05 -0.15 0 5.05 -0.15 0.2" name="edge24_geom" size="0.04" type="capsule"/>
</body>
<body name="edge25" pos="5 0 -0.15">
<geom friction="2.0" fromto="5 -0.15 0 5 -0.15 0.2" name="edge25_geom" size="0.04" type="capsule"/>
</body>
<body name="edge26" pos="5 0 -0.1">
<geom friction="2.0" fromto="5 -0.1 0 5 -0.1 0.2" name="edge26_geom" size="0.04" type="capsule"/>
</body>
<body name="edge27" pos="5 0 -0.05">
<geom friction="2.0" fromto="5 -0.05 0 5 -0.05 0.2" name="edge27_geom" size="0.04" type="capsule"/>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
</actuator>
<asset>
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
width="100" height="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
</mujoco>

View File

@ -0,0 +1,113 @@
import os
from gym.envs.mujoco.hopper_v3 import HopperEnv
import numpy as np
MAX_EPISODE_STEPS_HOPPERTHROW = 250
class ALRHopperThrowEnv(HopperEnv):
"""
Initialization changes to normal Hopper:
- healthy_reward: 1.0 -> 0.0 -> 0.1
- forward_reward_weight -> 5.0
- healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
Reward changes to normal Hopper:
- velocity: (x_position_after - x_position_before) -> self.get_body_com("ball")[0]
"""
def __init__(self,
xml_file='hopper_throw.xml',
forward_reward_weight=5.0,
ctrl_cost_weight=1e-3,
healthy_reward=0.1,
terminate_when_unhealthy=True,
healthy_state_range=(-100.0, 100.0),
healthy_z_range=(0.7, float('inf')),
healthy_angle_range=(-float('inf'), float('inf')),
reset_noise_scale=5e-3,
context = True,
exclude_current_positions_from_observation=True,
max_episode_steps=250):
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
self.current_step = 0
self.max_episode_steps = max_episode_steps
self.context = context
self.goal = 0
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
exclude_current_positions_from_observation)
def step(self, action):
self.current_step += 1
self.do_simulation(action, self.frame_skip)
ball_pos_after = self.get_body_com("ball")[0] #abs(self.get_body_com("ball")[0]) # use x and y to get point and use euclid distance as reward?
ball_pos_after_y = self.get_body_com("ball")[2]
# done = self.done TODO We should use this, not sure why there is no other termination; ball_landed should be enough, because we only look at the throw itself? - Paul and Marc
ball_landed = self.get_body_com("ball")[2] <= 0.05
done = ball_landed
ctrl_cost = self.control_cost(action)
costs = ctrl_cost
rewards = 0
if self.current_step >= self.max_episode_steps or done:
distance_reward = -np.linalg.norm(ball_pos_after - self.goal) if self.context else \
self._forward_reward_weight * ball_pos_after
healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
rewards = distance_reward + healthy_reward
observation = self._get_obs()
reward = rewards - costs
info = {
'ball_pos': ball_pos_after,
'ball_pos_y': ball_pos_after_y,
'current_step' : self.current_step,
'goal' : self.goal,
}
return observation, reward, done, info
def _get_obs(self):
return np.append(super()._get_obs(), self.goal)
def reset(self):
self.current_step = 0
self.goal = self.goal = np.random.uniform(2.0, 6.0, 1) # 0.5 8.0
return super().reset()
# overwrite reset_model to make it deterministic
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
if __name__ == '__main__':
render_mode = "human" # "human" or "partial" or "final"
env = ALRHopperThrowEnv()
obs = env.reset()
for i in range(2000):
# objective.load_result("/tmp/cma")
# test with random actions
ac = env.action_space.sample()
obs, rew, d, info = env.step(ac)
if i % 10 == 0:
env.render(mode=render_mode)
if d:
print('After ', i, ' steps, done: ', d)
env.reset()
env.close()

View File

@ -0,0 +1,143 @@
import os
from gym.envs.mujoco.hopper_v3 import HopperEnv
import numpy as np
MAX_EPISODE_STEPS_HOPPERTHROWINBASKET = 250
class ALRHopperThrowInBasketEnv(HopperEnv):
"""
Initialization changes to normal Hopper:
- healthy_reward: 1.0 -> 0.0
- healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
- hit_basket_reward: - -> 10
Reward changes to normal Hopper:
- velocity: (x_position_after - x_position_before) -> (ball_position_after - ball_position_before)
"""
def __init__(self,
xml_file='hopper_throw_in_basket.xml',
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=0.0,
hit_basket_reward=10,
basket_size=0.3,
terminate_when_unhealthy=True,
healthy_state_range=(-100.0, 100.0),
healthy_z_range=(0.7, float('inf')),
healthy_angle_range=(-float('inf'), float('inf')),
reset_noise_scale=5e-3,
context=True,
penalty=0.0,
exclude_current_positions_from_observation=True,
max_episode_steps = 250):
self.hit_basket_reward = hit_basket_reward
self.current_step = 0
self.max_episode_steps = max_episode_steps
self.ball_in_basket = False
self.basket_size = basket_size
self.context = context
self.penalty = penalty
self.basket_x = 5
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
exclude_current_positions_from_observation)
def step(self, action):
self.current_step += 1
self.do_simulation(action, self.frame_skip)
ball_pos = self.get_body_com("ball")
basket_pos = self.get_body_com("basket_ground")
basket_center = (basket_pos[0] + 0.5, basket_pos[1], basket_pos[2])
is_in_basket_x = ball_pos[0] >= basket_pos[0] and ball_pos[0] <= basket_pos[0] + self.basket_size
is_in_basket_y = ball_pos[1] >= basket_pos[1] - (self.basket_size/2) and ball_pos[1] <= basket_pos[1] + (self.basket_size/2)
is_in_basket_z = ball_pos[2] < 0.1
is_in_basket = is_in_basket_x and is_in_basket_y and is_in_basket_z
if is_in_basket: self.ball_in_basket = True
ball_landed = self.get_body_com("ball")[2] <= 0.05
done = ball_landed or is_in_basket
rewards = 0
ctrl_cost = self.control_cost(action)
costs = ctrl_cost
if self.current_step >= self.max_episode_steps or done:
if is_in_basket:
if not self.context:
rewards += self.hit_basket_reward
else:
dist = np.linalg.norm(ball_pos-basket_center)
if self.context:
rewards = -10 * dist
else:
rewards -= (dist*dist)
else:
# penalty not needed
rewards += ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0 #too much of a penalty?
observation = self._get_obs()
reward = rewards - costs
info = {
'ball_pos': ball_pos[0],
}
return observation, reward, done, info
def _get_obs(self):
return np.append(super()._get_obs(), self.basket_x)
def reset(self):
if self.max_episode_steps == 10:
# We have to initialize this here, because the spec is only added after creating the env.
self.max_episode_steps = self.spec.max_episode_steps
self.current_step = 0
self.ball_in_basket = False
if self.context:
basket_id = self.sim.model.body_name2id("basket_ground")
self.basket_x = np.random.uniform(3, 7, 1)
self.sim.model.body_pos[basket_id] = [self.basket_x, 0, 0]
return super().reset()
# overwrite reset_model to make it deterministic
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
if __name__ == '__main__':
render_mode = "human" # "human" or "partial" or "final"
env = ALRHopperThrowInBasketEnv()
obs = env.reset()
for i in range(2000):
# objective.load_result("/tmp/cma")
# test with random actions
ac = env.action_space.sample()
obs, rew, d, info = env.step(ac)
if i % 10 == 0:
env.render(mode=render_mode)
if d:
print('After ', i, ' steps, done: ', d)
env.reset()
env.close()

View File

@ -0,0 +1,30 @@
from typing import Tuple, Union
import numpy as np
from mp_env_api import MPEnvWrapper
class MPWrapper(MPEnvWrapper):
@property
def active_obs(self):
return np.hstack([
[False] * 17,
[True] # goal pos
])
@property
def current_pos(self) -> Union[float, int, np.ndarray]:
return self.env.sim.data.qpos[3:6].copy()
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qvel[3:6].copy()
@property
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property
def dt(self) -> Union[float, int]:
return self.env.dt

View File

@ -0,0 +1 @@
from .mp_wrapper import MPWrapper

View File

@ -0,0 +1,62 @@
<mujoco model="walker2d">
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
<default>
<joint armature="0.01" damping=".1" limited="true"/>
<geom conaffinity="0" condim="3" contype="1" density="1000" friction=".7 .1 .1" rgba="0.8 0.6 .4 1"/>
</default>
<option integrator="RK4" timestep="0.002"/>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane" material="MatPlane"/>
<body name="torso" pos="0 0 1.25">
<camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
<body name="thigh" pos="0 0 1.05">
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
<body name="leg" pos="0 0 0.35">
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
<body name="foot" pos="0.2/2 0 0.1">
<joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
<geom friction="0.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
</body>
</body>
</body>
<!-- copied and then replace thigh->thigh_left, leg->leg_left, foot->foot_right -->
<body name="thigh_left" pos="0 0 1.05">
<joint axis="0 -1 0" name="thigh_left_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_left_geom" rgba=".7 .3 .6 1" size="0.05" type="capsule"/>
<body name="leg_left" pos="0 0 0.35">
<joint axis="0 -1 0" name="leg_left_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_left_geom" rgba=".7 .3 .6 1" size="0.04" type="capsule"/>
<body name="foot_left" pos="0.2/2 0 0.1">
<joint axis="0 -1 0" name="foot_left_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
<geom friction="1.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_left_geom" rgba=".7 .3 .6 1" size="0.06" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<!-- <motor joint="torso_joint" ctrlrange="-100.0 100.0" isctrllimited="true"/>-->
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_left_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_left_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_left_joint"/>
<!-- <motor joint="finger2_rot" ctrlrange="-20.0 20.0" isctrllimited="true"/>-->
</actuator>
<asset>
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
width="100" height="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
</mujoco>

View File

@ -0,0 +1,30 @@
from typing import Tuple, Union
import numpy as np
from mp_env_api import MPEnvWrapper
class MPWrapper(MPEnvWrapper):
@property
def active_obs(self):
return np.hstack([
[False] * 17,
[True] # goal pos
])
@property
def current_pos(self) -> Union[float, int, np.ndarray]:
return self.env.sim.data.qpos[3:9].copy()
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qvel[3:9].copy()
@property
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property
def dt(self) -> Union[float, int]:
return self.env.dt

View File

@ -0,0 +1,113 @@
import os
from gym.envs.mujoco.walker2d_v3 import Walker2dEnv
import numpy as np
MAX_EPISODE_STEPS_WALKERJUMP = 300
class ALRWalker2dJumpEnv(Walker2dEnv):
"""
healthy reward 1.0 -> 0.005 -> 0.0025 not from alex
penalty 10 -> 0 not from alex
"""
def __init__(self,
xml_file='walker2d.xml',
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=0.0025,
terminate_when_unhealthy=True,
healthy_z_range=(0.8, 2.0),
healthy_angle_range=(-1.0, 1.0),
reset_noise_scale=5e-3,
penalty=0,
context=True,
exclude_current_positions_from_observation=True,
max_episode_steps=300):
self.current_step = 0
self.max_episode_steps = max_episode_steps
self.max_height = 0
self._penalty = penalty
self.context = context
self.goal = 0
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
healthy_z_range, healthy_angle_range, reset_noise_scale,
exclude_current_positions_from_observation)
def step(self, action):
self.current_step += 1
self.do_simulation(action, self.frame_skip)
#pos_after = self.get_body_com("torso")[0]
height = self.get_body_com("torso")[2]
self.max_height = max(height, self.max_height)
fell_over = height < 0.2
done = fell_over
ctrl_cost = self.control_cost(action)
costs = ctrl_cost
if self.current_step >= self.max_episode_steps or done:
done = True
height_goal_distance = -10 * (np.linalg.norm(self.max_height - self.goal)) if self.context else self.max_height
healthy_reward = self.healthy_reward * self.current_step
rewards = height_goal_distance + healthy_reward
else:
# penalty not needed
rewards = 0
rewards += ((action[:2] > 0) * self._penalty).sum() if self.current_step < 4 else 0
rewards += ((action[3:5] > 0) * self._penalty).sum() if self.current_step < 4 else 0
observation = self._get_obs()
reward = rewards - costs
info = {
'height': height,
'max_height': self.max_height,
'goal' : self.goal,
}
return observation, reward, done, info
def _get_obs(self):
return np.append(super()._get_obs(), self.goal)
def reset(self):
self.current_step = 0
self.max_height = 0
self.goal = np.random.uniform(1.5, 2.5, 1) # 1.5 3.0
return super().reset()
# overwrite reset_model to make it deterministic
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
if __name__ == '__main__':
render_mode = "human" # "human" or "partial" or "final"
env = ALRWalker2dJumpEnv()
obs = env.reset()
for i in range(2000):
# objective.load_result("/tmp/cma")
# test with random actions
ac = env.action_space.sample()
obs, rew, d, info = env.step(ac)
if i % 10 == 0:
env.render(mode=render_mode)
if d:
print('After ', i, ' steps, done: ', d)
env.reset()
env.close()