Added environments from Paul + Marc

2022-04-13 17:28:25 +02:00 · 2022-04-13 17:28:25 +02:00 · 092525889b
commit 092525889b
parent 7ffe94dcfd
26 changed files with 1795 additions and 2 deletions
--- a/alr_envs/alr/init.py
+++ b/alr_envs/alr/init.py
@ -11,6 +11,13 @@ from .mujoco.reacher.alr_reacher import ALRReacherEnv
 from .mujoco.reacher.balancing import BalancingEnv
 from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
 from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP
 from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP
 from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP
 from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX
 from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
 from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
 from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
@ -185,6 +192,69 @@ register(
    }
 )
 register(
    id='ALRAntJump-v0',
    entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv',
    max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
    kwargs={
        "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP
    }
 )
 register(
    id='ALRHalfCheetahJump-v0',
    entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv',
    max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
    kwargs={
        "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP
    }
 )
 register(
    id='ALRHopperJump-v0',
    entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv',
    max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
    kwargs={
        "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP
    }
 )
 register(
    id='ALRHopperJumpOnBox-v0',
    entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv',
    max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
    kwargs={
        "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX
    }
 )
 register(
    id='ALRHopperThrow-v0',
    entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv',
    max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
    kwargs={
        "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW
    }
 )
 register(
    id='ALRHopperThrowInBasket-v0',
    entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv',
    max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
    kwargs={
        "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
    }
 )
 register(
    id='ALRWalker2DJump-v0',
    entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv',
    max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
    kwargs={
        "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP
    }
 )
 ## Balancing Reacher
 register(
@ -430,4 +500,205 @@ for _v, cd in enumerate(ctxt_dim):
            }
        }
    )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 ## AntJump
 for _v, cd in enumerate(ctxt_dim):
    _env_id = f'TableTennisProMP-v{_v}'
    register(
        id=_env_id,
        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
        kwargs={
            "name": "alr_envs:TableTennis{}DCtxt-v0".format(cd),
            "wrappers": [mujoco.table_tennis.MPWrapper],
            "mp_kwargs": {
                "num_dof": 7,
                "num_basis": 2,
                "duration": 1.25,
                "post_traj_time": 1.5,
                "policy_type": "motor",
                "weights_scale": 1.0,
                "zero_start": True,
                "zero_goal": False,
                "policy_kwargs": {
                    "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
                    "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
                }
            }
        }
    )
    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 register(
    id='ALRAntJumpProMP-v0',
    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
    kwargs={
        "name": "alr_envs:ALRAntJump-v0",
        "wrappers": [mujoco.ant_jump.MPWrapper],
        "mp_kwargs": {
            "num_dof": 8,
            "num_basis": 5,
            "duration": 10,
            "post_traj_time": 0,
            "policy_type": "motor",
            "weights_scale": 1.0,
            "zero_start": True,
            "zero_goal": False,
            "policy_kwargs": {
                "p_gains": np.ones(8),
                "d_gains": 0.1*np.ones(8)
            }
        }
    }
 )
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRAntJumpProMP-v0')
 register(
    id='ALRHalfCheetahJumpProMP-v0',
    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
    kwargs={
        "name": "alr_envs:ALRHalfCheetahJump-v0",
        "wrappers": [mujoco.half_cheetah_jump.MPWrapper],
        "mp_kwargs": {
            "num_dof": 6,
            "num_basis": 5,
            "duration": 5,
            "post_traj_time": 0,
            "policy_type": "motor",
            "weights_scale": 1.0,
            "zero_start": True,
            "zero_goal": False,
            "policy_kwargs": {
                "p_gains": np.ones(6),
                "d_gains": 0.1*np.ones(6)
            }
        }
    }
 )
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHalfCheetahJumpProMP-v0')
 register(
    id='ALRHopperJumpProMP-v0',
    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
    kwargs={
        "name": "alr_envs:ALRHopperJump-v0",
        "wrappers": [mujoco.hopper_jump.MPWrapper],
        "mp_kwargs": {
            "num_dof": 3,
            "num_basis": 5,
            "duration": 2,
            "post_traj_time": 0,
            "policy_type": "motor",
            "weights_scale": 1.0,
            "zero_start": True,
            "zero_goal": False,
            "policy_kwargs": {
                "p_gains": np.ones(3),
                "d_gains": 0.1*np.ones(3)
            }
        }
    }
 )
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperJumpProMP-v0')
 register(
    id='ALRHopperJumpOnBoxProMP-v0',
    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
    kwargs={
        "name": "alr_envs:ALRHopperJumpOnBox-v0",
        "wrappers": [mujoco.hopper_jump.MPWrapper],
        "mp_kwargs": {
            "num_dof": 3,
            "num_basis": 5,
            "duration": 2,
            "post_traj_time": 0,
            "policy_type": "motor",
            "weights_scale": 1.0,
            "zero_start": True,
            "zero_goal": False,
            "policy_kwargs": {
                "p_gains": np.ones(3),
                "d_gains": 0.1*np.ones(3)
            }
        }
    }
 )
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperJumpOnBoxProMP-v0')
 register(
    id='ALRHopperThrowProMP-v0',
    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
    kwargs={
        "name": "alr_envs:ALRHopperThrow-v0",
        "wrappers": [mujoco.hopper_throw.MPWrapper],
        "mp_kwargs": {
            "num_dof": 3,
            "num_basis": 5,
            "duration": 2,
            "post_traj_time": 0,
            "policy_type": "motor",
            "weights_scale": 1.0,
            "zero_start": True,
            "zero_goal": False,
            "policy_kwargs": {
                "p_gains": np.ones(3),
                "d_gains": 0.1*np.ones(3)
            }
        }
    }
 )
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperThrowProMP-v0')
 register(
    id='ALRHopperThrowInBasketProMP-v0',
    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
    kwargs={
        "name": "alr_envs:ALRHopperThrowInBasket-v0",
        "wrappers": [mujoco.hopper_throw.MPWrapper],
        "mp_kwargs": {
            "num_dof": 3,
            "num_basis": 5,
            "duration": 2,
            "post_traj_time": 0,
            "policy_type": "motor",
            "weights_scale": 1.0,
            "zero_start": True,
            "zero_goal": False,
            "policy_kwargs": {
                "p_gains": np.ones(3),
                "d_gains": 0.1*np.ones(3)
            }
        }
    }
 )
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperThrowInBasketProMP-v0')
 register(
    id='ALRWalker2DJumpProMP-v0',
    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
    kwargs={
        "name": "alr_envs:ALRWalker2DJump-v0",
        "wrappers": [mujoco.walker_2d_jump.MPWrapper],
        "mp_kwargs": {
            "num_dof": 6,
            "num_basis": 5,
            "duration": 2.4,
            "post_traj_time": 0,
            "policy_type": "motor",
            "weights_scale": 1.0,
            "zero_start": True,
            "zero_goal": False,
            "policy_kwargs": {
                "p_gains": np.ones(6),
                "d_gains": 0.1*np.ones(6)
            }
        }
    }
 )
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRWalker2DJumpProMP-v0')
--- a/alr_envs/alr/mujoco/init.py
+++ b/alr_envs/alr/mujoco/init.py
@ -3,4 +3,11 @@ from .reacher.balancing import BalancingEnv
 from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
 from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
 from .table_tennis.tt_gym import TTEnvGym
-from .beerpong.beerpong import ALRBeerBongEnv
+from .beerpong.beerpong import ALRBeerBongEnv
 from .ant_jump.ant_jump import ALRAntJumpEnv
 from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
 from .hopper_jump.hopper_jump import ALRHopperJumpEnv
 from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
 from .hopper_throw.hopper_throw import ALRHopperThrowEnv
 from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
 from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv
--- a/alr_envs/alr/mujoco/ant_jump/init.py
+++ b/alr_envs/alr/mujoco/ant_jump/init.py
@ -0,0 +1 @@
 from .mp_wrapper import MPWrapper
--- a/alr_envs/alr/mujoco/ant_jump/ant_jump.py
+++ b/alr_envs/alr/mujoco/ant_jump/ant_jump.py
@ -0,0 +1,117 @@
 import numpy as np
 from gym.envs.mujoco.ant_v3 import AntEnv
 MAX_EPISODE_STEPS_ANTJUMP = 200
 class ALRAntJumpEnv(AntEnv):
    """
    Initialization changes to normal Ant:
    - healthy_reward: 1.0 -> 0.01 -> 0.0 no healthy reward needed - Paul and Marc
    - ctrl_cost_weight 0.5 -> 0.0
    - contact_cost_weight: 5e-4 -> 0.0
    - healthy_z_range: (0.2, 1.0) -> (0.3, float('inf'))  !!!!! Does that make sense, limiting height?
    """
    def __init__(self,
                 xml_file='ant.xml',
                 ctrl_cost_weight=0.0,
                 contact_cost_weight=0.0,
                 healthy_reward=0.0,
                 terminate_when_unhealthy=True,
                 healthy_z_range=(0.3, float('inf')),
                 contact_force_range=(-1.0, 1.0),
                 reset_noise_scale=0.1,
                 context=True,  # variable to decide if context is used or not
                 exclude_current_positions_from_observation=True,
                 max_episode_steps=200):
        self.current_step = 0
        self.max_height = 0
        self.context = context
        self.max_episode_steps = max_episode_steps
        self.goal = 0  # goal when training with context
        super().__init__(xml_file, ctrl_cost_weight, contact_cost_weight, healthy_reward, terminate_when_unhealthy,
                         healthy_z_range, contact_force_range, reset_noise_scale,
                         exclude_current_positions_from_observation)
    def step(self, action):
        self.current_step += 1
        self.do_simulation(action, self.frame_skip)
        height = self.get_body_com("torso")[2].copy()
        self.max_height = max(height, self.max_height)
        rewards = 0
        ctrl_cost = self.control_cost(action)
        contact_cost = self.contact_cost
        costs = ctrl_cost + contact_cost
        done = height < 0.3 # fall over -> is the 0.3 value from healthy_z_range? TODO change 0.3 to the value of healthy z angle
        if self.current_step == self.max_episode_steps or done:
            if self.context:
                # -10 for scaling the value of the distance between the max_height and the goal height; only used when context is enabled
                # height_reward = -10 * (np.linalg.norm(self.max_height - self.goal))
                height_reward = -10*np.linalg.norm(self.max_height - self.goal)
                # no healthy reward when using context, because we optimize a negative value
                healthy_reward = 0
            else:
                height_reward = self.max_height - 0.7
                healthy_reward = self.healthy_reward * self.current_step
            rewards = height_reward + healthy_reward
        obs = self._get_obs()
        reward = rewards - costs
        info = {
            'height': height,
            'max_height': self.max_height,
            'goal': self.goal
        }
        return obs, reward, done, info
    def _get_obs(self):
        return np.append(super()._get_obs(), self.goal)
    def reset(self):
        self.current_step = 0
        self.max_height = 0
        self.goal = np.random.uniform(1.0, 2.5,
                                      1)  # goal heights from 1.0 to 2.5; can be increased, but didnt work well with CMORE
        return super().reset()
    # reset_model had to be implemented in every env to make it deterministic
    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale
        qpos = self.init_qpos  # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
        qvel = self.init_qvel  # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
        self.set_state(qpos, qvel)
        observation = self._get_obs()
        return observation
 if __name__ == '__main__':
    render_mode = "human"  # "human" or "partial" or "final"
    env = ALRAntJumpEnv()
    obs = env.reset()
    for i in range(2000):
        # objective.load_result("/tmp/cma")
        # test with random actions
        ac = env.action_space.sample()
        obs, rew, d, info = env.step(ac)
        if i % 10 == 0:
            env.render(mode=render_mode)
        if d:
            env.reset()
    env.close()
--- a/alr_envs/alr/mujoco/ant_jump/assets/ant.xml
+++ b/alr_envs/alr/mujoco/ant_jump/assets/ant.xml
@ -0,0 +1,81 @@
 <mujoco model="ant">
  <compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
  <option integrator="RK4" timestep="0.01"/>
  <custom>
    <numeric data="0.0 0.0 0.55 1.0 0.0 0.0 0.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0 1.0" name="init_qpos"/>
  </custom>
  <default>
    <joint armature="1" damping="1" limited="true"/>
    <geom conaffinity="0" condim="3" density="5.0" friction="1 0.5 0.5" margin="0.01" rgba="0.8 0.6 0.4 1"/>
  </default>
  <asset>
    <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
    <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
    <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
    <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
    <material name="geom" texture="texgeom" texuniform="true"/>
  </asset>
  <worldbody>
    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
    <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
    <body name="torso" pos="0 0 0.75">
      <camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
      <geom name="torso_geom" pos="0 0 0" size="0.25" type="sphere"/>
      <joint armature="0" damping="0" limited="false" margin="0.01" name="root" pos="0 0 0" type="free"/>
      <body name="front_left_leg" pos="0 0 0">
        <geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="aux_1_geom" size="0.08" type="capsule"/>
        <body name="aux_1" pos="0.2 0.2 0">
          <joint axis="0 0 1" name="hip_1" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
          <geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="left_leg_geom" size="0.08" type="capsule"/>
          <body pos="0.2 0.2 0">
            <joint axis="-1 1 0" name="ankle_1" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
            <geom fromto="0.0 0.0 0.0 0.4 0.4 0.0" name="left_ankle_geom" size="0.08" type="capsule"/>
          </body>
        </body>
      </body>
      <body name="front_right_leg" pos="0 0 0">
        <geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="aux_2_geom" size="0.08" type="capsule"/>
        <body name="aux_2" pos="-0.2 0.2 0">
          <joint axis="0 0 1" name="hip_2" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
          <geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="right_leg_geom" size="0.08" type="capsule"/>
          <body pos="-0.2 0.2 0">
            <joint axis="1 1 0" name="ankle_2" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
            <geom fromto="0.0 0.0 0.0 -0.4 0.4 0.0" name="right_ankle_geom" size="0.08" type="capsule"/>
          </body>
        </body>
      </body>
      <body name="back_leg" pos="0 0 0">
        <geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="aux_3_geom" size="0.08" type="capsule"/>
        <body name="aux_3" pos="-0.2 -0.2 0">
          <joint axis="0 0 1" name="hip_3" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
          <geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="back_leg_geom" size="0.08" type="capsule"/>
          <body pos="-0.2 -0.2 0">
            <joint axis="-1 1 0" name="ankle_3" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
            <geom fromto="0.0 0.0 0.0 -0.4 -0.4 0.0" name="third_ankle_geom" size="0.08" type="capsule"/>
          </body>
        </body>
      </body>
      <body name="right_back_leg" pos="0 0 0">
        <geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="aux_4_geom" size="0.08" type="capsule"/>
        <body name="aux_4" pos="0.2 -0.2 0">
          <joint axis="0 0 1" name="hip_4" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
          <geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="rightback_leg_geom" size="0.08" type="capsule"/>
          <body pos="0.2 -0.2 0">
            <joint axis="1 1 0" name="ankle_4" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
            <geom fromto="0.0 0.0 0.0 0.4 -0.4 0.0" name="fourth_ankle_geom" size="0.08" type="capsule"/>
          </body>
        </body>
      </body>
    </body>
  </worldbody>
  <actuator>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_4" gear="150"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_4" gear="150"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_1" gear="150"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_1" gear="150"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_2" gear="150"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_2" gear="150"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_3" gear="150"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_3" gear="150"/>
  </actuator>
 </mujoco>
--- a/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
@ -0,0 +1,31 @@
 from typing import Tuple, Union
 import numpy as np
 from mp_env_api import MPEnvWrapper
 class MPWrapper(MPEnvWrapper):
    @property
    def active_obs(self):
        return np.hstack([
            [False] * 111, # ant has 111 dimensional observation space !!
            [True] # goal height
        ])
    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        return self.env.sim.data.qpos[7:15].copy()
    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        return self.env.sim.data.qvel[6:14].copy()
    @property
    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
    @property
    def dt(self) -> Union[float, int]:
        return self.env.dt
--- a/alr_envs/alr/mujoco/half_cheetah_jump/init.py
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/init.py
@ -0,0 +1 @@
 from .mp_wrapper import MPWrapper
--- a/alr_envs/alr/mujoco/half_cheetah_jump/assets/cheetah.xml
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/assets/cheetah.xml
@ -0,0 +1,62 @@
 <mujoco model="cheetah">
  <compiler angle="radian" coordinate="local" inertiafromgeom="true" settotalmass="14"/>
  <default>
    <joint armature=".1" damping=".01" limited="true" solimplimit="0 .8 .03" solreflimit=".02 1" stiffness="8"/>
    <geom conaffinity="0" condim="3" contype="1" friction=".4 .1 .1" rgba="0.8 0.6 .4 1" solimp="0.0 0.8 0.01" solref="0.02 1"/>
    <motor ctrllimited="true" ctrlrange="-1 1"/>
  </default>
  <size nstack="300000" nuser_geom="1"/>
  <option gravity="0 0 -9.81" timestep="0.01"/>
  <asset>
    <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
    <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
    <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
    <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
    <material name="geom" texture="texgeom" texuniform="true"/>
  </asset>
  <worldbody>
    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
    <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
    <body name="torso" pos="0 0 .7">
      <camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
      <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 0" stiffness="0" type="hinge"/>
      <geom fromto="-.5 0 0 .5 0 0" name="torso" size="0.046" type="capsule"/>
      <geom axisangle="0 1 0 .87" name="head" pos=".6 0 .1" size="0.046 .15" type="capsule"/>
      <!-- <site name='tip'  pos='.15 0 .11'/>-->
      <body name="bthigh" pos="-.5 0 0">
        <joint axis="0 1 0" damping="6" name="bthigh" pos="0 0 0" range="-.52 1.05" stiffness="240" type="hinge"/>
        <geom axisangle="0 1 0 -3.8" name="bthigh" pos=".1 0 -.13" size="0.046 .145" type="capsule"/>
        <body name="bshin" pos=".16 0 -.25">
          <joint axis="0 1 0" damping="4.5" name="bshin" pos="0 0 0" range="-.785 .785" stiffness="180" type="hinge"/>
          <geom axisangle="0 1 0 -2.03" name="bshin" pos="-.14 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .15" type="capsule"/>
          <body name="bfoot" pos="-.28 0 -.14">
            <joint axis="0 1 0" damping="3" name="bfoot" pos="0 0 0" range="-.4 .785" stiffness="120" type="hinge"/>
            <geom axisangle="0 1 0 -.27" name="bfoot" pos=".03 0 -.097" rgba="0.9 0.6 0.6 1" size="0.046 .094" type="capsule"/>
          </body>
        </body>
      </body>
      <body name="fthigh" pos=".5 0 0">
        <joint axis="0 1 0" damping="4.5" name="fthigh" pos="0 0 0" range="-1 .7" stiffness="180" type="hinge"/>
        <geom axisangle="0 1 0 .52" name="fthigh" pos="-.07 0 -.12" size="0.046 .133" type="capsule"/>
        <body name="fshin" pos="-.14 0 -.24">
          <joint axis="0 1 0" damping="3" name="fshin" pos="0 0 0" range="-1.2 .87" stiffness="120" type="hinge"/>
          <geom axisangle="0 1 0 -.6" name="fshin" pos=".065 0 -.09" rgba="0.9 0.6 0.6 1" size="0.046 .106" type="capsule"/>
          <body name="ffoot" pos=".13 0 -.18">
            <joint axis="0 1 0" damping="1.5" name="ffoot" pos="0 0 0" range="-.5 .5" stiffness="60" type="hinge"/>
            <geom axisangle="0 1 0 -.6" name="ffoot" pos=".045 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .07" type="capsule"/>
          </body>
        </body>
      </body>
    </body>
  </worldbody>
  <actuator>
    <motor gear="120" joint="bthigh" name="bthigh"/>
    <motor gear="90" joint="bshin" name="bshin"/>
    <motor gear="60" joint="bfoot" name="bfoot"/>
    <motor gear="120" joint="fthigh" name="fthigh"/>
    <motor gear="60" joint="fshin" name="fshin"/>
    <motor gear="30" joint="ffoot" name="ffoot"/>
  </actuator>
 </mujoco>
--- a/alr_envs/alr/mujoco/half_cheetah_jump/half_cheetah_jump.py
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/half_cheetah_jump.py
@ -0,0 +1,100 @@
 import os
 from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
 import numpy as np
 MAX_EPISODE_STEPS_HALFCHEETAHJUMP = 100
 class ALRHalfCheetahJumpEnv(HalfCheetahEnv):
    """
    ctrl_cost_weight 0.1 -> 0.0
    """
    def __init__(self,
                 xml_file='cheetah.xml',
                 forward_reward_weight=1.0,
                 ctrl_cost_weight=0.0,
                 reset_noise_scale=0.1,
                 context=True,
                 exclude_current_positions_from_observation=True,
                 max_episode_steps=100):
        self.current_step = 0
        self.max_height = 0
        self.max_episode_steps = max_episode_steps
        self.goal = 0
        self.context = context
        xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
        super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, reset_noise_scale,
                         exclude_current_positions_from_observation)
    def step(self, action):
        self.current_step += 1
        self.do_simulation(action, self.frame_skip)
        height_after = self.get_body_com("torso")[2]
        self.max_height = max(height_after, self.max_height)
        ## Didnt use fell_over, because base env also has no done condition - Paul and Marc
        # fell_over = abs(self.sim.data.qpos[2]) > 2.5  # how to figure out if the cheetah fell over? -> 2.5 oke?
        # TODO: Should a fall over be checked herE?
        done = False
        ctrl_cost = self.control_cost(action)
        costs = ctrl_cost
        if self.current_step == self.max_episode_steps:
            height_goal_distance = -10*np.linalg.norm(self.max_height - self.goal) + 1e-8 if self.context \
                                                                                       else self.max_height
            rewards = self._forward_reward_weight * height_goal_distance
        else:
            rewards = 0
        observation = self._get_obs()
        reward = rewards - costs
        info = {
            'height': height_after,
            'max_height': self.max_height
        }
        return observation, reward, done, info
    def _get_obs(self):
        return np.append(super()._get_obs(), self.goal)
    def reset(self):
        self.max_height = 0
        self.current_step = 0
        self.goal = np.random.uniform(1.1, 1.6, 1)  # 1.1 1.6
        return super().reset()
    # overwrite reset_model to make it deterministic
    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale
        qpos = self.init_qpos  # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
        qvel = self.init_qvel  # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
        self.set_state(qpos, qvel)
        observation = self._get_obs()
        return observation
 if __name__ == '__main__':
    render_mode = "human"  # "human" or "partial" or "final"
    env = ALRHalfCheetahJumpEnv()
    obs = env.reset()
    for i in range(2000):
        # objective.load_result("/tmp/cma")
        # test with random actions
        ac = env.action_space.sample()
        obs, rew, d, info = env.step(ac)
        if i % 10 == 0:
            env.render(mode=render_mode)
        if d:
            print('After ', i, ' steps, done: ', d)
            env.reset()
    env.close()
--- a/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py
@ -0,0 +1,30 @@
 from typing import Tuple, Union
 import numpy as np
 from mp_env_api import MPEnvWrapper
 class MPWrapper(MPEnvWrapper):
    @property
    def active_obs(self):
        return np.hstack([
            [False] * 17,
            [True] # goal height
        ])
    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        return self.env.sim.data.qpos[3:9].copy()
    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        return self.env.sim.data.qvel[3:9].copy()
    @property
    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
    @property
    def dt(self) -> Union[float, int]:
        return self.env.dt
--- a/alr_envs/alr/mujoco/hopper_jump/init.py
+++ b/alr_envs/alr/mujoco/hopper_jump/init.py
@ -0,0 +1 @@
 from .mp_wrapper import MPWrapper
--- a/alr_envs/alr/mujoco/hopper_jump/assets/hopper.xml
+++ b/alr_envs/alr/mujoco/hopper_jump/assets/hopper.xml
@ -0,0 +1,48 @@
 <mujoco model="hopper">
  <compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
  <default>
    <joint armature="1" damping="1" limited="true"/>
    <geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
    <motor ctrllimited="true" ctrlrange="-.4 .4"/>
  </default>
  <option integrator="RK4" timestep="0.002"/>
  <visual>
    <map znear="0.02"/>
  </visual>
  <worldbody>
    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
    <geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
    <body name="torso" pos="0 0 1.25">
      <camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
      <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
      <geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
      <body name="thigh" pos="0 0 1.05">
        <joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
        <geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
        <body name="leg" pos="0 0 0.35">
          <joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
          <geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
          <body name="foot" pos="0.13/2 0 0.1">
            <joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
            <geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
          </body>
        </body>
      </body>
    </body>
  </worldbody>
  <actuator>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
  </actuator>
    <asset>
        <texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
            width="100" height="100"/>
        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
        <material name="geom" texture="texgeom" texuniform="true"/>
    </asset>
 </mujoco>
--- a/alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump_on_box.xml
+++ b/alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump_on_box.xml
@ -0,0 +1,51 @@
 <mujoco model="hopper">
  <compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
  <default>
    <joint armature="1" damping="1" limited="true"/>
    <geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
    <motor ctrllimited="true" ctrlrange="-.4 .4"/>
  </default>
  <option integrator="RK4" timestep="0.002"/>
  <visual>
    <map znear="0.02"/>
  </visual>
  <worldbody>
    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
    <geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
    <body name="torso" pos="0 0 1.25">
      <camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
      <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
      <geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
      <body name="thigh" pos="0 0 1.05">
        <joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
        <geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
        <body name="leg" pos="0 0 0.35">
          <joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
          <geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
          <body name="foot" pos="0.13/2 0 0.1">
            <joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
            <geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
          </body>
        </body>
      </body>
    </body>
    <body name="box" pos="1 0 0">
        <geom friction="1.0" fromto="0.48 0 0 1 0 0" name="basket_ground_geom" size="0.3" type="box" rgba="1 0 0 1"/>
    </body>
  </worldbody>
  <actuator>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
  </actuator>
    <asset>
        <texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
            width="100" height="100"/>
        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
        <material name="geom" texture="texgeom" texuniform="true"/>
    </asset>
 </mujoco>
--- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
@ -0,0 +1,110 @@
 from gym.envs.mujoco.hopper_v3 import HopperEnv
 import numpy as np
 MAX_EPISODE_STEPS_HOPPERJUMP = 250
 class ALRHopperJumpEnv(HopperEnv):
    """
    Initialization changes to normal Hopper:
    - healthy_reward: 1.0 -> 0.1 -> 0
    - healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
    - healthy_z_range: (0.7, float('inf')) -> (0.5, float('inf'))
    """
    def __init__(self,
                 xml_file='hopper.xml',
                 forward_reward_weight=1.0,
                 ctrl_cost_weight=1e-3,
                 healthy_reward=0.0,
                 penalty=0.0,
                 context=True,
                 terminate_when_unhealthy=True,
                 healthy_state_range=(-100.0, 100.0),
                 healthy_z_range=(0.5, float('inf')),
                 healthy_angle_range=(-float('inf'), float('inf')),
                 reset_noise_scale=5e-3,
                 exclude_current_positions_from_observation=True,
                 max_episode_steps=250):
        self.current_step = 0
        self.max_height = 0
        self.max_episode_steps = max_episode_steps
        self.penalty = penalty
        self.goal = 0
        self.context = context
        self.exclude_current_positions_from_observation = exclude_current_positions_from_observation
        super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
                         healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
                         exclude_current_positions_from_observation)
    def step(self, action):
        self.current_step += 1
        self.do_simulation(action, self.frame_skip)
        height_after = self.get_body_com("torso")[2]
        self.max_height = max(height_after, self.max_height)
        ctrl_cost = self.control_cost(action)
        costs = ctrl_cost
        done = False
        if self.current_step >= self.max_episode_steps:
            hight_goal_distance = -10*np.linalg.norm(self.max_height - self.goal) if self.context else self.max_height
            healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
            height_reward = self._forward_reward_weight * hight_goal_distance # maybe move reward calculation into if structure and define two different _forward_reward_weight variables for context and episodic seperatley
            rewards = height_reward + healthy_reward
        else:
            # penalty for wrong start direction of first two joints; not needed, could be removed
            rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0
        observation = self._get_obs()
        reward = rewards - costs
        info = {
            'height'    : height_after,
            'max_height': self.max_height,
            'goal' : self.goal
        }
        return observation, reward, done, info
    def _get_obs(self):
        return np.append(super()._get_obs(), self.goal)
    def reset(self):
        self.goal = np.random.uniform(1.4, 2.3, 1) # 1.3 2.3
        self.max_height = 0
        self.current_step = 0
        return super().reset()
    # overwrite reset_model to make it deterministic
    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale
        qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
        qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
        self.set_state(qpos, qvel)
        observation = self._get_obs()
        return observation
 if __name__ == '__main__':
    render_mode = "human"  # "human" or "partial" or "final"
    env = ALRHopperJumpEnv()
    obs = env.reset()
    for i in range(2000):
        # objective.load_result("/tmp/cma")
        # test with random actions
        ac = env.action_space.sample()
        obs, rew, d, info = env.step(ac)
        if i % 10 == 0:
            env.render(mode=render_mode)
        if d:
            print('After ', i, ' steps, done: ', d)
            env.reset()
    env.close()
--- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump_on_box.py
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump_on_box.py
@ -0,0 +1,170 @@
 from gym.envs.mujoco.hopper_v3 import HopperEnv
 import numpy as np
 import os
 MAX_EPISODE_STEPS_HOPPERJUMPONBOX = 250
 class ALRHopperJumpOnBoxEnv(HopperEnv):
    """
    Initialization changes to normal Hopper:
    - healthy_reward: 1.0 -> 0.01 -> 0.001
    - healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
    """
    def __init__(self,
                 xml_file='hopper_jump_on_box.xml',
                 forward_reward_weight=1.0,
                 ctrl_cost_weight=1e-3,
                 healthy_reward=0.001,
                 terminate_when_unhealthy=True,
                 healthy_state_range=(-100.0, 100.0),
                 healthy_z_range=(0.7, float('inf')),
                 healthy_angle_range=(-float('inf'), float('inf')),
                 reset_noise_scale=5e-3,
                 context=True,
                 exclude_current_positions_from_observation=True,
                 max_episode_steps=250):
        self.current_step = 0
        self.max_height = 0
        self.max_episode_steps = max_episode_steps
        self.min_distance = 5000  # what value?
        self.hopper_on_box = False
        self.context = context
        self.box_x = 1
        xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
        super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
                         healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
                         exclude_current_positions_from_observation)
    def step(self, action):
        self.current_step += 1
        self.do_simulation(action, self.frame_skip)
        height_after = self.get_body_com("torso")[2]
        foot_pos = self.get_body_com("foot")
        self.max_height = max(height_after, self.max_height)
        vx, vz, vangle = self.sim.data.qvel[0:3]
        s = self.state_vector()
        fell_over = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and (height_after > .7))
        box_pos = self.get_body_com("box")
        box_size = 0.3
        box_height = 0.3
        box_center = (box_pos[0] + (box_size / 2), box_pos[1], box_height)
        foot_length = 0.3
        foot_center = foot_pos[0] - (foot_length / 2)
        dist = np.linalg.norm(foot_pos - box_center)
        self.min_distance = min(dist, self.min_distance)
        # check if foot is on box
        is_on_box_x = box_pos[0] <= foot_center <= box_pos[0] + box_size
        is_on_box_y = True  # is y always true because he can only move in x and z direction?
        is_on_box_z = box_height - 0.02 <= foot_pos[2] <= box_height + 0.02
        is_on_box = is_on_box_x and is_on_box_y and is_on_box_z
        if is_on_box: self.hopper_on_box = True
        ctrl_cost = self.control_cost(action)
        costs = ctrl_cost
        done = fell_over or self.hopper_on_box
        if self.current_step >= self.max_episode_steps or done:
            done = False
            max_height = self.max_height.copy()
            min_distance = self.min_distance.copy()
            alive_bonus = self._healthy_reward * self.current_step
            box_bonus = 0
            rewards = 0
            # TODO explain what we did here for the calculation of the reward
            if is_on_box:
                if self.context:
                    rewards -= 100 * vx ** 2 if 100 * vx ** 2 < 1 else 1
                else:
                    box_bonus = 10
                    rewards += box_bonus
                    # rewards -= dist * dist ???? why when already on box?
                    # reward -= 90 - abs(angle)
                    rewards -= 100 * vx ** 2 if 100 * vx ** 2 < 1 else 1
                    rewards += max_height * 3
                    rewards += alive_bonus
            else:
                if self.context:
                    rewards = -10 - min_distance
                    rewards += max_height * 3
                else:
                    # reward -= (dist*dist)
                    rewards -= min_distance * min_distance
                    # rewards -= dist / self.max_distance
                    rewards += max_height
                    rewards += alive_bonus
        else:
            rewards = 0
        observation = self._get_obs()
        reward = rewards - costs
        info = {
            'height': height_after,
            'max_height': self.max_height.copy(),
            'min_distance': self.min_distance,
            'goal': self.box_x,
        }
        return observation, reward, done, info
    def _get_obs(self):
        return np.append(super()._get_obs(), self.box_x)
    def reset(self):
        self.max_height = 0
        self.min_distance = 5000
        self.current_step = 0
        self.hopper_on_box = False
        if self.context:
            box_id = self.sim.model.body_name2id("box")
            self.box_x = np.random.uniform(1, 3, 1)
            self.sim.model.body_pos[box_id] = [self.box_x, 0, 0]
        return super().reset()
    # overwrite reset_model to make it deterministic
    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale
        qpos = self.init_qpos  # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
        qvel = self.init_qvel  # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
        self.set_state(qpos, qvel)
        observation = self._get_obs()
        return observation
 if __name__ == '__main__':
    render_mode = "human"  # "human" or "partial" or "final"
    env = ALRHopperJumpOnBoxEnv()
    obs = env.reset()
    for i in range(2000):
        # objective.load_result("/tmp/cma")
        # test with random actions
        ac = env.action_space.sample()
        obs, rew, d, info = env.step(ac)
        if i % 10 == 0:
            env.render(mode=render_mode)
        if d:
            print('After ', i, ' steps, done: ', d)
            env.reset()
    env.close()
--- a/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
@ -0,0 +1,31 @@
 from typing import Tuple, Union
 import numpy as np
 from mp_env_api import MPEnvWrapper
 class MPWrapper(MPEnvWrapper):
    @property
    def active_obs(self):
        return np.hstack([
            [False] * (5 + int(not self.exclude_current_positions_from_observation)),  # position
            [False] * 6,  # velocity
            [True]
        ])
    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        return self.env.sim.data.qpos[3:6].copy()
    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        return self.env.sim.data.qvel[3:6].copy()
    @property
    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
    @property
    def dt(self) -> Union[float, int]:
        return self.env.dt
--- a/alr_envs/alr/mujoco/hopper_throw/init.py
+++ b/alr_envs/alr/mujoco/hopper_throw/init.py
@ -0,0 +1 @@
 from .mp_wrapper import MPWrapper
--- a/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw.xml
+++ b/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw.xml
@ -0,0 +1,56 @@
 <mujoco model="hopper">
  <compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
  <default>
    <joint armature="1" damping="1" limited="true"/>
    <geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
    <motor ctrllimited="true" ctrlrange="-.4 .4"/>
  </default>
  <option integrator="RK4" timestep="0.002"/>
  <visual>
    <map znear="0.02"/>
  </visual>
  <worldbody>
    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
    <geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
    <body name="torso" pos="0 0 1.25">
      <camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
      <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
      <geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
      <body name="thigh" pos="0 0 1.05">
        <joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
        <geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
        <body name="leg" pos="0 0 0.35">
          <joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
          <geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
          <body name="foot" pos="0.13/2 0 0.1">
            <joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
            <geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
          </body>
        </body>
      </body>
    </body>
    <body name="ball" pos="0 0 1.53">
          <joint armature="0" axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
          <joint armature="0" axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
          <joint armature="0" axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
          <geom pos="0 0 1.53" priority= "1" size="0.025 0.025 0.025" type="sphere" condim="4" name="ball_geom" rgba="0.8 0.2 0.1 1" mass="0.1"
                friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="-10000 -10"/>
          <site name="target_ball" pos="0 0 1.53" size="0.04 0.04 0.04" rgba="1 0 0 1" type="sphere"/>
    </body>
  </worldbody>
  <actuator>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
  </actuator>
    <asset>
        <texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
            width="100" height="100"/>
        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
        <material name="geom" texture="texgeom" texuniform="true"/>
    </asset>
 </mujoco>
--- a/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml
+++ b/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml
@ -0,0 +1,132 @@
 <mujoco model="hopper">
  <compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
  <default>
    <joint armature="1" damping="1" limited="true"/>
    <geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
    <motor ctrllimited="true" ctrlrange="-.4 .4"/>
  </default>
  <option integrator="RK4" timestep="0.002"/>
  <visual>
    <map znear="0.02"/>
  </visual>
  <worldbody>
    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
    <geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
    <body name="torso" pos="0 0 1.25">
      <camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
      <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
      <geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
      <body name="thigh" pos="0 0 1.05">
        <joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
        <geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
        <body name="leg" pos="0 0 0.35">
          <joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
          <geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
          <body name="foot" pos="0.13/2 0 0.1">
            <joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
            <geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
          </body>
        </body>
      </body>
    </body>
    <body name="ball" pos="0 0 1.53">
          <joint armature="0" axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
          <joint armature="0" axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
          <joint armature="0" axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
          <geom pos="0 0 1.53" priority= "1" size="0.025 0.025 0.025" type="sphere" condim="4" name="ball_geom" rgba="0.8 0.2 0.1 1" mass="0.1"
                friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="-10000 -10"/>
          <site name="target_ball" pos="0 0 1.53" size="0.04 0.04 0.04" rgba="1 0 0 1" type="sphere"/>
    </body>
    <body name="basket_ground" pos="5 0 0">
        <geom friction="0.9" fromto="5 0 0 5.3 0 0" name="basket_ground_geom" size="0.1 0.4 0.3" type="box"/>
        <body name="edge1" pos="5 0 0">
          <geom friction="2.0" fromto="5 0 0 5 0 0.2" name="edge1_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge2" pos="5 0 0.05">
          <geom friction="2.0" fromto="5 0.05 0 5 0.05 0.2" name="edge2_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge3" pos="5 0 0.1">
          <geom friction="2.0" fromto="5 0.1 0 5 0.1 0.2" name="edge3_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge4" pos="5 0 0.15">
          <geom friction="2.0" fromto="5 0.15 0 5 0.15 0.2" name="edge4_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge5" pos="5.05 0 0.15">
          <geom friction="2.0" fromto="5.05 0.15 0 5.05 0.15 0.2" name="edge5_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge6" pos="5.1 0 0.15">
          <geom friction="2.0" fromto="5.1 0.15 0 5.1 0.15 0.2" name="edge6_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge7" pos="5.15 0 0.15">
          <geom friction="2.0" fromto="5.15 0.15 0 5.15 0.15 0.2" name="edge7_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge8" pos="5.2 0 0.15">
          <geom friction="2.0" fromto="5.2 0.15 0 5.2 0.15 0.2" name="edge8_geom" size="0.04" type="capsule"/>
        </body>
         <body name="edge9" pos="5.25 0 0.15">
          <geom friction="2.0" fromto="5.25 0.15 0 5.25 0.15 0.2" name="edge9_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge10" pos="5.3 0 0.15">
          <geom friction="2.0" fromto="5.3 0.15 0 5.3 0.15 0.2" name="edge10_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge11" pos="5.3 0 0.1">
          <geom friction="2.0" fromto="5.3 0.1 0 5.3 0.1 0.2" name="edge11_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge12" pos="5.3 0 0.05">
          <geom friction="2.0" fromto="5.3 0.05 0 5.3 0.05 0.2" name="edge12_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge13" pos="5.3 0 0.0">
          <geom friction="2.0" fromto="5.3 0 0 5.3 0 0.2" name="edge13_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge14" pos="5.3 0 -0.05">
          <geom friction="2.0" fromto="5.3 -0.05 0 5.3 -0.05 0.2" name="edge14_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge15" pos="5.3 0 -0.1">
          <geom friction="2.0" fromto="5.3 -0.1 0 5.3 -0.1 0.2" name="edge15_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge16" pos="5.3 0 -0.15">
          <geom friction="2.0" fromto="5.3 -0.15 0 5.3 -0.15 0.2" name="edge16_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge20" pos="5.25 0 -0.15">
          <geom friction="2.0" fromto="5.25 -0.15 0 5.25 -0.15 0.2" name="edge20_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge21" pos="5.2 0 -0.15">
          <geom friction="2.0" fromto="5.2 -0.15 0 5.2 -0.15 0.2" name="edge21_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge22" pos="5.15 0 -0.15">
          <geom friction="2.0" fromto="5.15 -0.15 0 5.15 -0.15 0.2" name="edge22_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge23" pos="5.1 0 -0.15">
          <geom friction="2.0" fromto="5.1 -0.15 0 5.1 -0.15 0.2" name="edge23_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge24" pos="5.05 0 -0.15">
          <geom friction="2.0" fromto="5.05 -0.15 0 5.05 -0.15 0.2" name="edge24_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge25" pos="5 0 -0.15">
          <geom friction="2.0" fromto="5 -0.15 0 5 -0.15 0.2" name="edge25_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge26" pos="5 0 -0.1">
          <geom friction="2.0" fromto="5 -0.1 0 5 -0.1 0.2" name="edge26_geom" size="0.04" type="capsule"/>
        </body>
        <body name="edge27" pos="5 0 -0.05">
          <geom friction="2.0" fromto="5 -0.05 0 5 -0.05 0.2" name="edge27_geom" size="0.04" type="capsule"/>
        </body>
    </body>
  </worldbody>
  <actuator>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
  </actuator>
    <asset>
        <texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
            width="100" height="100"/>
        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
        <material name="geom" texture="texgeom" texuniform="true"/>
    </asset>
 </mujoco>
--- a/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py
+++ b/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py
@ -0,0 +1,113 @@
 import os
 from gym.envs.mujoco.hopper_v3 import HopperEnv
 import numpy as np
 MAX_EPISODE_STEPS_HOPPERTHROW = 250
 class ALRHopperThrowEnv(HopperEnv):
    """
    Initialization changes to normal Hopper:
    - healthy_reward: 1.0 -> 0.0 -> 0.1
    - forward_reward_weight -> 5.0
    - healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
    Reward changes to normal Hopper:
    - velocity: (x_position_after - x_position_before) -> self.get_body_com("ball")[0]
    """
    def __init__(self,
                 xml_file='hopper_throw.xml',
                 forward_reward_weight=5.0,
                 ctrl_cost_weight=1e-3,
                 healthy_reward=0.1,
                 terminate_when_unhealthy=True,
                 healthy_state_range=(-100.0, 100.0),
                 healthy_z_range=(0.7, float('inf')),
                 healthy_angle_range=(-float('inf'), float('inf')),
                 reset_noise_scale=5e-3,
                 context = True,
                 exclude_current_positions_from_observation=True,
                 max_episode_steps=250):
        xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
        self.current_step = 0
        self.max_episode_steps = max_episode_steps
        self.context = context
        self.goal = 0
        super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
                         healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
                         exclude_current_positions_from_observation)
    def step(self, action):
        self.current_step += 1
        self.do_simulation(action, self.frame_skip)
        ball_pos_after = self.get_body_com("ball")[0] #abs(self.get_body_com("ball")[0]) # use x and y to get point and use euclid distance as reward?
        ball_pos_after_y = self.get_body_com("ball")[2]
        # done = self.done TODO We should use this, not sure why there is no other termination; ball_landed should be enough, because we only look at the throw itself? - Paul and Marc
        ball_landed = self.get_body_com("ball")[2] <= 0.05
        done = ball_landed
        ctrl_cost = self.control_cost(action)
        costs = ctrl_cost
        rewards = 0
        if self.current_step >= self.max_episode_steps or done:
            distance_reward = -np.linalg.norm(ball_pos_after - self.goal) if self.context else \
                                                                            self._forward_reward_weight * ball_pos_after
            healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
            rewards = distance_reward + healthy_reward
        observation = self._get_obs()
        reward = rewards - costs
        info = {
            'ball_pos': ball_pos_after,
            'ball_pos_y': ball_pos_after_y,
            'current_step' : self.current_step,
            'goal' : self.goal,
        }
        return observation, reward, done, info
    def _get_obs(self):
        return np.append(super()._get_obs(), self.goal)
    def reset(self):
        self.current_step = 0
        self.goal = self.goal = np.random.uniform(2.0, 6.0, 1) # 0.5 8.0
        return super().reset()
    # overwrite reset_model to make it deterministic
    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale
        qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
        qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
        self.set_state(qpos, qvel)
        observation = self._get_obs()
        return observation
 if __name__ == '__main__':
    render_mode = "human"  # "human" or "partial" or "final"
    env = ALRHopperThrowEnv()
    obs = env.reset()
    for i in range(2000):
        # objective.load_result("/tmp/cma")
        # test with random actions
        ac = env.action_space.sample()
        obs, rew, d, info = env.step(ac)
        if i % 10 == 0:
            env.render(mode=render_mode)
        if d:
            print('After ', i, ' steps, done: ', d)
            env.reset()
    env.close()
--- a/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py
+++ b/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py
@ -0,0 +1,143 @@
 import os
 from gym.envs.mujoco.hopper_v3 import HopperEnv
 import numpy as np
 MAX_EPISODE_STEPS_HOPPERTHROWINBASKET = 250
 class ALRHopperThrowInBasketEnv(HopperEnv):
    """
    Initialization changes to normal Hopper:
    - healthy_reward: 1.0 -> 0.0
    - healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
    - hit_basket_reward: - -> 10
    Reward changes to normal Hopper:
    - velocity: (x_position_after - x_position_before) -> (ball_position_after - ball_position_before)
    """
    def __init__(self,
                 xml_file='hopper_throw_in_basket.xml',
                 forward_reward_weight=1.0,
                 ctrl_cost_weight=1e-3,
                 healthy_reward=0.0,
                 hit_basket_reward=10,
                 basket_size=0.3,
                 terminate_when_unhealthy=True,
                 healthy_state_range=(-100.0, 100.0),
                 healthy_z_range=(0.7, float('inf')),
                 healthy_angle_range=(-float('inf'), float('inf')),
                 reset_noise_scale=5e-3,
                 context=True,
                 penalty=0.0,
                 exclude_current_positions_from_observation=True,
                 max_episode_steps = 250):
        self.hit_basket_reward = hit_basket_reward
        self.current_step = 0
        self.max_episode_steps = max_episode_steps
        self.ball_in_basket = False
        self.basket_size = basket_size
        self.context = context
        self.penalty = penalty
        self.basket_x = 5
        xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
        super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
                         healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
                         exclude_current_positions_from_observation)
    def step(self, action):
        self.current_step += 1
        self.do_simulation(action, self.frame_skip)
        ball_pos = self.get_body_com("ball")
        basket_pos = self.get_body_com("basket_ground")
        basket_center = (basket_pos[0] + 0.5, basket_pos[1], basket_pos[2])
        is_in_basket_x = ball_pos[0] >= basket_pos[0] and ball_pos[0] <= basket_pos[0] + self.basket_size
        is_in_basket_y = ball_pos[1] >= basket_pos[1] - (self.basket_size/2) and ball_pos[1] <= basket_pos[1] + (self.basket_size/2)
        is_in_basket_z = ball_pos[2] < 0.1
        is_in_basket = is_in_basket_x and is_in_basket_y and is_in_basket_z
        if is_in_basket: self.ball_in_basket = True
        ball_landed = self.get_body_com("ball")[2] <= 0.05
        done = ball_landed or is_in_basket
        rewards = 0
        ctrl_cost = self.control_cost(action)
        costs = ctrl_cost
        if self.current_step >= self.max_episode_steps or done:
            if is_in_basket:
                if not self.context:
                    rewards += self.hit_basket_reward
            else:
                dist = np.linalg.norm(ball_pos-basket_center)
                if self.context:
                    rewards = -10 * dist
                else:
                    rewards -= (dist*dist)
        else:
            # penalty not needed
            rewards += ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0 #too much of a penalty?
        observation = self._get_obs()
        reward = rewards - costs
        info = {
            'ball_pos': ball_pos[0],
        }
        return observation, reward, done, info
    def _get_obs(self):
        return np.append(super()._get_obs(), self.basket_x)
    def reset(self):
        if self.max_episode_steps == 10:
            # We have to initialize this here, because the spec is only added after creating the env.
            self.max_episode_steps = self.spec.max_episode_steps
        self.current_step = 0
        self.ball_in_basket = False
        if self.context:
            basket_id = self.sim.model.body_name2id("basket_ground")
            self.basket_x = np.random.uniform(3, 7, 1)
            self.sim.model.body_pos[basket_id] = [self.basket_x, 0, 0]
        return super().reset()
    # overwrite reset_model to make it deterministic
    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale
        qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
        qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
        self.set_state(qpos, qvel)
        observation = self._get_obs()
        return observation
 if __name__ == '__main__':
    render_mode = "human"  # "human" or "partial" or "final"
    env = ALRHopperThrowInBasketEnv()
    obs = env.reset()
    for i in range(2000):
        # objective.load_result("/tmp/cma")
        # test with random actions
        ac = env.action_space.sample()
        obs, rew, d, info = env.step(ac)
        if i % 10 == 0:
            env.render(mode=render_mode)
        if d:
            print('After ', i, ' steps, done: ', d)
            env.reset()
    env.close()
--- a/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
@ -0,0 +1,30 @@
 from typing import Tuple, Union
 import numpy as np
 from mp_env_api import MPEnvWrapper
 class MPWrapper(MPEnvWrapper):
    @property
    def active_obs(self):
        return np.hstack([
            [False] * 17,
            [True] # goal pos
        ])
    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        return self.env.sim.data.qpos[3:6].copy()
    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        return self.env.sim.data.qvel[3:6].copy()
    @property
    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
    @property
    def dt(self) -> Union[float, int]:
        return self.env.dt
--- a/alr_envs/alr/mujoco/walker_2d_jump/init.py
+++ b/alr_envs/alr/mujoco/walker_2d_jump/init.py
@ -0,0 +1 @@
 from .mp_wrapper import MPWrapper
--- a/alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml
+++ b/alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml
@ -0,0 +1,62 @@
 <mujoco model="walker2d">
  <compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
  <default>
    <joint armature="0.01" damping=".1" limited="true"/>
    <geom conaffinity="0" condim="3" contype="1" density="1000" friction=".7 .1 .1" rgba="0.8 0.6 .4 1"/>
  </default>
  <option integrator="RK4" timestep="0.002"/>
  <worldbody>
    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
    <geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane" material="MatPlane"/>
    <body name="torso" pos="0 0 1.25">
      <camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
      <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
      <geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
      <body name="thigh" pos="0 0 1.05">
        <joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
        <geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
        <body name="leg" pos="0 0 0.35">
          <joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
          <geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
          <body name="foot" pos="0.2/2 0 0.1">
            <joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
            <geom friction="0.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
          </body>
        </body>
      </body>
      <!-- copied and then replace thigh->thigh_left, leg->leg_left, foot->foot_right -->
      <body name="thigh_left" pos="0 0 1.05">
        <joint axis="0 -1 0" name="thigh_left_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
        <geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_left_geom" rgba=".7 .3 .6 1" size="0.05" type="capsule"/>
        <body name="leg_left" pos="0 0 0.35">
          <joint axis="0 -1 0" name="leg_left_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
          <geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_left_geom" rgba=".7 .3 .6 1" size="0.04" type="capsule"/>
          <body name="foot_left" pos="0.2/2 0 0.1">
            <joint axis="0 -1 0" name="foot_left_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
            <geom friction="1.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_left_geom" rgba=".7 .3 .6 1" size="0.06" type="capsule"/>
          </body>
        </body>
      </body>
    </body>
  </worldbody>
  <actuator>
    <!-- <motor joint="torso_joint" ctrlrange="-100.0 100.0" isctrllimited="true"/>-->
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_left_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_left_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_left_joint"/>
    <!-- <motor joint="finger2_rot" ctrlrange="-20.0 20.0" isctrllimited="true"/>-->
  </actuator>
    <asset>
        <texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
            width="100" height="100"/>
        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
        <material name="geom" texture="texgeom" texuniform="true"/>
    </asset>
 </mujoco>
--- a/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
@ -0,0 +1,30 @@
 from typing import Tuple, Union
 import numpy as np
 from mp_env_api import MPEnvWrapper
 class MPWrapper(MPEnvWrapper):
    @property
    def active_obs(self):
        return np.hstack([
            [False] * 17,
            [True] # goal pos
        ])
    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        return self.env.sim.data.qpos[3:9].copy()
    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        return self.env.sim.data.qvel[3:9].copy()
    @property
    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
    @property
    def dt(self) -> Union[float, int]:
        return self.env.dt
--- a/alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py
+++ b/alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py
@ -0,0 +1,113 @@
 import os
 from gym.envs.mujoco.walker2d_v3 import Walker2dEnv
 import numpy as np
 MAX_EPISODE_STEPS_WALKERJUMP = 300
 class ALRWalker2dJumpEnv(Walker2dEnv):
    """
    healthy reward 1.0 -> 0.005 -> 0.0025 not from alex
    penalty 10 -> 0 not from alex
    """
    def __init__(self,
                 xml_file='walker2d.xml',
                 forward_reward_weight=1.0,
                 ctrl_cost_weight=1e-3,
                 healthy_reward=0.0025,
                 terminate_when_unhealthy=True,
                 healthy_z_range=(0.8, 2.0),
                 healthy_angle_range=(-1.0, 1.0),
                 reset_noise_scale=5e-3,
                 penalty=0,
                 context=True,
                 exclude_current_positions_from_observation=True,
                 max_episode_steps=300):
        self.current_step = 0
        self.max_episode_steps = max_episode_steps
        self.max_height = 0
        self._penalty = penalty
        self.context = context
        self.goal = 0
        xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
        super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
                         healthy_z_range, healthy_angle_range, reset_noise_scale,
                         exclude_current_positions_from_observation)
    def step(self, action):
        self.current_step += 1
        self.do_simulation(action, self.frame_skip)
        #pos_after = self.get_body_com("torso")[0]
        height = self.get_body_com("torso")[2]
        self.max_height = max(height, self.max_height)
        fell_over = height < 0.2
        done = fell_over
        ctrl_cost = self.control_cost(action)
        costs = ctrl_cost
        if self.current_step >= self.max_episode_steps or done:
            done = True
            height_goal_distance = -10 * (np.linalg.norm(self.max_height - self.goal)) if self.context else self.max_height
            healthy_reward = self.healthy_reward * self.current_step
            rewards = height_goal_distance + healthy_reward
        else:
            # penalty not needed
            rewards = 0
            rewards += ((action[:2] > 0) * self._penalty).sum() if self.current_step < 4 else 0
            rewards += ((action[3:5] > 0) * self._penalty).sum() if self.current_step < 4 else 0
        observation = self._get_obs()
        reward = rewards - costs
        info = {
            'height': height,
            'max_height': self.max_height,
            'goal' : self.goal,
        }
        return observation, reward, done, info
    def _get_obs(self):
        return np.append(super()._get_obs(), self.goal)
    def reset(self):
        self.current_step = 0
        self.max_height = 0
        self.goal = np.random.uniform(1.5, 2.5, 1) # 1.5 3.0
        return super().reset()
    # overwrite reset_model to make it deterministic
    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale
        qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
        qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
        self.set_state(qpos, qvel)
        observation = self._get_obs()
        return observation
 if __name__ == '__main__':
    render_mode = "human"  # "human" or "partial" or "final"
    env = ALRWalker2dJumpEnv()
    obs = env.reset()
    for i in range(2000):
        # objective.load_result("/tmp/cma")
        # test with random actions
        ac = env.action_space.sample()
        obs, rew, d, info = env.step(ac)
        if i % 10 == 0:
            env.render(mode=render_mode)
        if d:
            print('After ', i, ' steps, done: ', d)
            env.reset()
    env.close()