diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 38abbf1..e435fa6 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -11,6 +11,13 @@ from .mujoco.reacher.alr_reacher import ALRReacherEnv
from .mujoco.reacher.balancing import BalancingEnv
from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
+from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP
+from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP
+from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP
+from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX
+from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
+from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
+from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
@@ -185,6 +192,69 @@ register(
}
)
+
+register(
+ id='ALRAntJump-v0',
+ entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP
+ }
+)
+
+register(
+ id='ALRHalfCheetahJump-v0',
+ entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP
+ }
+)
+
+register(
+ id='ALRHopperJump-v0',
+ entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP
+ }
+)
+
+register(
+ id='ALRHopperJumpOnBox-v0',
+ entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX
+ }
+)
+
+register(
+ id='ALRHopperThrow-v0',
+ entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW
+ }
+)
+
+register(
+ id='ALRHopperThrowInBasket-v0',
+ entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
+ }
+)
+
+register(
+ id='ALRWalker2DJump-v0',
+ entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP
+ }
+)
## Balancing Reacher
register(
@@ -430,4 +500,205 @@ for _v, cd in enumerate(ctxt_dim):
}
}
)
- ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
\ No newline at end of file
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+
+## AntJump
+register(
+ id='ALRAntJumpProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRAntJump-v0",
+ "wrappers": [mujoco.ant_jump.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 8,
+ "num_basis": 5,
+ "duration": 10,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(8),
+ "d_gains": 0.1*np.ones(8)
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRAntJumpProMP-v0')
+
+register(
+ id='ALRHalfCheetahJumpProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRHalfCheetahJump-v0",
+ "wrappers": [mujoco.half_cheetah_jump.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 6,
+ "num_basis": 5,
+ "duration": 5,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(6),
+ "d_gains": 0.1*np.ones(6)
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHalfCheetahJumpProMP-v0')
+
+
+register(
+ id='ALRHopperJumpProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRHopperJump-v0",
+ "wrappers": [mujoco.hopper_jump.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 3,
+ "num_basis": 5,
+ "duration": 2,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(3),
+ "d_gains": 0.1*np.ones(3)
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperJumpProMP-v0')
+
+register(
+ id='ALRHopperJumpOnBoxProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRHopperJumpOnBox-v0",
+ "wrappers": [mujoco.hopper_jump.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 3,
+ "num_basis": 5,
+ "duration": 2,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(3),
+ "d_gains": 0.1*np.ones(3)
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperJumpOnBoxProMP-v0')
+
+
+register(
+ id='ALRHopperThrowProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRHopperThrow-v0",
+ "wrappers": [mujoco.hopper_throw.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 3,
+ "num_basis": 5,
+ "duration": 2,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(3),
+ "d_gains": 0.1*np.ones(3)
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperThrowProMP-v0')
+
+
+register(
+ id='ALRHopperThrowInBasketProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRHopperThrowInBasket-v0",
+ "wrappers": [mujoco.hopper_throw.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 3,
+ "num_basis": 5,
+ "duration": 2,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(3),
+ "d_gains": 0.1*np.ones(3)
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperThrowInBasketProMP-v0')
+
+
+register(
+ id='ALRWalker2DJumpProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRWalker2DJump-v0",
+ "wrappers": [mujoco.walker_2d_jump.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 6,
+ "num_basis": 5,
+ "duration": 2.4,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(6),
+ "d_gains": 0.1*np.ones(6)
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRWalker2DJumpProMP-v0')
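
The step-based environments registered above follow the classic gym API used throughout this diff (obs, reward, done, info), and the registered max_episode_steps adds the usual TimeLimit termination. A minimal usage sketch, assuming that importing alr_envs runs the register() calls in this module; the id is one of those registered above:

    import gym
    import alr_envs  # noqa: F401  # importing runs the register() calls above

    env = gym.make('ALRHopperJump-v0')
    obs = env.reset()
    done = False
    while not done:
        # actions are direct joint torques; the observation ends with the sampled goal height
        obs, reward, done, info = env.step(env.action_space.sample())
    env.close()
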
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index cdb3cde..8935db4 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -3,4 +3,11 @@ from .reacher.balancing import BalancingEnv
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
from .table_tennis.tt_gym import TTEnvGym
-from .beerpong.beerpong import ALRBeerBongEnv
\ No newline at end of file
+from .beerpong.beerpong import ALRBeerBongEnv
+from .ant_jump.ant_jump import ALRAntJumpEnv
+from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
+from .hopper_jump.hopper_jump import ALRHopperJumpEnv
+from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
+from .hopper_throw.hopper_throw import ALRHopperThrowEnv
+from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
+from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/ant_jump/__init__.py b/alr_envs/alr/mujoco/ant_jump/__init__.py
new file mode 100644
index 0000000..c5e6d2f
--- /dev/null
+++ b/alr_envs/alr/mujoco/ant_jump/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/ant_jump/ant_jump.py b/alr_envs/alr/mujoco/ant_jump/ant_jump.py
new file mode 100644
index 0000000..09c623d
--- /dev/null
+++ b/alr_envs/alr/mujoco/ant_jump/ant_jump.py
@@ -0,0 +1,117 @@
+import numpy as np
+from gym.envs.mujoco.ant_v3 import AntEnv
+
+MAX_EPISODE_STEPS_ANTJUMP = 200
+
+
+class ALRAntJumpEnv(AntEnv):
+ """
+ Initialization changes to normal Ant:
+    - healthy_reward: 1.0 -> 0.01 -> 0.0; no healthy reward needed - Paul and Marc
+    - ctrl_cost_weight: 0.5 -> 0.0
+    - contact_cost_weight: 5e-4 -> 0.0
+    - healthy_z_range: (0.2, 1.0) -> (0.3, float('inf')); does it make sense to limit the height with an upper bound?
+ """
+
+ def __init__(self,
+ xml_file='ant.xml',
+ ctrl_cost_weight=0.0,
+ contact_cost_weight=0.0,
+ healthy_reward=0.0,
+ terminate_when_unhealthy=True,
+ healthy_z_range=(0.3, float('inf')),
+ contact_force_range=(-1.0, 1.0),
+ reset_noise_scale=0.1,
+ context=True, # variable to decide if context is used or not
+ exclude_current_positions_from_observation=True,
+ max_episode_steps=200):
+ self.current_step = 0
+ self.max_height = 0
+ self.context = context
+ self.max_episode_steps = max_episode_steps
+ self.goal = 0 # goal when training with context
+ super().__init__(xml_file, ctrl_cost_weight, contact_cost_weight, healthy_reward, terminate_when_unhealthy,
+ healthy_z_range, contact_force_range, reset_noise_scale,
+ exclude_current_positions_from_observation)
+
+ def step(self, action):
+
+ self.current_step += 1
+ self.do_simulation(action, self.frame_skip)
+
+ height = self.get_body_com("torso")[2].copy()
+
+ self.max_height = max(height, self.max_height)
+
+ rewards = 0
+
+ ctrl_cost = self.control_cost(action)
+ contact_cost = self.contact_cost
+
+ costs = ctrl_cost + contact_cost
+
+        done = height < 0.3  # fell over; TODO: replace the hard-coded 0.3 with the lower bound of healthy_z_range
+
+ if self.current_step == self.max_episode_steps or done:
+ if self.context:
+ # -10 for scaling the value of the distance between the max_height and the goal height; only used when context is enabled
+ # height_reward = -10 * (np.linalg.norm(self.max_height - self.goal))
+ height_reward = -10*np.linalg.norm(self.max_height - self.goal)
+ # no healthy reward when using context, because we optimize a negative value
+ healthy_reward = 0
+ else:
+ height_reward = self.max_height - 0.7
+ healthy_reward = self.healthy_reward * self.current_step
+
+ rewards = height_reward + healthy_reward
+
+ obs = self._get_obs()
+ reward = rewards - costs
+
+ info = {
+ 'height': height,
+ 'max_height': self.max_height,
+ 'goal': self.goal
+ }
+
+ return obs, reward, done, info
+
+ def _get_obs(self):
+ return np.append(super()._get_obs(), self.goal)
+
+ def reset(self):
+ self.current_step = 0
+ self.max_height = 0
+        self.goal = np.random.uniform(1.0, 2.5, 1)  # goal heights from 1.0 to 2.5; can be increased, but didn't work well with CMORE
+ return super().reset()
+
+ # reset_model had to be implemented in every env to make it deterministic
+ def reset_model(self):
+ noise_low = -self._reset_noise_scale
+ noise_high = self._reset_noise_scale
+
+ qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+
+ self.set_state(qpos, qvel)
+
+ observation = self._get_obs()
+ return observation
+
+if __name__ == '__main__':
+ render_mode = "human" # "human" or "partial" or "final"
+ env = ALRAntJumpEnv()
+ obs = env.reset()
+
+ for i in range(2000):
+ # objective.load_result("/tmp/cma")
+ # test with random actions
+ ac = env.action_space.sample()
+ obs, rew, d, info = env.step(ac)
+ if i % 10 == 0:
+ env.render(mode=render_mode)
+ if d:
+ env.reset()
+
+ env.close()
\ No newline at end of file
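
The reward above is sparse: during the episode only the (here zeroed) control and contact costs apply, and the height term is paid once, at the end of the episode or when the ant falls. A small worked example of the contextual case, with numbers picked purely for illustration:

    import numpy as np

    goal = 2.0         # sampled per episode from U(1.0, 2.5) in reset()
    max_height = 1.55  # highest torso z reached during the rollout
    height_reward = -10 * np.linalg.norm(max_height - goal)
    print(height_reward)  # -4.5; with context=True the healthy_reward term is 0
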
diff --git a/alr_envs/alr/mujoco/ant_jump/assets/ant.xml b/alr_envs/alr/mujoco/ant_jump/assets/ant.xml
new file mode 100644
index 0000000..ee4d679
--- /dev/null
+++ b/alr_envs/alr/mujoco/ant_jump/assets/ant.xml
@@ -0,0 +1,81 @@
+<!-- MuJoCo model definition for ant.xml (XML markup not preserved) -->
diff --git a/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py b/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
new file mode 100644
index 0000000..4967b64
--- /dev/null
+++ b/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
@@ -0,0 +1,31 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+
+ @property
+ def active_obs(self):
+ return np.hstack([
+            [False] * 111,  # the Ant observation space is 111-dimensional
+ [True] # goal height
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray]:
+ return self.env.sim.data.qpos[7:15].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel[6:14].copy()
+
+ @property
+ def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
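
active_obs above is a boolean mask over the wrapped observation; only the goal height appended by ALRAntJumpEnv._get_obs() is marked active, so the goal is, presumably, the only entry the MP wrapper exposes as context. A small sketch of the masking, using a zero observation as a stand-in:

    import numpy as np

    mask = np.hstack([[False] * 111, [True]])  # same construction as active_obs
    obs = np.zeros(112)                        # 111 Ant dimensions + appended goal
    obs[-1] = 1.8                              # e.g. a sampled goal height
    print(obs[mask])                           # [1.8] -- only the goal survives
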
diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/__init__.py b/alr_envs/alr/mujoco/half_cheetah_jump/__init__.py
new file mode 100644
index 0000000..c5e6d2f
--- /dev/null
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/assets/cheetah.xml b/alr_envs/alr/mujoco/half_cheetah_jump/assets/cheetah.xml
new file mode 100644
index 0000000..41daadf
--- /dev/null
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/assets/cheetah.xml
@@ -0,0 +1,62 @@
+<!-- MuJoCo model definition for cheetah.xml (XML markup not preserved) -->
diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/half_cheetah_jump.py b/alr_envs/alr/mujoco/half_cheetah_jump/half_cheetah_jump.py
new file mode 100644
index 0000000..4b53a5d
--- /dev/null
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/half_cheetah_jump.py
@@ -0,0 +1,100 @@
+import os
+from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
+import numpy as np
+
+MAX_EPISODE_STEPS_HALFCHEETAHJUMP = 100
+
+
+class ALRHalfCheetahJumpEnv(HalfCheetahEnv):
+ """
+ ctrl_cost_weight 0.1 -> 0.0
+ """
+
+ def __init__(self,
+ xml_file='cheetah.xml',
+ forward_reward_weight=1.0,
+ ctrl_cost_weight=0.0,
+ reset_noise_scale=0.1,
+ context=True,
+ exclude_current_positions_from_observation=True,
+ max_episode_steps=100):
+ self.current_step = 0
+ self.max_height = 0
+ self.max_episode_steps = max_episode_steps
+ self.goal = 0
+ self.context = context
+ xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
+ super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, reset_noise_scale,
+ exclude_current_positions_from_observation)
+
+ def step(self, action):
+
+ self.current_step += 1
+ self.do_simulation(action, self.frame_skip)
+
+ height_after = self.get_body_com("torso")[2]
+ self.max_height = max(height_after, self.max_height)
+
+        ## Didn't use fell_over, because the base env also has no done condition - Paul and Marc
+        # fell_over = abs(self.sim.data.qpos[2]) > 2.5  # how to tell whether the cheetah fell over? is 2.5 okay?
+        # TODO: Should a fall-over be checked here?
+ done = False
+
+ ctrl_cost = self.control_cost(action)
+ costs = ctrl_cost
+
+ if self.current_step == self.max_episode_steps:
+ height_goal_distance = -10*np.linalg.norm(self.max_height - self.goal) + 1e-8 if self.context \
+ else self.max_height
+ rewards = self._forward_reward_weight * height_goal_distance
+ else:
+ rewards = 0
+
+ observation = self._get_obs()
+ reward = rewards - costs
+ info = {
+ 'height': height_after,
+ 'max_height': self.max_height
+ }
+
+ return observation, reward, done, info
+
+ def _get_obs(self):
+ return np.append(super()._get_obs(), self.goal)
+
+ def reset(self):
+ self.max_height = 0
+ self.current_step = 0
+ self.goal = np.random.uniform(1.1, 1.6, 1) # 1.1 1.6
+ return super().reset()
+
+ # overwrite reset_model to make it deterministic
+ def reset_model(self):
+ noise_low = -self._reset_noise_scale
+ noise_high = self._reset_noise_scale
+
+ qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+
+ self.set_state(qpos, qvel)
+
+ observation = self._get_obs()
+ return observation
+
+if __name__ == '__main__':
+ render_mode = "human" # "human" or "partial" or "final"
+ env = ALRHalfCheetahJumpEnv()
+ obs = env.reset()
+
+ for i in range(2000):
+ # objective.load_result("/tmp/cma")
+ # test with random actions
+ ac = env.action_space.sample()
+ obs, rew, d, info = env.step(ac)
+ if i % 10 == 0:
+ env.render(mode=render_mode)
+ if d:
+ print('After ', i, ' steps, done: ', d)
+ env.reset()
+
+ env.close()
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py b/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py
new file mode 100644
index 0000000..6179b07
--- /dev/null
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py
@@ -0,0 +1,30 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+ @property
+ def active_obs(self):
+ return np.hstack([
+ [False] * 17,
+ [True] # goal height
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray]:
+ return self.env.sim.data.qpos[3:9].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel[3:9].copy()
+
+ @property
+ def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/mujoco/hopper_jump/__init__.py b/alr_envs/alr/mujoco/hopper_jump/__init__.py
new file mode 100644
index 0000000..c5e6d2f
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_jump/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/hopper_jump/assets/hopper.xml b/alr_envs/alr/mujoco/hopper_jump/assets/hopper.xml
new file mode 100644
index 0000000..f18bc46
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_jump/assets/hopper.xml
@@ -0,0 +1,48 @@
+<!-- MuJoCo model definition for hopper.xml (XML markup not preserved) -->
diff --git a/alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump_on_box.xml b/alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump_on_box.xml
new file mode 100644
index 0000000..69d78ff
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump_on_box.xml
@@ -0,0 +1,51 @@
+<!-- MuJoCo model definition for hopper_jump_on_box.xml (XML markup not preserved) -->
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
new file mode 100644
index 0000000..7ce8196
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
@@ -0,0 +1,110 @@
+from gym.envs.mujoco.hopper_v3 import HopperEnv
+import numpy as np
+
+MAX_EPISODE_STEPS_HOPPERJUMP = 250
+
+
+class ALRHopperJumpEnv(HopperEnv):
+ """
+ Initialization changes to normal Hopper:
+ - healthy_reward: 1.0 -> 0.1 -> 0
+ - healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
+ - healthy_z_range: (0.7, float('inf')) -> (0.5, float('inf'))
+
+ """
+
+ def __init__(self,
+ xml_file='hopper.xml',
+ forward_reward_weight=1.0,
+ ctrl_cost_weight=1e-3,
+ healthy_reward=0.0,
+ penalty=0.0,
+ context=True,
+ terminate_when_unhealthy=True,
+ healthy_state_range=(-100.0, 100.0),
+ healthy_z_range=(0.5, float('inf')),
+ healthy_angle_range=(-float('inf'), float('inf')),
+ reset_noise_scale=5e-3,
+ exclude_current_positions_from_observation=True,
+ max_episode_steps=250):
+ self.current_step = 0
+ self.max_height = 0
+ self.max_episode_steps = max_episode_steps
+ self.penalty = penalty
+ self.goal = 0
+ self.context = context
+ self.exclude_current_positions_from_observation = exclude_current_positions_from_observation
+ super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
+ healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
+ exclude_current_positions_from_observation)
+
+ def step(self, action):
+
+ self.current_step += 1
+ self.do_simulation(action, self.frame_skip)
+ height_after = self.get_body_com("torso")[2]
+ self.max_height = max(height_after, self.max_height)
+
+ ctrl_cost = self.control_cost(action)
+ costs = ctrl_cost
+ done = False
+
+ if self.current_step >= self.max_episode_steps:
+            height_goal_distance = -10*np.linalg.norm(self.max_height - self.goal) if self.context else self.max_height
+            healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
+            height_reward = self._forward_reward_weight * height_goal_distance  # maybe move the reward calculation into the if/else above and define separate _forward_reward_weight values for the context and episodic cases
+ rewards = height_reward + healthy_reward
+
+ else:
+ # penalty for wrong start direction of first two joints; not needed, could be removed
+ rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0
+
+ observation = self._get_obs()
+ reward = rewards - costs
+ info = {
+ 'height' : height_after,
+ 'max_height': self.max_height,
+ 'goal' : self.goal
+ }
+
+ return observation, reward, done, info
+
+ def _get_obs(self):
+ return np.append(super()._get_obs(), self.goal)
+
+ def reset(self):
+ self.goal = np.random.uniform(1.4, 2.3, 1) # 1.3 2.3
+ self.max_height = 0
+ self.current_step = 0
+ return super().reset()
+
+ # overwrite reset_model to make it deterministic
+ def reset_model(self):
+ noise_low = -self._reset_noise_scale
+ noise_high = self._reset_noise_scale
+
+ qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+
+ self.set_state(qpos, qvel)
+
+ observation = self._get_obs()
+ return observation
+
+if __name__ == '__main__':
+ render_mode = "human" # "human" or "partial" or "final"
+ env = ALRHopperJumpEnv()
+ obs = env.reset()
+
+ for i in range(2000):
+ # objective.load_result("/tmp/cma")
+ # test with random actions
+ ac = env.action_space.sample()
+ obs, rew, d, info = env.step(ac)
+ if i % 10 == 0:
+ env.render(mode=render_mode)
+ if d:
+ print('After ', i, ' steps, done: ', d)
+ env.reset()
+
+ env.close()
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump_on_box.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump_on_box.py
new file mode 100644
index 0000000..e5e363e
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump_on_box.py
@@ -0,0 +1,170 @@
+from gym.envs.mujoco.hopper_v3 import HopperEnv
+
+import numpy as np
+import os
+
+MAX_EPISODE_STEPS_HOPPERJUMPONBOX = 250
+
+
+class ALRHopperJumpOnBoxEnv(HopperEnv):
+ """
+ Initialization changes to normal Hopper:
+ - healthy_reward: 1.0 -> 0.01 -> 0.001
+ - healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
+ """
+
+ def __init__(self,
+ xml_file='hopper_jump_on_box.xml',
+ forward_reward_weight=1.0,
+ ctrl_cost_weight=1e-3,
+ healthy_reward=0.001,
+ terminate_when_unhealthy=True,
+ healthy_state_range=(-100.0, 100.0),
+ healthy_z_range=(0.7, float('inf')),
+ healthy_angle_range=(-float('inf'), float('inf')),
+ reset_noise_scale=5e-3,
+ context=True,
+ exclude_current_positions_from_observation=True,
+ max_episode_steps=250):
+ self.current_step = 0
+ self.max_height = 0
+ self.max_episode_steps = max_episode_steps
+ self.min_distance = 5000 # what value?
+ self.hopper_on_box = False
+ self.context = context
+ self.box_x = 1
+ xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
+ super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
+ healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
+ exclude_current_positions_from_observation)
+
+ def step(self, action):
+
+ self.current_step += 1
+ self.do_simulation(action, self.frame_skip)
+ height_after = self.get_body_com("torso")[2]
+ foot_pos = self.get_body_com("foot")
+ self.max_height = max(height_after, self.max_height)
+
+ vx, vz, vangle = self.sim.data.qvel[0:3]
+
+ s = self.state_vector()
+ fell_over = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and (height_after > .7))
+
+ box_pos = self.get_body_com("box")
+ box_size = 0.3
+ box_height = 0.3
+ box_center = (box_pos[0] + (box_size / 2), box_pos[1], box_height)
+ foot_length = 0.3
+ foot_center = foot_pos[0] - (foot_length / 2)
+
+ dist = np.linalg.norm(foot_pos - box_center)
+
+ self.min_distance = min(dist, self.min_distance)
+
+ # check if foot is on box
+ is_on_box_x = box_pos[0] <= foot_center <= box_pos[0] + box_size
+        is_on_box_y = True  # always True: the hopper can only move in the x-z plane
+ is_on_box_z = box_height - 0.02 <= foot_pos[2] <= box_height + 0.02
+ is_on_box = is_on_box_x and is_on_box_y and is_on_box_z
+ if is_on_box: self.hopper_on_box = True
+
+ ctrl_cost = self.control_cost(action)
+
+ costs = ctrl_cost
+
+ done = fell_over or self.hopper_on_box
+
+ if self.current_step >= self.max_episode_steps or done:
+ done = False
+
+ max_height = self.max_height.copy()
+ min_distance = self.min_distance.copy()
+
+ alive_bonus = self._healthy_reward * self.current_step
+ box_bonus = 0
+ rewards = 0
+
+ # TODO explain what we did here for the calculation of the reward
+ if is_on_box:
+ if self.context:
+ rewards -= 100 * vx ** 2 if 100 * vx ** 2 < 1 else 1
+ else:
+ box_bonus = 10
+ rewards += box_bonus
+ # rewards -= dist * dist ???? why when already on box?
+ # reward -= 90 - abs(angle)
+ rewards -= 100 * vx ** 2 if 100 * vx ** 2 < 1 else 1
+ rewards += max_height * 3
+ rewards += alive_bonus
+
+ else:
+ if self.context:
+ rewards = -10 - min_distance
+ rewards += max_height * 3
+ else:
+ # reward -= (dist*dist)
+ rewards -= min_distance * min_distance
+ # rewards -= dist / self.max_distance
+ rewards += max_height
+ rewards += alive_bonus
+
+ else:
+ rewards = 0
+
+ observation = self._get_obs()
+ reward = rewards - costs
+ info = {
+ 'height': height_after,
+ 'max_height': self.max_height.copy(),
+ 'min_distance': self.min_distance,
+ 'goal': self.box_x,
+ }
+
+ return observation, reward, done, info
+
+ def _get_obs(self):
+ return np.append(super()._get_obs(), self.box_x)
+
+ def reset(self):
+
+ self.max_height = 0
+ self.min_distance = 5000
+ self.current_step = 0
+ self.hopper_on_box = False
+ if self.context:
+ box_id = self.sim.model.body_name2id("box")
+ self.box_x = np.random.uniform(1, 3, 1)
+ self.sim.model.body_pos[box_id] = [self.box_x, 0, 0]
+ return super().reset()
+
+ # overwrite reset_model to make it deterministic
+ def reset_model(self):
+ noise_low = -self._reset_noise_scale
+ noise_high = self._reset_noise_scale
+
+ qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+
+ self.set_state(qpos, qvel)
+
+ observation = self._get_obs()
+ return observation
+
+if __name__ == '__main__':
+ render_mode = "human" # "human" or "partial" or "final"
+ env = ALRHopperJumpOnBoxEnv()
+ obs = env.reset()
+
+ for i in range(2000):
+ # objective.load_result("/tmp/cma")
+ # test with random actions
+ ac = env.action_space.sample()
+ obs, rew, d, info = env.step(ac)
+ if i % 10 == 0:
+ env.render(mode=render_mode)
+ if d:
+ print('After ', i, ' steps, done: ', d)
+ env.reset()
+
+ env.close()
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py b/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
new file mode 100644
index 0000000..0d6768c
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
@@ -0,0 +1,31 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+ @property
+ def active_obs(self):
+ return np.hstack([
+ [False] * (5 + int(not self.exclude_current_positions_from_observation)), # position
+ [False] * 6, # velocity
+ [True]
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray]:
+ return self.env.sim.data.qpos[3:6].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel[3:6].copy()
+
+ @property
+ def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/mujoco/hopper_throw/__init__.py b/alr_envs/alr/mujoco/hopper_throw/__init__.py
new file mode 100644
index 0000000..989b5a9
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_throw/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw.xml b/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw.xml
new file mode 100644
index 0000000..1c39602
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw.xml
@@ -0,0 +1,56 @@
+<!-- MuJoCo model definition for hopper_throw.xml (XML markup not preserved) -->
diff --git a/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml b/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml
new file mode 100644
index 0000000..b4f0342
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml
@@ -0,0 +1,132 @@
+<!-- MuJoCo model definition for hopper_throw_in_basket.xml (XML markup not preserved) -->
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py b/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py
new file mode 100644
index 0000000..03553f2
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py
@@ -0,0 +1,113 @@
+import os
+
+from gym.envs.mujoco.hopper_v3 import HopperEnv
+import numpy as np
+
+MAX_EPISODE_STEPS_HOPPERTHROW = 250
+
+
+class ALRHopperThrowEnv(HopperEnv):
+ """
+ Initialization changes to normal Hopper:
+ - healthy_reward: 1.0 -> 0.0 -> 0.1
+ - forward_reward_weight -> 5.0
+ - healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
+
+ Reward changes to normal Hopper:
+ - velocity: (x_position_after - x_position_before) -> self.get_body_com("ball")[0]
+ """
+
+ def __init__(self,
+ xml_file='hopper_throw.xml',
+ forward_reward_weight=5.0,
+ ctrl_cost_weight=1e-3,
+ healthy_reward=0.1,
+ terminate_when_unhealthy=True,
+ healthy_state_range=(-100.0, 100.0),
+ healthy_z_range=(0.7, float('inf')),
+ healthy_angle_range=(-float('inf'), float('inf')),
+ reset_noise_scale=5e-3,
+ context = True,
+ exclude_current_positions_from_observation=True,
+ max_episode_steps=250):
+ xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
+ self.current_step = 0
+ self.max_episode_steps = max_episode_steps
+ self.context = context
+ self.goal = 0
+ super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
+ healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
+ exclude_current_positions_from_observation)
+
+ def step(self, action):
+
+ self.current_step += 1
+ self.do_simulation(action, self.frame_skip)
+        ball_pos_after = self.get_body_com("ball")[0]  # abs(self.get_body_com("ball")[0])  # use x and y and the Euclidean distance as the reward instead?
+        ball_pos_after_y = self.get_body_com("ball")[2]  # note: index 2 is the z coordinate (height)
+
+        # done = self.done  # TODO: should we use this? ball_landed should be enough, because we only look at the throw itself - Paul and Marc
+ ball_landed = self.get_body_com("ball")[2] <= 0.05
+ done = ball_landed
+
+ ctrl_cost = self.control_cost(action)
+ costs = ctrl_cost
+
+ rewards = 0
+
+ if self.current_step >= self.max_episode_steps or done:
+ distance_reward = -np.linalg.norm(ball_pos_after - self.goal) if self.context else \
+ self._forward_reward_weight * ball_pos_after
+ healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
+
+ rewards = distance_reward + healthy_reward
+
+ observation = self._get_obs()
+ reward = rewards - costs
+ info = {
+ 'ball_pos': ball_pos_after,
+ 'ball_pos_y': ball_pos_after_y,
+ 'current_step' : self.current_step,
+ 'goal' : self.goal,
+ }
+
+ return observation, reward, done, info
+
+ def _get_obs(self):
+ return np.append(super()._get_obs(), self.goal)
+
+ def reset(self):
+ self.current_step = 0
+        self.goal = np.random.uniform(2.0, 6.0, 1)  # 0.5 8.0
+ return super().reset()
+
+ # overwrite reset_model to make it deterministic
+ def reset_model(self):
+ noise_low = -self._reset_noise_scale
+ noise_high = self._reset_noise_scale
+
+ qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+
+ self.set_state(qpos, qvel)
+
+ observation = self._get_obs()
+ return observation
+
+if __name__ == '__main__':
+ render_mode = "human" # "human" or "partial" or "final"
+ env = ALRHopperThrowEnv()
+ obs = env.reset()
+
+ for i in range(2000):
+ # objective.load_result("/tmp/cma")
+ # test with random actions
+ ac = env.action_space.sample()
+ obs, rew, d, info = env.step(ac)
+ if i % 10 == 0:
+ env.render(mode=render_mode)
+ if d:
+ print('After ', i, ' steps, done: ', d)
+ env.reset()
+
+ env.close()
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py b/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py
new file mode 100644
index 0000000..74a5b21
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py
@@ -0,0 +1,143 @@
+import os
+from gym.envs.mujoco.hopper_v3 import HopperEnv
+
+import numpy as np
+
+
+MAX_EPISODE_STEPS_HOPPERTHROWINBASKET = 250
+
+
+class ALRHopperThrowInBasketEnv(HopperEnv):
+ """
+ Initialization changes to normal Hopper:
+ - healthy_reward: 1.0 -> 0.0
+ - healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
+ - hit_basket_reward: - -> 10
+
+ Reward changes to normal Hopper:
+ - velocity: (x_position_after - x_position_before) -> (ball_position_after - ball_position_before)
+ """
+
+ def __init__(self,
+ xml_file='hopper_throw_in_basket.xml',
+ forward_reward_weight=1.0,
+ ctrl_cost_weight=1e-3,
+ healthy_reward=0.0,
+ hit_basket_reward=10,
+ basket_size=0.3,
+ terminate_when_unhealthy=True,
+ healthy_state_range=(-100.0, 100.0),
+ healthy_z_range=(0.7, float('inf')),
+ healthy_angle_range=(-float('inf'), float('inf')),
+ reset_noise_scale=5e-3,
+ context=True,
+ penalty=0.0,
+ exclude_current_positions_from_observation=True,
+ max_episode_steps = 250):
+ self.hit_basket_reward = hit_basket_reward
+ self.current_step = 0
+ self.max_episode_steps = max_episode_steps
+ self.ball_in_basket = False
+ self.basket_size = basket_size
+ self.context = context
+ self.penalty = penalty
+ self.basket_x = 5
+ xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
+ super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
+ healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
+ exclude_current_positions_from_observation)
+
+ def step(self, action):
+
+ self.current_step += 1
+ self.do_simulation(action, self.frame_skip)
+ ball_pos = self.get_body_com("ball")
+
+ basket_pos = self.get_body_com("basket_ground")
+ basket_center = (basket_pos[0] + 0.5, basket_pos[1], basket_pos[2])
+
+        is_in_basket_x = basket_pos[0] <= ball_pos[0] <= basket_pos[0] + self.basket_size
+        is_in_basket_y = basket_pos[1] - (self.basket_size / 2) <= ball_pos[1] <= basket_pos[1] + (self.basket_size / 2)
+ is_in_basket_z = ball_pos[2] < 0.1
+ is_in_basket = is_in_basket_x and is_in_basket_y and is_in_basket_z
+ if is_in_basket: self.ball_in_basket = True
+
+ ball_landed = self.get_body_com("ball")[2] <= 0.05
+ done = ball_landed or is_in_basket
+
+ rewards = 0
+
+ ctrl_cost = self.control_cost(action)
+
+ costs = ctrl_cost
+
+ if self.current_step >= self.max_episode_steps or done:
+
+ if is_in_basket:
+ if not self.context:
+ rewards += self.hit_basket_reward
+ else:
+ dist = np.linalg.norm(ball_pos-basket_center)
+ if self.context:
+ rewards = -10 * dist
+ else:
+ rewards -= (dist*dist)
+ else:
+ # penalty not needed
+ rewards += ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0 #too much of a penalty?
+
+ observation = self._get_obs()
+ reward = rewards - costs
+ info = {
+ 'ball_pos': ball_pos[0],
+ }
+
+ return observation, reward, done, info
+
+ def _get_obs(self):
+ return np.append(super()._get_obs(), self.basket_x)
+
+ def reset(self):
+ if self.max_episode_steps == 10:
+ # We have to initialize this here, because the spec is only added after creating the env.
+ self.max_episode_steps = self.spec.max_episode_steps
+
+ self.current_step = 0
+ self.ball_in_basket = False
+ if self.context:
+ basket_id = self.sim.model.body_name2id("basket_ground")
+ self.basket_x = np.random.uniform(3, 7, 1)
+ self.sim.model.body_pos[basket_id] = [self.basket_x, 0, 0]
+ return super().reset()
+
+ # overwrite reset_model to make it deterministic
+ def reset_model(self):
+ noise_low = -self._reset_noise_scale
+ noise_high = self._reset_noise_scale
+
+ qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+
+ self.set_state(qpos, qvel)
+
+ observation = self._get_obs()
+ return observation
+
+
+if __name__ == '__main__':
+ render_mode = "human" # "human" or "partial" or "final"
+ env = ALRHopperThrowInBasketEnv()
+ obs = env.reset()
+
+ for i in range(2000):
+ # objective.load_result("/tmp/cma")
+ # test with random actions
+ ac = env.action_space.sample()
+ obs, rew, d, info = env.step(ac)
+ if i % 10 == 0:
+ env.render(mode=render_mode)
+ if d:
+ print('After ', i, ' steps, done: ', d)
+ env.reset()
+
+ env.close()
\ No newline at end of file
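
The basket check in ALRHopperThrowInBasketEnv.step is a plain axis-aligned containment test on the ball position. A standalone sketch of the same geometry (the helper name and the sample coordinates are made up for illustration):

    import numpy as np

    def in_basket(ball_pos, basket_pos, basket_size=0.3):
        # same three per-axis checks as in ALRHopperThrowInBasketEnv.step
        in_x = basket_pos[0] <= ball_pos[0] <= basket_pos[0] + basket_size
        in_y = basket_pos[1] - basket_size / 2 <= ball_pos[1] <= basket_pos[1] + basket_size / 2
        in_z = ball_pos[2] < 0.1
        return in_x and in_y and in_z

    print(in_basket(np.array([5.1, 0.0, 0.05]), np.array([5.0, 0.0, 0.0])))  # True
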
diff --git a/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py b/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
new file mode 100644
index 0000000..e5e9486
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
@@ -0,0 +1,30 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+ @property
+ def active_obs(self):
+ return np.hstack([
+ [False] * 17,
+ [True] # goal pos
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray]:
+ return self.env.sim.data.qpos[3:6].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel[3:6].copy()
+
+ @property
+ def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/__init__.py b/alr_envs/alr/mujoco/walker_2d_jump/__init__.py
new file mode 100644
index 0000000..989b5a9
--- /dev/null
+++ b/alr_envs/alr/mujoco/walker_2d_jump/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml b/alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml
new file mode 100644
index 0000000..1b48e36
--- /dev/null
+++ b/alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml
@@ -0,0 +1,62 @@
+<!-- MuJoCo model definition for walker2d.xml (XML markup not preserved) -->
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py b/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
new file mode 100644
index 0000000..445fa40
--- /dev/null
+++ b/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
@@ -0,0 +1,30 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+ @property
+ def active_obs(self):
+ return np.hstack([
+ [False] * 17,
+ [True] # goal pos
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray]:
+ return self.env.sim.data.qpos[3:9].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel[3:9].copy()
+
+ @property
+ def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py b/alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py
new file mode 100644
index 0000000..009dd9d
--- /dev/null
+++ b/alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py
@@ -0,0 +1,113 @@
+import os
+from gym.envs.mujoco.walker2d_v3 import Walker2dEnv
+import numpy as np
+
+MAX_EPISODE_STEPS_WALKERJUMP = 300
+
+
+class ALRWalker2dJumpEnv(Walker2dEnv):
+ """
+    healthy_reward: 1.0 -> 0.005 -> 0.0025 (not from Alex)
+    penalty: 10 -> 0 (not from Alex)
+ """
+
+ def __init__(self,
+ xml_file='walker2d.xml',
+ forward_reward_weight=1.0,
+ ctrl_cost_weight=1e-3,
+ healthy_reward=0.0025,
+ terminate_when_unhealthy=True,
+ healthy_z_range=(0.8, 2.0),
+ healthy_angle_range=(-1.0, 1.0),
+ reset_noise_scale=5e-3,
+ penalty=0,
+ context=True,
+ exclude_current_positions_from_observation=True,
+ max_episode_steps=300):
+ self.current_step = 0
+ self.max_episode_steps = max_episode_steps
+ self.max_height = 0
+ self._penalty = penalty
+ self.context = context
+ self.goal = 0
+ xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
+ super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
+ healthy_z_range, healthy_angle_range, reset_noise_scale,
+ exclude_current_positions_from_observation)
+
+ def step(self, action):
+ self.current_step += 1
+ self.do_simulation(action, self.frame_skip)
+ #pos_after = self.get_body_com("torso")[0]
+ height = self.get_body_com("torso")[2]
+
+ self.max_height = max(height, self.max_height)
+
+ fell_over = height < 0.2
+ done = fell_over
+
+ ctrl_cost = self.control_cost(action)
+ costs = ctrl_cost
+
+ if self.current_step >= self.max_episode_steps or done:
+ done = True
+ height_goal_distance = -10 * (np.linalg.norm(self.max_height - self.goal)) if self.context else self.max_height
+ healthy_reward = self.healthy_reward * self.current_step
+
+ rewards = height_goal_distance + healthy_reward
+ else:
+ # penalty not needed
+ rewards = 0
+ rewards += ((action[:2] > 0) * self._penalty).sum() if self.current_step < 4 else 0
+ rewards += ((action[3:5] > 0) * self._penalty).sum() if self.current_step < 4 else 0
+
+
+ observation = self._get_obs()
+ reward = rewards - costs
+ info = {
+ 'height': height,
+ 'max_height': self.max_height,
+ 'goal' : self.goal,
+ }
+
+ return observation, reward, done, info
+
+ def _get_obs(self):
+ return np.append(super()._get_obs(), self.goal)
+
+ def reset(self):
+ self.current_step = 0
+ self.max_height = 0
+ self.goal = np.random.uniform(1.5, 2.5, 1) # 1.5 3.0
+ return super().reset()
+
+ # overwrite reset_model to make it deterministic
+ def reset_model(self):
+ noise_low = -self._reset_noise_scale
+ noise_high = self._reset_noise_scale
+
+ qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+
+ self.set_state(qpos, qvel)
+
+ observation = self._get_obs()
+ return observation
+
+if __name__ == '__main__':
+ render_mode = "human" # "human" or "partial" or "final"
+ env = ALRWalker2dJumpEnv()
+ obs = env.reset()
+
+ for i in range(2000):
+ # objective.load_result("/tmp/cma")
+ # test with random actions
+ ac = env.action_space.sample()
+ obs, rew, d, info = env.step(ac)
+ if i % 10 == 0:
+ env.render(mode=render_mode)
+ if d:
+ print('After ', i, ' steps, done: ', d)
+ env.reset()
+
+ env.close()
\ No newline at end of file
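
For the ProMP variants registered in alr_envs/alr/__init__.py, the action is a vector of ProMP weights (on the order of num_dof * num_basis entries) and a single step rolls out the whole trajectory through the wrapped step-based environment. A hedged sketch, assuming the registered ids resolve through gym.make like the step-based ones:

    import gym
    import alr_envs  # noqa: F401  # importing runs the register() calls

    mp_env = gym.make('ALRWalker2DJumpProMP-v0')
    obs = mp_env.reset()
    weights = mp_env.action_space.sample()                   # ProMP weights, not torques
    obs, episode_return, done, info = mp_env.step(weights)   # one step = full rollout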