Added environments from Paul + Marc
This commit is contained in:
parent
7ffe94dcfd
commit
092525889b
@ -11,6 +11,13 @@ from .mujoco.reacher.alr_reacher import ALRReacherEnv
|
|||||||
from .mujoco.reacher.balancing import BalancingEnv
|
from .mujoco.reacher.balancing import BalancingEnv
|
||||||
|
|
||||||
from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
|
from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
|
||||||
|
from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP
|
||||||
|
from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP
|
||||||
|
from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP
|
||||||
|
from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX
|
||||||
|
from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
|
||||||
|
from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
|
||||||
|
from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
|
||||||
|
|
||||||
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
|
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
|
||||||
|
|
||||||
@ -185,6 +192,69 @@ register(
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='ALRAntJump-v0',
|
||||||
|
entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv',
|
||||||
|
max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
|
||||||
|
kwargs={
|
||||||
|
"max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='ALRHalfCheetahJump-v0',
|
||||||
|
entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv',
|
||||||
|
max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
|
||||||
|
kwargs={
|
||||||
|
"max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='ALRHopperJump-v0',
|
||||||
|
entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv',
|
||||||
|
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
|
||||||
|
kwargs={
|
||||||
|
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='ALRHopperJumpOnBox-v0',
|
||||||
|
entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv',
|
||||||
|
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
|
||||||
|
kwargs={
|
||||||
|
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='ALRHopperThrow-v0',
|
||||||
|
entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv',
|
||||||
|
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
|
||||||
|
kwargs={
|
||||||
|
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='ALRHopperThrowInBasket-v0',
|
||||||
|
entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv',
|
||||||
|
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
|
||||||
|
kwargs={
|
||||||
|
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='ALRWalker2DJump-v0',
|
||||||
|
entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv',
|
||||||
|
max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
|
||||||
|
kwargs={
|
||||||
|
"max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP
|
||||||
|
}
|
||||||
|
)
|
||||||
## Balancing Reacher
|
## Balancing Reacher
|
||||||
|
|
||||||
register(
|
register(
|
||||||
@ -431,3 +501,204 @@ for _v, cd in enumerate(ctxt_dim):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||||
|
|
||||||
|
|
||||||
|
## AntJump
|
||||||
|
for _v, cd in enumerate(ctxt_dim):
|
||||||
|
_env_id = f'TableTennisProMP-v{_v}'
|
||||||
|
register(
|
||||||
|
id=_env_id,
|
||||||
|
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||||
|
kwargs={
|
||||||
|
"name": "alr_envs:TableTennis{}DCtxt-v0".format(cd),
|
||||||
|
"wrappers": [mujoco.table_tennis.MPWrapper],
|
||||||
|
"mp_kwargs": {
|
||||||
|
"num_dof": 7,
|
||||||
|
"num_basis": 2,
|
||||||
|
"duration": 1.25,
|
||||||
|
"post_traj_time": 1.5,
|
||||||
|
"policy_type": "motor",
|
||||||
|
"weights_scale": 1.0,
|
||||||
|
"zero_start": True,
|
||||||
|
"zero_goal": False,
|
||||||
|
"policy_kwargs": {
|
||||||
|
"p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
|
||||||
|
"d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||||
|
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='ALRAntJumpProMP-v0',
|
||||||
|
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||||
|
kwargs={
|
||||||
|
"name": "alr_envs:ALRAntJump-v0",
|
||||||
|
"wrappers": [mujoco.ant_jump.MPWrapper],
|
||||||
|
"mp_kwargs": {
|
||||||
|
"num_dof": 8,
|
||||||
|
"num_basis": 5,
|
||||||
|
"duration": 10,
|
||||||
|
"post_traj_time": 0,
|
||||||
|
"policy_type": "motor",
|
||||||
|
"weights_scale": 1.0,
|
||||||
|
"zero_start": True,
|
||||||
|
"zero_goal": False,
|
||||||
|
"policy_kwargs": {
|
||||||
|
"p_gains": np.ones(8),
|
||||||
|
"d_gains": 0.1*np.ones(8)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRAntJumpProMP-v0')
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='ALRHalfCheetahJumpProMP-v0',
|
||||||
|
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||||
|
kwargs={
|
||||||
|
"name": "alr_envs:ALRHalfCheetahJump-v0",
|
||||||
|
"wrappers": [mujoco.half_cheetah_jump.MPWrapper],
|
||||||
|
"mp_kwargs": {
|
||||||
|
"num_dof": 6,
|
||||||
|
"num_basis": 5,
|
||||||
|
"duration": 5,
|
||||||
|
"post_traj_time": 0,
|
||||||
|
"policy_type": "motor",
|
||||||
|
"weights_scale": 1.0,
|
||||||
|
"zero_start": True,
|
||||||
|
"zero_goal": False,
|
||||||
|
"policy_kwargs": {
|
||||||
|
"p_gains": np.ones(6),
|
||||||
|
"d_gains": 0.1*np.ones(6)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHalfCheetahJumpProMP-v0')
|
||||||
|
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='ALRHopperJumpProMP-v0',
|
||||||
|
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||||
|
kwargs={
|
||||||
|
"name": "alr_envs:ALRHopperJump-v0",
|
||||||
|
"wrappers": [mujoco.hopper_jump.MPWrapper],
|
||||||
|
"mp_kwargs": {
|
||||||
|
"num_dof": 3,
|
||||||
|
"num_basis": 5,
|
||||||
|
"duration": 2,
|
||||||
|
"post_traj_time": 0,
|
||||||
|
"policy_type": "motor",
|
||||||
|
"weights_scale": 1.0,
|
||||||
|
"zero_start": True,
|
||||||
|
"zero_goal": False,
|
||||||
|
"policy_kwargs": {
|
||||||
|
"p_gains": np.ones(3),
|
||||||
|
"d_gains": 0.1*np.ones(3)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperJumpProMP-v0')
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='ALRHopperJumpOnBoxProMP-v0',
|
||||||
|
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||||
|
kwargs={
|
||||||
|
"name": "alr_envs:ALRHopperJumpOnBox-v0",
|
||||||
|
"wrappers": [mujoco.hopper_jump.MPWrapper],
|
||||||
|
"mp_kwargs": {
|
||||||
|
"num_dof": 3,
|
||||||
|
"num_basis": 5,
|
||||||
|
"duration": 2,
|
||||||
|
"post_traj_time": 0,
|
||||||
|
"policy_type": "motor",
|
||||||
|
"weights_scale": 1.0,
|
||||||
|
"zero_start": True,
|
||||||
|
"zero_goal": False,
|
||||||
|
"policy_kwargs": {
|
||||||
|
"p_gains": np.ones(3),
|
||||||
|
"d_gains": 0.1*np.ones(3)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperJumpOnBoxProMP-v0')
|
||||||
|
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='ALRHopperThrowProMP-v0',
|
||||||
|
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||||
|
kwargs={
|
||||||
|
"name": "alr_envs:ALRHopperThrow-v0",
|
||||||
|
"wrappers": [mujoco.hopper_throw.MPWrapper],
|
||||||
|
"mp_kwargs": {
|
||||||
|
"num_dof": 3,
|
||||||
|
"num_basis": 5,
|
||||||
|
"duration": 2,
|
||||||
|
"post_traj_time": 0,
|
||||||
|
"policy_type": "motor",
|
||||||
|
"weights_scale": 1.0,
|
||||||
|
"zero_start": True,
|
||||||
|
"zero_goal": False,
|
||||||
|
"policy_kwargs": {
|
||||||
|
"p_gains": np.ones(3),
|
||||||
|
"d_gains": 0.1*np.ones(3)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperThrowProMP-v0')
|
||||||
|
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='ALRHopperThrowInBasketProMP-v0',
|
||||||
|
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||||
|
kwargs={
|
||||||
|
"name": "alr_envs:ALRHopperThrowInBasket-v0",
|
||||||
|
"wrappers": [mujoco.hopper_throw.MPWrapper],
|
||||||
|
"mp_kwargs": {
|
||||||
|
"num_dof": 3,
|
||||||
|
"num_basis": 5,
|
||||||
|
"duration": 2,
|
||||||
|
"post_traj_time": 0,
|
||||||
|
"policy_type": "motor",
|
||||||
|
"weights_scale": 1.0,
|
||||||
|
"zero_start": True,
|
||||||
|
"zero_goal": False,
|
||||||
|
"policy_kwargs": {
|
||||||
|
"p_gains": np.ones(3),
|
||||||
|
"d_gains": 0.1*np.ones(3)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperThrowInBasketProMP-v0')
|
||||||
|
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='ALRWalker2DJumpProMP-v0',
|
||||||
|
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||||
|
kwargs={
|
||||||
|
"name": "alr_envs:ALRWalker2DJump-v0",
|
||||||
|
"wrappers": [mujoco.walker_2d_jump.MPWrapper],
|
||||||
|
"mp_kwargs": {
|
||||||
|
"num_dof": 6,
|
||||||
|
"num_basis": 5,
|
||||||
|
"duration": 2.4,
|
||||||
|
"post_traj_time": 0,
|
||||||
|
"policy_type": "motor",
|
||||||
|
"weights_scale": 1.0,
|
||||||
|
"zero_start": True,
|
||||||
|
"zero_goal": False,
|
||||||
|
"policy_kwargs": {
|
||||||
|
"p_gains": np.ones(6),
|
||||||
|
"d_gains": 0.1*np.ones(6)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRWalker2DJumpProMP-v0')
|
||||||
|
@ -4,3 +4,10 @@ from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
|
|||||||
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
|
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
|
||||||
from .table_tennis.tt_gym import TTEnvGym
|
from .table_tennis.tt_gym import TTEnvGym
|
||||||
from .beerpong.beerpong import ALRBeerBongEnv
|
from .beerpong.beerpong import ALRBeerBongEnv
|
||||||
|
from .ant_jump.ant_jump import ALRAntJumpEnv
|
||||||
|
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
|
||||||
|
from .hopper_jump.hopper_jump import ALRHopperJumpEnv
|
||||||
|
from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
|
||||||
|
from .hopper_throw.hopper_throw import ALRHopperThrowEnv
|
||||||
|
from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
|
||||||
|
from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv
|
1
alr_envs/alr/mujoco/ant_jump/__init__.py
Normal file
1
alr_envs/alr/mujoco/ant_jump/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from .mp_wrapper import MPWrapper
|
117
alr_envs/alr/mujoco/ant_jump/ant_jump.py
Normal file
117
alr_envs/alr/mujoco/ant_jump/ant_jump.py
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
import numpy as np
|
||||||
|
from gym.envs.mujoco.ant_v3 import AntEnv
|
||||||
|
|
||||||
|
MAX_EPISODE_STEPS_ANTJUMP = 200
|
||||||
|
|
||||||
|
|
||||||
|
class ALRAntJumpEnv(AntEnv):
|
||||||
|
"""
|
||||||
|
Initialization changes to normal Ant:
|
||||||
|
- healthy_reward: 1.0 -> 0.01 -> 0.0 no healthy reward needed - Paul and Marc
|
||||||
|
- ctrl_cost_weight 0.5 -> 0.0
|
||||||
|
- contact_cost_weight: 5e-4 -> 0.0
|
||||||
|
- healthy_z_range: (0.2, 1.0) -> (0.3, float('inf')) !!!!! Does that make sense, limiting height?
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
xml_file='ant.xml',
|
||||||
|
ctrl_cost_weight=0.0,
|
||||||
|
contact_cost_weight=0.0,
|
||||||
|
healthy_reward=0.0,
|
||||||
|
terminate_when_unhealthy=True,
|
||||||
|
healthy_z_range=(0.3, float('inf')),
|
||||||
|
contact_force_range=(-1.0, 1.0),
|
||||||
|
reset_noise_scale=0.1,
|
||||||
|
context=True, # variable to decide if context is used or not
|
||||||
|
exclude_current_positions_from_observation=True,
|
||||||
|
max_episode_steps=200):
|
||||||
|
self.current_step = 0
|
||||||
|
self.max_height = 0
|
||||||
|
self.context = context
|
||||||
|
self.max_episode_steps = max_episode_steps
|
||||||
|
self.goal = 0 # goal when training with context
|
||||||
|
super().__init__(xml_file, ctrl_cost_weight, contact_cost_weight, healthy_reward, terminate_when_unhealthy,
|
||||||
|
healthy_z_range, contact_force_range, reset_noise_scale,
|
||||||
|
exclude_current_positions_from_observation)
|
||||||
|
|
||||||
|
def step(self, action):
|
||||||
|
|
||||||
|
self.current_step += 1
|
||||||
|
self.do_simulation(action, self.frame_skip)
|
||||||
|
|
||||||
|
height = self.get_body_com("torso")[2].copy()
|
||||||
|
|
||||||
|
self.max_height = max(height, self.max_height)
|
||||||
|
|
||||||
|
rewards = 0
|
||||||
|
|
||||||
|
ctrl_cost = self.control_cost(action)
|
||||||
|
contact_cost = self.contact_cost
|
||||||
|
|
||||||
|
costs = ctrl_cost + contact_cost
|
||||||
|
|
||||||
|
done = height < 0.3 # fall over -> is the 0.3 value from healthy_z_range? TODO change 0.3 to the value of healthy z angle
|
||||||
|
|
||||||
|
if self.current_step == self.max_episode_steps or done:
|
||||||
|
if self.context:
|
||||||
|
# -10 for scaling the value of the distance between the max_height and the goal height; only used when context is enabled
|
||||||
|
# height_reward = -10 * (np.linalg.norm(self.max_height - self.goal))
|
||||||
|
height_reward = -10*np.linalg.norm(self.max_height - self.goal)
|
||||||
|
# no healthy reward when using context, because we optimize a negative value
|
||||||
|
healthy_reward = 0
|
||||||
|
else:
|
||||||
|
height_reward = self.max_height - 0.7
|
||||||
|
healthy_reward = self.healthy_reward * self.current_step
|
||||||
|
|
||||||
|
rewards = height_reward + healthy_reward
|
||||||
|
|
||||||
|
obs = self._get_obs()
|
||||||
|
reward = rewards - costs
|
||||||
|
|
||||||
|
info = {
|
||||||
|
'height': height,
|
||||||
|
'max_height': self.max_height,
|
||||||
|
'goal': self.goal
|
||||||
|
}
|
||||||
|
|
||||||
|
return obs, reward, done, info
|
||||||
|
|
||||||
|
def _get_obs(self):
|
||||||
|
return np.append(super()._get_obs(), self.goal)
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
self.current_step = 0
|
||||||
|
self.max_height = 0
|
||||||
|
self.goal = np.random.uniform(1.0, 2.5,
|
||||||
|
1) # goal heights from 1.0 to 2.5; can be increased, but didnt work well with CMORE
|
||||||
|
return super().reset()
|
||||||
|
|
||||||
|
# reset_model had to be implemented in every env to make it deterministic
|
||||||
|
def reset_model(self):
|
||||||
|
noise_low = -self._reset_noise_scale
|
||||||
|
noise_high = self._reset_noise_scale
|
||||||
|
|
||||||
|
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
||||||
|
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
|
||||||
|
|
||||||
|
self.set_state(qpos, qvel)
|
||||||
|
|
||||||
|
observation = self._get_obs()
|
||||||
|
return observation
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
render_mode = "human" # "human" or "partial" or "final"
|
||||||
|
env = ALRAntJumpEnv()
|
||||||
|
obs = env.reset()
|
||||||
|
|
||||||
|
for i in range(2000):
|
||||||
|
# objective.load_result("/tmp/cma")
|
||||||
|
# test with random actions
|
||||||
|
ac = env.action_space.sample()
|
||||||
|
obs, rew, d, info = env.step(ac)
|
||||||
|
if i % 10 == 0:
|
||||||
|
env.render(mode=render_mode)
|
||||||
|
if d:
|
||||||
|
env.reset()
|
||||||
|
|
||||||
|
env.close()
|
81
alr_envs/alr/mujoco/ant_jump/assets/ant.xml
Normal file
81
alr_envs/alr/mujoco/ant_jump/assets/ant.xml
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
<mujoco model="ant">
|
||||||
|
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
|
||||||
|
<option integrator="RK4" timestep="0.01"/>
|
||||||
|
<custom>
|
||||||
|
<numeric data="0.0 0.0 0.55 1.0 0.0 0.0 0.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0 1.0" name="init_qpos"/>
|
||||||
|
</custom>
|
||||||
|
<default>
|
||||||
|
<joint armature="1" damping="1" limited="true"/>
|
||||||
|
<geom conaffinity="0" condim="3" density="5.0" friction="1 0.5 0.5" margin="0.01" rgba="0.8 0.6 0.4 1"/>
|
||||||
|
</default>
|
||||||
|
<asset>
|
||||||
|
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
|
||||||
|
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
|
||||||
|
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
|
||||||
|
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
|
||||||
|
<material name="geom" texture="texgeom" texuniform="true"/>
|
||||||
|
</asset>
|
||||||
|
<worldbody>
|
||||||
|
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
|
||||||
|
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
|
||||||
|
<body name="torso" pos="0 0 0.75">
|
||||||
|
<camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
|
||||||
|
<geom name="torso_geom" pos="0 0 0" size="0.25" type="sphere"/>
|
||||||
|
<joint armature="0" damping="0" limited="false" margin="0.01" name="root" pos="0 0 0" type="free"/>
|
||||||
|
<body name="front_left_leg" pos="0 0 0">
|
||||||
|
<geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="aux_1_geom" size="0.08" type="capsule"/>
|
||||||
|
<body name="aux_1" pos="0.2 0.2 0">
|
||||||
|
<joint axis="0 0 1" name="hip_1" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
|
||||||
|
<geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="left_leg_geom" size="0.08" type="capsule"/>
|
||||||
|
<body pos="0.2 0.2 0">
|
||||||
|
<joint axis="-1 1 0" name="ankle_1" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
|
||||||
|
<geom fromto="0.0 0.0 0.0 0.4 0.4 0.0" name="left_ankle_geom" size="0.08" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
<body name="front_right_leg" pos="0 0 0">
|
||||||
|
<geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="aux_2_geom" size="0.08" type="capsule"/>
|
||||||
|
<body name="aux_2" pos="-0.2 0.2 0">
|
||||||
|
<joint axis="0 0 1" name="hip_2" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
|
||||||
|
<geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="right_leg_geom" size="0.08" type="capsule"/>
|
||||||
|
<body pos="-0.2 0.2 0">
|
||||||
|
<joint axis="1 1 0" name="ankle_2" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
|
||||||
|
<geom fromto="0.0 0.0 0.0 -0.4 0.4 0.0" name="right_ankle_geom" size="0.08" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
<body name="back_leg" pos="0 0 0">
|
||||||
|
<geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="aux_3_geom" size="0.08" type="capsule"/>
|
||||||
|
<body name="aux_3" pos="-0.2 -0.2 0">
|
||||||
|
<joint axis="0 0 1" name="hip_3" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
|
||||||
|
<geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="back_leg_geom" size="0.08" type="capsule"/>
|
||||||
|
<body pos="-0.2 -0.2 0">
|
||||||
|
<joint axis="-1 1 0" name="ankle_3" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
|
||||||
|
<geom fromto="0.0 0.0 0.0 -0.4 -0.4 0.0" name="third_ankle_geom" size="0.08" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
<body name="right_back_leg" pos="0 0 0">
|
||||||
|
<geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="aux_4_geom" size="0.08" type="capsule"/>
|
||||||
|
<body name="aux_4" pos="0.2 -0.2 0">
|
||||||
|
<joint axis="0 0 1" name="hip_4" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
|
||||||
|
<geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="rightback_leg_geom" size="0.08" type="capsule"/>
|
||||||
|
<body pos="0.2 -0.2 0">
|
||||||
|
<joint axis="1 1 0" name="ankle_4" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
|
||||||
|
<geom fromto="0.0 0.0 0.0 0.4 -0.4 0.0" name="fourth_ankle_geom" size="0.08" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</worldbody>
|
||||||
|
<actuator>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_4" gear="150"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_4" gear="150"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_1" gear="150"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_1" gear="150"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_2" gear="150"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_2" gear="150"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_3" gear="150"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_3" gear="150"/>
|
||||||
|
</actuator>
|
||||||
|
</mujoco>
|
31
alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
Normal file
31
alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
from typing import Tuple, Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from mp_env_api import MPEnvWrapper
|
||||||
|
|
||||||
|
|
||||||
|
class MPWrapper(MPEnvWrapper):
|
||||||
|
|
||||||
|
@property
|
||||||
|
def active_obs(self):
|
||||||
|
return np.hstack([
|
||||||
|
[False] * 111, # ant has 111 dimensional observation space !!
|
||||||
|
[True] # goal height
|
||||||
|
])
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_pos(self) -> Union[float, int, np.ndarray]:
|
||||||
|
return self.env.sim.data.qpos[7:15].copy()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
|
return self.env.sim.data.qvel[6:14].copy()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
|
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dt(self) -> Union[float, int]:
|
||||||
|
return self.env.dt
|
1
alr_envs/alr/mujoco/half_cheetah_jump/__init__.py
Normal file
1
alr_envs/alr/mujoco/half_cheetah_jump/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from .mp_wrapper import MPWrapper
|
62
alr_envs/alr/mujoco/half_cheetah_jump/assets/cheetah.xml
Normal file
62
alr_envs/alr/mujoco/half_cheetah_jump/assets/cheetah.xml
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
<mujoco model="cheetah">
|
||||||
|
<compiler angle="radian" coordinate="local" inertiafromgeom="true" settotalmass="14"/>
|
||||||
|
<default>
|
||||||
|
<joint armature=".1" damping=".01" limited="true" solimplimit="0 .8 .03" solreflimit=".02 1" stiffness="8"/>
|
||||||
|
<geom conaffinity="0" condim="3" contype="1" friction=".4 .1 .1" rgba="0.8 0.6 .4 1" solimp="0.0 0.8 0.01" solref="0.02 1"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1 1"/>
|
||||||
|
</default>
|
||||||
|
<size nstack="300000" nuser_geom="1"/>
|
||||||
|
<option gravity="0 0 -9.81" timestep="0.01"/>
|
||||||
|
<asset>
|
||||||
|
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
|
||||||
|
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
|
||||||
|
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
|
||||||
|
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
|
||||||
|
<material name="geom" texture="texgeom" texuniform="true"/>
|
||||||
|
</asset>
|
||||||
|
<worldbody>
|
||||||
|
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
|
||||||
|
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
|
||||||
|
<body name="torso" pos="0 0 .7">
|
||||||
|
<camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
|
||||||
|
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
|
||||||
|
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" stiffness="0" type="slide"/>
|
||||||
|
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 0" stiffness="0" type="hinge"/>
|
||||||
|
<geom fromto="-.5 0 0 .5 0 0" name="torso" size="0.046" type="capsule"/>
|
||||||
|
<geom axisangle="0 1 0 .87" name="head" pos=".6 0 .1" size="0.046 .15" type="capsule"/>
|
||||||
|
<!-- <site name='tip' pos='.15 0 .11'/>-->
|
||||||
|
<body name="bthigh" pos="-.5 0 0">
|
||||||
|
<joint axis="0 1 0" damping="6" name="bthigh" pos="0 0 0" range="-.52 1.05" stiffness="240" type="hinge"/>
|
||||||
|
<geom axisangle="0 1 0 -3.8" name="bthigh" pos=".1 0 -.13" size="0.046 .145" type="capsule"/>
|
||||||
|
<body name="bshin" pos=".16 0 -.25">
|
||||||
|
<joint axis="0 1 0" damping="4.5" name="bshin" pos="0 0 0" range="-.785 .785" stiffness="180" type="hinge"/>
|
||||||
|
<geom axisangle="0 1 0 -2.03" name="bshin" pos="-.14 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .15" type="capsule"/>
|
||||||
|
<body name="bfoot" pos="-.28 0 -.14">
|
||||||
|
<joint axis="0 1 0" damping="3" name="bfoot" pos="0 0 0" range="-.4 .785" stiffness="120" type="hinge"/>
|
||||||
|
<geom axisangle="0 1 0 -.27" name="bfoot" pos=".03 0 -.097" rgba="0.9 0.6 0.6 1" size="0.046 .094" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
<body name="fthigh" pos=".5 0 0">
|
||||||
|
<joint axis="0 1 0" damping="4.5" name="fthigh" pos="0 0 0" range="-1 .7" stiffness="180" type="hinge"/>
|
||||||
|
<geom axisangle="0 1 0 .52" name="fthigh" pos="-.07 0 -.12" size="0.046 .133" type="capsule"/>
|
||||||
|
<body name="fshin" pos="-.14 0 -.24">
|
||||||
|
<joint axis="0 1 0" damping="3" name="fshin" pos="0 0 0" range="-1.2 .87" stiffness="120" type="hinge"/>
|
||||||
|
<geom axisangle="0 1 0 -.6" name="fshin" pos=".065 0 -.09" rgba="0.9 0.6 0.6 1" size="0.046 .106" type="capsule"/>
|
||||||
|
<body name="ffoot" pos=".13 0 -.18">
|
||||||
|
<joint axis="0 1 0" damping="1.5" name="ffoot" pos="0 0 0" range="-.5 .5" stiffness="60" type="hinge"/>
|
||||||
|
<geom axisangle="0 1 0 -.6" name="ffoot" pos=".045 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .07" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</worldbody>
|
||||||
|
<actuator>
|
||||||
|
<motor gear="120" joint="bthigh" name="bthigh"/>
|
||||||
|
<motor gear="90" joint="bshin" name="bshin"/>
|
||||||
|
<motor gear="60" joint="bfoot" name="bfoot"/>
|
||||||
|
<motor gear="120" joint="fthigh" name="fthigh"/>
|
||||||
|
<motor gear="60" joint="fshin" name="fshin"/>
|
||||||
|
<motor gear="30" joint="ffoot" name="ffoot"/>
|
||||||
|
</actuator>
|
||||||
|
</mujoco>
|
100
alr_envs/alr/mujoco/half_cheetah_jump/half_cheetah_jump.py
Normal file
100
alr_envs/alr/mujoco/half_cheetah_jump/half_cheetah_jump.py
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
import os
|
||||||
|
from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
MAX_EPISODE_STEPS_HALFCHEETAHJUMP = 100
|
||||||
|
|
||||||
|
|
||||||
|
class ALRHalfCheetahJumpEnv(HalfCheetahEnv):
|
||||||
|
"""
|
||||||
|
ctrl_cost_weight 0.1 -> 0.0
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
xml_file='cheetah.xml',
|
||||||
|
forward_reward_weight=1.0,
|
||||||
|
ctrl_cost_weight=0.0,
|
||||||
|
reset_noise_scale=0.1,
|
||||||
|
context=True,
|
||||||
|
exclude_current_positions_from_observation=True,
|
||||||
|
max_episode_steps=100):
|
||||||
|
self.current_step = 0
|
||||||
|
self.max_height = 0
|
||||||
|
self.max_episode_steps = max_episode_steps
|
||||||
|
self.goal = 0
|
||||||
|
self.context = context
|
||||||
|
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
||||||
|
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, reset_noise_scale,
|
||||||
|
exclude_current_positions_from_observation)
|
||||||
|
|
||||||
|
def step(self, action):
|
||||||
|
|
||||||
|
self.current_step += 1
|
||||||
|
self.do_simulation(action, self.frame_skip)
|
||||||
|
|
||||||
|
height_after = self.get_body_com("torso")[2]
|
||||||
|
self.max_height = max(height_after, self.max_height)
|
||||||
|
|
||||||
|
## Didnt use fell_over, because base env also has no done condition - Paul and Marc
|
||||||
|
# fell_over = abs(self.sim.data.qpos[2]) > 2.5 # how to figure out if the cheetah fell over? -> 2.5 oke?
|
||||||
|
# TODO: Should a fall over be checked herE?
|
||||||
|
done = False
|
||||||
|
|
||||||
|
ctrl_cost = self.control_cost(action)
|
||||||
|
costs = ctrl_cost
|
||||||
|
|
||||||
|
if self.current_step == self.max_episode_steps:
|
||||||
|
height_goal_distance = -10*np.linalg.norm(self.max_height - self.goal) + 1e-8 if self.context \
|
||||||
|
else self.max_height
|
||||||
|
rewards = self._forward_reward_weight * height_goal_distance
|
||||||
|
else:
|
||||||
|
rewards = 0
|
||||||
|
|
||||||
|
observation = self._get_obs()
|
||||||
|
reward = rewards - costs
|
||||||
|
info = {
|
||||||
|
'height': height_after,
|
||||||
|
'max_height': self.max_height
|
||||||
|
}
|
||||||
|
|
||||||
|
return observation, reward, done, info
|
||||||
|
|
||||||
|
def _get_obs(self):
|
||||||
|
return np.append(super()._get_obs(), self.goal)
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
self.max_height = 0
|
||||||
|
self.current_step = 0
|
||||||
|
self.goal = np.random.uniform(1.1, 1.6, 1) # 1.1 1.6
|
||||||
|
return super().reset()
|
||||||
|
|
||||||
|
# overwrite reset_model to make it deterministic
|
||||||
|
def reset_model(self):
|
||||||
|
noise_low = -self._reset_noise_scale
|
||||||
|
noise_high = self._reset_noise_scale
|
||||||
|
|
||||||
|
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
||||||
|
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
|
||||||
|
|
||||||
|
self.set_state(qpos, qvel)
|
||||||
|
|
||||||
|
observation = self._get_obs()
|
||||||
|
return observation
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
render_mode = "human" # "human" or "partial" or "final"
|
||||||
|
env = ALRHalfCheetahJumpEnv()
|
||||||
|
obs = env.reset()
|
||||||
|
|
||||||
|
for i in range(2000):
|
||||||
|
# objective.load_result("/tmp/cma")
|
||||||
|
# test with random actions
|
||||||
|
ac = env.action_space.sample()
|
||||||
|
obs, rew, d, info = env.step(ac)
|
||||||
|
if i % 10 == 0:
|
||||||
|
env.render(mode=render_mode)
|
||||||
|
if d:
|
||||||
|
print('After ', i, ' steps, done: ', d)
|
||||||
|
env.reset()
|
||||||
|
|
||||||
|
env.close()
|
30
alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py
Normal file
30
alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
from typing import Tuple, Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from mp_env_api import MPEnvWrapper
|
||||||
|
|
||||||
|
|
||||||
|
class MPWrapper(MPEnvWrapper):
|
||||||
|
@property
|
||||||
|
def active_obs(self):
|
||||||
|
return np.hstack([
|
||||||
|
[False] * 17,
|
||||||
|
[True] # goal height
|
||||||
|
])
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_pos(self) -> Union[float, int, np.ndarray]:
|
||||||
|
return self.env.sim.data.qpos[3:9].copy()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
|
return self.env.sim.data.qvel[3:9].copy()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
|
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dt(self) -> Union[float, int]:
|
||||||
|
return self.env.dt
|
1
alr_envs/alr/mujoco/hopper_jump/__init__.py
Normal file
1
alr_envs/alr/mujoco/hopper_jump/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from .mp_wrapper import MPWrapper
|
48
alr_envs/alr/mujoco/hopper_jump/assets/hopper.xml
Normal file
48
alr_envs/alr/mujoco/hopper_jump/assets/hopper.xml
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
<mujoco model="hopper">
|
||||||
|
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
|
||||||
|
<default>
|
||||||
|
<joint armature="1" damping="1" limited="true"/>
|
||||||
|
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
|
||||||
|
</default>
|
||||||
|
<option integrator="RK4" timestep="0.002"/>
|
||||||
|
<visual>
|
||||||
|
<map znear="0.02"/>
|
||||||
|
</visual>
|
||||||
|
<worldbody>
|
||||||
|
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
|
||||||
|
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
|
||||||
|
<body name="torso" pos="0 0 1.25">
|
||||||
|
<camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
|
||||||
|
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
|
||||||
|
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
|
||||||
|
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
|
||||||
|
<body name="thigh" pos="0 0 1.05">
|
||||||
|
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
|
||||||
|
<body name="leg" pos="0 0 0.35">
|
||||||
|
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
|
||||||
|
<body name="foot" pos="0.13/2 0 0.1">
|
||||||
|
<joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
|
||||||
|
<geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</worldbody>
|
||||||
|
<actuator>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
|
||||||
|
</actuator>
|
||||||
|
<asset>
|
||||||
|
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
|
||||||
|
width="100" height="100"/>
|
||||||
|
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
|
||||||
|
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
|
||||||
|
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
|
||||||
|
<material name="geom" texture="texgeom" texuniform="true"/>
|
||||||
|
</asset>
|
||||||
|
</mujoco>
|
@ -0,0 +1,51 @@
|
|||||||
|
<mujoco model="hopper">
|
||||||
|
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
|
||||||
|
<default>
|
||||||
|
<joint armature="1" damping="1" limited="true"/>
|
||||||
|
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
|
||||||
|
</default>
|
||||||
|
<option integrator="RK4" timestep="0.002"/>
|
||||||
|
<visual>
|
||||||
|
<map znear="0.02"/>
|
||||||
|
</visual>
|
||||||
|
<worldbody>
|
||||||
|
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
|
||||||
|
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
|
||||||
|
<body name="torso" pos="0 0 1.25">
|
||||||
|
<camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
|
||||||
|
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
|
||||||
|
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
|
||||||
|
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
|
||||||
|
<body name="thigh" pos="0 0 1.05">
|
||||||
|
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
|
||||||
|
<body name="leg" pos="0 0 0.35">
|
||||||
|
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
|
||||||
|
<body name="foot" pos="0.13/2 0 0.1">
|
||||||
|
<joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
|
||||||
|
<geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
<body name="box" pos="1 0 0">
|
||||||
|
<geom friction="1.0" fromto="0.48 0 0 1 0 0" name="basket_ground_geom" size="0.3" type="box" rgba="1 0 0 1"/>
|
||||||
|
</body>
|
||||||
|
</worldbody>
|
||||||
|
<actuator>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
|
||||||
|
</actuator>
|
||||||
|
<asset>
|
||||||
|
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
|
||||||
|
width="100" height="100"/>
|
||||||
|
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
|
||||||
|
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
|
||||||
|
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
|
||||||
|
<material name="geom" texture="texgeom" texuniform="true"/>
|
||||||
|
</asset>
|
||||||
|
</mujoco>
|
110
alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
Normal file
110
alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
from gym.envs.mujoco.hopper_v3 import HopperEnv
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
MAX_EPISODE_STEPS_HOPPERJUMP = 250
|
||||||
|
|
||||||
|
|
||||||
|
class ALRHopperJumpEnv(HopperEnv):
|
||||||
|
"""
|
||||||
|
Initialization changes to normal Hopper:
|
||||||
|
- healthy_reward: 1.0 -> 0.1 -> 0
|
||||||
|
- healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
|
||||||
|
- healthy_z_range: (0.7, float('inf')) -> (0.5, float('inf'))
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
xml_file='hopper.xml',
|
||||||
|
forward_reward_weight=1.0,
|
||||||
|
ctrl_cost_weight=1e-3,
|
||||||
|
healthy_reward=0.0,
|
||||||
|
penalty=0.0,
|
||||||
|
context=True,
|
||||||
|
terminate_when_unhealthy=True,
|
||||||
|
healthy_state_range=(-100.0, 100.0),
|
||||||
|
healthy_z_range=(0.5, float('inf')),
|
||||||
|
healthy_angle_range=(-float('inf'), float('inf')),
|
||||||
|
reset_noise_scale=5e-3,
|
||||||
|
exclude_current_positions_from_observation=True,
|
||||||
|
max_episode_steps=250):
|
||||||
|
self.current_step = 0
|
||||||
|
self.max_height = 0
|
||||||
|
self.max_episode_steps = max_episode_steps
|
||||||
|
self.penalty = penalty
|
||||||
|
self.goal = 0
|
||||||
|
self.context = context
|
||||||
|
self.exclude_current_positions_from_observation = exclude_current_positions_from_observation
|
||||||
|
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
|
||||||
|
healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
|
||||||
|
exclude_current_positions_from_observation)
|
||||||
|
|
||||||
|
def step(self, action):
|
||||||
|
|
||||||
|
self.current_step += 1
|
||||||
|
self.do_simulation(action, self.frame_skip)
|
||||||
|
height_after = self.get_body_com("torso")[2]
|
||||||
|
self.max_height = max(height_after, self.max_height)
|
||||||
|
|
||||||
|
ctrl_cost = self.control_cost(action)
|
||||||
|
costs = ctrl_cost
|
||||||
|
done = False
|
||||||
|
|
||||||
|
if self.current_step >= self.max_episode_steps:
|
||||||
|
hight_goal_distance = -10*np.linalg.norm(self.max_height - self.goal) if self.context else self.max_height
|
||||||
|
healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
|
||||||
|
height_reward = self._forward_reward_weight * hight_goal_distance # maybe move reward calculation into if structure and define two different _forward_reward_weight variables for context and episodic seperatley
|
||||||
|
rewards = height_reward + healthy_reward
|
||||||
|
|
||||||
|
else:
|
||||||
|
# penalty for wrong start direction of first two joints; not needed, could be removed
|
||||||
|
rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0
|
||||||
|
|
||||||
|
observation = self._get_obs()
|
||||||
|
reward = rewards - costs
|
||||||
|
info = {
|
||||||
|
'height' : height_after,
|
||||||
|
'max_height': self.max_height,
|
||||||
|
'goal' : self.goal
|
||||||
|
}
|
||||||
|
|
||||||
|
return observation, reward, done, info
|
||||||
|
|
||||||
|
def _get_obs(self):
|
||||||
|
return np.append(super()._get_obs(), self.goal)
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
self.goal = np.random.uniform(1.4, 2.3, 1) # 1.3 2.3
|
||||||
|
self.max_height = 0
|
||||||
|
self.current_step = 0
|
||||||
|
return super().reset()
|
||||||
|
|
||||||
|
# overwrite reset_model to make it deterministic
|
||||||
|
def reset_model(self):
|
||||||
|
noise_low = -self._reset_noise_scale
|
||||||
|
noise_high = self._reset_noise_scale
|
||||||
|
|
||||||
|
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
||||||
|
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
|
||||||
|
|
||||||
|
self.set_state(qpos, qvel)
|
||||||
|
|
||||||
|
observation = self._get_obs()
|
||||||
|
return observation
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
render_mode = "human" # "human" or "partial" or "final"
|
||||||
|
env = ALRHopperJumpEnv()
|
||||||
|
obs = env.reset()
|
||||||
|
|
||||||
|
for i in range(2000):
|
||||||
|
# objective.load_result("/tmp/cma")
|
||||||
|
# test with random actions
|
||||||
|
ac = env.action_space.sample()
|
||||||
|
obs, rew, d, info = env.step(ac)
|
||||||
|
if i % 10 == 0:
|
||||||
|
env.render(mode=render_mode)
|
||||||
|
if d:
|
||||||
|
print('After ', i, ' steps, done: ', d)
|
||||||
|
env.reset()
|
||||||
|
|
||||||
|
env.close()
|
170
alr_envs/alr/mujoco/hopper_jump/hopper_jump_on_box.py
Normal file
170
alr_envs/alr/mujoco/hopper_jump/hopper_jump_on_box.py
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
from gym.envs.mujoco.hopper_v3 import HopperEnv
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import os
|
||||||
|
|
||||||
|
MAX_EPISODE_STEPS_HOPPERJUMPONBOX = 250
|
||||||
|
|
||||||
|
|
||||||
|
class ALRHopperJumpOnBoxEnv(HopperEnv):
|
||||||
|
"""
|
||||||
|
Initialization changes to normal Hopper:
|
||||||
|
- healthy_reward: 1.0 -> 0.01 -> 0.001
|
||||||
|
- healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
xml_file='hopper_jump_on_box.xml',
|
||||||
|
forward_reward_weight=1.0,
|
||||||
|
ctrl_cost_weight=1e-3,
|
||||||
|
healthy_reward=0.001,
|
||||||
|
terminate_when_unhealthy=True,
|
||||||
|
healthy_state_range=(-100.0, 100.0),
|
||||||
|
healthy_z_range=(0.7, float('inf')),
|
||||||
|
healthy_angle_range=(-float('inf'), float('inf')),
|
||||||
|
reset_noise_scale=5e-3,
|
||||||
|
context=True,
|
||||||
|
exclude_current_positions_from_observation=True,
|
||||||
|
max_episode_steps=250):
|
||||||
|
self.current_step = 0
|
||||||
|
self.max_height = 0
|
||||||
|
self.max_episode_steps = max_episode_steps
|
||||||
|
self.min_distance = 5000 # what value?
|
||||||
|
self.hopper_on_box = False
|
||||||
|
self.context = context
|
||||||
|
self.box_x = 1
|
||||||
|
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
||||||
|
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
|
||||||
|
healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
|
||||||
|
exclude_current_positions_from_observation)
|
||||||
|
|
||||||
|
def step(self, action):
|
||||||
|
|
||||||
|
self.current_step += 1
|
||||||
|
self.do_simulation(action, self.frame_skip)
|
||||||
|
height_after = self.get_body_com("torso")[2]
|
||||||
|
foot_pos = self.get_body_com("foot")
|
||||||
|
self.max_height = max(height_after, self.max_height)
|
||||||
|
|
||||||
|
vx, vz, vangle = self.sim.data.qvel[0:3]
|
||||||
|
|
||||||
|
s = self.state_vector()
|
||||||
|
fell_over = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and (height_after > .7))
|
||||||
|
|
||||||
|
box_pos = self.get_body_com("box")
|
||||||
|
box_size = 0.3
|
||||||
|
box_height = 0.3
|
||||||
|
box_center = (box_pos[0] + (box_size / 2), box_pos[1], box_height)
|
||||||
|
foot_length = 0.3
|
||||||
|
foot_center = foot_pos[0] - (foot_length / 2)
|
||||||
|
|
||||||
|
dist = np.linalg.norm(foot_pos - box_center)
|
||||||
|
|
||||||
|
self.min_distance = min(dist, self.min_distance)
|
||||||
|
|
||||||
|
# check if foot is on box
|
||||||
|
is_on_box_x = box_pos[0] <= foot_center <= box_pos[0] + box_size
|
||||||
|
is_on_box_y = True # is y always true because he can only move in x and z direction?
|
||||||
|
is_on_box_z = box_height - 0.02 <= foot_pos[2] <= box_height + 0.02
|
||||||
|
is_on_box = is_on_box_x and is_on_box_y and is_on_box_z
|
||||||
|
if is_on_box: self.hopper_on_box = True
|
||||||
|
|
||||||
|
ctrl_cost = self.control_cost(action)
|
||||||
|
|
||||||
|
costs = ctrl_cost
|
||||||
|
|
||||||
|
done = fell_over or self.hopper_on_box
|
||||||
|
|
||||||
|
if self.current_step >= self.max_episode_steps or done:
|
||||||
|
done = False
|
||||||
|
|
||||||
|
max_height = self.max_height.copy()
|
||||||
|
min_distance = self.min_distance.copy()
|
||||||
|
|
||||||
|
alive_bonus = self._healthy_reward * self.current_step
|
||||||
|
box_bonus = 0
|
||||||
|
rewards = 0
|
||||||
|
|
||||||
|
# TODO explain what we did here for the calculation of the reward
|
||||||
|
if is_on_box:
|
||||||
|
if self.context:
|
||||||
|
rewards -= 100 * vx ** 2 if 100 * vx ** 2 < 1 else 1
|
||||||
|
else:
|
||||||
|
box_bonus = 10
|
||||||
|
rewards += box_bonus
|
||||||
|
# rewards -= dist * dist ???? why when already on box?
|
||||||
|
# reward -= 90 - abs(angle)
|
||||||
|
rewards -= 100 * vx ** 2 if 100 * vx ** 2 < 1 else 1
|
||||||
|
rewards += max_height * 3
|
||||||
|
rewards += alive_bonus
|
||||||
|
|
||||||
|
else:
|
||||||
|
if self.context:
|
||||||
|
rewards = -10 - min_distance
|
||||||
|
rewards += max_height * 3
|
||||||
|
else:
|
||||||
|
# reward -= (dist*dist)
|
||||||
|
rewards -= min_distance * min_distance
|
||||||
|
# rewards -= dist / self.max_distance
|
||||||
|
rewards += max_height
|
||||||
|
rewards += alive_bonus
|
||||||
|
|
||||||
|
else:
|
||||||
|
rewards = 0
|
||||||
|
|
||||||
|
observation = self._get_obs()
|
||||||
|
reward = rewards - costs
|
||||||
|
info = {
|
||||||
|
'height': height_after,
|
||||||
|
'max_height': self.max_height.copy(),
|
||||||
|
'min_distance': self.min_distance,
|
||||||
|
'goal': self.box_x,
|
||||||
|
}
|
||||||
|
|
||||||
|
return observation, reward, done, info
|
||||||
|
|
||||||
|
def _get_obs(self):
|
||||||
|
return np.append(super()._get_obs(), self.box_x)
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
|
||||||
|
self.max_height = 0
|
||||||
|
self.min_distance = 5000
|
||||||
|
self.current_step = 0
|
||||||
|
self.hopper_on_box = False
|
||||||
|
if self.context:
|
||||||
|
box_id = self.sim.model.body_name2id("box")
|
||||||
|
self.box_x = np.random.uniform(1, 3, 1)
|
||||||
|
self.sim.model.body_pos[box_id] = [self.box_x, 0, 0]
|
||||||
|
return super().reset()
|
||||||
|
|
||||||
|
# overwrite reset_model to make it deterministic
|
||||||
|
def reset_model(self):
|
||||||
|
noise_low = -self._reset_noise_scale
|
||||||
|
noise_high = self._reset_noise_scale
|
||||||
|
|
||||||
|
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
||||||
|
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
|
||||||
|
|
||||||
|
self.set_state(qpos, qvel)
|
||||||
|
|
||||||
|
observation = self._get_obs()
|
||||||
|
return observation
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
render_mode = "human" # "human" or "partial" or "final"
|
||||||
|
env = ALRHopperJumpOnBoxEnv()
|
||||||
|
obs = env.reset()
|
||||||
|
|
||||||
|
for i in range(2000):
|
||||||
|
# objective.load_result("/tmp/cma")
|
||||||
|
# test with random actions
|
||||||
|
ac = env.action_space.sample()
|
||||||
|
obs, rew, d, info = env.step(ac)
|
||||||
|
if i % 10 == 0:
|
||||||
|
env.render(mode=render_mode)
|
||||||
|
if d:
|
||||||
|
print('After ', i, ' steps, done: ', d)
|
||||||
|
env.reset()
|
||||||
|
|
||||||
|
env.close()
|
31
alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
Normal file
31
alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
from typing import Tuple, Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from mp_env_api import MPEnvWrapper
|
||||||
|
|
||||||
|
|
||||||
|
class MPWrapper(MPEnvWrapper):
|
||||||
|
@property
|
||||||
|
def active_obs(self):
|
||||||
|
return np.hstack([
|
||||||
|
[False] * (5 + int(not self.exclude_current_positions_from_observation)), # position
|
||||||
|
[False] * 6, # velocity
|
||||||
|
[True]
|
||||||
|
])
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_pos(self) -> Union[float, int, np.ndarray]:
|
||||||
|
return self.env.sim.data.qpos[3:6].copy()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
|
return self.env.sim.data.qvel[3:6].copy()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
|
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dt(self) -> Union[float, int]:
|
||||||
|
return self.env.dt
|
1
alr_envs/alr/mujoco/hopper_throw/__init__.py
Normal file
1
alr_envs/alr/mujoco/hopper_throw/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from .mp_wrapper import MPWrapper
|
56
alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw.xml
Normal file
56
alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw.xml
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
<mujoco model="hopper">
|
||||||
|
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
|
||||||
|
<default>
|
||||||
|
<joint armature="1" damping="1" limited="true"/>
|
||||||
|
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
|
||||||
|
</default>
|
||||||
|
<option integrator="RK4" timestep="0.002"/>
|
||||||
|
<visual>
|
||||||
|
<map znear="0.02"/>
|
||||||
|
</visual>
|
||||||
|
<worldbody>
|
||||||
|
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
|
||||||
|
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
|
||||||
|
<body name="torso" pos="0 0 1.25">
|
||||||
|
<camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
|
||||||
|
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
|
||||||
|
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
|
||||||
|
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
|
||||||
|
<body name="thigh" pos="0 0 1.05">
|
||||||
|
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
|
||||||
|
<body name="leg" pos="0 0 0.35">
|
||||||
|
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
|
||||||
|
<body name="foot" pos="0.13/2 0 0.1">
|
||||||
|
<joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
|
||||||
|
<geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
<body name="ball" pos="0 0 1.53">
|
||||||
|
<joint armature="0" axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
|
||||||
|
<joint armature="0" axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
|
||||||
|
<joint armature="0" axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
|
||||||
|
<geom pos="0 0 1.53" priority= "1" size="0.025 0.025 0.025" type="sphere" condim="4" name="ball_geom" rgba="0.8 0.2 0.1 1" mass="0.1"
|
||||||
|
friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="-10000 -10"/>
|
||||||
|
<site name="target_ball" pos="0 0 1.53" size="0.04 0.04 0.04" rgba="1 0 0 1" type="sphere"/>
|
||||||
|
</body>
|
||||||
|
</worldbody>
|
||||||
|
<actuator>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
|
||||||
|
</actuator>
|
||||||
|
<asset>
|
||||||
|
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
|
||||||
|
width="100" height="100"/>
|
||||||
|
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
|
||||||
|
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
|
||||||
|
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
|
||||||
|
<material name="geom" texture="texgeom" texuniform="true"/>
|
||||||
|
</asset>
|
||||||
|
</mujoco>
|
@ -0,0 +1,132 @@
|
|||||||
|
<mujoco model="hopper">
|
||||||
|
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
|
||||||
|
<default>
|
||||||
|
<joint armature="1" damping="1" limited="true"/>
|
||||||
|
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
|
||||||
|
</default>
|
||||||
|
<option integrator="RK4" timestep="0.002"/>
|
||||||
|
<visual>
|
||||||
|
<map znear="0.02"/>
|
||||||
|
</visual>
|
||||||
|
<worldbody>
|
||||||
|
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
|
||||||
|
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
|
||||||
|
<body name="torso" pos="0 0 1.25">
|
||||||
|
<camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
|
||||||
|
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
|
||||||
|
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
|
||||||
|
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
|
||||||
|
<body name="thigh" pos="0 0 1.05">
|
||||||
|
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
|
||||||
|
<body name="leg" pos="0 0 0.35">
|
||||||
|
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
|
||||||
|
<body name="foot" pos="0.13/2 0 0.1">
|
||||||
|
<joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
|
||||||
|
<geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
<body name="ball" pos="0 0 1.53">
|
||||||
|
<joint armature="0" axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
|
||||||
|
<joint armature="0" axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
|
||||||
|
<joint armature="0" axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 1.53" stiffness="0" type="slide" frictionloss="0" limited="false"/>
|
||||||
|
<geom pos="0 0 1.53" priority= "1" size="0.025 0.025 0.025" type="sphere" condim="4" name="ball_geom" rgba="0.8 0.2 0.1 1" mass="0.1"
|
||||||
|
friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="-10000 -10"/>
|
||||||
|
<site name="target_ball" pos="0 0 1.53" size="0.04 0.04 0.04" rgba="1 0 0 1" type="sphere"/>
|
||||||
|
</body>
|
||||||
|
<body name="basket_ground" pos="5 0 0">
|
||||||
|
<geom friction="0.9" fromto="5 0 0 5.3 0 0" name="basket_ground_geom" size="0.1 0.4 0.3" type="box"/>
|
||||||
|
<body name="edge1" pos="5 0 0">
|
||||||
|
<geom friction="2.0" fromto="5 0 0 5 0 0.2" name="edge1_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge2" pos="5 0 0.05">
|
||||||
|
<geom friction="2.0" fromto="5 0.05 0 5 0.05 0.2" name="edge2_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge3" pos="5 0 0.1">
|
||||||
|
<geom friction="2.0" fromto="5 0.1 0 5 0.1 0.2" name="edge3_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge4" pos="5 0 0.15">
|
||||||
|
<geom friction="2.0" fromto="5 0.15 0 5 0.15 0.2" name="edge4_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge5" pos="5.05 0 0.15">
|
||||||
|
<geom friction="2.0" fromto="5.05 0.15 0 5.05 0.15 0.2" name="edge5_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge6" pos="5.1 0 0.15">
|
||||||
|
<geom friction="2.0" fromto="5.1 0.15 0 5.1 0.15 0.2" name="edge6_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge7" pos="5.15 0 0.15">
|
||||||
|
<geom friction="2.0" fromto="5.15 0.15 0 5.15 0.15 0.2" name="edge7_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge8" pos="5.2 0 0.15">
|
||||||
|
<geom friction="2.0" fromto="5.2 0.15 0 5.2 0.15 0.2" name="edge8_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge9" pos="5.25 0 0.15">
|
||||||
|
<geom friction="2.0" fromto="5.25 0.15 0 5.25 0.15 0.2" name="edge9_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge10" pos="5.3 0 0.15">
|
||||||
|
<geom friction="2.0" fromto="5.3 0.15 0 5.3 0.15 0.2" name="edge10_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge11" pos="5.3 0 0.1">
|
||||||
|
<geom friction="2.0" fromto="5.3 0.1 0 5.3 0.1 0.2" name="edge11_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge12" pos="5.3 0 0.05">
|
||||||
|
<geom friction="2.0" fromto="5.3 0.05 0 5.3 0.05 0.2" name="edge12_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge13" pos="5.3 0 0.0">
|
||||||
|
<geom friction="2.0" fromto="5.3 0 0 5.3 0 0.2" name="edge13_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge14" pos="5.3 0 -0.05">
|
||||||
|
<geom friction="2.0" fromto="5.3 -0.05 0 5.3 -0.05 0.2" name="edge14_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge15" pos="5.3 0 -0.1">
|
||||||
|
<geom friction="2.0" fromto="5.3 -0.1 0 5.3 -0.1 0.2" name="edge15_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge16" pos="5.3 0 -0.15">
|
||||||
|
<geom friction="2.0" fromto="5.3 -0.15 0 5.3 -0.15 0.2" name="edge16_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
|
||||||
|
<body name="edge20" pos="5.25 0 -0.15">
|
||||||
|
<geom friction="2.0" fromto="5.25 -0.15 0 5.25 -0.15 0.2" name="edge20_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge21" pos="5.2 0 -0.15">
|
||||||
|
<geom friction="2.0" fromto="5.2 -0.15 0 5.2 -0.15 0.2" name="edge21_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge22" pos="5.15 0 -0.15">
|
||||||
|
<geom friction="2.0" fromto="5.15 -0.15 0 5.15 -0.15 0.2" name="edge22_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge23" pos="5.1 0 -0.15">
|
||||||
|
<geom friction="2.0" fromto="5.1 -0.15 0 5.1 -0.15 0.2" name="edge23_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge24" pos="5.05 0 -0.15">
|
||||||
|
<geom friction="2.0" fromto="5.05 -0.15 0 5.05 -0.15 0.2" name="edge24_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge25" pos="5 0 -0.15">
|
||||||
|
<geom friction="2.0" fromto="5 -0.15 0 5 -0.15 0.2" name="edge25_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge26" pos="5 0 -0.1">
|
||||||
|
<geom friction="2.0" fromto="5 -0.1 0 5 -0.1 0.2" name="edge26_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
<body name="edge27" pos="5 0 -0.05">
|
||||||
|
<geom friction="2.0" fromto="5 -0.05 0 5 -0.05 0.2" name="edge27_geom" size="0.04" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</worldbody>
|
||||||
|
<actuator>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
|
||||||
|
</actuator>
|
||||||
|
<asset>
|
||||||
|
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
|
||||||
|
width="100" height="100"/>
|
||||||
|
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
|
||||||
|
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
|
||||||
|
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
|
||||||
|
<material name="geom" texture="texgeom" texuniform="true"/>
|
||||||
|
</asset>
|
||||||
|
</mujoco>
|
113
alr_envs/alr/mujoco/hopper_throw/hopper_throw.py
Normal file
113
alr_envs/alr/mujoco/hopper_throw/hopper_throw.py
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
from gym.envs.mujoco.hopper_v3 import HopperEnv
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
MAX_EPISODE_STEPS_HOPPERTHROW = 250
|
||||||
|
|
||||||
|
|
||||||
|
class ALRHopperThrowEnv(HopperEnv):
|
||||||
|
"""
|
||||||
|
Initialization changes to normal Hopper:
|
||||||
|
- healthy_reward: 1.0 -> 0.0 -> 0.1
|
||||||
|
- forward_reward_weight -> 5.0
|
||||||
|
- healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
|
||||||
|
|
||||||
|
Reward changes to normal Hopper:
|
||||||
|
- velocity: (x_position_after - x_position_before) -> self.get_body_com("ball")[0]
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
xml_file='hopper_throw.xml',
|
||||||
|
forward_reward_weight=5.0,
|
||||||
|
ctrl_cost_weight=1e-3,
|
||||||
|
healthy_reward=0.1,
|
||||||
|
terminate_when_unhealthy=True,
|
||||||
|
healthy_state_range=(-100.0, 100.0),
|
||||||
|
healthy_z_range=(0.7, float('inf')),
|
||||||
|
healthy_angle_range=(-float('inf'), float('inf')),
|
||||||
|
reset_noise_scale=5e-3,
|
||||||
|
context = True,
|
||||||
|
exclude_current_positions_from_observation=True,
|
||||||
|
max_episode_steps=250):
|
||||||
|
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
||||||
|
self.current_step = 0
|
||||||
|
self.max_episode_steps = max_episode_steps
|
||||||
|
self.context = context
|
||||||
|
self.goal = 0
|
||||||
|
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
|
||||||
|
healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
|
||||||
|
exclude_current_positions_from_observation)
|
||||||
|
|
||||||
|
def step(self, action):
|
||||||
|
|
||||||
|
self.current_step += 1
|
||||||
|
self.do_simulation(action, self.frame_skip)
|
||||||
|
ball_pos_after = self.get_body_com("ball")[0] #abs(self.get_body_com("ball")[0]) # use x and y to get point and use euclid distance as reward?
|
||||||
|
ball_pos_after_y = self.get_body_com("ball")[2]
|
||||||
|
|
||||||
|
# done = self.done TODO We should use this, not sure why there is no other termination; ball_landed should be enough, because we only look at the throw itself? - Paul and Marc
|
||||||
|
ball_landed = self.get_body_com("ball")[2] <= 0.05
|
||||||
|
done = ball_landed
|
||||||
|
|
||||||
|
ctrl_cost = self.control_cost(action)
|
||||||
|
costs = ctrl_cost
|
||||||
|
|
||||||
|
rewards = 0
|
||||||
|
|
||||||
|
if self.current_step >= self.max_episode_steps or done:
|
||||||
|
distance_reward = -np.linalg.norm(ball_pos_after - self.goal) if self.context else \
|
||||||
|
self._forward_reward_weight * ball_pos_after
|
||||||
|
healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
|
||||||
|
|
||||||
|
rewards = distance_reward + healthy_reward
|
||||||
|
|
||||||
|
observation = self._get_obs()
|
||||||
|
reward = rewards - costs
|
||||||
|
info = {
|
||||||
|
'ball_pos': ball_pos_after,
|
||||||
|
'ball_pos_y': ball_pos_after_y,
|
||||||
|
'current_step' : self.current_step,
|
||||||
|
'goal' : self.goal,
|
||||||
|
}
|
||||||
|
|
||||||
|
return observation, reward, done, info
|
||||||
|
|
||||||
|
def _get_obs(self):
|
||||||
|
return np.append(super()._get_obs(), self.goal)
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
self.current_step = 0
|
||||||
|
self.goal = self.goal = np.random.uniform(2.0, 6.0, 1) # 0.5 8.0
|
||||||
|
return super().reset()
|
||||||
|
|
||||||
|
# overwrite reset_model to make it deterministic
|
||||||
|
def reset_model(self):
|
||||||
|
noise_low = -self._reset_noise_scale
|
||||||
|
noise_high = self._reset_noise_scale
|
||||||
|
|
||||||
|
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
||||||
|
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
|
||||||
|
|
||||||
|
self.set_state(qpos, qvel)
|
||||||
|
|
||||||
|
observation = self._get_obs()
|
||||||
|
return observation
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
render_mode = "human" # "human" or "partial" or "final"
|
||||||
|
env = ALRHopperThrowEnv()
|
||||||
|
obs = env.reset()
|
||||||
|
|
||||||
|
for i in range(2000):
|
||||||
|
# objective.load_result("/tmp/cma")
|
||||||
|
# test with random actions
|
||||||
|
ac = env.action_space.sample()
|
||||||
|
obs, rew, d, info = env.step(ac)
|
||||||
|
if i % 10 == 0:
|
||||||
|
env.render(mode=render_mode)
|
||||||
|
if d:
|
||||||
|
print('After ', i, ' steps, done: ', d)
|
||||||
|
env.reset()
|
||||||
|
|
||||||
|
env.close()
|
143
alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py
Normal file
143
alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
import os
|
||||||
|
from gym.envs.mujoco.hopper_v3 import HopperEnv
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
MAX_EPISODE_STEPS_HOPPERTHROWINBASKET = 250
|
||||||
|
|
||||||
|
|
||||||
|
class ALRHopperThrowInBasketEnv(HopperEnv):
|
||||||
|
"""
|
||||||
|
Initialization changes to normal Hopper:
|
||||||
|
- healthy_reward: 1.0 -> 0.0
|
||||||
|
- healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
|
||||||
|
- hit_basket_reward: - -> 10
|
||||||
|
|
||||||
|
Reward changes to normal Hopper:
|
||||||
|
- velocity: (x_position_after - x_position_before) -> (ball_position_after - ball_position_before)
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
xml_file='hopper_throw_in_basket.xml',
|
||||||
|
forward_reward_weight=1.0,
|
||||||
|
ctrl_cost_weight=1e-3,
|
||||||
|
healthy_reward=0.0,
|
||||||
|
hit_basket_reward=10,
|
||||||
|
basket_size=0.3,
|
||||||
|
terminate_when_unhealthy=True,
|
||||||
|
healthy_state_range=(-100.0, 100.0),
|
||||||
|
healthy_z_range=(0.7, float('inf')),
|
||||||
|
healthy_angle_range=(-float('inf'), float('inf')),
|
||||||
|
reset_noise_scale=5e-3,
|
||||||
|
context=True,
|
||||||
|
penalty=0.0,
|
||||||
|
exclude_current_positions_from_observation=True,
|
||||||
|
max_episode_steps = 250):
|
||||||
|
self.hit_basket_reward = hit_basket_reward
|
||||||
|
self.current_step = 0
|
||||||
|
self.max_episode_steps = max_episode_steps
|
||||||
|
self.ball_in_basket = False
|
||||||
|
self.basket_size = basket_size
|
||||||
|
self.context = context
|
||||||
|
self.penalty = penalty
|
||||||
|
self.basket_x = 5
|
||||||
|
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
||||||
|
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
|
||||||
|
healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
|
||||||
|
exclude_current_positions_from_observation)
|
||||||
|
|
||||||
|
def step(self, action):
|
||||||
|
|
||||||
|
self.current_step += 1
|
||||||
|
self.do_simulation(action, self.frame_skip)
|
||||||
|
ball_pos = self.get_body_com("ball")
|
||||||
|
|
||||||
|
basket_pos = self.get_body_com("basket_ground")
|
||||||
|
basket_center = (basket_pos[0] + 0.5, basket_pos[1], basket_pos[2])
|
||||||
|
|
||||||
|
is_in_basket_x = ball_pos[0] >= basket_pos[0] and ball_pos[0] <= basket_pos[0] + self.basket_size
|
||||||
|
is_in_basket_y = ball_pos[1] >= basket_pos[1] - (self.basket_size/2) and ball_pos[1] <= basket_pos[1] + (self.basket_size/2)
|
||||||
|
is_in_basket_z = ball_pos[2] < 0.1
|
||||||
|
is_in_basket = is_in_basket_x and is_in_basket_y and is_in_basket_z
|
||||||
|
if is_in_basket: self.ball_in_basket = True
|
||||||
|
|
||||||
|
ball_landed = self.get_body_com("ball")[2] <= 0.05
|
||||||
|
done = ball_landed or is_in_basket
|
||||||
|
|
||||||
|
rewards = 0
|
||||||
|
|
||||||
|
ctrl_cost = self.control_cost(action)
|
||||||
|
|
||||||
|
costs = ctrl_cost
|
||||||
|
|
||||||
|
if self.current_step >= self.max_episode_steps or done:
|
||||||
|
|
||||||
|
if is_in_basket:
|
||||||
|
if not self.context:
|
||||||
|
rewards += self.hit_basket_reward
|
||||||
|
else:
|
||||||
|
dist = np.linalg.norm(ball_pos-basket_center)
|
||||||
|
if self.context:
|
||||||
|
rewards = -10 * dist
|
||||||
|
else:
|
||||||
|
rewards -= (dist*dist)
|
||||||
|
else:
|
||||||
|
# penalty not needed
|
||||||
|
rewards += ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0 #too much of a penalty?
|
||||||
|
|
||||||
|
observation = self._get_obs()
|
||||||
|
reward = rewards - costs
|
||||||
|
info = {
|
||||||
|
'ball_pos': ball_pos[0],
|
||||||
|
}
|
||||||
|
|
||||||
|
return observation, reward, done, info
|
||||||
|
|
||||||
|
def _get_obs(self):
|
||||||
|
return np.append(super()._get_obs(), self.basket_x)
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
if self.max_episode_steps == 10:
|
||||||
|
# We have to initialize this here, because the spec is only added after creating the env.
|
||||||
|
self.max_episode_steps = self.spec.max_episode_steps
|
||||||
|
|
||||||
|
self.current_step = 0
|
||||||
|
self.ball_in_basket = False
|
||||||
|
if self.context:
|
||||||
|
basket_id = self.sim.model.body_name2id("basket_ground")
|
||||||
|
self.basket_x = np.random.uniform(3, 7, 1)
|
||||||
|
self.sim.model.body_pos[basket_id] = [self.basket_x, 0, 0]
|
||||||
|
return super().reset()
|
||||||
|
|
||||||
|
# overwrite reset_model to make it deterministic
|
||||||
|
def reset_model(self):
|
||||||
|
noise_low = -self._reset_noise_scale
|
||||||
|
noise_high = self._reset_noise_scale
|
||||||
|
|
||||||
|
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
||||||
|
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
|
||||||
|
|
||||||
|
self.set_state(qpos, qvel)
|
||||||
|
|
||||||
|
observation = self._get_obs()
|
||||||
|
return observation
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
render_mode = "human" # "human" or "partial" or "final"
|
||||||
|
env = ALRHopperThrowInBasketEnv()
|
||||||
|
obs = env.reset()
|
||||||
|
|
||||||
|
for i in range(2000):
|
||||||
|
# objective.load_result("/tmp/cma")
|
||||||
|
# test with random actions
|
||||||
|
ac = env.action_space.sample()
|
||||||
|
obs, rew, d, info = env.step(ac)
|
||||||
|
if i % 10 == 0:
|
||||||
|
env.render(mode=render_mode)
|
||||||
|
if d:
|
||||||
|
print('After ', i, ' steps, done: ', d)
|
||||||
|
env.reset()
|
||||||
|
|
||||||
|
env.close()
|
30
alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
Normal file
30
alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
from typing import Tuple, Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from mp_env_api import MPEnvWrapper
|
||||||
|
|
||||||
|
|
||||||
|
class MPWrapper(MPEnvWrapper):
|
||||||
|
@property
|
||||||
|
def active_obs(self):
|
||||||
|
return np.hstack([
|
||||||
|
[False] * 17,
|
||||||
|
[True] # goal pos
|
||||||
|
])
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_pos(self) -> Union[float, int, np.ndarray]:
|
||||||
|
return self.env.sim.data.qpos[3:6].copy()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
|
return self.env.sim.data.qvel[3:6].copy()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
|
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dt(self) -> Union[float, int]:
|
||||||
|
return self.env.dt
|
1
alr_envs/alr/mujoco/walker_2d_jump/__init__.py
Normal file
1
alr_envs/alr/mujoco/walker_2d_jump/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from .mp_wrapper import MPWrapper
|
62
alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml
Normal file
62
alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
<mujoco model="walker2d">
|
||||||
|
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
|
||||||
|
<default>
|
||||||
|
<joint armature="0.01" damping=".1" limited="true"/>
|
||||||
|
<geom conaffinity="0" condim="3" contype="1" density="1000" friction=".7 .1 .1" rgba="0.8 0.6 .4 1"/>
|
||||||
|
</default>
|
||||||
|
<option integrator="RK4" timestep="0.002"/>
|
||||||
|
<worldbody>
|
||||||
|
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
|
||||||
|
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane" material="MatPlane"/>
|
||||||
|
<body name="torso" pos="0 0 1.25">
|
||||||
|
<camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
|
||||||
|
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
|
||||||
|
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
|
||||||
|
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
|
||||||
|
<body name="thigh" pos="0 0 1.05">
|
||||||
|
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
|
||||||
|
<body name="leg" pos="0 0 0.35">
|
||||||
|
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
|
||||||
|
<body name="foot" pos="0.2/2 0 0.1">
|
||||||
|
<joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
<!-- copied and then replace thigh->thigh_left, leg->leg_left, foot->foot_right -->
|
||||||
|
<body name="thigh_left" pos="0 0 1.05">
|
||||||
|
<joint axis="0 -1 0" name="thigh_left_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_left_geom" rgba=".7 .3 .6 1" size="0.05" type="capsule"/>
|
||||||
|
<body name="leg_left" pos="0 0 0.35">
|
||||||
|
<joint axis="0 -1 0" name="leg_left_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
|
||||||
|
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_left_geom" rgba=".7 .3 .6 1" size="0.04" type="capsule"/>
|
||||||
|
<body name="foot_left" pos="0.2/2 0 0.1">
|
||||||
|
<joint axis="0 -1 0" name="foot_left_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
|
||||||
|
<geom friction="1.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_left_geom" rgba=".7 .3 .6 1" size="0.06" type="capsule"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</worldbody>
|
||||||
|
<actuator>
|
||||||
|
<!-- <motor joint="torso_joint" ctrlrange="-100.0 100.0" isctrllimited="true"/>-->
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_joint"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_joint"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_joint"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_left_joint"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_left_joint"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_left_joint"/>
|
||||||
|
<!-- <motor joint="finger2_rot" ctrlrange="-20.0 20.0" isctrllimited="true"/>-->
|
||||||
|
</actuator>
|
||||||
|
<asset>
|
||||||
|
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
|
||||||
|
width="100" height="100"/>
|
||||||
|
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
|
||||||
|
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
|
||||||
|
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
|
||||||
|
<material name="geom" texture="texgeom" texuniform="true"/>
|
||||||
|
</asset>
|
||||||
|
</mujoco>
|
30
alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
Normal file
30
alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
from typing import Tuple, Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from mp_env_api import MPEnvWrapper
|
||||||
|
|
||||||
|
|
||||||
|
class MPWrapper(MPEnvWrapper):
|
||||||
|
@property
|
||||||
|
def active_obs(self):
|
||||||
|
return np.hstack([
|
||||||
|
[False] * 17,
|
||||||
|
[True] # goal pos
|
||||||
|
])
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_pos(self) -> Union[float, int, np.ndarray]:
|
||||||
|
return self.env.sim.data.qpos[3:9].copy()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
|
return self.env.sim.data.qvel[3:9].copy()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
|
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dt(self) -> Union[float, int]:
|
||||||
|
return self.env.dt
|
113
alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py
Normal file
113
alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
import os
|
||||||
|
from gym.envs.mujoco.walker2d_v3 import Walker2dEnv
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
MAX_EPISODE_STEPS_WALKERJUMP = 300
|
||||||
|
|
||||||
|
|
||||||
|
class ALRWalker2dJumpEnv(Walker2dEnv):
|
||||||
|
"""
|
||||||
|
healthy reward 1.0 -> 0.005 -> 0.0025 not from alex
|
||||||
|
penalty 10 -> 0 not from alex
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
xml_file='walker2d.xml',
|
||||||
|
forward_reward_weight=1.0,
|
||||||
|
ctrl_cost_weight=1e-3,
|
||||||
|
healthy_reward=0.0025,
|
||||||
|
terminate_when_unhealthy=True,
|
||||||
|
healthy_z_range=(0.8, 2.0),
|
||||||
|
healthy_angle_range=(-1.0, 1.0),
|
||||||
|
reset_noise_scale=5e-3,
|
||||||
|
penalty=0,
|
||||||
|
context=True,
|
||||||
|
exclude_current_positions_from_observation=True,
|
||||||
|
max_episode_steps=300):
|
||||||
|
self.current_step = 0
|
||||||
|
self.max_episode_steps = max_episode_steps
|
||||||
|
self.max_height = 0
|
||||||
|
self._penalty = penalty
|
||||||
|
self.context = context
|
||||||
|
self.goal = 0
|
||||||
|
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
||||||
|
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
|
||||||
|
healthy_z_range, healthy_angle_range, reset_noise_scale,
|
||||||
|
exclude_current_positions_from_observation)
|
||||||
|
|
||||||
|
def step(self, action):
|
||||||
|
self.current_step += 1
|
||||||
|
self.do_simulation(action, self.frame_skip)
|
||||||
|
#pos_after = self.get_body_com("torso")[0]
|
||||||
|
height = self.get_body_com("torso")[2]
|
||||||
|
|
||||||
|
self.max_height = max(height, self.max_height)
|
||||||
|
|
||||||
|
fell_over = height < 0.2
|
||||||
|
done = fell_over
|
||||||
|
|
||||||
|
ctrl_cost = self.control_cost(action)
|
||||||
|
costs = ctrl_cost
|
||||||
|
|
||||||
|
if self.current_step >= self.max_episode_steps or done:
|
||||||
|
done = True
|
||||||
|
height_goal_distance = -10 * (np.linalg.norm(self.max_height - self.goal)) if self.context else self.max_height
|
||||||
|
healthy_reward = self.healthy_reward * self.current_step
|
||||||
|
|
||||||
|
rewards = height_goal_distance + healthy_reward
|
||||||
|
else:
|
||||||
|
# penalty not needed
|
||||||
|
rewards = 0
|
||||||
|
rewards += ((action[:2] > 0) * self._penalty).sum() if self.current_step < 4 else 0
|
||||||
|
rewards += ((action[3:5] > 0) * self._penalty).sum() if self.current_step < 4 else 0
|
||||||
|
|
||||||
|
|
||||||
|
observation = self._get_obs()
|
||||||
|
reward = rewards - costs
|
||||||
|
info = {
|
||||||
|
'height': height,
|
||||||
|
'max_height': self.max_height,
|
||||||
|
'goal' : self.goal,
|
||||||
|
}
|
||||||
|
|
||||||
|
return observation, reward, done, info
|
||||||
|
|
||||||
|
def _get_obs(self):
|
||||||
|
return np.append(super()._get_obs(), self.goal)
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
self.current_step = 0
|
||||||
|
self.max_height = 0
|
||||||
|
self.goal = np.random.uniform(1.5, 2.5, 1) # 1.5 3.0
|
||||||
|
return super().reset()
|
||||||
|
|
||||||
|
# overwrite reset_model to make it deterministic
|
||||||
|
def reset_model(self):
|
||||||
|
noise_low = -self._reset_noise_scale
|
||||||
|
noise_high = self._reset_noise_scale
|
||||||
|
|
||||||
|
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
||||||
|
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
|
||||||
|
|
||||||
|
self.set_state(qpos, qvel)
|
||||||
|
|
||||||
|
observation = self._get_obs()
|
||||||
|
return observation
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
render_mode = "human" # "human" or "partial" or "final"
|
||||||
|
env = ALRWalker2dJumpEnv()
|
||||||
|
obs = env.reset()
|
||||||
|
|
||||||
|
for i in range(2000):
|
||||||
|
# objective.load_result("/tmp/cma")
|
||||||
|
# test with random actions
|
||||||
|
ac = env.action_space.sample()
|
||||||
|
obs, rew, d, info = env.step(ac)
|
||||||
|
if i % 10 == 0:
|
||||||
|
env.render(mode=render_mode)
|
||||||
|
if d:
|
||||||
|
print('After ', i, ' steps, done: ', d)
|
||||||
|
env.reset()
|
||||||
|
|
||||||
|
env.close()
|
Loading…
Reference in New Issue
Block a user