commit last version of

parent a26b9f463b
commit 2cc1ab759c
@@ -391,8 +391,7 @@ register(
     max_episode_steps=600,
     kwargs={
         "rndm_goal": False,
-        "cup_goal_pos": [0.1, -2.0],
-        "learn_release_step": True
+        "cup_goal_pos": [0.1, -2.0]
     }
 )

@@ -404,8 +403,7 @@ register(
     max_episode_steps=600,
     kwargs={
         "rndm_goal": True,
-        "cup_goal_pos": [-0.3, -1.2],
-        "learn_release_step": True
+        "cup_goal_pos": [-0.3, -1.2]
     }
 )

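With the "learn_release_step" entry gone, the remaining registration kwargs map directly onto the trimmed-down ALRBeerBongEnv constructor that appears further down in this diff. A rough sketch of a direct construction, assuming the class lives in alr_envs.alr.mujoco.beerpong.beerpong (the module path is inferred from the reward import below and is not shown in this commit):

    from alr_envs.alr.mujoco.beerpong.beerpong import ALRBeerBongEnv  # assumed module path

    # The two kwargs that remain in the first registration above.
    env = ALRBeerBongEnv(rndm_goal=False, cup_goal_pos=[0.1, -2.0])
    obs = env.reset()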
@@ -646,7 +644,7 @@ for _v in _versions:
             },
             "movement_primitives_kwargs": {
                 'movement_primitives_type': 'promp',
-                'num_dof': 7
+                'action_dim': 7
             },
             "phase_generator_kwargs": {
                 'phase_generator_type': 'linear',
@@ -658,7 +656,9 @@ for _v in _versions:
             "controller_kwargs": {
                 'controller_type': 'motor',
                 "p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
+                # "p_gains": 0.125*np.array([200, 300, 100, 100, 10, 10, 2.5]),
                 "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]),
+                # "d_gains": 0.025*np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05]),
             },
             "basis_generator_kwargs": {
                 'basis_generator_type': 'zero_rbf',
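The 'motor' controller configured here is, judging from how the EpisodicWrapper calls it with desired and current position/velocity later in this diff, presumably a per-joint PD tracking law. A minimal sketch of such a law with the gains registered above (function and variable names are illustrative, not the library's API):

    import numpy as np

    # Illustrative PD ("motor") step: des_pos/des_vel would come from the ProMP
    # trajectory, cur_pos/cur_vel from the MuJoCo state.
    p_gains = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25])
    d_gains = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])

    def pd_action(des_pos, des_vel, cur_pos, cur_vel):
        return p_gains * (des_pos - cur_pos) + d_gains * (des_vel - cur_vel)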
@@ -753,29 +753,92 @@ for _v in _versions:
     )
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)

+# ## HopperJump
+# _versions = ["v0", "v1"]
+# for _v in _versions:
+#     _env_id = f'ALRHopperJumpProMP-{_v}'
+#     register(
+#         id= _env_id,
+#         entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+#         kwargs={
+#             "name": f"alr_envs:ALRHopperJump-{_v}",
+#             "wrappers": [mujoco.hopper_jump.MPWrapper],
+#             "mp_kwargs": {
+#                 "num_dof": 3,
+#                 "num_basis": 5,
+#                 "duration": 2,
+#                 "post_traj_time": 0,
+#                 "policy_type": "motor",
+#                 "weights_scale": 1.0,
+#                 "zero_start": True,
+#                 "zero_goal": False,
+#                 "policy_kwargs": {
+#                     "p_gains": np.ones(3),
+#                     "d_gains": 0.1*np.ones(3)
+#                 }
+#             }
+#         }
+#     )
+#     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+# ## HopperJump
+# register(
+#     id= "ALRHopperJumpProMP-v2",
+#     entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+#     kwargs={
+#         "name": f"alr_envs:ALRHopperJump-v2",
+#         "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper],
+#         "mp_kwargs": {
+#             "num_dof": 3,
+#             "num_basis": 5,
+#             "duration": 2,
+#             "post_traj_time": 0,
+#             "policy_type": "motor",
+#             "weights_scale": 1.0,
+#             "zero_start": True,
+#             "zero_goal": False,
+#             "policy_kwargs": {
+#                 "p_gains": np.ones(3),
+#                 "d_gains": 0.1*np.ones(3)
+#             }
+#         }
+#     }
+# )
+# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")
+
 ## HopperJump
 _versions = ["v0", "v1"]
 for _v in _versions:
     _env_id = f'ALRHopperJumpProMP-{_v}'
     register(
         id= _env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+        entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
         kwargs={
             "name": f"alr_envs:ALRHopperJump-{_v}",
-            "wrappers": [mujoco.hopper_jump.MPWrapper],
-            "mp_kwargs": {
-                "num_dof": 3,
-                "num_basis": 5,
-                "duration": 2,
-                "post_traj_time": 0,
-                "policy_type": "motor",
-                "weights_scale": 1.0,
-                "zero_start": True,
-                "zero_goal": False,
-                "policy_kwargs": {
-                    "p_gains": np.ones(3),
-                    "d_gains": 0.1*np.ones(3)
-                }
+            "wrappers": [mujoco.hopper_jump.NewMPWrapper],
+            "ep_wrapper_kwargs": {
+                "weight_scale": 1
+            },
+            "movement_primitives_kwargs": {
+                'movement_primitives_type': 'promp',
+                'action_dim': 3
+            },
+            "phase_generator_kwargs": {
+                'phase_generator_type': 'linear',
+                'delay': 0,
+                'tau': 2, # initial value
+                'learn_tau': False,
+                'learn_delay': False
+            },
+            "controller_kwargs": {
+                'controller_type': 'motor',
+                "p_gains": np.ones(3),
+                "d_gains": 0.1*np.ones(3),
+            },
+            "basis_generator_kwargs": {
+                'basis_generator_type': 'zero_rbf',
+                'num_basis': 5,
+                'num_basis_zero_start': 2
+            }
         }
     )
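With this registration in place the ProMP variant is created through the normal Gym registry. A minimal usage sketch, assuming that importing the package runs the register(...) calls above and that one wrapper step rolls out the whole trajectory (both assumptions follow the surrounding code, not documentation):

    import gym
    import alr_envs  # noqa: F401  (assumed to trigger the register(...) calls above)

    env = gym.make("ALRHopperJumpProMP-v0")
    params = env.action_space.sample()            # one flat vector of ProMP parameters
    obs, reward, done, info = env.step(params)    # episodic step: full rollout internally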
@@ -784,25 +847,35 @@ for _v in _versions:
 ## HopperJump
 register(
     id= "ALRHopperJumpProMP-v2",
-    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+    entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
     kwargs={
         "name": f"alr_envs:ALRHopperJump-v2",
-        "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper],
-        "mp_kwargs": {
-            "num_dof": 3,
-            "num_basis": 5,
-            "duration": 2,
-            "post_traj_time": 0,
-            "policy_type": "motor",
-            "weights_scale": 1.0,
-            "zero_start": True,
-            "zero_goal": False,
-            "policy_kwargs": {
+        "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper],
+        "ep_wrapper_kwargs": {
+            "weight_scale": 1
+        },
+        "movement_primitives_kwargs": {
+            'movement_primitives_type': 'promp',
+            'action_dim': 3
+        },
+        "phase_generator_kwargs": {
+            'phase_generator_type': 'linear',
+            'delay': 0,
+            'tau': 2, # initial value
+            'learn_tau': False,
+            'learn_delay': False
+        },
+        "controller_kwargs": {
+            'controller_type': 'motor',
             "p_gains": np.ones(3),
-            "d_gains": 0.1*np.ones(3)
-            }
-        }
+            "d_gains": 0.1*np.ones(3),
+        },
+        "basis_generator_kwargs": {
+            'basis_generator_type': 'zero_rbf',
+            'num_basis': 5,
+            'num_basis_zero_start': 2
         }
     }
-    }
 )
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")

@@ -916,11 +989,4 @@ for _v in _versions:
             }
         }
     )
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-
-# --------------------- Testing new mp wrapper -----------------------------------------------------
-
-# register(
-#     id='ALRReacherProMP-v0'
-# )
@@ -1,4 +1,3 @@
-from .reacher.alr_reacher import ALRReacherEnv, ALRReacherOptCtrlEnv
 from .reacher.balancing import BalancingEnv
 from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
 from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
@@ -10,4 +9,5 @@ from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv
 from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
 from .hopper_throw.hopper_throw import ALRHopperThrowEnv
 from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
 from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv
+from .reacher.alr_reacher import ALRReacherEnv
@@ -3,7 +3,6 @@ import os

 import numpy as np
 from gym import utils
-from gym import spaces
 from gym.envs.mujoco import MujocoEnv
 from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward

@@ -18,7 +17,7 @@ CUP_POS_MAX = np.array([0.32, -1.2])

 class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
     def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
-                 rndm_goal=False, learn_release_step=True, cup_goal_pos=None):
+                 rndm_goal=False, cup_goal_pos=None):
         cup_goal_pos = np.array(cup_goal_pos if cup_goal_pos is not None else [-0.3, -1.2, 0.840])
         if cup_goal_pos.shape[0]==2:
             cup_goal_pos = np.insert(cup_goal_pos, 2, 0.840)
@@ -43,7 +42,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):

         # self._release_step = 175 # time step of ball release
         # self._release_step = 130 # time step of ball release
-        self._release_step = 100 # time step of ball release
+        self.release_step = 100 # time step of ball release

         self.ep_length = 600 # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
         self.cup_table_id = 10
@@ -52,7 +51,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
             self.noise_std = 0.01
         else:
             self.noise_std = 0
-        self.learn_release_step = learn_release_step
         reward_function = BeerPongReward
         self.reward_function = reward_function()

@@ -63,13 +61,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
     def start_pos(self):
         return self._start_pos

-    def _set_action_space(self):
-        bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
-        bounds = np.concatenate((bounds, [[50, self.ep_length*0.333]]), axis=0)
-        low, high = bounds.T
-        self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
-        return self.action_space
-
     @property
     def start_vel(self):
         return self._start_vel
@@ -109,21 +100,22 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         return self._get_obs()

     def step(self, a):
-        self._release_step = a[-1] if self.learn_release_step else self._release_step
-        self._release_step = np.clip(self._release_step, self.action_space.low[-1], self.action_space.high[-1]) \
-            if self.learn_release_step else self._release_step
+        # if a.shape[0] == 8: # we learn also when to release the ball
+        #     self._release_step = a[-1]
+        #     self._release_step = np.clip(self._release_step, 50, 250)
+        #     self.release_step = 0.5/self.dt
         reward_dist = 0.0
         angular_vel = 0.0
-        applied_action = a[:a.shape[0]-int(self.learn_release_step)]
+        applied_action = a
        reward_ctrl = - np.square(applied_action).sum()
         if self.apply_gravity_comp:
             applied_action += self.sim.data.qfrc_bias[:len(applied_action)].copy() / self.model.actuator_gear[:, 0]
         try:
             self.do_simulation(applied_action, self.frame_skip)
-            if self._steps < self._release_step:
+            if self._steps < self.release_step:
                 self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
                 self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
-            elif self._steps == self._release_step and self.add_noise:
+            elif self._steps == self.release_step and self.add_noise:
                 self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
             crash = False
         except mujoco_py.builder.MujocoException:
@@ -160,7 +152,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
                          ball_pos=ball_pos,
                          ball_vel=ball_vel,
                          success=success,
-                         is_collided=is_collided, sim_crash=crash)
+                         is_collided=is_collided, sim_crash=crash,
+                         table_contact_first=int(not self.reward_function.ball_ground_contact_first))
         infos.update(reward_infos)

         return ob, reward, done, infos
@@ -162,12 +162,16 @@ class BeerPongReward:
                 min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
                 reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
                          1e-4 * np.mean(action_cost)
-                if env.learn_release_step and not self.ball_in_cup:
-                    too_small = (env._release_step<50)*(env._release_step-50)**2
-                    too_big = (env._release_step>200)*0.2*(env._release_step-200)**2
-                    reward = reward - too_small -too_big
                 # 1e-7*np.mean(action_cost)
+                # release step punishment
+                min_time_bound = 0.1
+                max_time_bound = 1.0
+                release_time = env.release_step*env.dt
+                release_time_rew = int(release_time<min_time_bound)*(-30-10*(release_time-min_time_bound)**2) \
+                                   +int(release_time>max_time_bound)*(-30-10*(release_time-max_time_bound)**2)
+                reward += release_time_rew
                 success = self.ball_in_cup
+                # print('release time :', release_time)
             else:
                 reward = - 1e-2 * action_cost
                 # reward = - 1e-4 * action_cost
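A quick sanity check of the new release-time punishment, assuming dt = 0.005 s as implied by the "600 steps, 3 seconds" comment in beerpong.py (pure arithmetic on the formula added above):

    dt = 0.005                          # implied by ep_length = 600 over 3 seconds
    release_step = 100                  # default in ALRBeerBongEnv
    release_time = release_step * dt    # 0.5 s, inside [0.1, 1.0], so no punishment

    early = 10 * dt                     # releasing after only 10 steps (0.05 s)
    penalty = int(early < 0.1) * (-30 - 10 * (early - 0.1) ** 2)
    print(penalty)                      # -30.025: an early release costs roughly -30 reward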
@@ -21,24 +21,35 @@ class NewMPWrapper(EpisodicWrapper):
             [False] # env steps
         ])

-    def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
-        if self.env.learn_release_step:
-            return np.concatenate((step_action, np.atleast_1d(env_spec_params)))
-        else:
-            return step_action
+    # def set_mp_action_space(self):
+    #     min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
+    #     if self.mp.learn_tau:
+    #         min_action_bounds[0] = 20*self.env.dt
+    #         max_action_bounds[0] = 260*self.env.dt
+    #     mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
+    #                                      dtype=np.float32)
+    #     return mp_action_space
+
+    # def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
+    #     if self.mp.learn_tau:
+    #         return np.concatenate((step_action, np.atleast_1d(env_spec_params)))
+    #     else:
+    #         return step_action

     def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
-        if self.env.learn_release_step:
-            return action[:-1], action[-1] # mp_params, release step
+        if self.mp.learn_tau:
+            self.env.env.release_step = action[0]/self.env.dt # Tau value
+            # self.env.env.release_step = np.clip(action[0]/self.env.dt, 20, 260) # Tau value
+            return action, None
         else:
             return action, None

-    def set_action_space(self):
-        if self.env.learn_release_step:
-            min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
-            min_action_bounds = np.concatenate((min_action_bounds.numpy(), [self.env.action_space.low[-1]]))
-            max_action_bounds = np.concatenate((max_action_bounds.numpy(), [self.env.action_space.high[-1]]))
-            self.action_space = gym.spaces.Box(low=min_action_bounds, high=max_action_bounds, dtype=np.float32)
-            return self.action_space
-        else:
-            return super(NewMPWrapper, self).set_action_space()
+    # def set_action_space(self):
+    #     if self.mp.learn_tau:
+    #         min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
+    #         min_action_bounds = np.concatenate((min_action_bounds.numpy(), [self.env.action_space.low[-1]]))
+    #         max_action_bounds = np.concatenate((max_action_bounds.numpy(), [self.env.action_space.high[-1]]))
+    #         self.action_space = gym.spaces.Box(low=min_action_bounds, high=max_action_bounds, dtype=np.float32)
+    #         return self.action_space
+    #     else:
+    #         return super(NewMPWrapper, self).set_action_space()
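With this change the release timing is no longer an extra action dimension; when the ProMP learns its duration tau, the wrapper converts that first parameter into the simulation step at which the ball is released. A rough numeric sketch of the conversion, assuming dt = 0.005 s as above (names here are illustrative):

    dt = 0.005
    tau = 0.6                     # learned trajectory duration in seconds (first MP parameter)
    release_step = tau / dt       # 120, i.e. the ball is released at simulation step 120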
@@ -1 +1,2 @@
 from .mp_wrapper import MPWrapper, HighCtxtMPWrapper
+from .new_mp_wrapper import NewMPWrapper, NewHighCtxtMPWrapper
@@ -80,8 +80,6 @@ class ALRHopperJumpEnv(HopperEnv):

     # overwrite reset_model to make it deterministic
     def reset_model(self):
-        noise_low = -self._reset_noise_scale
-        noise_high = self._reset_noise_scale

         qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
         qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
@@ -104,14 +102,26 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
     def reset_model(self):
         self._floor_geom_id = self.model.geom_name2id('floor')
         self._foot_geom_id = self.model.geom_name2id('foot_geom')
-        noise_low = -self._reset_noise_scale
-        noise_high = self._reset_noise_scale
+        noise_low = -np.ones(self.model.nq)*self._reset_noise_scale
+        noise_low[1] = 0
+        noise_low[2] = -0.3
+        noise_low[3] = -0.1
+        noise_low[4] = -1.1
+        noise_low[5] = -0.785
+
+        noise_high = np.ones(self.model.nq)*self._reset_noise_scale
+        noise_high[1] = 0
+        noise_high[2] = 0.3
+        noise_high[3] = 0
+        noise_high[4] = 0
+        noise_high[5] = 0.785
+
         rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
-        rnd_vec[2] *= 0.05 # the angle around the y axis shouldn't be too high as the agent then falls down quickly and
+        # rnd_vec[2] *= 0.05 # the angle around the y axis shouldn't be too high as the agent then falls down quickly and
         # can not recover
-        rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3)
+        # rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3)
         qpos = self.init_qpos + rnd_vec
-        qvel = self.init_qvel #+ self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+        qvel = self.init_qvel

         self.set_state(qpos, qvel)

@@ -167,16 +177,19 @@ if __name__ == '__main__':
     env = ALRHopperJumpRndmPosEnv()
     obs = env.reset()

-    for i in range(2000):
-        # objective.load_result("/tmp/cma")
-        # test with random actions
-        ac = env.action_space.sample()
-        obs, rew, d, info = env.step(ac)
-        # if i % 10 == 0:
-        #     env.render(mode=render_mode)
-        env.render(mode=render_mode)
-        if d:
-            print('After ', i, ' steps, done: ', d)
-            env.reset()
+    for k in range(10):
+        obs = env.reset()
+        print('observation :', obs[:6])
+        for i in range(200):
+            # objective.load_result("/tmp/cma")
+            # test with random actions
+            ac = env.action_space.sample()
+            obs, rew, d, info = env.step(ac)
+            # if i % 10 == 0:
+            #     env.render(mode=render_mode)
+            env.render(mode=render_mode)
+            if d:
+                print('After ', i, ' steps, done: ', d)
+                env.reset()

     env.close()
alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py (new file, 29 lines)
@@ -0,0 +1,29 @@
+from alr_envs.mp.episodic_wrapper import EpisodicWrapper
+from typing import Union, Tuple
+import numpy as np
+
+
+class NewMPWrapper(EpisodicWrapper):
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.env.sim.data.qpos[3:6].copy()
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.env.sim.data.qvel[3:6].copy()
+
+    def set_active_obs(self):
+        return np.hstack([
+            [False] * (5 + int(not self.env.exclude_current_positions_from_observation)),  # position
+            [False] * 6,  # velocity
+            [True]
+        ])
+
+
+class NewHighCtxtMPWrapper(NewMPWrapper):
+    def set_active_obs(self):
+        return np.hstack([
+            [True] * (5 + int(not self.env.exclude_current_positions_from_observation)),  # position
+            [False] * 6,  # velocity
+            [False]
+        ])
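set_active_obs returns a boolean mask over the flat observation; the EpisodicWrapper stores it as active_obs and presumably uses it to select which entries are exposed as the episode-level context. A minimal sketch of how such a mask selects entries (toy observation, illustrative only):

    import numpy as np

    # Toy 12-dimensional observation and a mask of the shape built above
    # (5 position entries, 6 velocity entries, 1 extra entry).
    obs = np.arange(12.0)
    active_obs = np.hstack([[False] * 5, [False] * 6, [True]])

    context = obs[active_obs]   # only the last entry is exposed as context
    print(context)              # [11.]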
@@ -68,35 +68,35 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
     def viewer_setup(self):
         self.viewer.cam.trackbodyid = 0

-    # def reset_model(self):
-    #     qpos = self.init_qpos
-    #     if not hasattr(self, "goal"):
-    #         self.goal = np.array([-0.25, 0.25])
-    #         # self.goal = self.init_qpos.copy()[:2] + 0.05
-    #     qpos[-2:] = self.goal
-    #     qvel = self.init_qvel
-    #     qvel[-2:] = 0
-    #     self.set_state(qpos, qvel)
-    #     self._steps = 0
-    #
-    #     return self._get_obs()
-
     def reset_model(self):
-        qpos = self.init_qpos.copy()
-        while True:
-            self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
-            # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
-            # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
-            if np.linalg.norm(self.goal) < self.n_links / 10:
-                break
+        qpos = self.init_qpos
+        if not hasattr(self, "goal"):
+            self.goal = np.array([-0.25, 0.25])
+            # self.goal = self.init_qpos.copy()[:2] + 0.05
         qpos[-2:] = self.goal
-        qvel = self.init_qvel.copy()
+        qvel = self.init_qvel
         qvel[-2:] = 0
         self.set_state(qpos, qvel)
         self._steps = 0

         return self._get_obs()

+    # def reset_model(self):
+    #     qpos = self.init_qpos.copy()
+    #     while True:
+    #         self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
+    #         # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
+    #         # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
+    #         if np.linalg.norm(self.goal) < self.n_links / 10:
+    #             break
+    #     qpos[-2:] = self.goal
+    #     qvel = self.init_qvel.copy()
+    #     qvel[-2:] = 0
+    #     self.set_state(qpos, qvel)
+    #     self._steps = 0
+    #
+    #     return self._get_obs()
+
     # def reset_model(self):
     #     qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
     #     while True:
@@ -50,13 +50,10 @@ class EpisodicWrapper(gym.Env, ABC):
         # rendering
         self.render_mode = render_mode
         self.render_kwargs = {}
-        # self.time_steps = np.linspace(0, self.duration, self.traj_steps + 1)
         self.time_steps = np.linspace(0, self.duration, self.traj_steps)
         self.mp.set_mp_times(self.time_steps)
         # action_bounds = np.inf * np.ones((np.prod(self.mp.num_params)))
-        min_action_bounds, max_action_bounds = mp.get_param_bounds()
-        self.mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
-                                              dtype=np.float32)
+        self.mp_action_space = self.set_mp_action_space()

         self.action_space = self.set_action_space()
         self.active_obs = self.set_active_obs()
@@ -65,7 +62,7 @@ class EpisodicWrapper(gym.Env, ABC):
                                                 dtype=self.env.observation_space.dtype)

     def get_trajectory(self, action: np.ndarray) -> Tuple:
-        # TODO: this follows the implementation of the mp_pytorch library which includes the paramters tau and delay at
+        # TODO: this follows the implementation of the mp_pytorch library which includes the parameters tau and delay at
         # the beginning of the array.
         ignore_indices = int(self.mp.learn_tau) + int(self.mp.learn_delay)
         scaled_mp_params = action.copy()
@@ -84,6 +81,13 @@ class EpisodicWrapper(gym.Env, ABC):

         return trajectory, velocity

+    def set_mp_action_space(self):
+        """This function can be used to set up an individual space for the parameters of the mp."""
+        min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
+        mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
+                                         dtype=np.float32)
+        return mp_action_space
+
     def set_action_space(self):
         """
         This function can be used to modify the action space for considering actions which are not learned via motion
@@ -179,6 +183,7 @@ class EpisodicWrapper(gym.Env, ABC):
             step_action = self.controller.get_action(pos_vel[0], pos_vel[1], self.current_pos, self.current_vel)
             step_action = self._step_callback(t, env_spec_params, step_action) # include possible callback info
             c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
+            # print('step/clipped action ratio: ', step_action/c_action)
             obs, c_reward, done, info = self.env.step(c_action)
             if self.verbose >= 2:
                 actions[t, :] = c_action
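Because set_mp_action_space is now a regular method of EpisodicWrapper, a subclass can override it, for instance to tighten the bounds on a learned tau, which is exactly what the commented-out BeerPong wrapper code above hints at. A rough sketch of such an override, assuming get_param_bounds returns torch tensors as the .numpy() calls suggest (the subclass itself is illustrative, not part of this commit):

    import gym
    import numpy as np
    from alr_envs.alr.mujoco.hopper_jump.new_mp_wrapper import NewMPWrapper

    class TauBoundedMPWrapper(NewMPWrapper):  # illustrative subclass, not part of the repo
        def set_mp_action_space(self):
            min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
            if self.mp.learn_tau:
                # restrict the learned duration to roughly 20..260 env steps
                min_action_bounds[0] = 20 * self.env.dt
                max_action_bounds[0] = 260 * self.env.dt
            return gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
                                  dtype=np.float32)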
@@ -156,12 +156,11 @@ def make_mp_from_kwargs(
         ep_wrapper_kwargs['duration'] = dummy_env.spec.max_episode_steps*dummy_env.dt
     if phase_kwargs.get('tau', None) is None:
         phase_kwargs['tau'] = ep_wrapper_kwargs['duration']
-    action_dim = mp_kwargs.pop('num_dof', None)
-    action_dim = action_dim if action_dim is not None else np.prod(dummy_env.action_space.shape).item()
+    mp_kwargs['action_dim'] = mp_kwargs.get('action_dim', np.prod(dummy_env.action_space.shape).item())
    phase_gen = get_phase_generator(**phase_kwargs)
     basis_gen = get_basis_generator(phase_generator=phase_gen, **basis_kwargs)
     controller = get_controller(**controller_kwargs)
-    mp = get_movement_primitive(action_dim=action_dim, basis_generator=basis_gen, **mp_kwargs)
+    mp = get_movement_primitive(basis_generator=basis_gen, **mp_kwargs)
     _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, mp=mp, controller=controller,
                              ep_wrapper_kwargs=ep_wrapper_kwargs, seed=seed, **kwargs)
     return _env