commit last version of
This commit is contained in:
parent
a26b9f463b
commit
2cc1ab759c
@ -391,8 +391,7 @@ register(
|
||||
max_episode_steps=600,
|
||||
kwargs={
|
||||
"rndm_goal": False,
|
||||
"cup_goal_pos": [0.1, -2.0],
|
||||
"learn_release_step": True
|
||||
"cup_goal_pos": [0.1, -2.0]
|
||||
}
|
||||
)
|
||||
|
||||
@ -404,8 +403,7 @@ register(
|
||||
max_episode_steps=600,
|
||||
kwargs={
|
||||
"rndm_goal": True,
|
||||
"cup_goal_pos": [-0.3, -1.2],
|
||||
"learn_release_step": True
|
||||
"cup_goal_pos": [-0.3, -1.2]
|
||||
}
|
||||
)
|
||||
|
||||
@ -646,7 +644,7 @@ for _v in _versions:
|
||||
},
|
||||
"movement_primitives_kwargs": {
|
||||
'movement_primitives_type': 'promp',
|
||||
'num_dof': 7
|
||||
'action_dim': 7
|
||||
},
|
||||
"phase_generator_kwargs": {
|
||||
'phase_generator_type': 'linear',
|
||||
@ -658,7 +656,9 @@ for _v in _versions:
|
||||
"controller_kwargs": {
|
||||
'controller_type': 'motor',
|
||||
"p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
|
||||
# "p_gains": 0.125*np.array([200, 300, 100, 100, 10, 10, 2.5]),
|
||||
"d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]),
|
||||
# "d_gains": 0.025*np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05]),
|
||||
},
|
||||
"basis_generator_kwargs": {
|
||||
'basis_generator_type': 'zero_rbf',
|
||||
@ -753,29 +753,92 @@ for _v in _versions:
|
||||
)
|
||||
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
|
||||
# ## HopperJump
|
||||
# _versions = ["v0", "v1"]
|
||||
# for _v in _versions:
|
||||
# _env_id = f'ALRHopperJumpProMP-{_v}'
|
||||
# register(
|
||||
# id= _env_id,
|
||||
# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||
# kwargs={
|
||||
# "name": f"alr_envs:ALRHopperJump-{_v}",
|
||||
# "wrappers": [mujoco.hopper_jump.MPWrapper],
|
||||
# "mp_kwargs": {
|
||||
# "num_dof": 3,
|
||||
# "num_basis": 5,
|
||||
# "duration": 2,
|
||||
# "post_traj_time": 0,
|
||||
# "policy_type": "motor",
|
||||
# "weights_scale": 1.0,
|
||||
# "zero_start": True,
|
||||
# "zero_goal": False,
|
||||
# "policy_kwargs": {
|
||||
# "p_gains": np.ones(3),
|
||||
# "d_gains": 0.1*np.ones(3)
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# )
|
||||
# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
|
||||
# ## HopperJump
|
||||
# register(
|
||||
# id= "ALRHopperJumpProMP-v2",
|
||||
# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||
# kwargs={
|
||||
# "name": f"alr_envs:ALRHopperJump-v2",
|
||||
# "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper],
|
||||
# "mp_kwargs": {
|
||||
# "num_dof": 3,
|
||||
# "num_basis": 5,
|
||||
# "duration": 2,
|
||||
# "post_traj_time": 0,
|
||||
# "policy_type": "motor",
|
||||
# "weights_scale": 1.0,
|
||||
# "zero_start": True,
|
||||
# "zero_goal": False,
|
||||
# "policy_kwargs": {
|
||||
# "p_gains": np.ones(3),
|
||||
# "d_gains": 0.1*np.ones(3)
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# )
|
||||
# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")
|
||||
|
||||
## HopperJump
|
||||
_versions = ["v0", "v1"]
|
||||
for _v in _versions:
|
||||
_env_id = f'ALRHopperJumpProMP-{_v}'
|
||||
register(
|
||||
id= _env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
|
||||
kwargs={
|
||||
"name": f"alr_envs:ALRHopperJump-{_v}",
|
||||
"wrappers": [mujoco.hopper_jump.MPWrapper],
|
||||
"mp_kwargs": {
|
||||
"num_dof": 3,
|
||||
"num_basis": 5,
|
||||
"duration": 2,
|
||||
"post_traj_time": 0,
|
||||
"policy_type": "motor",
|
||||
"weights_scale": 1.0,
|
||||
"zero_start": True,
|
||||
"zero_goal": False,
|
||||
"policy_kwargs": {
|
||||
"wrappers": [mujoco.hopper_jump.NewMPWrapper],
|
||||
"ep_wrapper_kwargs": {
|
||||
"weight_scale": 1
|
||||
},
|
||||
"movement_primitives_kwargs": {
|
||||
'movement_primitives_type': 'promp',
|
||||
'action_dim': 3
|
||||
},
|
||||
"phase_generator_kwargs": {
|
||||
'phase_generator_type': 'linear',
|
||||
'delay': 0,
|
||||
'tau': 2, # initial value
|
||||
'learn_tau': False,
|
||||
'learn_delay': False
|
||||
},
|
||||
"controller_kwargs": {
|
||||
'controller_type': 'motor',
|
||||
"p_gains": np.ones(3),
|
||||
"d_gains": 0.1*np.ones(3)
|
||||
}
|
||||
"d_gains": 0.1*np.ones(3),
|
||||
},
|
||||
"basis_generator_kwargs": {
|
||||
'basis_generator_type': 'zero_rbf',
|
||||
'num_basis': 5,
|
||||
'num_basis_zero_start': 2
|
||||
}
|
||||
}
|
||||
)
|
||||
@ -784,23 +847,33 @@ for _v in _versions:
|
||||
## HopperJump
|
||||
register(
|
||||
id= "ALRHopperJumpProMP-v2",
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
|
||||
kwargs={
|
||||
"name": f"alr_envs:ALRHopperJump-v2",
|
||||
"wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper],
|
||||
"mp_kwargs": {
|
||||
"num_dof": 3,
|
||||
"num_basis": 5,
|
||||
"duration": 2,
|
||||
"post_traj_time": 0,
|
||||
"policy_type": "motor",
|
||||
"weights_scale": 1.0,
|
||||
"zero_start": True,
|
||||
"zero_goal": False,
|
||||
"policy_kwargs": {
|
||||
"wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper],
|
||||
"ep_wrapper_kwargs": {
|
||||
"weight_scale": 1
|
||||
},
|
||||
"movement_primitives_kwargs": {
|
||||
'movement_primitives_type': 'promp',
|
||||
'action_dim': 3
|
||||
},
|
||||
"phase_generator_kwargs": {
|
||||
'phase_generator_type': 'linear',
|
||||
'delay': 0,
|
||||
'tau': 2, # initial value
|
||||
'learn_tau': False,
|
||||
'learn_delay': False
|
||||
},
|
||||
"controller_kwargs": {
|
||||
'controller_type': 'motor',
|
||||
"p_gains": np.ones(3),
|
||||
"d_gains": 0.1*np.ones(3)
|
||||
}
|
||||
"d_gains": 0.1*np.ones(3),
|
||||
},
|
||||
"basis_generator_kwargs": {
|
||||
'basis_generator_type': 'zero_rbf',
|
||||
'num_basis': 5,
|
||||
'num_basis_zero_start': 2
|
||||
}
|
||||
}
|
||||
)
|
||||
@ -917,10 +990,3 @@ for _v in _versions:
|
||||
}
|
||||
)
|
||||
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
|
||||
|
||||
# --------------------- Testing new mp wrapper -----------------------------------------------------
|
||||
|
||||
# register(
|
||||
# id='ALRReacherProMP-v0'
|
||||
# )
|
@ -1,4 +1,3 @@
|
||||
from .reacher.alr_reacher import ALRReacherEnv, ALRReacherOptCtrlEnv
|
||||
from .reacher.balancing import BalancingEnv
|
||||
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
|
||||
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
|
||||
@ -11,3 +10,4 @@ from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
|
||||
from .hopper_throw.hopper_throw import ALRHopperThrowEnv
|
||||
from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
|
||||
from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv
|
||||
from .reacher.alr_reacher import ALRReacherEnv
|
@ -3,7 +3,6 @@ import os
|
||||
|
||||
import numpy as np
|
||||
from gym import utils
|
||||
from gym import spaces
|
||||
from gym.envs.mujoco import MujocoEnv
|
||||
from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
|
||||
|
||||
@ -18,7 +17,7 @@ CUP_POS_MAX = np.array([0.32, -1.2])
|
||||
|
||||
class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
|
||||
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
|
||||
rndm_goal=False, learn_release_step=True, cup_goal_pos=None):
|
||||
rndm_goal=False, cup_goal_pos=None):
|
||||
cup_goal_pos = np.array(cup_goal_pos if cup_goal_pos is not None else [-0.3, -1.2, 0.840])
|
||||
if cup_goal_pos.shape[0]==2:
|
||||
cup_goal_pos = np.insert(cup_goal_pos, 2, 0.840)
|
||||
@ -43,7 +42,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
|
||||
|
||||
# self._release_step = 175 # time step of ball release
|
||||
# self._release_step = 130 # time step of ball release
|
||||
self._release_step = 100 # time step of ball release
|
||||
self.release_step = 100 # time step of ball release
|
||||
|
||||
self.ep_length = 600 # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
|
||||
self.cup_table_id = 10
|
||||
@ -52,7 +51,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
|
||||
self.noise_std = 0.01
|
||||
else:
|
||||
self.noise_std = 0
|
||||
self.learn_release_step = learn_release_step
|
||||
reward_function = BeerPongReward
|
||||
self.reward_function = reward_function()
|
||||
|
||||
@ -63,13 +61,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
|
||||
def start_pos(self):
|
||||
return self._start_pos
|
||||
|
||||
def _set_action_space(self):
|
||||
bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
|
||||
bounds = np.concatenate((bounds, [[50, self.ep_length*0.333]]), axis=0)
|
||||
low, high = bounds.T
|
||||
self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
|
||||
return self.action_space
|
||||
|
||||
@property
|
||||
def start_vel(self):
|
||||
return self._start_vel
|
||||
@ -109,21 +100,22 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
|
||||
return self._get_obs()
|
||||
|
||||
def step(self, a):
|
||||
self._release_step = a[-1] if self.learn_release_step else self._release_step
|
||||
self._release_step = np.clip(self._release_step, self.action_space.low[-1], self.action_space.high[-1]) \
|
||||
if self.learn_release_step else self._release_step
|
||||
# if a.shape[0] == 8: # we learn also when to release the ball
|
||||
# self._release_step = a[-1]
|
||||
# self._release_step = np.clip(self._release_step, 50, 250)
|
||||
# self.release_step = 0.5/self.dt
|
||||
reward_dist = 0.0
|
||||
angular_vel = 0.0
|
||||
applied_action = a[:a.shape[0]-int(self.learn_release_step)]
|
||||
applied_action = a
|
||||
reward_ctrl = - np.square(applied_action).sum()
|
||||
if self.apply_gravity_comp:
|
||||
applied_action += self.sim.data.qfrc_bias[:len(applied_action)].copy() / self.model.actuator_gear[:, 0]
|
||||
try:
|
||||
self.do_simulation(applied_action, self.frame_skip)
|
||||
if self._steps < self._release_step:
|
||||
if self._steps < self.release_step:
|
||||
self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
|
||||
self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
|
||||
elif self._steps == self._release_step and self.add_noise:
|
||||
elif self._steps == self.release_step and self.add_noise:
|
||||
self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
|
||||
crash = False
|
||||
except mujoco_py.builder.MujocoException:
|
||||
@ -160,7 +152,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
|
||||
ball_pos=ball_pos,
|
||||
ball_vel=ball_vel,
|
||||
success=success,
|
||||
is_collided=is_collided, sim_crash=crash)
|
||||
is_collided=is_collided, sim_crash=crash,
|
||||
table_contact_first=int(not self.reward_function.ball_ground_contact_first))
|
||||
infos.update(reward_infos)
|
||||
|
||||
return ob, reward, done, infos
|
||||
|
@ -162,12 +162,16 @@ class BeerPongReward:
|
||||
min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
|
||||
reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
|
||||
1e-4 * np.mean(action_cost)
|
||||
if env.learn_release_step and not self.ball_in_cup:
|
||||
too_small = (env._release_step<50)*(env._release_step-50)**2
|
||||
too_big = (env._release_step>200)*0.2*(env._release_step-200)**2
|
||||
reward = reward - too_small -too_big
|
||||
# 1e-7*np.mean(action_cost)
|
||||
# release step punishment
|
||||
min_time_bound = 0.1
|
||||
max_time_bound = 1.0
|
||||
release_time = env.release_step*env.dt
|
||||
release_time_rew = int(release_time<min_time_bound)*(-30-10*(release_time-min_time_bound)**2) \
|
||||
+int(release_time>max_time_bound)*(-30-10*(release_time-max_time_bound)**2)
|
||||
reward += release_time_rew
|
||||
success = self.ball_in_cup
|
||||
# print('release time :', release_time)
|
||||
else:
|
||||
reward = - 1e-2 * action_cost
|
||||
# reward = - 1e-4 * action_cost
|
||||
|
@ -21,24 +21,35 @@ class NewMPWrapper(EpisodicWrapper):
|
||||
[False] # env steps
|
||||
])
|
||||
|
||||
def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
|
||||
if self.env.learn_release_step:
|
||||
return np.concatenate((step_action, np.atleast_1d(env_spec_params)))
|
||||
else:
|
||||
return step_action
|
||||
# def set_mp_action_space(self):
|
||||
# min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
|
||||
# if self.mp.learn_tau:
|
||||
# min_action_bounds[0] = 20*self.env.dt
|
||||
# max_action_bounds[0] = 260*self.env.dt
|
||||
# mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
|
||||
# dtype=np.float32)
|
||||
# return mp_action_space
|
||||
|
||||
# def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
|
||||
# if self.mp.learn_tau:
|
||||
# return np.concatenate((step_action, np.atleast_1d(env_spec_params)))
|
||||
# else:
|
||||
# return step_action
|
||||
|
||||
def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
|
||||
if self.env.learn_release_step:
|
||||
return action[:-1], action[-1] # mp_params, release step
|
||||
if self.mp.learn_tau:
|
||||
self.env.env.release_step = action[0]/self.env.dt # Tau value
|
||||
# self.env.env.release_step = np.clip(action[0]/self.env.dt, 20, 260) # Tau value
|
||||
return action, None
|
||||
else:
|
||||
return action, None
|
||||
|
||||
def set_action_space(self):
|
||||
if self.env.learn_release_step:
|
||||
min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
|
||||
min_action_bounds = np.concatenate((min_action_bounds.numpy(), [self.env.action_space.low[-1]]))
|
||||
max_action_bounds = np.concatenate((max_action_bounds.numpy(), [self.env.action_space.high[-1]]))
|
||||
self.action_space = gym.spaces.Box(low=min_action_bounds, high=max_action_bounds, dtype=np.float32)
|
||||
return self.action_space
|
||||
else:
|
||||
return super(NewMPWrapper, self).set_action_space()
|
||||
# def set_action_space(self):
|
||||
# if self.mp.learn_tau:
|
||||
# min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
|
||||
# min_action_bounds = np.concatenate((min_action_bounds.numpy(), [self.env.action_space.low[-1]]))
|
||||
# max_action_bounds = np.concatenate((max_action_bounds.numpy(), [self.env.action_space.high[-1]]))
|
||||
# self.action_space = gym.spaces.Box(low=min_action_bounds, high=max_action_bounds, dtype=np.float32)
|
||||
# return self.action_space
|
||||
# else:
|
||||
# return super(NewMPWrapper, self).set_action_space()
|
||||
|
@ -1 +1,2 @@
|
||||
from .mp_wrapper import MPWrapper, HighCtxtMPWrapper
|
||||
from .new_mp_wrapper import NewMPWrapper, NewHighCtxtMPWrapper
|
||||
|
@ -80,8 +80,6 @@ class ALRHopperJumpEnv(HopperEnv):
|
||||
|
||||
# overwrite reset_model to make it deterministic
|
||||
def reset_model(self):
|
||||
noise_low = -self._reset_noise_scale
|
||||
noise_high = self._reset_noise_scale
|
||||
|
||||
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
||||
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
|
||||
@ -104,14 +102,26 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
|
||||
def reset_model(self):
|
||||
self._floor_geom_id = self.model.geom_name2id('floor')
|
||||
self._foot_geom_id = self.model.geom_name2id('foot_geom')
|
||||
noise_low = -self._reset_noise_scale
|
||||
noise_high = self._reset_noise_scale
|
||||
noise_low = -np.ones(self.model.nq)*self._reset_noise_scale
|
||||
noise_low[1] = 0
|
||||
noise_low[2] = -0.3
|
||||
noise_low[3] = -0.1
|
||||
noise_low[4] = -1.1
|
||||
noise_low[5] = -0.785
|
||||
|
||||
noise_high = np.ones(self.model.nq)*self._reset_noise_scale
|
||||
noise_high[1] = 0
|
||||
noise_high[2] = 0.3
|
||||
noise_high[3] = 0
|
||||
noise_high[4] = 0
|
||||
noise_high[5] = 0.785
|
||||
|
||||
rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
||||
rnd_vec[2] *= 0.05 # the angle around the y axis shouldn't be too high as the agent then falls down quickly and
|
||||
# rnd_vec[2] *= 0.05 # the angle around the y axis shouldn't be too high as the agent then falls down quickly and
|
||||
# can not recover
|
||||
rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3)
|
||||
# rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3)
|
||||
qpos = self.init_qpos + rnd_vec
|
||||
qvel = self.init_qvel #+ self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
|
||||
qvel = self.init_qvel
|
||||
|
||||
self.set_state(qpos, qvel)
|
||||
|
||||
@ -167,7 +177,10 @@ if __name__ == '__main__':
|
||||
env = ALRHopperJumpRndmPosEnv()
|
||||
obs = env.reset()
|
||||
|
||||
for i in range(2000):
|
||||
for k in range(10):
|
||||
obs = env.reset()
|
||||
print('observation :', obs[:6])
|
||||
for i in range(200):
|
||||
# objective.load_result("/tmp/cma")
|
||||
# test with random actions
|
||||
ac = env.action_space.sample()
|
||||
|
29
alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
Normal file
29
alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
Normal file
@ -0,0 +1,29 @@
|
||||
from alr_envs.mp.episodic_wrapper import EpisodicWrapper
|
||||
from typing import Union, Tuple
|
||||
import numpy as np
|
||||
|
||||
|
||||
class NewMPWrapper(EpisodicWrapper):
|
||||
@property
|
||||
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||
return self.env.sim.data.qpos[3:6].copy()
|
||||
|
||||
@property
|
||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||
return self.env.sim.data.qvel[3:6].copy()
|
||||
|
||||
def set_active_obs(self):
|
||||
return np.hstack([
|
||||
[False] * (5 + int(not self.env.exclude_current_positions_from_observation)), # position
|
||||
[False] * 6, # velocity
|
||||
[True]
|
||||
])
|
||||
|
||||
|
||||
class NewHighCtxtMPWrapper(NewMPWrapper):
|
||||
def set_active_obs(self):
|
||||
return np.hstack([
|
||||
[True] * (5 + int(not self.env.exclude_current_positions_from_observation)), # position
|
||||
[False] * 6, # velocity
|
||||
[False]
|
||||
])
|
@ -68,35 +68,35 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
|
||||
def viewer_setup(self):
|
||||
self.viewer.cam.trackbodyid = 0
|
||||
|
||||
# def reset_model(self):
|
||||
# qpos = self.init_qpos
|
||||
# if not hasattr(self, "goal"):
|
||||
# self.goal = np.array([-0.25, 0.25])
|
||||
# # self.goal = self.init_qpos.copy()[:2] + 0.05
|
||||
# qpos[-2:] = self.goal
|
||||
# qvel = self.init_qvel
|
||||
# qvel[-2:] = 0
|
||||
# self.set_state(qpos, qvel)
|
||||
# self._steps = 0
|
||||
#
|
||||
# return self._get_obs()
|
||||
|
||||
def reset_model(self):
|
||||
qpos = self.init_qpos.copy()
|
||||
while True:
|
||||
self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
|
||||
# self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
|
||||
# self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
|
||||
if np.linalg.norm(self.goal) < self.n_links / 10:
|
||||
break
|
||||
qpos = self.init_qpos
|
||||
if not hasattr(self, "goal"):
|
||||
self.goal = np.array([-0.25, 0.25])
|
||||
# self.goal = self.init_qpos.copy()[:2] + 0.05
|
||||
qpos[-2:] = self.goal
|
||||
qvel = self.init_qvel.copy()
|
||||
qvel = self.init_qvel
|
||||
qvel[-2:] = 0
|
||||
self.set_state(qpos, qvel)
|
||||
self._steps = 0
|
||||
|
||||
return self._get_obs()
|
||||
|
||||
# def reset_model(self):
|
||||
# qpos = self.init_qpos.copy()
|
||||
# while True:
|
||||
# self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
|
||||
# # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
|
||||
# # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
|
||||
# if np.linalg.norm(self.goal) < self.n_links / 10:
|
||||
# break
|
||||
# qpos[-2:] = self.goal
|
||||
# qvel = self.init_qvel.copy()
|
||||
# qvel[-2:] = 0
|
||||
# self.set_state(qpos, qvel)
|
||||
# self._steps = 0
|
||||
#
|
||||
# return self._get_obs()
|
||||
|
||||
# def reset_model(self):
|
||||
# qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
|
||||
# while True:
|
||||
|
@ -50,13 +50,10 @@ class EpisodicWrapper(gym.Env, ABC):
|
||||
# rendering
|
||||
self.render_mode = render_mode
|
||||
self.render_kwargs = {}
|
||||
# self.time_steps = np.linspace(0, self.duration, self.traj_steps + 1)
|
||||
self.time_steps = np.linspace(0, self.duration, self.traj_steps)
|
||||
self.mp.set_mp_times(self.time_steps)
|
||||
# action_bounds = np.inf * np.ones((np.prod(self.mp.num_params)))
|
||||
min_action_bounds, max_action_bounds = mp.get_param_bounds()
|
||||
self.mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
|
||||
dtype=np.float32)
|
||||
self.mp_action_space = self.set_mp_action_space()
|
||||
|
||||
self.action_space = self.set_action_space()
|
||||
self.active_obs = self.set_active_obs()
|
||||
@ -65,7 +62,7 @@ class EpisodicWrapper(gym.Env, ABC):
|
||||
dtype=self.env.observation_space.dtype)
|
||||
|
||||
def get_trajectory(self, action: np.ndarray) -> Tuple:
|
||||
# TODO: this follows the implementation of the mp_pytorch library which includes the paramters tau and delay at
|
||||
# TODO: this follows the implementation of the mp_pytorch library which includes the parameters tau and delay at
|
||||
# the beginning of the array.
|
||||
ignore_indices = int(self.mp.learn_tau) + int(self.mp.learn_delay)
|
||||
scaled_mp_params = action.copy()
|
||||
@ -84,6 +81,13 @@ class EpisodicWrapper(gym.Env, ABC):
|
||||
|
||||
return trajectory, velocity
|
||||
|
||||
def set_mp_action_space(self):
|
||||
"""This function can be used to set up an individual space for the parameters of the mp."""
|
||||
min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
|
||||
mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
|
||||
dtype=np.float32)
|
||||
return mp_action_space
|
||||
|
||||
def set_action_space(self):
|
||||
"""
|
||||
This function can be used to modify the action space for considering actions which are not learned via motion
|
||||
@ -179,6 +183,7 @@ class EpisodicWrapper(gym.Env, ABC):
|
||||
step_action = self.controller.get_action(pos_vel[0], pos_vel[1], self.current_pos, self.current_vel)
|
||||
step_action = self._step_callback(t, env_spec_params, step_action) # include possible callback info
|
||||
c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
|
||||
# print('step/clipped action ratio: ', step_action/c_action)
|
||||
obs, c_reward, done, info = self.env.step(c_action)
|
||||
if self.verbose >= 2:
|
||||
actions[t, :] = c_action
|
||||
|
@ -156,12 +156,11 @@ def make_mp_from_kwargs(
|
||||
ep_wrapper_kwargs['duration'] = dummy_env.spec.max_episode_steps*dummy_env.dt
|
||||
if phase_kwargs.get('tau', None) is None:
|
||||
phase_kwargs['tau'] = ep_wrapper_kwargs['duration']
|
||||
action_dim = mp_kwargs.pop('num_dof', None)
|
||||
action_dim = action_dim if action_dim is not None else np.prod(dummy_env.action_space.shape).item()
|
||||
mp_kwargs['action_dim'] = mp_kwargs.get('action_dim', np.prod(dummy_env.action_space.shape).item())
|
||||
phase_gen = get_phase_generator(**phase_kwargs)
|
||||
basis_gen = get_basis_generator(phase_generator=phase_gen, **basis_kwargs)
|
||||
controller = get_controller(**controller_kwargs)
|
||||
mp = get_movement_primitive(action_dim=action_dim, basis_generator=basis_gen, **mp_kwargs)
|
||||
mp = get_movement_primitive(basis_generator=basis_gen, **mp_kwargs)
|
||||
_env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, mp=mp, controller=controller,
|
||||
ep_wrapper_kwargs=ep_wrapper_kwargs, seed=seed, **kwargs)
|
||||
return _env
|
||||
|
Loading…
Reference in New Issue
Block a user