commit 2cc1ab759c (parent a26b9f463b)
Author: Onur
Date:   2022-05-05 18:50:20 +02:00

    commit last version of

11 changed files with 250 additions and 129 deletions

View File

@@ -391,8 +391,7 @@ register(
     max_episode_steps=600,
     kwargs={
         "rndm_goal": False,
-        "cup_goal_pos": [0.1, -2.0],
-        "learn_release_step": True
+        "cup_goal_pos": [0.1, -2.0]
     }
 )
@@ -404,8 +403,7 @@ register(
     max_episode_steps=600,
     kwargs={
         "rndm_goal": True,
-        "cup_goal_pos": [-0.3, -1.2],
-        "learn_release_step": True
+        "cup_goal_pos": [-0.3, -1.2]
     }
 )
@@ -646,7 +644,7 @@ for _v in _versions:
             },
             "movement_primitives_kwargs": {
                 'movement_primitives_type': 'promp',
-                'num_dof': 7
+                'action_dim': 7
             },
             "phase_generator_kwargs": {
                 'phase_generator_type': 'linear',
@@ -658,7 +656,9 @@ for _v in _versions:
             "controller_kwargs": {
                 'controller_type': 'motor',
                 "p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
+                # "p_gains": 0.125*np.array([200, 300, 100, 100, 10, 10, 2.5]),
                 "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]),
+                # "d_gains": 0.025*np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05]),
             },
             "basis_generator_kwargs": {
                 'basis_generator_type': 'zero_rbf',
@@ -753,29 +753,92 @@ for _v in _versions:
     )
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)

+# ## HopperJump
+# _versions = ["v0", "v1"]
+# for _v in _versions:
+#     _env_id = f'ALRHopperJumpProMP-{_v}'
+#     register(
+#         id= _env_id,
+#         entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+#         kwargs={
+#             "name": f"alr_envs:ALRHopperJump-{_v}",
+#             "wrappers": [mujoco.hopper_jump.MPWrapper],
+#             "mp_kwargs": {
+#                 "num_dof": 3,
+#                 "num_basis": 5,
+#                 "duration": 2,
+#                 "post_traj_time": 0,
+#                 "policy_type": "motor",
+#                 "weights_scale": 1.0,
+#                 "zero_start": True,
+#                 "zero_goal": False,
+#                 "policy_kwargs": {
+#                     "p_gains": np.ones(3),
+#                     "d_gains": 0.1*np.ones(3)
+#                 }
+#             }
+#         }
+#     )
+#     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+# ## HopperJump
+# register(
+#     id= "ALRHopperJumpProMP-v2",
+#     entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+#     kwargs={
+#         "name": f"alr_envs:ALRHopperJump-v2",
+#         "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper],
+#         "mp_kwargs": {
+#             "num_dof": 3,
+#             "num_basis": 5,
+#             "duration": 2,
+#             "post_traj_time": 0,
+#             "policy_type": "motor",
+#             "weights_scale": 1.0,
+#             "zero_start": True,
+#             "zero_goal": False,
+#             "policy_kwargs": {
+#                 "p_gains": np.ones(3),
+#                 "d_gains": 0.1*np.ones(3)
+#             }
+#         }
+#     }
+# )
+# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")
+
 ## HopperJump
 _versions = ["v0", "v1"]
 for _v in _versions:
     _env_id = f'ALRHopperJumpProMP-{_v}'
     register(
         id= _env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+        entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
         kwargs={
             "name": f"alr_envs:ALRHopperJump-{_v}",
-            "wrappers": [mujoco.hopper_jump.MPWrapper],
-            "mp_kwargs": {
-                "num_dof": 3,
-                "num_basis": 5,
-                "duration": 2,
-                "post_traj_time": 0,
-                "policy_type": "motor",
-                "weights_scale": 1.0,
-                "zero_start": True,
-                "zero_goal": False,
-                "policy_kwargs": {
+            "wrappers": [mujoco.hopper_jump.NewMPWrapper],
+            "ep_wrapper_kwargs": {
+                "weight_scale": 1
+            },
+            "movement_primitives_kwargs": {
+                'movement_primitives_type': 'promp',
+                'action_dim': 3
+            },
+            "phase_generator_kwargs": {
+                'phase_generator_type': 'linear',
+                'delay': 0,
+                'tau': 2,  # initial value
+                'learn_tau': False,
+                'learn_delay': False
+            },
+            "controller_kwargs": {
+                'controller_type': 'motor',
                 "p_gains": np.ones(3),
-                "d_gains": 0.1*np.ones(3)
-            }
+                "d_gains": 0.1*np.ones(3),
+            },
+            "basis_generator_kwargs": {
+                'basis_generator_type': 'zero_rbf',
+                'num_basis': 5,
+                'num_basis_zero_start': 2
             }
         }
     )
@@ -784,23 +847,33 @@ for _v in _versions:
 ## HopperJump
 register(
     id= "ALRHopperJumpProMP-v2",
-    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+    entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
     kwargs={
         "name": f"alr_envs:ALRHopperJump-v2",
-        "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper],
-        "mp_kwargs": {
-            "num_dof": 3,
-            "num_basis": 5,
-            "duration": 2,
-            "post_traj_time": 0,
-            "policy_type": "motor",
-            "weights_scale": 1.0,
-            "zero_start": True,
-            "zero_goal": False,
-            "policy_kwargs": {
+        "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper],
+        "ep_wrapper_kwargs": {
+            "weight_scale": 1
+        },
+        "movement_primitives_kwargs": {
+            'movement_primitives_type': 'promp',
+            'action_dim': 3
+        },
+        "phase_generator_kwargs": {
+            'phase_generator_type': 'linear',
+            'delay': 0,
+            'tau': 2,  # initial value
+            'learn_tau': False,
+            'learn_delay': False
+        },
+        "controller_kwargs": {
+            'controller_type': 'motor',
             "p_gains": np.ones(3),
-            "d_gains": 0.1*np.ones(3)
-        }
+            "d_gains": 0.1*np.ones(3),
+        },
+        "basis_generator_kwargs": {
+            'basis_generator_type': 'zero_rbf',
+            'num_basis': 5,
+            'num_basis_zero_start': 2
         }
     }
 )
@@ -917,10 +990,3 @@ for _v in _versions:
         }
     )
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-# --------------------- Testing new mp wrapper -----------------------------------------------------
-# register(
-#     id='ALRReacherProMP-v0'
-# )
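
Note: once this registration lands, the new HopperJump ProMP variants can be exercised directly. A minimal sketch, assuming that importing alr_envs triggers the register() calls above and that the episodic interface consumes one ProMP parameter vector per episode:

    import gym
    import alr_envs  # importing runs the register() calls above

    env = gym.make("ALRHopperJumpProMP-v2")
    obs = env.reset()
    # a single step of an episodic MP env takes a full parameter vector
    # and rolls the resulting trajectory out internally
    params = env.action_space.sample()
    obs, reward, done, info = env.step(params)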

View File

@@ -1,4 +1,3 @@
-from .reacher.alr_reacher import ALRReacherEnv, ALRReacherOptCtrlEnv
 from .reacher.balancing import BalancingEnv
 from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
 from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
@@ -11,3 +10,4 @@ from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
 from .hopper_throw.hopper_throw import ALRHopperThrowEnv
 from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
 from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv
+from .reacher.alr_reacher import ALRReacherEnv

View File

@@ -3,7 +3,6 @@ import os
 import numpy as np
 from gym import utils
-from gym import spaces
 from gym.envs.mujoco import MujocoEnv

 from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
@@ -18,7 +17,7 @@ CUP_POS_MAX = np.array([0.32, -1.2])
 class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
     def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
-                 rndm_goal=False, learn_release_step=True, cup_goal_pos=None):
+                 rndm_goal=False, cup_goal_pos=None):
         cup_goal_pos = np.array(cup_goal_pos if cup_goal_pos is not None else [-0.3, -1.2, 0.840])
         if cup_goal_pos.shape[0]==2:
             cup_goal_pos = np.insert(cup_goal_pos, 2, 0.840)
@@ -43,7 +42,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         # self._release_step = 175  # time step of ball release
         # self._release_step = 130  # time step of ball release
-        self._release_step = 100  # time step of ball release
+        self.release_step = 100  # time step of ball release

         self.ep_length = 600  # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
         self.cup_table_id = 10
@@ -52,7 +51,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
             self.noise_std = 0.01
         else:
             self.noise_std = 0
-        self.learn_release_step = learn_release_step

         reward_function = BeerPongReward
         self.reward_function = reward_function()
@@ -63,13 +61,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
     def start_pos(self):
         return self._start_pos

-    def _set_action_space(self):
-        bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
-        bounds = np.concatenate((bounds, [[50, self.ep_length*0.333]]), axis=0)
-        low, high = bounds.T
-        self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
-        return self.action_space
-
     @property
     def start_vel(self):
         return self._start_vel
@@ -109,21 +100,22 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         return self._get_obs()

     def step(self, a):
-        self._release_step = a[-1] if self.learn_release_step else self._release_step
-        self._release_step = np.clip(self._release_step, self.action_space.low[-1], self.action_space.high[-1]) \
-            if self.learn_release_step else self._release_step
+        # if a.shape[0] == 8:  # we learn also when to release the ball
+        #     self._release_step = a[-1]
+        #     self._release_step = np.clip(self._release_step, 50, 250)
+        #     self.release_step = 0.5/self.dt
         reward_dist = 0.0
         angular_vel = 0.0
-        applied_action = a[:a.shape[0]-int(self.learn_release_step)]
+        applied_action = a
         reward_ctrl = - np.square(applied_action).sum()

         if self.apply_gravity_comp:
             applied_action += self.sim.data.qfrc_bias[:len(applied_action)].copy() / self.model.actuator_gear[:, 0]
         try:
             self.do_simulation(applied_action, self.frame_skip)
-            if self._steps < self._release_step:
+            if self._steps < self.release_step:
                 self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
                 self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
-            elif self._steps == self._release_step and self.add_noise:
+            elif self._steps == self.release_step and self.add_noise:
                 self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
             crash = False
         except mujoco_py.builder.MujocoException:
@@ -160,7 +152,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
                          ball_pos=ball_pos,
                          ball_vel=ball_vel,
                          success=success,
-                         is_collided=is_collided, sim_crash=crash)
+                         is_collided=is_collided, sim_crash=crash,
+                         table_contact_first=int(not self.reward_function.ball_ground_contact_first))
             infos.update(reward_infos)
         return ob, reward, done, infos
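
Note: with learn_release_step gone, release_step is a plain public attribute that a wrapper (or a quick test script) can overwrite before the rollout. A rough sketch, assuming the constructor defaults shown above and a working MuJoCo setup:

    env = ALRBeerBongEnv(rndm_goal=True)
    env.release_step = 120  # the ball stays attached to the hand site until this step
    obs = env.reset()
    for _ in range(env.ep_length):
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            break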

View File

@@ -162,12 +162,16 @@ class BeerPongReward:
                 min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
                 reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
                          1e-4 * np.mean(action_cost)
-            if env.learn_release_step and not self.ball_in_cup:
-                too_small = (env._release_step<50)*(env._release_step-50)**2
-                too_big = (env._release_step>200)*0.2*(env._release_step-200)**2
-                reward = reward - too_small -too_big
                          # 1e-7*np.mean(action_cost)
+            # release step punishment
+            min_time_bound = 0.1
+            max_time_bound = 1.0
+            release_time = env.release_step*env.dt
+            release_time_rew = int(release_time<min_time_bound)*(-30-10*(release_time-min_time_bound)**2) \
+                               +int(release_time>max_time_bound)*(-30-10*(release_time-max_time_bound)**2)
+            reward += release_time_rew
             success = self.ball_in_cup
+            # print('release time :', release_time)
         else:
             reward = - 1e-2 * action_cost
             # reward = - 1e-4 * action_cost
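
Note: the new penalty is zero whenever the release time falls inside the [0.1 s, 1.0 s] window and drops to roughly -30 outside of it. A small worked sketch of the formula above, assuming dt = 0.005 as in the environment:

    dt = 0.005
    for release_step in (10, 100, 250):
        release_time = release_step * dt  # 0.05 s, 0.5 s, 1.25 s
        rew = int(release_time < 0.1) * (-30 - 10 * (release_time - 0.1) ** 2) \
              + int(release_time > 1.0) * (-30 - 10 * (release_time - 1.0) ** 2)
        print(release_step, round(rew, 3))  # -30.025, 0.0, -30.625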

View File

@@ -21,24 +21,35 @@ class NewMPWrapper(EpisodicWrapper):
             [False]  # env steps
         ])

-    def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
-        if self.env.learn_release_step:
-            return np.concatenate((step_action, np.atleast_1d(env_spec_params)))
-        else:
-            return step_action
+    # def set_mp_action_space(self):
+    #     min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
+    #     if self.mp.learn_tau:
+    #         min_action_bounds[0] = 20*self.env.dt
+    #         max_action_bounds[0] = 260*self.env.dt
+    #     mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
+    #                                      dtype=np.float32)
+    #     return mp_action_space
+
+    # def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
+    #     if self.mp.learn_tau:
+    #         return np.concatenate((step_action, np.atleast_1d(env_spec_params)))
+    #     else:
+    #         return step_action

     def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
-        if self.env.learn_release_step:
-            return action[:-1], action[-1]  # mp_params, release step
+        if self.mp.learn_tau:
+            self.env.env.release_step = action[0]/self.env.dt  # Tau value
+            # self.env.env.release_step = np.clip(action[0]/self.env.dt, 20, 260)  # Tau value
+            return action, None
         else:
             return action, None

-    def set_action_space(self):
-        if self.env.learn_release_step:
-            min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
-            min_action_bounds = np.concatenate((min_action_bounds.numpy(), [self.env.action_space.low[-1]]))
-            max_action_bounds = np.concatenate((max_action_bounds.numpy(), [self.env.action_space.high[-1]]))
-            self.action_space = gym.spaces.Box(low=min_action_bounds, high=max_action_bounds, dtype=np.float32)
-            return self.action_space
-        else:
-            return super(NewMPWrapper, self).set_action_space()
+    # def set_action_space(self):
+    #     if self.mp.learn_tau:
+    #         min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
+    #         min_action_bounds = np.concatenate((min_action_bounds.numpy(), [self.env.action_space.low[-1]]))
+    #         max_action_bounds = np.concatenate((max_action_bounds.numpy(), [self.env.action_space.high[-1]]))
+    #         self.action_space = gym.spaces.Box(low=min_action_bounds, high=max_action_bounds, dtype=np.float32)
+    #         return self.action_space
+    #     else:
+    #         return super(NewMPWrapper, self).set_action_space()
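
Note: the coupling to the environment is a single line. When the phase generator learns tau, the first entry of the parameter vector is the trajectory duration in seconds, and dividing it by the simulation dt gives the step index at which the ball is released. A quick sketch, assuming dt = 0.005 as in ALRBeerBongEnv:

    dt = 0.005
    tau = 0.5                  # first action entry when learn_tau is enabled
    release_step = tau / dt    # -> 100.0, the previously hard-coded default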

View File

@@ -1 +1,2 @@
 from .mp_wrapper import MPWrapper, HighCtxtMPWrapper
+from .new_mp_wrapper import NewMPWrapper, NewHighCtxtMPWrapper

View File

@@ -80,8 +80,6 @@ class ALRHopperJumpEnv(HopperEnv):
     # overwrite reset_model to make it deterministic
     def reset_model(self):
-        noise_low = -self._reset_noise_scale
-        noise_high = self._reset_noise_scale

         qpos = self.init_qpos  # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
         qvel = self.init_qvel  # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
@@ -104,14 +102,26 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
     def reset_model(self):
         self._floor_geom_id = self.model.geom_name2id('floor')
         self._foot_geom_id = self.model.geom_name2id('foot_geom')
-        noise_low = -self._reset_noise_scale
-        noise_high = self._reset_noise_scale
+        noise_low = -np.ones(self.model.nq)*self._reset_noise_scale
+        noise_low[1] = 0
+        noise_low[2] = -0.3
+        noise_low[3] = -0.1
+        noise_low[4] = -1.1
+        noise_low[5] = -0.785
+
+        noise_high = np.ones(self.model.nq)*self._reset_noise_scale
+        noise_high[1] = 0
+        noise_high[2] = 0.3
+        noise_high[3] = 0
+        noise_high[4] = 0
+        noise_high[5] = 0.785

         rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
-        rnd_vec[2] *= 0.05  # the angle around the y axis shouldn't be too high as the agent then falls down quickly and
-                            # can not recover
-        rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3)
+        # rnd_vec[2] *= 0.05  # the angle around the y axis shouldn't be too high as the agent then falls down quickly and
+        #                     # can not recover
+        # rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3)
         qpos = self.init_qpos + rnd_vec
-        qvel = self.init_qvel #+ self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+        qvel = self.init_qvel

         self.set_state(qpos, qvel)
@@ -167,7 +177,10 @@ if __name__ == '__main__':
     env = ALRHopperJumpRndmPosEnv()
     obs = env.reset()

-    for i in range(2000):
+    for k in range(10):
+        obs = env.reset()
+        print('observation :', obs[:6])
+    for i in range(200):
        # objective.load_result("/tmp/cma")
        # test with random actions
        ac = env.action_space.sample()

View File

@@ -0,0 +1,29 @@
+from alr_envs.mp.episodic_wrapper import EpisodicWrapper
+from typing import Union, Tuple
+import numpy as np
+
+
+class NewMPWrapper(EpisodicWrapper):
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.env.sim.data.qpos[3:6].copy()
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.env.sim.data.qvel[3:6].copy()
+
+    def set_active_obs(self):
+        return np.hstack([
+            [False] * (5 + int(not self.env.exclude_current_positions_from_observation)),  # position
+            [False] * 6,  # velocity
+            [True]
+        ])
+
+
+class NewHighCtxtMPWrapper(NewMPWrapper):
+    def set_active_obs(self):
+        return np.hstack([
+            [True] * (5 + int(not self.env.exclude_current_positions_from_observation)),  # position
+            [False] * 6,  # velocity
+            [False]
+        ])
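
Note: the boolean masks returned by set_active_obs() appear to tell EpisodicWrapper which observation entries stay visible as episode context (True) and which are hidden (False). A minimal sketch of how such a mask filters a raw observation, using a hypothetical 12-dimensional Hopper observation and assuming the x-position is excluded:

    import numpy as np

    obs = np.arange(12.0)  # hypothetical raw observation
    mask = np.hstack([[False] * 5, [False] * 6, [True]])
    context = obs[mask]    # NewMPWrapper keeps only the final entry as context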

View File

@@ -68,35 +68,35 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
     def viewer_setup(self):
         self.viewer.cam.trackbodyid = 0

-    # def reset_model(self):
-    #     qpos = self.init_qpos
-    #     if not hasattr(self, "goal"):
-    #         self.goal = np.array([-0.25, 0.25])
-    #         # self.goal = self.init_qpos.copy()[:2] + 0.05
-    #     qpos[-2:] = self.goal
-    #     qvel = self.init_qvel
-    #     qvel[-2:] = 0
-    #     self.set_state(qpos, qvel)
-    #     self._steps = 0
-    #
-    #     return self._get_obs()
     def reset_model(self):
-        qpos = self.init_qpos.copy()
-        while True:
-            self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
-            # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
-            # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
-            if np.linalg.norm(self.goal) < self.n_links / 10:
-                break
+        qpos = self.init_qpos
+        if not hasattr(self, "goal"):
+            self.goal = np.array([-0.25, 0.25])
+            # self.goal = self.init_qpos.copy()[:2] + 0.05
         qpos[-2:] = self.goal
-        qvel = self.init_qvel.copy()
+        qvel = self.init_qvel
         qvel[-2:] = 0
         self.set_state(qpos, qvel)
         self._steps = 0

         return self._get_obs()
+    # def reset_model(self):
+    #     qpos = self.init_qpos.copy()
+    #     while True:
+    #         self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
+    #         # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
+    #         # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
+    #         if np.linalg.norm(self.goal) < self.n_links / 10:
+    #             break
+    #     qpos[-2:] = self.goal
+    #     qvel = self.init_qvel.copy()
+    #     qvel[-2:] = 0
+    #     self.set_state(qpos, qvel)
+    #     self._steps = 0
+    #
+    #     return self._get_obs()

     # def reset_model(self):
     #     qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
     #     while True:
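
Note: with this change ALRReacherEnv resets to the fixed goal [-0.25, 0.25] instead of sampling one, so consecutive resets are reproducible. A quick check, assuming the default constructor and the import added to the mujoco package __init__ above:

    from alr_envs.alr.mujoco import ALRReacherEnv

    env = ALRReacherEnv()
    obs_a = env.reset()
    obs_b = env.reset()
    # the goal-related entries of the observation should now match across resets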

View File

@@ -50,13 +50,10 @@ class EpisodicWrapper(gym.Env, ABC):
         # rendering
         self.render_mode = render_mode
         self.render_kwargs = {}
-        # self.time_steps = np.linspace(0, self.duration, self.traj_steps + 1)
         self.time_steps = np.linspace(0, self.duration, self.traj_steps)
         self.mp.set_mp_times(self.time_steps)
         # action_bounds = np.inf * np.ones((np.prod(self.mp.num_params)))
-        min_action_bounds, max_action_bounds = mp.get_param_bounds()
-        self.mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
-                                              dtype=np.float32)
+        self.mp_action_space = self.set_mp_action_space()

         self.action_space = self.set_action_space()
         self.active_obs = self.set_active_obs()
@@ -65,7 +62,7 @@ class EpisodicWrapper(gym.Env, ABC):
                                                 dtype=self.env.observation_space.dtype)

     def get_trajectory(self, action: np.ndarray) -> Tuple:
-        # TODO: this follows the implementation of the mp_pytorch library which includes the paramters tau and delay at
+        # TODO: this follows the implementation of the mp_pytorch library which includes the parameters tau and delay at
         # the beginning of the array.
         ignore_indices = int(self.mp.learn_tau) + int(self.mp.learn_delay)
         scaled_mp_params = action.copy()
@@ -84,6 +81,13 @@ class EpisodicWrapper(gym.Env, ABC):
         return trajectory, velocity

+    def set_mp_action_space(self):
+        """This function can be used to set up an individual space for the parameters of the mp."""
+        min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
+        mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
+                                         dtype=np.float32)
+        return mp_action_space
+
     def set_action_space(self):
         """
         This function can be used to modify the action space for considering actions which are not learned via motion
@@ -179,6 +183,7 @@ class EpisodicWrapper(gym.Env, ABC):
             step_action = self.controller.get_action(pos_vel[0], pos_vel[1], self.current_pos, self.current_vel)
             step_action = self._step_callback(t, env_spec_params, step_action)  # include possible callback info
             c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
+            # print('step/clipped action ratio: ', step_action/c_action)
             obs, c_reward, done, info = self.env.step(c_action)
             if self.verbose >= 2:
                 actions[t, :] = c_action
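
Note: the new set_mp_action_space() hook gives subclasses one place to reshape the MP parameter space before set_action_space() runs. A sketch of an override that clamps a learned tau to a fixed window, mirroring the commented-out variant in the BeerPong wrapper above (the bounds are illustrative, gym and numpy are assumed to be imported):

    def set_mp_action_space(self):
        min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
        if self.mp.learn_tau:
            min_action_bounds[0] = 20 * self.env.dt   # earliest allowed duration
            max_action_bounds[0] = 260 * self.env.dt  # latest allowed duration
        return gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
                              dtype=np.float32)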

View File

@@ -156,12 +156,11 @@ def make_mp_from_kwargs(
         ep_wrapper_kwargs['duration'] = dummy_env.spec.max_episode_steps*dummy_env.dt
     if phase_kwargs.get('tau', None) is None:
         phase_kwargs['tau'] = ep_wrapper_kwargs['duration']
-    action_dim = mp_kwargs.pop('num_dof', None)
-    action_dim = action_dim if action_dim is not None else np.prod(dummy_env.action_space.shape).item()
+    mp_kwargs['action_dim'] = mp_kwargs.get('action_dim', np.prod(dummy_env.action_space.shape).item())
     phase_gen = get_phase_generator(**phase_kwargs)
     basis_gen = get_basis_generator(phase_generator=phase_gen, **basis_kwargs)
     controller = get_controller(**controller_kwargs)
-    mp = get_movement_primitive(action_dim=action_dim, basis_generator=basis_gen, **mp_kwargs)
+    mp = get_movement_primitive(basis_generator=basis_gen, **mp_kwargs)
     _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, mp=mp, controller=controller,
                              ep_wrapper_kwargs=ep_wrapper_kwargs, seed=seed, **kwargs)
     return _env
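
Note: the net effect of this change is that 'action_dim' becomes optional in movement_primitives_kwargs; when a registration omits it, the helper falls back to the wrapped environment's action dimensionality. A small sketch of the fallback in isolation, with an assumed 7-dimensional action space:

    import numpy as np

    mp_kwargs = {'movement_primitives_type': 'promp'}  # no explicit action_dim
    action_space_shape = (7,)                          # e.g. the 7-dof arm
    mp_kwargs['action_dim'] = mp_kwargs.get('action_dim',
                                            np.prod(action_space_shape).item())  # -> 7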