after deadline
This commit is contained in:
parent
c47845c0dd
commit
7bd9848c31
@ -203,6 +203,34 @@ register(
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Register a dense-reward and a sparse-reward 5-link reacher for every
# control-cost weight in _vs. The weight is formatted straight into the
# env id, e.g. 'ALRReacher3-v0' or 'ALRReacherSparse0.001-v0'.
_vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1]
for i in _vs:
    # (id prefix, steps_before_reward); the dense variant is registered first.
    for _prefix, _delay in (('ALRReacher', 0), ('ALRReacherSparse', 200)):
        _env_id = f'{_prefix}{i}-v0'
        register(
            id=_env_id,
            entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
            max_episode_steps=200,
            kwargs={
                "steps_before_reward": _delay,
                "n_links": 5,
                "balance": False,
                'ctrl_cost_weight': i,
            },
        )
|
||||||
|
|
||||||
# CtxtFree are v0, Contextual are v1
|
# CtxtFree are v0, Contextual are v1
|
||||||
register(
|
register(
|
||||||
id='ALRAntJump-v0',
|
id='ALRAntJump-v0',
|
||||||
@ -458,6 +486,18 @@ register(
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Beerpong with episodic reward, but a fixed release time step
register(
    id='ALRBeerPong-v4',
    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvFixedReleaseStep',
    max_episode_steps=300,
    kwargs=dict(
        rndm_goal=True,
        cup_goal_pos=[-0.3, -1.2],
        frame_skip=2,
    ),
)
|
||||||
|
|
||||||
# Motion Primitive Environments
|
# Motion Primitive Environments
|
||||||
|
|
||||||
## Simple Reacher
|
## Simple Reacher
|
||||||
@ -648,6 +688,56 @@ for _v in _versions:
|
|||||||
)
|
)
|
||||||
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||||
|
|
||||||
|
|
||||||
|
# ProMP versions of the per-control-cost-weight reacher environments: one
# dense and one sparse wrapper per weight, each pointing at the step-based
# environment with the matching id.
_vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1]
for i in _vs:
    # The dense variant is registered first, then the sparse one.
    for _prefix in ('ALRReacher', 'ALRReacherSparse'):
        _env_id = f'{_prefix}{i}ProMP-v0'
        register(
            id=_env_id,
            entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
            kwargs={
                # Same string as _env_id with the 'ProMP' marker removed.
                "name": f"alr_envs:{_prefix}{i}-v0",
                "wrappers": [mujoco.reacher.MPWrapper],
                # A fresh dict is built for every registration so that no two
                # environments share a mutable kwargs object.
                "mp_kwargs": {
                    "num_dof": 5,
                    "num_basis": 5,
                    "duration": 4,
                    "policy_type": "motor",
                    # "weights_scale": 5,
                    "n_zero_basis": 1,
                    "zero_start": True,
                    "policy_kwargs": {
                        "p_gains": 1,
                        "d_gains": 0.1,
                    },
                },
            },
        )
|
||||||
|
|
||||||
|
|
||||||
# ## Beerpong
|
# ## Beerpong
|
||||||
# _versions = ["v0", "v1"]
|
# _versions = ["v0", "v1"]
|
||||||
# for _v in _versions:
|
# for _v in _versions:
|
||||||
@ -717,6 +807,42 @@ for _v in _versions:
|
|||||||
)
|
)
|
||||||
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||||
|
|
||||||
|
## Beerpong ProMP fixed release
# Wraps 'ALRBeerPong-v4' and records the new id in the ProMP environment list.
_env_id = 'BeerpongProMP-v2'
register(
    id=_env_id,
    entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
    kwargs=dict(
        name="alr_envs:ALRBeerPong-v4",
        wrappers=[mujoco.beerpong.NewMPWrapper],
        ep_wrapper_kwargs=dict(
            weight_scale=1,
        ),
        movement_primitives_kwargs=dict(
            movement_primitives_type='promp',
            action_dim=7,
        ),
        phase_generator_kwargs=dict(
            phase_generator_type='linear',
            delay=0,
            tau=0.62,  # initial value
            learn_tau=False,
            learn_delay=False,
        ),
        controller_kwargs=dict(
            controller_type='motor',
            p_gains=np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
            d_gains=np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]),
        ),
        basis_generator_kwargs=dict(
            basis_generator_type='zero_rbf',
            num_basis=2,
            num_basis_zero_start=2,
        ),
    ),
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||||
|
|
||||||
## Table Tennis
|
## Table Tennis
|
||||||
ctxt_dim = [2, 4]
|
ctxt_dim = [2, 4]
|
||||||
for _v, cd in enumerate(ctxt_dim):
|
for _v, cd in enumerate(ctxt_dim):
|
||||||
|
@ -2,7 +2,7 @@ from .reacher.balancing import BalancingEnv
|
|||||||
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
|
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
|
||||||
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
|
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
|
||||||
from .table_tennis.tt_gym import TTEnvGym
|
from .table_tennis.tt_gym import TTEnvGym
|
||||||
from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased, ALRBeerBongEnvStepBasedEpisodicReward
|
from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased, ALRBeerBongEnvStepBasedEpisodicReward, ALRBeerBongEnvFixedReleaseStep
|
||||||
from .ant_jump.ant_jump import ALRAntJumpEnv
|
from .ant_jump.ant_jump import ALRAntJumpEnv
|
||||||
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
|
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
|
||||||
from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased
|
from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased
|
||||||
|
@ -185,6 +185,10 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
|
|||||||
def dt(self):
|
def dt(self):
|
||||||
return super(ALRBeerBongEnv, self).dt*self.repeat_action
|
return super(ALRBeerBongEnv, self).dt*self.repeat_action
|
||||||
|
|
||||||
|
class ALRBeerBongEnvFixedReleaseStep(ALRBeerBongEnv):
    """Beerpong environment whose ``release_step`` is pinned to a constant.

    Construction is delegated unchanged to ``ALRBeerBongEnv``; afterwards
    ``release_step`` is overwritten with 62.
    """

    def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
        super(ALRBeerBongEnvFixedReleaseStep, self).__init__(
            frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos
        )
        # 62 was empirically evaluated for frame_skip=2 (original author's
        # note); it is applied here whatever frame_skip was passed in.
        self.release_step = 62
|
||||||
|
|
||||||
class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
|
class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
|
||||||
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
|
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
|
||||||
@ -206,6 +210,25 @@ class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
|
|||||||
# internal steps and thus, the observation also needs to be set correctly
|
# internal steps and thus, the observation also needs to be set correctly
|
||||||
return ob, reward, done, infos
|
return ob, reward, done, infos
|
||||||
|
|
||||||
|
|
||||||
|
# class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
|
||||||
|
# def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
|
||||||
|
# super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
|
||||||
|
# self.release_step = 62 # empirically evaluated for frame_skip=2!
|
||||||
|
#
|
||||||
|
# def step(self, a):
|
||||||
|
# if self._steps < self.release_step:
|
||||||
|
# return super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(a)
|
||||||
|
# else:
|
||||||
|
# sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(np.zeros(a.shape))
|
||||||
|
# reward = sub_reward
|
||||||
|
# infos = sub_infos
|
||||||
|
# ob = sub_ob
|
||||||
|
# ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the
|
||||||
|
# # internal steps and thus, the observation also needs to be set correctly
|
||||||
|
# return ob, reward, done, infos
|
||||||
|
|
||||||
|
|
||||||
class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
|
class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
|
||||||
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
|
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
|
||||||
super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
|
super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
|
||||||
@ -259,7 +282,8 @@ class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# env = ALRBeerBongEnv(rndm_goal=True)
|
# env = ALRBeerBongEnv(rndm_goal=True)
|
||||||
# env = ALRBeerBongEnvStepBased(frame_skip=2, rndm_goal=True)
|
# env = ALRBeerBongEnvStepBased(frame_skip=2, rndm_goal=True)
|
||||||
env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2, rndm_goal=True)
|
# env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2, rndm_goal=True)
|
||||||
|
env = ALRBeerBongEnvFixedReleaseStep(frame_skip=2, rndm_goal=True)
|
||||||
import time
|
import time
|
||||||
env.reset()
|
env.reset()
|
||||||
env.render("human")
|
env.render("human")
|
||||||
|
@ -123,7 +123,6 @@ class ALRHopperJumpEnv(HopperEnv):
|
|||||||
class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
|
class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action):
|
||||||
|
|
||||||
self._floor_geom_id = self.model.geom_name2id('floor')
|
self._floor_geom_id = self.model.geom_name2id('floor')
|
||||||
self._foot_geom_id = self.model.geom_name2id('foot_geom')
|
self._foot_geom_id = self.model.geom_name2id('foot_geom')
|
||||||
|
|
||||||
@ -173,7 +172,8 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
|
|||||||
'goal_dist': goal_dist,
|
'goal_dist': goal_dist,
|
||||||
'height_rew': self.max_height,
|
'height_rew': self.max_height,
|
||||||
'healthy_reward': self.healthy_reward * 2,
|
'healthy_reward': self.healthy_reward * 2,
|
||||||
'healthy': self.is_healthy
|
'healthy': self.is_healthy,
|
||||||
|
'contact_dist': self.contact_dist if self.contact_dist is not None else 0
|
||||||
}
|
}
|
||||||
return observation, reward, done, info
|
return observation, reward, done, info
|
||||||
|
|
||||||
@ -194,7 +194,6 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
|
|||||||
rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
||||||
qpos = self.init_qpos + rnd_vec
|
qpos = self.init_qpos + rnd_vec
|
||||||
qvel = self.init_qvel
|
qvel = self.init_qvel
|
||||||
|
|
||||||
self.set_state(qpos, qvel)
|
self.set_state(qpos, qvel)
|
||||||
|
|
||||||
observation = self._get_obs()
|
observation = self._get_obs()
|
||||||
@ -207,9 +206,6 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
|
|||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
super().reset()
|
super().reset()
|
||||||
# self.goal = np.random.uniform(-1.5, 1.5, 1)
|
|
||||||
# self.goal = np.random.uniform(0, 1.5, 1)
|
|
||||||
# self.goal = self.np_random.uniform(0, 1.5, 1)
|
|
||||||
self.goal = self.np_random.uniform(0.3, 1.35, 1)[0]
|
self.goal = self.np_random.uniform(0.3, 1.35, 1)[0]
|
||||||
self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = np.array([self.goal, 0, 0])
|
self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = np.array([self.goal, 0, 0])
|
||||||
return self.reset_model()
|
return self.reset_model()
|
||||||
@ -219,6 +215,16 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
|
|||||||
- np.array([self.goal, 0, 0])
|
- np.array([self.goal, 0, 0])
|
||||||
return np.concatenate((super(ALRHopperXYJumpEnv, self)._get_obs(), goal_diff))
|
return np.concatenate((super(ALRHopperXYJumpEnv, self)._get_obs(), goal_diff))
|
||||||
|
|
||||||
|
def set_context(self, context):
    """Put the environment into the state described by ``context``.

    Args:
        context: 4-dimensional sequence. The first three entries overwrite
            the last three entries of the initial joint positions; the last
            entry becomes the goal.

    Returns:
        The observation from ``self._get_obs()`` after the state is applied.
    """
    # Work on a copy: writing into self.init_qpos itself would permanently
    # change the state that every later reset starts from.
    qpos = self.init_qpos.copy()
    qpos[-3:] = context[:3]
    self.goal = context[-1]
    self.set_state(qpos, self.init_qvel)

    # Move the goal marker body to the new goal position.
    goal_body_id = self.sim.model.body_name2id('goal_site_body')
    self.sim.model.body_pos[goal_body_id] = np.array([self.goal, 0, 0])
    return self._get_obs()
|
||||||
|
|
||||||
class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
|
class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@ -246,10 +252,6 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
|
|||||||
reset_noise_scale, exclude_current_positions_from_observation, max_episode_steps)
|
reset_noise_scale, exclude_current_positions_from_observation, max_episode_steps)
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action):
|
||||||
print("")
|
|
||||||
print('height_scale: ', self.height_scale)
|
|
||||||
print('healthy_scale: ', self.healthy_scale)
|
|
||||||
print('dist_scale: ', self.dist_scale)
|
|
||||||
self._floor_geom_id = self.model.geom_name2id('floor')
|
self._floor_geom_id = self.model.geom_name2id('floor')
|
||||||
self._foot_geom_id = self.model.geom_name2id('foot_geom')
|
self._foot_geom_id = self.model.geom_name2id('foot_geom')
|
||||||
|
|
||||||
@ -268,6 +270,23 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
|
|||||||
reward = -ctrl_cost + healthy_reward + dist_reward
|
reward = -ctrl_cost + healthy_reward + dist_reward
|
||||||
done = False
|
done = False
|
||||||
observation = self._get_obs()
|
observation = self._get_obs()
|
||||||
|
|
||||||
|
|
||||||
|
###########################################################
|
||||||
|
# This is only for logging the distance to goal when first having the contact
|
||||||
|
##########################################################
|
||||||
|
floor_contact = self._contact_checker(self._floor_geom_id,
|
||||||
|
self._foot_geom_id) if not self.contact_with_floor else False
|
||||||
|
if not self.init_floor_contact:
|
||||||
|
self.init_floor_contact = floor_contact
|
||||||
|
if self.init_floor_contact and not self.has_left_floor:
|
||||||
|
self.has_left_floor = not floor_contact
|
||||||
|
if not self.contact_with_floor and self.has_left_floor:
|
||||||
|
self.contact_with_floor = floor_contact
|
||||||
|
|
||||||
|
if self.contact_dist is None and self.contact_with_floor:
|
||||||
|
self.contact_dist = np.linalg.norm(self.sim.data.site_xpos[self.model.site_name2id('foot_site')]
|
||||||
|
- np.array([self.goal, 0, 0]))
|
||||||
info = {
|
info = {
|
||||||
'height': height_after,
|
'height': height_after,
|
||||||
'x_pos': site_pos_after,
|
'x_pos': site_pos_after,
|
||||||
@ -275,8 +294,9 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
|
|||||||
'goal': self.goal,
|
'goal': self.goal,
|
||||||
'goal_dist': goal_dist,
|
'goal_dist': goal_dist,
|
||||||
'height_rew': self.max_height,
|
'height_rew': self.max_height,
|
||||||
'healthy_reward': self.healthy_reward * 2,
|
'healthy_reward': self.healthy_reward * self.healthy_reward,
|
||||||
'healthy': self.is_healthy
|
'healthy': self.is_healthy,
|
||||||
|
'contact_dist': self.contact_dist if self.contact_dist is not None else 0
|
||||||
}
|
}
|
||||||
return observation, reward, done, info
|
return observation, reward, done, info
|
||||||
|
|
||||||
@ -361,7 +381,7 @@ if __name__ == '__main__':
|
|||||||
# env = ALRHopperJumpRndmPosEnv()
|
# env = ALRHopperJumpRndmPosEnv()
|
||||||
obs = env.reset()
|
obs = env.reset()
|
||||||
|
|
||||||
for k in range(10):
|
for k in range(1000):
|
||||||
obs = env.reset()
|
obs = env.reset()
|
||||||
print('observation :', obs[:])
|
print('observation :', obs[:])
|
||||||
for i in range(200):
|
for i in range(200):
|
||||||
|
@ -38,4 +38,8 @@ class NewHighCtxtMPWrapper(NewMPWrapper):
|
|||||||
[False] * 6, # velocity
|
[False] * 6, # velocity
|
||||||
[True], # goal
|
[True], # goal
|
||||||
[False] * 3 # goal diff
|
[False] * 3 # goal diff
|
||||||
])
|
])
|
||||||
|
|
||||||
|
def set_context(self, context):
    """Apply ``context`` on ``self.env.env`` and return the wrapper's view of the result.

    The value returned by the inner ``set_context`` is passed through
    ``get_observation_from_step`` before being handed back.
    """
    inner_env = self.env.env
    raw_observation = inner_env.set_context(context)
    return self.get_observation_from_step(raw_observation)
|
||||||
|
|
||||||
|
@ -8,7 +8,8 @@ import alr_envs.utils.utils as alr_utils
|
|||||||
|
|
||||||
|
|
||||||
class ALRReacherEnv(MujocoEnv, utils.EzPickle):
|
class ALRReacherEnv(MujocoEnv, utils.EzPickle):
|
||||||
def __init__(self, steps_before_reward=200, n_links=5, balance=False):
|
def __init__(self, steps_before_reward: int = 200, n_links: int = 5, ctrl_cost_weight: int = 1,
|
||||||
|
balance: bool = False):
|
||||||
utils.EzPickle.__init__(**locals())
|
utils.EzPickle.__init__(**locals())
|
||||||
|
|
||||||
self._steps = 0
|
self._steps = 0
|
||||||
@ -17,6 +18,7 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
self.balance = balance
|
self.balance = balance
|
||||||
self.balance_weight = 1.0
|
self.balance_weight = 1.0
|
||||||
|
self.ctrl_cost_weight = ctrl_cost_weight
|
||||||
|
|
||||||
self.reward_weight = 1
|
self.reward_weight = 1
|
||||||
if steps_before_reward == 200:
|
if steps_before_reward == 200:
|
||||||
@ -40,7 +42,7 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
|
|||||||
angular_vel = 0.0
|
angular_vel = 0.0
|
||||||
reward_balance = 0.0
|
reward_balance = 0.0
|
||||||
is_delayed = self.steps_before_reward > 0
|
is_delayed = self.steps_before_reward > 0
|
||||||
reward_ctrl = - np.square(a).sum()
|
reward_ctrl = - np.square(a).sum() * self.ctrl_cost_weight
|
||||||
if self._steps >= self.steps_before_reward:
|
if self._steps >= self.steps_before_reward:
|
||||||
vec = self.get_body_com("fingertip") - self.get_body_com("target")
|
vec = self.get_body_com("fingertip") - self.get_body_com("target")
|
||||||
reward_dist -= self.reward_weight * np.linalg.norm(vec)
|
reward_dist -= self.reward_weight * np.linalg.norm(vec)
|
||||||
@ -48,9 +50,9 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
|
|||||||
# avoid giving this penalty for normal step based case
|
# avoid giving this penalty for normal step based case
|
||||||
# angular_vel -= 10 * np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
|
# angular_vel -= 10 * np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
|
||||||
angular_vel -= 10 * np.square(self.sim.data.qvel.flat[:self.n_links]).sum()
|
angular_vel -= 10 * np.square(self.sim.data.qvel.flat[:self.n_links]).sum()
|
||||||
if is_delayed:
|
# if is_delayed:
|
||||||
# Higher control penalty for sparse reward per timestep
|
# # Higher control penalty for sparse reward per timestep
|
||||||
reward_ctrl *= 10
|
# reward_ctrl *= 10
|
||||||
|
|
||||||
if self.balance:
|
if self.balance:
|
||||||
reward_balance -= self.balance_weight * np.abs(
|
reward_balance -= self.balance_weight * np.abs(
|
||||||
@ -68,35 +70,42 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
|
|||||||
def viewer_setup(self):
|
def viewer_setup(self):
|
||||||
self.viewer.cam.trackbodyid = 0
|
self.viewer.cam.trackbodyid = 0
|
||||||
|
|
||||||
def reset_model(self):
|
|
||||||
qpos = self.init_qpos
|
|
||||||
if not hasattr(self, "goal"):
|
|
||||||
self.goal = np.array([-0.25, 0.25])
|
|
||||||
# self.goal = self.init_qpos.copy()[:2] + 0.05
|
|
||||||
qpos[-2:] = self.goal
|
|
||||||
qvel = self.init_qvel
|
|
||||||
qvel[-2:] = 0
|
|
||||||
self.set_state(qpos, qvel)
|
|
||||||
self._steps = 0
|
|
||||||
|
|
||||||
return self._get_obs()
|
|
||||||
|
|
||||||
# def reset_model(self):
|
# def reset_model(self):
|
||||||
# qpos = self.init_qpos.copy()
|
# qpos = self.init_qpos
|
||||||
# while True:
|
# if not hasattr(self, "goal"):
|
||||||
# self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
|
# self.goal = np.array([-0.25, 0.25])
|
||||||
# # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
|
# # self.goal = self.init_qpos.copy()[:2] + 0.05
|
||||||
# # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
|
|
||||||
# if np.linalg.norm(self.goal) < self.n_links / 10:
|
|
||||||
# break
|
|
||||||
# qpos[-2:] = self.goal
|
# qpos[-2:] = self.goal
|
||||||
# qvel = self.init_qvel.copy()
|
# qvel = self.init_qvel
|
||||||
# qvel[-2:] = 0
|
# qvel[-2:] = 0
|
||||||
# self.set_state(qpos, qvel)
|
# self.set_state(qpos, qvel)
|
||||||
# self._steps = 0
|
# self._steps = 0
|
||||||
#
|
#
|
||||||
# return self._get_obs()
|
# return self._get_obs()
|
||||||
|
|
||||||
|
def reset_model(self):
    """Reset the simulation with a freshly sampled goal and return the observation.

    The goal is drawn by rejection sampling: uniform in a box, accepted once
    its norm is below ``self.n_links / 10``. It is written into the last two
    entries of the joint positions, the matching velocities are zeroed, and
    the step counter is reset.

    Returns:
        The observation from ``self._get_obs()`` after the reset.
    """
    qpos = self.init_qpos.copy()
    goal_radius = self.n_links / 10
    while True:
        # Alternative sampling regions kept for reference:
        # full space
        # self.goal = self.np_random.uniform(low=-goal_radius, high=goal_radius, size=2)
        # I Quadrant
        # self.goal = self.np_random.uniform(low=0, high=goal_radius, size=2)
        # II Quadrant
        # self.goal = self.np_random.uniform(low=[-goal_radius, 0], high=[0, goal_radius], size=2)
        # II + III Quadrant
        # self.goal = self.np_random.uniform(low=-goal_radius, high=[0, goal_radius], size=2)
        # I + II Quadrant
        # Drawn from the environment's own RNG so that seeding the env
        # reproduces the goals. The box is bounded by goal_radius because
        # anything beyond it would be rejected by the norm check below.
        self.goal = self.np_random.uniform(low=[-goal_radius, 0], high=[goal_radius, goal_radius], size=2)
        if np.linalg.norm(self.goal) < goal_radius:
            break
    qpos[-2:] = self.goal
    qvel = self.init_qvel.copy()
    qvel[-2:] = 0
    self.set_state(qpos, qvel)
    self._steps = 0

    return self._get_obs()
|
||||||
|
|
||||||
# def reset_model(self):
|
# def reset_model(self):
|
||||||
# qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
|
# qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
|
||||||
# while True:
|
# while True:
|
||||||
@ -140,4 +149,4 @@ if __name__ == '__main__':
|
|||||||
if d:
|
if d:
|
||||||
env.reset()
|
env.reset()
|
||||||
|
|
||||||
env.close()
|
env.close()
|
@ -40,4 +40,4 @@ class MPWrapper(MPEnvWrapper):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def dt(self) -> Union[float, int]:
|
def dt(self) -> Union[float, int]:
|
||||||
return self.env.dt
|
return self.env.dt
|
@ -98,7 +98,6 @@ def make(env_id: str, seed, **kwargs):
|
|||||||
|
|
||||||
return env
|
return env
|
||||||
|
|
||||||
|
|
||||||
def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], mp: MPInterface, controller: BaseController,
|
def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], mp: MPInterface, controller: BaseController,
|
||||||
ep_wrapper_kwargs: Mapping, seed=1, **kwargs):
|
ep_wrapper_kwargs: Mapping, seed=1, **kwargs):
|
||||||
"""
|
"""
|
||||||
|
Loading…
Reference in New Issue
Block a user