bp 2fs seems to work
commit 59b15e82ea
parent 2ea7f6a2ed
@@ -285,6 +285,20 @@ register(
         "healthy_reward": 1.0
     }
 )
 
+##### Hopper Jump step based reward
+register(
+    id='ALRHopperJump-v4',
+    entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnvStepBased',
+    max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
+    kwargs={
+        "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
+        "context": True,
+        "healthy_reward": 1.0
+    }
+)
+
+
 # CtxtFree are v0, Contextual are v1
 register(
     id='ALRHopperJumpOnBox-v0',
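
Assuming the register(...) calls in this file run on package import, as they do for the other ids here, the new step-based variant can then be constructed through the usual gym registry. A minimal sketch:

    import gym
    import alr_envs  # importing the package triggers the register(...) calls

    env = gym.make('ALRHopperJump-v4')  # step-based reward variant
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())
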
@@ -925,7 +939,7 @@ register(
         "basis_generator_kwargs": {
             'basis_generator_type': 'zero_rbf',
             'num_basis': 5,
-            'num_basis_zero_start': 2
+            'num_basis_zero_start': 1
         }
     }
 )
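
Note on the change: num_basis_zero_start drops from 2 to 1, so fewer of the leading basis weights are pinned to zero at the start of the trajectory; the zero-start bases are presumably what keep the generated motion from jumping away from the current state at t=0. The sketch below is a conceptual illustration of that idea in plain numpy, not the mp library's actual zero_rbf implementation:

    import numpy as np

    def zero_start_rbf(t, num_basis=5, num_basis_zero_start=1, width=0.05):
        # normalized RBF features over phase t in [0, 1]
        centers = np.linspace(0, 1, num_basis)
        phi = np.exp(-(t[:, None] - centers[None, :]) ** 2 / (2 * width))
        phi /= phi.sum(axis=1, keepdims=True)
        # pin the leading bases to zero so the output starts "quietly"
        phi[:, :num_basis_zero_start] = 0.0
        return phi
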
@@ -961,12 +975,48 @@ register(
         "basis_generator_kwargs": {
             'basis_generator_type': 'zero_rbf',
             'num_basis': 5,
-            'num_basis_zero_start': 2
+            'num_basis_zero_start': 1
         }
     }
 )
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v3")
 
 
+## HopperJump
+register(
+    id="ALRHopperJumpProMP-v4",
+    entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+    kwargs={
+        "name": f"alr_envs:ALRHopperJump-v4",
+        "wrappers": [mujoco.hopper_jump.NewMPWrapper],
+        "ep_wrapper_kwargs": {
+            "weight_scale": 1
+        },
+        "movement_primitives_kwargs": {
+            'movement_primitives_type': 'promp',
+            'action_dim': 3
+        },
+        "phase_generator_kwargs": {
+            'phase_generator_type': 'linear',
+            'delay': 0,
+            'tau': 2,  # initial value
+            'learn_tau': False,
+            'learn_delay': False
+        },
+        "controller_kwargs": {
+            'controller_type': 'motor',
+            "p_gains": np.ones(3),
+            "d_gains": 0.1 * np.ones(3),
+        },
+        "basis_generator_kwargs": {
+            'basis_generator_type': 'zero_rbf',
+            'num_basis': 5,
+            'num_basis_zero_start': 1
+        }
+    }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v4")
+
 ## HopperJumpOnBox
 _versions = ["v0", "v1"]
 for _v in _versions:
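
With this registration, one env.step of the ProMP variant consumes a full weight vector and internally rolls out the trajectory through the motor controller configured above. A hedged usage sketch; alr_envs.make and the flat 3 x 5 weight layout are assumptions carried over from how the other ProMP ids in this file are used:

    import numpy as np
    import alr_envs

    env = alr_envs.make("ALRHopperJumpProMP-v4", seed=1)
    obs = env.reset()
    weights = np.random.randn(3 * 5)  # action_dim=3 times num_basis=5
    obs, episode_return, done, info = env.step(weights)
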
@@ -5,7 +5,7 @@ from .table_tennis.tt_gym import TTEnvGym
 from .beerpong.beerpong import ALRBeerBongEnv
 from .ant_jump.ant_jump import ALRAntJumpEnv
 from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
-from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv
+from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased
 from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
 from .hopper_throw.hopper_throw import ALRHopperThrowEnv
 from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
@@ -189,7 +189,7 @@ if __name__ == "__main__":
         ac = np.zeros(7)
         obs, rew, d, info = env.step(ac)
         env.render("human")
-
+        print(env.dt)
         print(rew)
 
         if d:
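
For gym MuJoCo environments, env.dt is typically the model timestep times frame_skip, so the added print reports the effective control interval of the env under test.
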
@@ -120,7 +120,6 @@ class ALRHopperJumpEnv(HopperEnv):
 
 
 class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
-    # The goal here is the desired x-Position of the Torso
 
     def step(self, action):
 
@@ -154,20 +153,15 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
         ctrl_cost = self.control_cost(action)
         costs = ctrl_cost
         done = False
-        goal_dist = 0
+        goal_dist = np.atleast_1d(np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object)))[0]
         rewards = 0
         if self.current_step >= self.max_episode_steps:
             # healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
             healthy_reward = self.healthy_reward * 2  # * self.current_step
-            goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object))[0]
             contact_dist = self.contact_dist if self.contact_dist is not None else 5
             dist_reward = self._forward_reward_weight * (-3 * goal_dist + 10 * self.max_height - 2 * contact_dist)
             rewards = dist_reward + healthy_reward
 
-        # else:
-        ## penalty for wrong start direction of first two joints; not needed, could be removed
-        # rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0
-
         observation = self._get_obs()
         reward = rewards - costs
         info = {
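
Two things change here: the goal distance is now computed on every step (so the info dict below can always report it), and it is wrapped in np.atleast_1d(...)[0]. The wrapper is needed because self.goal is a length-1 array (see reset() below), which makes the dtype=object subtraction carry an array element through np.linalg.norm instead of a plain float. A standalone illustration with made-up values:

    import numpy as np

    goal = np.random.uniform(0, 1.5, 1)          # length-1 array, as in reset()
    site_pos_after = np.array([0.4, 0.0, 0.1])   # made-up foot-site position
    diff = site_pos_after - np.array([goal, 0, 0], dtype=object)
    goal_dist = np.atleast_1d(np.linalg.norm(diff))[0]  # plain float again
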
@@ -175,11 +169,10 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
             'x_pos': site_pos_after,
             'max_height': self.max_height,
             'goal': self.goal,
-            'dist_rew': goal_dist,
+            'goal_dist': goal_dist,
             'height_rew': self.max_height,
             'healthy_reward': self.healthy_reward * 2
         }
 
         return observation, reward, done, info
 
     def reset_model(self):
@@ -213,12 +206,48 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
     def reset(self):
         super().reset()
         # self.goal = np.random.uniform(-1.5, 1.5, 1)
-        self.goal = np.random.uniform(0, 1.5, 1)
+        # self.goal = np.random.uniform(0, 1.5, 1)
+        self.goal = self.np_random.uniform(0, 1.5, 1)
         # self.goal = np.array([1.5])
         self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = np.array([self.goal, 0, 0], dtype=object)
         return self.reset_model()
 
 
+class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
+
+    def step(self, action):
+
+        self._floor_geom_id = self.model.geom_name2id('floor')
+        self._foot_geom_id = self.model.geom_name2id('foot_geom')
+
+        self.current_step += 1
+        self.do_simulation(action, self.frame_skip)
+        height_after = self.get_body_com("torso")[2]
+        site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
+        self.max_height = max(height_after, self.max_height)
+        ctrl_cost = self.control_cost(action)
+
+        healthy_reward = self.healthy_reward * 2
+        height_reward = 10 * height_after
+        goal_dist = np.atleast_1d(np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object)))[0]
+        goal_dist_reward = -3 * goal_dist
+        dist_reward = self._forward_reward_weight * (goal_dist_reward + height_reward)
+        reward = -ctrl_cost + healthy_reward + dist_reward
+        done = False
+        observation = self._get_obs()
+        info = {
+            'height': height_after,
+            'x_pos': site_pos_after,
+            'max_height': self.max_height,
+            'goal': self.goal,
+            'goal_dist': goal_dist,
+            'dist_rew': dist_reward,
+            'height_rew': height_reward,
+            'healthy_reward': healthy_reward,
+            'ctrl_reward': -ctrl_cost
+        }
+
+        return observation, reward, done, info
+
+
 class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
     def __init__(self, max_episode_steps=250):
         super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False,
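
For reference, the per-step reward assembled in the new class reduces to a single expression. A minimal recomputation; the function name is illustrative and the defaults mirror the registration above:

    def step_based_reward(height_after, goal_dist, ctrl_cost,
                          healthy_reward=1.0, forward_reward_weight=1.0):
        # control penalty + doubled healthy bonus + weighted goal/height
        # shaping, exactly as in ALRHopperXYJumpEnvStepBased.step above
        dist_reward = forward_reward_weight * (-3 * goal_dist + 10 * height_after)
        return -ctrl_cost + 2 * healthy_reward + dist_reward
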
@@ -293,7 +322,8 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
 if __name__ == '__main__':
     render_mode = "human"  # "human" or "partial" or "final"
     # env = ALRHopperJumpEnv()
-    env = ALRHopperXYJumpEnv()
+    # env = ALRHopperXYJumpEnv()
+    env = ALRHopperXYJumpEnvStepBased()
     # env = ALRHopperJumpRndmPosEnv()
     obs = env.reset()
 
@@ -223,6 +223,9 @@ class EpisodicWrapper(gym.Env, ABC):
     def get_observation_from_step(self, observation: np.ndarray) -> np.ndarray:
         return observation[self.active_obs]
 
+    def seed(self, seed=None):
+        self.env.seed(seed)
+
     def plot_trajs(self, des_trajs, des_vels):
         import matplotlib.pyplot as plt
         import matplotlib
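
The explicit seed pass-through is needed because EpisodicWrapper derives from gym.Env (see the hunk header) rather than gym.Wrapper, so nothing forwards seeding to the wrapped env automatically; without it, the self.np_random.uniform goal sampling added in reset() above would not be reproducible. A minimal sketch of the pattern under the pre-0.26 gym seeding API:

    import gym

    class MinimalEpisodicWrapper(gym.Env):
        def __init__(self, env):
            self.env = env

        def seed(self, seed=None):
            # forward so the inner env's np_random is re-seeded deterministically
            return self.env.seed(seed)
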