bp 2fs seems to work

Onur 2022-05-31 19:41:08 +02:00
parent 2ea7f6a2ed
commit 59b15e82ea
5 changed files with 98 additions and 15 deletions

View File

@@ -285,6 +285,20 @@ register(
"healthy_reward": 1.0
}
)
##### Hopper Jump step based reward
register(
id='ALRHopperJump-v4',
entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnvStepBased',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
"context": True,
"healthy_reward": 1.0
}
)
# CtxtFree are v0, Contextual are v1
register(
id='ALRHopperJumpOnBox-v0',
@@ -925,7 +939,7 @@ register(
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
'num_basis': 5,
'num_basis_zero_start': 2
'num_basis_zero_start': 1
}
}
)
@@ -961,12 +975,48 @@ register(
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
'num_basis': 5,
'num_basis_zero_start': 2
'num_basis_zero_start': 1
}
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v3")
## HopperJump
register(
id= "ALRHopperJumpProMP-v4",
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
kwargs={
"name": f"alr_envs:ALRHopperJump-v4",
"wrappers": [mujoco.hopper_jump.NewMPWrapper],
"ep_wrapper_kwargs": {
"weight_scale": 1
},
"movement_primitives_kwargs": {
'movement_primitives_type': 'promp',
'action_dim': 3
},
"phase_generator_kwargs": {
'phase_generator_type': 'linear',
'delay': 0,
'tau': 2, # initial value
'learn_tau': False,
'learn_delay': False
},
"controller_kwargs": {
'controller_type': 'motor',
"p_gains": np.ones(3),
"d_gains": 0.1*np.ones(3),
},
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
'num_basis': 5,
'num_basis_zero_start': 1
}
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v4")
## HopperJumpOnBox
_versions = ["v0", "v1"]
for _v in _versions:

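A minimal usage sketch for the environments registered above, assuming that importing the alr_envs package executes these register() calls (as the alr_envs:-prefixed id in the ProMP kwargs suggests):

import gym
import alr_envs  # assumption: importing the package runs the register() calls shown above

# step-based hopper jump variant added in this commit
env = gym.make("alr_envs:ALRHopperJump-v4")
obs = env.reset()
for _ in range(5):
    obs, reward, done, info = env.step(env.action_space.sample())
    print(info["goal_dist"], info["height_rew"])

# ProMP variant registered above; one step rolls out a whole episode from the MP parameters
mp_env = gym.make("alr_envs:ALRHopperJumpProMP-v4")
obs = mp_env.reset()
obs, episode_return, done, info = mp_env.step(mp_env.action_space.sample())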
View File

@@ -5,7 +5,7 @@ from .table_tennis.tt_gym import TTEnvGym
from .beerpong.beerpong import ALRBeerBongEnv
from .ant_jump.ant_jump import ALRAntJumpEnv
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv
from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased
from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import ALRHopperThrowEnv
from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv

View File

@@ -189,7 +189,7 @@ if __name__ == "__main__":
ac = np.zeros(7)
obs, rew, d, info = env.step(ac)
env.render("human")
print(env.dt)
print(rew)
if d:

View File

@@ -120,7 +120,6 @@ class ALRHopperJumpEnv(HopperEnv):
class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
# The goal here is the desired x-Position of the Torso
def step(self, action):
@@ -154,20 +153,15 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
ctrl_cost = self.control_cost(action)
costs = ctrl_cost
done = False
goal_dist = 0
goal_dist = np.atleast_1d(np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object)))[0]
rewards = 0
if self.current_step >= self.max_episode_steps:
# healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
healthy_reward = self.healthy_reward * 2 #* self.current_step
goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object))[0]
contact_dist = self.contact_dist if self.contact_dist is not None else 5
dist_reward = self._forward_reward_weight * (-3*goal_dist + 10*self.max_height - 2*contact_dist)
rewards = dist_reward + healthy_reward
# else:
## penalty for wrong start direction of first two joints; not needed, could be removed
# rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0
observation = self._get_obs()
reward = rewards - costs
info = {
@@ -175,11 +169,10 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
'x_pos': site_pos_after,
'max_height': self.max_height,
'goal': self.goal,
'dist_rew': goal_dist,
'goal_dist': goal_dist,
'height_rew': self.max_height,
'healthy_reward': self.healthy_reward * 2
}
return observation, reward, done, info
def reset_model(self):
@@ -213,12 +206,48 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
def reset(self):
super().reset()
# self.goal = np.random.uniform(-1.5, 1.5, 1)
self.goal = np.random.uniform(0, 1.5, 1)
# self.goal = np.random.uniform(0, 1.5, 1)
self.goal = self.np_random.uniform(0, 1.5, 1)
# self.goal = np.array([1.5])
self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = np.array([self.goal, 0, 0], dtype=object)
return self.reset_model()
class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
def step(self, action):
self._floor_geom_id = self.model.geom_name2id('floor')
self._foot_geom_id = self.model.geom_name2id('foot_geom')
self.current_step += 1
self.do_simulation(action, self.frame_skip)
height_after = self.get_body_com("torso")[2]
site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
self.max_height = max(height_after, self.max_height)
ctrl_cost = self.control_cost(action)
healthy_reward = self.healthy_reward * 2
height_reward = 10*height_after
goal_dist = np.atleast_1d(np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object)))[0]
goal_dist_reward = -3*goal_dist
dist_reward = self._forward_reward_weight * (goal_dist_reward + height_reward)
reward = -ctrl_cost + healthy_reward + dist_reward
done = False
observation = self._get_obs()
info = {
'height': height_after,
'x_pos': site_pos_after,
'max_height': self.max_height,
'goal': self.goal,
'goal_dist': goal_dist,
'dist_rew': dist_reward,
'height_rew': height_reward,
'healthy_reward': healthy_reward,
'ctrl_reward': -ctrl_cost
}
return observation, reward, done, info
class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
def __init__(self, max_episode_steps=250):
super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False,
@@ -293,7 +322,8 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
if __name__ == '__main__':
render_mode = "human" # "human" or "partial" or "final"
# env = ALRHopperJumpEnv()
env = ALRHopperXYJumpEnv()
# env = ALRHopperXYJumpEnv()
env = ALRHopperXYJumpEnvStepBased()
# env = ALRHopperJumpRndmPosEnv()
obs = env.reset()

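For reference, the per-step reward of the new ALRHopperXYJumpEnvStepBased above reduces to reward = -ctrl_cost + 2*healthy_reward + forward_reward_weight * (10*height - 3*goal_dist). A standalone restatement (hypothetical helper, not part of the environment class):

import numpy as np

def step_based_reward(height, foot_pos, goal, ctrl_cost,
                      healthy_reward=1.0, forward_reward_weight=1.0):
    # distance from the foot site to the goal position on the ground
    goal_dist = np.linalg.norm(foot_pos - np.array([goal, 0.0, 0.0]))
    dist_reward = forward_reward_weight * (10.0 * height - 3.0 * goal_dist)
    # the class doubles its healthy_reward attribute before adding it
    return -ctrl_cost + 2.0 * healthy_reward + dist_reward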
View File

@@ -223,6 +223,9 @@ class EpisodicWrapper(gym.Env, ABC):
def get_observation_from_step(self, observation: np.ndarray) -> np.ndarray:
return observation[self.active_obs]
def seed(self, seed=None):
self.env.seed(seed)
def plot_trajs(self, des_trajs, des_vels):
import matplotlib.pyplot as plt
import matplotlib
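
The new seed() pass-through in EpisodicWrapper, together with the switch from np.random to self.np_random in ALRHopperXYJumpEnv.reset(), should make goal sampling reproducible. A quick check, sketched under the assumption that importing alr_envs registers the ids used below:

import numpy as np
import gym
import alr_envs  # assumption: registers the ALR ids on import

env = gym.make("alr_envs:ALRHopperJump-v4")
env.seed(42)
env.reset()
goal_a = env.unwrapped.goal.copy()  # goal is now drawn from self.np_random
env.seed(42)
env.reset()
goal_b = env.unwrapped.goal.copy()
assert np.allclose(goal_a, goal_b)  # same seed -> same sampled goal

# the added EpisodicWrapper.seed() forwards seeding to the wrapped step env
mp_env = gym.make("alr_envs:ALRHopperJumpProMP-v4")
mp_env.seed(42)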