Merge branch 'master' into dev_new_mp_api

Conflicts:
	alr_envs/alr/__init__.py
Fabian 2022-05-05 16:57:40 +02:00
commit a26b9f463b
7 changed files with 269 additions and 79 deletions

View File

@@ -538,16 +538,68 @@ for _v in _versions:
             "wrappers": [classic_control.hole_reacher.MPWrapper],
             "mp_kwargs": {
                 "num_dof": 5,
-                "num_basis": 5,
+                "num_basis": 3,
                 "duration": 2,
                 "policy_type": "velocity",
-                "weights_scale": 0.1,
+                "weights_scale": 5,
                 "zero_start": True
             }
         }
     )
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+## ALRReacher
+_versions = ["ALRReacher-v0", "ALRLongReacher-v0", "ALRReacherSparse-v0", "ALRLongReacherSparse-v0"]
+for _v in _versions:
+    _name = _v.split("-")
+    _env_id = f'{_name[0]}DMP-{_name[1]}'
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
+        # max_episode_steps=1,
+        kwargs={
+            "name": f"alr_envs:{_v}",
+            "wrappers": [mujoco.reacher.MPWrapper],
+            "mp_kwargs": {
+                "num_dof": 5 if "long" not in _v.lower() else 7,
+                "num_basis": 2,
+                "duration": 4,
+                "alpha_phase": 2,
+                "learn_goal": True,
+                "policy_type": "motor",
+                "weights_scale": 5,
+                "policy_kwargs": {
+                    "p_gains": 1,
+                    "d_gains": 0.1
+                }
+            }
+        }
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
+
+    _env_id = f'{_name[0]}ProMP-{_name[1]}'
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+        kwargs={
+            "name": f"alr_envs:{_v}",
+            "wrappers": [mujoco.reacher.MPWrapper],
+            "mp_kwargs": {
+                "num_dof": 5 if "long" not in _v.lower() else 7,
+                "num_basis": 2,
+                "duration": 4,
+                "policy_type": "motor",
+                "weights_scale": 5,
+                "zero_start": True,
+                "policy_kwargs": {
+                    "p_gains": 1,
+                    "d_gains": 0.1
+                }
+            }
+        }
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
 # ## Beerpong
 # _versions = ["v0", "v1"]
 # for _v in _versions:
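
Note (not part of the diff): the loop above registers environment IDs of the form f'{base}DMP-{version}' and f'{base}ProMP-{version}', e.g. ALRReacherSparseProMP-v0. A minimal smoke test, assuming that importing alr_envs executes these register() calls and that the pre-0.26 gym step API applies:

    import gym

    import alr_envs  # assumption: importing the package runs the registrations above

    env = gym.make("ALRReacherSparseProMP-v0")  # ID built as f'{_name[0]}ProMP-{_name[1]}'
    env.action_space.seed(0)
    obs = env.reset()
    # One action is a full ProMP weight vector, so a single step typically rolls out a whole trajectory.
    obs, reward, done, info = env.step(env.action_space.sample())
    print(reward, done)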

View File

@@ -45,6 +45,9 @@ class HoleReacherEnv(BaseReacherDirectEnv):
         elif rew_fct == "vel_acc":
             from alr_envs.alr.classic_control.hole_reacher.hr_dist_vel_acc_reward import HolereacherReward
             self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty)
+        elif rew_fct == "unbounded":
+            from alr_envs.alr.classic_control.hole_reacher.hr_unbounded_reward import HolereacherReward
+            self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision)
         else:
             raise ValueError("Unknown reward function {}".format(rew_fct))
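
For reference (sketch, not part of the diff): the new branch can also be exercised standalone, using exactly the import path and constructor arguments shown above; normally this construction happens inside HoleReacherEnv when rew_fct="unbounded" is passed.

    from alr_envs.alr.classic_control.hole_reacher.hr_unbounded_reward import HolereacherReward

    # Mirrors the new "unbounded" branch above, outside of the env class.
    reward_function = HolereacherReward(allow_self_collision=False, allow_wall_collision=False)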

View File

@@ -0,0 +1,60 @@
+import numpy as np
+
+
+class HolereacherReward:
+    def __init__(self, allow_self_collision, allow_wall_collision):
+        # collision
+        self.allow_self_collision = allow_self_collision
+        self.allow_wall_collision = allow_wall_collision
+        self._is_collided = False
+
+        self.reward_factors = np.array((1, -5e-6))
+
+    def reset(self):
+        self._is_collided = False
+
+    def get_reward(self, env):
+        dist_reward = 0
+        success = False
+
+        self_collision = False
+        wall_collision = False
+
+        if not self.allow_self_collision:
+            self_collision = env._check_self_collision()
+
+        if not self.allow_wall_collision:
+            wall_collision = env.check_wall_collision()
+
+        self._is_collided = self_collision or wall_collision
+
+        if env._steps == 180 or self._is_collided:
+            self.end_eff_pos = np.copy(env.end_effector)
+
+        if env._steps == 199 or self._is_collided:
+            # return reward only in last time step
+            # Episode also terminates when colliding, hence return reward
+            dist = np.linalg.norm(self.end_eff_pos - env._goal)
+
+            if self._is_collided:
+                dist_reward = 0.25 * np.exp(- dist)
+            else:
+                if env.end_effector[1] > 0:
+                    dist_reward = np.exp(- dist)
+                else:
+                    dist_reward = 1 - self.end_eff_pos[1]
+
+            success = not self._is_collided
+
+        info = {"is_success": success,
+                "is_collided": self._is_collided,
+                "end_effector": np.copy(env.end_effector),
+                "joints": np.copy(env.current_pos)}
+
+        acc_cost = np.sum(env._acc ** 2)
+
+        reward_features = np.array((dist_reward, acc_cost))
+        reward = np.dot(reward_features, self.reward_factors)
+
+        return reward, info
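
Side note (illustration, not part of the diff): get_reward combines its two features as a weighted sum, with the distance term weighted 1 and the squared-acceleration term -5e-6, so the acceleration penalty stays small relative to the distance reward. With made-up values:

    import numpy as np

    reward_factors = np.array((1, -5e-6))

    dist_reward = np.exp(-0.3)  # hypothetical: end effector 0.3 from the goal, no collision
    acc_cost = 1500.0           # hypothetical sum of squared joint accelerations
    reward_features = np.array((dist_reward, acc_cost))

    reward = np.dot(reward_features, reward_factors)
    print(reward)  # ~0.7408 - 0.0075 = ~0.7333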

View File

@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper

View File

@@ -39,10 +39,13 @@
         </body>
         <!-- Target -->
         <body name="target" pos=".1 -.1 .01">
-            <joint armature="0" axis="1 0 0" damping="0" limited="true" name="target_x" pos="0 0 0" range="-.27 .27" ref=".1" stiffness="0" type="slide"/>
-            <joint armature="0" axis="0 1 0" damping="0" limited="true" name="target_y" pos="0 0 0" range="-.27 .27" ref="-.1" stiffness="0" type="slide"/>
+            <!-- <joint armature="0" axis="1 0 0" damping="0" limited="true" name="target_x" pos="0 0 0" range="-.27 .27" ref=".1" stiffness="0" type="slide"/>-->
+            <!-- <joint armature="0" axis="0 1 0" damping="0" limited="true" name="target_y" pos="0 0 0" range="-.27 .27" ref="-.1" stiffness="0" type="slide"/>-->
+            <joint armature="0" axis="1 0 0" damping="0" limited="true" name="target_x" pos="0 0 0" range="-.7 .7" ref=".1" stiffness="0" type="slide"/>
+            <joint armature="0" axis="0 1 0" damping="0" limited="true" name="target_y" pos="0 0 0" range="-.7 .7" ref="-.1" stiffness="0" type="slide"/>
             <geom conaffinity="0" contype="0" name="target" pos="0 0 0" rgba="0.9 0.2 0.2 1" size=".009" type="sphere"/>
         </body>
+        <site name="context_space" pos="0 0.0 0.0" euler="0 0 0" size="0.5 0.5 0.01" rgba="0 1 0 0.1" type="box"/>
     </worldbody>
     <actuator>
         <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="joint0"/>
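
Side note (assumption, not in the diff): the widened slide-joint limits mean the target can now be placed anywhere in [-0.7, 0.7] along x and y instead of [-0.27, 0.27]. A goal sampled uniformly over the new joint ranges would look like:

    import numpy as np

    # Hypothetical goal sampling over the new target_x / target_y ranges.
    goal = np.random.uniform(low=-0.7, high=0.7, size=2)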

View File

@@ -0,0 +1,43 @@
+from typing import Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+    @property
+    def active_obs(self):
+        return np.concatenate([
+            [False] * self.n_links,  # cos
+            [False] * self.n_links,  # sin
+            [True] * 2,  # goal position
+            [False] * self.n_links,  # angular velocity
+            [False] * 3,  # goal distance
+            # self.get_body_com("target"),  # only return target to make problem harder
+            [False],  # step
+        ])
+
+    # @property
+    # def active_obs(self):
+    #     return np.concatenate([
+    #         [True] * self.n_links,  # cos, True
+    #         [True] * self.n_links,  # sin, True
+    #         [True] * 2,  # goal position
+    #         [True] * self.n_links,  # angular velocity, True
+    #         [True] * 3,  # goal distance
+    #         # self.get_body_com("target"),  # only return target to make problem harder
+    #         [False],  # step
+    #     ])
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray]:
+        return self.sim.data.qvel.flat[:self.n_links]
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray]:
+        return self.sim.data.qpos.flat[:self.n_links]
+
+    @property
+    def dt(self) -> Union[float, int]:
+        return self.env.dt
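
For clarity (sketch only, not part of the diff): active_obs is a boolean mask over the flat observation, and the surrounding mp_env_api framework is assumed to index the observation with it so that only the goal position is exposed as context. With n_links = 5:

    import numpy as np

    # Rebuilds the mask defined above for a 5-link reacher (illustration only).
    n_links = 5
    active_obs = np.concatenate([
        [False] * n_links,  # cos
        [False] * n_links,  # sin
        [True] * 2,         # goal position
        [False] * n_links,  # angular velocity
        [False] * 3,        # goal distance
        [False],            # step
    ])

    obs = np.arange(active_obs.size, dtype=float)  # dummy 21-dim observation
    context = obs[active_obs]                      # keeps only the two goal-position entries
    print(context)                                 # [10. 11.]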

View File

@@ -2,70 +2,98 @@ import numpy as np
 from matplotlib import pyplot as plt
 
 from alr_envs import dmc, meta
+from alr_envs.alr import mujoco
 from alr_envs.utils.make_env_helpers import make_promp_env
 
+
+def visualize(env):
+    t = env.t
+    pos_features = env.mp.basis_generator.basis(t)
+    plt.plot(t, pos_features)
+    plt.show()
+
+
 # This might work for some environments, however, please verify either way the correct trajectory information
 # for your environment are extracted below
-SEED = 10
-env_id = "ball_in_cup-catch"
-wrappers = [dmc.ball_in_cup.MPWrapper]
+SEED = 1
+
+# env_id = "ball_in_cup-catch"
+env_id = "ALRReacherSparse-v0"
+env_id = "button-press-v2"
+
+wrappers = [mujoco.reacher.MPWrapper]
+wrappers = [meta.goal_object_change_mp_wrapper.MPWrapper]
+
 mp_kwargs = {
-    "num_dof": 2,
-    "num_basis": 10,
-    "duration": 2,
-    "width": 0.025,
-    "policy_type": "motor",
-    "weights_scale": 1,
+    "num_dof": 4,
+    "num_basis": 5,
+    "duration": 6.25,
+    "policy_type": "metaworld",
+    "weights_scale": 10,
     "zero_start": True,
-    "policy_kwargs": {
-        "p_gains": 1,
-        "d_gains": 1
-    }
+    # "policy_kwargs": {
+    #     "p_gains": 1,
+    #     "d_gains": 0.1
+    # }
 }
 
-kwargs = dict(time_limit=2, episode_length=100)
+# kwargs = dict(time_limit=4, episode_length=200)
+kwargs = {}
 
-env = make_promp_env(env_id, wrappers, seed=SEED, mp_kwargs=mp_kwargs,
-                     **kwargs)
+env = make_promp_env(env_id, wrappers, seed=SEED, mp_kwargs=mp_kwargs, **kwargs)
+env.action_space.seed(SEED)
 
 # Plot difference between real trajectory and target MP trajectory
 env.reset()
-pos, vel = env.mp_rollout(env.action_space.sample())
+w = env.action_space.sample()  # N(0,1)
+visualize(env)
+pos, vel = env.mp_rollout(w)
 
 base_shape = env.full_action_space.shape
 actual_pos = np.zeros((len(pos), *base_shape))
 actual_vel = np.zeros((len(pos), *base_shape))
 act = np.zeros((len(pos), *base_shape))
 
+plt.ion()
+fig = plt.figure()
+ax = fig.add_subplot(1, 1, 1)
+img = ax.imshow(env.env.render("rgb_array"))
+fig.show()
+
 for t, pos_vel in enumerate(zip(pos, vel)):
     actions = env.policy.get_action(pos_vel[0], pos_vel[1], env.current_vel, env.current_pos)
     actions = np.clip(actions, env.full_action_space.low, env.full_action_space.high)
     _, _, _, _ = env.env.step(actions)
+    if t % 15 == 0:
+        img.set_data(env.env.render("rgb_array"))
+        fig.canvas.draw()
+        fig.canvas.flush_events()
     act[t, :] = actions
     # TODO verify for your environment
     actual_pos[t, :] = env.current_pos
-    actual_vel[t, :] = env.current_vel
+    actual_vel[t, :] = 0  # env.current_vel
 
 plt.figure(figsize=(15, 5))
 
 plt.subplot(131)
 plt.title("Position")
-plt.plot(actual_pos, c='C0', label=["true" if i == 0 else "" for i in range(np.prod(base_shape))])
+p1 = plt.plot(actual_pos, c='C0', label="true")
 # plt.plot(actual_pos_ball, label="true pos ball")
-plt.plot(pos, c='C1', label=["MP" if i == 0 else "" for i in range(np.prod(base_shape))])
+p2 = plt.plot(pos, c='C1', label="MP")  # , label=["MP" if i == 0 else None for i in range(np.prod(base_shape))])
 plt.xlabel("Episode steps")
-plt.legend()
+# plt.legend()
+handles, labels = plt.gca().get_legend_handles_labels()
+
+from collections import OrderedDict
+
+by_label = OrderedDict(zip(labels, handles))
+plt.legend(by_label.values(), by_label.keys())
 
 plt.subplot(132)
 plt.title("Velocity")
-plt.plot(actual_vel, c='C0', label=[f"true" if i == 0 else "" for i in range(np.prod(base_shape))])
-plt.plot(vel, c='C1', label=[f"MP" if i == 0 else "" for i in range(np.prod(base_shape))])
+plt.plot(actual_vel, c='C0', label="true")
+plt.plot(vel, c='C1', label="MP")
 plt.xlabel("Episode steps")
+plt.legend()
 
 plt.subplot(133)
-plt.title("Actions")
+plt.title(f"Actions {np.std(act, axis=0)}")
 plt.plot(act, c="C0"),  # label=[f"actions" if i == 0 else "" for i in range(np.prod(base_action_shape))])
 plt.xlabel("Episode steps")
 # plt.legend()