Merge pull request #7 from ALRhub/mp_wrappers_and_stuff

Mp wrappers and stuff
ottofabian 2021-05-27 17:40:35 +02:00 committed by GitHub
commit 746d408a76
15 changed files with 187 additions and 85 deletions

View File

@ -272,12 +272,20 @@ for v in versions:
         }
     )
-    # register(
-    #     id='HoleReacherDetPMP-v0',
-    #     entry_point='alr_envs.classic_control.hole_reacher:holereacher_detpmp',
-    #     # max_episode_steps=1,
-    #     # TODO: add mp kwargs
-    # )
+    register(
+        id=f'HoleReacherDetPMP-{v}',
+        entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env',
+        kwargs={
+            "name": f"alr_envs:HoleReacher-{v}",
+            "num_dof": 5,
+            "num_basis": 5,
+            "duration": 2,
+            "width": 0.025,
+            "policy_type": "velocity",
+            "weights_scale": 0.2,
+            "zero_start": True
+        }
+    )
 
     # TODO: properly add final_pos
     register(
@ -335,6 +343,41 @@ register(
     }
 )
 
+register(
+    id='ALRBallInACupSimpleDetPMP-v0',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env',
+    kwargs={
+        "name": "alr_envs:ALRBallInACupSimple-v0",
+        "num_dof": 3,
+        "num_basis": 5,
+        "duration": 3.5,
+        "post_traj_time": 4.5,
+        "width": 0.0035,
+        # "off": -0.05,
+        "policy_type": "motor",
+        "weights_scale": 0.2,
+        "zero_start": True,
+        "zero_goal": True
+    }
+)
+
+register(
+    id='ALRBallInACupDetPMP-v0',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env',
+    kwargs={
+        "name": "alr_envs:ALRBallInACupSimple-v0",
+        "num_dof": 7,
+        "num_basis": 5,
+        "duration": 3.5,
+        "post_traj_time": 4.5,
+        "width": 0.0035,
+        "policy_type": "motor",
+        "weights_scale": 0.2,
+        "zero_start": True,
+        "zero_goal": True
+    }
+)
+
 register(
     id='ALRBallInACupGoalDMP-v0',
     entry_point='alr_envs.utils.make_env_helpers:make_contextual_env',

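As a rough usage sketch for the ids registered above (assuming alr_envs and its mp_lib dependency are installed): the DetPMP environments are created through gym.make, and a single step consumes the flat vector of num_dof * num_basis basis weights and rolls out the whole trajectory.

import gym
import numpy as np

env = gym.make("alr_envs:HoleReacherDetPMP-v1")  # registered in the loop above
env.reset()
# one action = flat ProMP basis weights, num_dof * num_basis = 5 * 5 = 25 values
weights = 0.1 * np.random.randn(env.action_space.shape[0])
obs, reward, done, info = env.step(weights)  # executes the full trajectory in one step
print(reward, done)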
View File

@ -7,10 +7,10 @@ from gym.utils import seeding
 from matplotlib import patches
 
 from alr_envs.classic_control.utils import check_self_collision
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
 
 
-class HoleReacherEnv(MPEnv):
+class HoleReacherEnv(AlrEnv):
 
     def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None,
                  hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False,
@ -71,14 +71,15 @@ class HoleReacherEnv(MPEnv):
         A single step with an action in joint velocity space
         """
-        acc = (action - self._angle_velocity) / self.dt
         self._angle_velocity = action
-        self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
+        self._joint_angles = self._joint_angles + self.dt * self._angle_velocity  # + 0.001 * np.random.randn(5)
         self._update_joints()
+        acc = (action - self._angle_velocity) / self.dt
 
         reward, info = self._get_reward(acc)
         info.update({"is_collided": self._is_collided})
 
+        self.end_effector_traj.append(np.copy(self.end_effector))
         self._steps += 1
         done = self._is_collided
@ -101,6 +102,7 @@ class HoleReacherEnv(MPEnv):
         self._joints = np.zeros((self.n_links + 1, 2))
         self._update_joints()
         self._steps = 0
+        self.end_effector_traj = []
 
         return self._get_obs().copy()

View File

@ -5,10 +5,10 @@ import numpy as np
 from gym import spaces
 from gym.utils import seeding
 
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
 
 
-class SimpleReacherEnv(MPEnv):
+class SimpleReacherEnv(AlrEnv):
     """
     Simple Reaching Task without any physics simulation.
     Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions

View File

@ -6,10 +6,10 @@ import numpy as np
 from gym.utils import seeding
 
 from alr_envs.classic_control.utils import check_self_collision
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
 
 
-class ViaPointReacher(MPEnv):
+class ViaPointReacher(AlrEnv):
 
     def __init__(self, n_links, random_start: bool = True, via_target: Union[None, Iterable] = None,
                  target: Union[None, Iterable] = None, allow_self_collision=False, collision_penalty=1000):

View File

@ -1,5 +1,6 @@
 from collections import OrderedDict
 import os
+from abc import abstractmethod
 
 from gym import error, spaces
@ -142,18 +143,20 @@ class AlrMujocoEnv(gym.Env):
     # methods to override:
     # ----------------------------
+    @property
+    @abstractmethod
+    def active_obs(self):
+        """Returns boolean mask for each observation entry
+        whether the observation is returned for the contextual case or not.
+        This effectively allows to filter unwanted or unnecessary observations from the full step-based case.
+        """
+        return np.ones(self.observation_space.shape, dtype=bool)
+
     def _get_obs(self):
         """Returns the observation.
         """
         raise NotImplementedError()
 
-    def configure(self, *args, **kwargs):
-        """
-        Helper method to set certain environment properties such as contexts in contextual environments since reset()
-        doesn't take arguments. Should be called before reset().
-        """
-        pass
-
     def reset_model(self):
         """
         Reset the robot degrees of freedom (qpos and qvel).

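The new active_obs mask is consumed by MPWrapper.reset (return self.env.reset()[self.env.active_obs]) via plain boolean indexing; a minimal numpy-only sketch of that filtering, with made-up observation values:

import numpy as np

full_obs = np.array([0.1, -0.4, 0.9, 0.0, 2.0, 7.0])             # made-up step-based observation
active_obs = np.array([False, False, True, True, False, False])  # keep only the contextual entries
print(full_obs[active_obs])  # -> [0.9 0. ]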
View File

@ -126,6 +126,7 @@
<geom name="cup_base" pos="0 -0.035 0.1165" euler="-1.57 0 0" type="cylinder" size="0.038 0.0045" solref="-10000 -100"/> <geom name="cup_base" pos="0 -0.035 0.1165" euler="-1.57 0 0" type="cylinder" size="0.038 0.0045" solref="-10000 -100"/>
<!-- <geom name="cup_base_contact" pos="0 -0.025 0.1165" euler="-1.57 0 0" type="cylinder" size="0.03 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>--> <!-- <geom name="cup_base_contact" pos="0 -0.025 0.1165" euler="-1.57 0 0" type="cylinder" size="0.03 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>-->
<geom name="cup_base_contact" pos="0 -0.005 0.1165" euler="-1.57 0 0" type="cylinder" size="0.02 0.0005" solref="-10000 -100" rgba="0 0 255 1"/> <geom name="cup_base_contact" pos="0 -0.005 0.1165" euler="-1.57 0 0" type="cylinder" size="0.02 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>
<geom name="cup_base_contact_below" pos="0 -0.04 0.1165" euler="-1.57 0 0" type="cylinder" size="0.035 0.001" solref="-10000 -100" rgba="255 0 255 1"/>
<!-- <geom name="cup_geom11" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup11" />--> <!-- <geom name="cup_geom11" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup11" />-->
<!-- <geom name="cup_geom12" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup12" />--> <!-- <geom name="cup_geom12" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup12" />-->
<!-- <geom name="cup_geom13" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup13" />--> <!-- <geom name="cup_geom13" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup13" />-->

View File

@ -22,7 +22,7 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
         self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
         self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
 
-        self.context = None
+        self.context = context
 
         utils.EzPickle.__init__(self)
         alr_mujoco_env.AlrMujocoEnv.__init__(self,
@ -45,7 +45,6 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
         else:
             raise ValueError("Unknown reward type")
         self.reward_function = reward_function(self.sim_steps)
-        self.configure(context)
 
     @property
     def start_pos(self):
@ -69,10 +68,6 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
     def current_vel(self):
         return self.sim.data.qvel[0:7].copy()
 
-    def configure(self, context):
-        self.context = context
-        self.reward_function.reset(context)
-
     def reset_model(self):
         init_pos_all = self.init_qpos.copy()
         init_pos_robot = self._start_pos
@ -95,26 +90,27 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
         reward_ctrl = - np.square(a).sum()
 
         crash = self.do_simulation(a)
-        joint_cons_viol = self.check_traj_in_joint_limits()
+        # joint_cons_viol = self.check_traj_in_joint_limits()
 
         self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
         self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
 
         ob = self._get_obs()
 
-        if not crash and not joint_cons_viol:
-            reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps)
-            done = success or self._steps == self.sim_steps - 1 or stop_sim
+        if not crash:
+            reward, success, is_collided = self.reward_function.compute_reward(a, self)
+            done = success or self._steps == self.sim_steps - 1 or is_collided
             self._steps += 1
         else:
-            reward = -1000
+            reward = -2
             success = False
+            is_collided = False
             done = True
 
         return ob, reward, done, dict(reward_dist=reward_dist,
                                       reward_ctrl=reward_ctrl,
                                       velocity=angular_vel,
                                       traj=self._q_pos, is_success=success,
-                                      is_collided=crash or joint_cons_viol)
+                                      is_collided=is_collided, sim_crash=crash)
 
     def check_traj_in_joint_limits(self):
         return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
@ -129,6 +125,16 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
             [self._steps],
         ])
 
+    # TODO
+    @property
+    def active_obs(self):
+        return np.hstack([
+            [False] * 7,  # cos
+            [False] * 7,  # sin
+            # [True] * 2,  # x-y coordinates of target distance
+            [False]  # env steps
+        ])
+
     # These functions are for the task with 3 joint actuations
     def extend_des_pos(self, des_pos):
         des_pos_full = self._start_pos.copy()

View File

@ -6,7 +6,8 @@ class BallInACupReward(alr_reward_fct.AlrReward):
     def __init__(self, sim_time):
         self.sim_time = sim_time
 
-        self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
+        self.collision_objects = ["cup_geom1", "cup_geom2", "cup_base_contact_below",
+                                  "wrist_palm_link_convex_geom",
                                   "wrist_pitch_link_convex_decomposition_p1_geom",
                                   "wrist_pitch_link_convex_decomposition_p2_geom",
                                   "wrist_pitch_link_convex_decomposition_p3_geom",
@ -20,6 +21,8 @@ class BallInACupReward(alr_reward_fct.AlrReward):
         self.goal_id = None
         self.goal_final_id = None
         self.collision_ids = None
+        self._is_collided = False
+        self.collision_penalty = 1
 
         self.ball_traj = None
         self.dists = None
@ -36,49 +39,52 @@ class BallInACupReward(alr_reward_fct.AlrReward):
         self.action_costs = []
         self.cup_angles = []
 
-    def compute_reward(self, action, sim, step, context=None):
-        self.ball_id = sim.model._body_name2id["ball"]
-        self.ball_collision_id = sim.model._geom_name2id["ball_geom"]
-        self.goal_id = sim.model._site_name2id["cup_goal"]
-        self.goal_final_id = sim.model._site_name2id["cup_goal_final"]
-        self.collision_ids = [sim.model._geom_name2id[name] for name in self.collision_objects]
-
-        ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
+    def compute_reward(self, action, env):
+        self.ball_id = env.sim.model._body_name2id["ball"]
+        self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
+        self.goal_id = env.sim.model._site_name2id["cup_goal"]
+        self.goal_final_id = env.sim.model._site_name2id["cup_goal_final"]
+        self.collision_ids = [env.sim.model._geom_name2id[name] for name in self.collision_objects]
+
+        ball_in_cup = self.check_ball_in_cup(env.sim, self.ball_collision_id)
 
         # Compute the current distance from the ball to the inner part of the cup
-        goal_pos = sim.data.site_xpos[self.goal_id]
-        ball_pos = sim.data.body_xpos[self.ball_id]
-        goal_final_pos = sim.data.site_xpos[self.goal_final_id]
+        goal_pos = env.sim.data.site_xpos[self.goal_id]
+        ball_pos = env.sim.data.body_xpos[self.ball_id]
+        goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
         self.dists.append(np.linalg.norm(goal_pos - ball_pos))
         self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
-        self.ball_traj[step, :] = ball_pos
-        cup_quat = np.copy(sim.data.body_xquat[sim.model._body_name2id["cup"]])
+        self.ball_traj[env._steps, :] = ball_pos
+        cup_quat = np.copy(env.sim.data.body_xquat[env.sim.model._body_name2id["cup"]])
         self.cup_angles.append(np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]),
                                           1 - 2 * (cup_quat[1]**2 + cup_quat[2]**2)))
 
         action_cost = np.sum(np.square(action))
         self.action_costs.append(action_cost)
 
-        if self.check_collision(sim):
-            reward = - 1000
-            return reward, False, True
-
-        if step == self.sim_time - 1:
+        self._is_collided = self.check_collision(env.sim) or env.check_traj_in_joint_limits()
+
+        if env._steps == env.sim_steps - 1 or self._is_collided:
             t_min_dist = np.argmin(self.dists)
             angle_min_dist = self.cup_angles[t_min_dist]
             cost_angle = (angle_min_dist - np.pi / 2)**2
 
             min_dist = self.dists[t_min_dist]
             dist_final = self.dists_final[-1]
+            min_dist_final = np.min(self.dists_final)
 
-            cost = 0.5 * min_dist + 0.5 * dist_final + 0.01 * cost_angle
-            reward = np.exp(-2 * cost) - 1e-3 * action_cost
-            success = dist_final < 0.05 and ball_in_cup
+            cost = 0.5 * dist_final + 0.05 * cost_angle  # TODO: Increase cost_angle weight  # 0.5 * min_dist +
+            # reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided)
+            # reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
+            reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
+            success = dist_final < 0.05 and ball_in_cup and not self._is_collided
+            crash = self._is_collided
         else:
-            reward = - 1e-3 * action_cost
+            reward = - 1e-5 * action_cost  # TODO: increase action_cost weight
             success = False
+            crash = False
 
-        return reward, success, False
+        return reward, success, crash
 
     def check_ball_in_cup(self, sim, ball_collision_id):
         cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]

View File

@ -105,6 +105,7 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
     def check_traj_in_joint_limits(self):
         return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
 
+    # TODO
     def _get_obs(self):
         theta = self.sim.data.qpos.flat[:7]
         return np.concatenate([
@ -114,6 +115,10 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
             [self._steps],
         ])
 
+    # TODO
+    def active_obs(self):
+        pass
 
 if __name__ == "__main__":

View File

@ -40,6 +40,16 @@ def _flatten_list(l):
     return [l__ for l_ in l for l__ in l_]
 
 
+class DummyDist:
+    def __init__(self, dim):
+        self.dim = dim
+
+    def sample(self, contexts):
+        contexts = np.atleast_2d(contexts)
+        n_samples = contexts.shape[0]
+        return np.random.normal(size=(n_samples, self.dim)), contexts
+
+
 class AlrMpEnvSampler:
     """
     An asynchronous sampler for non contextual MPWrapper environments. A sampler object can be called with a set of
@ -100,9 +110,9 @@ class AlrContextualMpEnvSampler:
 if __name__ == "__main__":
-    env_name = "alr_envs:ALRBallInACupSimpleDMP-v0"
+    env_name = "alr_envs:HoleReacherDetPMP-v1"
     n_cpu = 8
-    dim = 15
+    dim = 25
     n_samples = 10
 
     sampler = AlrMpEnvSampler(env_name, num_envs=n_cpu)

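A small sketch of the DummyDist contract from the contextual sampler's point of view (the context values below are made up; only the shapes matter): sample() maps a batch of contexts to an (n_samples, dim) parameter matrix plus the contexts themselves.

import numpy as np
from alr_envs.utils.mp_env_async_sampler import DummyDist

dist = DummyDist(dim=25)
contexts = np.array([[0.1], [0.2], [0.3]])   # one made-up context per sample
thetas, ctxs = dist.sample(contexts)
print(thetas.shape, ctxs.shape)              # (3, 25) (3, 1)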
View File

@ -2,28 +2,29 @@ import gym
 import numpy as np
 from mp_lib import det_promp
 
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
 from alr_envs.utils.mps.mp_wrapper import MPWrapper
 
 
 class DetPMPWrapper(MPWrapper):
-    def __init__(self, env: MPEnv, num_dof: int, num_basis: int, width: int, duration: int = 1, dt: float = 0.01,
+    def __init__(self, env: AlrEnv, num_dof: int, num_basis: int, width: float, duration: float = 1, dt: float = 0.01,
                  post_traj_time: float = 0., policy_type: str = None, weights_scale: float = 1.,
                  zero_start: bool = False, zero_goal: bool = False, **mp_kwargs):
         self.duration = duration  # seconds
 
+        dt = env.dt if hasattr(env, "dt") else dt
+        assert dt is not None
+        self.dt = dt
+
         super().__init__(env, num_dof, dt, duration, post_traj_time, policy_type, weights_scale, num_basis=num_basis,
                          width=width, zero_start=zero_start, zero_goal=zero_goal, **mp_kwargs)
 
-        self.dt = dt
-
         action_bounds = np.inf * np.ones((self.mp.n_basis * self.mp.n_dof))
         self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32)
 
     def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5, width: float = None,
-                      zero_start: bool = False, zero_goal: bool = False):
-        pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=0.01,
+                      off: float = 0.01, zero_start: bool = False, zero_goal: bool = False):
+        pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=off,
                                            zero_start=zero_start, zero_goal=zero_goal)
 
         weights = np.zeros(shape=(num_basis, num_dof))
@ -32,10 +33,10 @@ class DetPMPWrapper(MPWrapper):
         return pmp
 
     def mp_rollout(self, action):
-        params = np.reshape(action, (self.mp.n_basis, self.mp.n_dof)) * self.weights_scale
+        params = np.reshape(action, newshape=(self.mp.n_basis, self.mp.n_dof)) * self.weights_scale
         self.mp.set_weights(self.duration, params)
         _, des_pos, des_vel, _ = self.mp.compute_trajectory(1 / self.dt, 1.)
         if self.mp.zero_start:
-            des_pos += self.start_pos[None, :]
+            des_pos += self.env.start_pos[None, :]
 
         return des_pos, des_vel

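For intuition, a numpy-only sketch of what mp_rollout does to a flat action before mp_lib generates the trajectory (numbers chosen to match the ALRBallInACupDetPMP-v0 registration above; the trajectory and start posture are placeholders, not the real environment values):

import numpy as np

n_basis, n_dof, weights_scale = 5, 7, 0.2
flat_action = np.random.randn(n_basis * n_dof)                      # 35 values from the agent
params = np.reshape(flat_action, (n_basis, n_dof)) * weights_scale  # weights handed to the ProMP

des_pos = np.zeros((100, n_dof))    # placeholder for the ProMP position trajectory
start_pos = np.full(n_dof, 0.5)     # placeholder for env.start_pos
des_pos += start_pos[None, :]       # with zero_start=True, positions are shifted by the start posture
print(params.shape, des_pos.shape)  # (5, 7) (100, 7)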
View File

@ -4,13 +4,13 @@ from mp_lib import dmps
 from mp_lib.basis import DMPBasisGenerator
 from mp_lib.phase import ExpDecayPhaseGenerator
 
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
 from alr_envs.utils.mps.mp_wrapper import MPWrapper
 
 
 class DmpWrapper(MPWrapper):
 
-    def __init__(self, env: MPEnv, num_dof: int, num_basis: int,
+    def __init__(self, env: AlrEnv, num_dof: int, num_basis: int,
                  duration: int = 1, alpha_phase: float = 2., dt: float = None,
                  learn_goal: bool = False, post_traj_time: float = 0.,
                  weights_scale: float = 1., goal_scale: float = 1., bandwidth_factor: float = 3.,
@ -40,7 +40,7 @@ class DmpWrapper(MPWrapper):
         super().__init__(env, num_dof, dt, duration, post_traj_time, policy_type, weights_scale, render_mode,
                          num_basis=num_basis, alpha_phase=alpha_phase, bandwidth_factor=bandwidth_factor)
 
-        action_bounds = np.inf * np.ones((np.prod(self.mp.dmp_weights.shape) + (num_dof if learn_goal else 0)))
+        action_bounds = np.inf * np.ones((np.prod(self.mp.weights.shape) + (num_dof if learn_goal else 0)))
         self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32)
 
     def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5, alpha_phase: float = 2.,
@ -51,7 +51,7 @@ class DmpWrapper(MPWrapper):
                                            basis_bandwidth_factor=bandwidth_factor)
 
         dmp = dmps.DMP(num_dof=num_dof, basis_generator=basis_generator, phase_generator=phase_generator,
-                       num_time_steps=int(duration / dt), dt=dt)
+                       duration=duration, dt=dt)
 
         return dmp
@ -66,7 +66,7 @@ class DmpWrapper(MPWrapper):
             goal_pos = self.env.goal_pos
             assert goal_pos is not None
 
-        weight_matrix = np.reshape(params, self.mp.dmp_weights.shape)  # [num_basis, num_dof]
+        weight_matrix = np.reshape(params, self.mp.weights.shape)  # [num_basis, num_dof]
         return goal_pos * self.goal_scale, weight_matrix * self.weights_scale
 
     def mp_rollout(self, action):

View File

@ -5,7 +5,7 @@ import gym
 import numpy as np
 
 
-class MPEnv(gym.Env):
+class AlrEnv(gym.Env):
 
     @property
     @abstractmethod

View File

@ -3,13 +3,13 @@ from abc import ABC, abstractmethod
 import gym
 import numpy as np
 
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
 from alr_envs.utils.policies import get_policy_class
 
 
 class MPWrapper(gym.Wrapper, ABC):
 
-    def __init__(self, env: MPEnv, num_dof: int, dt: float, duration: int = 1, post_traj_time: float = 0.,
+    def __init__(self, env: AlrEnv, num_dof: int, dt: float, duration: float = 1, post_traj_time: float = 0.,
                  policy_type: str = None, weights_scale: float = 1., render_mode: str = None, **mp_kwargs):
         super().__init__(env)
@ -53,9 +53,6 @@ class MPWrapper(gym.Wrapper, ABC):
         return obs, np.array(rewards), dones, infos
 
-    def configure(self, context):
-        self.env.configure(context)
-
     def reset(self):
         return self.env.reset()[self.env.active_obs]
@ -65,7 +62,7 @@ class MPWrapper(gym.Wrapper, ABC):
         if self.post_traj_steps > 0:
             trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
-            velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.num_dimensions))])
+            velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.n_dof))])
 
         # self._trajectory = trajectory
         # self._velocity = velocity
@ -105,7 +102,7 @@ class MPWrapper(gym.Wrapper, ABC):
         raise NotImplementedError()
 
     @abstractmethod
-    def initialize_mp(self, num_dof: int, duration: int, dt: float, **kwargs):
+    def initialize_mp(self, num_dof: int, duration: float, dt: float, **kwargs):
         """
         Create respective instance of MP
         Returns:

View File

@ -1,6 +1,7 @@
 from collections import defaultdict
 
 import gym
 import numpy as np
+from alr_envs.utils.mp_env_async_sampler import AlrMpEnvSampler, AlrContextualMpEnvSampler, DummyDist
 
 
 def example_mujoco():
@ -22,9 +23,9 @@ def example_mujoco():
         obs = env.reset()
 
 
-def example_dmp():
+def example_mp(env_name="alr_envs:HoleReacherDMP-v0"):
     # env = gym.make("alr_envs:ViaPointReacherDMP-v0")
-    env = gym.make("alr_envs:HoleReacherDMP-v0")
+    env = gym.make(env_name)
     rewards = 0
     # env.render(mode=None)
     obs = env.reset()
@ -79,9 +80,36 @@ def example_async(env_id="alr_envs:HoleReacherDMP-v0", n_cpu=4, seed=int('533D',
     print(sample(envs, 16))
 
 
+def example_async_sampler(env_name="alr_envs:HoleReacherDetPMP-v1", n_cpu=4):
+    n_samples = 10
+
+    sampler = AlrMpEnvSampler(env_name, num_envs=n_cpu)
+    dim = sampler.env.action_space.spaces[0].shape[0]
+
+    thetas = np.random.randn(n_samples, dim)  # usually form a search distribution
+
+    _, rewards, __, ___ = sampler(thetas)
+
+    print(rewards)
+
+
+def example_async_contextual_sampler(env_name="alr_envs:SimpleReacherDMP-v1", n_cpu=4):
+    sampler = AlrContextualMpEnvSampler(env_name, num_envs=n_cpu)
+    dim = sampler.env.action_space.spaces[0].shape[0]
+    dist = DummyDist(dim)  # needs a sample function
+
+    n_samples = 10
+    new_samples, new_contexts, obs, new_rewards, done, infos = sampler(dist, n_samples)
+
+    print(new_rewards)
+
+
 if __name__ == '__main__':
     # example_mujoco()
-    # example_dmp()
-    example_async("alr_envs:LongSimpleReacherDMP-v0", 4)
-    # env = gym.make("alr_envs:HoleReacherDMP-v0", context=0.1)
-    # env = gym.make("alr_envs:HoleReacherDMP-v1")
+    # example_dmp("alr_envs:SimpleReacherDMP-v1")
+    # example_async("alr_envs:LongSimpleReacherDMP-v0", 4)
+    # example_async_contextual_sampler()
+    # env = gym.make("alr_envs:HoleReacherDetPMP-v1")
+    env_name = "alr_envs:ALRBallInACupSimpleDetPMP-v0"
+    # example_async_sampler(env_name)
+    example_mp(env_name)