commit 3273f455c5
parent fb4b857fb5

    wrappers updated
@@ -1,6 +1,6 @@
 from alr_envs import dmc, meta, open_ai
-from alr_envs.utils.make_env_helpers import make, make_dmp_env, make_promp_env, make_rank
 from alr_envs.utils import make_dmc
+from alr_envs.utils.make_env_helpers import make, make_bb, make_rank

 # Convenience function for all MP environments
 from .alr import ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS
@@ -406,8 +406,6 @@ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")

 kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
 kwargs_dict_via_point_reacher_promp['wrappers'].append('TODO')  # TODO
-kwargs_dict_via_point_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5
-kwargs_dict_via_point_reacher_promp['phase_generator_kwargs']['tau'] = 2
 kwargs_dict_via_point_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
 kwargs_dict_via_point_reacher_promp['name'] = "ViaPointReacherProMP-v0"
 register(
@@ -448,10 +446,10 @@ for _v in _versions:
 kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
 kwargs_dict_hole_reacher_promp['wrappers'].append('TODO')  # TODO
 kwargs_dict_hole_reacher_promp['ep_wrapper_kwargs']['weight_scale'] = 2
-kwargs_dict_hole_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5
+# kwargs_dict_hole_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5
-kwargs_dict_hole_reacher_promp['phase_generator_kwargs']['tau'] = 2
+# kwargs_dict_hole_reacher_promp['phase_generator_kwargs']['tau'] = 2
 kwargs_dict_hole_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
-kwargs_dict_hole_reacher_promp['basis_generator_kwargs']['num_basis'] = 5
+# kwargs_dict_hole_reacher_promp['basis_generator_kwargs']['num_basis'] = 5
 kwargs_dict_hole_reacher_promp['name'] = f"alr_envs:{_v}"
 register(
     id=_env_id,
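Editor's note: the registrations touched above all follow the same pattern, a copied defaults dict whose sub-dicts are adjusted per task and then handed to register. A minimal hedged sketch of that pattern, with a toy stand-in for DEFAULT_MP_ENV_DICT and a hypothetical env id; the entry point for make_bb is an assumption, not taken from this commit:

    from copy import deepcopy

    from gym.envs.registration import register  # assumed to be the register used by this module

    # Toy stand-in; the real defaults live in DEFAULT_MP_ENV_DICT in the registration module.
    DEFAULT_MP_ENV_DICT = {
        'name': None,
        'wrappers': [],
        'movement_primitives_kwargs': {},
        'phase_generator_kwargs': {},
        'controller_kwargs': {},
        'basis_generator_kwargs': {},
        'ep_wrapper_kwargs': {},
    }

    kwargs_dict_example_promp = deepcopy(DEFAULT_MP_ENV_DICT)
    kwargs_dict_example_promp['wrappers'].append('TODO')  # the task-specific RawInterfaceWrapper goes here
    kwargs_dict_example_promp['controller_kwargs']['controller_type'] = 'velocity'
    kwargs_dict_example_promp['name'] = "ExampleReacherProMP-v0"  # hypothetical id

    register(
        id="ExampleReacherProMP-v0",
        entry_point='alr_envs.utils.make_env_helpers:make_bb',  # assumption: make_bb builds the env from this dict
        kwargs=kwargs_dict_example_promp,
    )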
@@ -2,12 +2,12 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
-    @property
-    def active_obs(self):
+    def get_context_mask(self):
         return np.hstack([
             [self.env.random_start] * self.env.n_links,  # cos
             [self.env.random_start] * self.env.n_links,  # sin
@@ -18,14 +18,6 @@ class MPWrapper(MPEnvWrapper):
             [False]  # env steps
         ])

-    # @property
-    # def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-    #     return self._joint_angles.copy()
-    #
-    # @property
-    # def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-    #     return self._angle_velocity.copy()
-
     @property
     def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
         return self.env.current_pos
@@ -33,11 +25,3 @@ class MPWrapper(MPEnvWrapper):
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
         return self.env.current_vel
-
-    @property
-    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
-    @property
-    def dt(self) -> Union[float, int]:
-        return self.env.dt
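Editor's note: across the wrapper files in this commit the pattern after the change is the same: subclass RawInterfaceWrapper, expose a context mask plus current_pos/current_vel, and drop the goal_pos/dt plumbing that the old MPEnvWrapper required. A minimal sketch, assuming a hypothetical env that exposes n_links, current_pos and current_vel like the reachers above (whether the mask is a property varies between the files in this diff):

    from typing import Tuple, Union

    import numpy as np

    from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


    class ExampleMPWrapper(RawInterfaceWrapper):  # hypothetical wrapper, not part of this commit

        @property
        def context_mask(self) -> np.ndarray:
            # Mark which observation entries describe the fixed, per-episode context.
            return np.hstack([
                [False] * self.env.n_links,  # joint state changes every step -> not context
                [True] * 2,                  # e.g. a goal position that is fixed per episode
            ])

        @property
        def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
            return self.env.current_pos

        @property
        def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
            return self.env.current_vel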
@@ -1,31 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-
-
-class NewMPWrapper(RawInterfaceWrapper):
-
-    def get_context_mask(self):
-        return np.hstack([
-            [self.env.random_start] * self.env.n_links,  # cos
-            [self.env.random_start] * self.env.n_links,  # sin
-            [self.env.random_start] * self.env.n_links,  # velocity
-            [self.env.initial_width is None],  # hole width
-            # [self.env.hole_depth is None],  # hole depth
-            [True] * 2,  # x-y coordinates of target distance
-            [False]  # env steps
-        ])
-
-    @property
-    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.current_pos
-
-    @property
-    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.current_vel
-
-    @property
-    def dt(self) -> Union[float, int]:
-        return self.env.dt
@@ -2,12 +2,12 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
-    @property
-    def active_obs(self):
+    def context_mask(self):
         return np.hstack([
             [self.env.random_start] * self.env.n_links,  # cos
             [self.env.random_start] * self.env.n_links,  # sin
@@ -24,10 +24,6 @@ class MPWrapper(MPEnvWrapper):
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
         return self.env.current_vel

-    @property
-    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
     @property
     def dt(self) -> Union[float, int]:
         return self.env.dt
@@ -1,31 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from mp_env_api import MPEnvWrapper
-
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-
-
-class MPWrapper(RawInterfaceWrapper):
-
-    def context_mask(self):
-        return np.hstack([
-            [self.env.random_start] * self.env.n_links,  # cos
-            [self.env.random_start] * self.env.n_links,  # sin
-            [self.env.random_start] * self.env.n_links,  # velocity
-            [True] * 2,  # x-y coordinates of target distance
-            [False]  # env steps
-        ])
-
-    @property
-    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.current_pos
-
-    @property
-    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.current_vel
-
-    @property
-    def dt(self) -> Union[float, int]:
-        return self.env.dt
@@ -2,12 +2,12 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         return np.hstack([
             [self.env.random_start] * self.env.n_links,  # cos
             [self.env.random_start] * self.env.n_links,  # sin
@@ -2,8 +2,6 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
-
 from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


@@ -1,13 +1,12 @@
-from .reacher.balancing import BalancingEnv
+from .ant_jump.ant_jump import ALRAntJumpEnv
 from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
 from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
-from .table_tennis.tt_gym import TTEnvGym
+from .beerpong.beerpong import ALRBeerBongEnv
-from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased, ALRBeerBongEnvStepBasedEpisodicReward, ALRBeerBongEnvFixedReleaseStep
-from .ant_jump.ant_jump import ALRAntJumpEnv
 from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
-from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased
 from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
 from .hopper_throw.hopper_throw import ALRHopperThrowEnv
 from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
+from .reacher.alr_reacher import ALRReacherEnv
+from .reacher.balancing import BalancingEnv
+from .table_tennis.tt_gym import TTEnvGym
 from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv
-from .reacher.alr_reacher import ALRReacherEnv
@@ -2,13 +2,13 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):

     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         return np.hstack([
             [False] * 111,  # ant has 111 dimensional observation space !!
             [True]  # goal height
@@ -2,13 +2,13 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class BallInACupMPWrapper(MPEnvWrapper):
+class BallInACupMPWrapper(RawInterfaceWrapper):

     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         # TODO: @Max Filter observations correctly
         return np.hstack([
             [False] * 7,  # cos
@@ -22,7 +22,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
     def __init__(
             self, frame_skip=1, apply_gravity_comp=True, noisy=False,
             rndm_goal=False, cup_goal_pos=None
     ):

         cup_goal_pos = np.array(cup_goal_pos if cup_goal_pos is not None else [-0.3, -1.2, 0.840])
         if cup_goal_pos.shape[0] == 2:
@@ -154,7 +154,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
             success=success,
             is_collided=is_collided, sim_crash=crash,
             table_contact_first=int(not self.reward_function.ball_ground_contact_first)
         )
         infos.update(reward_infos)
         return ob, reward, done, infos

@@ -176,7 +176,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
             cup_goal_diff_top,
             self.sim.model.body_pos[self.cup_table_id][:2].copy(),
             [self._steps],
         ])

     @property
     def dt(self):
@@ -1,171 +0,0 @@
-import numpy as np
-
-
-class BeerPongReward:
-    def __init__(self):
-
-        self.robot_collision_objects = ["wrist_palm_link_convex_geom",
-                                        "wrist_pitch_link_convex_decomposition_p1_geom",
-                                        "wrist_pitch_link_convex_decomposition_p2_geom",
-                                        "wrist_pitch_link_convex_decomposition_p3_geom",
-                                        "wrist_yaw_link_convex_decomposition_p1_geom",
-                                        "wrist_yaw_link_convex_decomposition_p2_geom",
-                                        "forearm_link_convex_decomposition_p1_geom",
-                                        "forearm_link_convex_decomposition_p2_geom",
-                                        "upper_arm_link_convex_decomposition_p1_geom",
-                                        "upper_arm_link_convex_decomposition_p2_geom",
-                                        "shoulder_link_convex_decomposition_p1_geom",
-                                        "shoulder_link_convex_decomposition_p2_geom",
-                                        "shoulder_link_convex_decomposition_p3_geom",
-                                        "base_link_convex_geom", "table_contact_geom"]
-
-        self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
-                                      "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
-                                      # "cup_base_table", "cup_base_table_contact",
-                                      "cup_geom_table15",
-                                      "cup_geom_table16",
-                                      "cup_geom_table17", "cup_geom1_table8",
-                                      # "cup_base_table_contact",
-                                      # "cup_base_table"
-                                      ]
-
-        self.ball_traj = None
-        self.dists = None
-        self.dists_final = None
-        self.costs = None
-        self.action_costs = None
-        self.angle_rewards = None
-        self.cup_angles = None
-        self.cup_z_axes = None
-        self.collision_penalty = 500
-        self.reset(None)
-
-    def reset(self, context):
-        self.ball_traj = []
-        self.dists = []
-        self.dists_final = []
-        self.costs = []
-        self.action_costs = []
-        self.angle_rewards = []
-        self.cup_angles = []
-        self.cup_z_axes = []
-        self.ball_ground_contact = False
-        self.ball_table_contact = False
-        self.ball_wall_contact = False
-        self.ball_cup_contact = False
-
-    def compute_reward(self, env, action):
-        self.ball_id = env.sim.model._body_name2id["ball"]
-        self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
-        self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
-        self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
-        self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
-        self.cup_table_id = env.sim.model._body_name2id["cup_table"]
-        self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
-        self.wall_collision_id = env.sim.model._geom_name2id["wall"]
-        self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
-        self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
-        self.ground_collision_id = env.sim.model._geom_name2id["ground"]
-        self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
-
-        goal_pos = env.sim.data.site_xpos[self.goal_id]
-        ball_pos = env.sim.data.body_xpos[self.ball_id]
-        ball_vel = env.sim.data.body_xvelp[self.ball_id]
-        goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
-        self.dists.append(np.linalg.norm(goal_pos - ball_pos))
-        self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
-
-        action_cost = np.sum(np.square(action))
-        self.action_costs.append(action_cost)
-
-        ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
-                                                                 self.table_collision_id)
-
-        if ball_table_bounce:  # or ball_cup_table_cont or ball_wall_con
-            self.ball_table_contact = True
-
-        ball_cup_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
-                                                                  self.cup_collision_ids)
-        if ball_cup_cont:
-            self.ball_cup_contact = True
-
-        ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.wall_collision_id)
-        if ball_wall_cont and not self.ball_table_contact:
-            self.ball_wall_contact = True
-
-        ball_ground_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
-                                                                   self.ground_collision_id)
-        if ball_ground_contact and not self.ball_table_contact:
-            self.ball_ground_contact = True
-
-        self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
-        if env._steps == env.ep_length - 1 or self._is_collided:
-
-            min_dist = np.min(self.dists)
-
-            ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.cup_table_collision_id)
-
-            cost_offset = 0
-
-            if self.ball_ground_contact:  # or self.ball_wall_contact:
-                cost_offset += 2
-
-            if not self.ball_table_contact:
-                cost_offset += 2
-
-            if not ball_in_cup:
-                cost_offset += 2
-                cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-4 * action_cost  # + min_dist ** 2
-            else:
-                if self.ball_cup_contact:
-                    cost_offset += 1
-                cost = cost_offset + self.dists_final[-1] ** 2 + 1e-4 * action_cost
-
-            reward = - 1*cost - self.collision_penalty * int(self._is_collided)
-            success = ball_in_cup and not self.ball_ground_contact and not self.ball_wall_contact and not self.ball_cup_contact
-        else:
-            reward = - 1e-4 * action_cost
-            success = False
-
-        infos = {}
-        infos["success"] = success
-        infos["is_collided"] = self._is_collided
-        infos["ball_pos"] = ball_pos.copy()
-        infos["ball_vel"] = ball_vel.copy()
-        infos["action_cost"] = 5e-4 * action_cost
-
-        return reward, infos
-
-    def _check_collision_single_objects(self, sim, id_1, id_2):
-        for coni in range(0, sim.data.ncon):
-            con = sim.data.contact[coni]
-
-            collision = con.geom1 == id_1 and con.geom2 == id_2
-            collision_trans = con.geom1 == id_2 and con.geom2 == id_1
-
-            if collision or collision_trans:
-                return True
-        return False
-
-    def _check_collision_with_itself(self, sim, collision_ids):
-        col_1, col_2 = False, False
-        for j, id in enumerate(collision_ids):
-            col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
-            if j != len(collision_ids) - 1:
-                col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
-            else:
-                col_2 = False
-            collision = True if col_1 or col_2 else False
-        return collision
-
-    def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
-        for coni in range(0, sim.data.ncon):
-            con = sim.data.contact[coni]
-
-            collision = con.geom1 in id_list and con.geom2 == id_1
-            collision_trans = con.geom1 == id_1 and con.geom2 in id_list
-
-            if collision or collision_trans:
-                return True
-        return False
@@ -1,141 +0,0 @@
-import numpy as np
-from alr_envs.alr.mujoco import alr_reward_fct
-
-
-class BeerpongReward(alr_reward_fct.AlrReward):
-    def __init__(self, sim, sim_time):
-
-        self.sim = sim
-        self.sim_time = sim_time
-
-        self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
-                                  "wrist_pitch_link_convex_decomposition_p1_geom",
-                                  "wrist_pitch_link_convex_decomposition_p2_geom",
-                                  "wrist_pitch_link_convex_decomposition_p3_geom",
-                                  "wrist_yaw_link_convex_decomposition_p1_geom",
-                                  "wrist_yaw_link_convex_decomposition_p2_geom",
-                                  "forearm_link_convex_decomposition_p1_geom",
-                                  "forearm_link_convex_decomposition_p2_geom"]
-
-        self.ball_id = None
-        self.ball_collision_id = None
-        self.goal_id = None
-        self.goal_final_id = None
-        self.collision_ids = None
-
-        self.ball_traj = None
-        self.dists = None
-        self.dists_ctxt = None
-        self.dists_final = None
-        self.costs = None
-
-        self.reset(None)
-
-    def reset(self, context):
-        self.ball_traj = np.zeros(shape=(self.sim_time, 3))
-        self.dists = []
-        self.dists_ctxt = []
-        self.dists_final = []
-        self.costs = []
-        self.action_costs = []
-        self.context = context
-        self.ball_in_cup = False
-        self.dist_ctxt = 5
-        self.bounce_dist = 2
-        self.min_dist = 2
-        self.dist_final = 2
-        self.table_contact = False
-
-        self.ball_id = self.sim.model._body_name2id["ball"]
-        self.ball_collision_id = self.sim.model._geom_name2id["ball_geom"]
-        self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
-        self.goal_id = self.sim.model._site_name2id["cup_goal_table"]
-        self.goal_final_id = self.sim.model._site_name2id["cup_goal_final_table"]
-        self.collision_ids = [self.sim.model._geom_name2id[name] for name in self.collision_objects]
-        self.cup_table_id = self.sim.model._body_name2id["cup_table"]
-        self.bounce_table_id = self.sim.model._site_name2id["bounce_table"]
-
-    def compute_reward(self, action, sim, step):
-        action_cost = np.sum(np.square(action))
-        self.action_costs.append(action_cost)
-
-        stop_sim = False
-        success = False
-
-        if self.check_collision(sim):
-            reward = - 1e-2 * action_cost - 10
-            stop_sim = True
-            return reward, success, stop_sim
-
-        # Compute the current distance from the ball to the inner part of the cup
-        goal_pos = sim.data.site_xpos[self.goal_id]
-        ball_pos = sim.data.body_xpos[self.ball_id]
-        bounce_pos = sim.data.site_xpos[self.bounce_table_id]
-        goal_final_pos = sim.data.site_xpos[self.goal_final_id]
-        self.dists.append(np.linalg.norm(goal_pos - ball_pos))
-        self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
-        self.ball_traj[step, :] = ball_pos
-
-        ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
-        table_contact = self.check_ball_table_contact(sim, self.ball_collision_id)
-
-        if table_contact and not self.table_contact:
-            self.bounce_dist = np.minimum((np.linalg.norm(bounce_pos - ball_pos)), 2)
-            self.table_contact = True
-
-        if step == self.sim_time - 1:
-            min_dist = np.min(self.dists)
-            self.min_dist = min_dist
-            dist_final = self.dists_final[-1]
-            self.dist_final = dist_final
-
-            cost = 0.33 * min_dist + 0.33 * dist_final + 0.33 * self.bounce_dist
-            reward = np.exp(-2 * cost) - 1e-2 * action_cost
-            success = self.bounce_dist < 0.05 and dist_final < 0.05 and ball_in_cup
-        else:
-            reward = - 1e-2 * action_cost
-            success = False
-
-        return reward, success, stop_sim
-
-    def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt):
-        if not ball_in_cup:
-            cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2)
-        else:
-            cost = 2 * dist_to_ctxt ** 2
-        print('Context Distance:', dist_to_ctxt)
-        return cost
-
-    def check_ball_table_contact(self, sim, ball_collision_id):
-        table_collision_id = sim.model._geom_name2id["table_contact_geom"]
-        for coni in range(0, sim.data.ncon):
-            con = sim.data.contact[coni]
-            collision = con.geom1 == table_collision_id and con.geom2 == ball_collision_id
-            collision_trans = con.geom1 == ball_collision_id and con.geom2 == table_collision_id
-
-            if collision or collision_trans:
-                return True
-        return False
-
-    def check_ball_in_cup(self, sim, ball_collision_id):
-        cup_base_collision_id = sim.model._geom_name2id["cup_base_table_contact"]
-        for coni in range(0, sim.data.ncon):
-            con = sim.data.contact[coni]
-
-            collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id
-            collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id
-
-            if collision or collision_trans:
-                return True
-        return False
-
-    def check_collision(self, sim):
-        for coni in range(0, sim.data.ncon):
-            con = sim.data.contact[coni]
-
-            collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
-            collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
-
-            if collision or collision_trans:
-                return True
-        return False
@@ -1,166 +0,0 @@
-from gym import utils
-import os
-import numpy as np
-from gym.envs.mujoco import MujocoEnv
-
-
-class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
-    def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None):
-        self._steps = 0
-
-        self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
-                                     "beerpong" + ".xml")
-
-        self.start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
-        self.start_vel = np.zeros(7)
-
-        self._q_pos = []
-        self._q_vel = []
-        # self.weight_matrix_scale = 50
-        self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
-        self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
-        self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
-
-        self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
-        self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
-
-        self.context = None
-
-        # alr_mujoco_env.AlrMujocoEnv.__init__(self,
-        #                                      self.xml_path,
-        #                                      apply_gravity_comp=apply_gravity_comp,
-        #                                      n_substeps=n_substeps)
-
-        self.sim_time = 8  # seconds
-        # self.sim_steps = int(self.sim_time / self.dt)
-        if reward_function is None:
-            from alr_envs.alr.mujoco.beerpong.beerpong_reward_simple import BeerpongReward
-            reward_function = BeerpongReward
-        self.reward_function = reward_function(self.sim, self.sim_steps)
-        self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
-        self.ball_id = self.sim.model._body_name2id["ball"]
-        self.cup_table_id = self.sim.model._body_name2id["cup_table"]
-        # self.bounce_table_id = self.sim.model._body_name2id["bounce_table"]
-
-        MujocoEnv.__init__(self, model_path=self.xml_path, frame_skip=n_substeps)
-        utils.EzPickle.__init__(self)
-
-    @property
-    def current_pos(self):
-        return self.sim.data.qpos[0:7].copy()
-
-    @property
-    def current_vel(self):
-        return self.sim.data.qvel[0:7].copy()
-
-    def configure(self, context):
-        if context is None:
-            context = np.array([0, -2, 0.840])
-        self.context = context
-        self.reward_function.reset(context)
-
-    def reset_model(self):
-        init_pos_all = self.init_qpos.copy()
-        init_pos_robot = self.start_pos
-        init_vel = np.zeros_like(init_pos_all)
-
-        self._steps = 0
-        self._q_pos = []
-        self._q_vel = []
-
-        start_pos = init_pos_all
-        start_pos[0:7] = init_pos_robot
-        # start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
-
-        self.set_state(start_pos, init_vel)
-
-        ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
-        self.sim.model.body_pos[self.ball_id] = ball_pos.copy()
-        self.sim.model.body_pos[self.cup_table_id] = self.context.copy()
-        # self.sim.model.body_pos[self.bounce_table_id] = self.context.copy()
-
-        self.sim.forward()
-
-        return self._get_obs()
-
-    def step(self, a):
-        reward_dist = 0.0
-        angular_vel = 0.0
-        reward_ctrl = - np.square(a).sum()
-        action_cost = np.sum(np.square(a))
-
-        crash = self.do_simulation(a, self.frame_skip)
-        joint_cons_viol = self.check_traj_in_joint_limits()
-
-        self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
-        self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
-
-        ob = self._get_obs()
-
-        if not crash and not joint_cons_viol:
-            reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps)
-            done = success or self._steps == self.sim_steps - 1 or stop_sim
-            self._steps += 1
-        else:
-            reward = -10 - 1e-2 * action_cost
-            success = False
-            done = True
-        return ob, reward, done, dict(reward_dist=reward_dist,
-                                      reward_ctrl=reward_ctrl,
-                                      velocity=angular_vel,
-                                      traj=self._q_pos, is_success=success,
-                                      is_collided=crash or joint_cons_viol)
-
-    def check_traj_in_joint_limits(self):
-        return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
-
-    def extend_des_pos(self, des_pos):
-        des_pos_full = self.start_pos.copy()
-        des_pos_full[1] = des_pos[0]
-        des_pos_full[3] = des_pos[1]
-        des_pos_full[5] = des_pos[2]
-        return des_pos_full
-
-    def extend_des_vel(self, des_vel):
-        des_vel_full = self.start_vel.copy()
-        des_vel_full[1] = des_vel[0]
-        des_vel_full[3] = des_vel[1]
-        des_vel_full[5] = des_vel[2]
-        return des_vel_full
-
-    def _get_obs(self):
-        theta = self.sim.data.qpos.flat[:7]
-        return np.concatenate([
-            np.cos(theta),
-            np.sin(theta),
-            # self.get_body_com("target"),  # only return target to make problem harder
-            [self._steps],
-        ])
-
-
-if __name__ == "__main__":
-    env = ALRBeerpongEnv()
-    ctxt = np.array([0, -2, 0.840])  # initial
-
-    env.configure(ctxt)
-    env.reset()
-    env.render()
-    for i in range(16000):
-        # test with random actions
-        ac = 0.0 * env.action_space.sample()[0:7]
-        ac[1] = -0.01
-        ac[3] = - 0.01
-        ac[5] = -0.01
-        # ac = env.start_pos
-        # ac[0] += np.pi/2
-        obs, rew, d, info = env.step(ac)
-        env.render()
-
-        print(rew)
-
-        if d:
-            break
-
-    env.close()
@@ -2,13 +2,13 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):

     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         return np.hstack([
             [False] * 7,  # cos
             [False] * 7,  # sin
@@ -2,12 +2,12 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         return np.hstack([
             [False] * 17,
             [True]  # goal height
@@ -54,7 +54,8 @@ class ALRHopperJumpEnv(HopperEnv):
         self.current_step += 1
         self.do_simulation(action, self.frame_skip)
         height_after = self.get_body_com("torso")[2]
-        site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
+        # site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
+        site_pos_after = self.get_body_com('foot_site')
         self.max_height = max(height_after, self.max_height)

         ctrl_cost = self.control_cost(action)
@@ -2,12 +2,12 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         return np.hstack([
             [False] * (5 + int(not self.exclude_current_positions_from_observation)),  # position
             [False] * 6,  # velocity
@@ -2,12 +2,12 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         return np.hstack([
             [False] * 17,
             [True]  # goal pos
@@ -2,8 +2,6 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
-
 from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


@@ -1 +1,2 @@
 from .mp_wrapper import MPWrapper
+from .new_mp_wrapper import MPWrapper as NewMPWrapper
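Editor's note: with the added alias both wrapper variants can be imported from the same sub-package; a one-line hedged sketch (the concrete package path is not shown in this diff and is purely illustrative):

    # Hypothetical import path; substitute the sub-package this __init__.py belongs to.
    from alr_envs.alr.mujoco.reacher import MPWrapper, NewMPWrapper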
@@ -1,13 +1,14 @@
 from typing import Union

 import numpy as np
-from mp_env_api import MPEnvWrapper
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):

     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         return np.concatenate([
             [False] * self.n_links,  # cos
             [False] * self.n_links,  # sin
@@ -8,12 +8,6 @@ from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

     @property
-    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.sim.data.qpos.flat[:self.env.n_links]
-    @property
-    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.sim.data.qvel.flat[:self.env.n_links]
-
     def context_mask(self):
         return np.concatenate([
             [False] * self.env.n_links,  # cos
@@ -24,3 +18,11 @@ class MPWrapper(RawInterfaceWrapper):
             # self.get_body_com("target"),  # only return target to make problem harder
             [False],  # step
         ])
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.env.sim.data.qpos.flat[:self.env.n_links]
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.env.sim.data.qvel.flat[:self.env.n_links]
@@ -2,13 +2,13 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):

     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         # TODO: @Max Filter observations correctly
         return np.hstack([
             [False] * 7,  # Joint Pos
@@ -2,12 +2,12 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         return np.hstack([
             [False] * 17,
             [True]  # goal pos
@@ -2,8 +2,6 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
-
 from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


@@ -2,13 +2,13 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):

     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         # Joint and target positions are randomized, velocities are always set to 0.
         return np.hstack([
             [True] * 3,  # target position
@@ -2,13 +2,13 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):

     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         # Besides the ball position, the environment is always set to 0.
         return np.hstack([
             [False] * 2,  # cup position
@@ -2,18 +2,17 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):

     def __init__(self, env, n_poles: int = 1):
         self.n_poles = n_poles
         super().__init__(env)


     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         # Besides the ball position, the environment is always set to 0.
         return np.hstack([
             [True],  # slider position
@@ -2,13 +2,13 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):

     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         # Joint and target positions are randomized, velocities are always set to 0.
         return np.hstack([
             [True] * 2,  # joint position
@@ -59,7 +59,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
     # Base DMC name, according to structure of above example
     base_env = "ball_in_cup-catch"

-    # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
+    # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
     # You can also add other gym.Wrappers in case they are needed.
     wrappers = [alr_envs.dmc.suite.ball_in_cup.MPWrapper]
     mp_kwargs = {
@@ -62,7 +62,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
     # Base MetaWorld name, according to structure of above example
     base_env = "button-press-v2"

-    # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
+    # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
     # You can also add other gym.Wrappers in case they are needed.
     wrappers = [alr_envs.meta.goal_and_object_change.MPWrapper]
     mp_kwargs = {
@@ -59,6 +59,17 @@ def example_custom_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=
     """
     # Changing the traj_gen_kwargs is possible by providing them to gym.
     # E.g. here by providing way to many basis functions
+    # mp_dict = alr_envs.from_default_config('ALRReacher-v0', {'basis_generator_kwargs': {'num_basis': 10}})
+    # mp_dict.update({'basis_generator_kwargs': {'num_basis': 10}})
+    # mp_dict.update({'black_box_kwargs': {'learn_sub_trajectories': True}})
+    # mp_dict.update({'black_box_kwargs': {'do_replanning': lambda pos, vel, t: lambda t: t % 100}})
+
+    # default env with promp and no learn_sub_trajectories and replanning
+    # env = alr_envs.make('ALRReacherProMP-v0', 1, n_links=7)
+    env = alr_envs.make('ALRReacherProMP-v0', 1, basis_generator_kwargs={'num_basis': 10}, n_links=7)
+    # env = alr_envs.make('ALRReacher-v0', seed=1, bb_kwargs=mp_dict, n_links=1)
+    # env = alr_envs.make_bb('ALRReacher-v0', **mp_dict)
+
     mp_kwargs = {
         "num_dof": 5,
         "num_basis": 1000,
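Editor's note: the added lines above pass trajectory-generator options straight through alr_envs.make. A slightly expanded sketch of the same call; the make call is taken verbatim from the diff, while the rollout loop is generic gym usage and not part of this file:

    import alr_envs

    # Same call as added in the diff: override the number of basis functions at construction time.
    env = alr_envs.make('ALRReacherProMP-v0', 1, basis_generator_kwargs={'num_basis': 10}, n_links=7)

    obs = env.reset()
    for _ in range(5):
        # One step of a ProMP env executes a whole trajectory generated from the sampled parameters.
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            obs = env.reset()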
@@ -110,7 +121,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):

     base_env = "alr_envs:HoleReacher-v1"

-    # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
+    # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
     # You can also add other gym.Wrappers in case they are needed.
     wrappers = [alr_envs.alr.classic_control.hole_reacher.MPWrapper]
     mp_kwargs = {
@@ -148,14 +159,14 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):

 if __name__ == '__main__':
     render = False
-    # DMP
-    example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
+    # # DMP
+    # example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
-
-    # ProMP
-    example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=1, render=render)
+    #
+    # # ProMP
+    # example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=1, render=render)
-
-    # DetProMP
-    example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render)
+    #
+    # # DetProMP
+    # example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render)

     # Altered basis functions
     example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
@@ -4,7 +4,7 @@ import alr_envs
 def example_mp(env_name, seed=1):
     """
     Example for running a motion primitive based version of a OpenAI-gym environment, which is already registered.
-    For more information on motion primitive specific stuff, look at the trajectory_generator examples.
+    For more information on motion primitive specific stuff, look at the traj_gen examples.
     Args:
         env_name: ProMP env_id
         seed: seed
@@ -8,7 +8,7 @@ from alr_envs.utils.make_env_helpers import make_promp_env

 def visualize(env):
     t = env.t
-    pos_features = env.trajectory_generator.basis_generator.basis(t)
+    pos_features = env.traj_gen.basis_generator.basis(t)
     plt.plot(t, pos_features)
     plt.show()

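Editor's note: visualize() now reads the generator from the renamed env.traj_gen attribute. A hedged usage sketch; the exact make_promp_env signature is not shown in this diff and the arguments below are assumptions:

    import matplotlib.pyplot as plt

    from alr_envs.utils.make_env_helpers import make_promp_env

    env = make_promp_env("alr_envs:HoleReacherProMP-v1", seed=1)  # hypothetical arguments
    visualize(env)  # plots the basis functions of env.traj_gen over the trajectory time grid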
@@ -2,10 +2,10 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
     """
     This Wrapper is for environments where merely the goal changes in the beginning
     and no secondary objects or end effectors are altered at the start of an episode.
@@ -27,7 +27,7 @@ class MPWrapper(MPEnvWrapper):
     """

     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         # This structure is the same for all metaworld environments.
         # Only the observations which change could differ
         return np.hstack([
@@ -2,10 +2,10 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
     """
     This Wrapper is for environments where merely the goal changes in the beginning
     and no secondary objects or end effectors are altered at the start of an episode.
@@ -27,7 +27,7 @@ class MPWrapper(MPEnvWrapper):
     """

     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         # This structure is the same for all metaworld environments.
         # Only the observations which change could differ
         return np.hstack([
@@ -2,10 +2,10 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
     """
     This Wrapper is for environments where merely the goal changes in the beginning
     and no secondary objects or end effectors are altered at the start of an episode.
@@ -27,7 +27,7 @@ class MPWrapper(MPEnvWrapper):
     """

     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         # This structure is the same for all metaworld environments.
         # Only the observations which change could differ
         return np.hstack([
@@ -2,10 +2,10 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
     """
     This Wrapper is for environments where merely the goal changes in the beginning
     and no secondary objects or end effectors are altered at the start of an episode.
@@ -27,7 +27,7 @@ class MPWrapper(MPEnvWrapper):
     """

     @property
-    def active_obs(self):
+    def context_mask(self) -> np.ndarray:
         # This structure is the same for all metaworld environments.
         # Only the observations which change could differ
         return np.hstack([
@@ -1,5 +1,5 @@
 from abc import ABC
-from typing import Tuple
+from typing import Tuple, Union

 import gym
 import numpy as np
@@ -16,7 +16,9 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
     def __init__(self,
                  env: RawInterfaceWrapper,
                  trajectory_generator: MPInterface, tracking_controller: BaseController,
-                 duration: float, verbose: int = 1, sequencing: bool = True, reward_aggregation: callable = np.sum):
+                 duration: float, verbose: int = 1, learn_sub_trajectories: bool = False,
+                 replanning_schedule: Union[None, callable] = None,
+                 reward_aggregation: callable = np.sum):
         """
         gym.Wrapper for leveraging a black box approach with a trajectory generator.
 
@@ -26,6 +28,9 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
             tracking_controller: Translates the desired trajectory to raw action sequences
             duration: Length of the trajectory of the movement primitive in seconds
             verbose: level of detail for returned values in info dict.
+            learn_sub_trajectories: Transforms full episode learning into learning sub-trajectories, similar to
+                step-based learning
+            replanning_schedule: callable that receives
             reward_aggregation: function that takes the np.ndarray of step rewards as input and returns the trajectory
                 reward, default summation over all values.
         """
@@ -33,21 +38,22 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
 
         self.env = env
         self.duration = duration
-        self.sequencing = sequencing
+        self.learn_sub_trajectories = learn_sub_trajectories
+        self.replanning_schedule = replanning_schedule
         self.current_traj_steps = 0
 
         # trajectory generation
-        self.trajectory_generator = trajectory_generator
+        self.traj_gen = trajectory_generator
         self.tracking_controller = tracking_controller
         # self.time_steps = np.linspace(0, self.duration, self.traj_steps)
-        # self.trajectory_generator.set_mp_times(self.time_steps)
+        # self.traj_gen.set_mp_times(self.time_steps)
-        self.trajectory_generator.set_duration(np.array([self.duration]), np.array([self.dt]))
+        self.traj_gen.set_duration(np.array([self.duration]), np.array([self.dt]))
 
         # reward computation
         self.reward_aggregation = reward_aggregation
 
         # spaces
-        self.return_context_observation = not (self.sequencing)  # TODO or we_do_replanning?)
+        self.return_context_observation = not (self.learn_sub_trajectories or replanning_schedule)
         self.traj_gen_action_space = self.get_traj_gen_action_space()
         self.action_space = self.get_action_space()
         self.observation_space = spaces.Box(low=self.env.observation_space.low[self.env.context_mask],
@@ -60,26 +66,26 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
 
     def observation(self, observation):
         # return context space if we are
-        return observation[self.context_mask] if self.return_context_observation else observation
+        return observation[self.env.context_mask] if self.return_context_observation else observation
 
     def get_trajectory(self, action: np.ndarray) -> Tuple:
         clipped_params = np.clip(action, self.traj_gen_action_space.low, self.traj_gen_action_space.high)
-        self.trajectory_generator.set_params(clipped_params)
+        self.traj_gen.set_params(clipped_params)
-        # if self.trajectory_generator.learn_tau:
+        # TODO: Bruce said DMP, ProMP, ProDMP can have 0 bc_time for sequencing
-        #     self.trajectory_generator.set_mp_duration(self.trajectory_generator.tau, np.array([self.dt]))
+        # TODO Check with Bruce for replanning
-        # TODO: Bruce said DMP, ProMP, ProDMP can have 0 bc_time
+        self.traj_gen.set_boundary_conditions(
-        self.trajectory_generator.set_boundary_conditions(bc_time=np.zeros((1,)), bc_pos=self.current_pos,
+            bc_time=np.zeros((1,)) if not self.replanning_schedule else self.current_traj_steps * self.dt,
-                                                           bc_vel=self.current_vel)
+            bc_pos=self.current_pos, bc_vel=self.current_vel)
         # TODO: is this correct for replanning? Do we need to adjust anything here?
-        self.trajectory_generator.set_duration(None if self.sequencing else self.duration, np.array([self.dt]))
+        self.traj_gen.set_duration(None if self.learn_sub_trajectories else self.duration, np.array([self.dt]))
-        traj_dict = self.trajectory_generator.get_trajs(get_pos=True, get_vel=True)
+        traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True)
         trajectory_tensor, velocity_tensor = traj_dict['pos'], traj_dict['vel']
 
         return get_numpy(trajectory_tensor), get_numpy(velocity_tensor)
 
     def get_traj_gen_action_space(self):
-        """This function can be used to set up an individual space for the parameters of the trajectory_generator."""
+        """This function can be used to set up an individual space for the parameters of the traj_gen."""
-        min_action_bounds, max_action_bounds = self.trajectory_generator.get_param_bounds()
+        min_action_bounds, max_action_bounds = self.traj_gen.get_param_bounds()
         mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
                                          dtype=np.float32)
         return mp_action_space
@@ -134,8 +140,11 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
             if self.render_kwargs:
                 self.render(**self.render_kwargs)
 
-            if done or self.env.do_replanning(self.current_pos, self.current_vel, obs, c_action,
+            if done:
-                                              t + 1 + self.current_traj_steps):
+                break
+
+            if self.replanning_schedule and self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
+                                                                     t + 1 + self.current_traj_steps):
                 break
 
         infos.update({k: v[:t + 1] for k, v in infos.items()})
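As the loop above shows, the replanning schedule receives the current position, the current velocity, the raw observation, the last step action and the absolute step count, and the rollout is cut short as soon as it returns True so that a new trajectory can be planned from the current state. A minimal sketch of such a schedule (the fixed 25-step interval is only an example):

def replan_every_25_steps(position, velocity, observation, action, step):
    # Trigger replanning on a fixed interval, independent of the state.
    return step % 25 == 0

# bb_env = BlackBoxWrapper(env, trajectory_generator=traj_gen, tracking_controller=controller,
#                          duration=2.0, replanning_schedule=replan_every_25_steps)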
@@ -160,6 +169,7 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
 
     def reset(self, **kwargs):
         self.current_traj_steps = 0
+        super(BlackBoxWrapper, self).reset(**kwargs)
 
     def plot_trajs(self, des_trajs, des_vels):
         import matplotlib.pyplot as plt
@@ -62,8 +62,8 @@ class RawInterfaceWrapper(gym.Wrapper):
         include other actions like ball releasing time for the beer pong environment.
         This only needs to be overwritten if the action space is modified.
         Args:
-            action: a vector instance of the whole action space, includes trajectory_generator parameters and additional parameters if
+            action: a vector instance of the whole action space, includes traj_gen parameters and additional parameters if
-                specified, else only trajectory_generator parameters
+                specified, else only traj_gen parameters
 
         Returns:
             Tuple: mp_arguments and other arguments
@@ -1,10 +1,11 @@
 from typing import Union
 
 import numpy as np
-from mp_env_api import MPEnvWrapper
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
 
 
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
     @property
     def current_vel(self) -> Union[float, int, np.ndarray]:
         return np.array([self.state[1]])
@@ -1,10 +1,11 @@
 from typing import Union
 
 import numpy as np
-from mp_env_api import MPEnvWrapper
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
 
 
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray]:
@@ -13,7 +14,3 @@ class MPWrapper(MPEnvWrapper):
     @property
     def current_pos(self) -> Union[float, int, np.ndarray]:
         return self.sim.data.qpos[:2]
-
-    @property
-    def dt(self) -> Union[float, int]:
-        return self.env.dt
@@ -2,10 +2,10 @@ from typing import Union
 
 import numpy as np
 
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
 
 
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
 
     @property
     def active_obs(self):
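Taken together, the wrapper diffs above sketch what the new raw interface expects from every environment: a context mask plus access to the current position and velocity. A compact illustration under these assumptions (the two-joint MuJoCo state layout is hypothetical):

from typing import Union

import numpy as np

from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


class TwoJointMPWrapper(RawInterfaceWrapper):
    """Illustrative wrapper for a hypothetical two-joint MuJoCo task."""

    @property
    def context_mask(self) -> np.ndarray:
        # expose only the two goal entries of a six-dimensional observation
        return np.hstack([[False] * 4, [True] * 2])

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        return self.sim.data.qpos[:2]

    @property
    def current_vel(self) -> Union[float, int, np.ndarray]:
        return self.sim.data.qvel[:2]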
@@ -1,18 +1,19 @@
 import warnings
-from typing import Iterable, Type, Union, Mapping, MutableMapping
+from copy import deepcopy
+from typing import Iterable, Type, Union, MutableMapping
 
 import gym
 import numpy as np
-from gym.envs.registration import EnvSpec
+from gym.envs.registration import EnvSpec, registry
-from mp_pytorch import MPInterface
+from gym.wrappers import TimeAwareObservation
 
 from alr_envs.mp.basis_generator_factory import get_basis_generator
 from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
-from alr_envs.mp.controllers.base_controller import BaseController
 from alr_envs.mp.controllers.controller_factory import get_controller
 from alr_envs.mp.mp_factory import get_trajectory_generator
 from alr_envs.mp.phase_generator_factory import get_phase_generator
 from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.utils.utils import nested_update
 
 
 def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
@@ -41,7 +42,15 @@ def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
     return f if return_callable else f()
 
 
-def make(env_id: str, seed, **kwargs):
+def make(env_id, seed, **kwargs):
+    spec = registry.get(env_id)
+    # This access is required to allow for nested dict updates
+    all_kwargs = deepcopy(spec._kwargs)
+    nested_update(all_kwargs, **kwargs)
+    return _make(env_id, seed, **all_kwargs)
+
+
+def _make(env_id: str, seed, **kwargs):
     """
     Converts an env_id to an environment with the gym API.
     This also works for DeepMind Control Suite interface_wrappers
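The reworked make pulls the registered default kwargs out of the gym registry and merges caller overrides into them via nested_update before delegating to _make, so nested configuration dicts can be partially overridden at call time. A hedged usage sketch (the module path is assumed from context and the environment id is a placeholder, not one registered by this commit):

from alr_envs.utils.make_env_helpers import make  # module path assumed

# Override only the nested 'tau' entry; every other registered default
# survives the merge performed by nested_update.
env = make("SomeProMPEnv-v0", seed=1, phase_generator_kwargs={'tau': 1.5})
obs = env.reset()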
@@ -102,12 +111,12 @@ def _make_wrapped_env(
 ):
     """
     Helper function for creating a wrapped gym environment using MPs.
-    It adds all provided wrappers to the specified environment and verifies at least one MPEnvWrapper is
+    It adds all provided wrappers to the specified environment and verifies at least one RawInterfaceWrapper is
     provided to expose the interface for MPs.
 
     Args:
         env_id: name of the environment
-        wrappers: list of wrappers (at least an MPEnvWrapper),
+        wrappers: list of wrappers (at least an RawInterfaceWrapper),
         seed: seed of environment
 
     Returns: gym environment with all specified wrappers applied
@@ -126,22 +135,20 @@
     return _env
 
 
-def make_bb_env(
+def make_bb(
-        env_id: str, wrappers: Iterable, black_box_wrapper_kwargs: MutableMapping, traj_gen_kwargs: MutableMapping,
+        env_id: str, wrappers: Iterable, black_box_kwargs: MutableMapping, traj_gen_kwargs: MutableMapping,
         controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed=1,
-        sequenced=False, **kwargs):
+        **kwargs):
     """
     This can also be used standalone for manually building a custom DMP environment.
     Args:
-        black_box_wrapper_kwargs: kwargs for the black-box wrapper
+        black_box_kwargs: kwargs for the black-box wrapper
         basis_kwargs: kwargs for the basis generator
         phase_kwargs: kwargs for the phase generator
         controller_kwargs: kwargs for the tracking controller
         env_id: base_env_name,
         wrappers: list of wrappers (at least an BlackBoxWrapper),
         seed: seed of environment
-        sequenced: When true, this allows to sequence multiple ProMPs by specifying the duration of each sub-trajectory,
-            this behavior is much closer to step based learning.
         traj_gen_kwargs: dict of at least {num_dof: int, num_basis: int} for DMP
 
     Returns: DMP wrapped gym env
@@ -150,19 +157,33 @@ def make_bb_env(
     _verify_time_limit(traj_gen_kwargs.get("duration", None), kwargs.get("time_limit", None))
     _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs)
 
-    if black_box_wrapper_kwargs.get('duration', None) is None:
+    learn_sub_trajs = black_box_kwargs.get('learn_sub_trajectories')
-        black_box_wrapper_kwargs['duration'] = _env.spec.max_episode_steps * _env.dt
+    do_replanning = black_box_kwargs.get('replanning_schedule')
-    if phase_kwargs.get('tau', None) is None:
+    if learn_sub_trajs and do_replanning:
-        phase_kwargs['tau'] = black_box_wrapper_kwargs['duration']
+        raise ValueError('Cannot used sub-trajectory learning and replanning together.')
+
+    if learn_sub_trajs or do_replanning:
+        # add time_step observation when replanning
+        kwargs['wrappers'].append(TimeAwareObservation)
 
     traj_gen_kwargs['action_dim'] = traj_gen_kwargs.get('action_dim', np.prod(_env.action_space.shape).item())
 
+    if black_box_kwargs.get('duration') is None:
+        black_box_kwargs['duration'] = _env.spec.max_episode_steps * _env.dt
+    if phase_kwargs.get('tau') is None:
+        phase_kwargs['tau'] = black_box_kwargs['duration']
+
+    if learn_sub_trajs is not None:
+        # We have to learn the length when learning sub_trajectories trajectories
+        phase_kwargs['learn_tau'] = True
+
     phase_gen = get_phase_generator(**phase_kwargs)
     basis_gen = get_basis_generator(phase_generator=phase_gen, **basis_kwargs)
     controller = get_controller(**controller_kwargs)
     traj_gen = get_trajectory_generator(basis_generator=basis_gen, **traj_gen_kwargs)
 
     bb_env = BlackBoxWrapper(_env, trajectory_generator=traj_gen, tracking_controller=controller,
-                             **black_box_wrapper_kwargs)
+                             **black_box_kwargs)
 
     return bb_env
 
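make_bb now derives two mutually exclusive modes from the black-box kwargs: learn_sub_trajectories and replanning_schedule cannot be combined, either one appends a TimeAwareObservation wrapper, and sub-trajectory learning additionally makes the phase generator learn tau. A configuration sketch under those rules (the environment id, the wrapper class from the sketch above, and the interval are placeholders; the factory dicts are left minimal for brevity and would need the generator and controller types filled in):

black_box_kwargs = {'learn_sub_trajectories': True}  # tau becomes learnable, time-aware observations are added

# Alternative mode; combining it with the flag above makes make_bb raise a ValueError.
# black_box_kwargs = {'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0}

env = make_bb(env_id="SomeEnv-v0", wrappers=[TwoJointMPWrapper],
              black_box_kwargs=black_box_kwargs,
              traj_gen_kwargs={}, controller_kwargs={},
              phase_kwargs={}, basis_kwargs={}, seed=1)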
@@ -204,16 +225,16 @@ def make_bb_env_helper(**kwargs):
     wrappers = kwargs.pop("wrappers")
 
     traj_gen_kwargs = kwargs.pop("traj_gen_kwargs", {})
-    black_box_kwargs = kwargs.pop('black_box_wrapper_kwargs', {})
+    black_box_kwargs = kwargs.pop('black_box_kwargs', {})
     contr_kwargs = kwargs.pop("controller_kwargs", {})
     phase_kwargs = kwargs.pop("phase_generator_kwargs", {})
     basis_kwargs = kwargs.pop("basis_generator_kwargs", {})
 
-    return make_bb_env(env_id=kwargs.pop("name"), wrappers=wrappers,
+    return make_bb(env_id=kwargs.pop("name"), wrappers=wrappers,
-                       black_box_wrapper_kwargs=black_box_kwargs,
+                   black_box_kwargs=black_box_kwargs,
                    traj_gen_kwargs=traj_gen_kwargs, controller_kwargs=contr_kwargs,
                    phase_kwargs=phase_kwargs,
                    basis_kwargs=basis_kwargs, **kwargs, seed=seed)
 
 
 def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[None, float]):
@@ -224,7 +245,7 @@ def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[None, float]):
     It can be found in the BaseMP class.
 
     Args:
-        mp_time_limit: max trajectory length of trajectory_generator in seconds
+        mp_time_limit: max trajectory length of traj_gen in seconds
         env_time_limit: max trajectory length of DMC environment in seconds
 
     Returns:
@@ -1,3 +1,5 @@
+from collections import Mapping, MutableMapping
+
 import numpy as np
 import torch as ch
 
@@ -23,4 +25,24 @@ def angle_normalize(x, type="deg"):
 
 
 def get_numpy(x: ch.Tensor):
+    """
+    Returns numpy array from torch tensor
+    Args:
+        x:
+
+    Returns:
+
+    """
     return x.detach().cpu().numpy()
+
+
+def nested_update(base: MutableMapping, update):
+    """
+    Updated method for nested Mappings
+    Args:
+        base: main Mapping to be updated
+        update: updated values for base Mapping
+
+    """
+    for k, v in update.items():
+        base[k] = nested_update(base.get(k, {}), v) if isinstance(v, Mapping) else v
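nested_update is the helper that make uses above to merge caller overrides into the registered defaults. As committed, the recursive branch assigns the return value of nested_update, which ends without a return statement, so nested sub-mappings would end up as None after the merge; a variant that returns the mapping, sketched below with hypothetical keys, matches what the docstring describes:

from collections.abc import Mapping  # the diff imports Mapping from collections instead


def nested_update_returning(base, update):
    # Same recursion, but returning the mapping so the recursive
    # assignment keeps the merged sub-dictionary.
    for k, v in update.items():
        base[k] = nested_update_returning(base.get(k, {}), v) if isinstance(v, Mapping) else v
    return base


defaults = {'phase_generator_kwargs': {'tau': 2, 'learn_tau': False}}
nested_update_returning(defaults, {'phase_generator_kwargs': {'tau': 1.5}})
# defaults is now {'phase_generator_kwargs': {'tau': 1.5, 'learn_tau': False}}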