diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py index cf009a4..3d2415c 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/alr/__init__.py @@ -24,7 +24,7 @@ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} DEFAULT_BB_DICT = { "name": 'EnvName', "wrappers": [], - "traj_gen_kwargs": { + "trajectory_generator_kwargs": { 'trajectory_generator_type': 'promp' }, "phase_generator_kwargs": { @@ -231,7 +231,6 @@ register( entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBased', max_episode_steps=300, kwargs={ - "rndm_goal": True, "cup_goal_pos": [-0.3, -1.2], "frame_skip": 2 } @@ -243,7 +242,6 @@ register( entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvFixedReleaseStep', max_episode_steps=300, kwargs={ - "rndm_goal": True, "cup_goal_pos": [-0.3, -1.2], "frame_skip": 2 } @@ -355,7 +353,7 @@ for _v in _versions: _env_id = f'{_name[0]}ProMP-{_name[1]}' kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT) kwargs_dict_hole_reacher_promp['wrappers'].append(classic_control.hole_reacher.MPWrapper) - kwargs_dict_hole_reacher_promp['traj_gen_kwargs']['weight_scale'] = 2 + kwargs_dict_hole_reacher_promp['trajectory_generator_kwargs']['weight_scale'] = 2 kwargs_dict_hole_reacher_promp['controller_kwargs']['controller_type'] = 'velocity' kwargs_dict_hole_reacher_promp['name'] = f"alr_envs:{_v}" register( diff --git a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py index 19bb0c5..85bc784 100644 --- a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py +++ b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py @@ -2,12 +2,12 @@ from typing import Tuple, Union import numpy as np -from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper +from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper class MPWrapper(RawInterfaceWrapper): - def get_context_mask(self): + def context_mask(self): return np.hstack([ [self.env.random_start] * self.env.n_links, # cos [self.env.random_start] * self.env.n_links, # sin @@ -25,3 +25,7 @@ class MPWrapper(RawInterfaceWrapper): @property def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: return self.env.current_vel + + @property + def dt(self) -> Union[float, int]: + return self.env.dt diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py index dfd6ea4..117003f 100644 --- a/alr_envs/alr/mujoco/beerpong/beerpong.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong.py @@ -19,12 +19,9 @@ CUP_POS_MAX = np.array([1.42, -1.25]) # CUP_POS_MAX = np.array([0.16, -1.7]) class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): - def __init__( - self, frame_skip=1, apply_gravity_comp=True, noisy=False, - rndm_goal=False, cup_goal_pos=None - ): + def __init__(self, frame_skip=2, apply_gravity_comp=True): - cup_goal_pos = np.array(cup_goal_pos if cup_goal_pos is not None else [-0.3, -1.2, 0.840]) + cup_goal_pos = np.array([-0.3, -1.2, 0.840]) if cup_goal_pos.shape[0] == 2: cup_goal_pos = np.insert(cup_goal_pos, 2, 0.840) self.cup_goal_pos = np.array(cup_goal_pos) @@ -38,9 +35,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7]) self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7]) - self.rndm_goal = rndm_goal self.apply_gravity_comp = apply_gravity_comp - self.add_noise = noisy self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59]) self._start_vel = np.zeros(7) @@ -48,17 +43,13 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): self.ball_site_id = 0 self.ball_id = 11 - # self._release_step = 175 # time step of ball release - # self._release_step = 130 # time step of ball release self.release_step = 100 # time step of ball release self.ep_length = 600 // frame_skip self.cup_table_id = 10 - if noisy: - self.noise_std = 0.01 - else: - self.noise_std = 0 + self.add_noise = False + reward_function = BeerPongReward self.reward_function = reward_function() self.repeat_action = frame_skip @@ -74,7 +65,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): return self._start_vel def reset(self): - self.reward_function.reset(self.add_noise) + self.reward_function.reset() return super().reset() def reset_model(self): @@ -88,15 +79,13 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): start_pos[0:7] = init_pos_robot self.set_state(start_pos, init_vel) - self.sim.model.body_pos[self.cup_table_id] = self.cup_goal_pos start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy() self.set_state(start_pos, init_vel) - if self.rndm_goal: - xy = self.np_random.uniform(CUP_POS_MIN, CUP_POS_MAX) - xyz = np.zeros(3) - xyz[:2] = xy - xyz[-1] = 0.840 - self.sim.model.body_pos[self.cup_table_id] = xyz + xy = self.np_random.uniform(CUP_POS_MIN, CUP_POS_MAX) + xyz = np.zeros(3) + xyz[:2] = xy + xyz[-1] = 0.840 + self.sim.model.body_pos[self.cup_table_id] = xyz return self._get_obs() def step(self, a): @@ -115,12 +104,9 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): if self._steps < self.release_step: self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy() self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy() - elif self._steps == self.release_step and self.add_noise: - self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3) crash = False except mujoco_py.builder.MujocoException: crash = True - # joint_cons_viol = self.check_traj_in_joint_limits() ob = self._get_obs() @@ -184,14 +170,14 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): class ALRBeerBongEnvFixedReleaseStep(ALRBeerBongEnv): - def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None): - super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos) + def __init__(self, frame_skip=2, apply_gravity_comp=True): + super().__init__(frame_skip, apply_gravity_comp) self.release_step = 62 # empirically evaluated for frame_skip=2! class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv): - def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None): - super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos) + def __init__(self, frame_skip=2, apply_gravity_comp=True): + super().__init__(frame_skip, apply_gravity_comp) self.release_step = 62 # empirically evaluated for frame_skip=2! def step(self, a): @@ -245,18 +231,21 @@ class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv): if __name__ == "__main__": - # env = ALRBeerBongEnv(rndm_goal=True) - # env = ALRBeerBongEnvStepBased(frame_skip=2, rndm_goal=True) - # env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2, rndm_goal=True) - env = ALRBeerBongEnvFixedReleaseStep(frame_skip=2, rndm_goal=True) + env = ALRBeerBongEnv(frame_skip=2) + # env = ALRBeerBongEnvStepBased(frame_skip=2) + # env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2) + # env = ALRBeerBongEnvFixedReleaseStep(frame_skip=2) import time env.reset() env.render("human") for i in range(1500): - ac = 10 * env.action_space.sample() + # ac = 10 * env.action_space.sample() + ac = np.ones(7) # ac = np.zeros(7) - # ac[0] = -1 + # ac[0] = 0 + # ac[1] = -0.01 + # ac[3] = -0.01 # if env._steps > 150: # ac[0] = 1 obs, rew, d, info = env.step(ac) diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py index 7e5fda6..cd2ab75 100644 --- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py @@ -32,6 +32,21 @@ class BeerPongReward: self.dists = None self.dists_final = None self.action_costs = None + self.ball_ground_contact_first = False + self.ball_table_contact = False + self.ball_wall_contact = False + self.ball_cup_contact = False + self.ball_in_cup = False + self.dist_ground_cup = -1 # distance floor to cup if first floor contact + + ### IDs + self.ball_collision_id = None + self.table_collision_id = None + self.wall_collision_id = None + self.cup_table_collision_id = None + self.ground_collision_id = None + self.cup_collision_ids = None + self.robot_collision_ids = None self.reset() self.is_initialized = False @@ -50,25 +65,27 @@ class BeerPongReward: if not self.is_initialized: self.is_initialized = True - self.ball_id = env.sim.model._body_name2id["ball"] - self.goal_id = env.sim.model._site_name2id["cup_goal_table"] - self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"] - self.cup_table_id = env.sim.model._body_name2id["cup_table"] - - self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"] - self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"] - self.wall_collision_id = env.sim.model._geom_name2id["wall"] - self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"] - self.ground_collision_id = env.sim.model._geom_name2id["ground"] - self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects] - self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects] + # TODO: Find a more elegant way to acces to the geom ids in each step -> less code + self.ball_collision_id = {env.model.geom_name2id("ball_geom")} + # self.ball_collision_id = env.model.geom_name2id("ball_geom") + self.table_collision_id = {env.model.geom_name2id("table_contact_geom")} + # self.table_collision_id = env.model.geom_name2id("table_contact_geom") + self.wall_collision_id = {env.model.geom_name2id("wall")} + # self.wall_collision_id = env.model.geom_name2id("wall") + self.cup_table_collision_id = {env.model.geom_name2id("cup_base_table_contact")} + # self.cup_table_collision_id = env.model.geom_name2id("cup_base_table_contact") + self.ground_collision_id = {env.model.geom_name2id("ground")} + # self.ground_collision_id = env.model.geom_name2id("ground") + self.cup_collision_ids = set(env.model.geom_name2id(name) for name in self.cup_collision_objects) + # self.cup_collision_ids = [env.model.geom_name2id(name) for name in self.cup_collision_objects] + self.robot_collision_ids = [env.model.geom_name2id(name) for name in self.robot_collision_objects] def compute_reward(self, env, action): - goal_pos = env.sim.data.site_xpos[self.goal_id] - ball_pos = env.sim.data.body_xpos[self.ball_id] - ball_vel = env.sim.data.body_xvelp[self.ball_id] - goal_final_pos = env.sim.data.site_xpos[self.goal_final_id] + goal_pos = env.data.get_site_xpos("cup_goal_table") + ball_pos = env.data.get_body_xpos("ball") + ball_vel = env.data.get_body_xvelp("ball") + goal_final_pos = env.data.get_site_xpos("cup_goal_final_table") self.check_contacts(env.sim) self.dists.append(np.linalg.norm(goal_pos - ball_pos)) @@ -77,16 +94,16 @@ class BeerPongReward: if self.ball_ground_contact_first and self.dist_ground_cup == -1 else self.dist_ground_cup action_cost = np.sum(np.square(action)) self.action_costs.append(np.copy(action_cost)) - # # ##################### Reward function which does not force to bounce once on the table (quad dist) ############ + # # ##################### Reward function which does not force to bounce once on the table (quad dist) ######### - self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids) + # Comment Onur: Is this needed? + # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids) - if env._steps == env.ep_length - 1 or self._is_collided: + if env._steps == env.ep_length - 1:# or self._is_collided: min_dist = np.min(self.dists) final_dist = self.dists_final[-1] if self.ball_ground_contact_first: - min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 2, -4 # relative rew offset when first bounding on ground - # min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 0, -6 # absolute rew offset when first bouncing on ground + min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 2, -4 else: if not self.ball_in_cup: if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact: @@ -95,85 +112,74 @@ class BeerPongReward: min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 0, -2 else: min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 0, 1, 0, 0 - # dist_ground_cup = 1 * self.dist_ground_cup action_cost = 1e-4 * np.mean(action_cost) reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \ action_cost - ground_contact_dist_coeff * self.dist_ground_cup ** 2 - # 1e-7*np.mean(action_cost) # release step punishment min_time_bound = 0.1 max_time_bound = 1.0 release_time = env.release_step * env.dt - release_time_rew = int(release_time < min_time_bound) * (-30 - 10 * (release_time - min_time_bound) ** 2) \ - + int(release_time > max_time_bound) * (-30 - 10 * (release_time - max_time_bound) ** 2) + release_time_rew = int(release_time < min_time_bound) * (-30 - 10 * (release_time - min_time_bound) ** 2) + \ + int(release_time > max_time_bound) * (-30 - 10 * (release_time - max_time_bound) ** 2) reward += release_time_rew success = self.ball_in_cup - # print('release time :', release_time) - # print('dist_ground_cup :', dist_ground_cup) else: action_cost = 1e-2 * action_cost reward = - action_cost - # reward = - 1e-4 * action_cost - # reward = 0 success = False - # ################################################################################################################ - infos = {} - infos["success"] = success - infos["is_collided"] = self._is_collided - infos["ball_pos"] = ball_pos.copy() - infos["ball_vel"] = ball_vel.copy() - infos["action_cost"] = action_cost - infos["task_reward"] = reward + # ############################################################################################################## + infos = {"success": success, "ball_pos": ball_pos.copy(), + "ball_vel": ball_vel.copy(), "action_cost": action_cost, "task_reward": reward, "is_collided": False} # TODO: Check if is collided is needed return reward, infos def check_contacts(self, sim): if not self.ball_table_contact: - self.ball_table_contact = self._check_collision_single_objects(sim, self.ball_collision_id, - self.table_collision_id) + self.ball_table_contact = self._check_collision(sim, self.ball_collision_id, self.table_collision_id) if not self.ball_cup_contact: - self.ball_cup_contact = self._check_collision_with_set_of_objects(sim, self.ball_collision_id, - self.cup_collision_ids) + self.ball_cup_contact = self._check_collision(sim, self.ball_collision_id, self.cup_collision_ids) if not self.ball_wall_contact: - self.ball_wall_contact = self._check_collision_single_objects(sim, self.ball_collision_id, - self.wall_collision_id) + self.ball_wall_contact = self._check_collision(sim, self.ball_collision_id, self.wall_collision_id) if not self.ball_in_cup: - self.ball_in_cup = self._check_collision_single_objects(sim, self.ball_collision_id, - self.cup_table_collision_id) + self.ball_in_cup = self._check_collision(sim, self.ball_collision_id, self.cup_table_collision_id) if not self.ball_ground_contact_first: - if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact and not self.ball_in_cup: - self.ball_ground_contact_first = self._check_collision_single_objects(sim, self.ball_collision_id, - self.ground_collision_id) + if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact \ + and not self.ball_in_cup: + self.ball_ground_contact_first = self._check_collision(sim, self.ball_collision_id, + self.ground_collision_id) - def _check_collision_single_objects(self, sim, id_1, id_2): - for coni in range(0, sim.data.ncon): + # Checks if id_set1 has a collision with id_set2 + def _check_collision(self, sim, id_set_1, id_set_2): + """ + If id_set_2 is set to None, it will check for a collision with itself (id_set_1). + """ + collision_id_set = id_set_2 - id_set_1 if id_set_2 is not None else id_set_1 + for coni in range(sim.data.ncon): con = sim.data.contact[coni] - - collision = con.geom1 == id_1 and con.geom2 == id_2 - collision_trans = con.geom1 == id_2 and con.geom2 == id_1 - - if collision or collision_trans: + if ((con.geom1 in id_set_1 and con.geom2 in collision_id_set) or + (con.geom2 in id_set_1 and con.geom1 in collision_id_set)): return True return False - def _check_collision_with_itself(self, sim, collision_ids): - col_1, col_2 = False, False - for j, id in enumerate(collision_ids): - col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j]) - if j != len(collision_ids) - 1: - col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:]) - else: - col_2 = False - collision = True if col_1 or col_2 else False - return collision + # def _check_collision_with_itself(self, sim, collision_ids): + # col_1, col_2 = False, False + # for j, id in enumerate(collision_ids): + # col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j]) + # if j != len(collision_ids) - 1: + # col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:]) + # else: + # col_2 = False + # collision = True if col_1 or col_2 else False + # return collision - def _check_collision_with_set_of_objects(self, sim, id_1, id_list): - for coni in range(0, sim.data.ncon): - con = sim.data.contact[coni] - - collision = con.geom1 in id_list and con.geom2 == id_1 - collision_trans = con.geom1 == id_1 and con.geom2 in id_list - - if collision or collision_trans: - return True - return False + ### This function will not be needed if we really do not need to check for collision with itself + # def _check_collision_with_set_of_objects(self, sim, id_1, id_list): + # for coni in range(0, sim.data.ncon): + # con = sim.data.contact[coni] + # + # collision = con.geom1 in id_list and con.geom2 == id_1 + # collision_trans = con.geom1 == id_1 and con.geom2 in id_list + # + # if collision or collision_trans: + # return True + # return False diff --git a/alr_envs/alr/mujoco/reacher/__init__.py b/alr_envs/alr/mujoco/reacher/__init__.py index 8a04a02..c5e6d2f 100644 --- a/alr_envs/alr/mujoco/reacher/__init__.py +++ b/alr_envs/alr/mujoco/reacher/__init__.py @@ -1 +1 @@ -from .new_mp_wrapper import MPWrapper +from .mp_wrapper import MPWrapper diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py index 3a8eec5..b5587a7 100644 --- a/alr_envs/utils/make_env_helpers.py +++ b/alr_envs/utils/make_env_helpers.py @@ -7,12 +7,12 @@ import numpy as np from gym.envs.registration import EnvSpec, registry from gym.wrappers import TimeAwareObservation -from alr_envs.black_box.factory.basis_generator_factory import get_basis_generator -from alr_envs.black_box.black_box_wrapper import BlackBoxWrapper -from alr_envs.black_box.controller.controller_factory import get_controller -from alr_envs.black_box.factory.trajectory_generator_factory import get_trajectory_generator -from alr_envs.black_box.factory.phase_generator_factory import get_phase_generator -from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper +from alr_envs.mp.basis_generator_factory import get_basis_generator +from alr_envs.mp.black_box_wrapper import BlackBoxWrapper +from alr_envs.mp.controllers.controller_factory import get_controller +from alr_envs.mp.mp_factory import get_trajectory_generator +from alr_envs.mp.phase_generator_factory import get_phase_generator +from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper from alr_envs.utils.utils import nested_update @@ -46,7 +46,6 @@ def make(env_id, seed, **kwargs): spec = registry.get(env_id) # This access is required to allow for nested dict updates all_kwargs = deepcopy(spec._kwargs) - # TODO append wrapper here nested_update(all_kwargs, **kwargs) return _make(env_id, seed, **all_kwargs) @@ -225,8 +224,8 @@ def make_bb_env_helper(**kwargs): seed = kwargs.pop("seed", None) wrappers = kwargs.pop("wrappers") - black_box_kwargs = kwargs.pop('black_box_kwargs', {}) traj_gen_kwargs = kwargs.pop("traj_gen_kwargs", {}) + black_box_kwargs = kwargs.pop('black_box_kwargs', {}) contr_kwargs = kwargs.pop("controller_kwargs", {}) phase_kwargs = kwargs.pop("phase_generator_kwargs", {}) basis_kwargs = kwargs.pop("basis_generator_kwargs", {})