diff --git a/README.md b/README.md
index 4042450..edd1aac 100644
--- a/README.md
+++ b/README.md
@@ -81,7 +81,7 @@ trajectory.
 ```python
 import alr_envs
 
-env = alr_envs.make('HoleReacherDetPMP-v0', seed=1)
+env = alr_envs.make('HoleReacherProMP-v0', seed=1)
 # render() can be called once in the beginning with all necessary arguments. To turn it of again just call render(None).
 env.render()
 
@@ -95,7 +95,7 @@ for i in range(5):
 ```
 
 To show all available environments, we provide some additional convenience. Each value will return a dictionary with two
-keys `DMP` and `DetPMP` that store a list of available environment names.
+keys `DMP` and `ProMP` that store a list of available environment names.
 
 ```python
 import alr_envs
@@ -193,7 +193,7 @@ mp_kwargs = {...}
 kwargs = {...}
 env = alr_envs.make_dmp_env(base_env_id, wrappers=wrappers, seed=1, mp_kwargs=mp_kwargs, **kwargs)
 # OR for a deterministic ProMP (other mp_kwargs are required):
-# env = alr_envs.make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_args)
+# env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_args)
 
 rewards = 0
 obs = env.reset()
diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py
index 30fa7b8..858a66c 100644
--- a/alr_envs/__init__.py
+++ b/alr_envs/__init__.py
@@ -1,5 +1,5 @@
 from alr_envs import dmc, meta, open_ai
-from alr_envs.utils.make_env_helpers import make, make_detpmp_env, make_dmp_env, make_promp_env, make_rank
+from alr_envs.utils.make_env_helpers import make, make_dmp_env, make_promp_env, make_rank
 from alr_envs.utils import make_dmc
 
 # Convenience function for all MP environments
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 7521ccf..90ec78c 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -10,7 +10,9 @@ from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
 from .mujoco.reacher.alr_reacher import ALRReacherEnv
 from .mujoco.reacher.balancing import BalancingEnv
 
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "DetPMP": []}
+from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
+
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
 
 # Classic Control
 ## Simple Reacher
@@ -195,16 +197,20 @@ register(
 )
 
 ## Table Tennis
-from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
 register(id='TableTennis2DCtxt-v0',
-         entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
+         entry_point='alr_envs.alr.mujoco:TTEnvGym',
          max_episode_steps=MAX_EPISODE_STEPS,
-         kwargs={'ctxt_dim':2})
+         kwargs={'ctxt_dim': 2})
+
+register(id='TableTennis2DCtxt-v1',
+         entry_point='alr_envs.alr.mujoco:TTEnvGym',
+         max_episode_steps=MAX_EPISODE_STEPS,
+         kwargs={'ctxt_dim': 2, 'fixed_goal': True})
 
 register(id='TableTennis4DCtxt-v0',
-         entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
+         entry_point='alr_envs.alr.mujoco:TTEnvGym',
          max_episode_steps=MAX_EPISODE_STEPS,
-         kwargs={'ctxt_dim':4})
+         kwargs={'ctxt_dim': 4})
 
 ## BeerPong
 difficulties = ["simple", "intermediate", "hard", "hardest"]
@@ -240,8 +246,12 @@ for _v in _versions:
                 "duration": 2,
                 "alpha_phase": 2,
                 "learn_goal": True,
-                "policy_type": "velocity",
+                "policy_type": "motor",
                 "weights_scale": 50,
+                "policy_kwargs": {
+                    "p_gains": .6,
+                    "d_gains": .075
+                }
             }
         }
     )
@@ -260,33 +270,16 @@ for _v in _versions:
                 "duration": 2,
                 "policy_type": "motor",
                 "weights_scale": 1,
-                "zero_start": True
+                "zero_start": True,
+                "policy_kwargs": {
+                    "p_gains": .6,
+                    "d_gains": .075
+                }
             }
         }
     )
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
-    _env_id = f'{_name[0]}DetPMP-{_name[1]}'
-    register(
-        id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
-        # max_episode_steps=1,
-        kwargs={
-            "name": f"alr_envs:{_v}",
-            "wrappers": [classic_control.simple_reacher.MPWrapper],
-            "mp_kwargs": {
-                "num_dof": 2 if "long" not in _v.lower() else 5,
-                "num_basis": 5,
-                "duration": 2,
-                "width": 0.025,
-                "policy_type": "velocity",
-                "weights_scale": 0.2,
-                "zero_start": True
-            }
-        }
-    )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id)
-
 # Viapoint reacher
 register(
     id='ViaPointReacherDMP-v0',
@@ -318,7 +311,7 @@ register(
             "num_dof": 5,
             "num_basis": 5,
             "duration": 2,
-            "policy_type": "motor",
+            "policy_type": "velocity",
             "weights_scale": 1,
             "zero_start": True
         }
@@ -326,26 +319,6 @@ register(
 )
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0")
 
-register(
-    id='ViaPointReacherDetPMP-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
-    # max_episode_steps=1,
-    kwargs={
-        "name": "alr_envs:ViaPointReacher-v0",
-        "wrappers": [classic_control.viapoint_reacher.MPWrapper],
-        "mp_kwargs": {
-            "num_dof": 5,
-            "num_basis": 5,
-            "duration": 2,
-            "width": 0.025,
-            "policy_type": "velocity",
-            "weights_scale": 0.2,
-            "zero_start": True
-        }
-    }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("ViaPointReacherDetPMP-v0")
-
 ## Hole Reacher
 _versions = ["v0", "v1", "v2"]
 for _v in _versions:
@@ -391,71 +364,77 @@ for _v in _versions:
     )
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
-    _env_id = f'HoleReacherDetPMP-{_v}'
+## Beerpong
+_versions = ["v0", "v1", "v2", "v3"]
+for _v in _versions:
+    _env_id = f'BeerpongProMP-{_v}'
     register(
         id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
         kwargs={
-            "name": f"alr_envs:HoleReacher-{_v}",
-            "wrappers": [classic_control.hole_reacher.MPWrapper],
+            "name": f"alr_envs:ALRBeerPong-{_v}",
+            "wrappers": [mujoco.beerpong.MPWrapper],
             "mp_kwargs": {
-                "num_dof": 5,
-                "num_basis": 5,
-                "duration": 2,
-                "width": 0.025,
-                "policy_type": "velocity",
-                "weights_scale": 0.2,
-                "zero_start": True
+                "num_dof": 7,
+                "num_basis": 2,
+                "duration": 1,
+                "post_traj_time": 2,
+                "policy_type": "motor",
+                "weights_scale": 1,
+                "zero_start": True,
+                "zero_goal": False,
+                "policy_kwargs": {
+                    "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
+                    "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+                }
             }
         }
     )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id)
-
-## Beerpong
-register(
-    id='BeerpongDetPMP-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
-    kwargs={
-        "name": "alr_envs:ALRBeerPong-v0",
-        "wrappers": [mujoco.beerpong.MPWrapper],
-        "mp_kwargs": {
-            "num_dof": 7,
-            "num_basis": 2,
-            "n_zero_bases": 2,
-            "duration": 0.5,
-            "post_traj_time": 2.5,
-            "width": 0.01,
-            "off": 0.01,
-            "policy_type": "motor",
-            "weights_scale": 0.08,
-            "zero_start": True,
-            "zero_goal": False,
-            "policy_kwargs": {
-                "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
-                "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
-            }
-        }
-    }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("BeerpongDetPMP-v0")
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 ## Table Tennis
+ctxt_dim = [2, 4]
+for _v, cd in enumerate(ctxt_dim):
+    _env_id = f'TableTennisProMP-v{_v}'
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+        kwargs={
+            "name": "alr_envs:TableTennis{}DCtxt-v0".format(cd),
+            "wrappers": [mujoco.table_tennis.MPWrapper],
+            "mp_kwargs": {
+                "num_dof": 7,
+                "num_basis": 2,
+                "duration": 1.25,
+                "post_traj_time": 4.5,
+                "policy_type": "motor",
+                "weights_scale": 1.0,
+                "zero_start": True,
+                "zero_goal": False,
+                "policy_kwargs": {
+                    "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
+                    "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
+                }
+            }
+        }
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
 register(
-    id='TableTennisDetPMP-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    id='TableTennisProMP-v2',
+    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
     kwargs={
-        "name": "alr_envs:TableTennis4DCtxt-v0",
+        "name": "alr_envs:TableTennis2DCtxt-v1",
         "wrappers": [mujoco.table_tennis.MPWrapper],
         "mp_kwargs": {
             "num_dof": 7,
             "num_basis": 2,
-            "n_zero_bases": 2,
-            "duration": 1.25,
-            "post_traj_time": 4.5,
-            "width": 0.01,
-            "off": 0.01,
+            "duration": 1.,
+            "post_traj_time": 2.5,
             "policy_type": "motor",
-            "weights_scale": 1.0,
+            "weights_scale": 1,
+            "off": -0.05,
+            "bandwidth_factor": 3.5,
             "zero_start": True,
             "zero_goal": False,
             "policy_kwargs": {
@@ -465,4 +444,4 @@ register(
         }
     }
 )
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("TableTennisDetPMP-v0")
\ No newline at end of file
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("TableTennisProMP-v2")
diff --git a/alr_envs/alr/classic_control/README.MD b/alr_envs/alr/classic_control/README.MD
index ebe2101..bd1b68b 100644
--- a/alr_envs/alr/classic_control/README.MD
+++ b/alr_envs/alr/classic_control/README.MD
@@ -13,9 +13,6 @@
 |---|---|---|---|---|
 |`ViaPointReacherDMP-v0`| A DMP provides a trajectory for the `ViaPointReacher-v0` task. | 200 | 25
 |`HoleReacherFixedGoalDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task with a fixed goal attractor. | 200 | 25
-|`HoleReacherDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task. The goal attractor needs to be learned. | 200 | 30
-|`ALRBallInACupSimpleDMP-v0`| A DMP provides a trajectory for the `ALRBallInACupSimple-v0` task where only 3 joints are actuated. | 4000 | 15
-|`ALRBallInACupDMP-v0`| A DMP provides a trajectory for the `ALRBallInACup-v0` task. | 4000 | 35
-|`ALRBallInACupGoalDMP-v0`| A DMP provides a trajectory for the `ALRBallInACupGoal-v0` task. | 4000 | 35 | 3
+|`HoleReacherDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task. The goal attractor needs to be learned. | 200 | 30
 
-[//]: |`HoleReacherDetPMP-v0`|
\ No newline at end of file
+[//]: |`HoleReacherProMPP-v0`|
\ No newline at end of file
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
index b44647e..292e40a 100644
--- a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
+++ b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
@@ -5,7 +5,6 @@ import matplotlib.pyplot as plt
 import numpy as np
 from gym.utils import seeding
 
-from alr_envs.alr.classic_control.utils import check_self_collision
 from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
 
 
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index 30e1e7c..cdb3cde 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -2,5 +2,5 @@ from .reacher.alr_reacher import ALRReacherEnv
 from .reacher.balancing import BalancingEnv
 from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
 from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
-from .table_tennis.tt_gym import TT_Env_Gym
+from .table_tennis.tt_gym import TTEnvGym
 from .beerpong.beerpong import ALRBeerBongEnv
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/utils.py b/alr_envs/alr/mujoco/ball_in_a_cup/utils.py
deleted file mode 100644
index 0dd972c..0000000
--- a/alr_envs/alr/mujoco/ball_in_a_cup/utils.py
+++ /dev/null
@@ -1,107 +0,0 @@
-from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
-from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper
-from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper
-
-
-def make_contextual_env(rank, seed=0):
-    """
-    Utility function for multiprocessed env.
-
-    :param env_id: (str) the environment ID
-    :param num_env: (int) the number of environments you wish to have in subprocesses
-    :param seed: (int) the initial seed for RNG
-    :param rank: (int) index of the subprocess
-    :returns a function that generates an environment
-    """
-
-    def _init():
-        env = ALRBallInACupEnv(reward_type="contextual_goal")
-
-        env = DetPMPWrapper(env, num_dof=7, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5,
-                            policy_type="motor", weights_scale=0.5, zero_start=True, zero_goal=True)
-
-        env.seed(seed + rank)
-        return env
-
-    return _init
-
-
-def _make_env(rank, seed=0):
-    """
-    Utility function for multiprocessed env.
-
-    :param env_id: (str) the environment ID
-    :param num_env: (int) the number of environments you wish to have in subprocesses
-    :param seed: (int) the initial seed for RNG
-    :param rank: (int) index of the subprocess
-    :returns a function that generates an environment
-    """
-
-    def _init():
-        env = ALRBallInACupEnv(reward_type="simple")
-
-        env = DetPMPWrapper(env, num_dof=7, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5,
-                            policy_type="motor", weights_scale=0.2, zero_start=True, zero_goal=True)
-
-        env.seed(seed + rank)
-        return env
-
-    return _init
-
-
-def make_simple_env(rank, seed=0):
-    """
-    Utility function for multiprocessed env.
-
-    :param env_id: (str) the environment ID
-    :param num_env: (int) the number of environments you wish to have in subprocesses
-    :param seed: (int) the initial seed for RNG
-    :param rank: (int) index of the subprocess
-    :returns a function that generates an environment
-    """
-
-    def _init():
-        env = ALRBallInACupEnv(reward_type="simple")
-
-        env = DetPMPWrapper(env, num_dof=3, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5,
-                            policy_type="motor", weights_scale=0.25, zero_start=True, zero_goal=True, off=-0.1)
-
-        env.seed(seed + rank)
-        return env
-
-    return _init
-
-
-def make_simple_dmp_env(rank, seed=0):
-    """
-    Utility function for multiprocessed env.
-
-    :param env_id: (str) the environment ID
-    :param num_env: (int) the number of environments you wish to have in subprocesses
-    :param seed: (int) the initial seed for RNG
-    :param rank: (int) index of the subprocess
-    :returns a function that generates an environment
-    """
-
-    def _init():
-        _env = ALRBallInACupEnv(reward_type="simple")
-
-        _env = DmpWrapper(_env,
-                          num_dof=3,
-                          num_basis=5,
-                          duration=3.5,
-                          post_traj_time=4.5,
-                          bandwidth_factor=2.5,
-                          dt=_env.dt,
-                          learn_goal=False,
-                          alpha_phase=3,
-                          start_pos=_env.start_pos[1::2],
-                          final_pos=_env.start_pos[1::2],
-                          policy_type="motor",
-                          weights_scale=100,
-                          )
-
-        _env.seed(seed + rank)
-        return _env
-
-    return _init
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index a10e54a..755710a 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -27,10 +27,10 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         self.ball_site_id = 0
         self.ball_id = 11
 
-        self._release_step = 100  # time step of ball release
+        self._release_step = 175  # time step of ball release
 
-        self.sim_time = 4  # seconds
-        self.ep_length = 600  # based on 5 seconds with dt = 0.005 int(self.sim_time / self.dt)
+        self.sim_time = 3  # seconds
+        self.ep_length = 600  # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
         self.cup_table_id = 10
 
         if noisy:
@@ -127,24 +127,28 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
             self._steps += 1
         else:
             reward = -30
+            reward_infos = dict()
             success = False
            is_collided = False
             done = True
             ball_pos = np.zeros(3)
             ball_vel = np.zeros(3)
 
-        return ob, reward, done, dict(reward_dist=reward_dist,
-                                      reward_ctrl=reward_ctrl,
-                                      reward=reward,
-                                      velocity=angular_vel,
-                                      # traj=self._q_pos,
-                                      action=a,
-                                      q_pos=self.sim.data.qpos[0:7].ravel().copy(),
-                                      q_vel=self.sim.data.qvel[0:7].ravel().copy(),
-                                      ball_pos=ball_pos,
-                                      ball_vel=ball_vel,
-                                      is_success=success,
-                                      is_collided=is_collided, sim_crash=crash)
+        infos = dict(reward_dist=reward_dist,
+                     reward_ctrl=reward_ctrl,
+                     reward=reward,
+                     velocity=angular_vel,
+                     # traj=self._q_pos,
+                     action=a,
+                     q_pos=self.sim.data.qpos[0:7].ravel().copy(),
+                     q_vel=self.sim.data.qvel[0:7].ravel().copy(),
+                     ball_pos=ball_pos,
+                     ball_vel=ball_vel,
+                     success=success,
+                     is_collided=is_collided, sim_crash=crash)
+        infos.update(reward_infos)
+
+        return ob, reward, done, infos
 
     def check_traj_in_joint_limits(self):
         return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
@@ -171,7 +175,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
 
 
 if __name__ == "__main__":
-    env = ALRBeerBongEnv(reward_type="no_context", difficulty='hardest')
+    env = ALRBeerBongEnv(reward_type="staged", difficulty='hardest')
     # env.configure(ctxt)
     env.reset()
 
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
index 3896e82..dc39ca8 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
@@ -71,6 +71,7 @@ class BeerPongReward:
 
         goal_pos = env.sim.data.site_xpos[self.goal_id]
         ball_pos = env.sim.data.body_xpos[self.ball_id]
+        ball_vel = env.sim.data.body_xvelp[self.ball_id]
         goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
         self.dists.append(np.linalg.norm(goal_pos - ball_pos))
         self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
@@ -131,6 +132,7 @@ class BeerPongReward:
         infos["success"] = success
         infos["is_collided"] = self._is_collided
         infos["ball_pos"] = ball_pos.copy()
+        infos["ball_vel"] = ball_vel.copy()
         infos["action_cost"] = 5e-4 * action_cost
 
         return reward, infos
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index 9d1d878..e94b470 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -81,32 +81,36 @@ class BeerPongReward:
         action_cost = np.sum(np.square(action))
         self.action_costs.append(action_cost)
 
+        if not self.ball_table_contact:
+            self.ball_table_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+                                                                           self.table_collision_id)
+
         self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
         if env._steps == env.ep_length - 1 or self._is_collided:
 
             min_dist = np.min(self.dists)
-            ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
-                                                                     self.table_collision_id)
-            ball_cup_table_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
-                                                                            self.cup_collision_ids)
-            ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id,
-                                                                  self.wall_collision_id)
-
+            final_dist = self.dists_final[-1]
+
             ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id,
                                                                self.cup_table_collision_id)
-            if not ball_in_cup:
-                cost_offset = 2
-                if not ball_cup_table_cont and not ball_table_bounce and not ball_wall_cont:
-                    cost_offset += 2
-                cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-7 * action_cost
-            else:
-                cost = self.dists_final[-1] ** 2 + 1.5 * action_cost * 1e-7
-            reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
+
+            # encourage bounce before falling into cup
+            if not ball_in_cup:
+                if not self.ball_table_contact:
+                    reward = 0.2 * (1 - np.tanh(min_dist ** 2)) + 0.1 * (1 - np.tanh(final_dist ** 2))
+                else:
+                    reward = (1 - np.tanh(min_dist ** 2)) + 0.5 * (1 - np.tanh(final_dist ** 2))
+            else:
+                if not self.ball_table_contact:
+                    reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 1
+                else:
+                    reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 3
+
+            # reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
             success = ball_in_cup
             crash = self._is_collided
         else:
-            reward = - 1e-7 * action_cost
-            cost = 0
+            reward = - 1e-2 * action_cost
             success = False
             crash = False
 
@@ -115,26 +119,11 @@
         infos["success"] = success
         infos["is_collided"] = self._is_collided
         infos["ball_pos"] = ball_pos.copy()
         infos["ball_vel"] = ball_vel.copy()
-        infos["action_cost"] = 5e-4 * action_cost
-        infos["task_cost"] = cost
+        infos["action_cost"] = action_cost
+        infos["task_reward"] = reward
         return reward, infos
 
-    def get_cost_offset(self):
-        if self.ball_ground_contact:
-            return 200
-
-        if not self.ball_table_contact:
-            return 100
-
-        if not self.ball_in_cup:
-            return 50
-
-        if self.ball_in_cup and self.ball_cup_contact and not self.noisy_bp:
-            return 10
-
-        return 0
-
     def _check_collision_single_objects(self, sim, id_1, id_2):
         for coni in range(0, sim.data.ncon):
             con = sim.data.contact[coni]
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_simple.py b/alr_envs/alr/mujoco/beerpong/beerpong_simple.py
index 73da83d..1708d38 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_simple.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_simple.py
@@ -6,8 +6,6 @@ from gym.envs.mujoco import MujocoEnv
 
 class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
     def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None):
-        utils.EzPickle.__init__(**locals())
-
         self._steps = 0
         self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                      "assets",
@@ -28,15 +26,13 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
 
         self.context = None
 
-        MujocoEnv.__init__(self, model_path=self.xml_path, frame_skip=n_substeps)
-
         # alr_mujoco_env.AlrMujocoEnv.__init__(self,
         #                                      self.xml_path,
         #                                      apply_gravity_comp=apply_gravity_comp,
         #                                      n_substeps=n_substeps)
 
         self.sim_time = 8  # seconds
-        self.sim_steps = int(self.sim_time / self.dt)
+        # self.sim_steps = int(self.sim_time / self.dt)
         if reward_function is None:
             from alr_envs.alr.mujoco.beerpong.beerpong_reward_simple import BeerpongReward
             reward_function = BeerpongReward
@@ -46,6 +42,9 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
         self.cup_table_id = self.sim.model._body_name2id["cup_table"]
         # self.bounce_table_id = self.sim.model._body_name2id["bounce_table"]
 
+        MujocoEnv.__init__(self, model_path=self.xml_path, frame_skip=n_substeps)
+        utils.EzPickle.__init__(self)
+
     @property
     def current_pos(self):
         return self.sim.data.qpos[0:7].copy()
@@ -90,7 +89,7 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
         reward_ctrl = - np.square(a).sum()
         action_cost = np.sum(np.square(a))
 
-        crash = self.do_simulation(a)
+        crash = self.do_simulation(a, self.frame_skip)
         joint_cons_viol = self.check_traj_in_joint_limits()
 
         self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
diff --git a/alr_envs/alr/mujoco/beerpong/utils.py b/alr_envs/alr/mujoco/beerpong/utils.py
deleted file mode 100644
index f43e881..0000000
--- a/alr_envs/alr/mujoco/beerpong/utils.py
+++ /dev/null
@@ -1,72 +0,0 @@
-from alr_envs.utils.mps.detpmp_wrapper import DetPMPWrapper
-from alr_envs.alr.mujoco.beerpong.beerpong import ALRBeerpongEnv
-from alr_envs.alr.mujoco.beerpong.beerpong_simple import ALRBeerpongEnv as ALRBeerpongEnvSimple
-
-
-def make_contextual_env(rank, seed=0):
-    """
-    Utility function for multiprocessed env.
-
-    :param env_id: (str) the environment ID
-    :param num_env: (int) the number of environments you wish to have in subprocesses
-    :param seed: (int) the initial seed for RNG
-    :param rank: (int) index of the subprocess
-    :returns a function that generates an environment
-    """
-
-    def _init():
-        env = ALRBeerpongEnv()
-
-        env = DetPMPWrapper(env, num_dof=7, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5,
-                            policy_type="motor", weights_scale=0.5, zero_start=True, zero_goal=True)
-
-        env.seed(seed + rank)
-        return env
-
-    return _init
-
-
-def _make_env(rank, seed=0):
-    """
-    Utility function for multiprocessed env.
-
-    :param env_id: (str) the environment ID
-    :param num_env: (int) the number of environments you wish to have in subprocesses
-    :param seed: (int) the initial seed for RNG
-    :param rank: (int) index of the subprocess
-    :returns a function that generates an environment
-    """
-
-    def _init():
-        env = ALRBeerpongEnvSimple()
-
-        env = DetPMPWrapper(env, num_dof=7, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5,
-                            policy_type="motor", weights_scale=0.25, zero_start=True, zero_goal=True)
-
-        env.seed(seed + rank)
-        return env
-
-    return _init
-
-
-def make_simple_env(rank, seed=0):
-    """
-    Utility function for multiprocessed env.
-
-    :param env_id: (str) the environment ID
-    :param num_env: (int) the number of environments you wish to have in subprocesses
-    :param seed: (int) the initial seed for RNG
-    :param rank: (int) index of the subprocess
-    :returns a function that generates an environment
-    """
-
-    def _init():
-        env = ALRBeerpongEnvSimple()
-
-        env = DetPMPWrapper(env, num_dof=3, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5,
-                            policy_type="motor", weights_scale=0.5, zero_start=True, zero_goal=True)
-
-        env.seed(seed + rank)
-        return env
-
-    return _init
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_gym.py b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
index 635d49d..d1c2dc3 100644
--- a/alr_envs/alr/mujoco/table_tennis/tt_gym.py
+++ b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
@@ -10,7 +10,7 @@ from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
 
 #TODO: Check for simulation stability. Make sure the code runs even for sim crash
 
-MAX_EPISODE_STEPS = 1375
+MAX_EPISODE_STEPS = 1750
 BALL_NAME_CONTACT = "target_ball_contact"
 BALL_NAME = "target_ball"
 TABLE_NAME = 'table_tennis_table'
@@ -22,24 +22,30 @@ RACKET_NAME = 'bat'
 
 CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.0]])
 CONTEXT_RANGE_BOUNDS_4DIM = np.array([[-1.35, -0.75, -1.25, -0.75], [-0.1, 0.75, -0.1, 0.75]])
 
 
-class TT_Env_Gym(MujocoEnv, utils.EzPickle):
-    def __init__(self, ctxt_dim=2):
+class TTEnvGym(MujocoEnv, utils.EzPickle):
+
+    def __init__(self, ctxt_dim=2, fixed_goal=False):
         model_path = os.path.join(os.path.dirname(__file__), "xml", 'table_tennis_env.xml')
         self.ctxt_dim = ctxt_dim
+        self.fixed_goal = fixed_goal
         if ctxt_dim == 2:
             self.context_range_bounds = CONTEXT_RANGE_BOUNDS_2DIM
-            self.goal = np.zeros(3)  # 2 x,y + 1z
+            if self.fixed_goal:
+                self.goal = np.array([-1, -0.1, 0])
+            else:
+                self.goal = np.zeros(3)  # 2 x,y + 1z
         elif ctxt_dim == 4:
             self.context_range_bounds = CONTEXT_RANGE_BOUNDS_4DIM
             self.goal = np.zeros(3)
         else:
             raise ValueError("either 2 or 4 dimensional Contexts available")
 
-        action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
-        action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
-        self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64')
+        # has no effect as it is overwritten in init of super
+        # action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
+        # action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
+        # self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64')
 
         self.time_steps = 0
         self.init_qpos_tt = np.array([0, 0, 0, 1.5, 0, 0, 1.5, 0, 0, 0])
@@ -47,10 +53,10 @@ class TT_Env_Gym(MujocoEnv, utils.EzPickle):
         self.reward_func = TT_Reward(self.ctxt_dim)
         self.ball_landing_pos = None
-        self.hited_ball = False
+        self.hit_ball = False
         self.ball_contact_after_hit = False
         self._ids_set = False
 
-        super(TT_Env_Gym, self).__init__(model_path=model_path, frame_skip=1)
+        super(TTEnvGym, self).__init__(model_path=model_path, frame_skip=1)
         self.ball_id = self.sim.model._body_name2id[BALL_NAME]  # find the proper -> not protected func.
         self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
         self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
@@ -77,15 +83,18 @@ class TT_Env_Gym(MujocoEnv, utils.EzPickle):
         return obs
 
     def sample_context(self):
-        return np.random.uniform(self.context_range_bounds[0], self.context_range_bounds[1], size=self.ctxt_dim)
+        return self.np_random.uniform(self.context_range_bounds[0], self.context_range_bounds[1], size=self.ctxt_dim)
 
     def reset_model(self):
         self.set_state(self.init_qpos_tt, self.init_qvel_tt)  # reset to initial sim state
         self.time_steps = 0
         self.ball_landing_pos = None
-        self.hited_ball = False
+        self.hit_ball = False
         self.ball_contact_after_hit = False
-        self.goal = self.sample_context()[:2]
+        if self.fixed_goal:
+            self.goal = self.goal[:2]
+        else:
+            self.goal = self.sample_context()[:2]
         if self.ctxt_dim == 2:
             initial_ball_state = ball_init(random=False)  # fixed velocity, fixed position
         elif self.ctxt_dim == 4:
@@ -122,12 +131,12 @@ class TT_Env_Gym(MujocoEnv, utils.EzPickle):
         if not self._ids_set:
             self._set_ids()
         done = False
-        episode_end = False if self.time_steps+1