diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py
deleted file mode 100644
index 858a66c..0000000
--- a/alr_envs/__init__.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from alr_envs import dmc, meta, open_ai
-from alr_envs.utils.make_env_helpers import make, make_dmp_env, make_promp_env, make_rank
-from alr_envs.utils import make_dmc
-
-# Convenience collections of all MP environment IDs
-from .alr import ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS
-from .dmc import ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS
-from .meta import ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS
-from .open_ai import ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS
-
-ALL_MOTION_PRIMITIVE_ENVIRONMENTS = {
-    key: value + ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS[key] +
-         ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS[key] +
-         ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS[key]
-    for key, value in ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS.items()}
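For reference, the removed top-level package above wired all registrations into gym, so before this deletion the environments could be created roughly like this (a sketch; it assumes alr_envs and its dependencies are still installed, and uses only the names exposed by the file above):

    import gym
    import alr_envs  # importing the package runs the register() calls removed in this diff

    # the combined dictionary built above maps "DMP"/"ProMP" to lists of registered IDs
    print(alr_envs.ALL_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"][:3])
    env = gym.make("alr_envs:HoleReacherProMP-v0")  # gym's "module:id" form imports alr_envs first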
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
deleted file mode 100644
index 8a7140d..0000000
--- a/alr_envs/alr/__init__.py
+++ /dev/null
@@ -1,499 +0,0 @@
-import numpy as np
-from gym import register
-
-from . import classic_control, mujoco
-from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv
-from .classic_control.simple_reacher.simple_reacher import SimpleReacherEnv
-from .classic_control.viapoint_reacher.viapoint_reacher import ViaPointReacherEnv
-from .mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
-from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
-from .mujoco.reacher.alr_reacher import ALRReacherEnv
-from .mujoco.reacher.balancing import BalancingEnv
-
-from .mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
-
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
-
-# Classic Control
-## Simple Reacher
-register(
-    id='SimpleReacher-v0',
-    entry_point='alr_envs.alr.classic_control:SimpleReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "n_links": 2,
-    }
-)
-
-register(
-    id='SimpleReacher-v1',
-    entry_point='alr_envs.alr.classic_control:SimpleReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "n_links": 2,
-        "random_start": False
-    }
-)
-
-register(
-    id='LongSimpleReacher-v0',
-    entry_point='alr_envs.alr.classic_control:SimpleReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "n_links": 5,
-    }
-)
-
-register(
-    id='LongSimpleReacher-v1',
-    entry_point='alr_envs.alr.classic_control:SimpleReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "n_links": 5,
-        "random_start": False
-    }
-)
-
-## Viapoint Reacher
-
-register(
-    id='ViaPointReacher-v0',
-    entry_point='alr_envs.alr.classic_control:ViaPointReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "n_links": 5,
-        "allow_self_collision": False,
-        "collision_penalty": 1000
-    }
-)
-
-## Hole Reacher
-register(
-    id='HoleReacher-v0',
-    entry_point='alr_envs.alr.classic_control:HoleReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "n_links": 5,
-        "random_start": True,
-        "allow_self_collision": False,
-        "allow_wall_collision": False,
-        "hole_width": None,
-        "hole_depth": 1,
-        "hole_x": None,
-        "collision_penalty": 100,
-    }
-)
-
-register(
-    id='HoleReacher-v1',
-    entry_point='alr_envs.alr.classic_control:HoleReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "n_links": 5,
-        "random_start": False,
-        "allow_self_collision": False,
-        "allow_wall_collision": False,
-        "hole_width": 0.25,
-        "hole_depth": 1,
-        "hole_x": None,
-        "collision_penalty": 100,
-    }
-)
-
-register(
-    id='HoleReacher-v2',
-    entry_point='alr_envs.alr.classic_control:HoleReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "n_links": 5,
-        "random_start": False,
-        "allow_self_collision": False,
-        "allow_wall_collision": False,
-        "hole_width": 0.25,
-        "hole_depth": 1,
-        "hole_x": 2,
-        "collision_penalty": 1,
-    }
-)
-
-# Mujoco
-
-## Reacher
-register(
-    id='ALRReacher-v0',
-    entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 0,
-        "n_links": 5,
-        "balance": False,
-    }
-)
-
-register(
-    id='ALRReacherSparse-v0',
-    entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 200,
-        "n_links": 5,
-        "balance": False,
-    }
-)
-
-register(
-    id='ALRReacherSparseBalanced-v0',
-    entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 200,
-        "n_links": 5,
-        "balance": True,
-    }
-)
-
-register(
-    id='ALRLongReacher-v0',
-    entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 0,
-        "n_links": 7,
-        "balance": False,
-    }
-)
-
-register(
-    id='ALRLongReacherSparse-v0',
-    entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 200,
-        "n_links": 7,
-        "balance": False,
-    }
-)
-
-register(
-    id='ALRLongReacherSparseBalanced-v0',
-    entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 200,
-        "n_links": 7,
-        "balance": True,
-    }
-)
-
-## Balancing Reacher
-
-register(
-    id='Balancing-v0',
-    entry_point='alr_envs.alr.mujoco:BalancingEnv',
-    max_episode_steps=200,
-    kwargs={
-        "n_links": 5,
-    }
-)
-
-## Table Tennis
-register(id='TableTennis2DCtxt-v0',
-         entry_point='alr_envs.alr.mujoco:TTEnvGym',
-         max_episode_steps=MAX_EPISODE_STEPS,
-         kwargs={'ctxt_dim': 2})
-
-register(id='TableTennis2DCtxt-v1',
-         entry_point='alr_envs.alr.mujoco:TTEnvGym',
-         max_episode_steps=MAX_EPISODE_STEPS,
-         kwargs={'ctxt_dim': 2, 'fixed_goal': True})
-
-register(id='TableTennis4DCtxt-v0',
-         entry_point='alr_envs.alr.mujoco:TTEnvGym',
-         max_episode_steps=MAX_EPISODE_STEPS,
-         kwargs={'ctxt_dim': 4})
-
-## BeerPong
-difficulties = ["simple", "intermediate", "hard", "hardest"]
-
-for v, difficulty in enumerate(difficulties):
-    register(
-        id='ALRBeerPong-v{}'.format(v),
-        entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
-        max_episode_steps=600,
-        kwargs={
-            "difficulty": difficulty,
-            "reward_type": "staged",
-        }
-    )
-
-# Motion Primitive Environments
-
-## Simple Reacher
-_versions = ["SimpleReacher-v0", "SimpleReacher-v1", "LongSimpleReacher-v0", "LongSimpleReacher-v1"]
-for _v in _versions:
-    _name = _v.split("-")
-    _env_id = f'{_name[0]}DMP-{_name[1]}'
-    register(
-        id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
-        # max_episode_steps=1,
-        kwargs={
-            "name": f"alr_envs:{_v}",
-            "wrappers": [classic_control.simple_reacher.MPWrapper],
-            "mp_kwargs": {
-                "num_dof": 2 if "long" not in _v.lower() else 5,
-                "num_basis": 5,
-                "duration": 2,
-                "alpha_phase": 2,
-                "learn_goal": True,
-                "policy_type": "motor",
-                "weights_scale": 50,
-                "policy_kwargs": {
-                    "p_gains": .6,
-                    "d_gains": .075
-                }
-            }
-        }
-    )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
-
-    _env_id = f'{_name[0]}ProMP-{_name[1]}'
-    register(
-        id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-        kwargs={
-            "name": f"alr_envs:{_v}",
-            "wrappers": [classic_control.simple_reacher.MPWrapper],
-            "mp_kwargs": {
-                "num_dof": 2 if "long" not in _v.lower() else 5,
-                "num_basis": 5,
-                "duration": 2,
-                "policy_type": "motor",
-                "weights_scale": 1,
-                "zero_start": True,
-                "policy_kwargs": {
-                    "p_gains": .6,
-                    "d_gains": .075
-                }
-            }
-        }
-    )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-# Viapoint reacher
-register(
-    id='ViaPointReacherDMP-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
-    # max_episode_steps=1,
-    kwargs={
-        "name": "alr_envs:ViaPointReacher-v0",
-        "wrappers": [classic_control.viapoint_reacher.MPWrapper],
-        "mp_kwargs": {
-            "num_dof": 5,
-            "num_basis": 5,
-            "duration": 2,
-            "learn_goal": True,
-            "alpha_phase": 2,
-            "policy_type": "velocity",
-            "weights_scale": 50,
-        }
-    }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
-
-register(
-    id="ViaPointReacherProMP-v0",
-    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-    kwargs={
-        "name": f"alr_envs:ViaPointReacher-v0",
-        "wrappers": [classic_control.viapoint_reacher.MPWrapper],
-        "mp_kwargs": {
-            "num_dof": 5,
-            "num_basis": 5,
-            "duration": 2,
-            "policy_type": "velocity",
-            "weights_scale": 1,
-            "zero_start": True
-        }
-    }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0")
-
-## Hole Reacher
-_versions = ["v0", "v1", "v2"]
-for _v in _versions:
-    _env_id = f'HoleReacherDMP-{_v}'
-    register(
-        id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
-        # max_episode_steps=1,
-        kwargs={
-            "name": f"alr_envs:HoleReacher-{_v}",
-            "wrappers": [classic_control.hole_reacher.MPWrapper],
-            "mp_kwargs": {
-                "num_dof": 5,
-                "num_basis": 5,
-                "duration": 2,
-                "learn_goal": True,
-                "alpha_phase": 2.5,
-                "bandwidth_factor": 2,
-                "policy_type": "velocity",
-                "weights_scale": 50,
-                "goal_scale": 0.1
-            }
-        }
-    )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
-
-    _env_id = f'HoleReacherProMP-{_v}'
-    register(
-        id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-        kwargs={
-            "name": f"alr_envs:HoleReacher-{_v}",
-            "wrappers": [classic_control.hole_reacher.MPWrapper],
-            "mp_kwargs": {
-                "num_dof": 5,
-                "num_basis": 5,
-                "duration": 2,
-                "policy_type": "velocity",
-                "weights_scale": 5,
-                "zero_start": True
-            }
-        }
-    )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-## ALRReacher
-_versions = ["ALRReacher-v0", "ALRLongReacher-v0", "ALRReacherSparse-v0", "ALRLongReacherSparse-v0"]
-for _v in _versions:
-    _name = _v.split("-")
-    _env_id = f'{_name[0]}DMP-{_name[1]}'
-    register(
-        id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
-        # max_episode_steps=1,
-        kwargs={
-            "name": f"alr_envs:{_v}",
-            "wrappers": [mujoco.reacher.MPWrapper],
-            "mp_kwargs": {
-                "num_dof": 5 if "long" not in _v.lower() else 7,
-                "num_basis": 2,
-                "duration": 4,
-                "alpha_phase": 2,
-                "learn_goal": True,
-                "policy_type": "motor",
-                "weights_scale": 5,
-                "policy_kwargs": {
-                    "p_gains": 1,
-                    "d_gains": 0.1
-                }
-            }
-        }
-    )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
-
-    _env_id = f'{_name[0]}ProMP-{_name[1]}'
-    register(
-        id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-        kwargs={
-            "name": f"alr_envs:{_v}",
-            "wrappers": [mujoco.reacher.MPWrapper],
-            "mp_kwargs": {
-                "num_dof": 5 if "long" not in _v.lower() else 7,
-                "num_basis": 1,
-                "duration": 4,
-                "policy_type": "motor",
-                "weights_scale": 5,
-                "zero_start": True,
-                "policy_kwargs": {
-                    "p_gains": 1,
-                    "d_gains": 0.1
-                }
-            }
-        }
-    )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-## Beerpong
-_versions = ["v0", "v1", "v2", "v3"]
-for _v in _versions:
-    _env_id = f'BeerpongProMP-{_v}'
-    register(
-        id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-        kwargs={
-            "name": f"alr_envs:ALRBeerPong-{_v}",
-            "wrappers": [mujoco.beerpong.MPWrapper],
-            "mp_kwargs": {
-                "num_dof": 7,
-                "num_basis": 2,
-                "duration": 1,
-                "post_traj_time": 2,
-                "policy_type": "motor",
-                "weights_scale": 1,
-                "zero_start": True,
-                "zero_goal": False,
-                "policy_kwargs": {
-                    "p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
-                    "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
-                }
-            }
-        }
-    )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-## Table Tennis
-ctxt_dim = [2, 4]
-for _v, cd in enumerate(ctxt_dim):
-    _env_id = f'TableTennisProMP-v{_v}'
-    register(
-        id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-        kwargs={
-            "name": "alr_envs:TableTennis{}DCtxt-v0".format(cd),
-            "wrappers": [mujoco.table_tennis.MPWrapper],
-            "mp_kwargs": {
-                "num_dof": 7,
-                "num_basis": 2,
-                "duration": 1.25,
-                "post_traj_time": 4.5,
-                "policy_type": "motor",
-                "weights_scale": 1.0,
-                "zero_start": True,
-                "zero_goal": False,
-                "policy_kwargs": {
-                    "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
-                    "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
-                }
-            }
-        }
-    )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-register(
-    id='TableTennisProMP-v2',
-    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-    kwargs={
-        "name": "alr_envs:TableTennis2DCtxt-v1",
-        "wrappers": [mujoco.table_tennis.MPWrapper],
-        "mp_kwargs": {
-            "num_dof": 7,
-            "num_basis": 2,
-            "duration": 1.,
-            "post_traj_time": 2.5,
-            "policy_type": "motor",
-            "weights_scale": 1,
-            "off": -0.05,
-            "bandwidth_factor": 3.5,
-            "zero_start": True,
-            "zero_goal": False,
-            "policy_kwargs": {
-                "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
-                "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
-            }
-        }
-    }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("TableTennisProMP-v2")
diff --git a/alr_envs/alr/classic_control/README.MD b/alr_envs/alr/classic_control/README.MD
deleted file mode 100644
index bd1b68b..0000000
--- a/alr_envs/alr/classic_control/README.MD
+++ /dev/null
@@ -1,18 +0,0 @@
-### Classic Control
-
-## Step-based Environments
-|Name| Description|Horizon|Action Dimension|Observation Dimension
-|---|---|---|---|---|
-|`SimpleReacher-v0`| Simple reaching task (2 links) without any physics simulation. Provides no distance reward until the final time step (step 199). This allows the agent to explore the space, but requires precise actions towards the end of the trajectory.| 200 | 2 | 9
-|`LongSimpleReacher-v0`| Simple reaching task (5 links) without any physics simulation. Provides no distance reward until the final time step (step 199). This allows the agent to explore the space, but requires precise actions towards the end of the trajectory.| 200 | 5 | 18
-|`ViaPointReacher-v0`| Simple reaching task leveraging a via point, which supports self collision detection. Provides a reward only at 100 and 199 for reaching the viapoint and goal point, respectively.| 200 | 5 | 18
-|`HoleReacher-v0`| 5-link reaching task where the end-effector needs to reach into a narrow hole without colliding with itself or the walls. | 200 | 5 | 18
-
-## MP Environments
-|Name| Description|Horizon|Action Dimension|Context Dimension
-|---|---|---|---|---|
-|`ViaPointReacherDMP-v0`| A DMP provides a trajectory for the `ViaPointReacher-v0` task. | 200 | 25
-|`HoleReacherFixedGoalDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task with a fixed goal attractor. | 200 | 25
-|`HoleReacherDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task. The goal attractor needs to be learned. | 200 | 30
-
-[//]: |`HoleReacherProMP-v0`|
\ No newline at end of file
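A step-based environment from the table above could be rolled out like any other gym environment. A minimal sketch, assuming the package is installed and the registrations from alr_envs/alr/__init__.py are still in place:

    import gym

    env = gym.make("alr_envs:SimpleReacher-v0")  # 2-link reacher, horizon 200 (see table above)
    obs = env.reset()
    for _ in range(200):
        obs, reward, done, info = env.step(env.action_space.sample())  # random torques
        env.render()
        if done:  # set by the TimeLimit wrapper after 200 steps
            break
    env.close()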
diff --git a/alr_envs/alr/classic_control/__init__.py b/alr_envs/alr/classic_control/__init__.py
deleted file mode 100644
index 73575ab..0000000
--- a/alr_envs/alr/classic_control/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .hole_reacher.hole_reacher import HoleReacherEnv
-from .simple_reacher.simple_reacher import SimpleReacherEnv
-from .viapoint_reacher.viapoint_reacher import ViaPointReacherEnv
diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher.py b/alr_envs/alr/classic_control/base_reacher/base_reacher.py
deleted file mode 100644
index 1b1ad19..0000000
--- a/alr_envs/alr/classic_control/base_reacher/base_reacher.py
+++ /dev/null
@@ -1,146 +0,0 @@
-from typing import Iterable, Union
-from abc import ABC, abstractmethod
-import gym
-import matplotlib.pyplot as plt
-import numpy as np
-from gym import spaces
-from gym.utils import seeding
-from alr_envs.alr.classic_control.utils import intersect
-
-
-class BaseReacherEnv(gym.Env, ABC):
- """
- Base class for all reaching environments.
- """
-
- def __init__(self, n_links: int, random_start: bool = True,
- allow_self_collision: bool = False):
- super().__init__()
- self.link_lengths = np.ones(n_links)
- self.n_links = n_links
- self._dt = 0.01
-
- self.random_start = random_start
-
- self.allow_self_collision = allow_self_collision
-
- # state
- self._joints = None
- self._joint_angles = None
- self._angle_velocity = None
- self._acc = None
- self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)])
- self._start_vel = np.zeros(self.n_links)
-
- # joint limits
- self.j_min = -np.pi * np.ones(n_links)
- self.j_max = np.pi * np.ones(n_links)
-
- self.steps_before_reward = 199
-
- state_bound = np.hstack([
- [np.pi] * self.n_links, # cos
- [np.pi] * self.n_links, # sin
- [np.inf] * self.n_links, # velocity
- [np.inf] * 2, # x-y coordinates of target distance
-            [np.inf]  # env steps, because the reward starts after n steps TODO: Maybe
- ])
-
- self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
-
- self.reward_function = None # Needs to be set in sub class
-
- # containers for plotting
- self.metadata = {'render.modes': ["human"]}
- self.fig = None
-
- self._steps = 0
- self.seed()
-
- @property
- def dt(self) -> Union[float, int]:
- return self._dt
-
- @property
- def current_pos(self):
- return self._joint_angles.copy()
-
- @property
- def current_vel(self):
- return self._angle_velocity.copy()
-
- def reset(self):
- # Sample only orientation of first link, i.e. the arm is always straight.
- if self.random_start:
- first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4)
- self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)])
- self._start_pos = self._joint_angles.copy()
- else:
- self._joint_angles = self._start_pos
-
- self._angle_velocity = self._start_vel
- self._joints = np.zeros((self.n_links + 1, 2))
- self._update_joints()
- self._steps = 0
-
- return self._get_obs().copy()
-
- @abstractmethod
- def step(self, action: np.ndarray):
- """
- A single step with action in angular velocity space
- """
- raise NotImplementedError
-
- def _update_joints(self):
- """
- update joints to get new end-effector position. The other links are only required for rendering.
- Returns:
-
- """
- angles = np.cumsum(self._joint_angles)
- x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)])
- self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0)
-
- def _check_self_collision(self):
- """Checks whether line segments intersect"""
-
- if self.allow_self_collision:
- return False
-
- if np.any(self._joint_angles > self.j_max) or np.any(self._joint_angles < self.j_min):
- return True
-
- link_lines = np.stack((self._joints[:-1, :], self._joints[1:, :]), axis=1)
- for i, line1 in enumerate(link_lines):
- for line2 in link_lines[i + 2:, :]:
- if intersect(line1[0], line1[-1], line2[0], line2[-1]):
- return True
- return False
-
- @abstractmethod
- def _get_reward(self, action: np.ndarray) -> (float, dict):
- pass
-
- @abstractmethod
- def _get_obs(self) -> np.ndarray:
- pass
-
- @abstractmethod
- def _check_collisions(self) -> bool:
- pass
-
- @abstractmethod
- def _terminate(self, info) -> bool:
- return False
-
- def seed(self, seed=None):
- self.np_random, seed = seeding.np_random(seed)
- return [seed]
-
- def close(self):
- del self.fig
-
- @property
- def end_effector(self):
- return self._joints[self.n_links].T
diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py b/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py
deleted file mode 100644
index dc79827..0000000
--- a/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from abc import ABC
-
-from gym import spaces
-import numpy as np
-from alr_envs.alr.classic_control.base_reacher.base_reacher import BaseReacherEnv
-
-
-class BaseReacherDirectEnv(BaseReacherEnv, ABC):
- """
- Base class for directly controlled reaching environments
- """
- def __init__(self, n_links: int, random_start: bool = True,
- allow_self_collision: bool = False):
- super().__init__(n_links, random_start, allow_self_collision)
-
- self.max_vel = 2 * np.pi
- action_bound = np.ones((self.n_links,)) * self.max_vel
- self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape)
-
- def step(self, action: np.ndarray):
- """
- A single step with action in angular velocity space
- """
-
- self._acc = (action - self._angle_velocity) / self.dt
- self._angle_velocity = action
- self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
- self._update_joints()
-
- self._is_collided = self._check_collisions()
-
- reward, info = self._get_reward(action)
-
- self._steps += 1
- done = self._terminate(info)
-
- return self._get_obs().copy(), reward, done, info
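The step above integrates the commanded joint velocities with an explicit Euler update (new_angle = angle + dt * velocity, with dt = 0.01 s as set in BaseReacherEnv). A quick standalone sketch of that update rule, independent of the environment:

    import numpy as np

    dt = 0.01                    # matches self._dt in BaseReacherEnv
    angles = np.zeros(5)
    vel_cmd = np.full(5, np.pi)  # constant angular velocity command, as passed to step()
    for _ in range(100):         # 100 steps = 1 second of simulated time
        angles = angles + dt * vel_cmd
    print(angles)                # each joint has rotated by roughly pi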
diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py b/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py
deleted file mode 100644
index 094f632..0000000
--- a/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from abc import ABC
-
-from gym import spaces
-import numpy as np
-from alr_envs.alr.classic_control.base_reacher.base_reacher import BaseReacherEnv
-
-
-class BaseReacherTorqueEnv(BaseReacherEnv, ABC):
- """
- Base class for torque controlled reaching environments
- """
- def __init__(self, n_links: int, random_start: bool = True,
- allow_self_collision: bool = False):
- super().__init__(n_links, random_start, allow_self_collision)
-
- self.max_torque = 1000
- action_bound = np.ones((self.n_links,)) * self.max_torque
- self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape)
-
- def step(self, action: np.ndarray):
- """
- A single step with action in torque space
- """
-
- self._angle_velocity = self._angle_velocity + self.dt * action
- self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
- self._update_joints()
-
- self._is_collided = self._check_collisions()
-
- reward, info = self._get_reward(action)
-
- self._steps += 1
- done = False
-
- return self._get_obs().copy(), reward, done, info
diff --git a/alr_envs/alr/classic_control/hole_reacher/__init__.py b/alr_envs/alr/classic_control/hole_reacher/__init__.py
deleted file mode 100644
index c5e6d2f..0000000
--- a/alr_envs/alr/classic_control/hole_reacher/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
deleted file mode 100644
index 208f005..0000000
--- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
+++ /dev/null
@@ -1,231 +0,0 @@
-from typing import Union
-
-import gym
-import matplotlib.pyplot as plt
-import numpy as np
-from matplotlib import patches
-
-from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
-
-
-class HoleReacherEnv(BaseReacherDirectEnv):
- def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None,
- hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False,
- allow_wall_collision: bool = False, collision_penalty: float = 1000, rew_fct: str = "simple"):
-
- super().__init__(n_links, random_start, allow_self_collision)
-
- # provided initial parameters
- self.initial_x = hole_x # x-position of center of hole
- self.initial_width = hole_width # width of hole
- self.initial_depth = hole_depth # depth of hole
-
- # temp container for current env state
- self._tmp_x = None
- self._tmp_width = None
- self._tmp_depth = None
- self._goal = None # x-y coordinates for reaching the center at the bottom of the hole
-
- # action_bound = np.pi * np.ones((self.n_links,))
- state_bound = np.hstack([
- [np.pi] * self.n_links, # cos
- [np.pi] * self.n_links, # sin
- [np.inf] * self.n_links, # velocity
- [np.inf], # hole width
- # [np.inf], # hole depth
- [np.inf] * 2, # x-y coordinates of target distance
-            [np.inf]  # env steps, because the reward starts after n steps TODO: Maybe
- ])
- # self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape)
- self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
-
- if rew_fct == "simple":
- from alr_envs.alr.classic_control.hole_reacher.hr_simple_reward import HolereacherReward
- self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty)
- elif rew_fct == "vel_acc":
- from alr_envs.alr.classic_control.hole_reacher.hr_dist_vel_acc_reward import HolereacherReward
- self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty)
- else:
- raise ValueError("Unknown reward function {}".format(rew_fct))
-
- def reset(self):
- self._generate_hole()
- self._set_patches()
- self.reward_function.reset()
-
- return super().reset()
-
- def _get_reward(self, action: np.ndarray) -> (float, dict):
- return self.reward_function.get_reward(self)
-
- def _terminate(self, info):
- return info["is_collided"]
-
- def _generate_hole(self):
- if self.initial_width is None:
- width = self.np_random.uniform(0.15, 0.5)
- else:
- width = np.copy(self.initial_width)
- if self.initial_x is None:
-            # sample the hole on the left or right side
- direction = self.np_random.choice([-1, 1])
- # Hole center needs to be half the width away from the arm to give a valid setting.
- x = direction * self.np_random.uniform(width / 2, 3.5)
- else:
- x = np.copy(self.initial_x)
- if self.initial_depth is None:
- # TODO we do not want this right now.
- depth = self.np_random.uniform(1, 1)
- else:
- depth = np.copy(self.initial_depth)
-
- self._tmp_width = width
- self._tmp_x = x
- self._tmp_depth = depth
- self._goal = np.hstack([self._tmp_x, -self._tmp_depth])
-
- self._line_ground_left = np.array([-self.n_links, 0, x - width / 2, 0])
- self._line_ground_right = np.array([x + width / 2, 0, self.n_links, 0])
- self._line_ground_hole = np.array([x - width / 2, -depth, x + width / 2, -depth])
- self._line_hole_left = np.array([x - width / 2, -depth, x - width / 2, 0])
- self._line_hole_right = np.array([x + width / 2, -depth, x + width / 2, 0])
-
- self.ground_lines = np.stack((self._line_ground_left,
- self._line_ground_right,
- self._line_ground_hole,
- self._line_hole_left,
- self._line_hole_right))
-
- def _get_obs(self):
- theta = self._joint_angles
- return np.hstack([
- np.cos(theta),
- np.sin(theta),
- self._angle_velocity,
- self._tmp_width,
- # self._tmp_hole_depth,
- self.end_effector - self._goal,
- self._steps
- ]).astype(np.float32)
-
- def _get_line_points(self, num_points_per_link=1):
- theta = self._joint_angles[:, None]
-
- intermediate_points = np.linspace(0, 1, num_points_per_link) if num_points_per_link > 1 else 1
- accumulated_theta = np.cumsum(theta, axis=0)
- end_effector = np.zeros(shape=(self.n_links, num_points_per_link, 2))
-
- x = np.cos(accumulated_theta) * self.link_lengths[:, None] * intermediate_points
- y = np.sin(accumulated_theta) * self.link_lengths[:, None] * intermediate_points
-
- end_effector[0, :, 0] = x[0, :]
- end_effector[0, :, 1] = y[0, :]
-
- for i in range(1, self.n_links):
- end_effector[i, :, 0] = x[i, :] + end_effector[i - 1, -1, 0]
- end_effector[i, :, 1] = y[i, :] + end_effector[i - 1, -1, 1]
-
- return np.squeeze(end_effector + self._joints[0, :])
-
- def _check_collisions(self) -> bool:
- return self._check_self_collision() or self.check_wall_collision()
-
- def check_wall_collision(self):
- line_points = self._get_line_points(num_points_per_link=100)
-
- # all points that are before the hole in x
- r, c = np.where(line_points[:, :, 0] < (self._tmp_x - self._tmp_width / 2))
-
- # check if any of those points are below surface
- nr_line_points_below_surface_before_hole = np.sum(line_points[r, c, 1] < 0)
-
- if nr_line_points_below_surface_before_hole > 0:
- return True
-
- # all points that are after the hole in x
- r, c = np.where(line_points[:, :, 0] > (self._tmp_x + self._tmp_width / 2))
-
- # check if any of those points are below surface
- nr_line_points_below_surface_after_hole = np.sum(line_points[r, c, 1] < 0)
-
- if nr_line_points_below_surface_after_hole > 0:
- return True
-
- # all points that are above the hole
- r, c = np.where((line_points[:, :, 0] > (self._tmp_x - self._tmp_width / 2)) & (
- line_points[:, :, 0] < (self._tmp_x + self._tmp_width / 2)))
-
- # check if any of those points are below surface
- nr_line_points_below_surface_in_hole = np.sum(line_points[r, c, 1] < -self._tmp_depth)
-
- if nr_line_points_below_surface_in_hole > 0:
- return True
-
- return False
-
- def render(self, mode='human'):
- if self.fig is None:
- # Create base figure once on the beginning. Afterwards only update
- plt.ion()
- self.fig = plt.figure()
- ax = self.fig.add_subplot(1, 1, 1)
-
- # limits
- lim = np.sum(self.link_lengths) + 0.5
- ax.set_xlim([-lim, lim])
- ax.set_ylim([-1.1, lim])
-
- self.line, = ax.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
- self._set_patches()
- self.fig.show()
-
- self.fig.gca().set_title(
- f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}")
-
- if mode == "human":
-
- # arm
- self.line.set_data(self._joints[:, 0], self._joints[:, 1])
-
- self.fig.canvas.draw()
- self.fig.canvas.flush_events()
-
- elif mode == "partial":
- if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided:
- # Arm
- plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k',
- alpha=self._steps / 200)
-
- def _set_patches(self):
- if self.fig is not None:
- # self.fig.gca().patches = []
- left_block = patches.Rectangle((-self.n_links, -self._tmp_depth),
- self.n_links + self._tmp_x - self._tmp_width / 2,
- self._tmp_depth,
- fill=True, edgecolor='k', facecolor='k')
- right_block = patches.Rectangle((self._tmp_x + self._tmp_width / 2, -self._tmp_depth),
- self.n_links - self._tmp_x + self._tmp_width / 2,
- self._tmp_depth,
- fill=True, edgecolor='k', facecolor='k')
- hole_floor = patches.Rectangle((self._tmp_x - self._tmp_width / 2, -self._tmp_depth),
- self._tmp_width,
- 1 - self._tmp_depth,
- fill=True, edgecolor='k', facecolor='k')
-
- # Add the patch to the Axes
- self.fig.gca().add_patch(left_block)
- self.fig.gca().add_patch(right_block)
- self.fig.gca().add_patch(hole_floor)
-
-
-if __name__ == "__main__":
- import time
- env = HoleReacherEnv(5)
- env.reset()
-
- for i in range(10000):
- ac = env.action_space.sample()
- obs, rew, done, info = env.step(ac)
- env.render()
- if done:
- env.reset()
diff --git a/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py b/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py
deleted file mode 100644
index 1d75b66..0000000
--- a/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import numpy as np
-
-
-class HolereacherReward:
- def __init__(self, allow_self_collision, allow_wall_collision, collision_penalty):
- # collision
- self.allow_self_collision = allow_self_collision
- self.allow_wall_collision = allow_wall_collision
- self.collision_penalty = collision_penalty
- self._is_collided = False
-
- self.reward_factors = np.array((-1, -1e-4, -1e-6, -collision_penalty, 0))
-
- def reset(self):
- self._is_collided = False
- self.collision_dist = 0
-
- def get_reward(self, env):
- dist_cost = 0
- collision_cost = 0
- time_cost = 0
- success = False
-
- self_collision = False
- wall_collision = False
-
- if not self._is_collided:
- if not self.allow_self_collision:
- self_collision = env._check_self_collision()
-
- if not self.allow_wall_collision:
- wall_collision = env.check_wall_collision()
-
- self._is_collided = self_collision or wall_collision
-
- self.collision_dist = np.linalg.norm(env.end_effector - env._goal)
-
- if env._steps == 199: # or self._is_collided:
- # return reward only in last time step
- # Episode also terminates when colliding, hence return reward
- dist = np.linalg.norm(env.end_effector - env._goal)
-
- success = dist < 0.005 and not self._is_collided
- dist_cost = dist ** 2
- collision_cost = self._is_collided * self.collision_dist ** 2
- time_cost = 199 - env._steps
-
- info = {"is_success": success,
- "is_collided": self._is_collided,
- "end_effector": np.copy(env.end_effector)}
-
- vel_cost = np.sum(env._angle_velocity ** 2)
- acc_cost = np.sum(env._acc ** 2)
-
- reward_features = np.array((dist_cost, vel_cost, acc_cost, collision_cost, time_cost))
- reward = np.dot(reward_features, self.reward_factors)
-
- return reward, info
diff --git a/alr_envs/alr/classic_control/hole_reacher/hr_simple_reward.py b/alr_envs/alr/classic_control/hole_reacher/hr_simple_reward.py
deleted file mode 100644
index 5820ab1..0000000
--- a/alr_envs/alr/classic_control/hole_reacher/hr_simple_reward.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import numpy as np
-
-
-class HolereacherReward:
- def __init__(self, allow_self_collision, allow_wall_collision, collision_penalty):
- # collision
- self.allow_self_collision = allow_self_collision
- self.allow_wall_collision = allow_wall_collision
- self.collision_penalty = collision_penalty
- self._is_collided = False
-
- self.reward_factors = np.array((-1, -5e-8, -collision_penalty))
-
- def reset(self):
- self._is_collided = False
-
- def get_reward(self, env):
- dist_cost = 0
- collision_cost = 0
- success = False
-
- self_collision = False
- wall_collision = False
-
- if not self.allow_self_collision:
- self_collision = env._check_self_collision()
-
- if not self.allow_wall_collision:
- wall_collision = env.check_wall_collision()
-
- self._is_collided = self_collision or wall_collision
-
- if env._steps == 199 or self._is_collided:
- # return reward only in last time step
- # Episode also terminates when colliding, hence return reward
- dist = np.linalg.norm(env.end_effector - env._goal)
- dist_cost = dist ** 2
- collision_cost = int(self._is_collided)
-
- success = dist < 0.005 and not self._is_collided
-
- info = {"is_success": success,
- "is_collided": self._is_collided,
- "end_effector": np.copy(env.end_effector)}
-
- acc_cost = np.sum(env._acc ** 2)
-
- reward_features = np.array((dist_cost, acc_cost, collision_cost))
- reward = np.dot(reward_features, self.reward_factors)
-
- return reward, info
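The reward above is a weighted sum of features, reward = -dist_cost - 5e-8 * acc_cost - collision_penalty * collision_cost; the distance and collision terms only contribute at the final step (or when a collision ends the episode), while the acceleration penalty is applied every step. A small numeric sketch with made-up feature values:

    import numpy as np

    factors = np.array((-1, -5e-8, -100))     # collision_penalty = 100, as in the HoleReacher-v0/v1 registrations
    features = np.array((0.02 ** 2, 1e4, 0))  # illustrative: dist = 0.02, squared acceleration sum, no collision
    print(np.dot(features, factors))          # -0.0009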
diff --git a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
deleted file mode 100644
index feb545f..0000000
--- a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from mp_env_api import MPEnvWrapper
-
-
-class MPWrapper(MPEnvWrapper):
- @property
- def active_obs(self):
- return np.hstack([
- [self.env.random_start] * self.env.n_links, # cos
- [self.env.random_start] * self.env.n_links, # sin
- [self.env.random_start] * self.env.n_links, # velocity
- [self.env.initial_width is None], # hole width
- # [self.env.hole_depth is None], # hole depth
- [True] * 2, # x-y coordinates of target distance
- [False] # env steps
- ])
-
- # @property
- # def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- # return self._joint_angles.copy()
- #
- # @property
- # def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- # return self._angle_velocity.copy()
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.current_pos
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.current_vel
-
- @property
- def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/alr/classic_control/simple_reacher/__init__.py b/alr_envs/alr/classic_control/simple_reacher/__init__.py
deleted file mode 100644
index 989b5a9..0000000
--- a/alr_envs/alr/classic_control/simple_reacher/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py b/alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py
deleted file mode 100644
index 4b71e3a..0000000
--- a/alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from mp_env_api import MPEnvWrapper
-
-
-class MPWrapper(MPEnvWrapper):
- @property
- def active_obs(self):
- return np.hstack([
- [self.env.random_start] * self.env.n_links, # cos
- [self.env.random_start] * self.env.n_links, # sin
- [self.env.random_start] * self.env.n_links, # velocity
- [True] * 2, # x-y coordinates of target distance
- [False] # env steps
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.current_pos
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.current_vel
-
- @property
- def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py
deleted file mode 100644
index dac06a3..0000000
--- a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py
+++ /dev/null
@@ -1,139 +0,0 @@
-from typing import Iterable, Union
-
-import matplotlib.pyplot as plt
-import numpy as np
-from gym import spaces
-
-from alr_envs.alr.classic_control.base_reacher.base_reacher_torque import BaseReacherTorqueEnv
-
-
-class SimpleReacherEnv(BaseReacherTorqueEnv):
- """
- Simple Reaching Task without any physics simulation.
- Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions
- towards the end of the trajectory.
- """
-
- def __init__(self, n_links: int, target: Union[None, Iterable] = None, random_start: bool = True,
- allow_self_collision: bool = False,):
- super().__init__(n_links, random_start, allow_self_collision)
-
- # provided initial parameters
- self.inital_target = target
-
- # temp container for current env state
- self._goal = None
-
- self._start_pos = np.zeros(self.n_links)
-
- self.steps_before_reward = 199
-
- state_bound = np.hstack([
- [np.pi] * self.n_links, # cos
- [np.pi] * self.n_links, # sin
- [np.inf] * self.n_links, # velocity
- [np.inf] * 2, # x-y coordinates of target distance
-            [np.inf]  # env steps, because the reward starts after n steps TODO: Maybe
- ])
- self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
-
- # @property
- # def start_pos(self):
- # return self._start_pos
-
- def reset(self):
- self._generate_goal()
-
- return super().reset()
-
- def _get_reward(self, action: np.ndarray):
- diff = self.end_effector - self._goal
- reward_dist = 0
-
- if not self.allow_self_collision:
- self._is_collided = self._check_self_collision()
-
- if self._steps >= self.steps_before_reward:
- reward_dist -= np.linalg.norm(diff)
- # reward_dist = np.exp(-0.1 * diff ** 2).mean()
- # reward_dist = - (diff ** 2).mean()
-
- reward_ctrl = (action ** 2).sum()
- reward = reward_dist - reward_ctrl
- return reward, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
-
- def _terminate(self, info):
- return False
-
- def _get_obs(self):
- theta = self._joint_angles
- return np.hstack([
- np.cos(theta),
- np.sin(theta),
- self._angle_velocity,
- self.end_effector - self._goal,
- self._steps
- ]).astype(np.float32)
-
- def _generate_goal(self):
-
- if self.inital_target is None:
-
- total_length = np.sum(self.link_lengths)
- goal = np.array([total_length, total_length])
- while np.linalg.norm(goal) >= total_length:
- goal = self.np_random.uniform(low=-total_length, high=total_length, size=2)
- else:
- goal = np.copy(self.inital_target)
-
- self._goal = goal
-
- def _check_collisions(self) -> bool:
- return self._check_self_collision()
-
- def render(self, mode='human'): # pragma: no cover
- if self.fig is None:
- # Create base figure once on the beginning. Afterwards only update
- plt.ion()
- self.fig = plt.figure()
- ax = self.fig.add_subplot(1, 1, 1)
-
- # limits
- lim = np.sum(self.link_lengths) + 0.5
- ax.set_xlim([-lim, lim])
- ax.set_ylim([-lim, lim])
-
- self.line, = ax.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
- goal_pos = self._goal.T
- self.goal_point, = ax.plot(goal_pos[0], goal_pos[1], 'gx')
- self.goal_dist, = ax.plot([self.end_effector[0], goal_pos[0]], [self.end_effector[1], goal_pos[1]], 'g--')
-
- self.fig.show()
-
- self.fig.gca().set_title(f"Iteration: {self._steps}, distance: {self.end_effector - self._goal}")
-
- # goal
- goal_pos = self._goal.T
- if self._steps == 1:
- self.goal_point.set_data(goal_pos[0], goal_pos[1])
-
- # arm
- self.line.set_data(self._joints[:, 0], self._joints[:, 1])
-
- # distance between end effector and goal
- self.goal_dist.set_data([self.end_effector[0], goal_pos[0]], [self.end_effector[1], goal_pos[1]])
-
- self.fig.canvas.draw()
- self.fig.canvas.flush_events()
-
-
-if __name__ == "__main__":
- env = SimpleReacherEnv(5)
- env.reset()
- for i in range(200):
- ac = env.action_space.sample()
- obs, rew, done, info = env.step(ac)
-
- env.render()
- if done:
- break
diff --git a/alr_envs/alr/classic_control/utils.py b/alr_envs/alr/classic_control/utils.py
deleted file mode 100644
index b557a0a..0000000
--- a/alr_envs/alr/classic_control/utils.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import numpy as np
-
-
-def ccw(A, B, C):
- return (C[1] - A[1]) * (B[0] - A[0]) - (B[1] - A[1]) * (C[0] - A[0]) > 1e-12
-
-
-def intersect(A, B, C, D):
- """
- Checks whether line segments AB and CD intersect
- """
- return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)
-
-
-def check_self_collision(line_points):
- """Checks whether line segments intersect"""
- for i, line1 in enumerate(line_points):
- for line2 in line_points[i + 2:, :, :]:
- if intersect(line1[0], line1[-1], line2[0], line2[-1]):
- return True
- return False
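The helpers above implement a standard orientation-based line-segment intersection test. For reference, a small usage sketch of the removed module:

    import numpy as np
    from alr_envs.alr.classic_control.utils import intersect

    A, B = np.array([-1., 0.]), np.array([1., 0.])   # horizontal segment through the origin
    C, D = np.array([0., -1.]), np.array([0., 1.])   # vertical segment through the origin
    print(intersect(A, B, C, D))            # True: the segments cross
    print(intersect(A, B, A + 2., B + 2.))  # False: a shifted parallel copy does not cross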
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/__init__.py b/alr_envs/alr/classic_control/viapoint_reacher/__init__.py
deleted file mode 100644
index 989b5a9..0000000
--- a/alr_envs/alr/classic_control/viapoint_reacher/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py b/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py
deleted file mode 100644
index 6b3e85d..0000000
--- a/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py
+++ /dev/null
@@ -1,34 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from mp_env_api import MPEnvWrapper
-
-
-class MPWrapper(MPEnvWrapper):
- @property
- def active_obs(self):
- return np.hstack([
- [self.env.random_start] * self.env.n_links, # cos
- [self.env.random_start] * self.env.n_links, # sin
- [self.env.random_start] * self.env.n_links, # velocity
- [self.env.initial_via_target is None] * 2, # x-y coordinates of via point distance
- [True] * 2, # x-y coordinates of target distance
- [False] # env steps
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.current_pos
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.current_vel
-
- @property
- def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
deleted file mode 100644
index 292e40a..0000000
--- a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
+++ /dev/null
@@ -1,196 +0,0 @@
-from typing import Iterable, Union
-
-import gym
-import matplotlib.pyplot as plt
-import numpy as np
-from gym.utils import seeding
-
-from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
-
-
-class ViaPointReacherEnv(BaseReacherDirectEnv):
-
- def __init__(self, n_links, random_start: bool = False, via_target: Union[None, Iterable] = None,
- target: Union[None, Iterable] = None, allow_self_collision=False, collision_penalty=1000):
-
- super().__init__(n_links, random_start, allow_self_collision)
-
- # provided initial parameters
- self.intitial_target = target # provided target value
- self.initial_via_target = via_target # provided via point target value
-
- # temp container for current env state
- self._via_point = np.ones(2)
- self._goal = np.array((n_links, 0))
-
- # collision
- self.collision_penalty = collision_penalty
-
- state_bound = np.hstack([
- [np.pi] * self.n_links, # cos
- [np.pi] * self.n_links, # sin
- [np.inf] * self.n_links, # velocity
- [np.inf] * 2, # x-y coordinates of via point distance
- [np.inf] * 2, # x-y coordinates of target distance
-            [np.inf]  # env steps, because the reward starts after n steps
- ])
- self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
-
- # @property
- # def start_pos(self):
- # return self._start_pos
-
- def reset(self):
- self._generate_goal()
- return super().reset()
-
- def _generate_goal(self):
- # TODO: Maybe improve this later, this can yield quite a lot of invalid settings
-
- total_length = np.sum(self.link_lengths)
-
- # rejection sampled point in inner circle with 0.5*Radius
- if self.initial_via_target is None:
- via_target = np.array([total_length, total_length])
- while np.linalg.norm(via_target) >= 0.5 * total_length:
- via_target = self.np_random.uniform(low=-0.5 * total_length, high=0.5 * total_length, size=2)
- else:
- via_target = np.copy(self.initial_via_target)
-
- # rejection sampled point in outer circle
- if self.intitial_target is None:
- goal = np.array([total_length, total_length])
- while np.linalg.norm(goal) >= total_length or np.linalg.norm(goal) <= 0.5 * total_length:
- goal = self.np_random.uniform(low=-total_length, high=total_length, size=2)
- else:
- goal = np.copy(self.intitial_target)
-
- self._via_point = via_target
- self._goal = goal
-
- def _get_reward(self, acc):
- success = False
- reward = -np.inf
-
- if not self.allow_self_collision:
- self._is_collided = self._check_self_collision()
-
- if not self._is_collided:
- dist = np.inf
- # return intermediate reward for via point
- if self._steps == 100:
- dist = np.linalg.norm(self.end_effector - self._via_point)
- # return reward in last time step for goal
- elif self._steps == 199:
- dist = np.linalg.norm(self.end_effector - self._goal)
-
- success = dist < 0.005
- else:
- # Episode terminates when colliding, hence return reward
- dist = np.linalg.norm(self.end_effector - self._goal)
- reward = -self.collision_penalty
-
- reward -= dist ** 2
- reward -= 5e-8 * np.sum(acc ** 2)
- info = {"is_success": success,
- "is_collided": self._is_collided,
- "end_effector": np.copy(self.end_effector)}
-
- return reward, info
-
- def _terminate(self, info):
- return info["is_collided"]
-
- def _get_obs(self):
- theta = self._joint_angles
- return np.hstack([
- np.cos(theta),
- np.sin(theta),
- self._angle_velocity,
- self.end_effector - self._via_point,
- self.end_effector - self._goal,
- self._steps
- ]).astype(np.float32)
-
- def _check_collisions(self) -> bool:
- return self._check_self_collision()
-
- def render(self, mode='human'):
- goal_pos = self._goal.T
- via_pos = self._via_point.T
-
- if self.fig is None:
- # Create base figure once on the beginning. Afterwards only update
- plt.ion()
- self.fig = plt.figure()
- ax = self.fig.add_subplot(1, 1, 1)
-
- # limits
- lim = np.sum(self.link_lengths) + 0.5
- ax.set_xlim([-lim, lim])
- ax.set_ylim([-lim, lim])
-
- self.line, = ax.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
- self.goal_point_plot, = ax.plot(goal_pos[0], goal_pos[1], 'go')
- self.via_point_plot, = ax.plot(via_pos[0], via_pos[1], 'gx')
-
- self.fig.show()
-
- self.fig.gca().set_title(f"Iteration: {self._steps}, distance: {self.end_effector - self._goal}")
-
- if mode == "human":
- # goal
- if self._steps == 1:
- self.goal_point_plot.set_data(goal_pos[0], goal_pos[1])
-                self.via_point_plot.set_data(via_pos[0], via_pos[1])
-
- # arm
- self.line.set_data(self._joints[:, 0], self._joints[:, 1])
-
- self.fig.canvas.draw()
- self.fig.canvas.flush_events()
-
- elif mode == "partial":
- if self._steps == 1:
- # fig, ax = plt.subplots()
- # Add the patch to the Axes
- [plt.gca().add_patch(rect) for rect in self.patches]
- # plt.pause(0.01)
-
- if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided:
- # Arm
- plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k', alpha=self._steps / 200)
- # ax.plot(line_points_in_taskspace[:, 0, 0],
- # line_points_in_taskspace[:, 0, 1],
- # line_points_in_taskspace[:, -1, 0],
- # line_points_in_taskspace[:, -1, 1], marker='o', color='k', alpha=t / 200)
-
- lim = np.sum(self.link_lengths) + 0.5
- plt.xlim([-lim, lim])
- plt.ylim([-1.1, lim])
- plt.pause(0.01)
-
- elif mode == "final":
- if self._steps == 199 or self._is_collided:
- # fig, ax = plt.subplots()
-
- # Add the patch to the Axes
- [plt.gca().add_patch(rect) for rect in self.patches]
-
- plt.xlim(-self.n_links, self.n_links), plt.ylim(-1, self.n_links)
- # Arm
- plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
-
- plt.pause(0.01)
-
-if __name__ == "__main__":
- import time
- env = ViaPointReacherEnv(5)
- env.reset()
-
- for i in range(10000):
- ac = env.action_space.sample()
- obs, rew, done, info = env.step(ac)
- env.render()
- if done:
- env.reset()
diff --git a/alr_envs/alr/mujoco/README.MD b/alr_envs/alr/mujoco/README.MD
deleted file mode 100644
index 0ea5a1f..0000000
--- a/alr_envs/alr/mujoco/README.MD
+++ /dev/null
@@ -1,15 +0,0 @@
-# Custom Mujoco tasks
-
-## Step-based Environments
-|Name| Description|Horizon|Action Dimension|Observation Dimension
-|---|---|---|---|---|
-|`ALRReacher-v0`|Modified (5-link) version of the Mujoco gym `Reacher-v2` (2 links)| 200 | 5 | 21
-|`ALRReacherSparse-v0`|Same as `ALRReacher-v0`, but the distance penalty is only provided in the last time step.| 200 | 5 | 21
-|`ALRReacherSparseBalanced-v0`|Same as `ALRReacherSparse-v0`, but the end-effector has to remain upright.| 200 | 5 | 21
-|`ALRLongReacher-v0`|Modified (7-link) version of the Mujoco gym `Reacher-v2` (2 links)| 200 | 7 | 27
-|`ALRLongReacherSparse-v0`|Same as `ALRLongReacher-v0`, but the distance penalty is only provided in the last time step.| 200 | 7 | 27
-|`ALRLongReacherSparseBalanced-v0`|Same as `ALRLongReacherSparse-v0`, but the end-effector has to remain upright.| 200 | 7 | 27
-|`ALRBallInACupSimple-v0`| Ball-in-a-cup task where a robot needs to catch a ball attached to a cup at its end-effector. | 4000 | 3 | wip
-|`ALRBallInACup-v0`| Ball-in-a-cup task where a robot needs to catch a ball attached to a cup at its end-effector | 4000 | 7 | wip
-|`ALRBallInACupGoal-v0`| Similar to `ALRBallInACupSimple-v0` but the ball needs to be caught at a specified goal position | 4000 | 7 | wip
-
\ No newline at end of file
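Per the registrations earlier in this diff, the sparse reacher variants reuse ALRReacherEnv and only change steps_before_reward (0 for dense, 200 for sparse). A sketch of creating both, assuming MuJoCo and the package are installed:

    import gym

    dense = gym.make("alr_envs:ALRReacher-v0")         # steps_before_reward=0: distance penalty every step
    sparse = gym.make("alr_envs:ALRReacherSparse-v0")  # steps_before_reward=200: distance penalty only at the end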
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
deleted file mode 100644
index cdb3cde..0000000
--- a/alr_envs/alr/mujoco/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from .reacher.alr_reacher import ALRReacherEnv
-from .reacher.balancing import BalancingEnv
-from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
-from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
-from .table_tennis.tt_gym import TTEnvGym
-from .beerpong.beerpong import ALRBeerBongEnv
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/alr_reward_fct.py b/alr_envs/alr/mujoco/alr_reward_fct.py
deleted file mode 100644
index c96dd07..0000000
--- a/alr_envs/alr/mujoco/alr_reward_fct.py
+++ /dev/null
@@ -1,21 +0,0 @@
-class AlrReward:
- """
- A base class for non-Markovian reward functions which may need trajectory information to calculate an episodic
- reward. Call the methods in reset() and step() of the environment.
- """
-
- # methods to override:
- # ----------------------------
- def reset(self, *args, **kwargs):
- """
- Reset the reward function, empty state buffers before an episode, set contexts that influence reward, etc.
- """
- raise NotImplementedError
-
- def compute_reward(self, *args, **kwargs):
- """
-
- Returns: Useful things to return are reward values, success flags or crash flags
-
- """
- raise NotImplementedError
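The base class above only fixes the reset()/compute_reward() interface; the pattern used by the environments in this package (e.g. ball_in_a_cup.py) is to call reset() in the env's reset() and compute_reward() in its step(). A minimal hypothetical subclass as a sketch; DistanceReward and its internals are illustrative, not part of the package:

    import numpy as np
    from alr_envs.alr.mujoco.alr_reward_fct import AlrReward

    class DistanceReward(AlrReward):  # hypothetical example subclass
        def reset(self, context):
            self.goal = np.zeros(3) if context is None else np.asarray(context)

        def compute_reward(self, action, env):
            dist = np.linalg.norm(env.end_effector - self.goal)  # assumes the env exposes an end-effector position
            success = dist < 0.005
            crash = False
            return -dist ** 2 - 1e-3 * np.square(action).sum(), success, crash

    # in the environment: self.reward_function.reset(context) in reset(), and
    # reward, success, crash = self.reward_function.compute_reward(action, self) in step()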
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/__init__.py b/alr_envs/alr/mujoco/ball_in_a_cup/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/alr_envs/alr/mujoco/ball_in_a_cup/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/assets/biac_base.xml b/alr_envs/alr/mujoco/ball_in_a_cup/assets/biac_base.xml
deleted file mode 100644
index 9229ad5..0000000
--- a/alr_envs/alr/mujoco/ball_in_a_cup/assets/biac_base.xml
+++ /dev/null
@@ -1,361 +0,0 @@
-<!-- biac_base.xml: 361-line MuJoCo XML model for the ball-in-a-cup task; the tag markup was stripped during extraction and is not reproduced here -->
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup.py
deleted file mode 100644
index 7cff670..0000000
--- a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup.py
+++ /dev/null
@@ -1,196 +0,0 @@
-from gym import utils
-import os
-import numpy as np
-from gym.envs.mujoco import MujocoEnv
-
-
-
-class ALRBallInACupEnv(MujocoEnv, utils.EzPickle):
- def __init__(self, n_substeps=4, apply_gravity_comp=True, simplified: bool = False,
- reward_type: str = None, context: np.ndarray = None):
- utils.EzPickle.__init__(**locals())
- self._steps = 0
-
- self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "biac_base.xml")
-
- self._q_pos = []
- self._q_vel = []
- # self.weight_matrix_scale = 50
- self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
-
- self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
- self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
-
- self.context = context
-
- alr_mujoco_env.AlrMujocoEnv.__init__(self,
- self.xml_path,
- apply_gravity_comp=apply_gravity_comp,
- n_substeps=n_substeps)
- self._start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57])
- self._start_vel = np.zeros(7)
-
- self.simplified = simplified
-
- self.sim_time = 8 # seconds
- self.sim_steps = int(self.sim_time / self.dt)
- if reward_type == "no_context":
- from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward
- reward_function = BallInACupReward
- elif reward_type == "contextual_goal":
- from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
- reward_function = BallInACupReward
- else:
- raise ValueError("Unknown reward type: {}".format(reward_type))
- self.reward_function = reward_function(self.sim_steps)
-
- @property
- def start_pos(self):
- if self.simplified:
- return self._start_pos[1::2]
- else:
- return self._start_pos
-
- @property
- def start_vel(self):
- if self.simplified:
- return self._start_vel[1::2]
- else:
- return self._start_vel
-
- @property
- def current_pos(self):
- return self.sim.data.qpos[0:7].copy()
-
- @property
- def current_vel(self):
- return self.sim.data.qvel[0:7].copy()
-
- def reset(self):
- self.reward_function.reset(None)
- return super().reset()
-
- def reset_model(self):
- init_pos_all = self.init_qpos.copy()
- init_pos_robot = self._start_pos
- init_vel = np.zeros_like(init_pos_all)
-
- self._steps = 0
- self._q_pos = []
- self._q_vel = []
-
- start_pos = init_pos_all
- start_pos[0:7] = init_pos_robot
-
- self.set_state(start_pos, init_vel)
-
- return self._get_obs()
-
- def step(self, a):
- reward_dist = 0.0
- angular_vel = 0.0
- reward_ctrl = - np.square(a).sum()
-
- crash = self.do_simulation(a)
- # joint_cons_viol = self.check_traj_in_joint_limits()
-
- self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
- self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
-
- ob = self._get_obs()
-
- if not crash:
- reward, success, is_collided = self.reward_function.compute_reward(a, self)
- done = success or self._steps == self.sim_steps - 1 or is_collided
- self._steps += 1
- else:
- reward = -2000
- success = False
- is_collided = False
- done = True
- return ob, reward, done, dict(reward_dist=reward_dist,
- reward_ctrl=reward_ctrl,
- velocity=angular_vel,
- # traj=self._q_pos,
- action=a,
- q_pos=self.sim.data.qpos[0:7].ravel().copy(),
- q_vel=self.sim.data.qvel[0:7].ravel().copy(),
- is_success=success,
- is_collided=is_collided, sim_crash=crash)
-
- def check_traj_in_joint_limits(self):
- return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
-
- # TODO: extend observation space
- def _get_obs(self):
- theta = self.sim.data.qpos.flat[:7]
- return np.concatenate([
- np.cos(theta),
- np.sin(theta),
- # self.get_body_com("target"), # only return target to make problem harder
- [self._steps],
- ])
-
- # TODO
- @property
- def active_obs(self):
- return np.hstack([
- [False] * 7, # cos
- [False] * 7, # sin
- # [True] * 2, # x-y coordinates of target distance
- [False] # env steps
- ])
-
- # These functions are for the task with 3 joint actuations
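-    # only joints 1, 3 and 5 are actuated in the simplified variant; expanding a 3-dof command
-    # keeps the remaining joints at the start configuration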
- def extend_des_pos(self, des_pos):
- des_pos_full = self._start_pos.copy()
- des_pos_full[1] = des_pos[0]
- des_pos_full[3] = des_pos[1]
- des_pos_full[5] = des_pos[2]
- return des_pos_full
-
- def extend_des_vel(self, des_vel):
- des_vel_full = self._start_vel.copy()
- des_vel_full[1] = des_vel[0]
- des_vel_full[3] = des_vel[1]
- des_vel_full[5] = des_vel[2]
- return des_vel_full
-
- def render(self, render_mode, **render_kwargs):
- if render_mode == "plot_trajectory":
- if self._steps == 1:
- import matplotlib.pyplot as plt
- # plt.ion()
- self.fig, self.axs = plt.subplots(3, 1)
-
- if self._steps <= 1750:
- for ax, cp in zip(self.axs, self.current_pos[1::2]):
- ax.scatter(self._steps, cp, s=2, marker=".")
-
- # self.fig.show()
-
- else:
- super().render(render_mode, **render_kwargs)
-
-
-if __name__ == "__main__":
- env = ALRBallInACupEnv()
- ctxt = np.array([-0.20869846, -0.66376693, 1.18088501])
-
-    # env.configure(ctxt)
- env.reset()
- # env.render()
- for i in range(16000):
- # test with random actions
- ac = 0.001 * env.action_space.sample()[0:7]
- # ac = env.start_pos
- # ac[0] += np.pi/2
- obs, rew, d, info = env.step(ac)
- # env.render()
-
- print(rew)
-
- if d:
- break
-
- env.close()
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py
deleted file mode 100644
index 945fa8d..0000000
--- a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py
+++ /dev/null
@@ -1,42 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from mp_env_api import MPEnvWrapper
-
-
-class BallInACupMPWrapper(MPEnvWrapper):
-
- @property
- def active_obs(self):
- # TODO: @Max Filter observations correctly
- return np.hstack([
- [False] * 7, # cos
- [False] * 7, # sin
- # [True] * 2, # x-y coordinates of target distance
- [False] # env steps
- ])
-
- @property
- def start_pos(self):
- if self.simplified:
- return self._start_pos[1::2]
- else:
- return self._start_pos
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.sim.data.qpos[0:7].copy()
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.sim.data.qvel[0:7].copy()
-
- @property
- def goal_pos(self):
- # TODO: @Max I think the default value of returning to the start is reasonable here
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py
deleted file mode 100644
index 4ea4381..0000000
--- a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py
+++ /dev/null
@@ -1,142 +0,0 @@
-import numpy as np
-from alr_envs.alr.mujoco import alr_reward_fct
-
-
-class BallInACupReward(alr_reward_fct.AlrReward):
- def __init__(self, sim_time):
- self.sim_time = sim_time
-
- self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
- "wrist_pitch_link_convex_decomposition_p1_geom",
- "wrist_pitch_link_convex_decomposition_p2_geom",
- "wrist_pitch_link_convex_decomposition_p3_geom",
- "wrist_yaw_link_convex_decomposition_p1_geom",
- "wrist_yaw_link_convex_decomposition_p2_geom",
- "forearm_link_convex_decomposition_p1_geom",
- "forearm_link_convex_decomposition_p2_geom"]
-
- self.ball_id = None
- self.ball_collision_id = None
- self.goal_id = None
- self.goal_final_id = None
- self.collision_ids = None
-
- self.ball_traj = None
- self.dists = None
- self.dists_ctxt = None
- self.dists_final = None
- self.costs = None
-
- self.reset(None)
-
- def reset(self, context):
- self.ball_traj = np.zeros(shape=(self.sim_time, 3))
- self.cup_traj = np.zeros(shape=(self.sim_time, 3))
- self.dists = []
- self.dists_ctxt = []
- self.dists_final = []
- self.costs = []
- self.context = context
- self.ball_in_cup = False
- self.ball_above_threshold = False
- self.dist_ctxt = 3
- self.action_costs = []
- self.cup_angles = []
-
- def compute_reward(self, action, sim, step):
- action_cost = np.sum(np.square(action))
- self.action_costs.append(action_cost)
-
- stop_sim = False
- success = False
-
- self.ball_id = sim.model._body_name2id["ball"]
- self.ball_collision_id = sim.model._geom_name2id["ball_geom"]
- self.goal_id = sim.model._site_name2id["cup_goal"]
- self.goal_final_id = sim.model._site_name2id["cup_goal_final"]
- self.collision_ids = [sim.model._geom_name2id[name] for name in self.collision_objects]
-
- if self.check_collision(sim):
- reward = - 1e-3 * action_cost - 1000
- stop_sim = True
- return reward, success, stop_sim
-
- # Compute the current distance from the ball to the inner part of the cup
- goal_pos = sim.data.site_xpos[self.goal_id]
- ball_pos = sim.data.body_xpos[self.ball_id]
- goal_final_pos = sim.data.site_xpos[self.goal_final_id]
- self.dists.append(np.linalg.norm(goal_pos - ball_pos))
- self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
- self.dists_ctxt.append(np.linalg.norm(ball_pos - self.context))
- self.ball_traj[step, :] = np.copy(ball_pos)
- self.cup_traj[step, :] = np.copy(goal_pos) # ?
- cup_quat = np.copy(sim.data.body_xquat[sim.model._body_name2id["cup"]])
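-        # roll angle of the cup body about the x-axis, recovered from its (w, x, y, z) quaternion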
- self.cup_angles.append(np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]),
- 1 - 2 * (cup_quat[1] ** 2 + cup_quat[2] ** 2)))
-
- # Determine the first time when ball is in cup
- if not self.ball_in_cup:
- ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
- self.ball_in_cup = ball_in_cup
- if ball_in_cup:
- dist_to_ctxt = np.linalg.norm(ball_pos - self.context)
- self.dist_ctxt = dist_to_ctxt
-
- if step == self.sim_time - 1:
- t_min_dist = np.argmin(self.dists)
- angle_min_dist = self.cup_angles[t_min_dist]
- cost_angle = (angle_min_dist - np.pi / 2) ** 2
-
- min_dist = np.min(self.dists)
- dist_final = self.dists_final[-1]
- # dist_ctxt = self.dists_ctxt[-1]
-
- # # max distance between ball and cup and cup height at that time
- # ball_to_cup_diff = self.ball_traj[:, 2] - self.cup_traj[:, 2]
- # t_max_diff = np.argmax(ball_to_cup_diff)
- # t_max_ball_height = np.argmax(self.ball_traj[:, 2])
- # max_ball_height = np.max(self.ball_traj[:, 2])
-
- # cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
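-            # terminal cost: closest approach to the cup, final distance to the resting goal,
-            # the (clipped) context distance at catch time and the cup-angle penalty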
- cost = 0.5 * min_dist + 0.5 * dist_final + 0.3 * np.minimum(self.dist_ctxt, 3) + 0.01 * cost_angle
- reward = np.exp(-2 * cost) - 1e-3 * action_cost
- # if max_ball_height < self.context[2] or ball_to_cup_diff[t_max_ball_height] < 0:
- # reward -= 1
-
- success = dist_final < 0.05 and self.dist_ctxt < 0.05
- else:
- reward = - 1e-3 * action_cost
- success = False
-
- return reward, success, stop_sim
-
- def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt):
- if not ball_in_cup:
- cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2)
- else:
- cost = 2 * dist_to_ctxt ** 2
- print('Context Distance:', dist_to_ctxt)
- return cost
-
- def check_ball_in_cup(self, sim, ball_collision_id):
- cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id
- collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id
-
- if collision or collision_trans:
- return True
- return False
-
- def check_collision(self, sim):
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
- collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
-
- if collision or collision_trans:
- return True
- return False
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py
deleted file mode 100644
index a147d89..0000000
--- a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import numpy as np
-from alr_envs.alr.mujoco import alr_reward_fct
-
-
-class BallInACupReward(alr_reward_fct.AlrReward):
- def __init__(self, env):
- self.env = env
- self.collision_objects = ["cup_geom1", "cup_geom2", "cup_base_contact_below",
- "wrist_palm_link_convex_geom",
- "wrist_pitch_link_convex_decomposition_p1_geom",
- "wrist_pitch_link_convex_decomposition_p2_geom",
- "wrist_pitch_link_convex_decomposition_p3_geom",
- "wrist_yaw_link_convex_decomposition_p1_geom",
- "wrist_yaw_link_convex_decomposition_p2_geom",
- "forearm_link_convex_decomposition_p1_geom",
- "forearm_link_convex_decomposition_p2_geom"]
-
- self.ball_id = None
- self.ball_collision_id = None
- self.goal_id = None
- self.goal_final_id = None
- self.collision_ids = None
- self._is_collided = False
- self.collision_penalty = 1000
-
- self.ball_traj = None
- self.dists = None
- self.dists_final = None
- self.costs = None
-
- self.reset(None)
-
- def reset(self, context):
- # self.sim_time = self.env.sim.dtsim_time
- self.ball_traj = [] # np.zeros(shape=(self.sim_time, 3))
- self.dists = []
- self.dists_final = []
- self.costs = []
- self.action_costs = []
- self.angle_costs = []
- self.cup_angles = []
-
- def compute_reward(self, action):
- self.ball_id = self.env.sim.model._body_name2id["ball"]
- self.ball_collision_id = self.env.sim.model._geom_name2id["ball_geom"]
- self.goal_id = self.env.sim.model._site_name2id["cup_goal"]
- self.goal_final_id = self.env.sim.model._site_name2id["cup_goal_final"]
- self.collision_ids = [self.env.sim.model._geom_name2id[name] for name in self.collision_objects]
-
- ball_in_cup = self.check_ball_in_cup(self.env.sim, self.ball_collision_id)
-
- # Compute the current distance from the ball to the inner part of the cup
- goal_pos = self.env.sim.data.site_xpos[self.goal_id]
- ball_pos = self.env.sim.data.body_xpos[self.ball_id]
- goal_final_pos = self.env.sim.data.site_xpos[self.goal_final_id]
- self.dists.append(np.linalg.norm(goal_pos - ball_pos))
- self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
- # self.ball_traj[self.env._steps, :] = ball_pos
- self.ball_traj.append(ball_pos)
- cup_quat = np.copy(self.env.sim.data.body_xquat[self.env.sim.model._body_name2id["cup"]])
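-        # cup roll angle from the (w, x, y, z) body quaternion; the angle cost below penalizes
-        # deviations from the upright orientation at pi/2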
- cup_angle = np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]),
- 1 - 2 * (cup_quat[1]**2 + cup_quat[2]**2))
- cost_angle = (cup_angle - np.pi / 2) ** 2
- self.angle_costs.append(cost_angle)
- self.cup_angles.append(cup_angle)
-
- action_cost = np.sum(np.square(action))
- self.action_costs.append(action_cost)
-
- self._is_collided = self.check_collision(self.env.sim) or self.env.check_traj_in_joint_limits()
-
- if self.env._steps == self.env.ep_length - 1 or self._is_collided:
- t_min_dist = np.argmin(self.dists)
- angle_min_dist = self.cup_angles[t_min_dist]
- # cost_angle = (angle_min_dist - np.pi / 2)**2
-
-
- # min_dist = self.dists[t_min_dist]
- dist_final = self.dists_final[-1]
- min_dist_final = np.min(self.dists_final)
-
- # cost = 0.5 * dist_final + 0.05 * cost_angle # TODO: Increase cost_angle weight # 0.5 * min_dist +
- # reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided)
- # reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
- reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 1e-3 * action_cost - self.collision_penalty * int(self._is_collided)
- success = dist_final < 0.05 and ball_in_cup and not self._is_collided
- crash = self._is_collided
- else:
- reward = - 1e-3 * action_cost - 1e-4 * cost_angle # TODO: increase action_cost weight
- success = False
- crash = False
-
- return reward, success, crash
-
- def check_ball_in_cup(self, sim, ball_collision_id):
- cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id
- collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id
-
- if collision or collision_trans:
- return True
- return False
-
- def check_collision(self, sim):
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
- collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
-
- if collision or collision_trans:
- return True
- return False
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/biac_pd.py b/alr_envs/alr/mujoco/ball_in_a_cup/biac_pd.py
deleted file mode 100644
index b047c54..0000000
--- a/alr_envs/alr/mujoco/ball_in_a_cup/biac_pd.py
+++ /dev/null
@@ -1,205 +0,0 @@
-import os
-
-import gym.envs.mujoco
-import gym.envs.mujoco as mujoco_env
-import mujoco_py.builder
-import numpy as np
-from gym import utils
-
-from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper
-from mp_env_api.utils.policies import PDControllerExtend
-
-
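-# wraps a registered gym env id with a PD controller policy and the DetPMP motion-primitive
-# interface; the p/d gains are popped from kwargs, the remaining ones go to DetPMPWrapper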
-def make_detpmp_env(**kwargs):
- name = kwargs.pop("name")
- _env = gym.make(name)
- policy = PDControllerExtend(_env, p_gains=kwargs.pop('p_gains'), d_gains=kwargs.pop('d_gains'))
- kwargs['policy_type'] = policy
- return DetPMPWrapper(_env, **kwargs)
-
-
-class ALRBallInACupPDEnv(mujoco_env.MujocoEnv, utils.EzPickle):
- def __init__(self, frame_skip=4, apply_gravity_comp=True, simplified: bool = False,
- reward_type: str = None, context: np.ndarray = None):
- utils.EzPickle.__init__(**locals())
- self._steps = 0
-
- self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "biac_base.xml")
-
- self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
-
- self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
- self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
-
- self.context = context
- self.apply_gravity_comp = apply_gravity_comp
- self.simplified = simplified
-
- self._start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57])
- self._start_vel = np.zeros(7)
-
- self.sim_time = 8 # seconds
- self._dt = 0.02
- self.ep_length = 4000 # based on 8 seconds with dt = 0.02 int(self.sim_time / self.dt)
- if reward_type == "no_context":
- from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward
- reward_function = BallInACupReward
- elif reward_type == "contextual_goal":
- from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
- reward_function = BallInACupReward
- else:
- raise ValueError("Unknown reward type: {}".format(reward_type))
- self.reward_function = reward_function(self)
-
- mujoco_env.MujocoEnv.__init__(self, self.xml_path, frame_skip)
-
-    @property
-    def dt(self):
-        return self._dt
-
-    @property
-    def current_pos(self):
-        return self.sim.data.qpos[0:7].copy()
-
-    @property
-    def current_vel(self):
-        return self.sim.data.qvel[0:7].copy()
-
-    # TODO: @Max is this even needed?
-    @property
-    def start_vel(self):
-        if self.simplified:
-            return self._start_vel[1::2]
-        else:
-            return self._start_vel
-
- # def _set_action_space(self):
- # if self.simplified:
- # bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)[1::2]
- # else:
- # bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
- # low, high = bounds.T
- # self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
- # return self.action_space
-
- def reset(self):
- self.reward_function.reset(None)
- return super().reset()
-
- def reset_model(self):
- init_pos_all = self.init_qpos.copy()
- init_pos_robot = self._start_pos
- init_vel = np.zeros_like(init_pos_all)
-
- self._steps = 0
- self._q_pos = []
- self._q_vel = []
-
- start_pos = init_pos_all
- start_pos[0:7] = init_pos_robot
-
- self.set_state(start_pos, init_vel)
-
- return self._get_obs()
-
- def step(self, a):
- reward_dist = 0.0
- angular_vel = 0.0
- reward_ctrl = - np.square(a).sum()
-
- # if self.simplified:
- # tmp = np.zeros(7)
- # tmp[1::2] = a
- # a = tmp
-
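-        # gravity compensation: qfrc_bias holds the bias (gravity + Coriolis) forces, converted
-        # to actuator units via the gear ratios and added to the commanded action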
- if self.apply_gravity_comp:
- a += self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0]
-
- crash = False
- try:
- self.do_simulation(a, self.frame_skip)
- except mujoco_py.builder.MujocoException:
- crash = True
- # joint_cons_viol = self.check_traj_in_joint_limits()
-
- ob = self._get_obs()
-
- if not crash:
- reward, success, is_collided = self.reward_function.compute_reward(a)
- done = success or is_collided # self._steps == self.sim_steps - 1
- self._steps += 1
- else:
- reward = -2000
- success = False
- is_collided = False
- done = True
-
- return ob, reward, done, dict(reward_dist=reward_dist,
- reward_ctrl=reward_ctrl,
- velocity=angular_vel,
- # traj=self._q_pos,
- action=a,
- q_pos=self.sim.data.qpos[0:7].ravel().copy(),
- q_vel=self.sim.data.qvel[0:7].ravel().copy(),
- is_success=success,
- is_collided=is_collided, sim_crash=crash)
-
- def check_traj_in_joint_limits(self):
- return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
-
- # TODO: extend observation space
- def _get_obs(self):
- theta = self.sim.data.qpos.flat[:7]
- return np.concatenate([
- np.cos(theta),
- np.sin(theta),
- # self.get_body_com("target"), # only return target to make problem harder
- [self._steps],
- ])
-
- # These functions are for the task with 3 joint actuations
- def extend_des_pos(self, des_pos):
- des_pos_full = self._start_pos.copy()
- des_pos_full[1] = des_pos[0]
- des_pos_full[3] = des_pos[1]
- des_pos_full[5] = des_pos[2]
- return des_pos_full
-
- def extend_des_vel(self, des_vel):
- des_vel_full = self._start_vel.copy()
- des_vel_full[1] = des_vel[0]
- des_vel_full[3] = des_vel[1]
- des_vel_full[5] = des_vel[2]
- return des_vel_full
-
- def render(self, render_mode, **render_kwargs):
- if render_mode == "plot_trajectory":
- if self._steps == 1:
- import matplotlib.pyplot as plt
- # plt.ion()
- self.fig, self.axs = plt.subplots(3, 1)
-
- if self._steps <= 1750:
- for ax, cp in zip(self.axs, self.current_pos[1::2]):
- ax.scatter(self._steps, cp, s=2, marker=".")
-
- # self.fig.show()
-
- else:
- super().render(render_mode, **render_kwargs)
-
-
-if __name__ == "__main__":
- env = ALRBallInACupPDEnv(reward_type="no_context", simplified=True)
- # env = gym.make("alr_envs:ALRBallInACupPDSimpleDetPMP-v0")
- # ctxt = np.array([-0.20869846, -0.66376693, 1.18088501])
-
- # env.configure(ctxt)
- env.reset()
- env.render("human")
- for i in range(16000):
- # test with random actions
- ac = 0.02 * env.action_space.sample()[0:7]
- # ac = env.start_pos
- # ac[0] += np.pi/2
- obs, rew, d, info = env.step(ac)
- env.render("human")
-
- print(rew)
-
- if d:
- break
-
- env.close()
diff --git a/alr_envs/alr/mujoco/beerpong/__init__.py b/alr_envs/alr/mujoco/beerpong/__init__.py
deleted file mode 100644
index 989b5a9..0000000
--- a/alr_envs/alr/mujoco/beerpong/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong.xml b/alr_envs/alr/mujoco/beerpong/assets/beerpong.xml
deleted file mode 100644
index 017bbad..0000000
--- a/alr_envs/alr/mujoco/beerpong/assets/beerpong.xml
+++ /dev/null
@@ -1,238 +0,0 @@
diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
deleted file mode 100644
index e96d2bc..0000000
--- a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
+++ /dev/null
@@ -1,187 +0,0 @@
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
deleted file mode 100644
index 755710a..0000000
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ /dev/null
@@ -1,193 +0,0 @@
-import mujoco_py.builder
-import os
-
-import numpy as np
-from gym import utils
-from gym.envs.mujoco import MujocoEnv
-
-
-class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
- def __init__(self, frame_skip=1, apply_gravity_comp=True, reward_type: str = "staged", noisy=False,
- context: np.ndarray = None, difficulty='simple'):
- self._steps = 0
-
- self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
- "beerpong_wo_cup" + ".xml")
-
- self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
- self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
-
- self.context = context
- self.apply_gravity_comp = apply_gravity_comp
- self.add_noise = noisy
-
- self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
- self._start_vel = np.zeros(7)
-
- self.ball_site_id = 0
- self.ball_id = 11
-
- self._release_step = 175 # time step of ball release
-
- self.sim_time = 3 # seconds
- self.ep_length = 600 # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
- self.cup_table_id = 10
-
- if noisy:
- self.noise_std = 0.01
- else:
- self.noise_std = 0
-
- if difficulty == 'simple':
- self.cup_goal_pos = np.array([0, -1.7, 0.840])
- elif difficulty == 'intermediate':
- self.cup_goal_pos = np.array([0.3, -1.5, 0.840])
- elif difficulty == 'hard':
- self.cup_goal_pos = np.array([-0.3, -2.2, 0.840])
-        elif difficulty == 'hardest':
-            self.cup_goal_pos = np.array([-0.3, -1.2, 0.840])
-        else:
-            raise ValueError("Unknown difficulty: {}".format(difficulty))
-
- if reward_type == "no_context":
- from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerPongReward
- reward_function = BeerPongReward
- elif reward_type == "staged":
- from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
- reward_function = BeerPongReward
- else:
- raise ValueError("Unknown reward type: {}".format(reward_type))
- self.reward_function = reward_function()
-
- MujocoEnv.__init__(self, self.xml_path, frame_skip)
- utils.EzPickle.__init__(self)
-
- @property
- def start_pos(self):
- return self._start_pos
-
- @property
- def start_vel(self):
- return self._start_vel
-
- @property
- def current_pos(self):
- return self.sim.data.qpos[0:7].copy()
-
- @property
- def current_vel(self):
- return self.sim.data.qvel[0:7].copy()
-
- def reset(self):
- self.reward_function.reset(self.add_noise)
- return super().reset()
-
- def reset_model(self):
- init_pos_all = self.init_qpos.copy()
- init_pos_robot = self.start_pos
- init_vel = np.zeros_like(init_pos_all)
-
- self._steps = 0
-
- start_pos = init_pos_all
- start_pos[0:7] = init_pos_robot
-
- self.set_state(start_pos, init_vel)
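-        # move the target cup to the goal position, then place the ball at its initial site
-        # and commit the state again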
- self.sim.model.body_pos[self.cup_table_id] = self.cup_goal_pos
- start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
- self.set_state(start_pos, init_vel)
- return self._get_obs()
-
- def step(self, a):
- reward_dist = 0.0
- angular_vel = 0.0
- reward_ctrl = - np.square(a).sum()
-
- if self.apply_gravity_comp:
- a = a + self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0]
- try:
- self.do_simulation(a, self.frame_skip)
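-            # until the release step the ball is clamped to its site so it moves with the hand;
-            # at the release step optional Gaussian noise perturbs the ball velocity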
- if self._steps < self._release_step:
- self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
- self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
- elif self._steps == self._release_step and self.add_noise:
- self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
- crash = False
- except mujoco_py.builder.MujocoException:
- crash = True
- # joint_cons_viol = self.check_traj_in_joint_limits()
-
- ob = self._get_obs()
-
- if not crash:
- reward, reward_infos = self.reward_function.compute_reward(self, a)
- success = reward_infos['success']
- is_collided = reward_infos['is_collided']
- ball_pos = reward_infos['ball_pos']
- ball_vel = reward_infos['ball_vel']
- done = is_collided or self._steps == self.ep_length - 1
- self._steps += 1
- else:
- reward = -30
- reward_infos = dict()
- success = False
- is_collided = False
- done = True
- ball_pos = np.zeros(3)
- ball_vel = np.zeros(3)
-
- infos = dict(reward_dist=reward_dist,
- reward_ctrl=reward_ctrl,
- reward=reward,
- velocity=angular_vel,
- # traj=self._q_pos,
- action=a,
- q_pos=self.sim.data.qpos[0:7].ravel().copy(),
- q_vel=self.sim.data.qvel[0:7].ravel().copy(),
- ball_pos=ball_pos,
- ball_vel=ball_vel,
- success=success,
- is_collided=is_collided, sim_crash=crash)
- infos.update(reward_infos)
-
- return ob, reward, done, infos
-
- def check_traj_in_joint_limits(self):
- return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
-
- # TODO: extend observation space
- def _get_obs(self):
- theta = self.sim.data.qpos.flat[:7]
- return np.concatenate([
- np.cos(theta),
- np.sin(theta),
- # self.get_body_com("target"), # only return target to make problem harder
- [self._steps],
- ])
-
- # TODO
- @property
- def active_obs(self):
- return np.hstack([
- [False] * 7, # cos
- [False] * 7, # sin
- # [True] * 2, # x-y coordinates of target distance
- [False] # env steps
- ])
-
-
-if __name__ == "__main__":
- env = ALRBeerBongEnv(reward_type="staged", difficulty='hardest')
-
- # env.configure(ctxt)
- env.reset()
- env.render("human")
- for i in range(800):
- ac = 10 * env.action_space.sample()[0:7]
- obs, rew, d, info = env.step(ac)
- env.render("human")
-
- print(rew)
-
- if d:
- break
-
- env.close()
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
deleted file mode 100644
index dc39ca8..0000000
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
+++ /dev/null
@@ -1,171 +0,0 @@
-import numpy as np
-
-
-class BeerPongReward:
- def __init__(self):
-
- self.robot_collision_objects = ["wrist_palm_link_convex_geom",
- "wrist_pitch_link_convex_decomposition_p1_geom",
- "wrist_pitch_link_convex_decomposition_p2_geom",
- "wrist_pitch_link_convex_decomposition_p3_geom",
- "wrist_yaw_link_convex_decomposition_p1_geom",
- "wrist_yaw_link_convex_decomposition_p2_geom",
- "forearm_link_convex_decomposition_p1_geom",
- "forearm_link_convex_decomposition_p2_geom",
- "upper_arm_link_convex_decomposition_p1_geom",
- "upper_arm_link_convex_decomposition_p2_geom",
- "shoulder_link_convex_decomposition_p1_geom",
- "shoulder_link_convex_decomposition_p2_geom",
- "shoulder_link_convex_decomposition_p3_geom",
- "base_link_convex_geom", "table_contact_geom"]
-
- self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
- "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
- # "cup_base_table", "cup_base_table_contact",
- "cup_geom_table15",
- "cup_geom_table16",
- "cup_geom_table17", "cup_geom1_table8",
- # "cup_base_table_contact",
- # "cup_base_table"
- ]
-
-
- self.ball_traj = None
- self.dists = None
- self.dists_final = None
- self.costs = None
- self.action_costs = None
- self.angle_rewards = None
- self.cup_angles = None
- self.cup_z_axes = None
- self.collision_penalty = 500
- self.reset(None)
-
- def reset(self, context):
- self.ball_traj = []
- self.dists = []
- self.dists_final = []
- self.costs = []
- self.action_costs = []
- self.angle_rewards = []
- self.cup_angles = []
- self.cup_z_axes = []
- self.ball_ground_contact = False
- self.ball_table_contact = False
- self.ball_wall_contact = False
- self.ball_cup_contact = False
-
- def compute_reward(self, env, action):
- self.ball_id = env.sim.model._body_name2id["ball"]
- self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
- self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
- self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
- self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
- self.cup_table_id = env.sim.model._body_name2id["cup_table"]
- self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
- self.wall_collision_id = env.sim.model._geom_name2id["wall"]
- self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
- self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
- self.ground_collision_id = env.sim.model._geom_name2id["ground"]
- self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
-
- goal_pos = env.sim.data.site_xpos[self.goal_id]
- ball_pos = env.sim.data.body_xpos[self.ball_id]
- ball_vel = env.sim.data.body_xvelp[self.ball_id]
- goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
- self.dists.append(np.linalg.norm(goal_pos - ball_pos))
- self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
-
- action_cost = np.sum(np.square(action))
- self.action_costs.append(action_cost)
-
- ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
- self.table_collision_id)
-
- if ball_table_bounce: # or ball_cup_table_cont or ball_wall_con
- self.ball_table_contact = True
-
- ball_cup_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
- self.cup_collision_ids)
- if ball_cup_cont:
- self.ball_cup_contact = True
-
- ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.wall_collision_id)
- if ball_wall_cont and not self.ball_table_contact:
- self.ball_wall_contact = True
-
- ball_ground_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
- self.ground_collision_id)
- if ball_ground_contact and not self.ball_table_contact:
- self.ball_ground_contact = True
-
- self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
- if env._steps == env.ep_length - 1 or self._is_collided:
-
- min_dist = np.min(self.dists)
-
- ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.cup_table_collision_id)
-
- cost_offset = 0
-
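-            # each missed milestone (ball touched the ground, never bounced on the table,
-            # missed the cup) adds a fixed offset, so the distance terms only dominate afterwards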
- if self.ball_ground_contact: # or self.ball_wall_contact:
- cost_offset += 2
-
- if not self.ball_table_contact:
- cost_offset += 2
-
- if not ball_in_cup:
- cost_offset += 2
- cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-4 * action_cost # + min_dist ** 2
- else:
- if self.ball_cup_contact:
- cost_offset += 1
- cost = cost_offset + self.dists_final[-1] ** 2 + 1e-4 * action_cost
-
- reward = - 1*cost - self.collision_penalty * int(self._is_collided)
- success = ball_in_cup and not self.ball_ground_contact and not self.ball_wall_contact and not self.ball_cup_contact
- else:
- reward = - 1e-4 * action_cost
- success = False
-
- infos = {}
- infos["success"] = success
- infos["is_collided"] = self._is_collided
- infos["ball_pos"] = ball_pos.copy()
- infos["ball_vel"] = ball_vel.copy()
- infos["action_cost"] = 5e-4 * action_cost
-
- return reward, infos
-
- def _check_collision_single_objects(self, sim, id_1, id_2):
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 == id_1 and con.geom2 == id_2
- collision_trans = con.geom1 == id_2 and con.geom2 == id_1
-
- if collision or collision_trans:
- return True
- return False
-
-    def _check_collision_with_itself(self, sim, collision_ids):
-        # True as soon as any pair of geoms within the given set is in contact
-        for j, geom_id in enumerate(collision_ids):
-            others = collision_ids[:j] + collision_ids[j + 1:]
-            if self._check_collision_with_set_of_objects(sim, geom_id, others):
-                return True
-        return False
-
- def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 in id_list and con.geom2 == id_1
- collision_trans = con.geom1 == id_1 and con.geom2 in id_list
-
- if collision or collision_trans:
- return True
- return False
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_simple.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_simple.py
deleted file mode 100644
index fbe2163..0000000
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_simple.py
+++ /dev/null
@@ -1,141 +0,0 @@
-import numpy as np
-from alr_envs.alr.mujoco import alr_reward_fct
-
-
-class BeerpongReward(alr_reward_fct.AlrReward):
- def __init__(self, sim, sim_time):
-
- self.sim = sim
- self.sim_time = sim_time
-
- self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
- "wrist_pitch_link_convex_decomposition_p1_geom",
- "wrist_pitch_link_convex_decomposition_p2_geom",
- "wrist_pitch_link_convex_decomposition_p3_geom",
- "wrist_yaw_link_convex_decomposition_p1_geom",
- "wrist_yaw_link_convex_decomposition_p2_geom",
- "forearm_link_convex_decomposition_p1_geom",
- "forearm_link_convex_decomposition_p2_geom"]
-
- self.ball_id = None
- self.ball_collision_id = None
- self.goal_id = None
- self.goal_final_id = None
- self.collision_ids = None
-
- self.ball_traj = None
- self.dists = None
- self.dists_ctxt = None
- self.dists_final = None
- self.costs = None
-
- self.reset(None)
-
- def reset(self, context):
- self.ball_traj = np.zeros(shape=(self.sim_time, 3))
- self.dists = []
- self.dists_ctxt = []
- self.dists_final = []
- self.costs = []
- self.action_costs = []
- self.context = context
- self.ball_in_cup = False
- self.dist_ctxt = 5
- self.bounce_dist = 2
- self.min_dist = 2
- self.dist_final = 2
- self.table_contact = False
-
- self.ball_id = self.sim.model._body_name2id["ball"]
- self.ball_collision_id = self.sim.model._geom_name2id["ball_geom"]
- self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
- self.goal_id = self.sim.model._site_name2id["cup_goal_table"]
- self.goal_final_id = self.sim.model._site_name2id["cup_goal_final_table"]
- self.collision_ids = [self.sim.model._geom_name2id[name] for name in self.collision_objects]
- self.cup_table_id = self.sim.model._body_name2id["cup_table"]
- self.bounce_table_id = self.sim.model._site_name2id["bounce_table"]
-
- def compute_reward(self, action, sim, step):
- action_cost = np.sum(np.square(action))
- self.action_costs.append(action_cost)
-
- stop_sim = False
- success = False
-
- if self.check_collision(sim):
- reward = - 1e-2 * action_cost - 10
- stop_sim = True
- return reward, success, stop_sim
-
- # Compute the current distance from the ball to the inner part of the cup
- goal_pos = sim.data.site_xpos[self.goal_id]
- ball_pos = sim.data.body_xpos[self.ball_id]
- bounce_pos = sim.data.site_xpos[self.bounce_table_id]
- goal_final_pos = sim.data.site_xpos[self.goal_final_id]
- self.dists.append(np.linalg.norm(goal_pos - ball_pos))
- self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
- self.ball_traj[step, :] = ball_pos
-
- ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
- table_contact = self.check_ball_table_contact(sim, self.ball_collision_id)
-
- if table_contact and not self.table_contact:
- self.bounce_dist = np.minimum((np.linalg.norm(bounce_pos - ball_pos)), 2)
- self.table_contact = True
-
- if step == self.sim_time - 1:
- min_dist = np.min(self.dists)
- self.min_dist = min_dist
- dist_final = self.dists_final[-1]
- self.dist_final = dist_final
-
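-            # equally weighted terminal cost over closest approach, final distance and bounce accuracy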
- cost = 0.33 * min_dist + 0.33 * dist_final + 0.33 * self.bounce_dist
- reward = np.exp(-2 * cost) - 1e-2 * action_cost
- success = self.bounce_dist < 0.05 and dist_final < 0.05 and ball_in_cup
- else:
- reward = - 1e-2 * action_cost
- success = False
-
- return reward, success, stop_sim
-
- def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt):
- if not ball_in_cup:
- cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2)
- else:
- cost = 2 * dist_to_ctxt ** 2
- print('Context Distance:', dist_to_ctxt)
- return cost
-
- def check_ball_table_contact(self, sim, ball_collision_id):
- table_collision_id = sim.model._geom_name2id["table_contact_geom"]
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
- collision = con.geom1 == table_collision_id and con.geom2 == ball_collision_id
- collision_trans = con.geom1 == ball_collision_id and con.geom2 == table_collision_id
-
- if collision or collision_trans:
- return True
- return False
-
- def check_ball_in_cup(self, sim, ball_collision_id):
- cup_base_collision_id = sim.model._geom_name2id["cup_base_table_contact"]
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id
- collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id
-
- if collision or collision_trans:
- return True
- return False
-
- def check_collision(self, sim):
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
- collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
-
- if collision or collision_trans:
- return True
- return False
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
deleted file mode 100644
index e94b470..0000000
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ /dev/null
@@ -1,158 +0,0 @@
-import numpy as np
-
-
-class BeerPongReward:
- def __init__(self):
-
- self.robot_collision_objects = ["wrist_palm_link_convex_geom",
- "wrist_pitch_link_convex_decomposition_p1_geom",
- "wrist_pitch_link_convex_decomposition_p2_geom",
- "wrist_pitch_link_convex_decomposition_p3_geom",
- "wrist_yaw_link_convex_decomposition_p1_geom",
- "wrist_yaw_link_convex_decomposition_p2_geom",
- "forearm_link_convex_decomposition_p1_geom",
- "forearm_link_convex_decomposition_p2_geom",
- "upper_arm_link_convex_decomposition_p1_geom",
- "upper_arm_link_convex_decomposition_p2_geom",
- "shoulder_link_convex_decomposition_p1_geom",
- "shoulder_link_convex_decomposition_p2_geom",
- "shoulder_link_convex_decomposition_p3_geom",
- "base_link_convex_geom", "table_contact_geom"]
-
- self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
- "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
- # "cup_base_table", "cup_base_table_contact",
- "cup_geom_table15",
- "cup_geom_table16",
- "cup_geom_table17", "cup_geom1_table8",
- # "cup_base_table_contact",
- # "cup_base_table"
- ]
-
-
- self.ball_traj = None
- self.dists = None
- self.dists_final = None
- self.costs = None
- self.action_costs = None
- self.angle_rewards = None
- self.cup_angles = None
- self.cup_z_axes = None
- self.collision_penalty = 500
- self.reset(None)
-
- def reset(self, noisy):
- self.ball_traj = []
- self.dists = []
- self.dists_final = []
- self.costs = []
- self.action_costs = []
- self.angle_rewards = []
- self.cup_angles = []
- self.cup_z_axes = []
- self.ball_ground_contact = False
- self.ball_table_contact = False
- self.ball_wall_contact = False
- self.ball_cup_contact = False
- self.noisy_bp = noisy
- self._t_min_final_dist = -1
-
- def compute_reward(self, env, action):
- self.ball_id = env.sim.model._body_name2id["ball"]
- self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
- self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
- self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
- self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
- self.cup_table_id = env.sim.model._body_name2id["cup_table"]
- self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
- self.wall_collision_id = env.sim.model._geom_name2id["wall"]
- self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
- self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
- self.ground_collision_id = env.sim.model._geom_name2id["ground"]
- self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
-
- goal_pos = env.sim.data.site_xpos[self.goal_id]
- ball_pos = env.sim.data.body_xpos[self.ball_id]
- ball_vel = env.sim.data.body_xvelp[self.ball_id]
- goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
- self.dists.append(np.linalg.norm(goal_pos - ball_pos))
- self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
-
- action_cost = np.sum(np.square(action))
- self.action_costs.append(action_cost)
-
- if not self.ball_table_contact:
- self.ball_table_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
- self.table_collision_id)
-
- self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
- if env._steps == env.ep_length - 1 or self._is_collided:
-
- min_dist = np.min(self.dists)
- final_dist = self.dists_final[-1]
-
- ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id,
- self.cup_table_collision_id)
-
- # encourage bounce before falling into cup
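-            # staged terminal reward: a ball in the cup after a table bounce gets the largest
-            # bonus, a direct hit a smaller one, and misses are graded by their closest approach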
- if not ball_in_cup:
- if not self.ball_table_contact:
- reward = 0.2 * (1 - np.tanh(min_dist ** 2)) + 0.1 * (1 - np.tanh(final_dist ** 2))
- else:
- reward = (1 - np.tanh(min_dist ** 2)) + 0.5 * (1 - np.tanh(final_dist ** 2))
- else:
- if not self.ball_table_contact:
- reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 1
- else:
- reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 3
-
- # reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
- success = ball_in_cup
- crash = self._is_collided
- else:
- reward = - 1e-2 * action_cost
- success = False
- crash = False
-
- infos = {}
- infos["success"] = success
- infos["is_collided"] = self._is_collided
- infos["ball_pos"] = ball_pos.copy()
- infos["ball_vel"] = ball_vel.copy()
- infos["action_cost"] = action_cost
- infos["task_reward"] = reward
-
- return reward, infos
-
- def _check_collision_single_objects(self, sim, id_1, id_2):
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 == id_1 and con.geom2 == id_2
- collision_trans = con.geom1 == id_2 and con.geom2 == id_1
-
- if collision or collision_trans:
- return True
- return False
-
-    def _check_collision_with_itself(self, sim, collision_ids):
-        # True as soon as any pair of geoms within the given set is in contact
-        for j, geom_id in enumerate(collision_ids):
-            others = collision_ids[:j] + collision_ids[j + 1:]
-            if self._check_collision_with_set_of_objects(sim, geom_id, others):
-                return True
-        return False
-
- def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 in id_list and con.geom2 == id_1
- collision_trans = con.geom1 == id_1 and con.geom2 in id_list
-
- if collision or collision_trans:
- return True
- return False
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_simple.py b/alr_envs/alr/mujoco/beerpong/beerpong_simple.py
deleted file mode 100644
index 1708d38..0000000
--- a/alr_envs/alr/mujoco/beerpong/beerpong_simple.py
+++ /dev/null
@@ -1,166 +0,0 @@
-from gym import utils
-import os
-import numpy as np
-from gym.envs.mujoco import MujocoEnv
-
-
-class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
- def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None):
- self._steps = 0
-
- self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
- "beerpong" + ".xml")
-
- self.start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
- self.start_vel = np.zeros(7)
-
- self._q_pos = []
- self._q_vel = []
- # self.weight_matrix_scale = 50
- self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
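-        # PD gains are specified relative to the per-joint torque limits above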
- self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
- self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
-
- self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
- self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
-
- self.context = None
-
- # alr_mujoco_env.AlrMujocoEnv.__init__(self,
- # self.xml_path,
- # apply_gravity_comp=apply_gravity_comp,
- # n_substeps=n_substeps)
-
- self.sim_time = 8 # seconds
- # self.sim_steps = int(self.sim_time / self.dt)
- if reward_function is None:
- from alr_envs.alr.mujoco.beerpong.beerpong_reward_simple import BeerpongReward
- reward_function = BeerpongReward
- self.reward_function = reward_function(self.sim, self.sim_steps)
- self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
- self.ball_id = self.sim.model._body_name2id["ball"]
- self.cup_table_id = self.sim.model._body_name2id["cup_table"]
- # self.bounce_table_id = self.sim.model._body_name2id["bounce_table"]
-
- MujocoEnv.__init__(self, model_path=self.xml_path, frame_skip=n_substeps)
- utils.EzPickle.__init__(self)
-
- @property
- def current_pos(self):
- return self.sim.data.qpos[0:7].copy()
-
- @property
- def current_vel(self):
- return self.sim.data.qvel[0:7].copy()
-
- def configure(self, context):
- if context is None:
- context = np.array([0, -2, 0.840])
- self.context = context
- self.reward_function.reset(context)
-
- def reset_model(self):
- init_pos_all = self.init_qpos.copy()
- init_pos_robot = self.start_pos
- init_vel = np.zeros_like(init_pos_all)
-
- self._steps = 0
- self._q_pos = []
- self._q_vel = []
-
- start_pos = init_pos_all
- start_pos[0:7] = init_pos_robot
- # start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
-
- self.set_state(start_pos, init_vel)
-
- ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
- self.sim.model.body_pos[self.ball_id] = ball_pos.copy()
- self.sim.model.body_pos[self.cup_table_id] = self.context.copy()
- # self.sim.model.body_pos[self.bounce_table_id] = self.context.copy()
-
- self.sim.forward()
-
- return self._get_obs()
-
- def step(self, a):
- reward_dist = 0.0
- angular_vel = 0.0
- reward_ctrl = - np.square(a).sum()
- action_cost = np.sum(np.square(a))
-
- crash = self.do_simulation(a, self.frame_skip)
- joint_cons_viol = self.check_traj_in_joint_limits()
-
- self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
- self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
-
- ob = self._get_obs()
-
- if not crash and not joint_cons_viol:
- reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps)
- done = success or self._steps == self.sim_steps - 1 or stop_sim
- self._steps += 1
- else:
- reward = -10 - 1e-2 * action_cost
- success = False
- done = True
- return ob, reward, done, dict(reward_dist=reward_dist,
- reward_ctrl=reward_ctrl,
- velocity=angular_vel,
- traj=self._q_pos, is_success=success,
- is_collided=crash or joint_cons_viol)
-
- def check_traj_in_joint_limits(self):
- return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
-
- def extend_des_pos(self, des_pos):
- des_pos_full = self.start_pos.copy()
- des_pos_full[1] = des_pos[0]
- des_pos_full[3] = des_pos[1]
- des_pos_full[5] = des_pos[2]
- return des_pos_full
-
- def extend_des_vel(self, des_vel):
- des_vel_full = self.start_vel.copy()
- des_vel_full[1] = des_vel[0]
- des_vel_full[3] = des_vel[1]
- des_vel_full[5] = des_vel[2]
- return des_vel_full
-
- def _get_obs(self):
- theta = self.sim.data.qpos.flat[:7]
- return np.concatenate([
- np.cos(theta),
- np.sin(theta),
- # self.get_body_com("target"), # only return target to make problem harder
- [self._steps],
- ])
-
-
-
-if __name__ == "__main__":
- env = ALRBeerpongEnv()
- ctxt = np.array([0, -2, 0.840]) # initial
-
- env.configure(ctxt)
- env.reset()
- env.render()
- for i in range(16000):
- # test with random actions
- ac = 0.0 * env.action_space.sample()[0:7]
- ac[1] = -0.01
- ac[3] = - 0.01
- ac[5] = -0.01
- # ac = env.start_pos
- # ac[0] += np.pi/2
- obs, rew, d, info = env.step(ac)
- env.render()
-
- print(rew)
-
- if d:
- break
-
- env.close()
-
diff --git a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
deleted file mode 100644
index 87c6b7e..0000000
--- a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
-
-
-class MPWrapper(MPEnvWrapper):
-
- @property
- def active_obs(self):
- # TODO: @Max Filter observations correctly
- return np.hstack([
- [False] * 7, # cos
- [False] * 7, # sin
- # [True] * 2, # x-y coordinates of target distance
- [False] # env steps
- ])
-
- @property
- def start_pos(self):
- return self._start_pos
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.sim.data.qpos[0:7].copy()
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.sim.data.qvel[0:7].copy()
-
- @property
- def goal_pos(self):
- # TODO: @Max I think the default value of returning to the start is reasonable here
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/__init__.py b/alr_envs/alr/mujoco/gym_table_tennis/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/__init__.py b/alr_envs/alr/mujoco/gym_table_tennis/envs/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_7_motor_actuator.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_7_motor_actuator.xml
deleted file mode 100644
index 7772d14..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_7_motor_actuator.xml
+++ /dev/null
@@ -1,12 +0,0 @@
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_left.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_left.xml
deleted file mode 100644
index a8df915..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_left.xml
+++ /dev/null
@@ -1,76 +0,0 @@
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_right.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_right.xml
deleted file mode 100644
index 011b95a..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_right.xml
+++ /dev/null
@@ -1,95 +0,0 @@
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_table.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_table.xml
deleted file mode 100644
index ad1ae35..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_table.xml
+++ /dev/null
@@ -1,38 +0,0 @@
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_target_ball.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_target_ball.xml
deleted file mode 100644
index eb2b347..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_target_ball.xml
+++ /dev/null
@@ -1,10 +0,0 @@
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_test_balls.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_test_balls.xml
deleted file mode 100644
index 29a21e1..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_test_balls.xml
+++ /dev/null
@@ -1,80 +0,0 @@
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_convex.stl
deleted file mode 100644
index 133b112..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_convex.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_fine.stl
deleted file mode 100644
index 047e9df..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_fine.stl
deleted file mode 100644
index 5ff94a2..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_convex.stl
deleted file mode 100644
index c548448..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_convex.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_fine.stl
deleted file mode 100644
index 495160d..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl
deleted file mode 100644
index b4bb322..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl
deleted file mode 100644
index f05174e..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_fine.stl
deleted file mode 100644
index eb252d9..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p1.stl
deleted file mode 100644
index c039f0d..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p2.stl
deleted file mode 100644
index 250acaf..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p3.stl
deleted file mode 100644
index 993d0f7..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p3.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p4.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p4.stl
deleted file mode 100644
index 8448a3f..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p4.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_convex.stl
deleted file mode 100644
index b34963d..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_convex.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p1.stl
deleted file mode 100644
index e6aa6b6..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p2.stl
deleted file mode 100644
index 667902e..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_fine.stl
deleted file mode 100644
index ed66bbb..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p1.stl
deleted file mode 100644
index aba957d..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p2.stl
deleted file mode 100644
index 5cca6a9..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p3.stl
deleted file mode 100644
index 3343e27..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p3.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_fine.stl
deleted file mode 100644
index ae505fd..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_convex.stl
deleted file mode 100644
index c36cfec..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_convex.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_fine.stl
deleted file mode 100644
index dc633c4..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p1.stl
deleted file mode 100644
index 82d0093..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p2.stl
deleted file mode 100644
index 7fd5a55..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_fine.stl
deleted file mode 100644
index 76353ae..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_convex.stl
deleted file mode 100644
index a0386f6..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_convex.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl
deleted file mode 100644
index c36f88f..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl
deleted file mode 100644
index d00cac1..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl
deleted file mode 100644
index 34d1d8b..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_fine.stl
deleted file mode 100644
index 13d2f73..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl
deleted file mode 100644
index 06e857f..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl
deleted file mode 100644
index 48e1bb1..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_fine.stl
deleted file mode 100644
index 0d95239..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/right_arm_actuator.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/right_arm_actuator.xml
deleted file mode 100644
index 9abf102..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/right_arm_actuator.xml
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/shared.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/shared.xml
deleted file mode 100644
index dfbc37a..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/shared.xml
+++ /dev/null
@@ -1,49 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/table_tennis_env.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/table_tennis_env.xml
deleted file mode 100644
index f2432bb..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/table_tennis_env.xml
+++ /dev/null
@@ -1,41 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/table_tennis_env.py b/alr_envs/alr/mujoco/gym_table_tennis/envs/table_tennis_env.py
deleted file mode 100644
index 9b16ae1..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/table_tennis_env.py
+++ /dev/null
@@ -1,244 +0,0 @@
-import numpy as np
-from gym import spaces
-from gym.envs.robotics import robot_env, utils
-# import xml.etree.ElementTree as ET
-from alr_envs.alr.mujoco.gym_table_tennis.utils.rewards.hierarchical_reward import HierarchicalRewardTableTennis
-import glfw
-from alr_envs.alr.mujoco.gym_table_tennis.utils.experiment import ball_initialize
-from pathlib import Path
-import os
-
-
-class TableTennisEnv(robot_env.RobotEnv):
- """Class for Table Tennis environment.
- """
- def __init__(self, n_substeps=1,
- model_path=None,
- initial_qpos=None,
- initial_ball_state=None,
- config=None,
- reward_obj=None
- ):
- """Initializes a new mujoco based Table Tennis environment.
-
- Args:
- model_path (string): path to the environments XML file
- initial_qpos (dict): a dictionary of joint names and values that define the initial
- n_actions: Number of joints
- n_substeps (int): number of substeps the simulation runs on every call to step
- scale (double): limit maximum change in position
- initial_ball_state: to reset the ball state
- """
- # self.config = config.config
- if model_path is None:
- path_cws = Path.cwd()
- print(path_cws)
- current_dir = Path(os.path.split(os.path.realpath(__file__))[0])
- table_tennis_env_xml_path = current_dir / "assets"/"table_tennis_env.xml"
- model_path = str(table_tennis_env_xml_path)
- self.config = config
- action_space = True # self.config['trajectory']['args']['action_space']
- time_step = 0.002 # self.config['mujoco_sim_env']['args']["time_step"]
- if initial_qpos is None:
- initial_qpos = {"wam/base_yaw_joint_right": 1.5,
- "wam/shoulder_pitch_joint_right": 1,
- "wam/shoulder_yaw_joint_right": 0,
- "wam/elbow_pitch_joint_right": 1,
- "wam/wrist_yaw_joint_right": 1,
- "wam/wrist_pitch_joint_right": 0,
- "wam/palm_yaw_joint_right": 0}
- # initial_qpos = [1.5, 1, 0, 1, 1, 0, 0] # self.config['robot_config']['args']['initial_qpos']
-
- # TODO should read all configuration in config
- assert initial_qpos is not None, "Must initialize the initial q position of robot arm"
- n_actions = 7
- self.initial_qpos_value = np.array(list(initial_qpos.values())).copy()
- # self.initial_qpos_value = np.array(initial_qpos)
- # # change time step in .xml file
- # tree = ET.parse(model_path)
- # root = tree.getroot()
- # for option in root.findall('option'):
- # option.set("timestep", str(time_step))
- #
- # tree.write(model_path)
-
- super(TableTennisEnv, self).__init__(
- model_path=model_path, n_substeps=n_substeps, n_actions=n_actions,
- initial_qpos=initial_qpos)
-
- if action_space:
- self.action_space = spaces.Box(low=np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2]),
- high=np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2]),
- dtype='float64')
- else:
- self.action_space = spaces.Box(low=np.array([-np.inf] * 7),
- high=np.array([-np.inf] * 7),
- dtype='float64')
- self.scale = None
- self.desired_pos = None
- self.n_actions = n_actions
- self.action = None
- self.time_step = time_step
- self._dt = time_step
- self.paddle_center_pos = self.sim.data.get_site_xpos('wam/paddle_center')
- if reward_obj is None:
- self.reward_obj = HierarchicalRewardTableTennis()
- else:
- self.reward_obj = reward_obj
-
- if initial_ball_state is not None:
- self.initial_ball_state = initial_ball_state
- else:
- self.initial_ball_state = ball_initialize(random=False)
- self.target_ball_pos = self.sim.data.get_site_xpos("target_ball")
- self.racket_center_pos = self.sim.data.get_site_xpos("wam/paddle_center")
-
- def close(self):
- if self.viewer is not None:
- glfw.destroy_window(self.viewer.window)
- # self.viewer.window.close()
- self.viewer = None
- self._viewers = {}
-
- # GoalEnv methods
- # ----------------------------
- def compute_reward(self, achieved_goal, goal, info):
- # reset the reward, if action valid
- # right_court_contact_obj = ["target_ball", "table_tennis_table_right_side"]
- # right_court_contact_detector = self.reward_obj.contact_detection(self, right_court_contact_obj)
- # if right_court_contact_detector:
- # print("can detect the table ball contact")
- self.reward_obj.total_reward = 0
- # Stage 1 Hitting
- self.reward_obj.hitting(self)
- # if the ball has not been hit, return the reward accumulated so far
- if not self.reward_obj.goal_achievement:
- # return self.reward_obj.highest_reward
- return self.reward_obj.total_reward
- # # Stage 2 Right Table Contact
- # self.reward_obj.right_table_contact(self)
- # if not self.reward_obj.goal_achievement:
- # return self.reward_obj.highest_reward
- # # Stage 2 Net Contact
- # self.reward_obj.net_contact(self)
- # if not self.reward_obj.goal_achievement:
- # return self.reward_obj.highest_reward
- # Stage 3 Opponent court Contact
- # self.reward_obj.landing_on_opponent_court(self)
- # if not self.reward_obj.goal_achievement:
- # print("self.reward_obj.highest_reward: ", self.reward_obj.highest_reward)
- # TODO
- self.reward_obj.target_achievement(self)
- # return self.reward_obj.highest_reward
- return self.reward_obj.total_reward
-
- def _reset_sim(self):
- self.sim.set_state(self.initial_state)
- [initial_x, initial_y, initial_z, v_x, v_y, v_z] = self.initial_ball_state
- self.sim.data.set_joint_qpos('tar:x', initial_x)
- self.sim.data.set_joint_qpos('tar:y', initial_y)
- self.sim.data.set_joint_qpos('tar:z', initial_z)
- self.energy_corrected = True
- self.give_reflection_reward = False
-
- # velocity is positive direction
- self.sim.data.set_joint_qvel('tar:x', v_x)
- self.sim.data.set_joint_qvel('tar:y', v_y)
- self.sim.data.set_joint_qvel('tar:z', v_z)
-
- # Apply gravity compensation
- if self.sim.data.qfrc_applied[:7] is not self.sim.data.qfrc_bias[:7]:
- self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7]
- self.sim.forward()
- return True
-
- def _env_setup(self, initial_qpos):
- for name, value in initial_qpos.items():
- self.sim.data.set_joint_qpos(name, value)
-
- # Apply gravity compensation
- if self.sim.data.qfrc_applied[:7] is not self.sim.data.qfrc_bias[:7]:
- self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7]
- self.sim.forward()
-
- # Get the target position
- self.initial_paddle_center_xpos = self.sim.data.get_site_xpos('wam/paddle_center').copy()
- self.initial_paddle_center_vel = None # self.sim.get_site_
-
- def _sample_goal(self):
- goal = self.initial_paddle_center_xpos[:3] + self.np_random.uniform(-0.2, 0.2, size=3)
- return goal.copy()
-
- def _get_obs(self):
-
- # positions of racket center
- paddle_center_pos = self.sim.data.get_site_xpos('wam/paddle_center')
- ball_pos = self.sim.data.get_site_xpos("target_ball")
-
- dt = self.sim.nsubsteps * self.sim.model.opt.timestep
- paddle_center_velp = self.sim.data.get_site_xvelp('wam/paddle_center') * dt
- robot_qpos, robot_qvel = utils.robot_get_obs(self.sim)
-
- wrist_state = robot_qpos[-3:]
- wrist_vel = robot_qvel[-3:] * dt # change to a scalar if the gripper is made symmetric
-
- # achieved_goal = paddle_body_EE_pos
- obs = np.concatenate([
- paddle_center_pos, paddle_center_velp, wrist_state, wrist_vel
- ])
-
- out_dict = {
- 'observation': obs.copy(),
- 'achieved_goal': paddle_center_pos.copy(),
- 'desired_goal': self.goal.copy(),
- 'q_pos': self.sim.data.qpos[:].copy(),
- "ball_pos": ball_pos.copy(),
- # "hitting_flag": self.reward_obj.hitting_flag
- }
-
- return out_dict
-
- def _step_callback(self):
- pass
-
- def _set_action(self, action):
- # Apply gravity compensation
- if self.sim.data.qfrc_applied[:7] is not self.sim.data.qfrc_bias[:7]:
- self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7]
- # print("set action process running")
- assert action.shape == (self.n_actions,)
- self.action = action.copy() # ensure that we don't change the action outside of this scope
- pos_ctrl = self.action[:] # limit maximum change in position
- pos_ctrl = np.clip(pos_ctrl, self.action_space.low, self.action_space.high)
-
- # get desired trajectory
- self.sim.data.qpos[:7] = pos_ctrl
- self.sim.forward()
- self.desired_pos = self.sim.data.get_site_xpos('wam/paddle_center').copy()
-
- self.sim.data.ctrl[:] = pos_ctrl
-
- def _is_success(self, achieved_goal, desired_goal):
- pass
-
-
-if __name__ == '__main__':
- render_mode = "human" # "human" or "partial" or "final"
- env = TableTennisEnv()
- env.reset()
- # env.render(mode=render_mode)
-
- for i in range(500):
- # objective.load_result("/tmp/cma")
- # test with random actions
- ac = env.action_space.sample()
- # ac[0] += np.pi/2
- obs, rew, d, info = env.step(ac)
- env.render(mode=render_mode)
-
- print(rew)
-
- if d:
- break
-
- env.close()
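
Note on the gravity-compensation pattern used in _reset_sim, _env_setup and _set_action above: the guard `qfrc_applied[:7] is not qfrc_bias[:7]` always evaluates to True, because slicing creates fresh array objects, so the copy is simply performed on every call. A minimal sketch of the same idea with an explicit value comparison, assuming a mujoco_py-style `sim` object (the helper name is illustrative and not part of the deleted code):

import numpy as np

def apply_gravity_compensation(sim, n_joints=7):
    """Cancel gravity/Coriolis bias forces on the first `n_joints` DoFs."""
    bias = sim.data.qfrc_bias[:n_joints]
    # Only write when the applied forces actually differ from the bias term.
    if not np.allclose(sim.data.qfrc_applied[:n_joints], bias):
        sim.data.qfrc_applied[:n_joints] = bias
    sim.forward()
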
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/__init__.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/experiment.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/experiment.py
deleted file mode 100644
index a106d68..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/utils/experiment.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import numpy as np
-from gym.utils import seeding
-from alr_envs.alr.mujoco.gym_table_tennis.utils.util import read_yaml, read_json
-from pathlib import Path
-
-
-def ball_initialize(random=False, scale=False, context_range=None, scale_value=None):
- if random:
- if scale:
- # if scale_value is None:
- scale_value = context_scale_initialize(context_range)
- v_x, v_y, v_z = [2.5, 2, 0.5] * scale_value
- dx = 1
- dy = 0
- dz = 0.05
- else:
- seed = None
- np_random, seed = seeding.np_random(seed)
- dx = np_random.uniform(-0.1, 0.1)
- dy = np_random.uniform(-0.1, 0.1)
- dz = np_random.uniform(-0.1, 0.1)
-
- v_x = np_random.uniform(1.7, 1.8)
- v_y = np_random.uniform(0.7, 0.8)
- v_z = np_random.uniform(0.1, 0.2)
- # print(dx, dy, dz, v_x, v_y, v_z)
- # else:
- # dx = -0.1
- # dy = 0.05
- # dz = 0.05
- # v_x = 1.5
- # v_y = 0.7
- # v_z = 0.06
- # initial_x = -0.6 + dx
- # initial_y = -0.3 + dy
- # initial_z = 0.8 + dz
- else:
- if scale:
- v_x, v_y, v_z = [2.5, 2, 0.5] * scale_value
- else:
- v_x = 2.5
- v_y = 2
- v_z = 0.5
- dx = 1
- dy = 0
- dz = 0.05
-
- initial_x = 0 + dx
- initial_y = -0.2 + dy
- initial_z = 0.3 + dz
- # print("initial ball state: ", initial_x, initial_y, initial_z, v_x, v_y, v_z)
- initial_ball_state = np.array([initial_x, initial_y, initial_z, v_x, v_y, v_z])
- return initial_ball_state
-
-
-def context_scale_initialize(range):
- """
-
- Returns:
-
- """
- low, high = range
- scale = np.random.uniform(low, high, 1)
- return scale
-
-
-def config_handle_generation(config_file_path):
- """Generate config handle for multiprocessing
-
- Args:
- config_file_path:
-
- Returns:
-
- """
- cfg_fname = Path(config_file_path)
- # .json and .yml file
- if cfg_fname.suffix == ".json":
- config = read_json(cfg_fname)
- elif cfg_fname.suffix == ".yml":
- config = read_yaml(cfg_fname)
-
- return config
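
For context, ball_initialize produces the six-vector [x, y, z, v_x, v_y, v_z] that TableTennisEnv._reset_sim writes into the tar:* joints. A brief usage sketch, assuming ball_initialize has been imported from the module above (the scale value is an illustrative choice, not taken from the deleted code):

import numpy as np

# Random serve: offsets and velocities are sampled from small uniform ranges.
random_state = ball_initialize(random=True)

# Deterministic serve at 80% of the base velocity [2.5, 2.0, 0.5].
scaled_state = ball_initialize(random=False, scale=True, scale_value=np.array([0.8]))

x, y, z, v_x, v_y, v_z = scaled_state
print(f"ball starts at ({x:.2f}, {y:.2f}, {z:.2f}) "
      f"with velocity ({v_x:.2f}, {v_y:.2f}, {v_z:.2f})")
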
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/__init__.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py
deleted file mode 100644
index fe69104..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py
+++ /dev/null
@@ -1,402 +0,0 @@
-import numpy as np
-import logging
-
-
-class HierarchicalRewardTableTennis(object):
- """Class for hierarchical reward function for table tennis experiment.
-
- Return Highest Reward.
- Reward = 0
- Step 1: Action Valid. Upper Bound 0
- [-∞, 0]
- Reward += -1 * |hit_duration - hit_duration_threshold| * |hit_duration < hit_duration_threshold| * 10
- Step 2: Hitting. Upper Bound 2
- if hitting:
- [0, 2]
- Reward = 2 * (1 - tanh(|shortest_hitting_dist|))
- if not hitting:
- [0, 0.2]
- Reward = 0.2 * (1 - tanh(|shortest_hitting_dist|))
- Step 3: Target Point Achievement. Upper Bound 6
- [0, 4]
- if table_contact_detector:
- Reward += 1
- Reward += (1 - tanh(|shortest_hitting_dist|)) * 2
- if contact_coordinate[0] < 0:
- Reward += 1
- else:
- Reward += 0
- elif:
- Reward += (1 - tanh(|shortest_hitting_dist|))
- """
-
- def __init__(self):
- self.reward = None
- self.goal_achievement = False
- self.total_reward = 0
- self.shortest_hitting_dist = 1000
- self.highest_reward = -1000
- self.lowest_corner_dist = 100
- self.right_court_contact_detector = False
- self.table_contact_detector = False
- self.floor_contact_detector = False
- self.radius = 0.025
- self.min_ball_x_pos = 100
- self.hit_contact_detector = False
- self.net_contact_detector = False
- self.ratio = 1
- self.lowest_z = 100
- self.target_flag = False
- self.dist_target_virtual = 100
- self.ball_z_pos_lowest = 100
- self.hitting_flag = False
- self.hitting_time_point = None
- self.ctxt_dim = None
- self.context_range_bounds = None
- # self.ctxt_out_of_range_punishment = None
- # self.ctxt_in_side_of_range_punishment = None
- #
- # def check_where_invalid(self, ctxt, context_range_bounds, set_to_valid_region=False):
- # idx_max = []
- # idx_min = []
- # for dim in range(self.ctxt_dim):
- # min_dim = context_range_bounds[0][dim]
- # max_dim = context_range_bounds[1][dim]
- # idx_max_c = np.where(ctxt[:, dim] > max_dim)[0]
- # idx_min_c = np.where(ctxt[:, dim] < min_dim)[0]
- # if set_to_valid_region:
- # if idx_max_c.shape[0] != 0:
- # ctxt[idx_max_c, dim] = max_dim
- # if idx_min_c.shape[0] != 0:
- # ctxt[idx_min_c, dim] = min_dim
- # idx_max.append(idx_max_c)
- # idx_min.append(idx_min_c)
- # return idx_max, idx_min, ctxt
-
- def check_valid(self, scale, context_range_bounds):
-
- min_dim = context_range_bounds[0][0]
- max_dim = context_range_bounds[1][0]
- valid = (scale < max_dim) and (scale > min_dim)
- return valid
-
- @classmethod
- def goal_distance(cls, goal_a, goal_b):
- assert goal_a.shape == goal_b.shape
- return np.linalg.norm(goal_a - goal_b, axis=-1)
-
- def refresh_highest_reward(self):
- if self.total_reward >= self.highest_reward:
- self.highest_reward = self.total_reward
-
- def duration_valid(self):
- pass
-
- def huge_value_unstable(self):
- self.total_reward += -10
- self.highest_reward = -1
-
- def context_valid(self, context):
- valid = self.check_valid(context.copy(), context_range_bounds=self.context_range_bounds)
- # when using dirac punishments
- if valid:
- self.total_reward += 1 # If Action Valid and Context Valid, total_reward = 0
- else:
- self.total_reward += 0
- self.refresh_highest_reward()
-
-
-
- # If in the ctxt, add 1, otherwise, 0
-
- def action_valid(self, durations=None):
- """Ensure the execution of the robot movement with parameters which are in a valid domain.
-
- Time should always be positive,
- the joint position of the robot should be a subset of [−π, π].
- if all parameters are valid, the robot gets a zero score,
- otherwise it gets a negative score proportional to how much it is beyond the valid parameter domain.
-
- Returns:
- rewards: if valid, reward is equal to 0.
- if not valid, reward is negative and proportional to the distance beyond the valid parameter domain
- """
- assert durations.shape[0] == 2, "durations type should be np.array and the shape should be 2"
- # pre_duration = durations[0]
- hit_duration = durations[1]
- # pre_duration_thres = 0.01
- hit_duration_thres = 1
- # self.goal_achievement = np.all(
- # [(pre_duration > pre_duration_thres), (hit_duration > hit_duration_thres), (0.3 < pre_duration < 0.6)])
- self.goal_achievement = (hit_duration > hit_duration_thres)
- if self.goal_achievement:
- self.total_reward = -1
- self.goal_achievement = True
- else:
- # self.total_reward += -1 * ((np.abs(pre_duration - pre_duration_thres) * int(
- # pre_duration < pre_duration_thres) + np.abs(hit_duration - hit_duration_thres) * int(
- # hit_duration < hit_duration_thres)) * 10)
- self.total_reward = -1 * ((np.abs(hit_duration - hit_duration_thres) * int(
- hit_duration < hit_duration_thres)) * 10)
- self.total_reward += -1
- self.goal_achievement = False
- self.refresh_highest_reward()
-
- def motion_penalty(self, action, high_motion_penalty):
- """Protects the robot from high acceleration and dangerous movement.
- """
- if not high_motion_penalty:
- reward_ctrl = - 0.05 * np.square(action).sum()
- else:
- reward_ctrl = - 0.075 * np.square(action).sum()
- self.total_reward += reward_ctrl
- self.refresh_highest_reward()
- self.goal_achievement = True
-
- def hitting(self, env): # , target_ball_pos, racket_center_pos, hit_contact_detector=False
- """Hitting reward calculation
-
- If racket successfully hit the ball, the reward +1
- Otherwise calculate the distance between the center of racket and the center of ball,
- reward = tanh(r/dist) if dist<1 reward almost 2 , if dist >= 1 reward is between [0, 0.2]
-
-
- Args:
- env:
-
- Returns:
-
- """
-
- hit_contact_obj = ["target_ball", "bat"]
- target_ball_pos = env.target_ball_pos
- racket_center_pos = env.racket_center_pos
- # hit contact detection
- # Record the hitting history
- self.hitting_flag = False
- if not self.hit_contact_detector:
- self.hit_contact_detector = self.contact_detection(env, hit_contact_obj)
- if self.hit_contact_detector:
- print("First time detect hitting")
- self.hitting_flag = True
- if self.hit_contact_detector:
-
- # TODO
- dist = self.goal_distance(target_ball_pos, racket_center_pos)
- if dist < 0:
- dist = 0
- # print("goal distance is:", dist)
- if dist <= self.shortest_hitting_dist:
- self.shortest_hitting_dist = dist
- # print("shortest_hitting_dist is:", self.shortest_hitting_dist)
- # Keep the shortest hitting distance.
- dist_reward = 2 * (1 - np.tanh(np.abs(self.shortest_hitting_dist)))
-
- # TODO sparse
- # dist_reward = 2
-
- self.total_reward += dist_reward
- self.goal_achievement = True
-
- # if self.hitting_time_point is not None and self.hitting_time_point > 600:
- # self.total_reward += 1
-
- else:
- dist = self.goal_distance(target_ball_pos, racket_center_pos)
- if dist <= self.shortest_hitting_dist:
- self.shortest_hitting_dist = dist
- dist_reward = 1 - np.tanh(self.shortest_hitting_dist)
- reward = 0.2 * dist_reward # scaled by 0.2 because the racket did not hit the ball
- self.total_reward += reward
- self.goal_achievement = False
-
- self.refresh_highest_reward()
-
- @classmethod
- def relu(cls, x):
- return np.maximum(0, x)
-
- # def right_table_contact(self, env):
- # right_court_contact_obj = ["target_ball", "table_tennis_table_right_side"]
- # if env.target_ball_pos[0] >= 0 and env.target_ball_pos[2] >= 0.7:
- # # update right court contact detection
- # if not self.right_court_contact_detector:
- # self.right_court_contact_detector = self.contact_detection(env, right_court_contact_obj)
- # if self.right_court_contact_detector:
- # self.contact_x_pos = env.target_ball_pos[0]
- # if self.right_court_contact_detector:
- # self.total_reward += 1 - norm(0.685, 1).pdf(self.contact_x_pos) # x axis middle of right table
- # self.goal_achievement = False
- # else:
- # self.total_reward += 1
- # self.goal_achievement = True
- # # else:
- # # self.total_reward += 0
- # # self.goal_achievement = False
- # self.refresh_highest_reward()
-
- # def net_contact(self, env):
- # net_contact_obj = ["target_ball", "table_tennis_net"]
- # # net_contact_detector = self.contact_detection(env, net_contact_obj)
- # # ball_x_pos = env.target_ball_pos[0]
- # # if self.min_ball_x_pos >= ball_x_pos:
- # # self.min_ball_x_pos = ball_x_pos
- # # table_left_edge_x_pos = -1.37
- # # if np.abs(ball_x_pos) <= 0.01: # x threshold of net
- # # if self.lowest_z >= env.target_ball_pos[2]:
- # # self.lowest_z = env.target_ball_pos[2]
- # # # construct a gaussian distribution of z
- # # z_reward = 4 - norm(0, 0.1).pdf(self.lowest_z - 0.07625) # maximum 4
- # # self.total_reward += z_reward
- # # self.total_reward += 2 - np.minimum(1, self.relu(np.abs(self.min_ball_x_pos)))
- # if not self.net_contact_detector:
- # self.net_contact_detector = self.contact_detection(env, net_contact_obj)
- # if self.net_contact_detector:
- # self.total_reward += 0 # very high cost
- # self.goal_achievement = False
- # else:
- # self.total_reward += 1
- # self.goal_achievement = True
- # self.refresh_highest_reward()
-
- # def landing_on_opponent_court(self, env):
- # # Very sparse reward
- # # don't contact the right side court
- # # right_court_contact_obj = ["target_ball", "table_tennis_table_right_side"]
- # # right_court_contact_detector = self.contact_detection(env, right_court_contact_obj)
- # left_court_contact_obj = ["target_ball", "table_tennis_table_left_side"]
- # # left_court_contact_detector = self.contact_detection(env, left_court_contact_obj)
- # # record the contact history
- # # if not self.right_court_contact_detector:
- # # self.right_court_contact_detector = self.contact_detection(env, right_court_contact_obj)
- # if not self.table_contact_detector:
- # self.table_contact_detector = self.contact_detection(env, left_court_contact_obj)
- #
- # dist_left_up_corner = self.goal_distance(env.target_ball_pos, env.sim.data.get_site_xpos("left_up_corner"))
- # dist_middle_up_corner = self.goal_distance(env.target_ball_pos, env.sim.data.get_site_xpos("middle_up_corner"))
- # dist_left_down_corner = self.goal_distance(env.target_ball_pos, env.sim.data.get_site_xpos("left_down_corner"))
- # dist_middle_down_corner = self.goal_distance(env.target_ball_pos,
- # env.sim.data.get_site_xpos("middle_down_corner"))
- # dist_array = np.array(
- # [dist_left_up_corner, dist_middle_up_corner, dist_left_down_corner, dist_middle_down_corner])
- # dist_corner = np.amin(dist_array)
- # if self.lowest_corner_dist >= dist_corner:
- # self.lowest_corner_dist = dist_corner
- #
- # right_contact_cost = 1
- # left_contact_reward = 2
- # dist_left_table_reward = (2 - np.tanh(self.lowest_corner_dist))
- # # TODO Try multi dimensional gaussian distribution
- # # contact only the left side court
- # if self.right_court_contact_detector:
- # self.total_reward += 0
- # self.goal_achievement = False
- # if self.table_contact_detector:
- # self.total_reward += left_contact_reward
- # self.goal_achievement = False
- # else:
- # self.total_reward += dist_left_table_reward
- # self.goal_achievement = False
- # else:
- # self.total_reward += right_contact_cost
- # if self.table_contact_detector:
- # self.total_reward += left_contact_reward
- # self.goal_achievement = True
- # else:
- # self.total_reward += dist_left_table_reward
- # self.goal_achievement = False
- # self.refresh_highest_reward()
- # # if self.left_court_contact_detector and not self.right_court_contact_detector:
- # # self.total_reward += self.ratio * left_contact_reward
- # # print("only left court reward return!!!!!!!!!")
- # # print("contact only left court!!!!!!")
- # # self.goal_achievement = True
- # # # no contact with table
- # # elif not self.right_court_contact_detector and not self.left_court_contact_detector:
- # # self.total_reward += 0 + self.ratio * dist_left_table_reward
- # # self.goal_achievement = False
- # # # contact both side
- # # elif self.right_court_contact_detector and self.left_court_contact_detector:
- # # self.total_reward += self.ratio * (left_contact_reward - right_contact_cost) # cost of contact of right court
- # # self.goal_achievement = False
- # # # contact only the right side court
- # # elif self.right_court_contact_detector and not self.left_court_contact_detector:
- # # self.total_reward += 0 + self.ratio * (
- # # dist_left_table_reward - right_contact_cost) # cost of contact of right court
- # # self.goal_achievement = False
-
- def target_achievement(self, env):
- target_coordinate = np.array([-0.5, -0.5])
- # net_contact_obj = ["target_ball", "table_tennis_net"]
- table_contact_obj = ["target_ball", "table_tennis_table"]
- floor_contact_obj = ["target_ball", "floor"]
-
- if 0.78 < env.target_ball_pos[2] < 0.8:
- dist_target_virtual = np.linalg.norm(env.target_ball_pos[:2] - target_coordinate)
- if self.dist_target_virtual > dist_target_virtual:
- self.dist_target_virtual = dist_target_virtual
- if -0.07 < env.target_ball_pos[0] < 0.07 and env.sim.data.get_joint_qvel('tar:x') < 0:
- if self.ball_z_pos_lowest > env.target_ball_pos[2]:
- self.ball_z_pos_lowest = env.target_ball_pos[2].copy()
- # if not self.net_contact_detector:
- # self.net_contact_detector = self.contact_detection(env, net_contact_obj)
- if not self.table_contact_detector:
- self.table_contact_detector = self.contact_detection(env, table_contact_obj)
- if not self.floor_contact_detector:
- self.floor_contact_detector = self.contact_detection(env, floor_contact_obj)
- if not self.target_flag:
- # Table Contact Reward.
- if self.table_contact_detector:
- self.total_reward += 1
- # only update when the first contact because of the flag
- contact_coordinate = env.target_ball_pos[:2].copy()
- print("contact table ball coordinate: ", env.target_ball_pos)
- logging.info("contact table ball coordinate: {}".format(env.target_ball_pos))
- dist_target = np.linalg.norm(contact_coordinate - target_coordinate)
- self.total_reward += (1 - np.tanh(dist_target)) * 2
- self.target_flag = True
- # Net Contact Reward. Precondition: Table Contact exits.
- if contact_coordinate[0] < 0:
- print("left table contact")
- logging.info("~~~~~~~~~~~~~~~left table contact~~~~~~~~~~~~~~~")
- self.total_reward += 1
- # TODO Z coordinate reward
- # self.total_reward += np.maximum(np.tanh(self.ball_z_pos_lowest), 0)
- self.goal_achievement = True
- else:
- print("right table contact")
- logging.info("~~~~~~~~~~~~~~~right table contact~~~~~~~~~~~~~~~")
- self.total_reward += 0
- self.goal_achievement = False
- # if self.net_contact_detector:
- # self.total_reward += 0
- # self.goal_achievement = False
- # else:
- # self.total_reward += 1
- # self.goal_achievement = False
- # Floor Contact Reward. Precondition: Table Contact exits.
- elif self.floor_contact_detector:
- self.total_reward += (1 - np.tanh(self.dist_target_virtual))
- self.target_flag = True
- self.goal_achievement = False
- # No Contact of Floor or Table, flying
- else:
- pass
- # else:
- # print("Flag is True already")
- self.refresh_highest_reward()
-
- def distance_to_target(self):
- pass
-
- @classmethod
- def contact_detection(cls, env, goal_contact):
- for i in range(env.sim.data.ncon):
- contact = env.sim.data.contact[i]
- achieved_geom1_name = env.sim.model.geom_id2name(contact.geom1)
- achieved_geom2_name = env.sim.model.geom_id2name(contact.geom2)
- if np.all([(achieved_geom1_name in goal_contact), (achieved_geom2_name in goal_contact)]):
- print("contact of " + achieved_geom1_name + " " + achieved_geom2_name)
- return True
- else:
- return False
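
The saturating distance term 2 * (1 - tanh(d)), and its 0.2-scaled variant for misses, drives both the hitting and target stages above. A small standalone check with plain NumPy makes the bounds from the class docstring concrete (the function name here is illustrative only):

import numpy as np

def hit_shaping(dist, hit=True):
    """2*(1 - tanh(|d|)) when the ball was hit, 0.2*(1 - tanh(|d|)) otherwise."""
    base = 1.0 - np.tanh(np.abs(dist))
    return 2.0 * base if hit else 0.2 * base

for d in (0.0, 0.1, 0.5, 1.0):
    print(f"dist={d:.1f}  hit={hit_shaping(d):.3f}  miss={hit_shaping(d, hit=False):.3f}")
# dist=0.0 yields the upper bounds 2.0 / 0.2; the reward decays smoothly as the
# racket-ball distance grows, matching the staged bounds described above.
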
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/rewards.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/rewards.py
deleted file mode 100644
index 6e6aa32..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/rewards.py
+++ /dev/null
@@ -1,136 +0,0 @@
-# Copyright 2017 The dm_control Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-# """Soft indicator function evaluating whether a number is within bounds."""
-#
-# from __future__ import absolute_import
-# from __future__ import division
-# from __future__ import print_function
-
-# Internal dependencies.
-import numpy as np
-
-# The value returned by tolerance() at `margin` distance from `bounds` interval.
-_DEFAULT_VALUE_AT_MARGIN = 0.1
-
-
-def _sigmoids(x, value_at_1, sigmoid):
- """Returns 1 when `x` == 0, between 0 and 1 otherwise.
-
- Args:
- x: A scalar or numpy array.
- value_at_1: A float between 0 and 1 specifying the output when `x` == 1.
- sigmoid: String, choice of sigmoid type.
-
- Returns:
- A numpy array with values between 0.0 and 1.0.
-
- Raises:
- ValueError: If not 0 < `value_at_1` < 1, except for `linear`, `cosine` and
- `quadratic` sigmoids which allow `value_at_1` == 0.
- ValueError: If `sigmoid` is of an unknown type.
- """
- if sigmoid in ('cosine', 'linear', 'quadratic'):
- if not 0 <= value_at_1 < 1:
- raise ValueError('`value_at_1` must be nonnegative and smaller than 1, '
- 'got {}.'.format(value_at_1))
- else:
- if not 0 < value_at_1 < 1:
- raise ValueError('`value_at_1` must be strictly between 0 and 1, '
- 'got {}.'.format(value_at_1))
-
- if sigmoid == 'gaussian':
- scale = np.sqrt(-2 * np.log(value_at_1))
- return np.exp(-0.5 * (x*scale)**2)
-
- elif sigmoid == 'hyperbolic':
- scale = np.arccosh(1/value_at_1)
- return 1 / np.cosh(x*scale)
-
- elif sigmoid == 'long_tail':
- scale = np.sqrt(1/value_at_1 - 1)
- return 1 / ((x*scale)**2 + 1)
-
- elif sigmoid == 'cosine':
- scale = np.arccos(2*value_at_1 - 1) / np.pi
- scaled_x = x*scale
- return np.where(abs(scaled_x) < 1, (1 + np.cos(np.pi*scaled_x))/2, 0.0)
-
- elif sigmoid == 'linear':
- scale = 1-value_at_1
- scaled_x = x*scale
- return np.where(abs(scaled_x) < 1, 1 - scaled_x, 0.0)
-
- elif sigmoid == 'quadratic':
- scale = np.sqrt(1-value_at_1)
- scaled_x = x*scale
- return np.where(abs(scaled_x) < 1, 1 - scaled_x**2, 0.0)
-
- elif sigmoid == 'tanh_squared':
- scale = np.arctanh(np.sqrt(1-value_at_1))
- return 1 - np.tanh(x*scale)**2
-
- else:
- raise ValueError('Unknown sigmoid type {!r}.'.format(sigmoid))
-
-
-def tolerance(x, bounds=(0.0, 0.0), margin=0.0, sigmoid='gaussian',
- value_at_margin=_DEFAULT_VALUE_AT_MARGIN):
- """Returns 1 when `x` falls inside the bounds, between 0 and 1 otherwise.
-
- Args:
- x: A scalar or numpy array.
- bounds: A tuple of floats specifying inclusive `(lower, upper)` bounds for
- the target interval. These can be infinite if the interval is unbounded
- at one or both ends, or they can be equal to one another if the target
- value is exact.
- margin: Float. Parameter that controls how steeply the output decreases as
- `x` moves out-of-bounds.
- * If `margin == 0` then the output will be 0 for all values of `x`
- outside of `bounds`.
- * If `margin > 0` then the output will decrease sigmoidally with
- increasing distance from the nearest bound.
- sigmoid: String, choice of sigmoid type. Valid values are: 'gaussian',
- 'linear', 'hyperbolic', 'long_tail', 'cosine', 'tanh_squared'.
- value_at_margin: A float between 0 and 1 specifying the output value when
- the distance from `x` to the nearest bound is equal to `margin`. Ignored
- if `margin == 0`.
-
- Returns:
- A float or numpy array with values between 0.0 and 1.0.
-
- Raises:
- ValueError: If `bounds[0] > bounds[1]`.
- ValueError: If `margin` is negative.
- """
- lower, upper = bounds
- if lower > upper:
- raise ValueError('Lower bound must be <= upper bound.')
- if margin < 0:
- raise ValueError('`margin` must be non-negative.')
-
- in_bounds = np.logical_and(lower <= x, x <= upper)
- if margin == 0:
- value = np.where(in_bounds, 1.0, 0.0)
- else:
- d = np.where(x < lower, lower - x, x - upper) / margin
- value = np.where(in_bounds, 1.0, _sigmoids(d, value_at_margin, sigmoid))
-
- return float(value) if np.isscalar(x) else value
-
-
-
-
-
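
A short usage example of tolerance() as a smooth success indicator, assuming it is imported from the module above (the bounds and margin are arbitrary illustrative values, not taken from any environment in this package):

# Reward is 1.0 while the distance stays within 5 cm of the target,
# then decays with a Gaussian profile over a 20 cm margin.
for dist in (0.0, 0.04, 0.10, 0.30, 0.60):
    r = tolerance(dist, bounds=(0.0, 0.05), margin=0.2, sigmoid='gaussian')
    print(f"distance={dist:.2f} m  reward={r:.3f}")
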
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/util.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/util.py
deleted file mode 100644
index fa308e3..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/utils/util.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import json
-import yaml
-import xml.etree.ElementTree as ET
-from collections import OrderedDict
-from pathlib import Path
-
-
-def read_json(fname):
- fname = Path(fname)
- with fname.open('rt') as handle:
- return json.load(handle, object_hook=OrderedDict)
-
-
-def write_json(content, fname):
- fname = Path(fname)
- with fname.open('wt') as handle:
- json.dump(content, handle, indent=4, sort_keys=False)
-
-
-def read_yaml(fname):
- fname = Path(fname)
- with fname.open('rt') as handle:
- return yaml.load(handle, Loader=yaml.FullLoader)
-
-
-def write_yaml(content, fname):
- fname = Path(fname)
- with fname.open('wt') as handle:
- yaml.dump(content, handle)
-
-
-def config_save(dir_path, config):
- dir_path = Path(dir_path)
- config_path_json = dir_path / "config.json"
- config_path_yaml = dir_path / "config.yml"
- # Save the configuration in both .json and .yml format.
- write_json(config, config_path_json)
- write_yaml(config, config_path_yaml)
-
-
-def change_kp_in_xml(kp_list,
- model_path="/home/zhou/slow/table_tennis_rl/simulation/gymTableTennis/gym_table_tennis/simple_reacher/robotics/assets/table_tennis/right_arm_actuator.xml"):
- tree = ET.parse(model_path)
- root = tree.getroot()
- # for actuator in root.find("actuator"):
- for position, kp in zip(root.iter('position'), kp_list):
- position.set("kp", str(kp))
- tree.write(model_path)
-
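
A brief usage sketch of the I/O helpers above, assuming they are imported from this module (all paths below are placeholders):

config = {"trajectory": {"duration": 1.0}, "robot": {"n_links": 7}}

write_json(config, "/tmp/config.json")
write_yaml(config, "/tmp/config.yml")

assert read_json("/tmp/config.json")["robot"]["n_links"] == 7
assert read_yaml("/tmp/config.yml") == config

# change_kp_in_xml rewrites the kp attribute of each <position> actuator in place;
# it needs an existing actuator XML, so the call is shown here with a placeholder path.
# change_kp_in_xml([200] * 7, model_path="/tmp/right_arm_actuator.xml")
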
diff --git a/alr_envs/alr/mujoco/meshes/wam/base_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/base_link_convex.stl
deleted file mode 100755
index 133b112..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/base_link_convex.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/base_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/base_link_fine.stl
deleted file mode 100755
index 047e9df..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/base_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl
deleted file mode 100644
index 3b05c27..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl
deleted file mode 100644
index 5ff94a2..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl
deleted file mode 100644
index c548448..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl
deleted file mode 100644
index 495160d..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl
deleted file mode 100644
index b4bb322..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl
deleted file mode 100644
index 7b2f001..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl
deleted file mode 100644
index f05174e..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl
deleted file mode 100644
index eb252d9..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl
deleted file mode 100644
index 0a986fa..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl
deleted file mode 100644
index c039f0d..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl
deleted file mode 100644
index 250acaf..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl
deleted file mode 100644
index 993d0f7..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl
deleted file mode 100644
index 8448a3f..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup.stl b/alr_envs/alr/mujoco/meshes/wam/cup.stl
deleted file mode 100644
index bc34058..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split1.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split1.stl
deleted file mode 100644
index c80aa61..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split10.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split10.stl
deleted file mode 100644
index bd5708b..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split10.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split11.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split11.stl
deleted file mode 100644
index ac81da2..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split11.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split12.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split12.stl
deleted file mode 100644
index a18e96e..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split12.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split13.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split13.stl
deleted file mode 100644
index f0e5832..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split13.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split14.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split14.stl
deleted file mode 100644
index 41a3e94..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split14.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split15.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split15.stl
deleted file mode 100644
index 7a26643..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split15.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split16.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split16.stl
deleted file mode 100644
index 155b24e..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split16.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split17.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split17.stl
deleted file mode 100644
index 2fe8d95..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split17.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split18.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split18.stl
deleted file mode 100644
index f5287b2..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split18.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split2.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split2.stl
deleted file mode 100644
index 5c1e50c..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split3.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split3.stl
deleted file mode 100644
index ef6d547..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split3.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split4.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split4.stl
deleted file mode 100644
index 5476296..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split4.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split5.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split5.stl
deleted file mode 100644
index ccfcd42..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split5.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split6.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split6.stl
deleted file mode 100644
index 72d6287..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split6.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split7.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split7.stl
deleted file mode 100644
index d4918f2..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split7.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split8.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split8.stl
deleted file mode 100644
index 8a0cd84..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split8.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/cup_split9.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split9.stl
deleted file mode 100644
index 4281a69..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/cup_split9.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/elbow_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/elbow_link_convex.stl
deleted file mode 100755
index b34963d..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/elbow_link_convex.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/elbow_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/elbow_link_fine.stl
deleted file mode 100755
index f6a1515..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/elbow_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/forearm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/forearm_link_convex_decomposition_p1.stl
deleted file mode 100755
index e6aa6b6..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/forearm_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/forearm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/forearm_link_convex_decomposition_p2.stl
deleted file mode 100755
index 667902e..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/forearm_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/forearm_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/forearm_link_fine.stl
deleted file mode 100755
index ed66bbb..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/forearm_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/shoulder_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/shoulder_link_convex_decomposition_p1.stl
deleted file mode 100755
index aba957d..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/shoulder_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/shoulder_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/shoulder_link_convex_decomposition_p2.stl
deleted file mode 100755
index 5cca6a9..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/shoulder_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/shoulder_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/meshes/wam/shoulder_link_convex_decomposition_p3.stl
deleted file mode 100755
index 3343e27..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/shoulder_link_convex_decomposition_p3.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/shoulder_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/shoulder_link_fine.stl
deleted file mode 100755
index ae505fd..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/shoulder_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/shoulder_pitch_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/shoulder_pitch_link_convex.stl
deleted file mode 100755
index c36cfec..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/shoulder_pitch_link_convex.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/shoulder_pitch_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/shoulder_pitch_link_fine.stl
deleted file mode 100755
index dc633c4..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/shoulder_pitch_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/upper_arm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/upper_arm_link_convex_decomposition_p1.stl
deleted file mode 100755
index 82d0093..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/upper_arm_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/upper_arm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/upper_arm_link_convex_decomposition_p2.stl
deleted file mode 100755
index 7fd5a55..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/upper_arm_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/upper_arm_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/upper_arm_link_fine.stl
deleted file mode 100755
index 76353ae..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/upper_arm_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_palm_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_palm_link_convex.stl
deleted file mode 100755
index a0386f6..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/wrist_palm_link_convex.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_palm_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_palm_link_fine.stl
deleted file mode 100755
index f6b41ad..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/wrist_palm_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl
deleted file mode 100644
index c36f88f..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl
deleted file mode 100644
index d00cac1..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl
deleted file mode 100755
index 34d1d8b..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl
deleted file mode 100644
index 13d2f73..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl
deleted file mode 100755
index 06e857f..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl
deleted file mode 100755
index 48e1bb1..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl
deleted file mode 100644
index 0d95239..0000000
Binary files a/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/reacher/__init__.py b/alr_envs/alr/mujoco/reacher/__init__.py
deleted file mode 100644
index 989b5a9..0000000
--- a/alr_envs/alr/mujoco/reacher/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/reacher/alr_reacher.py b/alr_envs/alr/mujoco/reacher/alr_reacher.py
deleted file mode 100644
index c2b5f18..0000000
--- a/alr_envs/alr/mujoco/reacher/alr_reacher.py
+++ /dev/null
@@ -1,139 +0,0 @@
-import os
-
-import numpy as np
-from gym import utils
-from gym.envs.mujoco import MujocoEnv
-
-import alr_envs.utils.utils as alr_utils
-
-
-class ALRReacherEnv(MujocoEnv, utils.EzPickle):
- def __init__(self, steps_before_reward=200, n_links=5, balance=False):
- utils.EzPickle.__init__(**locals())
-
- self._steps = 0
- self.steps_before_reward = steps_before_reward
- self.n_links = n_links
-
- self.balance = balance
- self.balance_weight = 1.0
-
- self.reward_weight = 1
- if steps_before_reward == 200:
- self.reward_weight = 200
- elif steps_before_reward == 50:
- self.reward_weight = 50
-
- if n_links == 5:
- file_name = 'reacher_5links.xml'
- elif n_links == 7:
- file_name = 'reacher_7links.xml'
- else:
- raise ValueError(f"Invalid number of links {n_links}, only 5 or 7 allowed.")
-
- MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", file_name), 2)
-
- def step(self, a):
- self._steps += 1
-
- reward_dist = 0.0
- angular_vel = 0.0
- reward_balance = 0.0
- if self._steps >= self.steps_before_reward:
- vec = self.get_body_com("fingertip") - self.get_body_com("target")
- reward_dist -= self.reward_weight * np.linalg.norm(vec)
- if self.steps_before_reward > 0:
- # avoid giving this penalty in the normal step-based case
- # angular_vel -= 10 * np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
- angular_vel -= 10 * np.square(self.sim.data.qvel.flat[:self.n_links]).sum()
- reward_ctrl = - 10 * np.square(a).sum()
-
- if self.balance:
- reward_balance -= self.balance_weight * np.abs(
- alr_utils.angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad"))
-
- reward = reward_dist + reward_ctrl + angular_vel + reward_balance
- self.do_simulation(a, self.frame_skip)
- ob = self._get_obs()
- done = False
- return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl,
- velocity=angular_vel, reward_balance=reward_balance,
- end_effector=self.get_body_com("fingertip").copy(),
- goal=self.goal if hasattr(self, "goal") else None)
-
- def viewer_setup(self):
- self.viewer.cam.trackbodyid = 0
-
- # def reset_model(self):
- # qpos = self.init_qpos
- # if not hasattr(self, "goal"):
- # self.goal = np.array([-0.25, 0.25])
- # # self.goal = self.init_qpos.copy()[:2] + 0.05
- # qpos[-2:] = self.goal
- # qvel = self.init_qvel
- # qvel[-2:] = 0
- # self.set_state(qpos, qvel)
- # self._steps = 0
- #
- # return self._get_obs()
-
- def reset_model(self):
- qpos = self.init_qpos.copy()
- while True:
- self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
- # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
- # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
- if np.linalg.norm(self.goal) < self.n_links / 10:
- break
- qpos[-2:] = self.goal
- qvel = self.init_qvel.copy()
- qvel[-2:] = 0
- self.set_state(qpos, qvel)
- self._steps = 0
-
- return self._get_obs()
-
- # def reset_model(self):
- # qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
- # while True:
- # self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
- # if np.linalg.norm(self.goal) < self.n_links / 10:
- # break
- # qpos[-2:] = self.goal
- # qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
- # qvel[-2:] = 0
- # self.set_state(qpos, qvel)
- # self._steps = 0
- #
- # return self._get_obs()
-
- def _get_obs(self):
- theta = self.sim.data.qpos.flat[:self.n_links]
- target = self.get_body_com("target")
- return np.concatenate([
- np.cos(theta),
- np.sin(theta),
- target[:2], # x-y of goal position
- self.sim.data.qvel.flat[:self.n_links], # angular velocity
- self.get_body_com("fingertip") - target, # goal distance
- [self._steps],
- ])
-
-
-if __name__ == '__main__':
- nl = 5
- render_mode = "human" # "human" or "partial" or "final"
- env = ALRReacherEnv(n_links=nl)
- obs = env.reset()
-
- for i in range(2000):
- # objective.load_result("/tmp/cma")
- # test with random actions
- ac = env.action_space.sample()
- obs, rew, d, info = env.step(ac)
- if i % 10 == 0:
- env.render(mode=render_mode)
- if d:
- env.reset()
-
- env.close()
diff --git a/alr_envs/alr/mujoco/reacher/assets/reacher_5links.xml b/alr_envs/alr/mujoco/reacher/assets/reacher_5links.xml
deleted file mode 100644
index 25a3208..0000000
--- a/alr_envs/alr/mujoco/reacher/assets/reacher_5links.xml
+++ /dev/null
@@ -1,57 +0,0 @@
diff --git a/alr_envs/alr/mujoco/reacher/assets/reacher_7links.xml b/alr_envs/alr/mujoco/reacher/assets/reacher_7links.xml
deleted file mode 100644
index 6da5461..0000000
--- a/alr_envs/alr/mujoco/reacher/assets/reacher_7links.xml
+++ /dev/null
@@ -1,75 +0,0 @@
diff --git a/alr_envs/alr/mujoco/reacher/balancing.py b/alr_envs/alr/mujoco/reacher/balancing.py
deleted file mode 100644
index 3e34298..0000000
--- a/alr_envs/alr/mujoco/reacher/balancing.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import os
-
-import numpy as np
-from gym import utils
-from gym.envs.mujoco import mujoco_env
-
-import alr_envs.utils.utils as alr_utils
-
-
-class BalancingEnv(mujoco_env.MujocoEnv, utils.EzPickle):
- def __init__(self, n_links=5):
- utils.EzPickle.__init__(**locals())
-
- self.n_links = n_links
-
- if n_links == 5:
- file_name = 'reacher_5links.xml'
- elif n_links == 7:
- file_name = 'reacher_7links.xml'
- else:
- raise ValueError(f"Invalid number of links {n_links}, only 5 or 7 allowed.")
-
- mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", file_name), 2)
-
- def step(self, a):
- angle = alr_utils.angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad")
- reward = - np.abs(angle)
-
- self.do_simulation(a, self.frame_skip)
- ob = self._get_obs()
- done = False
- return ob, reward, done, dict(angle=angle, end_effector=self.get_body_com("fingertip").copy())
-
- def viewer_setup(self):
- self.viewer.cam.trackbodyid = 1
-
- def reset_model(self):
- # This also generates a goal; however, we do not need/use it
- qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
- qpos[-2:] = 0
- qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
- qvel[-2:] = 0
- self.set_state(qpos, qvel)
-
- return self._get_obs()
-
- def _get_obs(self):
- theta = self.sim.data.qpos.flat[:self.n_links]
- return np.concatenate([
- np.cos(theta),
- np.sin(theta),
- self.sim.data.qvel.flat[:self.n_links], # this is angular velocity
- ])
diff --git a/alr_envs/alr/mujoco/reacher/mp_wrapper.py b/alr_envs/alr/mujoco/reacher/mp_wrapper.py
deleted file mode 100644
index abcdc50..0000000
--- a/alr_envs/alr/mujoco/reacher/mp_wrapper.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from typing import Union
-
-import numpy as np
-from mp_env_api import MPEnvWrapper
-
-
-class MPWrapper(MPEnvWrapper):
-
- @property
- def active_obs(self):
- return np.concatenate([
- [False] * self.n_links, # cos
- [False] * self.n_links, # sin
- [True] * 2, # goal position
- [False] * self.n_links, # angular velocity
- [False] * 3, # goal distance
- # self.get_body_com("target"), # only return target to make problem harder
- [False], # step
- ])
-
- # @property
- # def active_obs(self):
- # return np.concatenate([
- # [True] * self.n_links, # cos, True
- # [True] * self.n_links, # sin, True
- # [True] * 2, # goal position
- # [True] * self.n_links, # angular velocity, True
- # [True] * 3, # goal distance
- # # self.get_body_com("target"), # only return target to make problem harder
- # [False], # step
- # ])
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray]:
- return self.sim.data.qvel.flat[:self.n_links]
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray]:
- return self.sim.data.qpos.flat[:self.n_links]
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
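-
-# Usage sketch (illustrative): this wrapper is combined with the step-based reacher through the
-# MP factory helpers, cf. alr_envs/examples, e.g.
-# env = alr_envs.make_dmp_env("alr_envs:ALRReacher-v0", wrappers=[MPWrapper], seed=1, mp_kwargs={...})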
diff --git a/alr_envs/alr/mujoco/table_tennis/__init__.py b/alr_envs/alr/mujoco/table_tennis/__init__.py
deleted file mode 100644
index c5e6d2f..0000000
--- a/alr_envs/alr/mujoco/table_tennis/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
deleted file mode 100644
index 86d5a00..0000000
--- a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
+++ /dev/null
@@ -1,38 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
-
-
-class MPWrapper(MPEnvWrapper):
-
- @property
- def active_obs(self):
- # TODO: @Max Filter observations correctly
- return np.hstack([
- [True] * 7, # Joint Pos
- [True] * 3, # Ball pos
- [True] * 3 # goal pos
- ])
-
- @property
- def start_pos(self):
- return self.init_qpos_tt
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.sim.data.qpos[:7].copy()
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.sim.data.qvel[:7].copy()
-
- @property
- def goal_pos(self):
- # TODO: @Max I think the default value of returning to the start is reasonable here
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_gym.py b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
deleted file mode 100644
index d1c2dc3..0000000
--- a/alr_envs/alr/mujoco/table_tennis/tt_gym.py
+++ /dev/null
@@ -1,180 +0,0 @@
-import os
-
-import numpy as np
-import mujoco_py
-from gym import utils, spaces
-from gym.envs.mujoco import MujocoEnv
-
-from alr_envs.alr.mujoco.table_tennis.tt_utils import ball_init
-from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
-
-# TODO: Check for simulation stability. Make sure the code keeps running even if the simulation crashes.
-
-MAX_EPISODE_STEPS = 1750
-BALL_NAME_CONTACT = "target_ball_contact"
-BALL_NAME = "target_ball"
-TABLE_NAME = 'table_tennis_table'
-FLOOR_NAME = 'floor'
-PADDLE_CONTACT_1_NAME = 'bat'
-PADDLE_CONTACT_2_NAME = 'bat_back'
-RACKET_NAME = 'bat'
-# CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.6]])
-CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.0]])
-CONTEXT_RANGE_BOUNDS_4DIM = np.array([[-1.35, -0.75, -1.25, -0.75], [-0.1, 0.75, -0.1, 0.75]])
-
-
-class TTEnvGym(MujocoEnv, utils.EzPickle):
-
- def __init__(self, ctxt_dim=2, fixed_goal=False):
- model_path = os.path.join(os.path.dirname(__file__), "xml", 'table_tennis_env.xml')
-
- self.ctxt_dim = ctxt_dim
- self.fixed_goal = fixed_goal
- if ctxt_dim == 2:
- self.context_range_bounds = CONTEXT_RANGE_BOUNDS_2DIM
- if self.fixed_goal:
- self.goal = np.array([-1, -0.1, 0])
- else:
- self.goal = np.zeros(3) # x, y context + z
- elif ctxt_dim == 4:
- self.context_range_bounds = CONTEXT_RANGE_BOUNDS_4DIM
- self.goal = np.zeros(3)
- else:
- raise ValueError("Only 2- or 4-dimensional contexts are supported.")
-
- # has no effect as it is overwritten in init of super
- # action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
- # action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
- # self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64')
-
- self.time_steps = 0
- self.init_qpos_tt = np.array([0, 0, 0, 1.5, 0, 0, 1.5, 0, 0, 0])
- self.init_qvel_tt = np.zeros(10)
-
- self.reward_func = TT_Reward(self.ctxt_dim)
- self.ball_landing_pos = None
- self.hit_ball = False
- self.ball_contact_after_hit = False
- self._ids_set = False
- super(TTEnvGym, self).__init__(model_path=model_path, frame_skip=1)
- self.ball_id = self.sim.model._body_name2id[BALL_NAME] # TODO: find a proper, non-protected accessor for this
- self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
- self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
- self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
- self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this
- self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this
- self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
-
- def _set_ids(self):
- self.ball_id = self.sim.model._body_name2id[BALL_NAME] # TODO: find a proper, non-protected accessor for this
- self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
- self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
- self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this
- self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this
- self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
- self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
- self._ids_set = True
-
- def _get_obs(self):
- ball_pos = self.sim.data.body_xpos[self.ball_id]
- obs = np.concatenate([self.sim.data.qpos[:7].copy(), # 7 joint positions
- ball_pos,
- self.goal.copy()])
- return obs
-
- def sample_context(self):
- return self.np_random.uniform(self.context_range_bounds[0], self.context_range_bounds[1], size=self.ctxt_dim)
-
- def reset_model(self):
- self.set_state(self.init_qpos_tt, self.init_qvel_tt) # reset to initial sim state
- self.time_steps = 0
- self.ball_landing_pos = None
- self.hit_ball = False
- self.ball_contact_after_hit = False
- if self.fixed_goal:
- self.goal = self.goal[:2]
- else:
- self.goal = self.sample_context()[:2]
- if self.ctxt_dim == 2:
- initial_ball_state = ball_init(random=False) # fixed velocity, fixed position
- elif self.ctxt_dim == 4:
- initial_ball_state = ball_init(random=False) # TODO: 4-dim context initialization not implemented yet, falls back to the fixed init
-
- self.sim.data.set_joint_qpos('tar:x', initial_ball_state[0])
- self.sim.data.set_joint_qpos('tar:y', initial_ball_state[1])
- self.sim.data.set_joint_qpos('tar:z', initial_ball_state[2])
-
- self.sim.data.set_joint_qvel('tar:x', initial_ball_state[3])
- self.sim.data.set_joint_qvel('tar:y', initial_ball_state[4])
- self.sim.data.set_joint_qvel('tar:z', initial_ball_state[5])
-
- z_extended_goal_pos = np.concatenate((self.goal[:2], [0.77]))
- self.goal = z_extended_goal_pos
- self.sim.model.body_pos[5] = self.goal[:3] # Desired Landing Position, Yellow
- self.sim.model.body_pos[3] = np.array([0, 0, 0.5]) # Outgoing Ball Landing Position, Green
- self.sim.model.body_pos[4] = np.array([0, 0, 0.5]) # Incoming Ball Landing Position, Red
- self.sim.forward()
-
- self.reward_func.reset(self.goal) # reset the reward function
- return self._get_obs()
-
- def _contact_checker(self, id_1, id_2):
- for coni in range(0, self.sim.data.ncon):
- con = self.sim.data.contact[coni]
- collision = con.geom1 == id_1 and con.geom2 == id_2
- collision_trans = con.geom1 == id_2 and con.geom2 == id_1
- if collision or collision_trans:
- return True
- return False
-
- def step(self, action):
- if not self._ids_set:
- self._set_ids()
- done = False
- episode_end = self.time_steps + 1 >= MAX_EPISODE_STEPS
- if not self.hit_ball:
- self.hit_ball = self._contact_checker(self.ball_contact_id, self.paddle_contact_id_1) # check for one side
- if not self.hit_ball:
- self.hit_ball = self._contact_checker(self.ball_contact_id, self.paddle_contact_id_2) # check for other side
- if self.hit_ball:
- if not self.ball_contact_after_hit:
- if self._contact_checker(self.ball_contact_id, self.floor_contact_id): # first check contact with floor
- self.ball_contact_after_hit = True
- self.ball_landing_pos = self.sim.data.body_xpos[self.ball_id]
- elif self._contact_checker(self.ball_contact_id, self.table_contact_id): # second check contact with table
- self.ball_contact_after_hit = True
- self.ball_landing_pos = self.sim.data.body_xpos[self.ball_id]
- c_ball_pos = self.sim.data.body_xpos[self.ball_id]
- racket_pos = self.sim.data.geom_xpos[self.racket_id] # TODO: use this to retrieve the paddle position?
- if self.ball_landing_pos is not None:
- done = True
- episode_end = True
- reward = self.reward_func.get_reward(episode_end, c_ball_pos, racket_pos, self.hit_ball, self.ball_landing_pos)
- self.time_steps += 1
- # gravity compensation on joints:
- #action += self.sim.data.qfrc_bias[:7].copy()
- try:
- self.do_simulation(action, self.frame_skip)
- except mujoco_py.MujocoException:
- print('Simulation became unstable, returning early.')
- done = True
- reward = -25
- ob = self._get_obs()
- info = {"hit_ball": self.hit_ball,
- "q_pos": np.copy(self.sim.data.qpos[:7]),
- "ball_pos": np.copy(self.sim.data.qpos[7:])}
- return ob, reward, done, info # might add some information here ....
-
- def set_context(self, context):
- old_state = self.sim.get_state()
- qpos = old_state.qpos.copy()
- qvel = old_state.qvel.copy()
- self.set_state(qpos, qvel)
- self.goal = context
- z_extended_goal_pos = np.concatenate((self.goal[:self.ctxt_dim], [0.77]))
- if self.ctxt_dim == 4:
- z_extended_goal_pos = np.concatenate((z_extended_goal_pos, [0.77]))
- self.goal = z_extended_goal_pos
- self.sim.model.body_pos[5] = self.goal[:3] # TODO: Missing: setting the desired incoming landing position
- self.sim.forward()
- return self._get_obs()
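-
-# Usage sketch (illustrative): the env follows the standard gym MujocoEnv interface, e.g.
-# env = TTEnvGym(ctxt_dim=2, fixed_goal=True)
-# obs = env.reset()
-# obs, reward, done, info = env.step(env.action_space.sample())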
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_reward.py b/alr_envs/alr/mujoco/table_tennis/tt_reward.py
deleted file mode 100644
index 0e1bebf..0000000
--- a/alr_envs/alr/mujoco/table_tennis/tt_reward.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import numpy as np
-
-
-class TT_Reward:
-
- def __init__(self, ctxt_dim):
- self.ctxt_dim = ctxt_dim
- self.c_goal = None # current desired landing point
- self.c_ball_traj = []
- self.c_racket_traj = []
- self.constant = 8
-
- def get_reward(self, episode_end, ball_position, racket_pos, hit_ball, ball_landing_pos):
- self.c_ball_traj.append(ball_position.copy())
- self.c_racket_traj.append(racket_pos.copy())
- if not episode_end:
- return 0
- else:
- # # seems to work for episodic case
- min_r_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj) - np.array(self.c_racket_traj), axis=1))
- if not hit_ball:
- return 0.2 * (1 - np.tanh(min_r_b_dist**2))
- else:
- if ball_landing_pos is None:
- min_b_des_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj)[:,:2] - self.c_goal[:2], axis=1))
- return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + (1 - np.tanh(min_b_des_b_dist**2))
- else:
- min_b_des_b_land_dist = np.linalg.norm(self.c_goal[:2] - ball_landing_pos[:2])
- over_net_bonus = int(ball_landing_pos[0] < 0)
- return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus
-
-
- # if not hited_ball:
- # min_r_b_dist = 1 + np.min(np.linalg.norm(np.array(self.c_ball_traj) - np.array(self.c_racket_traj), axis=1))
- # return -min_r_b_dist
- # else:
- # if ball_landing_pos is None:
- # dist_to_des_pos = 1-np.power(np.linalg.norm(self.c_goal - ball_position), 0.75)/self.constant
- # else:
- # dist_to_des_pos = 1-np.power(np.linalg.norm(self.c_goal - ball_landing_pos), 0.75)/self.constant
- # if dist_to_des_pos < -0.2:
- # dist_to_des_pos = -0.2
- # return -dist_to_des_pos
-
- def reset(self, goal):
- self.c_goal = goal.copy()
- self.c_ball_traj = []
- self.c_racket_traj = []
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_utils.py b/alr_envs/alr/mujoco/table_tennis/tt_utils.py
deleted file mode 100644
index 04a1b97..0000000
--- a/alr_envs/alr/mujoco/table_tennis/tt_utils.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import numpy as np
-
-
-def ball_init(random=False, context_range=None):
- if random:
- dx = np.random.uniform(-0.1, 0.1) # TODO: clarify these numbers?
- dy = np.random.uniform(-0.1, 0.1) # TODO: clarify these numbers?
- dz = np.random.uniform(-0.1, 0.1) # TODO: clarify these numbers?
-
- v_x = np.random.uniform(1.7, 1.8)
- v_y = np.random.uniform(0.7, 0.8)
- v_z = np.random.uniform(0.1, 0.2)
- else:
- dx = 1
- dy = 0
- dz = 0.05
-
- v_x = 2.5
- v_y = 2
- v_z = 0.5
-
- initial_x = 0 + dx - 1.2
- initial_y = -0.2 + dy - 0.6
- initial_z = 0.3 + dz + 1.5
- initial_ball_state = np.array([initial_x, initial_y, initial_z, v_x, v_y, v_z])
- return initial_ball_state
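-
-# For illustration, the fixed (random=False) case evaluates to
-# array([-0.2, -0.8, 1.85, 2.5, 2.0, 0.5]), i.e. [x, y, z, v_x, v_y, v_z] of the target ball.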
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_7_motor_actuator.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_7_motor_actuator.xml
deleted file mode 100644
index ec94d96..0000000
--- a/alr_envs/alr/mujoco/table_tennis/xml/include_7_motor_actuator.xml
+++ /dev/null
@@ -1,12 +0,0 @@
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml
deleted file mode 100644
index 7e04c28..0000000
--- a/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml
+++ /dev/null
@@ -1,103 +0,0 @@
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
deleted file mode 100644
index c313489..0000000
--- a/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
+++ /dev/null
@@ -1,30 +0,0 @@
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
deleted file mode 100644
index 19543e2..0000000
--- a/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
+++ /dev/null
@@ -1,10 +0,0 @@
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml b/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
deleted file mode 100644
index 25d0366..0000000
--- a/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
+++ /dev/null
@@ -1,47 +0,0 @@
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/shared.xml b/alr_envs/alr/mujoco/table_tennis/xml/shared.xml
deleted file mode 100644
index e349992..0000000
--- a/alr_envs/alr/mujoco/table_tennis/xml/shared.xml
+++ /dev/null
@@ -1,46 +0,0 @@
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml b/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
deleted file mode 100644
index 9609a6e..0000000
--- a/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
+++ /dev/null
@@ -1,18 +0,0 @@
diff --git a/alr_envs/dmc/README.MD b/alr_envs/dmc/README.MD
deleted file mode 100644
index 040a9a0..0000000
--- a/alr_envs/dmc/README.MD
+++ /dev/null
@@ -1,19 +0,0 @@
-# DeepMind Control (DMC) Wrappers
-
-These are the environment wrappers for selected
-[DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control)
-environments, adapting them to our Motion Primitive gym interface.
-
-## MP Environments
-
-[//]: <> (These environments are wrapped versions of their DeepMind Control Suite (DMC) counterparts. Given that most tasks can be)
-[//]: <> (solved with shorter horizons than the original 1000 steps, we often shorten the episodes for those tasks.)
-
-|Name| Description|Trajectory Horizon|Action Dimension|Context Dimension
-|---|---|---|---|---|
-|`dmc_ball_in_cup-catch_promp-v0`| A ProMP wrapped version of the "catch" task for the "ball_in_cup" environment. | 1000 | 10 | 2
-|`dmc_ball_in_cup-catch_dmp-v0`| A DMP wrapped version of the "catch" task for the "ball_in_cup" environment. | 1000| 10 | 2
-|`dmc_reacher-easy_promp-v0`| A ProMP wrapped version of the "easy" task for the "reacher" environment. | 1000 | 10 | 4
-|`dmc_reacher-easy_dmp-v0`| A DMP wrapped version of the "easy" task for the "reacher" environment. | 1000| 10 | 4
-|`dmc_reacher-hard_promp-v0`| A ProMP wrapped version of the "hard" task for the "reacher" environment.| 1000 | 10 | 4
-|`dmc_reacher-hard_dmp-v0`| A DMP wrapped version of the "hard" task for the "reacher" environment. | 1000 | 10 | 4
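-
-## Usage
-
-A minimal sketch of creating one of the environments listed above, assuming the package's top-level `make` helper (as used in `alr_envs/examples/examples_dmc.py`); for an MP environment a single `step()` executes the whole trajectory:
-
-```python
-import alr_envs
-
-env = alr_envs.make("dmc_ball_in_cup-catch_promp-v0", 1)  # (env_id, seed)
-obs = env.reset()
-ac = env.action_space.sample()  # one ProMP parameter vector
-obs, reward, done, info = env.step(ac)  # executes the full trajectory
-```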
diff --git a/alr_envs/dmc/__init__.py b/alr_envs/dmc/__init__.py
deleted file mode 100644
index ac34415..0000000
--- a/alr_envs/dmc/__init__.py
+++ /dev/null
@@ -1,378 +0,0 @@
-from . import manipulation, suite
-
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
-
-from gym.envs.registration import register
-
-# DeepMind Control Suite (DMC)
-
-register(
- id=f'dmc_ball_in_cup-catch_dmp-v0',
- entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
- # max_episode_steps=1,
- kwargs={
- "name": f"ball_in_cup-catch",
- "time_limit": 20,
- "episode_length": 1000,
- "wrappers": [suite.ball_in_cup.MPWrapper],
- "mp_kwargs": {
- "num_dof": 2,
- "num_basis": 5,
- "duration": 20,
- "learn_goal": True,
- "alpha_phase": 2,
- "bandwidth_factor": 2,
- "policy_type": "motor",
- "goal_scale": 0.1,
- "policy_kwargs": {
- "p_gains": 50,
- "d_gains": 1
- }
- }
- }
-)
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0")
-
-register(
- id=f'dmc_ball_in_cup-catch_promp-v0',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": f"ball_in_cup-catch",
- "time_limit": 20,
- "episode_length": 1000,
- "wrappers": [suite.ball_in_cup.MPWrapper],
- "mp_kwargs": {
- "num_dof": 2,
- "num_basis": 5,
- "duration": 20,
- "policy_type": "motor",
- "zero_start": True,
- "policy_kwargs": {
- "p_gains": 50,
- "d_gains": 1
- }
- }
- }
-)
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0")
-
-register(
- id=f'dmc_reacher-easy_dmp-v0',
- entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
- # max_episode_steps=1,
- kwargs={
- "name": f"reacher-easy",
- "time_limit": 20,
- "episode_length": 1000,
- "wrappers": [suite.reacher.MPWrapper],
- "mp_kwargs": {
- "num_dof": 2,
- "num_basis": 5,
- "duration": 20,
- "learn_goal": True,
- "alpha_phase": 2,
- "bandwidth_factor": 2,
- "policy_type": "motor",
- "weights_scale": 50,
- "goal_scale": 0.1,
- "policy_kwargs": {
- "p_gains": 50,
- "d_gains": 1
- }
- }
- }
-)
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0")
-
-register(
- id=f'dmc_reacher-easy_promp-v0',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": f"reacher-easy",
- "time_limit": 20,
- "episode_length": 1000,
- "wrappers": [suite.reacher.MPWrapper],
- "mp_kwargs": {
- "num_dof": 2,
- "num_basis": 5,
- "duration": 20,
- "policy_type": "motor",
- "weights_scale": 0.2,
- "zero_start": True,
- "policy_kwargs": {
- "p_gains": 50,
- "d_gains": 1
- }
- }
- }
-)
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0")
-
-register(
- id=f'dmc_reacher-hard_dmp-v0',
- entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
- # max_episode_steps=1,
- kwargs={
- "name": f"reacher-hard",
- "time_limit": 20,
- "episode_length": 1000,
- "wrappers": [suite.reacher.MPWrapper],
- "mp_kwargs": {
- "num_dof": 2,
- "num_basis": 5,
- "duration": 20,
- "learn_goal": True,
- "alpha_phase": 2,
- "bandwidth_factor": 2,
- "policy_type": "motor",
- "weights_scale": 50,
- "goal_scale": 0.1,
- "policy_kwargs": {
- "p_gains": 50,
- "d_gains": 1
- }
- }
- }
-)
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0")
-
-register(
- id=f'dmc_reacher-hard_promp-v0',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": f"reacher-hard",
- "time_limit": 20,
- "episode_length": 1000,
- "wrappers": [suite.reacher.MPWrapper],
- "mp_kwargs": {
- "num_dof": 2,
- "num_basis": 5,
- "duration": 20,
- "policy_type": "motor",
- "weights_scale": 0.2,
- "zero_start": True,
- "policy_kwargs": {
- "p_gains": 50,
- "d_gains": 1
- }
- }
- }
-)
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0")
-
-_dmc_cartpole_tasks = ["balance", "balance_sparse", "swingup", "swingup_sparse"]
-
-for _task in _dmc_cartpole_tasks:
- _env_id = f'dmc_cartpole-{_task}_dmp-v0'
- register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
- # max_episode_steps=1,
- kwargs={
- "name": f"cartpole-{_task}",
- # "time_limit": 1,
- "camera_id": 0,
- "episode_length": 1000,
- "wrappers": [suite.cartpole.MPWrapper],
- "mp_kwargs": {
- "num_dof": 1,
- "num_basis": 5,
- "duration": 10,
- "learn_goal": True,
- "alpha_phase": 2,
- "bandwidth_factor": 2,
- "policy_type": "motor",
- "weights_scale": 50,
- "goal_scale": 0.1,
- "policy_kwargs": {
- "p_gains": 10,
- "d_gains": 10
- }
- }
- }
- )
- ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
-
- _env_id = f'dmc_cartpole-{_task}_promp-v0'
- register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": f"cartpole-{_task}",
- # "time_limit": 1,
- "camera_id": 0,
- "episode_length": 1000,
- "wrappers": [suite.cartpole.MPWrapper],
- "mp_kwargs": {
- "num_dof": 1,
- "num_basis": 5,
- "duration": 10,
- "policy_type": "motor",
- "weights_scale": 0.2,
- "zero_start": True,
- "policy_kwargs": {
- "p_gains": 10,
- "d_gains": 10
- }
- }
- }
- )
- ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-_env_id = f'dmc_cartpole-two_poles_dmp-v0'
-register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
- # max_episode_steps=1,
- kwargs={
- "name": f"cartpole-two_poles",
- # "time_limit": 1,
- "camera_id": 0,
- "episode_length": 1000,
- "wrappers": [suite.cartpole.TwoPolesMPWrapper],
- "mp_kwargs": {
- "num_dof": 1,
- "num_basis": 5,
- "duration": 10,
- "learn_goal": True,
- "alpha_phase": 2,
- "bandwidth_factor": 2,
- "policy_type": "motor",
- "weights_scale": 50,
- "goal_scale": 0.1,
- "policy_kwargs": {
- "p_gains": 10,
- "d_gains": 10
- }
- }
- }
-)
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
-
-_env_id = f'dmc_cartpole-two_poles_promp-v0'
-register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": f"cartpole-two_poles",
- # "time_limit": 1,
- "camera_id": 0,
- "episode_length": 1000,
- "wrappers": [suite.cartpole.TwoPolesMPWrapper],
- "mp_kwargs": {
- "num_dof": 1,
- "num_basis": 5,
- "duration": 10,
- "policy_type": "motor",
- "weights_scale": 0.2,
- "zero_start": True,
- "policy_kwargs": {
- "p_gains": 10,
- "d_gains": 10
- }
- }
- }
-)
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-_env_id = f'dmc_cartpole-three_poles_dmp-v0'
-register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
- # max_episode_steps=1,
- kwargs={
- "name": f"cartpole-three_poles",
- # "time_limit": 1,
- "camera_id": 0,
- "episode_length": 1000,
- "wrappers": [suite.cartpole.ThreePolesMPWrapper],
- "mp_kwargs": {
- "num_dof": 1,
- "num_basis": 5,
- "duration": 10,
- "learn_goal": True,
- "alpha_phase": 2,
- "bandwidth_factor": 2,
- "policy_type": "motor",
- "weights_scale": 50,
- "goal_scale": 0.1,
- "policy_kwargs": {
- "p_gains": 10,
- "d_gains": 10
- }
- }
- }
-)
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
-
-_env_id = f'dmc_cartpole-three_poles_promp-v0'
-register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": f"cartpole-three_poles",
- # "time_limit": 1,
- "camera_id": 0,
- "episode_length": 1000,
- "wrappers": [suite.cartpole.ThreePolesMPWrapper],
- "mp_kwargs": {
- "num_dof": 1,
- "num_basis": 5,
- "duration": 10,
- "policy_type": "motor",
- "weights_scale": 0.2,
- "zero_start": True,
- "policy_kwargs": {
- "p_gains": 10,
- "d_gains": 10
- }
- }
- }
-)
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-# DeepMind Manipulation
-
-register(
- id=f'dmc_manipulation-reach_site_dmp-v0',
- entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
- # max_episode_steps=1,
- kwargs={
- "name": f"manipulation-reach_site_features",
- # "time_limit": 1,
- "episode_length": 250,
- "wrappers": [manipulation.reach_site.MPWrapper],
- "mp_kwargs": {
- "num_dof": 9,
- "num_basis": 5,
- "duration": 10,
- "learn_goal": True,
- "alpha_phase": 2,
- "bandwidth_factor": 2,
- "policy_type": "velocity",
- "weights_scale": 50,
- "goal_scale": 0.1,
- }
- }
-)
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0")
-
-register(
- id=f'dmc_manipulation-reach_site_promp-v0',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": f"manipulation-reach_site_features",
- # "time_limit": 1,
- "episode_length": 250,
- "wrappers": [manipulation.reach_site.MPWrapper],
- "mp_kwargs": {
- "num_dof": 9,
- "num_basis": 5,
- "duration": 10,
- "policy_type": "velocity",
- "weights_scale": 0.2,
- "zero_start": True,
- }
- }
-)
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0")
diff --git a/alr_envs/dmc/dmc_wrapper.py b/alr_envs/dmc/dmc_wrapper.py
deleted file mode 100644
index aa6c7aa..0000000
--- a/alr_envs/dmc/dmc_wrapper.py
+++ /dev/null
@@ -1,206 +0,0 @@
-# Adapted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/wrappers.py
-# License: MIT
-# Copyright (c) 2020 Denis Yarats
-import collections.abc
-from typing import Any, Dict, Tuple
-
-import numpy as np
-from dm_control import manipulation, suite
-from dm_env import specs
-from gym import core, spaces
-
-
-def _spec_to_box(spec):
- def extract_min_max(s):
- assert s.dtype == np.float64 or s.dtype == np.float32, f"Only float64 and float32 types are allowed, instead {s.dtype} was found"
- dim = int(np.prod(s.shape))
- if type(s) == specs.Array:
- bound = np.inf * np.ones(dim, dtype=s.dtype)
- return -bound, bound
- elif type(s) == specs.BoundedArray:
- zeros = np.zeros(dim, dtype=s.dtype)
- return s.minimum + zeros, s.maximum + zeros
-
- mins, maxs = [], []
- for s in spec:
- mn, mx = extract_min_max(s)
- mins.append(mn)
- maxs.append(mx)
- low = np.concatenate(mins, axis=0)
- high = np.concatenate(maxs, axis=0)
- assert low.shape == high.shape
- return spaces.Box(low, high, dtype=s.dtype)
-
-
-def _flatten_obs(obs: collections.abc.MutableMapping):
- """
- Flattens an observation of type MutableMapping, e.g. a dict, into a 1D array.
- Args:
- obs: observation to flatten
-
- Returns: 1D array of observation
-
- """
-
- if not isinstance(obs, collections.abc.MutableMapping):
- raise ValueError(f'Requires dict-like observations structure. {type(obs)} found.')
-
- # Keep key order consistent for non OrderedDicts
- keys = obs.keys() if isinstance(obs, collections.OrderedDict) else sorted(obs.keys())
-
- obs_vals = [np.array([obs[key]]) if np.isscalar(obs[key]) else obs[key].ravel() for key in keys]
- return np.concatenate(obs_vals)
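-
-# For illustration (hypothetical input):
-# _flatten_obs(collections.OrderedDict(position=np.zeros(2), touch=1.0)) -> array([0., 0., 1.]),
-# i.e. scalars are wrapped, arrays are ravelled, and the OrderedDict key order is preserved.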
-
-
-class DMCWrapper(core.Env):
- def __init__(
- self,
- domain_name: str,
- task_name: str,
- task_kwargs: dict = {},
- visualize_reward: bool = True,
- from_pixels: bool = False,
- height: int = 84,
- width: int = 84,
- camera_id: int = 0,
- frame_skip: int = 1,
- environment_kwargs: dict = None,
- channels_first: bool = True
- ):
- assert 'random' in task_kwargs, 'Please specify a seed for deterministic behavior.'
- self._from_pixels = from_pixels
- self._height = height
- self._width = width
- self._camera_id = camera_id
- self._frame_skip = frame_skip
- self._channels_first = channels_first
-
- # create task
- if domain_name == "manipulation":
- assert not from_pixels and not task_name.endswith("_vision"), \
- "TODO: Vision interface for manipulation is different to suite and needs to be implemented"
- self._env = manipulation.load(environment_name=task_name, seed=task_kwargs['random'])
- else:
- self._env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs,
- visualize_reward=visualize_reward, environment_kwargs=environment_kwargs)
-
- # action and observation space
- self._action_space = _spec_to_box([self._env.action_spec()])
- self._observation_space = _spec_to_box(self._env.observation_spec().values())
-
- self._last_state = None
- self.viewer = None
-
- # set seed
- self.seed(seed=task_kwargs.get('random', 1))
-
- def __getattr__(self, item):
- """Propagate only non-existent properties to wrapped env."""
- if item.startswith('_'):
- raise AttributeError("attempted to get missing private attribute '{}'".format(item))
- if item in self.__dict__:
- return getattr(self, item)
- return getattr(self._env, item)
-
- def _get_obs(self, time_step):
- if self._from_pixels:
- obs = self.render(
- mode="rgb_array",
- height=self._height,
- width=self._width,
- camera_id=self._camera_id
- )
- if self._channels_first:
- obs = obs.transpose(2, 0, 1).copy()
- else:
- obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype)
- return obs
-
- @property
- def observation_space(self):
- return self._observation_space
-
- @property
- def action_space(self):
- return self._action_space
-
- @property
- def dt(self):
- return self._env.control_timestep() * self._frame_skip
-
- @property
- def base_step_limit(self):
- """
- Returns: max_episode_steps of the underlying DMC env
-
- """
- # Accessing private attribute because DMC does not expose time_limit or step_limit.
- # Only the current time_step/time as well as the control_timestep can be accessed.
- try:
- return (self._env._step_limit + self._frame_skip - 1) // self._frame_skip
- except AttributeError as e:
- return self._env._time_limit / self.dt
-
- def seed(self, seed=None):
- self._action_space.seed(seed)
- self._observation_space.seed(seed)
-
- def step(self, action) -> Tuple[np.ndarray, float, bool, Dict[str, Any]]:
- assert self._action_space.contains(action)
- reward = 0
- extra = {'internal_state': self._env.physics.get_state().copy()}
-
- for _ in range(self._frame_skip):
- time_step = self._env.step(action)
- reward += time_step.reward or 0.
- done = time_step.last()
- if done:
- break
-
- self._last_state = _flatten_obs(time_step.observation)
- obs = self._get_obs(time_step)
- extra['discount'] = time_step.discount
- return obs, reward, done, extra
-
- def reset(self) -> np.ndarray:
- time_step = self._env.reset()
- self._last_state = _flatten_obs(time_step.observation)
- obs = self._get_obs(time_step)
- return obs
-
- def render(self, mode='rgb_array', height=None, width=None, camera_id=0):
- if self._last_state is None:
- raise ValueError('Environment not ready to render. Call reset() first.')
-
- camera_id = camera_id or self._camera_id
-
- # assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode
- if mode == "rgb_array":
- height = height or self._height
- width = width or self._width
- return self._env.physics.render(height=height, width=width, camera_id=camera_id)
-
- elif mode == 'human':
- if self.viewer is None:
- # pylint: disable=import-outside-toplevel
- # pylint: disable=g-import-not-at-top
- from gym.envs.classic_control import rendering
- self.viewer = rendering.SimpleImageViewer()
- # Render max available buffer size. Larger is only possible by altering the XML.
- img = self._env.physics.render(height=self._env.physics.model.vis.global_.offheight,
- width=self._env.physics.model.vis.global_.offwidth,
- camera_id=camera_id)
- self.viewer.imshow(img)
- return self.viewer.isopen
-
- def close(self):
- super().close()
- if self.viewer is not None and self.viewer.isopen:
- self.viewer.close()
-
- @property
- def reward_range(self) -> Tuple[float, float]:
- reward_spec = self._env.reward_spec()
- if isinstance(reward_spec, specs.BoundedArray):
- return reward_spec.minimum, reward_spec.maximum
- return -float('inf'), float('inf')
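-
-# Usage sketch (illustrative): the wrapper exposes the standard gym API, e.g.
-# env = DMCWrapper(domain_name="reacher", task_name="easy", task_kwargs={"random": 1})
-# obs = env.reset()
-# obs, reward, done, info = env.step(env.action_space.sample())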
diff --git a/alr_envs/dmc/manipulation/__init__.py b/alr_envs/dmc/manipulation/__init__.py
deleted file mode 100644
index f3a4147..0000000
--- a/alr_envs/dmc/manipulation/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from . import reach_site
diff --git a/alr_envs/dmc/manipulation/reach_site/__init__.py b/alr_envs/dmc/manipulation/reach_site/__init__.py
deleted file mode 100644
index 989b5a9..0000000
--- a/alr_envs/dmc/manipulation/reach_site/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/dmc/manipulation/reach_site/mp_wrapper.py b/alr_envs/dmc/manipulation/reach_site/mp_wrapper.py
deleted file mode 100644
index 2d03f7b..0000000
--- a/alr_envs/dmc/manipulation/reach_site/mp_wrapper.py
+++ /dev/null
@@ -1,38 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from mp_env_api import MPEnvWrapper
-
-
-class MPWrapper(MPEnvWrapper):
-
- @property
- def active_obs(self):
- # Joint and target positions are randomized, velocities are always set to 0.
- return np.hstack([
- [True] * 3, # target position
- [True] * 12, # sin/cos arm joint position
- [True] * 6, # arm joint torques
- [False] * 6, # arm joint velocities
- [True] * 3, # sin/cos hand joint position
- [False] * 3, # hand joint velocities
- [True] * 3, # hand pinch site position
- [True] * 9, # pinch site rmat
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray]:
- return self.env.physics.named.data.qpos[:]
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.physics.named.data.qvel[:]
-
- @property
- def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/dmc/suite/__init__.py b/alr_envs/dmc/suite/__init__.py
deleted file mode 100644
index 7d206fc..0000000
--- a/alr_envs/dmc/suite/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from . import cartpole, ball_in_cup, reacher
diff --git a/alr_envs/dmc/suite/ball_in_cup/__init__.py b/alr_envs/dmc/suite/ball_in_cup/__init__.py
deleted file mode 100644
index 989b5a9..0000000
--- a/alr_envs/dmc/suite/ball_in_cup/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/dmc/suite/ball_in_cup/mp_wrapper.py b/alr_envs/dmc/suite/ball_in_cup/mp_wrapper.py
deleted file mode 100644
index fb068b3..0000000
--- a/alr_envs/dmc/suite/ball_in_cup/mp_wrapper.py
+++ /dev/null
@@ -1,34 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from mp_env_api import MPEnvWrapper
-
-
-class MPWrapper(MPEnvWrapper):
-
- @property
- def active_obs(self):
- # Besides the ball position, all other observation entries are always initialized to 0.
- return np.hstack([
- [False] * 2, # cup position
- [True] * 2, # ball position
- [False] * 2, # cup velocity
- [False] * 2, # ball velocity
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray]:
- return np.hstack([self.env.physics.named.data.qpos['cup_x'], self.env.physics.named.data.qpos['cup_z']])
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return np.hstack([self.env.physics.named.data.qvel['cup_x'], self.env.physics.named.data.qvel['cup_z']])
-
- @property
- def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/dmc/suite/cartpole/__init__.py b/alr_envs/dmc/suite/cartpole/__init__.py
deleted file mode 100644
index c5f9bee..0000000
--- a/alr_envs/dmc/suite/cartpole/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .mp_wrapper import MPWrapper
-from .mp_wrapper import TwoPolesMPWrapper
-from .mp_wrapper import ThreePolesMPWrapper
diff --git a/alr_envs/dmc/suite/cartpole/mp_wrapper.py b/alr_envs/dmc/suite/cartpole/mp_wrapper.py
deleted file mode 100644
index 1ca99f5..0000000
--- a/alr_envs/dmc/suite/cartpole/mp_wrapper.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from mp_env_api import MPEnvWrapper
-
-
-class MPWrapper(MPEnvWrapper):
-
- def __init__(self, env, n_poles: int = 1):
- self.n_poles = n_poles
- super().__init__(env)
-
-
- @property
- def active_obs(self):
- # All observation entries are relevant for the cartpole task, hence all are active.
- return np.hstack([
- [True], # slider position
- [True] * 2 * self.n_poles, # sin/cos hinge angles
- [True], # slider velocity
- [True] * self.n_poles, # hinge velocities
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray]:
- return self.env.physics.named.data.qpos["slider"]
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.physics.named.data.qvel["slider"]
-
- @property
- def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
-
-
-class TwoPolesMPWrapper(MPWrapper):
-
- def __init__(self, env):
- super().__init__(env, n_poles=2)
-
-
-class ThreePolesMPWrapper(MPWrapper):
-
- def __init__(self, env):
- super().__init__(env, n_poles=3)
diff --git a/alr_envs/dmc/suite/reacher/__init__.py b/alr_envs/dmc/suite/reacher/__init__.py
deleted file mode 100644
index 989b5a9..0000000
--- a/alr_envs/dmc/suite/reacher/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/dmc/suite/reacher/mp_wrapper.py b/alr_envs/dmc/suite/reacher/mp_wrapper.py
deleted file mode 100644
index 86bc992..0000000
--- a/alr_envs/dmc/suite/reacher/mp_wrapper.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from mp_env_api import MPEnvWrapper
-
-
-class MPWrapper(MPEnvWrapper):
-
- @property
- def active_obs(self):
- # Joint and target positions are randomized, velocities are always set to 0.
- return np.hstack([
- [True] * 2, # joint position
- [True] * 2, # target position
- [False] * 2, # joint velocity
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray]:
- return self.env.physics.named.data.qpos[:]
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.physics.named.data.qvel[:]
-
- @property
- def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/examples/__init__.py b/alr_envs/examples/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/alr_envs/examples/examples_dmc.py b/alr_envs/examples/examples_dmc.py
deleted file mode 100644
index d223d3c..0000000
--- a/alr_envs/examples/examples_dmc.py
+++ /dev/null
@@ -1,135 +0,0 @@
-import alr_envs
-
-
-def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
- """
- Example for running a DMC based env in the step based setting.
- The env_id has to be specified as `domain_name-task_name` or
- for manipulation tasks as `manipulation-environment_name`
-
- Args:
- env_id: Either `domain_name-task_name` or `manipulation-environment_name`
- seed: seed for deterministic behaviour
- iterations: Number of rollout steps to run
- render: Render the episode
-
- Returns:
-
- """
- env = alr_envs.make(env_id, seed)
- rewards = 0
- obs = env.reset()
- print("observation shape:", env.observation_space.shape)
- print("action shape:", env.action_space.shape)
-
- for i in range(iterations):
- ac = env.action_space.sample()
- obs, reward, done, info = env.step(ac)
- rewards += reward
-
- if render:
- env.render("human")
-
- if done:
- print(env_id, rewards)
- rewards = 0
- obs = env.reset()
-
- env.close()
- del env
-
-
-def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
- """
- Example for running a custom motion primitive based environments.
- Our already registered environments follow the same structure.
- Hence, this also allows to adjust hyperparameters of the motion primitives.
- Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.
- We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
- for our repo: https://github.com/ALRhub/alr_envs/
- Args:
- seed: seed for deterministic behaviour
- iterations: Number of rollout steps to run
- render: Render the episode
-
- Returns:
-
- """
-
- # Base DMC name, according to structure of above example
- base_env = "ball_in_cup-catch"
-
- # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
- # You can also add other gym.Wrappers in case they are needed.
- wrappers = [alr_envs.dmc.suite.ball_in_cup.MPWrapper]
- mp_kwargs = {
- "num_dof": 2, # degrees of freedom, a.k.a. the original action space dimensionality
- "num_basis": 5, # number of basis functions, the new action space has size num_dof x num_basis
- "duration": 20, # length of trajectory in s, number of steps = duration / dt
- "learn_goal": True, # learn the goal position (recommended)
- "alpha_phase": 2,
- "bandwidth_factor": 2,
- "policy_type": "motor", # controller type, 'velocity', 'position', and 'motor' (torque control)
- "weights_scale": 1, # scaling of MP weights
- "goal_scale": 1, # scaling of learned goal position
- "policy_kwargs": { # only required for torque control/PD-Controller
- "p_gains": 0.2,
- "d_gains": 0.05
- }
- }
- kwargs = {
- "time_limit": 20, # same as duration value but as max horizon for underlying DMC environment
- "episode_length": 1000, # corresponding number of episode steps
- # "frame_skip": 1
- }
- env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs, **kwargs)
- # OR for a deterministic ProMP (other mp_kwargs are required, see metaworld_examples):
- # env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
-
- # This renders the full MP trajectory.
- # It is only required to call render() once in the beginning; this renders every consecutive trajectory.
- # Rendering can be disabled again by calling render(mode=None).
- # It is also possible to change the mode multiple times, e.g. when
- # only every n-th trajectory should be displayed.
- if render:
- env.render(mode="human")
-
- rewards = 0
- obs = env.reset()
-
- # number of samples/full trajectories (multiple environment steps)
- for i in range(iterations):
- ac = env.action_space.sample()
- obs, reward, done, info = env.step(ac)
- rewards += reward
-
- if done:
- print(base_env, rewards)
- rewards = 0
- obs = env.reset()
-
- env.close()
- del env
-
-
-if __name__ == '__main__':
- # Disclaimer: DMC environments require the seed to be specified in the beginning.
- # Adjusting it afterwards with env.seed() is not recommended as it does not affect the underlying physics.
-
- # For rendering DMC
- # export MUJOCO_GL="osmesa"
- render = False
-
- # # Standard DMC Suite tasks
- example_dmc("fish-swim", seed=10, iterations=1000, render=render)
-
- # Manipulation tasks
- # Disclaimer: The vision versions are currently not integrated and yield an error
- example_dmc("manipulation-reach_site_features", seed=10, iterations=250, render=render)
-
- # Gym + DMC hybrid task provided in the MP framework
- example_dmc("dmc_ball_in_cup-catch_promp-v0", seed=10, iterations=1, render=render)
-
- # Custom DMC task
- # Different seed, because the episode is longer for this example and the name+seed combo is already registered above
- example_custom_dmc_and_mp(seed=11, iterations=1, render=render)
diff --git a/alr_envs/examples/examples_general.py b/alr_envs/examples/examples_general.py
deleted file mode 100644
index d043d6d..0000000
--- a/alr_envs/examples/examples_general.py
+++ /dev/null
@@ -1,102 +0,0 @@
-from collections import defaultdict
-
-import gym
-import numpy as np
-
-import alr_envs
-
-
-def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True):
- """
- Example for running any env in the step based setting.
- This also includes DMC environments when leveraging our custom make_env function.
-
- Args:
- env_id: OpenAI/Custom gym task id or either `domain_name-task_name` or `manipulation-environment_name` for DMC tasks
- seed: seed for deterministic behaviour
- iterations: Number of rollout steps to run
- render: Render the episode
-
- Returns:
-
- """
-
- env = alr_envs.make(env_id, seed)
- rewards = 0
- obs = env.reset()
- print("Observation shape: ", env.observation_space.shape)
- print("Action shape: ", env.action_space.shape)
-
- # number of environment steps
- for i in range(iterations):
- obs, reward, done, info = env.step(env.action_space.sample())
- rewards += reward
-
- if render:
- env.render()
-
- if done:
- print(rewards)
- rewards = 0
- obs = env.reset()
-
-
-def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samples=800):
- """
- Example for running any env in a vectorized multiprocessing setting to generate more samples faster.
- This also includes DMC and DMP environments when leveraging our custom make_env function.
- Be aware, increasing the number of environments reduces the total length of the individual episodes.
-
- Args:
- env_id: OpenAI/Custom gym task id or either `domain_name-task_name` or `manipulation-environment_name` for DMC tasks
- seed: seed for deterministic behaviour
-        n_cpu: Number of cpu cores to use in parallel
- n_samples: number of samples generated in total by all environments.
-
- Returns: Tuple of (obs, reward, done, info) with type np.ndarray
-
- """
- env = gym.vector.AsyncVectorEnv([alr_envs.make_rank(env_id, seed, i) for i in range(n_cpu)])
- # OR
- # envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)])
-
- # for plotting
- rewards = np.zeros(n_cpu)
- buffer = defaultdict(list)
-
- obs = env.reset()
-
- # this would generate more samples than requested if n_samples % num_envs != 0
- repeat = int(np.ceil(n_samples / env.num_envs))
- for i in range(repeat):
- obs, reward, done, info = env.step(env.action_space.sample())
- buffer['obs'].append(obs)
- buffer['reward'].append(reward)
- buffer['done'].append(done)
- buffer['info'].append(info)
- rewards += reward
- if np.any(done):
- print(f"Reward at iteration {i}: {rewards[done]}")
- rewards[done] = 0
-
-    # do not return more samples than requested
- return (*map(lambda v: np.stack(v)[:n_samples], buffer.values()),)
-
-
-if __name__ == '__main__':
- render = True
-
- # Basic gym task
- example_general("Pendulum-v0", seed=10, iterations=200, render=render)
-
-    # # Basic task from framework
- example_general("alr_envs:HoleReacher-v0", seed=10, iterations=200, render=render)
-
- # # OpenAI Mujoco task
- example_general("HalfCheetah-v2", seed=10, render=render)
-
- # # Mujoco task from framework
- example_general("alr_envs:ALRReacher-v0", seed=10, iterations=200, render=render)
-
- # Vectorized multiprocessing environments
- example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)
diff --git a/alr_envs/examples/examples_metaworld.py b/alr_envs/examples/examples_metaworld.py
deleted file mode 100644
index 9ead50c..0000000
--- a/alr_envs/examples/examples_metaworld.py
+++ /dev/null
@@ -1,128 +0,0 @@
-import alr_envs
-
-
-def example_dmc(env_id="button-press-v2", seed=1, iterations=1000, render=True):
- """
- Example for running a MetaWorld based env in the step based setting.
- The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always
- return the observable goal version.
- All tasks can be found here: https://arxiv.org/pdf/1910.10897.pdf or https://meta-world.github.io/
-
- Args:
- env_id: `task_name-v2`
- seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)
- iterations: Number of rollout steps to run
- render: Render the episode
-
- Returns:
-
- """
- env = alr_envs.make(env_id, seed)
- rewards = 0
- obs = env.reset()
- print("observation shape:", env.observation_space.shape)
- print("action shape:", env.action_space.shape)
-
- for i in range(iterations):
- ac = env.action_space.sample()
- obs, reward, done, info = env.step(ac)
- rewards += reward
-
- if render:
- # THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM
- # TODO: Remove this, when Metaworld fixes its interface.
- env.render(False)
-
- if done:
- print(env_id, rewards)
- rewards = 0
- obs = env.reset()
-
- env.close()
- del env
-
-
-def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
- """
-    Example for running a custom motion primitive based environment.
-    Our already registered environments follow the same structure.
-    Hence, this also allows adjusting hyperparameters of the motion primitives.
-    Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.
- We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
- for our repo: https://github.com/ALRhub/alr_envs/
- Args:
- seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)
- iterations: Number of rollout steps to run
- render: Render the episode (TODO: currently not working due to an issue in MetaWorld code)
-
- Returns:
-
- """
-
- # Base MetaWorld name, according to structure of above example
- base_env = "button-press-v2"
-
- # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
- # You can also add other gym.Wrappers in case they are needed.
- wrappers = [alr_envs.meta.goal_and_object_change.MPWrapper]
- mp_kwargs = {
- "num_dof": 4, # degrees of fredom a.k.a. the old action space dimensionality
- "num_basis": 5, # number of basis functions, the new action space has size num_dof x num_basis
- "duration": 6.25, # length of trajectory in s, number of steps = duration / dt
- "post_traj_time": 0, # pad trajectory with additional zeros at the end (recommended: 0)
- "width": 0.025, # width of the basis functions
- "zero_start": True, # start from current environment position if True
- "weights_scale": 1, # scaling of MP weights
- "policy_type": "metaworld", # custom controller type for metaworld environments
- }
-
- env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
- # OR for a DMP (other mp_kwargs are required, see dmc_examples):
- # env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs, **kwargs)
-
- # This renders the full MP trajectory
- # It is only required to call render() once in the beginning, which renders every consecutive trajectory.
-    # Resetting to no rendering can be achieved by render(mode=None).
-    # It is also possible to change the mode multiple times when
- # e.g. only every nth trajectory should be displayed.
- if render:
- raise ValueError("Metaworld render interface bug does not allow to render() fixes its interface. "
- "A temporary workaround is to alter their code in MujocoEnv render() from "
- "`if not offscreen` to `if not offscreen or offscreen == 'human'`.")
- # TODO: Remove this, when Metaworld fixes its interface.
- # env.render(mode="human")
-
- rewards = 0
- obs = env.reset()
-
- # number of samples/full trajectories (multiple environment steps)
- for i in range(iterations):
- ac = env.action_space.sample()
- obs, reward, done, info = env.step(ac)
- rewards += reward
-
- if done:
- print(base_env, rewards)
- rewards = 0
- obs = env.reset()
-
- env.close()
- del env
-
-
-if __name__ == '__main__':
- # Disclaimer: MetaWorld environments require the seed to be specified in the beginning.
- # Adjusting it afterwards with env.seed() is not recommended as it may not affect the underlying behavior.
-
- # For rendering it might be necessary to specify your OpenGL installation
- # export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so
- render = False
-
-    # Standard MetaWorld tasks
- example_dmc("button-press-v2", seed=10, iterations=500, render=render)
-
-    # MP + MetaWorld hybrid task provided in our framework
- example_dmc("ButtonPressProMP-v2", seed=10, iterations=1, render=render)
-
- # Custom MetaWorld task
- example_custom_dmc_and_mp(seed=10, iterations=1, render=render)
diff --git a/alr_envs/examples/examples_motion_primitives.py b/alr_envs/examples/examples_motion_primitives.py
deleted file mode 100644
index 1a679df..0000000
--- a/alr_envs/examples/examples_motion_primitives.py
+++ /dev/null
@@ -1,164 +0,0 @@
-import alr_envs
-
-
-def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True):
- """
- Example for running a motion primitive based environment, which is already registered
- Args:
- env_name: DMP env_id
- seed: seed for deterministic behaviour
- iterations: Number of rollout steps to run
- render: Render the episode
-
- Returns:
-
- """
-    # While gym.make() could be used in this case as well, we recommend our custom make() function.
-    # First, it already takes care of seeding and, second, it enables the use of DMC tasks within the gym interface.
- env = alr_envs.make(env_name, seed)
-
- rewards = 0
- # env.render(mode=None)
- obs = env.reset()
-
- # number of samples/full trajectories (multiple environment steps)
- for i in range(iterations):
-
- if render and i % 2 == 0:
- # This renders the full MP trajectory
- # It is only required to call render() once in the beginning, which renders every consecutive trajectory.
-            # Resetting to no rendering can be achieved by render(mode=None).
- # It is also possible to change the mode multiple times when
- # e.g. only every second trajectory should be displayed, such as here
- # Just make sure the correct mode is set before executing the step.
- env.render(mode="human")
- else:
- env.render(mode=None)
-
- ac = env.action_space.sample()
- obs, reward, done, info = env.step(ac)
- rewards += reward
-
- if done:
- print(rewards)
- rewards = 0
- obs = env.reset()
-
-
-def example_custom_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True):
- """
-    Example for running an already registered motion primitive based environment while overriding its mp_kwargs.
- Args:
- env_name: DMP env_id
- seed: seed for deterministic behaviour
- iterations: Number of rollout steps to run
- render: Render the episode
-
- Returns:
-
- """
-    # Changing the mp_kwargs is possible by providing them to gym.
-    # E.g. here by providing way too many basis functions
- mp_kwargs = {
- "num_dof": 5,
- "num_basis": 1000,
- "duration": 2,
- "learn_goal": True,
- "alpha_phase": 2,
- "bandwidth_factor": 2,
- "policy_type": "velocity",
- "weights_scale": 50,
- "goal_scale": 0.1
- }
- env = alr_envs.make(env_name, seed, mp_kwargs=mp_kwargs)
-
- # This time rendering every trajectory
- if render:
- env.render(mode="human")
-
- rewards = 0
- obs = env.reset()
-
- # number of samples/full trajectories (multiple environment steps)
- for i in range(iterations):
- ac = env.action_space.sample()
- obs, reward, done, info = env.step(ac)
- rewards += reward
-
- if done:
- print(rewards)
- rewards = 0
- obs = env.reset()
-
-
-def example_fully_custom_mp(seed=1, iterations=1, render=True):
- """
-    Example for running a custom motion primitive based environment.
-    Our already registered environments follow the same structure.
-    Hence, this also allows adjusting hyperparameters of the motion primitives.
-    Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.
- We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
- for our repo: https://github.com/ALRhub/alr_envs/
- Args:
- seed: seed
- iterations: Number of rollout steps to run
- render: Render the episode
-
- Returns:
-
- """
-
- base_env = "alr_envs:HoleReacher-v1"
-
- # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
- # You can also add other gym.Wrappers in case they are needed.
- wrappers = [alr_envs.alr.classic_control.hole_reacher.MPWrapper]
- mp_kwargs = {
- "num_dof": 5,
- "num_basis": 5,
- "duration": 2,
- "learn_goal": True,
- "alpha_phase": 2,
- "bandwidth_factor": 2,
- "policy_type": "velocity",
- "weights_scale": 50,
- "goal_scale": 0.1
- }
- env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
- # OR for a deterministic ProMP:
- # env = make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
-
- if render:
- env.render(mode="human")
-
- rewards = 0
- obs = env.reset()
-
- # number of samples/full trajectories (multiple environment steps)
- for i in range(iterations):
- ac = env.action_space.sample()
- obs, reward, done, info = env.step(ac)
- rewards += reward
-
- if done:
- print(rewards)
- rewards = 0
- obs = env.reset()
-
-
-if __name__ == '__main__':
- render = False
- # DMP
- example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
-
- # ProMP
- example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=1, render=render)
-
- # DetProMP
- example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render)
-
- # Altered basis functions
- example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
-
- # Custom MP
- example_fully_custom_mp(seed=10, iterations=1, render=render)
diff --git a/alr_envs/examples/examples_open_ai.py b/alr_envs/examples/examples_open_ai.py
deleted file mode 100644
index dc0c558..0000000
--- a/alr_envs/examples/examples_open_ai.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import alr_envs
-
-
-def example_mp(env_name, seed=1):
- """
-    Example for running a motion primitive based version of an OpenAI-gym environment, which is already registered.
- For more information on motion primitive specific stuff, look at the mp examples.
- Args:
- env_name: ProMP env_id
- seed: seed
-
- Returns:
-
- """
-    # While gym.make() could be used in this case as well, we recommend our custom make() function.
- env = alr_envs.make(env_name, seed)
-
- rewards = 0
- obs = env.reset()
-
- # number of samples/full trajectories (multiple environment steps)
- for i in range(10):
- ac = env.action_space.sample()
- obs, reward, done, info = env.step(ac)
- rewards += reward
-
- if done:
- print(rewards)
- rewards = 0
- obs = env.reset()
-
-
-if __name__ == '__main__':
- # DMP - not supported yet
- # example_mp("ReacherDMP-v2")
-
- # DetProMP
- example_mp("ContinuousMountainCarProMP-v0")
- example_mp("ReacherProMP-v2")
- example_mp("FetchReachDenseProMP-v1")
- example_mp("FetchSlideDenseProMP-v1")
diff --git a/alr_envs/examples/pd_control_gain_tuning.py b/alr_envs/examples/pd_control_gain_tuning.py
deleted file mode 100644
index eff432a..0000000
--- a/alr_envs/examples/pd_control_gain_tuning.py
+++ /dev/null
@@ -1,100 +0,0 @@
-import numpy as np
-from matplotlib import pyplot as plt
-
-from alr_envs import dmc, meta
-from alr_envs.alr import mujoco
-from alr_envs.utils.make_env_helpers import make_promp_env
-
-
-def visualize(env):
- t = env.t
- pos_features = env.mp.basis_generator.basis(t)
- plt.plot(t, pos_features)
- plt.show()
-
-
-# This might work for some environments. However, please verify either way that the correct trajectory information
-# for your environment is extracted below
-SEED = 1
-# env_id = "ball_in_cup-catch"
-env_id = "ALRReacherSparse-v0"
-env_id = "button-press-v2"
-# wrappers = [mujoco.reacher.MPWrapper]
-wrappers = [meta.goal_object_change_mp_wrapper.MPWrapper]
-
-mp_kwargs = {
- "num_dof": 4,
- "num_basis": 5,
- "duration": 6.25,
- "policy_type": "metaworld",
- "weights_scale": 10,
- "zero_start": True,
- # "policy_kwargs": {
- # "p_gains": 1,
- # "d_gains": 0.1
- # }
-}
-
-# kwargs = dict(time_limit=4, episode_length=200)
-kwargs = {}
-
-env = make_promp_env(env_id, wrappers, seed=SEED, mp_kwargs=mp_kwargs, **kwargs)
-env.action_space.seed(SEED)
-
-# Plot difference between real trajectory and target MP trajectory
-env.reset()
-w = env.action_space.sample() # N(0,1)
-visualize(env)
-pos, vel = env.mp_rollout(w)
-
-base_shape = env.full_action_space.shape
-actual_pos = np.zeros((len(pos), *base_shape))
-actual_vel = np.zeros((len(pos), *base_shape))
-act = np.zeros((len(pos), *base_shape))
-
-plt.ion()
-fig = plt.figure()
-ax = fig.add_subplot(1, 1, 1)
-img = ax.imshow(env.env.render("rgb_array"))
-fig.show()
-
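-# Replay the planned MP trajectory through the tracking controller and record the executed actions and positions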
-for t, pos_vel in enumerate(zip(pos, vel)):
- actions = env.policy.get_action(pos_vel[0], pos_vel[1])
- actions = np.clip(actions, env.full_action_space.low, env.full_action_space.high)
- _, _, _, _ = env.env.step(actions)
- if t % 15 == 0:
- img.set_data(env.env.render("rgb_array"))
- fig.canvas.draw()
- fig.canvas.flush_events()
- act[t, :] = actions
- # TODO verify for your environment
- actual_pos[t, :] = env.current_pos
- actual_vel[t, :] = 0 # env.current_vel
-
-plt.figure(figsize=(15, 5))
-
-plt.subplot(131)
-plt.title("Position")
-p1 = plt.plot(actual_pos, c='C0', label="true")
-# plt.plot(actual_pos_ball, label="true pos ball")
-p2 = plt.plot(pos, c='C1', label="MP") # , label=["MP" if i == 0 else None for i in range(np.prod(base_shape))])
-plt.xlabel("Episode steps")
-# plt.legend()
-handles, labels = plt.gca().get_legend_handles_labels()
-from collections import OrderedDict
-
-by_label = OrderedDict(zip(labels, handles))
-plt.legend(by_label.values(), by_label.keys())
-
-plt.subplot(132)
-plt.title("Velocity")
-plt.plot(actual_vel, c='C0', label="true")
-plt.plot(vel, c='C1', label="MP")
-plt.xlabel("Episode steps")
-
-plt.subplot(133)
-plt.title(f"Actions {np.std(act, axis=0)}")
-plt.plot(act, c="C0")  # label=[f"actions" if i == 0 else "" for i in range(np.prod(base_action_shape))])
-plt.xlabel("Episode steps")
-# plt.legend()
-plt.show()
diff --git a/alr_envs/meta/README.MD b/alr_envs/meta/README.MD
deleted file mode 100644
index c8d9cd1..0000000
--- a/alr_envs/meta/README.MD
+++ /dev/null
@@ -1,26 +0,0 @@
-# MetaWorld Wrappers
-
-These are the Environment Wrappers for selected [Metaworld](https://meta-world.github.io/) environments in order to use our Motion Primitive gym interface with them.
-All Metaworld environments have a 39-dimensional observation space with the same structure. The tasks differ only in the objective and the initial observations, which are randomized.
-Unused observations are zeroed out. E.g. for `Button-Press-v2` the observation mask looks as follows:
-```python
- return np.hstack([
- # Current observation
- [False] * 3, # end-effector position
- [False] * 1, # normalized gripper open distance
- [True] * 3, # main object position
- [False] * 4, # main object quaternion
- [False] * 3, # secondary object position
- [False] * 4, # secondary object quaternion
- # Previous observation
- [False] * 3, # previous end-effector position
- [False] * 1, # previous normalized gripper open distance
- [False] * 3, # previous main object position
- [False] * 4, # previous main object quaternion
- [False] * 3, # previous second object position
- [False] * 4, # previous second object quaternion
- # Goal
- [True] * 3, # goal position
- ])
-```
-For other tasks, only the boolean values have to be adjusted accordingly, as sketched below.
\ No newline at end of file
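-
-As a minimal sketch (hypothetical example, assuming the same 39-dimensional layout): for a task where only the initial
-end-effector position varies between episodes, the end-effector entries would be set to `True` instead, similar to the
-mask used in `goal_endeffector_change_mp_wrapper.py`:
-```python
-import numpy as np
-
-active_obs = np.hstack([
-    [True] * 3,    # end-effector position
-    [False] * 1,   # normalized gripper open distance
-    [False] * 3,   # main object position
-    [False] * 4,   # main object quaternion
-    [False] * 3,   # secondary object position
-    [False] * 4,   # secondary object quaternion
-    [False] * 18,  # previous observation (same layout as the current observation above)
-    [True] * 3,    # goal position
-])
-```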
diff --git a/alr_envs/meta/__init__.py b/alr_envs/meta/__init__.py
deleted file mode 100644
index 5651224..0000000
--- a/alr_envs/meta/__init__.py
+++ /dev/null
@@ -1,111 +0,0 @@
-from gym import register
-
-from . import goal_object_change_mp_wrapper, goal_change_mp_wrapper, goal_endeffector_change_mp_wrapper, \
- object_change_mp_wrapper
-
-ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
-
-# MetaWorld
-
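-# The tasks below are grouped by which parts of the observation are randomized between episodes;
-# each group is registered with the matching MPWrapper from this package.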
-_goal_change_envs = ["assembly-v2", "pick-out-of-hole-v2", "plate-slide-v2", "plate-slide-back-v2",
- "plate-slide-side-v2", "plate-slide-back-side-v2"]
-for _task in _goal_change_envs:
- task_id_split = _task.split("-")
- name = "".join([s.capitalize() for s in task_id_split[:-1]])
- _env_id = f'{name}ProMP-{task_id_split[-1]}'
- register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": _task,
- "wrappers": [goal_change_mp_wrapper.MPWrapper],
- "mp_kwargs": {
- "num_dof": 4,
- "num_basis": 5,
- "duration": 6.25,
- "post_traj_time": 0,
- "zero_start": True,
- "policy_type": "metaworld",
- }
- }
- )
- ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-_object_change_envs = ["bin-picking-v2", "hammer-v2", "sweep-into-v2"]
-for _task in _object_change_envs:
- task_id_split = _task.split("-")
- name = "".join([s.capitalize() for s in task_id_split[:-1]])
- _env_id = f'{name}ProMP-{task_id_split[-1]}'
- register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": _task,
- "wrappers": [object_change_mp_wrapper.MPWrapper],
- "mp_kwargs": {
- "num_dof": 4,
- "num_basis": 5,
- "duration": 6.25,
- "post_traj_time": 0,
- "zero_start": True,
- "policy_type": "metaworld",
- }
- }
- )
- ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-_goal_and_object_change_envs = ["box-close-v2", "button-press-v2", "button-press-wall-v2", "button-press-topdown-v2",
- "button-press-topdown-wall-v2", "coffee-button-v2", "coffee-pull-v2",
- "coffee-push-v2", "dial-turn-v2", "disassemble-v2", "door-close-v2",
- "door-lock-v2", "door-open-v2", "door-unlock-v2", "hand-insert-v2",
- "drawer-close-v2", "drawer-open-v2", "faucet-open-v2", "faucet-close-v2",
- "handle-press-side-v2", "handle-press-v2", "handle-pull-side-v2",
- "handle-pull-v2", "lever-pull-v2", "peg-insert-side-v2", "pick-place-wall-v2",
- "reach-v2", "push-back-v2", "push-v2", "pick-place-v2", "peg-unplug-side-v2",
- "soccer-v2", "stick-push-v2", "stick-pull-v2", "push-wall-v2", "reach-wall-v2",
- "shelf-place-v2", "sweep-v2", "window-open-v2", "window-close-v2"
- ]
-for _task in _goal_and_object_change_envs:
- task_id_split = _task.split("-")
- name = "".join([s.capitalize() for s in task_id_split[:-1]])
- _env_id = f'{name}ProMP-{task_id_split[-1]}'
- register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": _task,
- "wrappers": [goal_object_change_mp_wrapper.MPWrapper],
- "mp_kwargs": {
- "num_dof": 4,
- "num_basis": 5,
- "duration": 6.25,
- "post_traj_time": 0,
- "zero_start": True,
- "policy_type": "metaworld",
- }
- }
- )
- ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-_goal_and_endeffector_change_envs = ["basketball-v2"]
-for _task in _goal_and_endeffector_change_envs:
- task_id_split = _task.split("-")
- name = "".join([s.capitalize() for s in task_id_split[:-1]])
- _env_id = f'{name}ProMP-{task_id_split[-1]}'
- register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": _task,
- "wrappers": [goal_endeffector_change_mp_wrapper.MPWrapper],
- "mp_kwargs": {
- "num_dof": 4,
- "num_basis": 5,
- "duration": 6.25,
- "post_traj_time": 0,
- "zero_start": True,
- "policy_type": "metaworld",
- }
- }
- )
- ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
diff --git a/alr_envs/meta/goal_change_mp_wrapper.py b/alr_envs/meta/goal_change_mp_wrapper.py
deleted file mode 100644
index a558365..0000000
--- a/alr_envs/meta/goal_change_mp_wrapper.py
+++ /dev/null
@@ -1,68 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from mp_env_api import MPEnvWrapper
-
-
-class MPWrapper(MPEnvWrapper):
- """
- This Wrapper is for environments where merely the goal changes in the beginning
- and no secondary objects or end effectors are altered at the start of an episode.
-    You can verify this by executing the code below for your environment id and checking whether the output is non-zero
- at the same indices.
- ```python
- import alr_envs
- env = alr_envs.make(env_id, 1)
- print(env.reset() - env.reset())
- array([ 0. , 0. , 0. , 0. , 0,
- 0 , 0 , 0. , 0. , 0. ,
- 0. , 0. , 0. , 0. , 0. ,
- 0. , 0. , 0. , 0. , 0. ,
- 0. , 0. , 0 , 0 , 0 ,
- 0. , 0. , 0. , 0. , 0. ,
- 0. , 0. , 0. , 0. , 0. ,
- 0. , !=0 , !=0 , !=0])
- ```
- """
-
- @property
- def active_obs(self):
- # This structure is the same for all metaworld environments.
- # Only the observations which change could differ
- return np.hstack([
- # Current observation
- [False] * 3, # end-effector position
- [False] * 1, # normalized gripper open distance
- [False] * 3, # main object position
- [False] * 4, # main object quaternion
- [False] * 3, # secondary object position
- [False] * 4, # secondary object quaternion
- # Previous observation
- # TODO: Include previous values? According to their source they might be wrong for the first iteration.
- [False] * 3, # previous end-effector position
- [False] * 1, # previous normalized gripper open distance
- [False] * 3, # previous main object position
- [False] * 4, # previous main object quaternion
- [False] * 3, # previous second object position
- [False] * 4, # previous second object quaternion
- # Goal
- [True] * 3, # goal position
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray]:
- r_close = self.env.data.get_joint_qpos("r_close")
- return np.hstack([self.env.data.mocap_pos.flatten() / self.env.action_scale, r_close])
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- raise NotImplementedError("Velocity cannot be retrieved.")
-
- @property
- def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/meta/goal_endeffector_change_mp_wrapper.py b/alr_envs/meta/goal_endeffector_change_mp_wrapper.py
deleted file mode 100644
index 8912a72..0000000
--- a/alr_envs/meta/goal_endeffector_change_mp_wrapper.py
+++ /dev/null
@@ -1,68 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from mp_env_api import MPEnvWrapper
-
-
-class MPWrapper(MPEnvWrapper):
- """
-    This Wrapper is for environments where the goal and the initial end effector position change at the start
-    of an episode, while no secondary objects are altered.
-    You can verify this by executing the code below for your environment id and checking whether the output is non-zero
-    at the same indices.
- ```python
- import alr_envs
- env = alr_envs.make(env_id, 1)
- print(env.reset() - env.reset())
- array([ !=0 , !=0 , !=0 , 0. , 0.,
- 0. , 0. , 0. , 0. , 0. ,
- 0. , 0. , 0. , 0. , 0. ,
- 0. , 0. , 0. , !=0 , !=0 ,
- !=0 , 0. , 0. , 0. , 0. ,
- 0. , 0. , 0. , 0. , 0. ,
- 0. , 0. , 0. , 0. , 0. ,
- 0. , !=0 , !=0 , !=0])
- ```
- """
-
- @property
- def active_obs(self):
- # This structure is the same for all metaworld environments.
- # Only the observations which change could differ
- return np.hstack([
- # Current observation
- [True] * 3, # end-effector position
- [False] * 1, # normalized gripper open distance
- [False] * 3, # main object position
- [False] * 4, # main object quaternion
- [False] * 3, # secondary object position
- [False] * 4, # secondary object quaternion
- # Previous observation
- # TODO: Include previous values? According to their source they might be wrong for the first iteration.
- [False] * 3, # previous end-effector position
- [False] * 1, # previous normalized gripper open distance
- [False] * 3, # previous main object position
- [False] * 4, # previous main object quaternion
- [False] * 3, # previous second object position
- [False] * 4, # previous second object quaternion
- # Goal
- [True] * 3, # goal position
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray]:
- r_close = self.env.data.get_joint_qpos("r_close")
- return np.hstack([self.env.data.mocap_pos.flatten() / self.env.action_scale, r_close])
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- raise NotImplementedError("Velocity cannot be retrieved.")
-
- @property
- def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/meta/goal_object_change_mp_wrapper.py b/alr_envs/meta/goal_object_change_mp_wrapper.py
deleted file mode 100644
index 63e16b7..0000000
--- a/alr_envs/meta/goal_object_change_mp_wrapper.py
+++ /dev/null
@@ -1,68 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from mp_env_api import MPEnvWrapper
-
-
-class MPWrapper(MPEnvWrapper):
- """
-    This Wrapper is for environments where the goal and the main object position change at the start of an episode,
-    while the end effector and secondary objects are not altered.
-    You can verify this by executing the code below for your environment id and checking whether the output is non-zero
-    at the same indices.
- ```python
- import alr_envs
- env = alr_envs.make(env_id, 1)
- print(env.reset() - env.reset())
- array([ 0. , 0. , 0. , 0. , !=0,
- !=0 , !=0 , 0. , 0. , 0. ,
- 0. , 0. , 0. , 0. , 0. ,
- 0. , 0. , 0. , 0. , 0. ,
- 0. , 0. , !=0 , !=0 , !=0 ,
- 0. , 0. , 0. , 0. , 0. ,
- 0. , 0. , 0. , 0. , 0. ,
- 0. , !=0 , !=0 , !=0])
- ```
- """
-
- @property
- def active_obs(self):
- # This structure is the same for all metaworld environments.
- # Only the observations which change could differ
- return np.hstack([
- # Current observation
- [False] * 3, # end-effector position
- [False] * 1, # normalized gripper open distance
- [True] * 3, # main object position
- [False] * 4, # main object quaternion
- [False] * 3, # secondary object position
- [False] * 4, # secondary object quaternion
- # Previous observation
- # TODO: Include previous values? According to their source they might be wrong for the first iteration.
- [False] * 3, # previous end-effector position
- [False] * 1, # previous normalized gripper open distance
- [False] * 3, # previous main object position
- [False] * 4, # previous main object quaternion
- [False] * 3, # previous second object position
- [False] * 4, # previous second object quaternion
- # Goal
- [True] * 3, # goal position
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray]:
- r_close = self.env.data.get_joint_qpos("r_close")
- return np.hstack([self.env.data.mocap_pos.flatten() / self.env.action_scale, r_close])
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- raise NotImplementedError("Velocity cannot be retrieved.")
-
- @property
- def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/meta/object_change_mp_wrapper.py b/alr_envs/meta/object_change_mp_wrapper.py
deleted file mode 100644
index 4293148..0000000
--- a/alr_envs/meta/object_change_mp_wrapper.py
+++ /dev/null
@@ -1,68 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from mp_env_api import MPEnvWrapper
-
-
-class MPWrapper(MPEnvWrapper):
- """
-    This Wrapper is for environments where merely the main object position changes at the start of an episode,
-    while the goal and the end effector are not altered.
-    You can verify this by executing the code below for your environment id and checking whether the output is non-zero
-    at the same indices.
- ```python
- import alr_envs
- env = alr_envs.make(env_id, 1)
- print(env.reset() - env.reset())
- array([ 0. , 0. , 0. , 0. , !=0 ,
- !=0 , !=0 , 0. , 0. , 0. ,
- 0. , 0. , 0. , 0. , 0. ,
- 0. , 0. , 0. , 0. , 0. ,
- 0. , 0. , 0 , 0 , 0 ,
- 0. , 0. , 0. , 0. , 0. ,
- 0. , 0. , 0. , 0. , 0. ,
- 0. , 0. , 0. , 0.])
- ```
- """
-
- @property
- def active_obs(self):
- # This structure is the same for all metaworld environments.
- # Only the observations which change could differ
- return np.hstack([
- # Current observation
- [False] * 3, # end-effector position
- [False] * 1, # normalized gripper open distance
- [False] * 3, # main object position
- [False] * 4, # main object quaternion
- [False] * 3, # secondary object position
- [False] * 4, # secondary object quaternion
- # Previous observation
- # TODO: Include previous values? According to their source they might be wrong for the first iteration.
- [False] * 3, # previous end-effector position
- [False] * 1, # previous normalized gripper open distance
- [False] * 3, # previous main object position
- [False] * 4, # previous main object quaternion
- [False] * 3, # previous second object position
- [False] * 4, # previous second object quaternion
- # Goal
- [True] * 3, # goal position
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray]:
- r_close = self.env.data.get_joint_qpos("r_close")
- return np.hstack([self.env.data.mocap_pos.flatten() / self.env.action_scale, r_close])
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- raise NotImplementedError("Velocity cannot be retrieved.")
-
- @property
- def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/open_ai/README.MD b/alr_envs/open_ai/README.MD
deleted file mode 100644
index 62d1f20..0000000
--- a/alr_envs/open_ai/README.MD
+++ /dev/null
@@ -1,14 +0,0 @@
-# OpenAI Gym Wrappers
-
-These are the Environment Wrappers for selected [OpenAI Gym](https://gym.openai.com/) environments in order to use
-our Motion Primitive gym interface with them.
-
-## MP Environments
-These environments are wrapped versions of their OpenAI-gym counterparts.
-
-|Name|Description|Trajectory Horizon|Action Dimension|Context Dimension|
-|---|---|---|---|---|
-|`ContinuousMountainCarProMP-v0`| A ProMP wrapped version of the ContinuousMountainCar-v0 environment. | 100 | 1 | |
-|`ReacherProMP-v2`| A ProMP wrapped version of the Reacher-v2 environment. | 50 | 2 | |
-|`FetchSlideDenseProMP-v1`| A ProMP wrapped version of the FetchSlideDense-v1 environment. | 50 | 4 | |
-|`FetchReachDenseProMP-v1`| A ProMP wrapped version of the FetchReachDense-v1 environment. | 50 | 4 | |
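-
-As a minimal usage sketch (assuming `alr_envs` is installed), these wrapped environments can be created like any other
-registered gym environment via our custom `make` helper; a single `step()` then executes the full ProMP trajectory:
-```python
-import alr_envs
-
-env = alr_envs.make("ReacherProMP-v2", seed=1)
-obs = env.reset()
-# one action corresponds to the ProMP parameters of a complete trajectory
-obs, reward, done, info = env.step(env.action_space.sample())
-```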
diff --git a/alr_envs/open_ai/__init__.py b/alr_envs/open_ai/__init__.py
deleted file mode 100644
index 41b770f..0000000
--- a/alr_envs/open_ai/__init__.py
+++ /dev/null
@@ -1,154 +0,0 @@
-from gym import register
-from gym.wrappers import FlattenObservation
-
-from . import classic_control, mujoco, robotics
-
-ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
-
-# Short Continuous Mountain Car
-register(
- id="MountainCarContinuous-v1",
- entry_point="gym.envs.classic_control:Continuous_MountainCarEnv",
- max_episode_steps=100,
- reward_threshold=90.0,
-)
-
-# Open AI
-# Classic Control
-register(
- id='ContinuousMountainCarProMP-v1',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": "alr_envs:MountainCarContinuous-v1",
- "wrappers": [classic_control.continuous_mountain_car.MPWrapper],
- "mp_kwargs": {
- "num_dof": 1,
- "num_basis": 4,
- "duration": 2,
- "post_traj_time": 0,
- "zero_start": True,
- "policy_type": "motor",
- "policy_kwargs": {
- "p_gains": 1.,
- "d_gains": 1.
- }
- }
- }
-)
-ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ContinuousMountainCarProMP-v1")
-
-register(
- id='ContinuousMountainCarProMP-v0',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": "gym.envs.classic_control:MountainCarContinuous-v0",
- "wrappers": [classic_control.continuous_mountain_car.MPWrapper],
- "mp_kwargs": {
- "num_dof": 1,
- "num_basis": 4,
- "duration": 19.98,
- "post_traj_time": 0,
- "zero_start": True,
- "policy_type": "motor",
- "policy_kwargs": {
- "p_gains": 1.,
- "d_gains": 1.
- }
- }
- }
-)
-ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ContinuousMountainCarProMP-v0")
-
-register(
- id='ReacherProMP-v2',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": "gym.envs.mujoco:Reacher-v2",
- "wrappers": [mujoco.reacher_v2.MPWrapper],
- "mp_kwargs": {
- "num_dof": 2,
- "num_basis": 6,
- "duration": 1,
- "post_traj_time": 0,
- "zero_start": True,
- "policy_type": "motor",
- "policy_kwargs": {
- "p_gains": .6,
- "d_gains": .075
- }
- }
- }
-)
-ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ReacherProMP-v2")
-
-register(
- id='FetchSlideDenseProMP-v1',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": "gym.envs.robotics:FetchSlideDense-v1",
- "wrappers": [FlattenObservation, robotics.fetch.MPWrapper],
- "mp_kwargs": {
- "num_dof": 4,
- "num_basis": 5,
- "duration": 2,
- "post_traj_time": 0,
- "zero_start": True,
- "policy_type": "position"
- }
- }
-)
-ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("FetchSlideDenseProMP-v1")
-
-register(
- id='FetchSlideProMP-v1',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": "gym.envs.robotics:FetchSlide-v1",
- "wrappers": [FlattenObservation, robotics.fetch.MPWrapper],
- "mp_kwargs": {
- "num_dof": 4,
- "num_basis": 5,
- "duration": 2,
- "post_traj_time": 0,
- "zero_start": True,
- "policy_type": "position"
- }
- }
-)
-ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("FetchSlideProMP-v1")
-
-register(
- id='FetchReachDenseProMP-v1',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": "gym.envs.robotics:FetchReachDense-v1",
- "wrappers": [FlattenObservation, robotics.fetch.MPWrapper],
- "mp_kwargs": {
- "num_dof": 4,
- "num_basis": 5,
- "duration": 2,
- "post_traj_time": 0,
- "zero_start": True,
- "policy_type": "position"
- }
- }
-)
-ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("FetchReachDenseProMP-v1")
-
-register(
- id='FetchReachProMP-v1',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": "gym.envs.robotics:FetchReach-v1",
- "wrappers": [FlattenObservation, robotics.fetch.MPWrapper],
- "mp_kwargs": {
- "num_dof": 4,
- "num_basis": 5,
- "duration": 2,
- "post_traj_time": 0,
- "zero_start": True,
- "policy_type": "position"
- }
- }
-)
-ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("FetchReachProMP-v1")
diff --git a/alr_envs/open_ai/classic_control/__init__.py b/alr_envs/open_ai/classic_control/__init__.py
deleted file mode 100644
index b998494..0000000
--- a/alr_envs/open_ai/classic_control/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from . import continuous_mountain_car
\ No newline at end of file
diff --git a/alr_envs/open_ai/classic_control/continuous_mountain_car/__init__.py b/alr_envs/open_ai/classic_control/continuous_mountain_car/__init__.py
deleted file mode 100644
index 989b5a9..0000000
--- a/alr_envs/open_ai/classic_control/continuous_mountain_car/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/open_ai/classic_control/continuous_mountain_car/mp_wrapper.py b/alr_envs/open_ai/classic_control/continuous_mountain_car/mp_wrapper.py
deleted file mode 100644
index 2a2357a..0000000
--- a/alr_envs/open_ai/classic_control/continuous_mountain_car/mp_wrapper.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from typing import Union
-
-import numpy as np
-from mp_env_api import MPEnvWrapper
-
-
-class MPWrapper(MPEnvWrapper):
- @property
- def current_vel(self) -> Union[float, int, np.ndarray]:
- return np.array([self.state[1]])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray]:
- return np.array([self.state[0]])
-
- @property
- def goal_pos(self):
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return 0.02
\ No newline at end of file
diff --git a/alr_envs/open_ai/mujoco/__init__.py b/alr_envs/open_ai/mujoco/__init__.py
deleted file mode 100644
index 3082d19..0000000
--- a/alr_envs/open_ai/mujoco/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from . import reacher_v2
diff --git a/alr_envs/open_ai/mujoco/reacher_v2/__init__.py b/alr_envs/open_ai/mujoco/reacher_v2/__init__.py
deleted file mode 100644
index 989b5a9..0000000
--- a/alr_envs/open_ai/mujoco/reacher_v2/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/open_ai/mujoco/reacher_v2/mp_wrapper.py b/alr_envs/open_ai/mujoco/reacher_v2/mp_wrapper.py
deleted file mode 100644
index 16202e5..0000000
--- a/alr_envs/open_ai/mujoco/reacher_v2/mp_wrapper.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from typing import Union
-
-import numpy as np
-from mp_env_api import MPEnvWrapper
-
-
-class MPWrapper(MPEnvWrapper):
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray]:
- return self.sim.data.qvel[:2]
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray]:
- return self.sim.data.qpos[:2]
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
\ No newline at end of file
diff --git a/alr_envs/open_ai/robotics/__init__.py b/alr_envs/open_ai/robotics/__init__.py
deleted file mode 100644
index 13c65b3..0000000
--- a/alr_envs/open_ai/robotics/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from . import fetch
\ No newline at end of file
diff --git a/alr_envs/open_ai/robotics/fetch/__init__.py b/alr_envs/open_ai/robotics/fetch/__init__.py
deleted file mode 100644
index 989b5a9..0000000
--- a/alr_envs/open_ai/robotics/fetch/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/open_ai/robotics/fetch/mp_wrapper.py b/alr_envs/open_ai/robotics/fetch/mp_wrapper.py
deleted file mode 100644
index 218e175..0000000
--- a/alr_envs/open_ai/robotics/fetch/mp_wrapper.py
+++ /dev/null
@@ -1,49 +0,0 @@
-from typing import Union
-
-import numpy as np
-
-from mp_env_api import MPEnvWrapper
-
-
-class MPWrapper(MPEnvWrapper):
-
- @property
- def active_obs(self):
- return np.hstack([
- [False] * 3, # achieved goal
- [True] * 3, # desired/true goal
- [False] * 3, # grip pos
- [True, True, False] * int(self.has_object), # object position
- [True, True, False] * int(self.has_object), # object relative position
- [False] * 2, # gripper state
- [False] * 3 * int(self.has_object), # object rotation
- [False] * 3 * int(self.has_object), # object velocity position
- [False] * 3 * int(self.has_object), # object velocity rotation
- [False] * 3, # grip velocity position
- [False] * 2, # gripper velocity
- ]).astype(bool)
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray]:
- dt = self.sim.nsubsteps * self.sim.model.opt.timestep
- grip_velp = self.sim.data.get_site_xvelp("robot0:grip") * dt
- # gripper state should be symmetric for left and right.
- # They are controlled with only one action for both gripper joints
- gripper_state = self.sim.data.get_joint_qvel('robot0:r_gripper_finger_joint') * dt
- return np.hstack([grip_velp, gripper_state])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray]:
- grip_pos = self.sim.data.get_site_xpos("robot0:grip")
- # gripper state should be symmetric for left and right.
- # They are controlled with only one action for both gripper joints
- gripper_state = self.sim.data.get_joint_qpos('robot0:r_gripper_finger_joint')
- return np.hstack([grip_pos, gripper_state])
-
- @property
- def goal_pos(self):
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/utils/__init__.py b/alr_envs/utils/__init__.py
deleted file mode 100644
index bb4b0bc..0000000
--- a/alr_envs/utils/__init__.py
+++ /dev/null
@@ -1,66 +0,0 @@
-import re
-from typing import Union
-
-import gym
-from gym.envs.registration import register
-
-from alr_envs.utils.make_env_helpers import make
-
-
-def make_dmc(
- id: str,
- seed: int = 1,
- visualize_reward: bool = True,
- from_pixels: bool = False,
- height: int = 84,
- width: int = 84,
- camera_id: int = 0,
- frame_skip: int = 1,
- episode_length: Union[None, int] = None,
- environment_kwargs: dict = {},
- time_limit: Union[None, float] = None,
- channels_first: bool = True
-):
- # Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/__init__.py
- # License: MIT
- # Copyright (c) 2020 Denis Yarats
-
- assert re.match(r"\w+-\w+", id), "env_id does not have the following structure: 'domain_name-task_name'"
- domain_name, task_name = id.split("-")
-
- env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1'
-
- if from_pixels:
- assert not visualize_reward, 'cannot use visualize reward when learning from pixels'
-
- # shorten episode length
- if episode_length is None:
- # Default lengths for benchmarking suite is 1000 and for manipulation tasks 250
- episode_length = 250 if domain_name == "manipulation" else 1000
-
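-    # ceil(episode_length / frame_skip) yields the number of wrapped environment steps per episode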
- max_episode_steps = (episode_length + frame_skip - 1) // frame_skip
- if env_id not in gym.envs.registry.env_specs:
- task_kwargs = {'random': seed}
- # if seed is not None:
- # task_kwargs['random'] = seed
- if time_limit is not None:
- task_kwargs['time_limit'] = time_limit
- register(
- id=env_id,
- entry_point='alr_envs.dmc.dmc_wrapper:DMCWrapper',
- kwargs=dict(
- domain_name=domain_name,
- task_name=task_name,
- task_kwargs=task_kwargs,
- environment_kwargs=environment_kwargs,
- visualize_reward=visualize_reward,
- from_pixels=from_pixels,
- height=height,
- width=width,
- camera_id=camera_id,
- frame_skip=frame_skip,
- channels_first=channels_first,
- ),
- max_episode_steps=max_episode_steps,
- )
- return gym.make(env_id)
diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py
deleted file mode 100644
index 4300439..0000000
--- a/alr_envs/utils/make_env_helpers.py
+++ /dev/null
@@ -1,224 +0,0 @@
-import warnings
-from typing import Iterable, Type, Union
-
-import gym
-import numpy as np
-from gym.envs.registration import EnvSpec
-
-from mp_env_api import MPEnvWrapper
-from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper
-from mp_env_api.mp_wrappers.promp_wrapper import ProMPWrapper
-
-
-def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
- """
- TODO: Do we need this?
- Generate a callable to create a new gym environment with a given seed.
- The rank is added to the seed and can be used for example when using vector environments.
-    E.g. [make_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environment factories
-    with seeds 123 through 130.
-    Hence, testing environments should be seeded with a value which is offset by the number of training environments.
-    Here e.g. [make_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environments.
-
- Args:
- env_id: name of the environment
- seed: seed for deterministic behaviour
-        rank: environment rank, which is added to the seed for deterministic behaviour across multiple environments
- return_callable: If True returns a callable to create the environment instead of the environment itself.
-
- Returns:
-
- """
-
- def f():
- return make(env_id, seed + rank, **kwargs)
-
- return f if return_callable else f()
-
-
-def make(env_id: str, seed, **kwargs):
- """
- Converts an env_id to an environment with the gym API.
-    This also works for DeepMind Control Suite environments,
-    for which domain name and task name are expected to be separated by "-".
- Args:
- env_id: gym name or env_id of the form "domain_name-task_name" for DMC tasks
- **kwargs: Additional kwargs for the constructor such as pixel observations, etc.
-
- Returns: Gym environment
-
- """
- if any([det_pmp in env_id for det_pmp in ["DetPMP", "detpmp"]]):
- warnings.warn("DetPMP is deprecated and converted to ProMP")
- env_id = env_id.replace("DetPMP", "ProMP")
- env_id = env_id.replace("detpmp", "promp")
-
- try:
- # Add seed to kwargs in case it is a predefined gym+dmc hybrid environment.
- if env_id.startswith("dmc"):
- kwargs.update({"seed": seed})
-
- # Gym
- env = gym.make(env_id, **kwargs)
- env.seed(seed)
- env.action_space.seed(seed)
- env.observation_space.seed(seed)
- except gym.error.Error:
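-        # env_id is not registered with gym, so fall back to MetaWorld and then to DMC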
-
- # MetaWorld env
- import metaworld
- if env_id in metaworld.ML1.ENV_NAMES:
- env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs)
- # setting this avoids generating the same initialization after each reset
- env._freeze_rand_vec = False
- # Manually set spec, as metaworld environments are not registered via gym
- env.unwrapped.spec = EnvSpec(env_id)
- # Set Timelimit based on the maximum allowed path length of the environment
- env = gym.wrappers.TimeLimit(env, max_episode_steps=env.max_path_length)
- env.seed(seed)
- env.action_space.seed(seed)
- env.observation_space.seed(seed)
- env.goal_space.seed(seed)
-
- else:
- # DMC
- from alr_envs import make_dmc
- env = make_dmc(env_id, seed=seed, **kwargs)
-
- assert env.base_step_limit == env.spec.max_episode_steps, \
- f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym is different from " \
- f"the DMC environment specification of {env.base_step_limit} steps."
-
- return env
-
-
-def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs):
- """
- Helper function for creating a wrapped gym environment using MPs.
- It adds all provided wrappers to the specified environment and verifies at least one MPEnvWrapper is
- provided to expose the interface for MPs.
-
- Args:
- env_id: name of the environment
- wrappers: list of wrappers (at least an MPEnvWrapper),
- seed: seed of environment
-
- Returns: gym environment with all specified wrappers applied
-
- """
- # _env = gym.make(env_id)
- _env = make(env_id, seed, **kwargs)
-
- assert any(issubclass(w, MPEnvWrapper) for w in wrappers), \
- "At least one MPEnvWrapper is required in order to leverage motion primitive environments."
- for w in wrappers:
- _env = w(_env)
-
- return _env
-
-
-def make_dmp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwargs):
- """
- This can also be used standalone for manually building a custom DMP environment.
- Args:
- env_id: base_env_name,
- wrappers: list of wrappers (at least an MPEnvWrapper),
- seed: seed of environment
- mp_kwargs: dict of at least {num_dof: int, num_basis: int} for DMP
-
- Returns: DMP wrapped gym env
-
- """
- _verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None))
-
- _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs)
-
- _verify_dof(_env, mp_kwargs.get("num_dof"))
-
- return DmpWrapper(_env, **mp_kwargs)
-
-
-def make_promp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwargs):
- """
- This can also be used standalone for manually building a custom ProMP environment.
- Args:
- env_id: base_env_name,
-        wrappers: list of wrappers (at least an MPEnvWrapper),
-        seed: seed of environment
-        mp_kwargs: dict of at least {num_dof: int, num_basis: int, width: int}
-
- Returns: ProMP wrapped gym env
-
- """
- _verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None))
-
- _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs)
-
- _verify_dof(_env, mp_kwargs.get("num_dof"))
-
- return ProMPWrapper(_env, **mp_kwargs)
-
-
-def make_dmp_env_helper(**kwargs):
- """
-    Helper function for registering DMP gym environments.
- Args:
- **kwargs: expects at least the following:
- {
- "name": base_env_name,
- "wrappers": list of wrappers (at least an MPEnvWrapper),
- "mp_kwargs": dict of at least {num_dof: int, num_basis: int} for DMP
- }
-
- Returns: DMP wrapped gym env
-
- """
- seed = kwargs.pop("seed", None)
- return make_dmp_env(env_id=kwargs.pop("name"), wrappers=kwargs.pop("wrappers"), seed=seed,
- mp_kwargs=kwargs.pop("mp_kwargs"), **kwargs)
-
-
-def make_promp_env_helper(**kwargs):
- """
- Helper function for registering ProMP gym environments.
- This can also be used standalone for manually building a custom ProMP environment.
- Args:
- **kwargs: expects at least the following:
- {
- "name": base_env_name,
- "wrappers": list of wrappers (at least an MPEnvWrapper),
- "mp_kwargs": dict of at least {num_dof: int, num_basis: int, width: int}
- }
-
- Returns: ProMP wrapped gym env
-
- """
- seed = kwargs.pop("seed", None)
- return make_promp_env(env_id=kwargs.pop("name"), wrappers=kwargs.pop("wrappers"), seed=seed,
- mp_kwargs=kwargs.pop("mp_kwargs"), **kwargs)
-
-
-def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[None, float]):
- """
- When using DMC check if a manually specified time limit matches the trajectory duration the MP receives.
- Mostly, the time_limit for DMC is not specified and the default values from DMC are taken.
-    This check, however, can only be done after instantiating the environment.
- It can be found in the BaseMP class.
-
- Args:
- mp_time_limit: max trajectory length of mp in seconds
- env_time_limit: max trajectory length of DMC environment in seconds
-
- Returns:
-
- """
- if mp_time_limit is not None and env_time_limit is not None:
- assert mp_time_limit == env_time_limit, \
- f"The specified 'time_limit' of {env_time_limit}s does not match " \
- f"the duration of {mp_time_limit}s for the MP."
-
-
-def _verify_dof(base_env: gym.Env, dof: int):
- action_shape = np.prod(base_env.action_space.shape)
-    assert dof == action_shape, \
-        f"The specified degrees of freedom ('num_dof') {dof} do not match " \
-        f"the action space dimensionality of {action_shape} of the base environment"
diff --git a/alr_envs/utils/utils.py b/alr_envs/utils/utils.py
deleted file mode 100644
index 3354db3..0000000
--- a/alr_envs/utils/utils.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import numpy as np
-
-
-def angle_normalize(x, type="deg"):
- """
-    Normalize angle x to [-pi, pi).
- Args:
- x: Angle in either degrees or radians
- type: one of "deg" or "rad" for x being in degrees or radians
-
- Returns:
-
- """
-
- if type not in ["deg", "rad"]: raise ValueError(f"Invalid type {type}. Choose one of 'deg' or 'rad'.")
-
- if type == "deg":
- x = np.deg2rad(x) # x * pi / 180
-
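-    # wrap into [-pi, pi) by subtracting the appropriate multiple of 2*pi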
- two_pi = 2 * np.pi
- return x - two_pi * np.floor((x + np.pi) / two_pi)