Merge branch '26-sequencingreplanning-feature-for-episode-based-environments'

Conflicts:
	fancy_gym/dmc/README.MD
	fancy_gym/dmc/manipulation/__init__.py
	fancy_gym/dmc/manipulation/reach_site/__init__.py
	fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py
	fancy_gym/dmc/suite/__init__.py
	fancy_gym/dmc/suite/ball_in_cup/__init__.py
	fancy_gym/dmc/suite/ball_in_cup/mp_wrapper.py
	fancy_gym/dmc/suite/cartpole/__init__.py
	fancy_gym/dmc/suite/cartpole/mp_wrapper.py
	fancy_gym/dmc/suite/reacher/__init__.py
	fancy_gym/dmc/suite/reacher/mp_wrapper.py
	fancy_gym/envs/classic_control/README.MD
	fancy_gym/envs/classic_control/__init__.py
	fancy_gym/envs/classic_control/base_reacher/base_reacher.py
	fancy_gym/envs/classic_control/base_reacher/base_reacher_direct.py
	fancy_gym/envs/classic_control/base_reacher/base_reacher_torque.py
	fancy_gym/envs/classic_control/hole_reacher/__init__.py
	fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py
	fancy_gym/envs/classic_control/hole_reacher/hr_dist_vel_acc_reward.py
	fancy_gym/envs/classic_control/hole_reacher/hr_simple_reward.py
	fancy_gym/envs/classic_control/hole_reacher/mp_wrapper.py
	fancy_gym/envs/classic_control/simple_reacher/__init__.py
	fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py
	fancy_gym/envs/classic_control/utils.py
	fancy_gym/envs/classic_control/viapoint_reacher/__init__.py
	fancy_gym/envs/classic_control/viapoint_reacher/mp_wrapper.py
	fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py
	fancy_gym/envs/mujoco/README.MD
	fancy_gym/envs/mujoco/beerpong/assets/beerpong.xml
	fancy_gym/envs/mujoco/beerpong/assets/beerpong_wo_cup.xml
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/base_link_convex.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/base_link_fine.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_convex.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_fine.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_convex.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_fine.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_fine.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_fine.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split1.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split10.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split11.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split12.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split13.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split14.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split15.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split16.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split17.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split18.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split2.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split3.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split4.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split5.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split6.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split7.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split8.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/cup_split9.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/elbow_link_convex.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/elbow_link_fine.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p1.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p2.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/forearm_link_fine.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p1.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p2.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p3.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_fine.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_convex.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_fine.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p1.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p2.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/upper_arm_link_fine.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_convex.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_fine.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_fine.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl
	fancy_gym/envs/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_fine.stl
	fancy_gym/envs/mujoco/hopper_throw/__init__.py
	fancy_gym/envs/mujoco/reacher/assets/reacher_5links.xml
	fancy_gym/envs/mujoco/reacher/assets/reacher_7links.xml
	fancy_gym/examples/examples_dmc.py
	fancy_gym/examples/examples_general.py
	fancy_gym/examples/examples_metaworld.py
	fancy_gym/meta/README.MD
	fancy_gym/meta/__init__.py
	fancy_gym/meta/goal_change_mp_wrapper.py
	fancy_gym/meta/goal_endeffector_change_mp_wrapper.py
	fancy_gym/meta/goal_object_change_mp_wrapper.py
	fancy_gym/meta/object_change_mp_wrapper.py
	fancy_gym/open_ai/README.MD
	fancy_gym/open_ai/deprecated_needs_gym_robotics/robotics/__init__.py
	fancy_gym/open_ai/deprecated_needs_gym_robotics/robotics/fetch/__init__.py
	fancy_gym/open_ai/deprecated_needs_gym_robotics/robotics/fetch/mp_wrapper.py
	fancy_gym/open_ai/mujoco/__init__.py
	fancy_gym/open_ai/mujoco/reacher_v2/__init__.py
	fancy_gym/utils/__init__.py
	test/test_dmc.py
Commit 756a9fb039 by Fabian, 2022-09-26 08:43:43 +02:00
179 changed files with 8309 additions and 212 deletions

README.md

@@ -1,199 +1,218 @@
## ALR Robotics Control Environments
# Fancy Gym
This project offers a large variety of reinforcement learning environments under the unifying interface of [OpenAI gym](https://gym.openai.com/).
Besides, we also provide support (under the OpenAI interface) for the benchmark suites
`fancy_gym` offers a large variety of reinforcement learning environments under the unifying interface
of [OpenAI gym](https://gym.openai.com/). We provide support (under the OpenAI gym interface) for the benchmark suites
[DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control)
(DMC) and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environments can be created according
to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md). Unlike existing libraries, we
additionally support to control agents with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP,
we only consider the mean usually).
(DMC) and [Metaworld](https://meta-world.github.io/). If those are not sufficient and you want to create your own custom
gym environments, use [this guide](https://www.gymlibrary.ml/content/environment_creation/). We would highly appreciate it
if you then submitted a PR to make this environment part of `fancy_gym`.
In comparison to existing libraries, we additionally support controlling agents with movement primitives, such as Dynamic
Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP).
## Motion Primitive Environments (Episodic environments)
## Movement Primitive Environments (Episode-Based/Black-Box Environments)
Unlike step-based environments, motion primitive (MP) environments are closer related to stochastic search, black box
optimization, and methods that are often used in robotics. MP environments are trajectory-based and always execute a full
trajectory, which is generated by a Dynamic Motion Primitive (DMP) or a Probabilistic Motion Primitive (ProMP). The
generated trajectory is translated into individual step-wise actions by a controller. The exact choice of controller is,
however, dependent on the type of environment. We currently support position, velocity, and PD-Controllers for position,
velocity, and torque control, respectively. The goal of all MP environments is still to learn a policy. Yet, an action
represents the parametrization of the motion primitives to generate a suitable trajectory. Additionally, in this
framework we support all of this also for the contextual setting, for which we expose all changing substates of the
task as a single observation in the beginning. This requires to predict a new action/MP parametrization for each
trajectory. All environments provide next to the cumulative episode reward all collected information from each
step as part of the info dictionary. This information should, however, mainly be used for debugging and logging.
|Key|Description|
|---|---|
|`trajectory`|Generated trajectory from MP|
|`step_actions`|Step-wise executed action based on controller output|
|`step_observations`|Step-wise intermediate observations|
|`step_rewards`|Step-wise rewards|
|`trajectory_length`|Total number of environment interactions|
|`other`|All other information from the underlying environment are returned as a list with length `trajectory_length` maintaining the original key. In case some information are not provided every time step, the missing values are filled with `None`.|
Unlike step-based environments, movement primitive (MP) environments are more closely related to stochastic search,
black-box optimization, and methods that are often used in traditional robotics and control. MP environments are
typically episode-based and execute a full trajectory, which is generated by a trajectory generator, such as a Dynamic
Movement Primitive (DMP) or a Probabilistic Movement Primitive (ProMP). The generated trajectory is translated into
individual step-wise actions by a trajectory tracking controller. The exact choice of controller is, however, dependent
on the type of environment. We currently support position, velocity, and PD-Controllers for position, velocity, and
torque control, respectively, as well as a special controller for the MetaWorld control suite.
The goal of all MP environments is still to learn an optimal policy. Yet, an action represents the parametrization of
the movement primitives to generate a suitable trajectory. Additionally, in this framework we support all of this also
for the contextual setting, i.e. we expose the context space - a subset of the observation space - at the beginning of
the episode. This requires predicting a new action/MP parametrization for each context.
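To make the interplay concrete, here is a schematic (not the actual API) of what a single black-box `step()` does internally; `traj_gen` and `controller` are placeholder names:
```python
params = env.action_space.sample()       # one MP parametrization per context
trajectory, velocity = traj_gen(params)  # desired positions/velocities over time
for pos, vel in zip(trajectory, velocity):
    # the tracking controller turns desired states into step-wise low-level actions
    action = controller.get_action(pos, vel, env.current_pos, env.current_vel)
    obs, reward, done, info = env.step(action)
```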
## Installation
1. Clone the repository
```bash
git clone git@github.com:ALRhub/alr_envs.git
git clone git@github.com:ALRhub/fancy_gym.git
```
2. Go to the folder
```bash
cd alr_envs
cd fancy_gym
```
3. Install with
```bash
pip install -e .
pip install -e .
```
## Using the framework
In case you want to use dm_control or metaworld, you can install them by specifying extras
We prepared [multiple examples](alr_envs/examples/), please have a look there for more specific examples.
```bash
pip install -e .[dmc, metaworld]
```
### Step-wise environments
> **Note:**
> While our library already fully supports the new mujoco bindings, metaworld still relies on
> [mujoco_py](https://github.com/openai/mujoco-py), hence make sure to have mujoco 2.1 installed beforehand.
## How to use Fancy Gym
We will only show the basics here and prepared [multiple examples](fancy_gym/examples/) for a more detailed look.
### Step-wise Environments
```python
import alr_envs
import fancy_gym
env = alr_envs.make('HoleReacher-v0', seed=1)
state = env.reset()
env = fancy_gym.make('Reacher5d-v0', seed=1)
obs = env.reset()
for i in range(1000):
state, reward, done, info = env.step(env.action_space.sample())
action = env.action_space.sample()
obs, reward, done, info = env.step(action)
if i % 5 == 0:
env.render()
if done:
state = env.reset()
obs = env.reset()
```
For Deepmind control tasks we expect the `env_id` to be specified as `domain_name-task_name` or for manipulation tasks
as `manipulation-environment_name`. All other environments can be created based on their original name.
When using `dm_control` tasks we expect the `env_id` to be specified as `dmc:domain_name-task_name` or for manipulation
tasks as `dmc:manipulation-environment_name`. For `metaworld` tasks, we require the structure `metaworld:env_id-v2`. Our
custom tasks and standard gym environments can be created without prefixes.
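For instance (the MetaWorld id below is just an example task):
```python
import fancy_gym

env = fancy_gym.make('dmc:ball_in_cup-catch', seed=1)      # DMC suite task
env = fancy_gym.make('metaworld:button-press-v2', seed=1)  # MetaWorld task
env = fancy_gym.make('Reacher5d-v0', seed=1)               # custom task, no prefix
```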
Existing MP tasks can be created the same way as above. Just keep in mind that calling `step()` always executes a full
trajectory.
### Black-box Environments
All environments provide the cumulative episode reward by default; this can, however, be changed if necessary. Optionally,
each environment returns all collected information from each step as part of the infos. This information is, however,
mainly meant for debugging as well as logging and not for training.
|Key|Description|Type|
|---|---|---|
|`positions`|Generated trajectory from MP|Optional|
|`velocities`|Generated trajectory from MP|Optional|
|`step_actions`|Step-wise executed action based on controller output|Optional|
|`step_observations`|Step-wise intermediate observations|Optional|
|`step_rewards`|Step-wise rewards|Optional|
|`trajectory_length`|Total number of environment interactions|Always|
|`other`|All other information from the underlying environment are returned as a list with length `trajectory_length` maintaining the original key. In case some information are not provided every time step, the missing values are filled with `None`.|Always|
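For example, after a single black-box `step()` (a sketch; which optional keys appear depends on the wrapper's verbosity setting):
```python
obs, trajectory_return, done, info = env.step(env.action_space.sample())
print(info['trajectory_length'])  # always present
print(info.get('step_rewards'))   # optional, None when not recorded
```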
Existing MP tasks can be created the same way as above. Just keep in mind, calling `step()` executes a full trajectory.
> **Note:**
> Currently, we are also in the process of enabling replanning as well as learning of sub-trajectories.
> This allows splitting the episode into multiple trajectories and is a hybrid setting between step-based and
> black-box learning.
> While this is already implemented, it is still in beta and requires further testing.
> Feel free to try it and open an issue with any problems that occur.
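A replanning schedule is a plain callable returning whether to replan; a minimal sketch (the signature is inferred from the black-box wrapper shown further below, and the 25-step interval is arbitrary):
```python
def replan_every_25_steps(position, velocity, obs, action, step):
    return step % 25 == 0
```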
```python
import alr_envs
import fancy_gym
env = alr_envs.make('HoleReacherProMP-v0', seed=1)
# render() can be called once in the beginning with all necessary arguments. To turn it off again just call render(None).
env.render()
env = fancy_gym.make('Reacher5dProMP-v0', seed=1)
# render() can be called once in the beginning with all necessary arguments.
# To turn it off again just call render() without any arguments.
env.render(mode='human')
state = env.reset()
# This returns the context information, not the full state observation
obs = env.reset()
for i in range(5):
state, reward, done, info = env.step(env.action_space.sample())
action = env.action_space.sample()
obs, reward, done, info = env.step(action)
# Not really necessary as the environment resets itself after each trajectory anyway.
state = env.reset()
# Done is always True as we are working on the episode level, hence we always reset()
obs = env.reset()
```
To show all available environments, we provide some additional convenience. Each value will return a dictionary with two
keys `DMP` and `ProMP` that store a list of available environment names.
To show all available environments, we provide some additional convenience variables. All of them return a dictionary
with two keys `DMP` and `ProMP` that store a list of available environment ids.
```python
import alr_envs
import fancy_gym
print("Custom MP tasks:")
print(alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS)
print("Fancy Black-box tasks:")
print(fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
print("OpenAI Gym MP tasks:")
print(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS)
print("OpenAI Gym Black-box tasks:")
print(fancy_gym.ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
print("Deepmind Control MP tasks:")
print(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS)
print("Deepmind Control Black-box tasks:")
print(fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
print("MetaWorld MP tasks:")
print(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS)
print("MetaWorld Black-box tasks:")
print(fancy_gym.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
```
### How to create a new MP task
In case a required task is not yet supported in the MP framework, it can be created relatively easily. For the task at
hand, the following interface needs to be implemented.
hand, the following [interface](fancy_gym/black_box/raw_interface_wrapper.py) needs to be implemented.
```python
from abc import abstractmethod
from typing import Union, Tuple
import gym
import numpy as np
from mp_env_api import MPEnvWrapper
class MPWrapper(MPEnvWrapper):
class RawInterfaceWrapper(gym.Wrapper):
@property
def active_obs(self):
def context_mask(self) -> np.ndarray:
"""
Returns boolean mask for each substate in the full observation.
It determines whether the observation is returned for the contextual case or not.
This effectively allows to filter unwanted or unnecessary observations from the full step-based case.
E.g. Velocities starting at 0 are only changing after the first action. Given we only receive the first
observation, the velocities are not necessary in the observation for the MP task.
Returns boolean mask of the same shape as the observation space.
It determines whether the observation is returned for the contextual case or not.
This effectively allows to filter unwanted or unnecessary observations from the full step-based case.
E.g. Velocities starting at 0 are only changing after the first action. Given we only receive the
context/part of the first observation, the velocities are not necessary in the observation for the task.
Returns:
bool array representing the indices of the observations
"""
return np.ones(self.observation_space.shape, dtype=bool)
return np.ones(self.env.observation_space.shape[0], dtype=bool)
@property
def current_vel(self):
@abstractmethod
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
"""
Returns the current velocity of the action/control dimension.
Returns the current position of the action/control dimension.
The dimensionality has to match the action/control dimension.
This is not required when exclusively using position control,
it should, however, be implemented regardless.
E.g. The joint velocities that are directly or indirectly controlled by the action.
"""
raise NotImplementedError()
@property
def current_pos(self):
"""
Returns the current position of the action/control dimension.
The dimensionality has to match the action/control dimension.
This is not required when exclusively using velocity control,
This is not required when exclusively using velocity control,
it should, however, be implemented regardless.
E.g. The joint positions that are directly or indirectly controlled by the action.
"""
raise NotImplementedError()
@property
def goal_pos(self):
@abstractmethod
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
"""
Returns a predefined final position of the action/control dimension.
This is only required for the DMP and is most of the time learned instead.
"""
raise NotImplementedError()
@property
def dt(self):
"""
Returns the time between two simulated steps of the environment
Returns the current velocity of the action/control dimension.
The dimensionality has to match the action/control dimension.
This is not required when exclusively using position control,
it should, however, be implemented regardless.
E.g. The joint velocities that are directly or indirectly controlled by the action.
"""
raise NotImplementedError()
```
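As a minimal illustration, a hypothetical wrapper for an environment whose observation stacks joint positions and velocities might look like this (the `qpos`/`qvel` access is an assumption about the underlying environment):
```python
import numpy as np

class MyMPWrapper(RawInterfaceWrapper):
    @property
    def context_mask(self) -> np.ndarray:
        # keep positions, drop velocities (they are zero in the first observation)
        n = self.env.observation_space.shape[0]
        return np.arange(n) < n // 2

    @property
    def current_pos(self) -> np.ndarray:
        return self.env.data.qpos.copy()

    @property
    def current_vel(self) -> np.ndarray:
        return self.env.data.qvel.copy()
```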
If you created a new task wrapper, feel free to open a PR, so we can integrate it for others to use as well.
Without the integration the task can still be used. A rough outline can be shown here, for more details we recommend
having a look at the [examples](alr_envs/examples/).
If you created a new task wrapper, feel free to open a PR, so we can integrate it for others to use as well. Without the
integration the task can still be used. A rough outline is shown here; for more details, we recommend having a look
at the [examples](fancy_gym/examples/).
```python
import alr_envs
import fancy_gym
# Base environment name, according to structure of above example
base_env_id = "ball_in_cup-catch"
# Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
# Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
# You can also add other gym.Wrappers in case they are needed,
# e.g. gym.wrappers.FlattenObservation for dict observations
wrappers = [alr_envs.dmc.suite.ball_in_cup.MPWrapper]
mp_kwargs = {...}
wrappers = [fancy_gym.dmc.suite.ball_in_cup.MPWrapper]
kwargs = {...}
env = alr_envs.make_dmp_env(base_env_id, wrappers=wrappers, seed=1, mp_kwargs=mp_kwargs, **kwargs)
# OR for a deterministic ProMP (other mp_kwargs are required):
# env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_args)
env = fancy_gym.make_bb(base_env_id, wrappers=wrappers, seed=0, **kwargs)
rewards = 0
obs = env.reset()

fancy_gym/__init__.py

@@ -0,0 +1,13 @@
from fancy_gym import dmc, meta, open_ai
from fancy_gym.utils.make_env_helpers import make, make_bb, make_rank
from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
# Convenience function for all MP environments
from .envs import ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS
from .meta import ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS
from .open_ai import ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS
ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {
key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key]
for key, value in ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items()}
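# Example (hedged usage sketch): the combined dictionary mirrors the per-suite ones,
# e.g. fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"] lists all registered
# ProMP ids across the custom, DMC, OpenAI gym, and MetaWorld suites.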

@@ -0,0 +1,183 @@
from typing import Tuple, Optional
import gym
import numpy as np
from gym import spaces
from mp_pytorch.mp.mp_interfaces import MPInterface
from fancy_gym.black_box.controller.base_controller import BaseController
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
from fancy_gym.utils.utils import get_numpy
class BlackBoxWrapper(gym.ObservationWrapper):
def __init__(self,
env: RawInterfaceWrapper,
trajectory_generator: MPInterface,
tracking_controller: BaseController,
duration: float,
verbose: int = 1,
learn_sub_trajectories: bool = False,
replanning_schedule: Optional[callable] = None,
reward_aggregation: callable = np.sum
):
"""
gym.Wrapper for leveraging a black box approach with a trajectory generator.
Args:
env: The (wrapped) environment this wrapper is applied on
trajectory_generator: Generates the full or partial trajectory
tracking_controller: Translates the desired trajectory to raw action sequences
duration: Length of the trajectory of the movement primitive in seconds
verbose: level of detail for returned values in info dict.
learn_sub_trajectories: Transforms full episode learning into learning sub-trajectories, similar to
step-based learning
replanning_schedule: callable that receives the current position, velocity, observation, action, and
the number of steps executed within the current trajectory; returns whether a new trajectory should be planned
reward_aggregation: function that takes the np.ndarray of step rewards as input and returns the trajectory
reward, default summation over all values.
"""
super().__init__(env)
self.duration = duration
self.learn_sub_trajectories = learn_sub_trajectories
self.do_replanning = replanning_schedule is not None
self.replanning_schedule = replanning_schedule or (lambda *x: False)
self.current_traj_steps = 0
# trajectory generation
self.traj_gen = trajectory_generator
self.tracking_controller = tracking_controller
# self.time_steps = np.linspace(0, self.duration, self.traj_steps)
# self.traj_gen.set_mp_times(self.time_steps)
self.traj_gen.set_duration(self.duration, self.dt)
# reward computation
self.reward_aggregation = reward_aggregation
# spaces
self.return_context_observation = not (learn_sub_trajectories or self.do_replanning)
self.traj_gen_action_space = self._get_traj_gen_action_space()
self.action_space = self._get_action_space()
self.observation_space = self._get_observation_space()
# rendering
self.render_kwargs = {}
self.verbose = verbose
def observation(self, observation):
# return the context space if we are in the contextual case
obs = observation[self.env.context_mask] if self.return_context_observation else observation
# cast dtype because metaworld returns an incorrect dtype that throws a gym error
return obs.astype(self.observation_space.dtype)
def get_trajectory(self, action: np.ndarray) -> Tuple:
clipped_params = np.clip(action, self.traj_gen_action_space.low, self.traj_gen_action_space.high)
self.traj_gen.set_params(clipped_params)
# TODO: is this correct for replanning? Do we need to adjust anything here?
bc_time = np.array(0 if not self.do_replanning else self.current_traj_steps * self.dt)
self.traj_gen.set_boundary_conditions(bc_time, self.current_pos, self.current_vel)
# TODO: remove the - self.dt after Bruces fix.
self.traj_gen.set_duration(None if self.learn_sub_trajectories else self.duration, self.dt)
# traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True)
trajectory = get_numpy(self.traj_gen.get_traj_pos())
velocity = get_numpy(self.traj_gen.get_traj_vel())
print(len(trajectory))
if self.do_replanning:
# Remove first part of trajectory as this is already over
trajectory = trajectory[self.current_traj_steps:]
velocity = velocity[self.current_traj_steps:]
return trajectory, velocity
def _get_traj_gen_action_space(self):
"""This function can be used to set up an individual space for the parameters of the traj_gen."""
min_action_bounds, max_action_bounds = self.traj_gen.get_params_bounds()
action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
dtype=self.env.action_space.dtype)
return action_space
def _get_action_space(self):
"""
This function can be used to modify the action space for considering actions which are not learned via movement
primitives. E.g. ball releasing time for the beer pong task. By default, it is the parameter space of the
movement primitive.
Only needs to be overwritten if the action space needs to be modified.
"""
try:
return self.traj_gen_action_space
except AttributeError:
return self._get_traj_gen_action_space()
def _get_observation_space(self):
if self.return_context_observation:
mask = self.env.context_mask
# return full observation
min_obs_bound = self.env.observation_space.low[mask]
max_obs_bound = self.env.observation_space.high[mask]
return spaces.Box(low=min_obs_bound, high=max_obs_bound, dtype=self.env.observation_space.dtype)
return self.env.observation_space
def step(self, action: np.ndarray):
""" This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
# TODO remove this part, right now only needed for beer pong
mp_params, env_spec_params = self.env.episode_callback(action, self.traj_gen)
trajectory, velocity = self.get_trajectory(mp_params)
trajectory_length = len(trajectory)
rewards = np.zeros(shape=(trajectory_length,))
if self.verbose >= 2:
actions = np.zeros(shape=(trajectory_length,) + self.env.action_space.shape)
observations = np.zeros(shape=(trajectory_length,) + self.env.observation_space.shape,
dtype=self.env.observation_space.dtype)
infos = dict()
done = False
for t, (pos, vel) in enumerate(zip(trajectory, velocity)):
step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel)
c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
obs, c_reward, done, info = self.env.step(c_action)
rewards[t] = c_reward
if self.verbose >= 2:
actions[t, :] = c_action
observations[t, :] = obs
for k, v in info.items():
elems = infos.get(k, [None] * trajectory_length)
elems[t] = v
infos[k] = elems
if self.render_kwargs:
self.env.render(**self.render_kwargs)
if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
t + 1 + self.current_traj_steps):
break
infos.update({k: v[:t] for k, v in infos.items()})
self.current_traj_steps += t + 1
if self.verbose >= 2:
infos['positions'] = trajectory
infos['velocities'] = velocity
infos['step_actions'] = actions[:t + 1]
infos['step_observations'] = observations[:t + 1]
infos['step_rewards'] = rewards[:t + 1]
infos['trajectory_length'] = t + 1
trajectory_return = self.reward_aggregation(rewards[:t + 1])
return self.observation(obs), trajectory_return, done, infos
def render(self, **kwargs):
"""Only set render options here, such that they can be used during the rollout.
This only needs to be called once"""
self.render_kwargs = kwargs
def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None):
self.current_traj_steps = 0
return super(BlackBoxWrapper, self).reset()

@@ -0,0 +1,4 @@
class BaseController:
def get_action(self, des_pos, des_vel, c_pos, c_vel):
raise NotImplementedError

@@ -0,0 +1,24 @@
import numpy as np
from fancy_gym.black_box.controller.base_controller import BaseController
class MetaWorldController(BaseController):
"""
A Metaworld Controller. Using position and velocity information from a provided environment,
the tracking_controller calculates a response based on the desired position and velocity.
Unlike the other Controllers, this is a special tracking_controller for MetaWorld environments.
They use a position delta for the xyz coordinates and a raw position for the gripper opening.
"""
def get_action(self, des_pos, des_vel, c_pos, c_vel):
gripper_pos = des_pos[-1]
cur_pos = c_pos[:-1]
xyz_pos = des_pos[:-1]
assert xyz_pos.shape == cur_pos.shape, \
f"Mismatch in dimension between desired position {xyz_pos.shape} and current position {cur_pos.shape}"
trq = np.hstack([(xyz_pos - cur_pos), gripper_pos])
return trq
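# Worked example (made-up numbers): with des_pos = [0.1, 0.2, 0.3, 0.8]
# (xyz target + gripper opening) and c_pos = [0.0, 0.2, 0.25, 0.5],
# get_action(des_pos, None, c_pos, None) -> [0.1, 0.0, 0.05, 0.8]:
# the xyz entries become position deltas, the gripper command is passed through raw.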

@@ -0,0 +1,28 @@
from typing import Union, Tuple
from fancy_gym.black_box.controller.base_controller import BaseController
class PDController(BaseController):
"""
A PD-Controller. Using position and velocity information from a provided environment,
the tracking_controller calculates a response based on the desired position and velocity.
:param p_gains: Factors for the proportional gains
:param d_gains: Factors for the differential gains
"""
def __init__(self,
p_gains: Union[float, Tuple] = 1,
d_gains: Union[float, Tuple] = 0.5):
self.p_gains = p_gains
self.d_gains = d_gains
def get_action(self, des_pos, des_vel, c_pos, c_vel):
assert des_pos.shape == c_pos.shape, \
f"Mismatch in dimension between desired position {des_pos.shape} and current position {c_pos.shape}"
assert des_vel.shape == c_vel.shape, \
f"Mismatch in dimension between desired velocity {des_vel.shape} and current velocity {c_vel.shape}"
trq = self.p_gains * (des_pos - c_pos) + self.d_gains * (des_vel - c_vel)
return trq
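# Worked example (made-up numbers): with p_gains=50, d_gains=0.5,
# des_pos=0.5, c_pos=0.4, des_vel=0.0, c_vel=0.1:
# trq = 50 * (0.5 - 0.4) + 0.5 * (0.0 - 0.1) = 5.0 - 0.05 = 4.95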

@@ -0,0 +1,9 @@
from fancy_gym.black_box.controller.base_controller import BaseController
class PosController(BaseController):
"""
A Position Controller. The tracking_controller calculates a response only based on the desired position.
"""
def get_action(self, des_pos, des_vel, c_pos, c_vel):
return des_pos

@@ -0,0 +1,9 @@
from fancy_gym.black_box.controller.base_controller import BaseController
class VelController(BaseController):
"""
A Velocity Controller. The tracking_controller calculates a response only based on the desired velocity.
"""
def get_action(self, des_pos, des_vel, c_pos, c_vel):
return des_vel

@@ -0,0 +1,18 @@
from mp_pytorch.basis_gn import NormalizedRBFBasisGenerator, ZeroPaddingNormalizedRBFBasisGenerator
from mp_pytorch.phase_gn import PhaseGenerator
ALL_TYPES = ["rbf", "zero_rbf", "rhythmic"]
def get_basis_generator(basis_generator_type: str, phase_generator: PhaseGenerator, **kwargs):
basis_generator_type = basis_generator_type.lower()
if basis_generator_type == "rbf":
return NormalizedRBFBasisGenerator(phase_generator, **kwargs)
elif basis_generator_type == "zero_rbf":
return ZeroPaddingNormalizedRBFBasisGenerator(phase_generator, **kwargs)
elif basis_generator_type == "rhythmic":
raise NotImplementedError()
# return RhythmicBasisGenerator(phase_generator, **kwargs)
else:
raise ValueError(f"Specified basis generator type {basis_generator_type} not supported, "
f"please choose one of {ALL_TYPES}.")

@@ -0,0 +1,21 @@
from fancy_gym.black_box.controller.meta_world_controller import MetaWorldController
from fancy_gym.black_box.controller.pd_controller import PDController
from fancy_gym.black_box.controller.pos_controller import PosController
from fancy_gym.black_box.controller.vel_controller import VelController
ALL_TYPES = ["motor", "velocity", "position", "metaworld"]
def get_controller(controller_type: str, **kwargs):
controller_type = controller_type.lower()
if controller_type == "motor":
return PDController(**kwargs)
elif controller_type == "velocity":
return VelController()
elif controller_type == "position":
return PosController()
elif controller_type == "metaworld":
return MetaWorldController()
else:
raise ValueError(f"Specified controller type {controller_type} not supported, "
f"please choose one of {ALL_TYPES}.")

@@ -0,0 +1,23 @@
from mp_pytorch.phase_gn import LinearPhaseGenerator, ExpDecayPhaseGenerator
# from mp_pytorch.phase_gn.rhythmic_phase_generator import RhythmicPhaseGenerator
# from mp_pytorch.phase_gn.smooth_phase_generator import SmoothPhaseGenerator
ALL_TYPES = ["linear", "exp", "rhythmic", "smooth"]
def get_phase_generator(phase_generator_type, **kwargs):
phase_generator_type = phase_generator_type.lower()
if phase_generator_type == "linear":
return LinearPhaseGenerator(**kwargs)
elif phase_generator_type == "exp":
return ExpDecayPhaseGenerator(**kwargs)
elif phase_generator_type == "rhythmic":
raise NotImplementedError()
# return RhythmicPhaseGenerator(**kwargs)
elif phase_generator_type == "smooth":
raise NotImplementedError()
# return SmoothPhaseGenerator(**kwargs)
else:
raise ValueError(f"Specified phase generator type {phase_generator_type} not supported, "
f"please choose one of {ALL_TYPES}.")

@@ -0,0 +1,21 @@
from mp_pytorch.basis_gn import BasisGenerator
from mp_pytorch.mp import ProDMP, DMP, ProMP
ALL_TYPES = ["promp", "dmp", "idmp"]
def get_trajectory_generator(
trajectory_generator_type: str, action_dim: int, basis_generator: BasisGenerator, **kwargs
):
trajectory_generator_type = trajectory_generator_type.lower()
if trajectory_generator_type == "promp":
return ProMP(basis_generator, action_dim, **kwargs)
elif trajectory_generator_type == "dmp":
return DMP(basis_generator, action_dim, **kwargs)
elif trajectory_generator_type == 'prodmp':
from mp_pytorch.basis_gn import ProDMPBasisGenerator
assert isinstance(basis_generator, ProDMPBasisGenerator)
return ProDMP(basis_generator, action_dim, **kwargs)
else:
raise ValueError(f"Specified movement primitive type {trajectory_generator_type} not supported, "
f"please choose one of {ALL_TYPES}.")

@@ -0,0 +1,71 @@
from abc import abstractmethod
from typing import Union, Tuple
import gym
import numpy as np
from mp_pytorch.mp.mp_interfaces import MPInterface
class RawInterfaceWrapper(gym.Wrapper):
@property
def context_mask(self) -> np.ndarray:
"""
Returns boolean mask of the same shape as the observation space.
It determines whether the observation is returned for the contextual case or not.
This effectively allows to filter unwanted or unnecessary observations from the full step-based case.
E.g. Velocities starting at 0 are only changing after the first action. Given we only receive the
context/part of the first observation, the velocities are not necessary in the observation for the task.
Returns:
bool array representing the indices of the observations
"""
return np.ones(self.env.observation_space.shape[0], dtype=bool)
@property
@abstractmethod
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
"""
Returns the current position of the action/control dimension.
The dimensionality has to match the action/control dimension.
This is not required when exclusively using velocity control,
it should, however, be implemented regardless.
E.g. The joint positions that are directly or indirectly controlled by the action.
"""
raise NotImplementedError()
@property
@abstractmethod
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
"""
Returns the current velocity of the action/control dimension.
The dimensionality has to match the action/control dimension.
This is not required when exclusively using position control,
it should, however, be implemented regardless.
E.g. The joint velocities that are directly or indirectly controlled by the action.
"""
raise NotImplementedError()
@property
def dt(self) -> float:
"""
Control frequency of the environment
Returns: float
"""
return self.env.dt
def episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[
np.ndarray, Union[np.ndarray, None]]:
"""
Used to extract the parameters for the movement primitive and other parameters from an action array which might
include other actions like ball releasing time for the beer pong environment.
This only needs to be overwritten if the action space is modified.
Args:
action: a vector instance of the whole action space, includes traj_gen parameters and additional parameters if
specified, else only traj_gen parameters
Returns:
Tuple: mp_arguments and other arguments
"""
return action, None
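# Hedged sketch of an override for a modified action space, splitting one extra
# scalar (e.g. a ball release time) off the action vector; the slicing convention
# here is an assumption:
#   def episode_callback(self, action, traj_gen):
#       return action[:-1], action[-1:]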

fancy_gym/dmc/README.MD

@@ -0,0 +1,19 @@
# DeepMind Control (DMC) Wrappers
These are the Environment Wrappers for selected
[DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control)
environments in order to use our Motion Primitive gym interface with them.
## MP Environments
[//]: <> (These environments are wrapped-versions of their Deep Mind Control Suite &#40;DMC&#41; counterparts. Given most task can be)
[//]: <> (solved in shorter horizon lengths than the original 1000 steps, we often shorten the episodes for those task.)
|Name|Description|Trajectory Horizon|Action Dimension|Context Dimension|
|---|---|---|---|---|
|`dmc_ball_in_cup-catch_promp-v0`|A ProMP wrapped version of the "catch" task for the "ball_in_cup" environment.|1000|10|2|
|`dmc_ball_in_cup-catch_dmp-v0`|A DMP wrapped version of the "catch" task for the "ball_in_cup" environment.|1000|10|2|
|`dmc_reacher-easy_promp-v0`|A ProMP wrapped version of the "easy" task for the "reacher" environment.|1000|10|4|
|`dmc_reacher-easy_dmp-v0`|A DMP wrapped version of the "easy" task for the "reacher" environment.|1000|10|4|
|`dmc_reacher-hard_promp-v0`|A ProMP wrapped version of the "hard" task for the "reacher" environment.|1000|10|4|
|`dmc_reacher-hard_dmp-v0`|A DMP wrapped version of the "hard" task for the "reacher" environment.|1000|10|4|

fancy_gym/dmc/__init__.py

@@ -0,0 +1,245 @@
from copy import deepcopy
from . import manipulation, suite
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
from gym.envs.registration import register
DEFAULT_BB_DICT_ProMP = {
"name": 'EnvName',
"wrappers": [],
"trajectory_generator_kwargs": {
'trajectory_generator_type': 'promp'
},
"phase_generator_kwargs": {
'phase_generator_type': 'linear'
},
"controller_kwargs": {
'controller_type': 'motor',
"p_gains": 50.,
"d_gains": 1.,
},
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
'num_basis': 5,
'num_basis_zero_start': 1
}
}
DEFAULT_BB_DICT_DMP = {
"name": 'EnvName',
"wrappers": [],
"trajectory_generator_kwargs": {
'trajectory_generator_type': 'dmp'
},
"phase_generator_kwargs": {
'phase_generator_type': 'exp'
},
"controller_kwargs": {
'controller_type': 'motor',
"p_gains": 50.,
"d_gains": 1.,
},
"basis_generator_kwargs": {
'basis_generator_type': 'rbf',
'num_basis': 5
}
}
# DeepMind Control Suite (DMC)
kwargs_dict_bic_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_bic_dmp['name'] = f"dmc:ball_in_cup-catch"
kwargs_dict_bic_dmp['wrappers'].append(suite.ball_in_cup.MPWrapper)
# bandwidth_factor=2
kwargs_dict_bic_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_bic_dmp['trajectory_generator_kwargs']['weight_scale'] = 10 # TODO: weight scale 1, but goal scale 0.1
register(
id=f'dmc_ball_in_cup-catch_dmp-v0',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_bic_dmp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0")
kwargs_dict_bic_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_bic_promp['name'] = f"dmc:ball_in_cup-catch"
kwargs_dict_bic_promp['wrappers'].append(suite.ball_in_cup.MPWrapper)
register(
id=f'dmc_ball_in_cup-catch_promp-v0',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_bic_promp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0")
kwargs_dict_reacher_easy_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_reacher_easy_dmp['name'] = f"dmc:reacher-easy"
kwargs_dict_reacher_easy_dmp['wrappers'].append(suite.reacher.MPWrapper)
# bandwidth_factor=2
kwargs_dict_reacher_easy_dmp['phase_generator_kwargs']['alpha_phase'] = 2
# TODO: weight scale 50, but goal scale 0.1
kwargs_dict_reacher_easy_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
register(
id=f'dmc_reacher-easy_dmp-v0',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_reacher_easy_dmp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0")
kwargs_dict_reacher_easy_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_reacher_easy_promp['name'] = f"dmc:reacher-easy"
kwargs_dict_reacher_easy_promp['wrappers'].append(suite.reacher.MPWrapper)
kwargs_dict_reacher_easy_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
register(
id=f'dmc_reacher-easy_promp-v0',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_reacher_easy_promp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0")
kwargs_dict_reacher_hard_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_reacher_hard_dmp['name'] = f"dmc:reacher-hard"
kwargs_dict_reacher_hard_dmp['wrappers'].append(suite.reacher.MPWrapper)
# bandwidth_factor = 2
kwargs_dict_reacher_hard_dmp['phase_generator_kwargs']['alpha_phase'] = 2
# TODO: weight scale 50, but goal scale 0.1
kwargs_dict_reacher_hard_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
register(
id=f'dmc_reacher-hard_dmp-v0',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_reacher_hard_dmp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0")
kwargs_dict_reacher_hard_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_reacher_hard_promp['name'] = f"dmc:reacher-hard"
kwargs_dict_reacher_hard_promp['wrappers'].append(suite.reacher.MPWrapper)
kwargs_dict_reacher_hard_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
register(
id=f'dmc_reacher-hard_promp-v0',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_reacher_hard_promp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0")
_dmc_cartpole_tasks = ["balance", "balance_sparse", "swingup", "swingup_sparse"]
for _task in _dmc_cartpole_tasks:
_env_id = f'dmc_cartpole-{_task}_dmp-v0'
kwargs_dict_cartpole_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_cartpole_dmp['name'] = f"dmc:cartpole-{_task}"
kwargs_dict_cartpole_dmp['wrappers'].append(suite.cartpole.MPWrapper)
# bandwidth_factor = 2
kwargs_dict_cartpole_dmp['phase_generator_kwargs']['alpha_phase'] = 2
# TODO: weight scale 50, but goal scale 0.1
kwargs_dict_cartpole_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
kwargs_dict_cartpole_dmp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole_dmp['controller_kwargs']['d_gains'] = 10
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_cartpole_dmp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'dmc_cartpole-{_task}_promp-v0'
kwargs_dict_cartpole_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_cartpole_promp['name'] = f"dmc:cartpole-{_task}"
kwargs_dict_cartpole_promp['wrappers'].append(suite.cartpole.MPWrapper)
kwargs_dict_cartpole_promp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole_promp['controller_kwargs']['d_gains'] = 10
kwargs_dict_cartpole_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_cartpole_promp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
kwargs_dict_cartpole2poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_cartpole2poles_dmp['name'] = f"dmc:cartpole-two_poles"
kwargs_dict_cartpole2poles_dmp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper)
# bandwidth_factor = 2
kwargs_dict_cartpole2poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2
# TODO: weight scale 50, but goal scale 0.1
kwargs_dict_cartpole2poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
kwargs_dict_cartpole2poles_dmp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole2poles_dmp['controller_kwargs']['d_gains'] = 10
_env_id = f'dmc_cartpole-two_poles_dmp-v0'
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_cartpole2poles_dmp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
kwargs_dict_cartpole2poles_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_cartpole2poles_promp['name'] = f"dmc:cartpole-two_poles"
kwargs_dict_cartpole2poles_promp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper)
kwargs_dict_cartpole2poles_promp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole2poles_promp['controller_kwargs']['d_gains'] = 10
kwargs_dict_cartpole2poles_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
_env_id = f'dmc_cartpole-two_poles_promp-v0'
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_cartpole2poles_promp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
kwargs_dict_cartpole3poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_cartpole3poles_dmp['name'] = f"dmc:cartpole-three_poles"
kwargs_dict_cartpole3poles_dmp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper)
# bandwidth_factor = 2
kwargs_dict_cartpole3poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2
# TODO: weight scale 50, but goal scale 0.1
kwargs_dict_cartpole3poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
kwargs_dict_cartpole3poles_dmp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole3poles_dmp['controller_kwargs']['d_gains'] = 10
_env_id = f'dmc_cartpole-three_poles_dmp-v0'
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_cartpole3poles_dmp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
kwargs_dict_cartpole3poles_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_cartpole3poles_promp['name'] = f"dmc:cartpole-three_poles"
kwargs_dict_cartpole3poles_promp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper)
kwargs_dict_cartpole3poles_promp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole3poles_promp['controller_kwargs']['d_gains'] = 10
kwargs_dict_cartpole3poles_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
_env_id = f'dmc_cartpole-three_poles_promp-v0'
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_cartpole3poles_promp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
# DeepMind Manipulation
kwargs_dict_mani_reach_site_features_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_mani_reach_site_features_dmp['name'] = f"dmc:manipulation-reach_site_features"
kwargs_dict_mani_reach_site_features_dmp['wrappers'].append(manipulation.reach_site.MPWrapper)
kwargs_dict_mani_reach_site_features_dmp['phase_generator_kwargs']['alpha_phase'] = 2
# TODO: weight scale 50, but goal scale 0.1
kwargs_dict_mani_reach_site_features_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
kwargs_dict_mani_reach_site_features_dmp['controller_kwargs']['controller_type'] = 'velocity'
register(
id=f'dmc_manipulation-reach_site_dmp-v0',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_mani_reach_site_features_dmp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0")
kwargs_dict_mani_reach_site_features_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_mani_reach_site_features_promp['name'] = f"dmc:manipulation-reach_site_features"
kwargs_dict_mani_reach_site_features_promp['wrappers'].append(manipulation.reach_site.MPWrapper)
kwargs_dict_mani_reach_site_features_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
kwargs_dict_mani_reach_site_features_promp['controller_kwargs']['controller_type'] = 'velocity'
register(
id=f'dmc_manipulation-reach_site_promp-v0',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_mani_reach_site_features_promp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0")

@@ -0,0 +1,186 @@
# Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/wrappers.py
# License: MIT
# Copyright (c) 2020 Denis Yarats
import collections
from collections.abc import MutableMapping
from typing import Any, Dict, Tuple, Optional, Union, Callable
import gym
import numpy as np
from dm_control import composer
from dm_control.rl import control
from dm_env import specs
from gym import spaces
from gym.core import ObsType
def _spec_to_box(spec):
def extract_min_max(s):
assert s.dtype == np.float64 or s.dtype == np.float32, \
f"Only float64 and float32 types are allowed, instead {s.dtype} was found"
dim = int(np.prod(s.shape))
if type(s) == specs.Array:
bound = np.inf * np.ones(dim, dtype=s.dtype)
return -bound, bound
elif type(s) == specs.BoundedArray:
zeros = np.zeros(dim, dtype=s.dtype)
return s.minimum + zeros, s.maximum + zeros
mins, maxs = [], []
for s in spec:
mn, mx = extract_min_max(s)
mins.append(mn)
maxs.append(mx)
low = np.concatenate(mins, axis=0)
high = np.concatenate(maxs, axis=0)
assert low.shape == high.shape
return spaces.Box(low, high, dtype=s.dtype)
def _flatten_obs(obs: MutableMapping):
"""
Flattens an observation of type MutableMapping, e.g. a dict to a 1D array.
Args:
obs: observation to flatten
Returns: 1D array of observation
"""
if not isinstance(obs, MutableMapping):
raise ValueError(f'Requires dict-like observations structure. {type(obs)} found.')
# Keep key order consistent for non OrderedDicts
keys = obs.keys() if isinstance(obs, collections.OrderedDict) else sorted(obs.keys())
obs_vals = [np.array([obs[key]]) if np.isscalar(obs[key]) else obs[key].ravel() for key in keys]
return np.concatenate(obs_vals)
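# Example (hedged): _flatten_obs({'pos': np.array([1., 2.]), 'touch': 0.5})
# sorts the keys, wraps the scalar, and returns array([1., 2., 0.5]).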
class DMCWrapper(gym.Env):
def __init__(self,
env: Callable[[], Union[composer.Environment, control.Environment]],
):
# TODO: Currently this is required to be a function because dmc does not allow to copy composers environments
self._env = env()
# action and observation space
self._action_space = _spec_to_box([self._env.action_spec()])
self._observation_space = _spec_to_box(self._env.observation_spec().values())
self._window = None
self.id = 'dmc'
def __getattr__(self, item):
"""Propagate only non-existent properties to wrapped env."""
if item.startswith('_'):
raise AttributeError("attempted to get missing private attribute '{}'".format(item))
if item in self.__dict__:
return getattr(self, item)
return getattr(self._env, item)
def _get_obs(self, time_step):
obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype)
return obs
@property
def observation_space(self):
return self._observation_space
@property
def action_space(self):
return self._action_space
@property
def dt(self):
return self._env.control_timestep()
def seed(self, seed=None):
self._action_space.seed(seed)
self._observation_space.seed(seed)
def step(self, action) -> Tuple[np.ndarray, float, bool, Dict[str, Any]]:
assert self._action_space.contains(action)
extra = {'internal_state': self._env.physics.get_state().copy()}
time_step = self._env.step(action)
reward = time_step.reward or 0.
done = time_step.last()
obs = self._get_obs(time_step)
extra['discount'] = time_step.discount
return obs, reward, done, extra
def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]:
time_step = self._env.reset()
obs = self._get_obs(time_step)
return obs
def render(self, mode='rgb_array', height=240, width=320, camera_id=-1, overlays=(), depth=False,
segmentation=False, scene_option=None, render_flag_overrides=None):
# assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode
if mode == "rgb_array":
return self._env.physics.render(height=height, width=width, camera_id=camera_id, overlays=overlays,
depth=depth, segmentation=segmentation, scene_option=scene_option,
render_flag_overrides=render_flag_overrides)
# Render max available buffer size. Larger is only possible by altering the XML.
img = self._env.physics.render(height=self._env.physics.model.vis.global_.offheight,
width=self._env.physics.model.vis.global_.offwidth,
camera_id=camera_id, overlays=overlays, depth=depth, segmentation=segmentation,
scene_option=scene_option, render_flag_overrides=render_flag_overrides)
if depth:
img = np.dstack([img.astype(np.uint8)] * 3)
if mode == 'human':
try:
import cv2
if self._window is None:
self._window = cv2.namedWindow(self.id, cv2.WINDOW_AUTOSIZE)
cv2.imshow(self.id, img[..., ::-1]) # Image in BGR
cv2.waitKey(1)
except ImportError:
raise gym.error.DependencyNotInstalled("Rendering requires opencv. Run `pip install opencv-python`")
# PYGAME seems to destroy some global rendering configs from the physics render
# except ImportError:
# import pygame
# img_copy = img.copy().transpose((1, 0, 2))
# if self._window is None:
# pygame.init()
# pygame.display.init()
# self._window = pygame.display.set_mode(img_copy.shape[:2])
# self.clock = pygame.time.Clock()
#
# surf = pygame.surfarray.make_surface(img_copy)
# self._window.blit(surf, (0, 0))
# pygame.event.pump()
# self.clock.tick(30)
# pygame.display.flip()
def close(self):
super().close()
if self._window is not None:
try:
import cv2
cv2.destroyWindow(self.id)
except ImportError:
import pygame
pygame.display.quit()
pygame.quit()
@property
def reward_range(self) -> Tuple[float, float]:
reward_spec = self._env.reward_spec()
if isinstance(reward_spec, specs.BoundedArray):
return reward_spec.minimum, reward_spec.maximum
return -float('inf'), float('inf')
@property
def metadata(self):
return {'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': round(1.0 / self._env.control_timestep())}

@@ -0,0 +1 @@
from . import reach_site

@@ -0,0 +1 @@
from .mp_wrapper import MPWrapper

@@ -0,0 +1,38 @@
from typing import Tuple, Union
import numpy as np
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
@property
def context_mask(self) -> np.ndarray:
# Joint and target positions are randomized, velocities are always set to 0.
return np.hstack([
[True] * 3, # target position
[True] * 12, # sin/cos arm joint position
[True] * 6, # arm joint torques
[False] * 6, # arm joint velocities
[True] * 3, # sin/cos hand joint position
[False] * 3, # hand joint velocities
[True] * 3, # hand pinch site position
[True] * 9, # pinch site rmat
])
@property
def current_pos(self) -> Union[float, int, np.ndarray]:
return self.env.physics.named.data.qpos[:]
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.physics.named.data.qvel[:]
@property
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property
def dt(self) -> Union[float, int]:
return self.env.dt
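This `context_mask` pattern recurs in every wrapper that follows: it is a boolean vector over the flattened observation that selects which entries form the episode-constant context for the movement primitive. A minimal sketch of the masking idea with a hypothetical observation (the actual masking happens elsewhere in the black-box framework):

```python
import numpy as np

# Hypothetical 5-dim observation: [target_x, target_y, joint_pos, joint_vel, step]
obs = np.array([0.3, -0.1, 1.57, 0.0, 42.0])
context_mask = np.array([True, True, True, False, False])

# Only masked entries are exposed as context; the velocity and the
# step counter are dropped because they carry no task information.
context = obs[context_mask]  # -> array([ 0.3 , -0.1 ,  1.57])
```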

@@ -0,0 +1 @@
from . import cartpole, ball_in_cup, reacher

@@ -0,0 +1 @@
from .mp_wrapper import MPWrapper

@@ -0,0 +1,34 @@
from typing import Tuple, Union
import numpy as np
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
@property
def context_mask(self) -> np.ndarray:
# Only the ball position varies at reset; all other state is initialized to 0.
return np.hstack([
[False] * 2, # cup position
[True] * 2, # ball position
[False] * 2, # cup velocity
[False] * 2, # ball velocity
])
@property
def current_pos(self) -> Union[float, int, np.ndarray]:
return np.hstack([self.env.physics.named.data.qpos['cup_x'], self.env.physics.named.data.qpos['cup_z']])
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return np.hstack([self.env.physics.named.data.qvel['cup_x'], self.env.physics.named.data.qvel['cup_z']])
@property
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property
def dt(self) -> Union[float, int]:
return self.env.dt

@@ -0,0 +1,3 @@
from .mp_wrapper import MPWrapper
from .mp_wrapper import ThreePolesMPWrapper
from .mp_wrapper import TwoPolesMPWrapper

@@ -0,0 +1,50 @@
from typing import Tuple, Union
import numpy as np
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
def __init__(self, env, n_poles: int = 1):
self.n_poles = n_poles
super().__init__(env)
@property
def context_mask(self) -> np.ndarray:
# The initial configuration is randomized, hence the full observation is used as context.
return np.hstack([
[True], # slider position
[True] * 2 * self.n_poles, # sin/cos hinge angles
[True], # slider velocity
[True] * self.n_poles, # hinge velocities
])
@property
def current_pos(self) -> Union[float, int, np.ndarray]:
return self.env.physics.named.data.qpos["slider"]
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.physics.named.data.qvel["slider"]
@property
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property
def dt(self) -> Union[float, int]:
return self.env.dt
class TwoPolesMPWrapper(MPWrapper):
def __init__(self, env):
super().__init__(env, n_poles=2)
class ThreePolesMPWrapper(MPWrapper):
def __init__(self, env):
super().__init__(env, n_poles=3)

@@ -0,0 +1 @@
from .mp_wrapper import MPWrapper

@@ -0,0 +1,33 @@
from typing import Tuple, Union
import numpy as np
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
@property
def context_mask(self) -> np.ndarray:
# Joint and target positions are randomized, velocities are always set to 0.
return np.hstack([
[True] * 2, # joint position
[True] * 2, # target position
[False] * 2, # joint velocity
])
@property
def current_pos(self) -> Union[float, int, np.ndarray]:
return self.env.physics.named.data.qpos[:]
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.physics.named.data.qvel[:]
@property
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property
def dt(self) -> Union[float, int]:
return self.env.dt

fancy_gym/envs/__init__.py
@@ -0,0 +1,664 @@
from copy import deepcopy
import numpy as np
from gym import register
from . import classic_control, mujoco
from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv
from .classic_control.simple_reacher.simple_reacher import SimpleReacherEnv
from .classic_control.viapoint_reacher.viapoint_reacher import ViaPointReacherEnv
from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP
from .mujoco.beerpong.beerpong import MAX_EPISODE_STEPS_BEERPONG, FIXED_RELEASE_STEP
from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP
from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP
from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX
from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER
from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
DEFAULT_BB_DICT_ProMP = {
"name": 'EnvName',
"wrappers": [],
"trajectory_generator_kwargs": {
'trajectory_generator_type': 'promp'
},
"phase_generator_kwargs": {
'phase_generator_type': 'linear'
},
"controller_kwargs": {
'controller_type': 'motor',
"p_gains": 1.0,
"d_gains": 0.1,
},
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
'num_basis': 5,
'num_basis_zero_start': 1
}
}
DEFAULT_BB_DICT_DMP = {
"name": 'EnvName',
"wrappers": [],
"trajectory_generator_kwargs": {
'trajectory_generator_type': 'dmp'
},
"phase_generator_kwargs": {
'phase_generator_type': 'exp'
},
"controller_kwargs": {
'controller_type': 'motor',
"p_gains": 1.0,
"d_gains": 0.1,
},
"basis_generator_kwargs": {
'basis_generator_type': 'rbf',
'num_basis': 5
}
}
# Classic Control
## Simple Reacher
register(
id='SimpleReacher-v0',
entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
max_episode_steps=200,
kwargs={
"n_links": 2,
}
)
register(
id='LongSimpleReacher-v0',
entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
max_episode_steps=200,
kwargs={
"n_links": 5,
}
)
## Viapoint Reacher
register(
id='ViaPointReacher-v0',
entry_point='fancy_gym.envs.classic_control:ViaPointReacherEnv',
max_episode_steps=200,
kwargs={
"n_links": 5,
"allow_self_collision": False,
"collision_penalty": 1000
}
)
## Hole Reacher
register(
id='HoleReacher-v0',
entry_point='fancy_gym.envs.classic_control:HoleReacherEnv',
max_episode_steps=200,
kwargs={
"n_links": 5,
"random_start": True,
"allow_self_collision": False,
"allow_wall_collision": False,
"hole_width": None,
"hole_depth": 1,
"hole_x": None,
"collision_penalty": 100,
}
)
# Mujoco
## Mujoco Reacher
for _dims in [5, 7]:
register(
id=f'Reacher{_dims}d-v0',
entry_point='fancy_gym.envs.mujoco:ReacherEnv',
max_episode_steps=MAX_EPISODE_STEPS_REACHER,
kwargs={
"n_links": _dims,
}
)
register(
id=f'Reacher{_dims}dSparse-v0',
entry_point='fancy_gym.envs.mujoco:ReacherEnv',
max_episode_steps=200,
kwargs={
"sparse": True,
'reward_weight': 200,
"n_links": _dims,
}
)
register(
id='HopperJumpSparse-v0',
entry_point='fancy_gym.envs.mujoco:HopperJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
kwargs={
"sparse": True,
}
)
register(
id='HopperJump-v0',
entry_point='fancy_gym.envs.mujoco:HopperJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
kwargs={
"sparse": False,
"healthy_reward": 1.0,
"contact_weight": 0.0,
"height_weight": 3.0,
}
)
register(
id='AntJump-v0',
entry_point='fancy_gym.envs.mujoco:AntJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
)
register(
id='HalfCheetahJump-v0',
entry_point='fancy_gym.envs.mujoco:HalfCheetahJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
)
register(
id='HopperJumpOnBox-v0',
entry_point='fancy_gym.envs.mujoco:HopperJumpOnBoxEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
)
register(
id='HopperThrow-v0',
entry_point='fancy_gym.envs.mujoco:HopperThrowEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
)
register(
id='HopperThrowInBasket-v0',
entry_point='fancy_gym.envs.mujoco:HopperThrowInBasketEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
)
register(
id='Walker2DJump-v0',
entry_point='fancy_gym.envs.mujoco:Walker2dJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
)
register(
id='BeerPong-v0',
entry_point='fancy_gym.envs.mujoco:BeerPongEnv',
max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
)
# Here we use the same reward as in BeerPong-v0, but after the release only a single
# time step follows, i.e. that step simulates until the end of the episode.
register(
id='BeerPongStepBased-v0',
entry_point='fancy_gym.envs.mujoco:BeerPongEnvStepBasedEpisodicReward',
max_episode_steps=FIXED_RELEASE_STEP,
)
# Movement Primitive Environments
## Simple Reacher
_versions = ["SimpleReacher-v0", "LongSimpleReacher-v0"]
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}DMP-{_name[1]}'
kwargs_dict_simple_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_simple_reacher_dmp['wrappers'].append(classic_control.simple_reacher.MPWrapper)
kwargs_dict_simple_reacher_dmp['controller_kwargs']['p_gains'] = 0.6
kwargs_dict_simple_reacher_dmp['controller_kwargs']['d_gains'] = 0.075
kwargs_dict_simple_reacher_dmp['trajectory_generator_kwargs']['weight_scale'] = 50
kwargs_dict_simple_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_simple_reacher_dmp['name'] = f"{_v}"
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_simple_reacher_dmp
)
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_simple_reacher_promp['wrappers'].append(classic_control.simple_reacher.MPWrapper)
kwargs_dict_simple_reacher_promp['controller_kwargs']['p_gains'] = 0.6
kwargs_dict_simple_reacher_promp['controller_kwargs']['d_gains'] = 0.075
kwargs_dict_simple_reacher_promp['name'] = _v
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_simple_reacher_promp
)
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
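Because the loop above goes through gym's own registry, the MP variants can be created like any other environment. In the black-box setting a single action parameterizes the whole trajectory, so an episode usually collapses into one `step` call. A sketch, assuming that importing the package triggers the `register()` calls in this file:

```python
import gym
import fancy_gym  # noqa: F401  # side effect: runs the register() calls above

env = gym.make('SimpleReacherProMP-v0')
obs = env.reset()

# One action = one ProMP parameter vector; the wrapper rolls out the
# resulting trajectory internally and returns the aggregated reward.
action = env.action_space.sample()
obs, episode_reward, done, info = env.step(action)
print(done)  # typically True after a single step, unless replanning is configured
```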
# Viapoint reacher
kwargs_dict_via_point_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_via_point_reacher_dmp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper)
kwargs_dict_via_point_reacher_dmp['controller_kwargs']['controller_type'] = 'velocity'
kwargs_dict_via_point_reacher_dmp['trajectory_generator_kwargs']['weight_scale'] = 50
kwargs_dict_via_point_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_via_point_reacher_dmp['name'] = "ViaPointReacher-v0"
register(
id='ViaPointReacherDMP-v0',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1,
kwargs=kwargs_dict_via_point_reacher_dmp
)
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_via_point_reacher_promp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper)
kwargs_dict_via_point_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
kwargs_dict_via_point_reacher_promp['name'] = "ViaPointReacher-v0"
register(
id="ViaPointReacherProMP-v0",
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_via_point_reacher_promp
)
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0")
## Hole Reacher
_versions = ["HoleReacher-v0"]
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}DMP-{_name[1]}'
kwargs_dict_hole_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_hole_reacher_dmp['wrappers'].append(classic_control.hole_reacher.MPWrapper)
kwargs_dict_hole_reacher_dmp['controller_kwargs']['controller_type'] = 'velocity'
# TODO: Before it was weight scale 50 and goal scale 0.1. We now only have weight scale and thus set it to 500. Check
kwargs_dict_hole_reacher_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
kwargs_dict_hole_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2.5
kwargs_dict_hole_reacher_dmp['name'] = _v
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1,
kwargs=kwargs_dict_hole_reacher_dmp
)
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_hole_reacher_promp['wrappers'].append(classic_control.hole_reacher.MPWrapper)
kwargs_dict_hole_reacher_promp['trajectory_generator_kwargs']['weight_scale'] = 2
kwargs_dict_hole_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
kwargs_dict_hole_reacher_promp['name'] = f"{_v}"
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_hole_reacher_promp
)
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
## ReacherNd
_versions = ["Reacher5d-v0", "Reacher7d-v0", "Reacher5dSparse-v0", "Reacher7dSparse-v0"]
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}DMP-{_name[1]}'
kwargs_dict_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_reacher_dmp['wrappers'].append(mujoco.reacher.MPWrapper)
kwargs_dict_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_reacher_dmp['name'] = _v
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1,
kwargs=kwargs_dict_reacher_dmp
)
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_reacher_promp['wrappers'].append(mujoco.reacher.MPWrapper)
kwargs_dict_reacher_promp['name'] = _v
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_reacher_promp
)
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
########################################################################################################################
## Beerpong ProMP
_versions = ['BeerPong-v0']
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_bp_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.MPWrapper)
kwargs_dict_bp_promp['phase_generator_kwargs']['learn_tau'] = True
kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25])
kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2
kwargs_dict_bp_promp['name'] = _v
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_bp_promp
)
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
### BP with Fixed release
_versions = ["BeerPongStepBased-v0", 'BeerPong-v0']
for _v in _versions:
if _v != 'BeerPong-v0':
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
else:
_env_id = 'BeerPongFixedReleaseProMP-v0'
kwargs_dict_bp_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.MPWrapper)
kwargs_dict_bp_promp['phase_generator_kwargs']['tau'] = 0.62
kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25])
kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2
kwargs_dict_bp_promp['name'] = _v
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_bp_promp
)
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
########################################################################################################################
## Table Tennis needs to be fixed according to Zhou's implementation
# TODO: Add later when finished
# ########################################################################################################################
#
# ## AntJump
# _versions = ['AntJump-v0']
# for _v in _versions:
# _name = _v.split("-")
# _env_id = f'{_name[0]}ProMP-{_name[1]}'
# kwargs_dict_ant_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
# kwargs_dict_ant_jump_promp['wrappers'].append(mujoco.ant_jump.MPWrapper)
# kwargs_dict_ant_jump_promp['name'] = _v
# register(
# id=_env_id,
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
# kwargs=kwargs_dict_ant_jump_promp
# )
# ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
#
# ########################################################################################################################
#
# ## HalfCheetahJump
# _versions = ['HalfCheetahJump-v0']
# for _v in _versions:
# _name = _v.split("-")
# _env_id = f'{_name[0]}ProMP-{_name[1]}'
# kwargs_dict_halfcheetah_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
# kwargs_dict_halfcheetah_jump_promp['wrappers'].append(mujoco.half_cheetah_jump.MPWrapper)
# kwargs_dict_halfcheetah_jump_promp['name'] = _v
# register(
# id=_env_id,
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
# kwargs=kwargs_dict_halfcheetah_jump_promp
# )
# ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
#
# ########################################################################################################################
## HopperJump
_versions = ['HopperJump-v0', 'HopperJumpSparse-v0',
# 'HopperJumpOnBox-v0', 'HopperThrow-v0', 'HopperThrowInBasket-v0'
]
# TODO: Check if all environments work with the same MPWrapper
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_hopper_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_hopper_jump_promp['wrappers'].append(mujoco.hopper_jump.MPWrapper)
kwargs_dict_hopper_jump_promp['name'] = _v
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_hopper_jump_promp
)
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
# ########################################################################################################################
#
#
# ## Walker2DJump
# _versions = ['Walker2DJump-v0']
# for _v in _versions:
# _name = _v.split("-")
# _env_id = f'{_name[0]}ProMP-{_name[1]}'
# kwargs_dict_walker2d_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
# kwargs_dict_walker2d_jump_promp['wrappers'].append(mujoco.walker_2d_jump.MPWrapper)
# kwargs_dict_walker2d_jump_promp['name'] = _v
# register(
# id=_env_id,
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
# kwargs=kwargs_dict_walker2d_jump_promp
# )
# ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
### Deprecated: we will no longer provide non-random starts
"""
register(
id='SimpleReacher-v1',
entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
max_episode_steps=200,
kwargs={
"n_links": 2,
"random_start": False
}
)
register(
id='LongSimpleReacher-v1',
entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
max_episode_steps=200,
kwargs={
"n_links": 5,
"random_start": False
}
)
register(
id='HoleReacher-v1',
entry_point='fancy_gym.envs.classic_control:HoleReacherEnv',
max_episode_steps=200,
kwargs={
"n_links": 5,
"random_start": False,
"allow_self_collision": False,
"allow_wall_collision": False,
"hole_width": 0.25,
"hole_depth": 1,
"hole_x": None,
"collision_penalty": 100,
}
)
register(
id='HoleReacher-v2',
entry_point='fancy_gym.envs.classic_control:HoleReacherEnv',
max_episode_steps=200,
kwargs={
"n_links": 5,
"random_start": False,
"allow_self_collision": False,
"allow_wall_collision": False,
"hole_width": 0.25,
"hole_depth": 1,
"hole_x": 2,
"collision_penalty": 1,
}
)
# CtxtFree are v0, Contextual are v1
register(
id='AntJump-v0',
entry_point='fancy_gym.envs.mujoco:AntJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP,
"context": False
}
)
# CtxtFree are v0, Contextual are v1
register(
id='HalfCheetahJump-v0',
entry_point='fancy_gym.envs.mujoco:HalfCheetahJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
"context": False
}
)
register(
id='HopperJump-v0',
entry_point='fancy_gym.envs.mujoco:HopperJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
"context": False,
"healthy_reward": 1.0
}
)
"""
### Deprecated: used for the CoRL paper
"""
_vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1]
for i in _vs:
_env_id = f'ALRReacher{i}-v0'
register(
id=_env_id,
entry_point='fancy_gym.envs.mujoco:ReacherEnv',
max_episode_steps=200,
kwargs={
"steps_before_reward": 0,
"n_links": 5,
"balance": False,
'_ctrl_cost_weight': i
}
)
_env_id = f'ALRReacherSparse{i}-v0'
register(
id=_env_id,
entry_point='fancy_gym.envs.mujoco:ReacherEnv',
max_episode_steps=200,
kwargs={
"steps_before_reward": 200,
"n_links": 5,
"balance": False,
'_ctrl_cost_weight': i
}
)
_vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1]
for i in _vs:
_env_id = f'ALRReacher{i}ProMP-v0'
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": f"{_env_id.replace('ProMP', '')}",
"wrappers": [mujoco.reacher.MPWrapper],
"mp_kwargs": {
"num_dof": 5,
"num_basis": 5,
"duration": 4,
"policy_type": "motor",
# "weights_scale": 5,
"n_zero_basis": 1,
"zero_start": True,
"policy_kwargs": {
"p_gains": 1,
"d_gains": 0.1
}
}
}
)
_env_id = f'ALRReacherSparse{i}ProMP-v0'
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": f"{_env_id.replace('ProMP', '')}",
"wrappers": [mujoco.reacher.MPWrapper],
"mp_kwargs": {
"num_dof": 5,
"num_basis": 5,
"duration": 4,
"policy_type": "motor",
# "weights_scale": 5,
"n_zero_basis": 1,
"zero_start": True,
"policy_kwargs": {
"p_gains": 1,
"d_gains": 0.1
}
}
}
)
register(
id='HopperJumpOnBox-v0',
entry_point='fancy_gym.envs.mujoco:HopperJumpOnBoxEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
"context": False
}
)
register(
id='HopperThrow-v0',
entry_point='fancy_gym.envs.mujoco:HopperThrowEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW,
"context": False
}
)
register(
id='HopperThrowInBasket-v0',
entry_point='fancy_gym.envs.mujoco:HopperThrowInBasketEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
"context": False
}
)
register(
id='Walker2DJump-v0',
entry_point='fancy_gym.envs.mujoco:Walker2dJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP,
"context": False
}
)
register(id='TableTennis2DCtxt-v1',
entry_point='fancy_gym.envs.mujoco:TTEnvGym',
max_episode_steps=MAX_EPISODE_STEPS,
kwargs={'ctxt_dim': 2, 'fixed_goal': True})
register(
id='BeerPong-v0',
entry_point='fancy_gym.envs.mujoco:BeerBongEnv',
max_episode_steps=300,
kwargs={
"rndm_goal": False,
"cup_goal_pos": [0.1, -2.0],
"frame_skip": 2
}
)
"""

@@ -0,0 +1,18 @@
### Classic Control
## Step-based Environments
|Name| Description|Horizon|Action Dimension|Observation Dimension
|---|---|---|---|---|
|`SimpleReacher-v0`| Simple reaching task (2 links) without any physics simulation. Provides no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions towards the end of the trajectory.| 200 | 2 | 9
|`LongSimpleReacher-v0`| Simple reaching task (5 links) without any physics simulation. Provides no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions towards the end of the trajectory.| 200 | 5 | 18
|`ViaPointReacher-v0`| Simple reaching task leveraging a via point, which supports self collision detection. Provides a reward only at 100 and 199 for reaching the viapoint and goal point, respectively.| 200 | 5 | 18
|`HoleReacher-v0`| 5-link reaching task where the end-effector needs to reach into a narrow hole without colliding with itself or the walls | 200 | 5 | 18
## MP Environments
|Name| Description|Horizon|Action Dimension|Context Dimension
|---|---|---|---|---|
|`ViaPointReacherDMP-v0`| A DMP provides a trajectory for the `ViaPointReacher-v0` task. | 200 | 25
|`HoleReacherFixedGoalDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task with a fixed goal attractor. | 200 | 25
|`HoleReacherDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task. The goal attractor needs to be learned. | 200 | 30
[//]: |`HoleReacherProMPP-v0`|
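As a quick-start sketch (hypothetical snippet; it assumes the registrations are triggered by importing the package):

```python
import gym
import fancy_gym  # registers the environments listed above

env = gym.make('HoleReacher-v0')
obs = env.reset()
for _ in range(200):
    obs, reward, done, info = env.step(env.action_space.sample())
    env.render()
    if done:
        break
```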

@@ -0,0 +1,3 @@
from .hole_reacher.hole_reacher import HoleReacherEnv
from .simple_reacher.simple_reacher import SimpleReacherEnv
from .viapoint_reacher.viapoint_reacher import ViaPointReacherEnv

@@ -0,0 +1,148 @@
from abc import ABC, abstractmethod
from typing import Union, Tuple, Optional
import gym
import numpy as np
from gym import spaces
from gym.core import ObsType
from gym.utils import seeding
from fancy_gym.envs.classic_control.utils import intersect
class BaseReacherEnv(gym.Env, ABC):
"""
Base class for all reaching environments.
"""
def __init__(self, n_links: int, random_start: bool = True, allow_self_collision: bool = False):
super().__init__()
self.link_lengths = np.ones(n_links)
self.n_links = n_links
self._dt = 0.01
self.random_start = random_start
self.allow_self_collision = allow_self_collision
# state
self._joints = None
self._joint_angles = None
self._angle_velocity = None
self._acc = None
self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)])
self._start_vel = np.zeros(self.n_links)
# joint limits
self.j_min = -np.pi * np.ones(n_links)
self.j_max = np.pi * np.ones(n_links)
self.steps_before_reward = 199
state_bound = np.hstack([
[np.pi] * self.n_links, # cos
[np.pi] * self.n_links, # sin
[np.inf] * self.n_links, # velocity
[np.inf] * 2, # x-y coordinates of target distance
[np.inf] # env steps, because the reward starts after n steps TODO: Maybe
])
self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
self.reward_function = None # Needs to be set in sub class
# containers for plotting
self.metadata = {'render.modes': ["human"]}
self.fig = None
self._steps = 0
self.seed()
@property
def dt(self) -> Union[float, int]:
return self._dt
@property
def current_pos(self):
return self._joint_angles.copy()
@property
def current_vel(self):
return self._angle_velocity.copy()
def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]:
# Sample only orientation of first link, i.e. the arm is always straight.
if self.random_start:
first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4)
self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)])
self._start_pos = self._joint_angles.copy()
else:
self._joint_angles = self._start_pos
self._angle_velocity = self._start_vel
self._joints = np.zeros((self.n_links + 1, 2))
self._update_joints()
self._steps = 0
return self._get_obs().copy()
@abstractmethod
def step(self, action: np.ndarray):
"""
A single step with action in angular velocity space
"""
raise NotImplementedError
def _update_joints(self):
"""
Update joints to get the new end-effector position. The other links are only required for rendering.
"""
angles = np.cumsum(self._joint_angles)
x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)])
self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0)
def _check_self_collision(self):
"""Checks whether line segments intersect"""
if self.allow_self_collision:
return False
if np.any(self._joint_angles > self.j_max) or np.any(self._joint_angles < self.j_min):
return True
link_lines = np.stack((self._joints[:-1, :], self._joints[1:, :]), axis=1)
for i, line1 in enumerate(link_lines):
for line2 in link_lines[i + 2:, :]:
if intersect(line1[0], line1[-1], line2[0], line2[-1]):
return True
return False
@abstractmethod
def _get_reward(self, action: np.ndarray) -> Tuple[float, dict]:
pass
@abstractmethod
def _get_obs(self) -> np.ndarray:
pass
@abstractmethod
def _check_collisions(self) -> bool:
pass
@abstractmethod
def _terminate(self, info) -> bool:
return False
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def close(self):
del self.fig
@property
def end_effector(self):
return self._joints[self.n_links].T
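`_update_joints` above is plain planar forward kinematics: `cumsum` turns relative joint angles into absolute link angles, each link contributes its length times `(cos, sin)`, and the running sum gives the joint positions. A self-contained numeric check of the same computation for a 2-link arm with unit-length links:

```python
import numpy as np

link_lengths = np.ones(2)
joint_angles = np.array([np.pi / 2, -np.pi / 2])  # up, then bend right

angles = np.cumsum(joint_angles)                  # absolute link angles
x = link_lengths * np.vstack([np.cos(angles), np.sin(angles)])
joints = np.zeros((3, 2))
joints[1:] = joints[0] + np.cumsum(x.T, axis=0)

# First link points straight up, second link points along +x:
# joints -> [[0, 0], [0, 1], [1, 1]]
print(np.round(joints, 6))
```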

@@ -0,0 +1,39 @@
from abc import ABC
import numpy as np
from gym import spaces
from fancy_gym.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv
class BaseReacherDirectEnv(BaseReacherEnv, ABC):
"""
Base class for directly controlled reaching environments
"""
def __init__(self, n_links: int, random_start: bool = True,
allow_self_collision: bool = False):
super().__init__(n_links, random_start, allow_self_collision)
self.max_vel = 2 * np.pi
action_bound = np.ones((self.n_links,)) * self.max_vel
self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape)
def step(self, action: np.ndarray):
"""
A single step with action in angular velocity space
"""
self._acc = (action - self._angle_velocity) / self.dt
self._angle_velocity = action
self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
self._update_joints()
self._is_collided = self._check_collisions()
reward, info = self._get_reward(action)
self._steps += 1
done = self._terminate(info)
return self._get_obs().copy(), reward, done, info

@@ -0,0 +1,38 @@
from abc import ABC
import numpy as np
from gym import spaces
from fancy_gym.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv
class BaseReacherTorqueEnv(BaseReacherEnv, ABC):
"""
Base class for torque controlled reaching environments
"""
def __init__(self, n_links: int, random_start: bool = True,
allow_self_collision: bool = False):
super().__init__(n_links, random_start, allow_self_collision)
self.max_torque = 1000
action_bound = np.ones((self.n_links,)) * self.max_torque
self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape)
def step(self, action: np.ndarray):
"""
A single step with action in torque space
"""
self._angle_velocity = self._angle_velocity + self.dt * action
self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
self._update_joints()
self._is_collided = self._check_collisions()
reward, info = self._get_reward(action)
self._steps += 1
done = False
return self._get_obs().copy(), reward, done, info
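The two subclasses differ only in their Euler updates: the direct variant treats the action as the new joint velocity (and derives an acceleration for the reward terms), while the torque variant first integrates the action into the velocity (semi-implicit Euler). A side-by-side numeric sketch of a single step:

```python
import numpy as np

dt = 0.01
pos, vel = np.zeros(2), np.zeros(2)
action = np.array([1.0, -1.0])

# Velocity control (BaseReacherDirectEnv.step):
acc = (action - vel) / dt            # implied acceleration, used by the rewards
vel_direct = action
pos_direct = pos + dt * vel_direct   # -> [ 0.01, -0.01]

# Torque control (BaseReacherTorqueEnv.step):
vel_torque = vel + dt * action       # -> [ 0.01, -0.01]
pos_torque = pos + dt * vel_torque   # -> [ 0.0001, -0.0001]
```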

@@ -0,0 +1 @@
from .mp_wrapper import MPWrapper

@@ -0,0 +1,238 @@
from typing import Union, Optional, Tuple
import gym
import matplotlib.pyplot as plt
import numpy as np
from gym.core import ObsType
from matplotlib import patches
from fancy_gym.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
MAX_EPISODE_STEPS_HOLEREACHER = 200
class HoleReacherEnv(BaseReacherDirectEnv):
def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None,
hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False,
allow_wall_collision: bool = False, collision_penalty: float = 1000, rew_fct: str = "simple"):
super().__init__(n_links, random_start, allow_self_collision)
# provided initial parameters
self.initial_x = hole_x # x-position of center of hole
self.initial_width = hole_width # width of hole
self.initial_depth = hole_depth # depth of hole
# temp container for current env state
self._tmp_x = None
self._tmp_width = None
self._tmp_depth = None
self._goal = None # x-y coordinates for reaching the center at the bottom of the hole
# action_bound = np.pi * np.ones((self.n_links,))
state_bound = np.hstack([
[np.pi] * self.n_links, # cos
[np.pi] * self.n_links, # sin
[np.inf] * self.n_links, # velocity
[np.inf], # hole width
# [np.inf], # hole depth
[np.inf] * 2, # x-y coordinates of target distance
[np.inf] # env steps, because the reward starts after n steps TODO: Maybe
])
# self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape)
self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
if rew_fct == "simple":
from fancy_gym.envs.classic_control.hole_reacher.hr_simple_reward import HolereacherReward
self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty)
elif rew_fct == "vel_acc":
from fancy_gym.envs.classic_control.hole_reacher.hr_dist_vel_acc_reward import HolereacherReward
self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty)
elif rew_fct == "unbounded":
from fancy_gym.envs.classic_control.hole_reacher.hr_unbounded_reward import HolereacherReward
self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision)
else:
raise ValueError("Unknown reward function {}".format(rew_fct))
def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]:
self._generate_hole()
self._set_patches()
self.reward_function.reset()
return super().reset()
def _get_reward(self, action: np.ndarray) -> Tuple[float, dict]:
return self.reward_function.get_reward(self)
def _terminate(self, info):
return info["is_collided"]
def _generate_hole(self):
if self.initial_width is None:
width = self.np_random.uniform(0.15, 0.5)
else:
width = np.copy(self.initial_width)
if self.initial_x is None:
# sample hole on the left or right side
direction = self.np_random.choice([-1, 1])
# Hole center needs to be half the width away from the arm to give a valid setting.
x = direction * self.np_random.uniform(width / 2, 3.5)
else:
x = np.copy(self.initial_x)
if self.initial_depth is None:
# TODO we do not want this right now.
depth = self.np_random.uniform(1, 1)
else:
depth = np.copy(self.initial_depth)
self._tmp_width = width
self._tmp_x = x
self._tmp_depth = depth
self._goal = np.hstack([self._tmp_x, -self._tmp_depth])
self._line_ground_left = np.array([-self.n_links, 0, x - width / 2, 0])
self._line_ground_right = np.array([x + width / 2, 0, self.n_links, 0])
self._line_ground_hole = np.array([x - width / 2, -depth, x + width / 2, -depth])
self._line_hole_left = np.array([x - width / 2, -depth, x - width / 2, 0])
self._line_hole_right = np.array([x + width / 2, -depth, x + width / 2, 0])
self.ground_lines = np.stack((self._line_ground_left,
self._line_ground_right,
self._line_ground_hole,
self._line_hole_left,
self._line_hole_right))
def _get_obs(self):
theta = self._joint_angles
return np.hstack([
np.cos(theta),
np.sin(theta),
self._angle_velocity,
self._tmp_width,
# self._tmp_hole_depth,
self.end_effector - self._goal,
self._steps
]).astype(np.float32)
def _get_line_points(self, num_points_per_link=1):
theta = self._joint_angles[:, None]
intermediate_points = np.linspace(0, 1, num_points_per_link) if num_points_per_link > 1 else 1
accumulated_theta = np.cumsum(theta, axis=0)
end_effector = np.zeros(shape=(self.n_links, num_points_per_link, 2))
x = np.cos(accumulated_theta) * self.link_lengths[:, None] * intermediate_points
y = np.sin(accumulated_theta) * self.link_lengths[:, None] * intermediate_points
end_effector[0, :, 0] = x[0, :]
end_effector[0, :, 1] = y[0, :]
for i in range(1, self.n_links):
end_effector[i, :, 0] = x[i, :] + end_effector[i - 1, -1, 0]
end_effector[i, :, 1] = y[i, :] + end_effector[i - 1, -1, 1]
return np.squeeze(end_effector + self._joints[0, :])
def _check_collisions(self) -> bool:
return self._check_self_collision() or self.check_wall_collision()
def check_wall_collision(self):
line_points = self._get_line_points(num_points_per_link=100)
# all points that are before the hole in x
r, c = np.where(line_points[:, :, 0] < (self._tmp_x - self._tmp_width / 2))
# check if any of those points are below surface
nr_line_points_below_surface_before_hole = np.sum(line_points[r, c, 1] < 0)
if nr_line_points_below_surface_before_hole > 0:
return True
# all points that are after the hole in x
r, c = np.where(line_points[:, :, 0] > (self._tmp_x + self._tmp_width / 2))
# check if any of those points are below surface
nr_line_points_below_surface_after_hole = np.sum(line_points[r, c, 1] < 0)
if nr_line_points_below_surface_after_hole > 0:
return True
# all points that are above the hole
r, c = np.where((line_points[:, :, 0] > (self._tmp_x - self._tmp_width / 2)) & (
line_points[:, :, 0] < (self._tmp_x + self._tmp_width / 2)))
# check if any of those points are below surface
nr_line_points_below_surface_in_hole = np.sum(line_points[r, c, 1] < -self._tmp_depth)
if nr_line_points_below_surface_in_hole > 0:
return True
return False
def render(self, mode='human'):
if self.fig is None:
# Create the base figure once at the beginning; afterwards only update it
plt.ion()
self.fig = plt.figure()
ax = self.fig.add_subplot(1, 1, 1)
# limits
lim = np.sum(self.link_lengths) + 0.5
ax.set_xlim([-lim, lim])
ax.set_ylim([-1.1, lim])
self.line, = ax.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
self._set_patches()
self.fig.show()
self.fig.gca().set_title(
f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}")
if mode == "human":
# arm
self.line.set_data(self._joints[:, 0], self._joints[:, 1])
self.fig.canvas.draw()
self.fig.canvas.flush_events()
elif mode == "partial":
if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided:
# Arm
plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k',
alpha=self._steps / 200)
def _set_patches(self):
if self.fig is not None:
# self.fig.gca().patches = []
left_block = patches.Rectangle((-self.n_links, -self._tmp_depth),
self.n_links + self._tmp_x - self._tmp_width / 2,
self._tmp_depth,
fill=True, edgecolor='k', facecolor='k')
right_block = patches.Rectangle((self._tmp_x + self._tmp_width / 2, -self._tmp_depth),
self.n_links - self._tmp_x + self._tmp_width / 2,
self._tmp_depth,
fill=True, edgecolor='k', facecolor='k')
hole_floor = patches.Rectangle((self._tmp_x - self._tmp_width / 2, -self._tmp_depth),
self._tmp_width,
1 - self._tmp_depth,
fill=True, edgecolor='k', facecolor='k')
# Add the patch to the Axes
self.fig.gca().add_patch(left_block)
self.fig.gca().add_patch(right_block)
self.fig.gca().add_patch(hole_floor)
if __name__ == "__main__":
env = HoleReacherEnv(5)
env.reset()
for i in range(10000):
ac = env.action_space.sample()
obs, rew, done, info = env.step(ac)
env.render()
if done:
env.reset()

@@ -0,0 +1,60 @@
import numpy as np
class HolereacherReward:
def __init__(self, allow_self_collision, allow_wall_collision, collision_penalty):
# collision
self.allow_self_collision = allow_self_collision
self.allow_wall_collision = allow_wall_collision
self.collision_penalty = collision_penalty
self._is_collided = False
self.reward_factors = np.array((-1, -1e-4, -1e-6, -collision_penalty, 0))
def reset(self):
self._is_collided = False
self.collision_dist = 0
def get_reward(self, env):
dist_cost = 0
collision_cost = 0
time_cost = 0
success = False
self_collision = False
wall_collision = False
if not self._is_collided:
if not self.allow_self_collision:
self_collision = env._check_self_collision()
if not self.allow_wall_collision:
wall_collision = env.check_wall_collision()
self._is_collided = self_collision or wall_collision
self.collision_dist = np.linalg.norm(env.end_effector - env._goal)
if env._steps == 199: # or self._is_collided:
# return reward only in last time step
# Episode also terminates when colliding, hence return reward
dist = np.linalg.norm(env.end_effector - env._goal)
success = dist < 0.005 and not self._is_collided
dist_cost = dist ** 2
collision_cost = self._is_collided * self.collision_dist ** 2
time_cost = 199 - env._steps
info = {"is_success": success,
"is_collided": self._is_collided,
"end_effector": np.copy(env.end_effector)}
vel_cost = np.sum(env._angle_velocity ** 2)
acc_cost = np.sum(env._acc ** 2)
reward_features = np.array((dist_cost, vel_cost, acc_cost, collision_cost, time_cost))
reward = np.dot(reward_features, self.reward_factors)
return reward, info
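The reward above is a fixed linear model: a feature vector `(dist², vel², acc², collision·dist², time)` dotted with constant non-positive weights. A tiny numeric illustration with example feature values (the weights below assume `collision_penalty=100`):

```python
import numpy as np

reward_factors = np.array((-1, -1e-4, -1e-6, -100, 0))
# Example final step: no collision, 5 cm residual distance,
# moderate velocity and acceleration costs.
reward_features = np.array((0.05 ** 2, 1.2, 40.0, 0.0, 0.0))
reward = np.dot(reward_features, reward_factors)  # -> approx -0.00266
```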

@@ -0,0 +1,53 @@
import numpy as np
class HolereacherReward:
def __init__(self, allow_self_collision, allow_wall_collision, collision_penalty):
# collision
self.allow_self_collision = allow_self_collision
self.allow_wall_collision = allow_wall_collision
self.collision_penalty = collision_penalty
self._is_collided = False
self.reward_factors = np.array((-1, -5e-8, -collision_penalty))
def reset(self):
self._is_collided = False
def get_reward(self, env):
dist_cost = 0
collision_cost = 0
success = False
self_collision = False
wall_collision = False
if not self.allow_self_collision:
self_collision = env._check_self_collision()
if not self.allow_wall_collision:
wall_collision = env.check_wall_collision()
self._is_collided = self_collision or wall_collision
if env._steps == 199 or self._is_collided:
# return reward only in last time step
# Episode also terminates when colliding, hence return reward
dist = np.linalg.norm(env.end_effector - env._goal)
dist_cost = dist ** 2
collision_cost = int(self._is_collided)
success = dist < 0.005 and not self._is_collided
info = {"is_success": success,
"is_collided": self._is_collided,
"end_effector": np.copy(env.end_effector)}
acc_cost = np.sum(env._acc ** 2)
reward_features = np.array((dist_cost, acc_cost, collision_cost))
reward = np.dot(reward_features, self.reward_factors)
return reward, info

@@ -0,0 +1,60 @@
import numpy as np
class HolereacherReward:
def __init__(self, allow_self_collision, allow_wall_collision):
# collision
self.allow_self_collision = allow_self_collision
self.allow_wall_collision = allow_wall_collision
self._is_collided = False
self.reward_factors = np.array((1, -5e-6))
def reset(self):
self._is_collided = False
def get_reward(self, env):
dist_reward = 0
success = False
self_collision = False
wall_collision = False
if not self.allow_self_collision:
self_collision = env._check_self_collision()
if not self.allow_wall_collision:
wall_collision = env.check_wall_collision()
self._is_collided = self_collision or wall_collision
if env._steps == 180 or self._is_collided:
self.end_eff_pos = np.copy(env.end_effector)
if env._steps == 199 or self._is_collided:
# return reward only in last time step
# Episode also terminates when colliding, hence return reward
dist = np.linalg.norm(self.end_eff_pos - env._goal)
if self._is_collided:
dist_reward = 0.25 * np.exp(- dist)
else:
if env.end_effector[1] > 0:
dist_reward = np.exp(- dist)
else:
dist_reward = 1 - self.end_eff_pos[1]
success = not self._is_collided
info = {"is_success": success,
"is_collided": self._is_collided,
"end_effector": np.copy(env.end_effector),
"joints": np.copy(env.current_pos)}
acc_cost = np.sum(env._acc ** 2)
reward_features = np.array((dist_reward, acc_cost))
reward = np.dot(reward_features, self.reward_factors)
return reward, info

@@ -0,0 +1,28 @@
from typing import Tuple, Union
import numpy as np
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
@property
def context_mask(self):
return np.hstack([
[self.env.random_start] * self.env.n_links, # cos
[self.env.random_start] * self.env.n_links, # sin
[self.env.random_start] * self.env.n_links, # velocity
[self.env.initial_width is None], # hole width
# [self.env.hole_depth is None], # hole depth
[True] * 2, # x-y coordinates of target distance
[False] # env steps
])
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.current_pos
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.current_vel

@@ -0,0 +1 @@
from .mp_wrapper import MPWrapper

@@ -0,0 +1,26 @@
from typing import Tuple, Union
import numpy as np
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
@property
def context_mask(self):
return np.hstack([
[self.env.random_start] * self.env.n_links, # cos
[self.env.random_start] * self.env.n_links, # sin
[self.env.random_start] * self.env.n_links, # velocity
[True] * 2, # x-y coordinates of target distance
[False] # env steps
])
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.current_pos
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.current_vel

@@ -0,0 +1,141 @@
from typing import Iterable, Union, Optional, Tuple
import matplotlib.pyplot as plt
import numpy as np
from gym import spaces
from gym.core import ObsType
from fancy_gym.envs.classic_control.base_reacher.base_reacher_torque import BaseReacherTorqueEnv
class SimpleReacherEnv(BaseReacherTorqueEnv):
"""
Simple Reaching Task without any physics simulation.
Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions
towards the end of the trajectory.
"""
def __init__(self, n_links: int, target: Union[None, Iterable] = None, random_start: bool = True,
allow_self_collision: bool = False, ):
super().__init__(n_links, random_start, allow_self_collision)
# provided initial parameters
self.initial_target = target
# temp container for current env state
self._goal = None
self._start_pos = np.zeros(self.n_links)
self.steps_before_reward = 199
state_bound = np.hstack([
[np.pi] * self.n_links, # cos
[np.pi] * self.n_links, # sin
[np.inf] * self.n_links, # velocity
[np.inf] * 2, # x-y coordinates of target distance
[np.inf] # env steps, because the reward starts after n steps TODO: Maybe
])
self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
# @property
# def start_pos(self):
# return self._start_pos
def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]:
self._generate_goal()
return super().reset()
def _get_reward(self, action: np.ndarray):
diff = self.end_effector - self._goal
reward_dist = 0
if not self.allow_self_collision:
self._is_collided = self._check_self_collision()
if self._steps >= self.steps_before_reward:
reward_dist -= np.linalg.norm(diff)
# reward_dist = np.exp(-0.1 * diff ** 2).mean()
# reward_dist = - (diff ** 2).mean()
reward_ctrl = (action ** 2).sum()
reward = reward_dist - reward_ctrl
return reward, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
def _terminate(self, info):
return False
def _get_obs(self):
theta = self._joint_angles
return np.hstack([
np.cos(theta),
np.sin(theta),
self._angle_velocity,
self.end_effector - self._goal,
self._steps
]).astype(np.float32)
def _generate_goal(self):
if self.initial_target is None:
total_length = np.sum(self.link_lengths)
goal = np.array([total_length, total_length])
while np.linalg.norm(goal) >= total_length:
goal = self.np_random.uniform(low=-total_length, high=total_length, size=2)
else:
goal = np.copy(self.initial_target)
self._goal = goal
def _check_collisions(self) -> bool:
return self._check_self_collision()
def render(self, mode='human'): # pragma: no cover
if self.fig is None:
# Create the base figure once at the beginning; afterwards only update it
plt.ion()
self.fig = plt.figure()
ax = self.fig.add_subplot(1, 1, 1)
# limits
lim = np.sum(self.link_lengths) + 0.5
ax.set_xlim([-lim, lim])
ax.set_ylim([-lim, lim])
self.line, = ax.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
goal_pos = self._goal.T
self.goal_point, = ax.plot(goal_pos[0], goal_pos[1], 'gx')
self.goal_dist, = ax.plot([self.end_effector[0], goal_pos[0]], [self.end_effector[1], goal_pos[1]], 'g--')
self.fig.show()
self.fig.gca().set_title(f"Iteration: {self._steps}, distance: {self.end_effector - self._goal}")
# goal
goal_pos = self._goal.T
if self._steps == 1:
self.goal_point.set_data(goal_pos[0], goal_pos[1])
# arm
self.line.set_data(self._joints[:, 0], self._joints[:, 1])
# distance between end effector and goal
self.goal_dist.set_data([self.end_effector[0], goal_pos[0]], [self.end_effector[1], goal_pos[1]])
self.fig.canvas.draw()
self.fig.canvas.flush_events()
if __name__ == "__main__":
env = SimpleReacherEnv(5)
env.reset()
for i in range(200):
ac = env.action_space.sample()
obs, rew, done, info = env.step(ac)
env.render()
if done:
break

@@ -0,0 +1,18 @@
def ccw(A, B, C):
return (C[1] - A[1]) * (B[0] - A[0]) - (B[1] - A[1]) * (C[0] - A[0]) > 1e-12
def intersect(A, B, C, D):
"""
Checks whether line segments AB and CD intersect
"""
return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)
def check_self_collision(line_points):
"""Checks whether line segments intersect"""
for i, line1 in enumerate(line_points):
for line2 in line_points[i + 2:, :, :]:
if intersect(line1[0], line1[-1], line2[0], line2[-1]):
return True
return False
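`ccw` returns the sign of the cross product of `AB` and `AC` (with a small tolerance), and `intersect` applies the standard orientation test: segments `AB` and `CD` cross iff `C` and `D` lie on opposite sides of `AB`, and `A` and `B` lie on opposite sides of `CD`. A quick check, assuming the functions above are in scope:

```python
import numpy as np

A, B = np.array([0.0, 0.0]), np.array([2.0, 2.0])
C, D = np.array([0.0, 2.0]), np.array([2.0, 0.0])
print(intersect(A, B, C, D))  # True: the two diagonals of a square cross

E, F = np.array([3.0, 3.0]), np.array([4.0, 4.0])
print(intersect(A, B, E, F))  # False: collinear with AB but disjoint
```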

@@ -0,0 +1 @@
from .mp_wrapper import MPWrapper

@@ -0,0 +1,27 @@
from typing import Tuple, Union
import numpy as np
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
@property
def context_mask(self):
return np.hstack([
[self.env.random_start] * self.env.n_links, # cos
[self.env.random_start] * self.env.n_links, # sin
[self.env.random_start] * self.env.n_links, # velocity
[self.env.initial_via_target is None] * 2, # x-y coordinates of via point distance
[True] * 2, # x-y coordinates of target distance
[False] # env steps
])
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.current_pos
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.current_vel

@@ -0,0 +1,198 @@
from typing import Iterable, Union, Tuple, Optional
import gym
import matplotlib.pyplot as plt
import numpy as np
from gym.core import ObsType
from fancy_gym.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
class ViaPointReacherEnv(BaseReacherDirectEnv):
def __init__(self, n_links, random_start: bool = False, via_target: Union[None, Iterable] = None,
target: Union[None, Iterable] = None, allow_self_collision=False, collision_penalty=1000):
super().__init__(n_links, random_start, allow_self_collision)
# provided initial parameters
self.initial_target = target # provided target value
self.initial_via_target = via_target # provided via point target value
# temp container for current env state
self._via_point = np.ones(2)
self._goal = np.array((n_links, 0))
# collision
self.collision_penalty = collision_penalty
state_bound = np.hstack([
[np.pi] * self.n_links, # cos
[np.pi] * self.n_links, # sin
[np.inf] * self.n_links, # velocity
[np.inf] * 2, # x-y coordinates of via point distance
[np.inf] * 2, # x-y coordinates of target distance
[np.inf] # env steps, because the reward starts after n steps
])
self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
# @property
# def start_pos(self):
# return self._start_pos
def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]:
self._generate_goal()
return super().reset()
def _generate_goal(self):
# TODO: Maybe improve this later, this can yield quite a lot of invalid settings
total_length = np.sum(self.link_lengths)
# rejection sampled point in inner circle with 0.5*Radius
if self.initial_via_target is None:
via_target = np.array([total_length, total_length])
while np.linalg.norm(via_target) >= 0.5 * total_length:
via_target = self.np_random.uniform(low=-0.5 * total_length, high=0.5 * total_length, size=2)
else:
via_target = np.copy(self.initial_via_target)
# rejection sampled point in outer circle
if self.initial_target is None:
goal = np.array([total_length, total_length])
while np.linalg.norm(goal) >= total_length or np.linalg.norm(goal) <= 0.5 * total_length:
goal = self.np_random.uniform(low=-total_length, high=total_length, size=2)
else:
goal = np.copy(self.initial_target)
self._via_point = via_target
self._goal = goal
def _get_reward(self, acc):
success = False
reward = -np.inf
if not self.allow_self_collision:
self._is_collided = self._check_self_collision()
if not self._is_collided:
dist = np.inf
# return intermediate reward for via point
if self._steps == 100:
dist = np.linalg.norm(self.end_effector - self._via_point)
# return reward in last time step for goal
elif self._steps == 199:
dist = np.linalg.norm(self.end_effector - self._goal)
success = dist < 0.005
else:
# Episode terminates when colliding, hence return reward
dist = np.linalg.norm(self.end_effector - self._goal)
reward = -self.collision_penalty
reward -= dist ** 2
reward -= 5e-8 * np.sum(acc ** 2)
info = {"is_success": success,
"is_collided": self._is_collided,
"end_effector": np.copy(self.end_effector)}
return reward, info
def _terminate(self, info):
return info["is_collided"]
def _get_obs(self):
theta = self._joint_angles
return np.hstack([
np.cos(theta),
np.sin(theta),
self._angle_velocity,
self.end_effector - self._via_point,
self.end_effector - self._goal,
self._steps
]).astype(np.float32)
def _check_collisions(self) -> bool:
return self._check_self_collision()
def render(self, mode='human'):
goal_pos = self._goal.T
via_pos = self._via_point.T
if self.fig is None:
# Create the base figure once at the beginning; afterwards only update it
plt.ion()
self.fig = plt.figure()
ax = self.fig.add_subplot(1, 1, 1)
# limits
lim = np.sum(self.link_lengths) + 0.5
ax.set_xlim([-lim, lim])
ax.set_ylim([-lim, lim])
self.line, = ax.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
self.goal_point_plot, = ax.plot(goal_pos[0], goal_pos[1], 'go')
self.via_point_plot, = ax.plot(via_pos[0], via_pos[1], 'gx')
self.fig.show()
self.fig.gca().set_title(f"Iteration: {self._steps}, distance: {self.end_effector - self._goal}")
if mode == "human":
# goal
if self._steps == 1:
self.goal_point_plot.set_data(goal_pos[0], goal_pos[1])
self.via_point_plot.set_data(via_pos[0], via_pos[1])
# arm
self.line.set_data(self._joints[:, 0], self._joints[:, 1])
self.fig.canvas.draw()
self.fig.canvas.flush_events()
elif mode == "partial":
if self._steps == 1:
# fig, ax = plt.subplots()
# Add the patch to the Axes
[plt.gca().add_patch(rect) for rect in self.patches]
# plt.pause(0.01)
if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided:
# Arm
plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k', alpha=self._steps / 200)
# ax.plot(line_points_in_taskspace[:, 0, 0],
# line_points_in_taskspace[:, 0, 1],
# line_points_in_taskspace[:, -1, 0],
# line_points_in_taskspace[:, -1, 1], marker='o', color='k', alpha=t / 200)
lim = np.sum(self.link_lengths) + 0.5
plt.xlim([-lim, lim])
plt.ylim([-1.1, lim])
plt.pause(0.01)
elif mode == "final":
if self._steps == 199 or self._is_collided:
# fig, ax = plt.subplots()
# Add the patch to the Axes
[plt.gca().add_patch(rect) for rect in self.patches]
plt.xlim(-self.n_links, self.n_links), plt.ylim(-1, self.n_links)
# Arm
plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
plt.pause(0.01)
if __name__ == "__main__":
env = ViaPointReacherEnv(5)
env.reset()
for i in range(10000):
ac = env.action_space.sample()
obs, rew, done, info = env.step(ac)
env.render()
if done:
env.reset()

@@ -0,0 +1,15 @@
# Custom Mujoco tasks
## Step-based Environments
|Name|Description|Horizon|Action Dimension|Observation Dimension|
|---|---|---|---|---|
|`ALRReacher-v0`|Modified (5 links) version of Mujoco gym's `Reacher-v2` (2 links)| 200 | 5 | 21 |
|`ALRReacherSparse-v0`|Same as `ALRReacher-v0`, but the distance penalty is only provided in the last time step.| 200 | 5 | 21 |
|`ALRReacherSparseBalanced-v0`|Same as `ALRReacherSparse-v0`, but the end-effector has to remain upright.| 200 | 5 | 21 |
|`ALRLongReacher-v0`|Modified (7 links) version of Mujoco gym's `Reacher-v2` (2 links)| 200 | 7 | 27 |
|`ALRLongReacherSparse-v0`|Same as `ALRLongReacher-v0`, but the distance penalty is only provided in the last time step.| 200 | 7 | 27 |
|`ALRLongReacherSparseBalanced-v0`|Same as `ALRLongReacherSparse-v0`, but the end-effector has to remain upright.| 200 | 7 | 27 |
|`ALRBallInACupSimple-v0`|Ball-in-a-cup task where a robot needs to catch a ball attached to a cup at its end-effector.| 4000 | 3 | wip |
|`ALRBallInACup-v0`|Ball-in-a-cup task where a robot needs to catch a ball attached to a cup at its end-effector.| 4000 | 7 | wip |
|`ALRBallInACupGoal-v0`|Similar to `ALRBallInACupSimple-v0`, but the ball needs to be caught at a specified goal position.| 4000 | 7 | wip |
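
These environments are registered with `gym` when the package is imported; a minimal rollout sketch (assuming the package's top-level `fancy_gym.make` helper, whose exact signature may vary between versions):

```python
import fancy_gym

env = fancy_gym.make("ALRReacherSparse-v0", seed=1)
obs = env.reset()
for _ in range(200):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
```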

View File

@@ -0,0 +1,9 @@
from .ant_jump.ant_jump import AntJumpEnv
from .beerpong.beerpong import BeerPongEnv, BeerPongEnvStepBasedEpisodicReward
from .half_cheetah_jump.half_cheetah_jump import HalfCheetahJumpEnv
from .hopper_jump.hopper_jump import HopperJumpEnv
from .hopper_jump.hopper_jump_on_box import HopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import HopperThrowEnv
from .hopper_throw.hopper_throw_in_basket import HopperThrowInBasketEnv
from .reacher.reacher import ReacherEnv
from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv

View File

@@ -0,0 +1 @@
from .mp_wrapper import MPWrapper

View File

@@ -0,0 +1,109 @@
from typing import Optional, Tuple, Union

import numpy as np
from gym.core import ObsType
from gym.envs.mujoco.ant_v4 import AntEnv

MAX_EPISODE_STEPS_ANTJUMP = 200


# TODO: This environment has not been tested yet. Work through the TODOs below and test it.
# TODO: Right now this environment only considers jumping to a specific height, which is not ideal.
#  It should be extended to the same structure as the Hopper, where the angles are randomized
#  (-> contexts) and the agent should jump as high as possible while landing at a specific target position.
class AntJumpEnv(AntEnv):
    """
    Initialization changes compared to the standard Ant:
        - healthy_reward: 1.0 -> 0.01 -> 0.0; no healthy reward needed - Paul and Marc
        - ctrl_cost_weight: 0.5 -> 0.0
        - contact_cost_weight: 5e-4 -> 0.0
        - healthy_z_range: (0.2, 1.0) -> (0.3, float('inf'))  # TODO: does limiting the height make sense?
    """

    def __init__(self,
                 xml_file='ant.xml',
                 ctrl_cost_weight=0.0,
                 contact_cost_weight=0.0,
                 healthy_reward=0.0,
                 terminate_when_unhealthy=True,
                 healthy_z_range=(0.3, float('inf')),
                 contact_force_range=(-1.0, 1.0),
                 reset_noise_scale=0.1,
                 exclude_current_positions_from_observation=True,
                 ):
        self.current_step = 0
        self.max_height = 0
        self.goal = 0
        super().__init__(xml_file=xml_file,
                         ctrl_cost_weight=ctrl_cost_weight,
                         contact_cost_weight=contact_cost_weight,
                         healthy_reward=healthy_reward,
                         terminate_when_unhealthy=terminate_when_unhealthy,
                         healthy_z_range=healthy_z_range,
                         contact_force_range=contact_force_range,
                         reset_noise_scale=reset_noise_scale,
                         exclude_current_positions_from_observation=exclude_current_positions_from_observation)

    def step(self, action):
        self.current_step += 1
        self.do_simulation(action, self.frame_skip)

        height = self.get_body_com("torso")[2].copy()
        self.max_height = max(height, self.max_height)

        rewards = 0

        ctrl_cost = self.control_cost(action)
        contact_cost = self.contact_cost
        costs = ctrl_cost + contact_cost

        # fall over; 0.3 is the lower bound of healthy_z_range
        # TODO: read this value from healthy_z_range instead of hard-coding it
        done = bool(height < 0.3)

        if self.current_step == MAX_EPISODE_STEPS_ANTJUMP or done:
            # -10 scales the distance between max_height and the goal height; only used when context is enabled
            height_reward = -10 * np.linalg.norm(self.max_height - self.goal)
            # no healthy reward when using context, because we optimize a negative value
            healthy_reward = 0

            rewards = height_reward + healthy_reward

        obs = self._get_obs()
        reward = rewards - costs

        info = {
            'height': height,
            'max_height': self.max_height,
            'goal': self.goal
        }

        return obs, reward, done, info

    def _get_obs(self):
        return np.append(super()._get_obs(), self.goal)

    def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
              options: Optional[dict] = None) -> Union[ObsType, Tuple[ObsType, dict]]:
        self.current_step = 0
        self.max_height = 0
        # goal heights from 1.0 to 2.5; can be increased, but that did not work well with CMORE
        self.goal = self.np_random.uniform(1.0, 2.5, 1)
        # forward the new-style reset arguments so a provided seed is not silently dropped
        return super().reset(seed=seed, return_info=return_info, options=options)

    # reset_model had to be implemented in every env to make it deterministic
    def reset_model(self):
        # TODO: remove the noise bounds if they are not needed
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos  # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
        qvel = self.init_qvel  # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)

        self.set_state(qpos, qvel)

        observation = self._get_obs()
        return observation
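
Since the environment is still untested (see the TODOs above), a quick random-rollout smoke test can help. The sketch below is not part of the file; it assumes a working MuJoCo installation (with `xml_file='ant.xml'` resolving to gym's bundled asset by default):

if __name__ == "__main__":
    env = AntJumpEnv()
    obs = env.reset()
    for _ in range(MAX_EPISODE_STEPS_ANTJUMP):
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            # the -10 * |max_height - goal| term only enters on the last step (timeout or fall)
            print(f"max_height={info['max_height']}, goal={info['goal']}, final reward={reward}")
            break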

View File

@@ -0,0 +1,81 @@
<mujoco model="ant">
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
<option integrator="RK4" timestep="0.01"/>
<custom>
<numeric data="0.0 0.0 0.55 1.0 0.0 0.0 0.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0 1.0" name="init_qpos"/>
</custom>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="0" condim="3" density="5.0" friction="1 0.5 0.5" margin="0.01" rgba="0.8 0.6 0.4 1"/>
</default>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<body name="torso" pos="0 0 0.75">
<camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
<geom name="torso_geom" pos="0 0 0" size="0.25" type="sphere"/>
<joint armature="0" damping="0" limited="false" margin="0.01" name="root" pos="0 0 0" type="free"/>
<body name="front_left_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="aux_1_geom" size="0.08" type="capsule"/>
<body name="aux_1" pos="0.2 0.2 0">
<joint axis="0 0 1" name="hip_1" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="left_leg_geom" size="0.08" type="capsule"/>
<body pos="0.2 0.2 0">
<joint axis="-1 1 0" name="ankle_1" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.4 0.4 0.0" name="left_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="front_right_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="aux_2_geom" size="0.08" type="capsule"/>
<body name="aux_2" pos="-0.2 0.2 0">
<joint axis="0 0 1" name="hip_2" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="right_leg_geom" size="0.08" type="capsule"/>
<body pos="-0.2 0.2 0">
<joint axis="1 1 0" name="ankle_2" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.4 0.4 0.0" name="right_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="back_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="aux_3_geom" size="0.08" type="capsule"/>
<body name="aux_3" pos="-0.2 -0.2 0">
<joint axis="0 0 1" name="hip_3" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="back_leg_geom" size="0.08" type="capsule"/>
<body pos="-0.2 -0.2 0">
<joint axis="-1 1 0" name="ankle_3" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.4 -0.4 0.0" name="third_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="right_back_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="aux_4_geom" size="0.08" type="capsule"/>
<body name="aux_4" pos="0.2 -0.2 0">
<joint axis="0 0 1" name="hip_4" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="rightback_leg_geom" size="0.08" type="capsule"/>
<body pos="0.2 -0.2 0">
<joint axis="1 1 0" name="ankle_4" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.4 -0.4 0.0" name="fourth_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_4" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_4" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_1" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_1" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_2" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_2" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_3" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_3" gear="150"/>
</actuator>
</mujoco>

View File

@@ -0,0 +1,23 @@
from typing import Tuple, Union

import numpy as np

from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper


class MPWrapper(RawInterfaceWrapper):

    @property
    def context_mask(self):
        return np.hstack([
            [False] * 111,  # the Ant has a 111-dimensional observation space
            [True]  # goal height
        ])

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        return self.data.qpos[7:15].copy()

    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        return self.data.qvel[6:14].copy()
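
For reference, `context_mask` simply selects the context (the appended goal height) out of the flattened observation; a tiny illustrative sketch with a stand-in 112-dimensional vector:

import numpy as np

obs = np.arange(112.0)                     # 111 Ant dims + the appended goal height
mask = np.hstack([[False] * 111, [True]])  # same mask as in context_mask above
print(obs[mask])                           # -> [111.], i.e. only the goal height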

View File

@@ -0,0 +1 @@
from .mp_wrapper import MPWrapper

View File

@@ -0,0 +1,238 @@
<mujoco model="wam(v1.31)">
<compiler angle="radian" meshdir="../../meshes/wam/" />
<option timestep="0.0005" integrator="Euler" />
<size njmax="500" nconmax="100" />
<default class="main">
<joint limited="true" frictionloss="0.001" damping="0.07"/>
<default class="viz">
<geom type="mesh" contype="0" conaffinity="0" group="1" rgba="0.7 0.7 0.7 1" />
</default>
<default class="col">
<geom type="mesh" contype="0" rgba="0.5 0.6 0.7 1" />
</default>
</default>
<asset>
<texture type="2d" name="groundplane" builtin="checker" mark="edge" rgb1="0.25 0.26 0.25" rgb2="0.22 0.22 0.22" markrgb="0.3 0.3 0.3" width="100" height="100" />
<material name="MatGnd" texture="groundplane" texrepeat="5 5" specular="1" shininess="0.3" reflectance="1e-05" />
<mesh name="base_link_fine" file="base_link_fine.stl" />
<mesh name="base_link_convex" file="base_link_convex.stl" />
<mesh name="shoulder_link_fine" file="shoulder_link_fine.stl" />
<mesh name="shoulder_link_convex_decomposition_p1" file="shoulder_link_convex_decomposition_p1.stl" />
<mesh name="shoulder_link_convex_decomposition_p2" file="shoulder_link_convex_decomposition_p2.stl" />
<mesh name="shoulder_link_convex_decomposition_p3" file="shoulder_link_convex_decomposition_p3.stl" />
<mesh name="shoulder_pitch_link_fine" file="shoulder_pitch_link_fine.stl" />
<mesh name="shoulder_pitch_link_convex" file="shoulder_pitch_link_convex.stl" />
<mesh name="upper_arm_link_fine" file="upper_arm_link_fine.stl" />
<mesh name="upper_arm_link_convex_decomposition_p1" file="upper_arm_link_convex_decomposition_p1.stl" />
<mesh name="upper_arm_link_convex_decomposition_p2" file="upper_arm_link_convex_decomposition_p2.stl" />
<mesh name="elbow_link_fine" file="elbow_link_fine.stl" />
<mesh name="elbow_link_convex" file="elbow_link_convex.stl" />
<mesh name="forearm_link_fine" file="forearm_link_fine.stl" />
<mesh name="forearm_link_convex_decomposition_p1" file="forearm_link_convex_decomposition_p1.stl" />
<mesh name="forearm_link_convex_decomposition_p2" file="forearm_link_convex_decomposition_p2.stl" />
<mesh name="wrist_yaw_link_fine" file="wrist_yaw_link_fine.stl" />
<mesh name="wrist_yaw_link_convex_decomposition_p1" file="wrist_yaw_link_convex_decomposition_p1.stl" />
<mesh name="wrist_yaw_link_convex_decomposition_p2" file="wrist_yaw_link_convex_decomposition_p2.stl" />
<mesh name="wrist_pitch_link_fine" file="wrist_pitch_link_fine.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p1" file="wrist_pitch_link_convex_decomposition_p1.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p2" file="wrist_pitch_link_convex_decomposition_p2.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p3" file="wrist_pitch_link_convex_decomposition_p3.stl" />
<mesh name="wrist_palm_link_fine" file="wrist_palm_link_fine.stl" />
<mesh name="wrist_palm_link_convex" file="wrist_palm_link_convex.stl" />
<mesh name="cup1" file="cup_split1.stl" scale="0.001 0.001 0.001" />
<mesh name="cup2" file="cup_split2.stl" scale="0.001 0.001 0.001" />
<mesh name="cup3" file="cup_split3.stl" scale="0.001 0.001 0.001" />
<mesh name="cup4" file="cup_split4.stl" scale="0.001 0.001 0.001" />
<mesh name="cup5" file="cup_split5.stl" scale="0.001 0.001 0.001" />
<mesh name="cup6" file="cup_split6.stl" scale="0.001 0.001 0.001" />
<mesh name="cup7" file="cup_split7.stl" scale="0.001 0.001 0.001" />
<mesh name="cup8" file="cup_split8.stl" scale="0.001 0.001 0.001" />
<mesh name="cup9" file="cup_split9.stl" scale="0.001 0.001 0.001" />
<mesh name="cup10" file="cup_split10.stl" scale="0.001 0.001 0.001" />
<mesh name="cup11" file="cup_split11.stl" scale="0.001 0.001 0.001" />
<mesh name="cup12" file="cup_split12.stl" scale="0.001 0.001 0.001" />
<mesh name="cup13" file="cup_split13.stl" scale="0.001 0.001 0.001" />
<mesh name="cup14" file="cup_split14.stl" scale="0.001 0.001 0.001" />
<mesh name="cup15" file="cup_split15.stl" scale="0.001 0.001 0.001" />
<mesh name="cup16" file="cup_split16.stl" scale="0.001 0.001 0.001" />
<mesh name="cup17" file="cup_split17.stl" scale="0.001 0.001 0.001" />
<mesh name="cup18" file="cup_split18.stl" scale="0.001 0.001 0.001" />
<mesh name="cup3_table" file="cup_split3.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup4_table" file="cup_split4.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup5_table" file="cup_split5.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup6_table" file="cup_split6.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup7_table" file="cup_split7.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup8_table" file="cup_split8.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup9_table" file="cup_split9.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup10_table" file="cup_split10.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup15_table" file="cup_split15.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup16_table" file="cup_split16.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup17_table" file="cup_split17.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup18_table" file="cup_split18.stl" scale="0.00211 0.00211 0.01" />
</asset>
<worldbody>
<geom name="ground" size="5 5 1" type="plane" material="MatGnd" />
<light pos="0.1 0.2 1.3" dir="-0.0758098 -0.32162 -0.985527" directional="true" cutoff="60" exponent="1" diffuse="1 1 1" specular="0.1 0.1 0.1" />
<body name="wam/base_link" pos="0 0 0.6">
<inertial pos="6.93764e-06 0.0542887 0.076438" quat="0.496481 0.503509 -0.503703 0.496255" mass="27.5544" diaginertia="0.432537 0.318732 0.219528" />
<geom class="viz" quat="0.707107 0 0 -0.707107" mesh="base_link_fine" />
<geom class="col" quat="0.707107 0 0 -0.707107" mesh="base_link_convex" name="base_link_convex_geom"/>
<body name="wam/shoulder_yaw_link" pos="0 0 0.16" quat="0.707107 0 0 -0.707107">
<inertial pos="-0.00443422 -0.00066489 -0.12189" quat="0.999995 0.000984795 0.00270132 0.00136071" mass="10.7677" diaginertia="0.507411 0.462983 0.113271" />
<joint name="wam/base_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.6 2.6" />
<geom class="viz" pos="0 0 0.186" mesh="shoulder_link_fine" />
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p1" name="shoulder_link_convex_decomposition_p1_geom"/>
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p2" name="shoulder_link_convex_decomposition_p2_geom"/>
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p3" name="shoulder_link_convex_decomposition_p3_geom"/>
<body name="wam/shoulder_pitch_link" pos="0 0 0.184" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.00236983 -0.0154211 0.0310561" quat="0.961781 -0.272983 0.0167269 0.0133385" mass="3.87494" diaginertia="0.0214207 0.0167101 0.0126465" />
<joint name="wam/shoulder_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.985 1.985" />
<geom class="viz" mesh="shoulder_pitch_link_fine" />
<geom class="col" mesh="shoulder_pitch_link_convex" />
<body name="wam/upper_arm_link" pos="0 -0.505 0" quat="0.707107 0.707107 0 0">
<inertial pos="-0.0382586 3.309e-05 -0.207508" quat="0.705455 0.0381914 0.0383402 0.706686" mass="1.80228" diaginertia="0.0665697 0.0634285 0.00622701" />
<joint name="wam/shoulder_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.8 2.8" />
<geom class="viz" pos="0 0 -0.505" mesh="upper_arm_link_fine" />
<geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p1" name="upper_arm_link_convex_decomposition_p1_geom"/>
<geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p2" name="upper_arm_link_convex_decomposition_p2_geom"/>
<body name="wam/forearm_link" pos="0.045 0 0.045" quat="0.707107 -0.707107 0 0">
<inertial pos="0.00498512 -0.132717 -0.00022942" quat="0.546303 0.447151 -0.548676 0.447842" mass="2.40017" diaginertia="0.0196896 0.0152225 0.00749914" />
<joint name="wam/elbow_pitch_joint" pos="0 0 0" axis="0 0 1" range="-0.9 3.14159" />
<geom class="viz" mesh="elbow_link_fine" />
<geom class="col" mesh="elbow_link_convex" />
<geom class="viz" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_fine" />
<geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p1" name="forearm_link_convex_decomposition_p1_geom" />
<geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p2" name="forearm_link_convex_decomposition_p2_geom" />
<body name="wam/wrist_yaw_link" pos="-0.045 0 0" quat="0.707107 0.707107 0 0">
<inertial pos="8.921e-05 0.00435824 -0.00511217" quat="0.708528 -0.000120667 0.000107481 0.705683" mass="0.12376" diaginertia="0.0112011 0.0111887 7.58188e-05" />
<joint name="wam/wrist_yaw_joint" pos="0 0 0" axis="0 0 1" range="-4.55 1.25" />
<geom class="viz" pos="0 0 0.3" mesh="wrist_yaw_link_fine" />
<geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p1" name="wrist_yaw_link_convex_decomposition_p1_geom" />
<geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p2" name="wrist_yaw_link_convex_decomposition_p2_geom" />
<body name="wam/wrist_pitch_link" pos="0 0 0.3" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.00012262 -0.0246834 -0.0170319" quat="0.994687 -0.102891 0.000824211 -0.00336105" mass="0.417974" diaginertia="0.000555166 0.000463174 0.00023407" />
<joint name="wam/wrist_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.5707 1.5707" />
<geom class="viz" mesh="wrist_pitch_link_fine" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p1" name="wrist_pitch_link_convex_decomposition_p1_geom" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p2" name="wrist_pitch_link_convex_decomposition_p2_geom" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p3" name="wrist_pitch_link_convex_decomposition_p3_geom" />
<body name="wam/wrist_palm_link" pos="0 -0.06 0" quat="0.707107 0.707107 0 0">
<inertial pos="-7.974e-05 -0.00323552 -0.00016313" quat="0.594752 0.382453 0.382453 0.594752" mass="0.0686475" diaginertia="7.408e-05 3.81466e-05 3.76434e-05" />
<joint name="wam/palm_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.7 2.7" />
<geom class="viz" pos="0 0 -0.06" mesh="wrist_palm_link_fine" />
<geom class="col" pos="0 0 -0.06" mesh="wrist_palm_link_convex" name="wrist_palm_link_convex_geom" />
<body name="cup" pos="0 0 0" quat="-0.000203673 0 0 1">
<inertial pos="-3.75236e-10 8.27811e-05 0.0947015" quat="0.999945 -0.0104888 0 0" mass="0.132" diaginertia="0.000285643 0.000270485 9.65696e-05" />
<geom priority="1" name="cup_geom1" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup1" />
<geom priority="1" name="cup_geom2" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup2" />
<geom priority="1" name="cup_geom3" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup3" />
<geom priority="1" name="cup_geom4" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup4" />
<geom priority="1" name="cup_geom5" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup5" />
<geom priority="1" name="cup_geom6" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup6" />
<geom priority="1" name="cup_geom7" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup7" />
<geom priority="1" name="cup_geom8" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup8" />
<geom priority="1" name="cup_geom9" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup9" />
<geom priority="1" name="cup_geom10" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup10" />
<geom name="cup_base" pos="0 -0.05 0.1165" euler="-1.57 0 0" type="cylinder" size="0.038 0.025" solref="-10000 -100"/>
<geom name="cup_base_contact" pos="0 -0.005 0.1165" euler="-1.57 0 0" type="cylinder" size="0.03 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>
<geom priority="1" name="cup_geom15" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup15" />
<geom priority="1" name="cup_geom16" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup16" />
<geom priority="1" name="cup_geom17" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup17" />
<geom priority="1" name="cup_geom18" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup18" />
<site name="cup_robot" pos="0 0.05 0.1165" rgba="255 0 0 1"/>
<site name="cup_robot_final" pos="0 -0.025 0.1165" rgba="0 255 0 1"/>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
<body name="table_body" pos="0 -1.85 0.4025">
<geom name="table" type="box" size="0.4 0.6 0.4" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
<geom name="table_contact_geom" type="box" size="0.4 0.6 0.01" pos="0 0 0.41" rgba="1.4 0.8 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
</body>
<geom name="table_robot" type="box" size="0.1 0.1 0.3" pos="0 0.00 0.3025" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
<geom name="wall" type="box" quat="1 0 0 0" size="0.4 0.04 1.1" pos="0. -2.45 1.1" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
<!-- <geom name="side_wall_left" type="box" quat="1 0 0 0" size="0.04 0.4 0.5" pos="0.45 -2.05 1.1" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"-->
<!-- solref="-10000 -100"/>-->
<!-- <geom name="side_wall_right" type="box" quat="1 0 0 0" size="0.04 0.4 0.5" pos="-0.45 -2.05 1.1" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"-->
<!-- solref="-10000 -100"/>-->
<!-- <body name="cup_table" pos="0.32 -1.55 0.84" quat="0.7071068 0.7071068 0 0">-->
<!-- <geom priority= "1" type="box" size ="0.1 0.1 0.1" name="cup_base_table" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -10" mass="10"/>-->
<!-- </body>-->
<body name="cup_table" pos="0.32 -1.55 0.84" quat="0.7071068 0.7071068 0 0">
<inertial pos="-3.75236e-10 8.27811e-05 0.0947015" quat="0.999945 -0.0104888 0 0" mass="10.132" diaginertia="0.000285643 0.000270485 9.65696e-05" />
<geom priority= "1" name="cup_geom_table3" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup3_table" mass="10"/>
<geom priority= "1" name="cup_geom_table4" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup4_table" mass="10"/>
<geom priority= "1" name="cup_geom_table5" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup5_table" mass="10"/>
<geom priority= "1" name="cup_geom_table6" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup6_table" mass="10"/>
<geom priority= "1" name="cup_geom_table7" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup7_table" mass="10"/>
<geom priority= "1" name="cup_geom_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup8_table" mass="10"/>
<geom priority= "1" name="cup_geom_table9" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup9_table" mass="10"/>
<geom priority= "1" name="cup_geom_table10" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -010" type="mesh" mesh="cup10_table" mass="10"/>
<geom priority= "1" name="cup_base_table" pos="0 -0.035 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.08 0.045" solref="-10000 -100" mass="10"/>
<geom priority= "1" name="cup_base_table_contact" pos="0 0.015 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.06 0.0005" solref="-10000 -100" rgba="0 0 255 1" mass="10"/>
<geom priority= "1" name="cup_geom_table15" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup15_table" mass="10"/>
<geom priority= "1" name="cup_geom_table16" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup16_table" mass="10"/>
<geom priority= "1" name="cup_geom_table17" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup17_table" mass="10"/>
<geom priority= "1" name="cup_geom1_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup18_table" mass="10"/>
<site name="cup_goal_table" pos="0 0.11 0.1337249" rgba="255 0 0 1"/>
<site name="cup_goal_final_table" pos="0.0 0.025 0.1337249" rgba="0 255 0 1"/>
<site name="bounce_table" pos="0.0 -0.015 -0.2 " rgba="255 255 0 1"/>
<!-- <site name="cup_goal_final_table" pos="0.0 -0.025 0.1337249" rgba="0 255 0 1"/>-->
</body>
<!-- <body name="ball" pos="0.0 -0.813 2.382">-->
<body name="ball" pos="0.0 -0.813 2.382">
<joint axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<joint axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<joint axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<geom priority= "1" size="0.025 0.025 0.025" type="sphere" condim="4" name="ball_geom" rgba="0.8 0.2 0.1 1" mass="0.1"
friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="-10000 -10"/>
<!-- friction="0.1 0.1 0.1" solimp="1 1 0" solref="0.1 0.03"/>-->
<site name="target_ball" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"/>
</body>
<camera name="visualization" mode="targetbody" target="wam/wrist_yaw_link" pos="1.5 -0.4 2.2"/>
<camera name="experiment" mode="fixed" quat="0.44418059 0.41778323 0.54301123 0.57732103" pos="1.5 -0.3 1.33" />
<site name="test" pos="0.1 0.1 0.1" rgba="0 0 1 1" type="sphere"/>
</worldbody>
<!-- <actuator>-->
<!-- <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint" kp="800"/>-->
<!-- <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint" kp="800"/>-->
<!-- <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint" kp="800"/>-->
<!-- <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint" kp="800"/>-->
<!-- <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint" kp="100"/>-->
<!-- <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint" kp="2000"/>-->
<!-- <position ctrlrange="-2.7 2.7" joint="wam/palm_yaw_joint" kp="100"/>-->
<!-- </actuator>-->
<!-- <actuator>-->
<!-- <motor ctrlrange="-150 150" joint="wam/base_yaw_joint"/>-->
<!-- <motor ctrlrange="-125 125" joint="wam/shoulder_pitch_joint"/>-->
<!-- <motor ctrlrange="-40 40" joint="wam/shoulder_yaw_joint"/>-->
<!-- <motor ctrlrange="-60 60" joint="wam/elbow_pitch_joint"/>-->
<!-- <motor ctrlrange="-5 5" joint="wam/wrist_yaw_joint"/>-->
<!-- <motor ctrlrange="-5 5" joint="wam/wrist_pitch_joint"/>-->
<!-- <motor ctrlrange="-2 2" joint="wam/palm_yaw_joint"/>-->
<!-- </actuator>-->
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="150.0" joint="wam/base_yaw_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="125.0" joint="wam/shoulder_pitch_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="40.0" joint="wam/shoulder_yaw_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="60.0" joint="wam/elbow_pitch_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_yaw_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_pitch_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="2.0" joint="wam/palm_yaw_joint"/>
</actuator>
</mujoco>

View File

@@ -0,0 +1,187 @@
<mujoco model="wam(v1.31)">
<compiler angle="radian" meshdir="../../meshes/wam/" />
<option timestep="0.005" integrator="Euler" />
<size njmax="500" nconmax="100" />
<default class="main">
<joint limited="true" frictionloss="0.001" damping="0.07"/>
<default class="viz">
<geom type="mesh" contype="0" conaffinity="0" group="1" rgba="0.7 0.7 0.7 1" />
</default>
<default class="col">
<geom type="mesh" contype="0" rgba="0.5 0.6 0.7 1" />
</default>
</default>
<asset>
<texture type="2d" name="groundplane" builtin="checker" mark="edge" rgb1="0.25 0.26 0.25" rgb2="0.22 0.22 0.22" markrgb="0.3 0.3 0.3" width="100" height="100" />
<material name="MatGnd" texture="groundplane" texrepeat="5 5" specular="1" shininess="0.3" reflectance="1e-05" />
<mesh name="base_link_fine" file="base_link_fine.stl" />
<mesh name="base_link_convex" file="base_link_convex.stl" />
<mesh name="shoulder_link_fine" file="shoulder_link_fine.stl" />
<mesh name="shoulder_link_convex_decomposition_p1" file="shoulder_link_convex_decomposition_p1.stl" />
<mesh name="shoulder_link_convex_decomposition_p2" file="shoulder_link_convex_decomposition_p2.stl" />
<mesh name="shoulder_link_convex_decomposition_p3" file="shoulder_link_convex_decomposition_p3.stl" />
<mesh name="shoulder_pitch_link_fine" file="shoulder_pitch_link_fine.stl" />
<mesh name="shoulder_pitch_link_convex" file="shoulder_pitch_link_convex.stl" />
<mesh name="upper_arm_link_fine" file="upper_arm_link_fine.stl" />
<mesh name="upper_arm_link_convex_decomposition_p1" file="upper_arm_link_convex_decomposition_p1.stl" />
<mesh name="upper_arm_link_convex_decomposition_p2" file="upper_arm_link_convex_decomposition_p2.stl" />
<mesh name="elbow_link_fine" file="elbow_link_fine.stl" />
<mesh name="elbow_link_convex" file="elbow_link_convex.stl" />
<mesh name="forearm_link_fine" file="forearm_link_fine.stl" />
<mesh name="forearm_link_convex_decomposition_p1" file="forearm_link_convex_decomposition_p1.stl" />
<mesh name="forearm_link_convex_decomposition_p2" file="forearm_link_convex_decomposition_p2.stl" />
<mesh name="wrist_yaw_link_fine" file="wrist_yaw_link_fine.stl" />
<mesh name="wrist_yaw_link_convex_decomposition_p1" file="wrist_yaw_link_convex_decomposition_p1.stl" />
<mesh name="wrist_yaw_link_convex_decomposition_p2" file="wrist_yaw_link_convex_decomposition_p2.stl" />
<mesh name="wrist_pitch_link_fine" file="wrist_pitch_link_fine.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p1" file="wrist_pitch_link_convex_decomposition_p1.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p2" file="wrist_pitch_link_convex_decomposition_p2.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p3" file="wrist_pitch_link_convex_decomposition_p3.stl" />
<mesh name="wrist_palm_link_fine" file="wrist_palm_link_fine.stl" />
<mesh name="wrist_palm_link_convex" file="wrist_palm_link_convex.stl" />
<mesh name="cup1" file="cup_split1.stl" scale="0.001 0.001 0.001" />
<mesh name="cup2" file="cup_split2.stl" scale="0.001 0.001 0.001" />
<mesh name="cup3" file="cup_split3.stl" scale="0.001 0.001 0.001" />
<mesh name="cup4" file="cup_split4.stl" scale="0.001 0.001 0.001" />
<mesh name="cup5" file="cup_split5.stl" scale="0.001 0.001 0.001" />
<mesh name="cup6" file="cup_split6.stl" scale="0.001 0.001 0.001" />
<mesh name="cup7" file="cup_split7.stl" scale="0.001 0.001 0.001" />
<mesh name="cup8" file="cup_split8.stl" scale="0.001 0.001 0.001" />
<mesh name="cup9" file="cup_split9.stl" scale="0.001 0.001 0.001" />
<mesh name="cup10" file="cup_split10.stl" scale="0.001 0.001 0.001" />
<mesh name="cup11" file="cup_split11.stl" scale="0.001 0.001 0.001" />
<mesh name="cup12" file="cup_split12.stl" scale="0.001 0.001 0.001" />
<mesh name="cup13" file="cup_split13.stl" scale="0.001 0.001 0.001" />
<mesh name="cup14" file="cup_split14.stl" scale="0.001 0.001 0.001" />
<mesh name="cup15" file="cup_split15.stl" scale="0.001 0.001 0.001" />
<mesh name="cup16" file="cup_split16.stl" scale="0.001 0.001 0.001" />
<mesh name="cup17" file="cup_split17.stl" scale="0.001 0.001 0.001" />
<mesh name="cup18" file="cup_split18.stl" scale="0.001 0.001 0.001" />
<mesh name="cup3_table" file="cup_split3.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup4_table" file="cup_split4.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup5_table" file="cup_split5.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup6_table" file="cup_split6.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup7_table" file="cup_split7.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup8_table" file="cup_split8.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup9_table" file="cup_split9.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup10_table" file="cup_split10.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup15_table" file="cup_split15.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup16_table" file="cup_split16.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup17_table" file="cup_split17.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup18_table" file="cup_split18.stl" scale="0.00211 0.00211 0.01" />
</asset>
<worldbody>
<geom name="ground" size="5 5 1" type="plane" material="MatGnd" />
<light pos="0.1 0.2 1.3" dir="-0.0758098 -0.32162 -0.985527" directional="true" cutoff="60" exponent="1" diffuse="1 1 1" specular="0.1 0.1 0.1" />
<body name="wam/base_link" pos="0 0 0.6">
<inertial pos="6.93764e-06 0.0542887 0.076438" quat="0.496481 0.503509 -0.503703 0.496255" mass="27.5544" diaginertia="0.432537 0.318732 0.219528" />
<geom class="viz" quat="0.707107 0 0 -0.707107" mesh="base_link_fine" />
<geom class="col" quat="0.707107 0 0 -0.707107" mesh="base_link_convex" name="base_link_convex_geom"/>
<body name="wam/shoulder_yaw_link" pos="0 0 0.16" quat="0.707107 0 0 -0.707107">
<inertial pos="-0.00443422 -0.00066489 -0.12189" quat="0.999995 0.000984795 0.00270132 0.00136071" mass="10.7677" diaginertia="0.507411 0.462983 0.113271" />
<joint name="wam/base_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.6 2.6" />
<geom class="viz" pos="0 0 0.186" mesh="shoulder_link_fine" />
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p1" name="shoulder_link_convex_decomposition_p1_geom"/>
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p2" name="shoulder_link_convex_decomposition_p2_geom"/>
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p3" name="shoulder_link_convex_decomposition_p3_geom"/>
<body name="wam/shoulder_pitch_link" pos="0 0 0.184" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.00236983 -0.0154211 0.0310561" quat="0.961781 -0.272983 0.0167269 0.0133385" mass="3.87494" diaginertia="0.0214207 0.0167101 0.0126465" />
<joint name="wam/shoulder_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.985 1.985" />
<geom class="viz" mesh="shoulder_pitch_link_fine" />
<geom class="col" mesh="shoulder_pitch_link_convex" />
<body name="wam/upper_arm_link" pos="0 -0.505 0" quat="0.707107 0.707107 0 0">
<inertial pos="-0.0382586 3.309e-05 -0.207508" quat="0.705455 0.0381914 0.0383402 0.706686" mass="1.80228" diaginertia="0.0665697 0.0634285 0.00622701" />
<joint name="wam/shoulder_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.8 2.8" />
<geom class="viz" pos="0 0 -0.505" mesh="upper_arm_link_fine" />
<geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p1" name="upper_arm_link_convex_decomposition_p1_geom"/>
<geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p2" name="upper_arm_link_convex_decomposition_p2_geom"/>
<body name="wam/forearm_link" pos="0.045 0 0.045" quat="0.707107 -0.707107 0 0">
<inertial pos="0.00498512 -0.132717 -0.00022942" quat="0.546303 0.447151 -0.548676 0.447842" mass="2.40017" diaginertia="0.0196896 0.0152225 0.00749914" />
<joint name="wam/elbow_pitch_joint" pos="0 0 0" axis="0 0 1" range="-0.9 3.14159" />
<geom class="viz" mesh="elbow_link_fine" />
<geom class="col" mesh="elbow_link_convex" />
<geom class="viz" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_fine" />
<geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p1" name="forearm_link_convex_decomposition_p1_geom" />
<geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p2" name="forearm_link_convex_decomposition_p2_geom" />
<body name="wam/wrist_yaw_link" pos="-0.045 0 0" quat="0.707107 0.707107 0 0">
<inertial pos="8.921e-05 0.00435824 -0.00511217" quat="0.708528 -0.000120667 0.000107481 0.705683" mass="0.12376" diaginertia="0.0112011 0.0111887 7.58188e-05" />
<joint name="wam/wrist_yaw_joint" pos="0 0 0" axis="0 0 1" range="-4.55 1.25" />
<geom class="viz" pos="0 0 0.3" mesh="wrist_yaw_link_fine" />
<geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p1" name="wrist_yaw_link_convex_decomposition_p1_geom" />
<geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p2" name="wrist_yaw_link_convex_decomposition_p2_geom" />
<body name="wam/wrist_pitch_link" pos="0 0 0.3" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.00012262 -0.0246834 -0.0170319" quat="0.994687 -0.102891 0.000824211 -0.00336105" mass="0.417974" diaginertia="0.000555166 0.000463174 0.00023407" />
<joint name="wam/wrist_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.5707 1.5707" />
<geom class="viz" mesh="wrist_pitch_link_fine" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p1" name="wrist_pitch_link_convex_decomposition_p1_geom" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p2" name="wrist_pitch_link_convex_decomposition_p2_geom" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p3" name="wrist_pitch_link_convex_decomposition_p3_geom" />
<body name="wam/wrist_palm_link" pos="0 -0.06 0" quat="0.707107 0.707107 0 0">
<inertial pos="-7.974e-05 -0.00323552 -0.00016313" quat="0.594752 0.382453 0.382453 0.594752" mass="0.0686475" diaginertia="7.408e-05 3.81466e-05 3.76434e-05" />
<joint name="wam/palm_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.7 2.7" />
<geom class="viz" pos="0 0 -0.06" mesh="wrist_palm_link_fine" />
<geom class="col" pos="0 0 -0.06" mesh="wrist_palm_link_convex" name="wrist_palm_link_convex_geom" />
<site name="init_ball_pos" size="0.025 0.025 0.025" pos="0.0 0.0 0.035" rgba="0 1 0 1"/>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
<body name="table_body" pos="0 -1.85 0.4025">
<geom name="table" type="box" size="0.4 0.6 0.4" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
<geom name="table_contact_geom" type="box" size="0.4 0.6 0.1" pos="0 0 0.31" rgba="1.4 0.8 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
</body>
<geom name="table_robot" type="box" size="0.1 0.1 0.3" pos="0 0.00 0.3025" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
<geom name="wall" type="box" quat="1 0 0 0" size="0.4 0.04 1.1" pos="0. -2.45 1.1" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
<body name="cup_table" pos="0.32 -1.55 0.84" quat="0.7071068 0.7071068 0 0">
<inertial pos="-3.75236e-10 8.27811e-05 0.0947015" quat="0.999945 -0.0104888 0 0" mass="10.132" diaginertia="0.000285643 0.000270485 9.65696e-05" />
<geom priority= "1" name="cup_geom_table3" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup3_table" mass="10"/>
<geom priority= "1" name="cup_geom_table4" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup4_table" mass="10"/>
<geom priority= "1" name="cup_geom_table5" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup5_table" mass="10"/>
<geom priority= "1" name="cup_geom_table6" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup6_table" mass="10"/>
<geom priority= "1" name="cup_geom_table7" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup7_table" mass="10"/>
<geom priority= "1" name="cup_geom_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup8_table" mass="10"/>
<geom priority= "1" name="cup_geom_table9" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup9_table" mass="10"/>
<geom priority= "1" name="cup_geom_table10" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -010" type="mesh" mesh="cup10_table" mass="10"/>
<geom priority= "1" name="cup_base_table" pos="0 -0.035 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.08 0.045" solref="-10000 -100" mass="10"/>
<geom priority= "1" name="cup_base_table_contact" pos="0 0.015 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.07 0.01" solref="-10000 -100" rgba="0 0 255 1" mass="10"/>
<geom priority= "1" name="cup_geom_table15" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup15_table" mass="10"/>
<geom priority= "1" name="cup_geom_table16" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup16_table" mass="10"/>
<geom priority= "1" name="cup_geom_table17" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup17_table" mass="10"/>
<geom priority= "1" name="cup_geom1_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup18_table" mass="10"/>
<site name="cup_goal_table" pos="0 0.11 0.1337249" rgba="255 0 0 1"/>
<site name="cup_goal_final_table" pos="0.0 0.025 0.1337249" rgba="0 255 0 1"/>
</body>
<body name="ball" pos="0 0 0">
<joint axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<joint axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<joint axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<geom priority= "1" size="0.025 0.025 0.025" type="sphere" condim="4" name="ball_geom" rgba="0.8 0.2 0.1 1" mass="0.1"
friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="-10000 -10"/>
<site name="target_ball" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"/>
</body>
<!-- <camera name="visualization" mode="targetbody" target="wam/wrist_yaw_link" pos="1.5 -0.4 2.2"/>-->
<!-- <camera name="experiment" mode="fixed" quat="0.44418059 0.41778323 0.54301123 0.57732103" pos="1.5 -0.3 1.33" />-->
<camera name="visualization" mode="fixed" euler="2.35 2.0 -0.75" pos="2.0 -0.0 1.85"/>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="150.0" joint="wam/base_yaw_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="wam/shoulder_pitch_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="50.0" joint="wam/shoulder_yaw_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="60.0" joint="wam/elbow_pitch_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_yaw_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_pitch_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="2.0" joint="wam/palm_yaw_joint"/>
</actuator>
</mujoco>

View File

@@ -0,0 +1,193 @@
<mujoco model="wam(v1.31)">
<compiler angle="radian" meshdir="./meshes/wam/" />
<option timestep="0.005" integrator="Euler" />
<size njmax="500" nconmax="100" />
<default class="main">
<joint limited="true" frictionloss="0.001" damping="0.07"/>
<default class="viz">
<geom type="mesh" contype="0" conaffinity="0" group="1" rgba="0.7 0.7 0.7 1" />
</default>
<default class="col">
<geom type="mesh" contype="0" rgba="0.5 0.6 0.7 1" />
</default>
</default>
<asset>
<texture type="2d" name="groundplane" builtin="checker" mark="edge" rgb1="0.25 0.26 0.25" rgb2="0.22 0.22 0.22" markrgb="0.3 0.3 0.3" width="100" height="100" />
<material name="MatGnd" texture="groundplane" texrepeat="5 5" specular="1" shininess="0.3" reflectance="1e-05" />
<mesh name="base_link_fine" file="base_link_fine.stl" />
<mesh name="base_link_convex" file="base_link_convex.stl" />
<mesh name="shoulder_link_fine" file="shoulder_link_fine.stl" />
<mesh name="shoulder_link_convex_decomposition_p1" file="shoulder_link_convex_decomposition_p1.stl" />
<mesh name="shoulder_link_convex_decomposition_p2" file="shoulder_link_convex_decomposition_p2.stl" />
<mesh name="shoulder_link_convex_decomposition_p3" file="shoulder_link_convex_decomposition_p3.stl" />
<mesh name="shoulder_pitch_link_fine" file="shoulder_pitch_link_fine.stl" />
<mesh name="shoulder_pitch_link_convex" file="shoulder_pitch_link_convex.stl" />
<mesh name="upper_arm_link_fine" file="upper_arm_link_fine.stl" />
<mesh name="upper_arm_link_convex_decomposition_p1" file="upper_arm_link_convex_decomposition_p1.stl" />
<mesh name="upper_arm_link_convex_decomposition_p2" file="upper_arm_link_convex_decomposition_p2.stl" />
<mesh name="elbow_link_fine" file="elbow_link_fine.stl" />
<mesh name="elbow_link_convex" file="elbow_link_convex.stl" />
<mesh name="forearm_link_fine" file="forearm_link_fine.stl" />
<mesh name="forearm_link_convex_decomposition_p1" file="forearm_link_convex_decomposition_p1.stl" />
<mesh name="forearm_link_convex_decomposition_p2" file="forearm_link_convex_decomposition_p2.stl" />
<mesh name="wrist_yaw_link_fine" file="wrist_yaw_link_fine.stl" />
<mesh name="wrist_yaw_link_convex_decomposition_p1" file="wrist_yaw_link_convex_decomposition_p1.stl" />
<mesh name="wrist_yaw_link_convex_decomposition_p2" file="wrist_yaw_link_convex_decomposition_p2.stl" />
<mesh name="wrist_pitch_link_fine" file="wrist_pitch_link_fine.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p1" file="wrist_pitch_link_convex_decomposition_p1.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p2" file="wrist_pitch_link_convex_decomposition_p2.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p3" file="wrist_pitch_link_convex_decomposition_p3.stl" />
<mesh name="wrist_palm_link_fine" file="wrist_palm_link_fine.stl" />
<mesh name="wrist_palm_link_convex" file="wrist_palm_link_convex.stl" />
<mesh name="cup1" file="cup_split1.stl" scale="0.001 0.001 0.001" />
<mesh name="cup2" file="cup_split2.stl" scale="0.001 0.001 0.001" />
<mesh name="cup3" file="cup_split3.stl" scale="0.001 0.001 0.001" />
<mesh name="cup4" file="cup_split4.stl" scale="0.001 0.001 0.001" />
<mesh name="cup5" file="cup_split5.stl" scale="0.001 0.001 0.001" />
<mesh name="cup6" file="cup_split6.stl" scale="0.001 0.001 0.001" />
<mesh name="cup7" file="cup_split7.stl" scale="0.001 0.001 0.001" />
<mesh name="cup8" file="cup_split8.stl" scale="0.001 0.001 0.001" />
<mesh name="cup9" file="cup_split9.stl" scale="0.001 0.001 0.001" />
<mesh name="cup10" file="cup_split10.stl" scale="0.001 0.001 0.001" />
<mesh name="cup11" file="cup_split11.stl" scale="0.001 0.001 0.001" />
<mesh name="cup12" file="cup_split12.stl" scale="0.001 0.001 0.001" />
<mesh name="cup13" file="cup_split13.stl" scale="0.001 0.001 0.001" />
<mesh name="cup14" file="cup_split14.stl" scale="0.001 0.001 0.001" />
<mesh name="cup15" file="cup_split15.stl" scale="0.001 0.001 0.001" />
<mesh name="cup16" file="cup_split16.stl" scale="0.001 0.001 0.001" />
<mesh name="cup17" file="cup_split17.stl" scale="0.001 0.001 0.001" />
<mesh name="cup18" file="cup_split18.stl" scale="0.001 0.001 0.001" />
<mesh name="cup3_table" file="cup_split3.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup4_table" file="cup_split4.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup5_table" file="cup_split5.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup6_table" file="cup_split6.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup7_table" file="cup_split7.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup8_table" file="cup_split8.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup9_table" file="cup_split9.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup10_table" file="cup_split10.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup15_table" file="cup_split15.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup16_table" file="cup_split16.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup17_table" file="cup_split17.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup18_table" file="cup_split18.stl" scale="0.00211 0.00211 0.01" />
</asset>
<worldbody>
<geom name="ground" size="5 5 1" type="plane" material="MatGnd" />
<light pos="0.1 0.2 1.3" dir="-0.0758098 -0.32162 -0.985527" directional="true" cutoff="60" exponent="1" diffuse="1 1 1" specular="0.1 0.1 0.1" />
<body name="wam/base_link" pos="0 0 0.6">
<inertial pos="6.93764e-06 0.0542887 0.076438" quat="0.496481 0.503509 -0.503703 0.496255" mass="27.5544" diaginertia="0.432537 0.318732 0.219528" />
<geom class="viz" quat="0.707107 0 0 -0.707107" mesh="base_link_fine" />
<geom class="col" quat="0.707107 0 0 -0.707107" mesh="base_link_convex" name="base_link_convex_geom"/>
<body name="wam/shoulder_yaw_link" pos="0 0 0.16" quat="0.707107 0 0 -0.707107">
<inertial pos="-0.00443422 -0.00066489 -0.12189" quat="0.999995 0.000984795 0.00270132 0.00136071" mass="10.7677" diaginertia="0.507411 0.462983 0.113271" />
<joint name="wam/base_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.6 2.6" />
<geom class="viz" pos="0 0 0.186" mesh="shoulder_link_fine" />
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p1" name="shoulder_link_convex_decomposition_p1_geom"/>
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p2" name="shoulder_link_convex_decomposition_p2_geom"/>
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p3" name="shoulder_link_convex_decomposition_p3_geom"/>
<body name="wam/shoulder_pitch_link" pos="0 0 0.184" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.00236983 -0.0154211 0.0310561" quat="0.961781 -0.272983 0.0167269 0.0133385" mass="3.87494" diaginertia="0.0214207 0.0167101 0.0126465" />
<joint name="wam/shoulder_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.985 1.985" />
<geom class="viz" mesh="shoulder_pitch_link_fine" />
<geom class="col" mesh="shoulder_pitch_link_convex" />
<body name="wam/upper_arm_link" pos="0 -0.505 0" quat="0.707107 0.707107 0 0">
<inertial pos="-0.0382586 3.309e-05 -0.207508" quat="0.705455 0.0381914 0.0383402 0.706686" mass="1.80228" diaginertia="0.0665697 0.0634285 0.00622701" />
<joint name="wam/shoulder_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.8 2.8" />
<geom class="viz" pos="0 0 -0.505" mesh="upper_arm_link_fine" />
<geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p1" name="upper_arm_link_convex_decomposition_p1_geom"/>
<geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p2" name="upper_arm_link_convex_decomposition_p2_geom"/>
<body name="wam/forearm_link" pos="0.045 0 0.045" quat="0.707107 -0.707107 0 0">
<inertial pos="0.00498512 -0.132717 -0.00022942" quat="0.546303 0.447151 -0.548676 0.447842" mass="2.40017" diaginertia="0.0196896 0.0152225 0.00749914" />
<joint name="wam/elbow_pitch_joint" pos="0 0 0" axis="0 0 1" range="-0.9 3.14159" />
<geom class="viz" mesh="elbow_link_fine" />
<geom class="col" mesh="elbow_link_convex" />
<geom class="viz" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_fine" />
<geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p1" name="forearm_link_convex_decomposition_p1_geom" />
<geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p2" name="forearm_link_convex_decomposition_p2_geom" />
<body name="wam/wrist_yaw_link" pos="-0.045 0 0" quat="0.707107 0.707107 0 0">
<inertial pos="8.921e-05 0.00435824 -0.00511217" quat="0.708528 -0.000120667 0.000107481 0.705683" mass="0.12376" diaginertia="0.0112011 0.0111887 7.58188e-05" />
<joint name="wam/wrist_yaw_joint" pos="0 0 0" axis="0 0 1" range="-4.55 1.25" />
<geom class="viz" pos="0 0 0.3" mesh="wrist_yaw_link_fine" />
<geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p1" name="wrist_yaw_link_convex_decomposition_p1_geom" />
<geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p2" name="wrist_yaw_link_convex_decomposition_p2_geom" />
<body name="wam/wrist_pitch_link" pos="0 0 0.3" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.00012262 -0.0246834 -0.0170319" quat="0.994687 -0.102891 0.000824211 -0.00336105" mass="0.417974" diaginertia="0.000555166 0.000463174 0.00023407" />
<joint name="wam/wrist_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.5707 1.5707" />
<geom class="viz" mesh="wrist_pitch_link_fine" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p1" name="wrist_pitch_link_convex_decomposition_p1_geom" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p2" name="wrist_pitch_link_convex_decomposition_p2_geom" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p3" name="wrist_pitch_link_convex_decomposition_p3_geom" />
<body name="wam/wrist_palm_link" pos="0 -0.06 0" quat="0.707107 0.707107 0 0">
<inertial pos="-7.974e-05 -0.00323552 -0.00016313" quat="0.594752 0.382453 0.382453 0.594752" mass="0.0686475" diaginertia="7.408e-05 3.81466e-05 3.76434e-05" />
<joint name="wam/palm_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.7 2.7" />
<geom class="viz" pos="0 0 -0.06" mesh="wrist_palm_link_fine" />
<geom class="col" pos="0 0 -0.06" mesh="wrist_palm_link_convex" name="wrist_palm_link_convex_geom" />
<site name="init_ball_pos" size="0.025 0.025 0.025" pos="0.0 0.0 0.035" rgba="0 1 0 1"/>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
<body name="table_body" pos="0 -2.8 0.4025">
<geom name="table" type="box" size="1.5 1.5 0.4" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
<geom name="table_contact_geom" type="box" size="1.5 1.5 0.1" pos="0 0 0.31" rgba="1.4 0.8 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
</body>
<geom name="table_robot" type="box" size="0.1 0.1 0.3" pos="0 0.00 0.3025" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
<geom name="wall" type="box" quat="1 0 0 0" size="1.5 0.04 1.4" pos="0. -4.3 1.4" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
<!-- <body name="cup_table" pos="0.32 -1.55 0.84" quat="0.7071068 0.7071068 0 0">-->
<body name="cup_table" pos="-1.42 -4.05 0.84" quat="0.7071068 0.7071068 0 0">
<inertial pos="-3.75236e-10 8.27811e-05 0.0947015" quat="0.999945 -0.0104888 0 0" mass="10.132" diaginertia="0.000285643 0.000270485 9.65696e-05" />
<geom priority= "1" name="cup_geom_table3" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup3_table" mass="10"/>
<geom priority= "1" name="cup_geom_table4" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup4_table" mass="10"/>
<geom priority= "1" name="cup_geom_table5" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup5_table" mass="10"/>
<geom priority= "1" name="cup_geom_table6" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup6_table" mass="10"/>
<geom priority= "1" name="cup_geom_table7" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup7_table" mass="10"/>
<geom priority= "1" name="cup_geom_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup8_table" mass="10"/>
<geom priority= "1" name="cup_geom_table9" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup9_table" mass="10"/>
<geom priority= "1" name="cup_geom_table10" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -010" type="mesh" mesh="cup10_table" mass="10"/>
<geom priority= "1" name="cup_base_table" pos="0 -0.035 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.08 0.045" solref="-10000 -100" mass="10"/>
<geom priority= "1" name="cup_base_table_contact" pos="0 0.015 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.07 0.01" solref="-10000 -100" rgba="0 0 255 1" mass="10"/>
<geom priority= "1" name="cup_geom_table15" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup15_table" mass="10"/>
<geom priority= "1" name="cup_geom_table16" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup16_table" mass="10"/>
<geom priority= "1" name="cup_geom_table17" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup17_table" mass="10"/>
<geom priority= "1" name="cup_geom1_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup18_table" mass="10"/>
<site name="cup_goal_table" pos="0 0.11 0.1337249" rgba="255 0 0 1"/>
<site name="cup_goal_final_table" pos="0.0 0.025 0.1337249" rgba="0 255 0 1"/>
</body>
<body name="ball" pos="0 0 0">
<joint axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<joint axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<joint axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<geom priority= "1" size="0.025 0.025 0.025" type="sphere" condim="4" name="ball_geom" rgba="0.8 0.2 0.1 1" mass="0.1"
friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="-10000 -10"/>
<site name="target_ball" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"/>
</body>
<!-- <camera name="visualization" mode="targetbody" target="wam/wrist_yaw_link" pos="1.5 -0.4 2.2"/>-->
<!-- <camera name="experiment" mode="fixed" quat="0.44418059 0.41778323 0.54301123 0.57732103" pos="1.5 -0.3 1.33" />-->
<camera name="visualization" mode="fixed" euler="2.35 2.0 -0.75" pos="2.0 -0.0 1.85"/>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="150.0" joint="wam/base_yaw_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="wam/shoulder_pitch_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="50.0" joint="wam/shoulder_yaw_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="60.0" joint="wam/elbow_pitch_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_yaw_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_pitch_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="2.0" joint="wam/palm_yaw_joint"/>
</actuator>
<sensor>
<framelinvel name="init_ball_vel" objtype="site" objname="init_ball_pos" />
</sensor>
</mujoco>

Some files were not shown because too many files have changed in this diff.