Merge branch 'master' into deprecate_detpmp

# Conflicts:
#	README.md
#	alr_envs/alr/__init__.py
This commit is contained in:
Maximilian Huettenrauch 2021-12-01 16:34:18 +01:00
commit 083e937e17
42 changed files with 1277 additions and 165 deletions

View File

@ -1,26 +1,25 @@
## ALR Robotics Control Environments ## ALR Robotics Control Environments
This project offers a large verity of reinforcement learning environments under a unifying interface base on OpenAI gym. This project offers a large variety of reinforcement learning environments under the unifying interface of [OpenAI gym](https://gym.openai.com/).
Besides, some custom environments we also provide support for the benchmark suites Besides, we also provide support (under the OpenAI interface) for the benchmark suites
[OpenAI gym](https://gym.openai.com/),
[DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control)
(DMC), and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environment can be created according (DMC) and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environments can be created according
to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md). Unlike existing libraries, we to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md). Unlike existing libraries, we
further support to control agents with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP, additionally support to control agents with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP,
we only consider the mean usually). we only consider the mean usually).
## Motion Primitive Environments (Episodic environments) ## Motion Primitive Environments (Episodic environments)
Unlike step-based environments, motion primitive (MP) environments are closer related to stochastic search, black box Unlike step-based environments, motion primitive (MP) environments are closer related to stochastic search, black box
optimization and methods that often used in robotics. MP environments are trajectory-based and always execute a full optimization, and methods that are often used in robotics. MP environments are trajectory-based and always execute a full
trajectory, which is generated by a Dynamic Motion Primitive (DMP) or a Probabilistic Motion Primitive (ProMP). The trajectory, which is generated by a Dynamic Motion Primitive (DMP) or a Probabilistic Motion Primitive (ProMP). The
generated trajectory is translated into individual step-wise actions by a controller. The exact choice of controller is, generated trajectory is translated into individual step-wise actions by a controller. The exact choice of controller is,
however, dependent on the type of environment. We currently support position, velocity, and PD-Controllers for position, however, dependent on the type of environment. We currently support position, velocity, and PD-Controllers for position,
velocity and torque control, respectively. The goal of all MP environments is still to learn a policy. Yet, an action velocity, and torque control, respectively. The goal of all MP environments is still to learn a policy. Yet, an action
represents the parametrization of the motion primitives to generate a suitable trajectory. Additionally, in this represents the parametrization of the motion primitives to generate a suitable trajectory. Additionally, in this
framework we support the above setting for the contextual setting, for which we expose all changing substates of the framework we support all of this also for the contextual setting, for which we expose all changing substates of the
task as a single observation in the beginning. This requires to predict a new action/MP parametrization for each task as a single observation in the beginning. This requires to predict a new action/MP parametrization for each
trajectory. All environments provide the next to the cumulative episode reward also all collected information from each trajectory. All environments provide next to the cumulative episode reward all collected information from each
step as part of the info dictionary. This information should, however, mainly be used for debugging and logging. step as part of the info dictionary. This information should, however, mainly be used for debugging and logging.
|Key| Description| |Key| Description|

View File

@ -1,3 +1,4 @@
import numpy as np
from gym import register from gym import register
from . import classic_control, mujoco from . import classic_control, mujoco
@ -9,6 +10,8 @@ from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
from .mujoco.reacher.alr_reacher import ALRReacherEnv from .mujoco.reacher.alr_reacher import ALRReacherEnv
from .mujoco.reacher.balancing import BalancingEnv from .mujoco.reacher.balancing import BalancingEnv
from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
# Classic Control # Classic Control
@ -193,6 +196,31 @@ register(
} }
) )
## Table Tennis
register(id='TableTennis2DCtxt-v0',
entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
max_episode_steps=MAX_EPISODE_STEPS,
kwargs={'ctxt_dim':2})
register(id='TableTennis4DCtxt-v0',
entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
max_episode_steps=MAX_EPISODE_STEPS,
kwargs={'ctxt_dim':4})
## BeerPong
difficulties = ["simple", "intermediate", "hard", "hardest"]
for v, difficulty in enumerate(difficulties):
register(
id='ALRBeerPong-v{}'.format(v),
entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
max_episode_steps=600,
kwargs={
"difficulty": difficulty,
"reward_type": "staged",
}
)
# Motion Primitive Environments # Motion Primitive Environments
## Simple Reacher ## Simple Reacher
@ -330,3 +358,59 @@ for _v in _versions:
} }
) )
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
## Beerpong
register(
id='BeerpongProMP-v0',
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": "alr_envs:ALRBeerPong-v0",
"wrappers": [mujoco.beerpong.MPWrapper],
"mp_kwargs": {
"num_dof": 7,
"num_basis": 2,
"n_zero_bases": 2,
"duration": 0.5,
"post_traj_time": 2.5,
# "width": 0.01,
# "off": 0.01,
"policy_type": "motor",
"weights_scale": 0.08,
"zero_start": True,
"zero_goal": False,
"policy_kwargs": {
"p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
"d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
}
}
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("BeerpongProMP-v0")
## Table Tennis
register(
id='TableTennisProMP-v0',
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": "alr_envs:TableTennis4DCtxt-v0",
"wrappers": [mujoco.table_tennis.MPWrapper],
"mp_kwargs": {
"num_dof": 7,
"num_basis": 2,
"n_zero_bases": 2,
"duration": 1.25,
"post_traj_time": 4.5,
# "width": 0.01,
# "off": 0.01,
"policy_type": "motor",
"weights_scale": 1.0,
"zero_start": True,
"zero_goal": False,
"policy_kwargs": {
"p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
"d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
}
}
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("TableTennisProMP-v0")

View File

@ -106,7 +106,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
# self._tmp_hole_depth, # self._tmp_hole_depth,
self.end_effector - self._goal, self.end_effector - self._goal,
self._steps self._steps
]) ]).astype(np.float32)
def _get_line_points(self, num_points_per_link=1): def _get_line_points(self, num_points_per_link=1):
theta = self._joint_angles[:, None] theta = self._joint_angles[:, None]

View File

@ -73,7 +73,7 @@ class SimpleReacherEnv(BaseReacherTorqueEnv):
self._angle_velocity, self._angle_velocity,
self.end_effector - self._goal, self.end_effector - self._goal,
self._steps self._steps
]) ]).astype(np.float32)
def _generate_goal(self): def _generate_goal(self):

View File

@ -110,7 +110,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
self.end_effector - self._via_point, self.end_effector - self._via_point,
self.end_effector - self._goal, self.end_effector - self._goal,
self._steps self._steps
]) ]).astype(np.float32)
def _check_collisions(self) -> bool: def _check_collisions(self) -> bool:
return self._check_self_collision() return self._check_self_collision()

View File

@ -2,3 +2,5 @@ from .reacher.alr_reacher import ALRReacherEnv
from .reacher.balancing import BalancingEnv from .reacher.balancing import BalancingEnv
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
from .table_tennis.tt_gym import TT_Env_Gym
from .beerpong.beerpong import ALRBeerBongEnv

View File

@ -0,0 +1 @@
from .mp_wrapper import MPWrapper

View File

@ -0,0 +1,187 @@
<mujoco model="wam(v1.31)">
<compiler angle="radian" meshdir="../../meshes/wam/" />
<option timestep="0.005" integrator="Euler" />
<size njmax="500" nconmax="100" />
<default class="main">
<joint limited="true" frictionloss="0.001" damping="0.07"/>
<default class="viz">
<geom type="mesh" contype="0" conaffinity="0" group="1" rgba="0.7 0.7 0.7 1" />
</default>
<default class="col">
<geom type="mesh" contype="0" rgba="0.5 0.6 0.7 1" />
</default>
</default>
<asset>
<texture type="2d" name="groundplane" builtin="checker" mark="edge" rgb1="0.25 0.26 0.25" rgb2="0.22 0.22 0.22" markrgb="0.3 0.3 0.3" width="100" height="100" />
<material name="MatGnd" texture="groundplane" texrepeat="5 5" specular="1" shininess="0.3" reflectance="1e-05" />
<mesh name="base_link_fine" file="base_link_fine.stl" />
<mesh name="base_link_convex" file="base_link_convex.stl" />
<mesh name="shoulder_link_fine" file="shoulder_link_fine.stl" />
<mesh name="shoulder_link_convex_decomposition_p1" file="shoulder_link_convex_decomposition_p1.stl" />
<mesh name="shoulder_link_convex_decomposition_p2" file="shoulder_link_convex_decomposition_p2.stl" />
<mesh name="shoulder_link_convex_decomposition_p3" file="shoulder_link_convex_decomposition_p3.stl" />
<mesh name="shoulder_pitch_link_fine" file="shoulder_pitch_link_fine.stl" />
<mesh name="shoulder_pitch_link_convex" file="shoulder_pitch_link_convex.stl" />
<mesh name="upper_arm_link_fine" file="upper_arm_link_fine.stl" />
<mesh name="upper_arm_link_convex_decomposition_p1" file="upper_arm_link_convex_decomposition_p1.stl" />
<mesh name="upper_arm_link_convex_decomposition_p2" file="upper_arm_link_convex_decomposition_p2.stl" />
<mesh name="elbow_link_fine" file="elbow_link_fine.stl" />
<mesh name="elbow_link_convex" file="elbow_link_convex.stl" />
<mesh name="forearm_link_fine" file="forearm_link_fine.stl" />
<mesh name="forearm_link_convex_decomposition_p1" file="forearm_link_convex_decomposition_p1.stl" />
<mesh name="forearm_link_convex_decomposition_p2" file="forearm_link_convex_decomposition_p2.stl" />
<mesh name="wrist_yaw_link_fine" file="wrist_yaw_link_fine.stl" />
<mesh name="wrist_yaw_link_convex_decomposition_p1" file="wrist_yaw_link_convex_decomposition_p1.stl" />
<mesh name="wrist_yaw_link_convex_decomposition_p2" file="wrist_yaw_link_convex_decomposition_p2.stl" />
<mesh name="wrist_pitch_link_fine" file="wrist_pitch_link_fine.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p1" file="wrist_pitch_link_convex_decomposition_p1.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p2" file="wrist_pitch_link_convex_decomposition_p2.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p3" file="wrist_pitch_link_convex_decomposition_p3.stl" />
<mesh name="wrist_palm_link_fine" file="wrist_palm_link_fine.stl" />
<mesh name="wrist_palm_link_convex" file="wrist_palm_link_convex.stl" />
<mesh name="cup1" file="cup_split1.stl" scale="0.001 0.001 0.001" />
<mesh name="cup2" file="cup_split2.stl" scale="0.001 0.001 0.001" />
<mesh name="cup3" file="cup_split3.stl" scale="0.001 0.001 0.001" />
<mesh name="cup4" file="cup_split4.stl" scale="0.001 0.001 0.001" />
<mesh name="cup5" file="cup_split5.stl" scale="0.001 0.001 0.001" />
<mesh name="cup6" file="cup_split6.stl" scale="0.001 0.001 0.001" />
<mesh name="cup7" file="cup_split7.stl" scale="0.001 0.001 0.001" />
<mesh name="cup8" file="cup_split8.stl" scale="0.001 0.001 0.001" />
<mesh name="cup9" file="cup_split9.stl" scale="0.001 0.001 0.001" />
<mesh name="cup10" file="cup_split10.stl" scale="0.001 0.001 0.001" />
<mesh name="cup11" file="cup_split11.stl" scale="0.001 0.001 0.001" />
<mesh name="cup12" file="cup_split12.stl" scale="0.001 0.001 0.001" />
<mesh name="cup13" file="cup_split13.stl" scale="0.001 0.001 0.001" />
<mesh name="cup14" file="cup_split14.stl" scale="0.001 0.001 0.001" />
<mesh name="cup15" file="cup_split15.stl" scale="0.001 0.001 0.001" />
<mesh name="cup16" file="cup_split16.stl" scale="0.001 0.001 0.001" />
<mesh name="cup17" file="cup_split17.stl" scale="0.001 0.001 0.001" />
<mesh name="cup18" file="cup_split18.stl" scale="0.001 0.001 0.001" />
<mesh name="cup3_table" file="cup_split3.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup4_table" file="cup_split4.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup5_table" file="cup_split5.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup6_table" file="cup_split6.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup7_table" file="cup_split7.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup8_table" file="cup_split8.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup9_table" file="cup_split9.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup10_table" file="cup_split10.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup15_table" file="cup_split15.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup16_table" file="cup_split16.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup17_table" file="cup_split17.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup18_table" file="cup_split18.stl" scale="0.00211 0.00211 0.01" />
</asset>
<worldbody>
<geom name="ground" size="5 5 1" type="plane" material="MatGnd" />
<light pos="0.1 0.2 1.3" dir="-0.0758098 -0.32162 -0.985527" directional="true" cutoff="60" exponent="1" diffuse="1 1 1" specular="0.1 0.1 0.1" />
<body name="wam/base_link" pos="0 0 0.6">
<inertial pos="6.93764e-06 0.0542887 0.076438" quat="0.496481 0.503509 -0.503703 0.496255" mass="27.5544" diaginertia="0.432537 0.318732 0.219528" />
<geom class="viz" quat="0.707107 0 0 -0.707107" mesh="base_link_fine" />
<geom class="col" quat="0.707107 0 0 -0.707107" mesh="base_link_convex" name="base_link_convex_geom"/>
<body name="wam/shoulder_yaw_link" pos="0 0 0.16" quat="0.707107 0 0 -0.707107">
<inertial pos="-0.00443422 -0.00066489 -0.12189" quat="0.999995 0.000984795 0.00270132 0.00136071" mass="10.7677" diaginertia="0.507411 0.462983 0.113271" />
<joint name="wam/base_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.6 2.6" />
<geom class="viz" pos="0 0 0.186" mesh="shoulder_link_fine" />
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p1" name="shoulder_link_convex_decomposition_p1_geom"/>
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p2" name="shoulder_link_convex_decomposition_p2_geom"/>
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p3" name="shoulder_link_convex_decomposition_p3_geom"/>
<body name="wam/shoulder_pitch_link" pos="0 0 0.184" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.00236983 -0.0154211 0.0310561" quat="0.961781 -0.272983 0.0167269 0.0133385" mass="3.87494" diaginertia="0.0214207 0.0167101 0.0126465" />
<joint name="wam/shoulder_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.985 1.985" />
<geom class="viz" mesh="shoulder_pitch_link_fine" />
<geom class="col" mesh="shoulder_pitch_link_convex" />
<body name="wam/upper_arm_link" pos="0 -0.505 0" quat="0.707107 0.707107 0 0">
<inertial pos="-0.0382586 3.309e-05 -0.207508" quat="0.705455 0.0381914 0.0383402 0.706686" mass="1.80228" diaginertia="0.0665697 0.0634285 0.00622701" />
<joint name="wam/shoulder_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.8 2.8" />
<geom class="viz" pos="0 0 -0.505" mesh="upper_arm_link_fine" />
<geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p1" name="upper_arm_link_convex_decomposition_p1_geom"/>
<geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p2" name="upper_arm_link_convex_decomposition_p2_geom"/>
<body name="wam/forearm_link" pos="0.045 0 0.045" quat="0.707107 -0.707107 0 0">
<inertial pos="0.00498512 -0.132717 -0.00022942" quat="0.546303 0.447151 -0.548676 0.447842" mass="2.40017" diaginertia="0.0196896 0.0152225 0.00749914" />
<joint name="wam/elbow_pitch_joint" pos="0 0 0" axis="0 0 1" range="-0.9 3.14159" />
<geom class="viz" mesh="elbow_link_fine" />
<geom class="col" mesh="elbow_link_convex" />
<geom class="viz" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_fine" />
<geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p1" name="forearm_link_convex_decomposition_p1_geom" />
<geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p2" name="forearm_link_convex_decomposition_p2_geom" />
<body name="wam/wrist_yaw_link" pos="-0.045 0 0" quat="0.707107 0.707107 0 0">
<inertial pos="8.921e-05 0.00435824 -0.00511217" quat="0.708528 -0.000120667 0.000107481 0.705683" mass="0.12376" diaginertia="0.0112011 0.0111887 7.58188e-05" />
<joint name="wam/wrist_yaw_joint" pos="0 0 0" axis="0 0 1" range="-4.55 1.25" />
<geom class="viz" pos="0 0 0.3" mesh="wrist_yaw_link_fine" />
<geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p1" name="wrist_yaw_link_convex_decomposition_p1_geom" />
<geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p2" name="wrist_yaw_link_convex_decomposition_p2_geom" />
<body name="wam/wrist_pitch_link" pos="0 0 0.3" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.00012262 -0.0246834 -0.0170319" quat="0.994687 -0.102891 0.000824211 -0.00336105" mass="0.417974" diaginertia="0.000555166 0.000463174 0.00023407" />
<joint name="wam/wrist_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.5707 1.5707" />
<geom class="viz" mesh="wrist_pitch_link_fine" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p1" name="wrist_pitch_link_convex_decomposition_p1_geom" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p2" name="wrist_pitch_link_convex_decomposition_p2_geom" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p3" name="wrist_pitch_link_convex_decomposition_p3_geom" />
<body name="wam/wrist_palm_link" pos="0 -0.06 0" quat="0.707107 0.707107 0 0">
<inertial pos="-7.974e-05 -0.00323552 -0.00016313" quat="0.594752 0.382453 0.382453 0.594752" mass="0.0686475" diaginertia="7.408e-05 3.81466e-05 3.76434e-05" />
<joint name="wam/palm_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.7 2.7" />
<geom class="viz" pos="0 0 -0.06" mesh="wrist_palm_link_fine" />
<geom class="col" pos="0 0 -0.06" mesh="wrist_palm_link_convex" name="wrist_palm_link_convex_geom" />
<site name="init_ball_pos_site" size="0.025 0.025 0.025" pos="0.0 0.0 0.035" rgba="0 1 0 1"/>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
<body name="table_body" pos="0 -1.85 0.4025">
<geom name="table" type="box" size="0.4 0.6 0.4" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
<geom name="table_contact_geom" type="box" size="0.4 0.6 0.1" pos="0 0 0.31" rgba="1.4 0.8 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
</body>
<geom name="table_robot" type="box" size="0.1 0.1 0.3" pos="0 0.00 0.3025" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
<geom name="wall" type="box" quat="1 0 0 0" size="0.4 0.04 1.1" pos="0. -2.45 1.1" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
<body name="cup_table" pos="0.32 -1.55 0.84" quat="0.7071068 0.7071068 0 0">
<inertial pos="-3.75236e-10 8.27811e-05 0.0947015" quat="0.999945 -0.0104888 0 0" mass="10.132" diaginertia="0.000285643 0.000270485 9.65696e-05" />
<geom priority= "1" name="cup_geom_table3" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup3_table" mass="10"/>
<geom priority= "1" name="cup_geom_table4" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup4_table" mass="10"/>
<geom priority= "1" name="cup_geom_table5" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup5_table" mass="10"/>
<geom priority= "1" name="cup_geom_table6" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup6_table" mass="10"/>
<geom priority= "1" name="cup_geom_table7" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup7_table" mass="10"/>
<geom priority= "1" name="cup_geom_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup8_table" mass="10"/>
<geom priority= "1" name="cup_geom_table9" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup9_table" mass="10"/>
<geom priority= "1" name="cup_geom_table10" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -010" type="mesh" mesh="cup10_table" mass="10"/>
<geom priority= "1" name="cup_base_table" pos="0 -0.035 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.08 0.045" solref="-10000 -100" mass="10"/>
<geom priority= "1" name="cup_base_table_contact" pos="0 0.015 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.07 0.01" solref="-10000 -100" rgba="0 0 255 1" mass="10"/>
<geom priority= "1" name="cup_geom_table15" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup15_table" mass="10"/>
<geom priority= "1" name="cup_geom_table16" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup16_table" mass="10"/>
<geom priority= "1" name="cup_geom_table17" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup17_table" mass="10"/>
<geom priority= "1" name="cup_geom1_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup18_table" mass="10"/>
<site name="cup_goal_table" pos="0 0.11 0.1337249" rgba="255 0 0 1"/>
<site name="cup_goal_final_table" pos="0.0 0.025 0.1337249" rgba="0 255 0 1"/>
</body>
<body name="ball" pos="0 0 0">
<joint axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<joint axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<joint axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<geom priority= "1" size="0.025 0.025 0.025" type="sphere" condim="4" name="ball_geom" rgba="0.8 0.2 0.1 1" mass="0.1"
friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="-10000 -10"/>
<site name="target_ball" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"/>
</body>
<!-- <camera name="visualization" mode="targetbody" target="wam/wrist_yaw_link" pos="1.5 -0.4 2.2"/>-->
<!-- <camera name="experiment" mode="fixed" quat="0.44418059 0.41778323 0.54301123 0.57732103" pos="1.5 -0.3 1.33" />-->
<camera name="visualization" mode="fixed" euler="2.35 2.0 -0.75" pos="2.0 -0.0 1.85"/>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="150.0" joint="wam/base_yaw_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="wam/shoulder_pitch_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="50.0" joint="wam/shoulder_yaw_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="60.0" joint="wam/elbow_pitch_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_yaw_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_pitch_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="2.0" joint="wam/palm_yaw_joint"/>
</actuator>
</mujoco>

View File

@ -1,3 +1,4 @@
import mujoco_py.builder
import os import os
import numpy as np import numpy as np
@ -5,44 +6,67 @@ from gym import utils
from gym.envs.mujoco import MujocoEnv from gym.envs.mujoco import MujocoEnv
class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
class ALRBeerpongEnv(MujocoEnv, utils.EzPickle): def __init__(self, frame_skip=1, apply_gravity_comp=True, reward_type: str = "staged", noisy=False,
def __init__(self, model_path, frame_skip, n_substeps=4, apply_gravity_comp=True, reward_function=None): context: np.ndarray = None, difficulty='simple'):
utils.EzPickle.__init__(**locals())
MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip)
self._steps = 0 self._steps = 0
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
"beerpong" + ".xml") "beerpong_wo_cup" + ".xml")
self.start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
self.start_vel = np.zeros(7)
self._q_pos = []
self._q_vel = []
# self.weight_matrix_scale = 50
self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7]) self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7]) self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
self.context = None self.context = context
# alr_mujoco_env.AlrMujocoEnv.__init__(self, self.apply_gravity_comp = apply_gravity_comp
# self.xml_path, self.add_noise = noisy
# apply_gravity_comp=apply_gravity_comp,
# n_substeps=n_substeps)
self.sim_time = 8 # seconds self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
self.sim_steps = int(self.sim_time / self.dt) self._start_vel = np.zeros(7)
if reward_function is None:
from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerpongReward self.ball_site_id = 0
reward_function = BeerpongReward self.ball_id = 11
self.reward_function = reward_function(self.sim, self.sim_steps)
self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"] self._release_step = 100 # time step of ball release
self.ball_id = self.sim.model._body_name2id["ball"]
self.cup_table_id = self.sim.model._body_name2id["cup_table"] self.sim_time = 4 # seconds
self.ep_length = 600 # based on 5 seconds with dt = 0.005 int(self.sim_time / self.dt)
self.cup_table_id = 10
if noisy:
self.noise_std = 0.01
else:
self.noise_std = 0
if difficulty == 'simple':
self.cup_goal_pos = np.array([0, -1.7, 0.840])
elif difficulty == 'intermediate':
self.cup_goal_pos = np.array([0.3, -1.5, 0.840])
elif difficulty == 'hard':
self.cup_goal_pos = np.array([-0.3, -2.2, 0.840])
elif difficulty == 'hardest':
self.cup_goal_pos = np.array([-0.3, -1.2, 0.840])
if reward_type == "no_context":
from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerPongReward
reward_function = BeerPongReward
elif reward_type == "staged":
from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
reward_function = BeerPongReward
else:
raise ValueError("Unknown reward type: {}".format(reward_type))
self.reward_function = reward_function()
MujocoEnv.__init__(self, self.xml_path, frame_skip)
utils.EzPickle.__init__(self)
@property
def start_pos(self):
return self._start_pos
@property
def start_vel(self):
return self._start_vel
@property @property
def current_pos(self): def current_pos(self):
@ -52,9 +76,9 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
def current_vel(self): def current_vel(self):
return self.sim.data.qvel[0:7].copy() return self.sim.data.qvel[0:7].copy()
def configure(self, context): def reset(self):
self.context = context self.reward_function.reset(self.add_noise)
self.reward_function.reset(context) return super().reset()
def reset_model(self): def reset_model(self):
init_pos_all = self.init_qpos.copy() init_pos_all = self.init_qpos.copy()
@ -62,19 +86,14 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
init_vel = np.zeros_like(init_pos_all) init_vel = np.zeros_like(init_pos_all)
self._steps = 0 self._steps = 0
self._q_pos = []
self._q_vel = []
start_pos = init_pos_all start_pos = init_pos_all
start_pos[0:7] = init_pos_robot start_pos[0:7] = init_pos_robot
# start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
self.set_state(start_pos, init_vel) self.set_state(start_pos, init_vel)
self.sim.model.body_pos[self.cup_table_id] = self.cup_goal_pos
ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05]) start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
self.sim.model.body_pos[self.ball_id] = ball_pos.copy() self.set_state(start_pos, init_vel)
self.sim.model.body_pos[self.cup_table_id] = self.context.copy()
return self._get_obs() return self._get_obs()
def step(self, a): def step(self, a):
@ -82,32 +101,55 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
angular_vel = 0.0 angular_vel = 0.0
reward_ctrl = - np.square(a).sum() reward_ctrl = - np.square(a).sum()
crash = self.do_simulation(a) if self.apply_gravity_comp:
joint_cons_viol = self.check_traj_in_joint_limits() a = a + self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0]
try:
self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy()) self.do_simulation(a, self.frame_skip)
self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy()) if self._steps < self._release_step:
self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
elif self._steps == self._release_step and self.add_noise:
self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
crash = False
except mujoco_py.builder.MujocoException:
crash = True
# joint_cons_viol = self.check_traj_in_joint_limits()
ob = self._get_obs() ob = self._get_obs()
if not crash and not joint_cons_viol: if not crash:
reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps) reward, reward_infos = self.reward_function.compute_reward(self, a)
done = success or self._steps == self.sim_steps - 1 or stop_sim success = reward_infos['success']
is_collided = reward_infos['is_collided']
ball_pos = reward_infos['ball_pos']
ball_vel = reward_infos['ball_vel']
done = is_collided or self._steps == self.ep_length - 1
self._steps += 1 self._steps += 1
else: else:
reward = -1000 reward = -30
success = False success = False
is_collided = False
done = True done = True
ball_pos = np.zeros(3)
ball_vel = np.zeros(3)
return ob, reward, done, dict(reward_dist=reward_dist, return ob, reward, done, dict(reward_dist=reward_dist,
reward_ctrl=reward_ctrl, reward_ctrl=reward_ctrl,
reward=reward,
velocity=angular_vel, velocity=angular_vel,
traj=self._q_pos, is_success=success, # traj=self._q_pos,
is_collided=crash or joint_cons_viol) action=a,
q_pos=self.sim.data.qpos[0:7].ravel().copy(),
q_vel=self.sim.data.qvel[0:7].ravel().copy(),
ball_pos=ball_pos,
ball_vel=ball_vel,
is_success=success,
is_collided=is_collided, sim_crash=crash)
def check_traj_in_joint_limits(self): def check_traj_in_joint_limits(self):
return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min) return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
# TODO # TODO: extend observation space
def _get_obs(self): def _get_obs(self):
theta = self.sim.data.qpos.flat[:7] theta = self.sim.data.qpos.flat[:7]
return np.concatenate([ return np.concatenate([
@ -118,28 +160,26 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
]) ])
# TODO # TODO
@property
def active_obs(self): def active_obs(self):
pass return np.hstack([
[False] * 7, # cos
[False] * 7, # sin
# [True] * 2, # x-y coordinates of target distance
[False] # env steps
])
if __name__ == "__main__": if __name__ == "__main__":
env = ALRBeerpongEnv() env = ALRBeerBongEnv(reward_type="no_context", difficulty='hardest')
ctxt = np.array([0, -2, 0.840]) # initial
env.configure(ctxt) # env.configure(ctxt)
env.reset() env.reset()
env.render() env.render("human")
for i in range(16000): for i in range(800):
# test with random actions ac = 10 * env.action_space.sample()[0:7]
ac = 0.0 * env.action_space.sample()[0:7]
ac[1] = -0.8
ac[3] = - 0.3
ac[5] = -0.2
# ac = env.start_pos
# ac[0] += np.pi/2
obs, rew, d, info = env.step(ac) obs, rew, d, info = env.step(ac)
env.render() env.render("human")
print(rew) print(rew)
@ -147,4 +187,3 @@ if __name__ == "__main__":
break break
env.close() env.close()

View File

@ -1,123 +1,168 @@
import numpy as np import numpy as np
from alr_envs.alr.mujoco import alr_reward_fct
class BeerpongReward(alr_reward_fct.AlrReward): class BeerPongReward:
def __init__(self, sim, sim_time): def __init__(self):
self.sim = sim self.robot_collision_objects = ["wrist_palm_link_convex_geom",
self.sim_time = sim_time
self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
"wrist_pitch_link_convex_decomposition_p1_geom", "wrist_pitch_link_convex_decomposition_p1_geom",
"wrist_pitch_link_convex_decomposition_p2_geom", "wrist_pitch_link_convex_decomposition_p2_geom",
"wrist_pitch_link_convex_decomposition_p3_geom", "wrist_pitch_link_convex_decomposition_p3_geom",
"wrist_yaw_link_convex_decomposition_p1_geom", "wrist_yaw_link_convex_decomposition_p1_geom",
"wrist_yaw_link_convex_decomposition_p2_geom", "wrist_yaw_link_convex_decomposition_p2_geom",
"forearm_link_convex_decomposition_p1_geom", "forearm_link_convex_decomposition_p1_geom",
"forearm_link_convex_decomposition_p2_geom"] "forearm_link_convex_decomposition_p2_geom",
"upper_arm_link_convex_decomposition_p1_geom",
"upper_arm_link_convex_decomposition_p2_geom",
"shoulder_link_convex_decomposition_p1_geom",
"shoulder_link_convex_decomposition_p2_geom",
"shoulder_link_convex_decomposition_p3_geom",
"base_link_convex_geom", "table_contact_geom"]
self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
"cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
# "cup_base_table", "cup_base_table_contact",
"cup_geom_table15",
"cup_geom_table16",
"cup_geom_table17", "cup_geom1_table8",
# "cup_base_table_contact",
# "cup_base_table"
]
self.ball_id = None
self.ball_collision_id = None
self.goal_id = None
self.goal_final_id = None
self.collision_ids = None
self.ball_traj = None self.ball_traj = None
self.dists = None self.dists = None
self.dists_ctxt = None
self.dists_final = None self.dists_final = None
self.costs = None self.costs = None
self.action_costs = None
self.angle_rewards = None
self.cup_angles = None
self.cup_z_axes = None
self.collision_penalty = 500
self.reset(None) self.reset(None)
def reset(self, context): def reset(self, context):
self.ball_traj = np.zeros(shape=(self.sim_time, 3)) self.ball_traj = []
self.dists = [] self.dists = []
self.dists_ctxt = []
self.dists_final = [] self.dists_final = []
self.costs = [] self.costs = []
self.context = context self.action_costs = []
self.ball_in_cup = False self.angle_rewards = []
self.dist_ctxt = 5 self.cup_angles = []
self.cup_z_axes = []
self.ball_ground_contact = False
self.ball_table_contact = False
self.ball_wall_contact = False
self.ball_cup_contact = False
self.ball_id = self.sim.model._body_name2id["ball"] def compute_reward(self, env, action):
self.ball_collision_id = self.sim.model._geom_name2id["ball_geom"] self.ball_id = env.sim.model._body_name2id["ball"]
self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"] self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
self.goal_id = self.sim.model._site_name2id["cup_goal_table"] self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
self.goal_final_id = self.sim.model._site_name2id["cup_goal_final_table"] self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
self.collision_ids = [self.sim.model._geom_name2id[name] for name in self.collision_objects] self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
self.cup_table_id = self.sim.model._body_name2id["cup_table"] self.cup_table_id = env.sim.model._body_name2id["cup_table"]
self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
self.wall_collision_id = env.sim.model._geom_name2id["wall"]
self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
self.ground_collision_id = env.sim.model._geom_name2id["ground"]
self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
def compute_reward(self, action, sim, step): goal_pos = env.sim.data.site_xpos[self.goal_id]
action_cost = np.sum(np.square(action)) ball_pos = env.sim.data.body_xpos[self.ball_id]
goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
stop_sim = False
success = False
if self.check_collision(sim):
reward = - 1e-4 * action_cost - 1000
stop_sim = True
return reward, success, stop_sim
# Compute the current distance from the ball to the inner part of the cup
goal_pos = sim.data.site_xpos[self.goal_id]
ball_pos = sim.data.body_xpos[self.ball_id]
goal_final_pos = sim.data.site_xpos[self.goal_final_id]
self.dists.append(np.linalg.norm(goal_pos - ball_pos)) self.dists.append(np.linalg.norm(goal_pos - ball_pos))
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos)) self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
self.dists_ctxt.append(np.linalg.norm(ball_pos - self.context))
self.ball_traj[step, :] = ball_pos
# Determine the first time when ball is in cup action_cost = np.sum(np.square(action))
if not self.ball_in_cup: self.action_costs.append(action_cost)
ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
self.ball_in_cup = ball_in_cup ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
if ball_in_cup: self.table_collision_id)
dist_to_ctxt = np.linalg.norm(ball_pos - self.context)
self.dist_ctxt = dist_to_ctxt if ball_table_bounce: # or ball_cup_table_cont or ball_wall_con
self.ball_table_contact = True
ball_cup_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
self.cup_collision_ids)
if ball_cup_cont:
self.ball_cup_contact = True
ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.wall_collision_id)
if ball_wall_cont and not self.ball_table_contact:
self.ball_wall_contact = True
ball_ground_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
self.ground_collision_id)
if ball_ground_contact and not self.ball_table_contact:
self.ball_ground_contact = True
self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
if env._steps == env.ep_length - 1 or self._is_collided:
if step == self.sim_time - 1:
min_dist = np.min(self.dists) min_dist = np.min(self.dists)
dist_final = self.dists_final[-1]
# dist_ctxt = self.dists_ctxt[-1]
# cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt) ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.cup_table_collision_id)
cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * self.dist_ctxt)
reward = np.exp(-1 * cost) - 1e-4 * action_cost cost_offset = 0
success = dist_final < 0.05 and self.dist_ctxt < 0.05
if self.ball_ground_contact: # or self.ball_wall_contact:
cost_offset += 2
if not self.ball_table_contact:
cost_offset += 2
if not ball_in_cup:
cost_offset += 2
cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-4 * action_cost # + min_dist ** 2
else:
if self.ball_cup_contact:
cost_offset += 1
cost = cost_offset + self.dists_final[-1] ** 2 + 1e-4 * action_cost
reward = - 1*cost - self.collision_penalty * int(self._is_collided)
success = ball_in_cup and not self.ball_ground_contact and not self.ball_wall_contact and not self.ball_cup_contact
else: else:
reward = - 1e-4 * action_cost reward = - 1e-4 * action_cost
success = False success = False
return reward, success, stop_sim infos = {}
infos["success"] = success
infos["is_collided"] = self._is_collided
infos["ball_pos"] = ball_pos.copy()
infos["action_cost"] = 5e-4 * action_cost
def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt): return reward, infos
if not ball_in_cup:
cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2) def _check_collision_single_objects(self, sim, id_1, id_2):
for coni in range(0, sim.data.ncon):
con = sim.data.contact[coni]
collision = con.geom1 == id_1 and con.geom2 == id_2
collision_trans = con.geom1 == id_2 and con.geom2 == id_1
if collision or collision_trans:
return True
return False
def _check_collision_with_itself(self, sim, collision_ids):
col_1, col_2 = False, False
for j, id in enumerate(collision_ids):
col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
if j != len(collision_ids) - 1:
col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
else: else:
cost = 2 * dist_to_ctxt ** 2 col_2 = False
print('Context Distance:', dist_to_ctxt) collision = True if col_1 or col_2 else False
return cost return collision
def check_ball_in_cup(self, sim, ball_collision_id): def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
for coni in range(0, sim.data.ncon): for coni in range(0, sim.data.ncon):
con = sim.data.contact[coni] con = sim.data.contact[coni]
collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id collision = con.geom1 in id_list and con.geom2 == id_1
collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id collision_trans = con.geom1 == id_1 and con.geom2 in id_list
if collision or collision_trans:
return True
return False
def check_collision(self, sim):
for coni in range(0, sim.data.ncon):
con = sim.data.contact[coni]
collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
if collision or collision_trans: if collision or collision_trans:
return True return True

View File

@ -0,0 +1,169 @@
import numpy as np
class BeerPongReward:
def __init__(self):
self.robot_collision_objects = ["wrist_palm_link_convex_geom",
"wrist_pitch_link_convex_decomposition_p1_geom",
"wrist_pitch_link_convex_decomposition_p2_geom",
"wrist_pitch_link_convex_decomposition_p3_geom",
"wrist_yaw_link_convex_decomposition_p1_geom",
"wrist_yaw_link_convex_decomposition_p2_geom",
"forearm_link_convex_decomposition_p1_geom",
"forearm_link_convex_decomposition_p2_geom",
"upper_arm_link_convex_decomposition_p1_geom",
"upper_arm_link_convex_decomposition_p2_geom",
"shoulder_link_convex_decomposition_p1_geom",
"shoulder_link_convex_decomposition_p2_geom",
"shoulder_link_convex_decomposition_p3_geom",
"base_link_convex_geom", "table_contact_geom"]
self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
"cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
# "cup_base_table", "cup_base_table_contact",
"cup_geom_table15",
"cup_geom_table16",
"cup_geom_table17", "cup_geom1_table8",
# "cup_base_table_contact",
# "cup_base_table"
]
self.ball_traj = None
self.dists = None
self.dists_final = None
self.costs = None
self.action_costs = None
self.angle_rewards = None
self.cup_angles = None
self.cup_z_axes = None
self.collision_penalty = 500
self.reset(None)
def reset(self, noisy):
self.ball_traj = []
self.dists = []
self.dists_final = []
self.costs = []
self.action_costs = []
self.angle_rewards = []
self.cup_angles = []
self.cup_z_axes = []
self.ball_ground_contact = False
self.ball_table_contact = False
self.ball_wall_contact = False
self.ball_cup_contact = False
self.noisy_bp = noisy
self._t_min_final_dist = -1
def compute_reward(self, env, action):
self.ball_id = env.sim.model._body_name2id["ball"]
self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
self.cup_table_id = env.sim.model._body_name2id["cup_table"]
self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
self.wall_collision_id = env.sim.model._geom_name2id["wall"]
self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
self.ground_collision_id = env.sim.model._geom_name2id["ground"]
self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
goal_pos = env.sim.data.site_xpos[self.goal_id]
ball_pos = env.sim.data.body_xpos[self.ball_id]
ball_vel = env.sim.data.body_xvelp[self.ball_id]
goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
action_cost = np.sum(np.square(action))
self.action_costs.append(action_cost)
self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
if env._steps == env.ep_length - 1 or self._is_collided:
min_dist = np.min(self.dists)
ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
self.table_collision_id)
ball_cup_table_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
self.cup_collision_ids)
ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id,
self.wall_collision_id)
ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id,
self.cup_table_collision_id)
if not ball_in_cup:
cost_offset = 2
if not ball_cup_table_cont and not ball_table_bounce and not ball_wall_cont:
cost_offset += 2
cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-7 * action_cost
else:
cost = self.dists_final[-1] ** 2 + 1.5 * action_cost * 1e-7
reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
success = ball_in_cup
crash = self._is_collided
else:
reward = - 1e-7 * action_cost
cost = 0
success = False
crash = False
infos = {}
infos["success"] = success
infos["is_collided"] = self._is_collided
infos["ball_pos"] = ball_pos.copy()
infos["ball_vel"] = ball_vel.copy()
infos["action_cost"] = 5e-4 * action_cost
infos["task_cost"] = cost
return reward, infos
def get_cost_offset(self):
if self.ball_ground_contact:
return 200
if not self.ball_table_contact:
return 100
if not self.ball_in_cup:
return 50
if self.ball_in_cup and self.ball_cup_contact and not self.noisy_bp:
return 10
return 0
def _check_collision_single_objects(self, sim, id_1, id_2):
for coni in range(0, sim.data.ncon):
con = sim.data.contact[coni]
collision = con.geom1 == id_1 and con.geom2 == id_2
collision_trans = con.geom1 == id_2 and con.geom2 == id_1
if collision or collision_trans:
return True
return False
def _check_collision_with_itself(self, sim, collision_ids):
col_1, col_2 = False, False
for j, id in enumerate(collision_ids):
col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
if j != len(collision_ids) - 1:
col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
else:
col_2 = False
collision = True if col_1 or col_2 else False
return collision
def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
for coni in range(0, sim.data.ncon):
con = sim.data.contact[coni]
collision = con.geom1 in id_list and con.geom2 == id_1
collision_trans = con.geom1 == id_1 and con.geom2 in id_list
if collision or collision_trans:
return True
return False

View File

@ -0,0 +1,39 @@
from typing import Tuple, Union
import numpy as np
from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
class MPWrapper(MPEnvWrapper):
@property
def active_obs(self):
# TODO: @Max Filter observations correctly
return np.hstack([
[False] * 7, # cos
[False] * 7, # sin
# [True] * 2, # x-y coordinates of target distance
[False] # env steps
])
@property
def start_pos(self):
return self._start_pos
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.sim.data.qpos[0:7].copy()
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.sim.data.qvel[0:7].copy()
@property
def goal_pos(self):
# TODO: @Max I think the default value of returning to the start is reasonable here
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property
def dt(self) -> Union[float, int]:
return self.env.dt

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
from .mp_wrapper import MPWrapper

View File

@ -0,0 +1,38 @@
from typing import Tuple, Union
import numpy as np
from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
class MPWrapper(MPEnvWrapper):
@property
def active_obs(self):
# TODO: @Max Filter observations correctly
return np.hstack([
[True] * 7, # Joint Pos
[True] * 3, # Ball pos
[True] * 3 # goal pos
])
@property
def start_pos(self):
return self.self.init_qpos_tt
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.sim.data.qpos[:7].copy()
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.sim.data.qvel[:7].copy()
@property
def goal_pos(self):
# TODO: @Max I think the default value of returning to the start is reasonable here
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property
def dt(self) -> Union[float, int]:
return self.env.dt

View File

@ -0,0 +1,168 @@
import os
import numpy as np
import mujoco_py
from gym import utils, spaces
from gym.envs.mujoco import MujocoEnv
from alr_envs.alr.mujoco.table_tennis.tt_utils import ball_init
from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
#TODO: Check for simulation stability. Make sure the code runs even for sim crash
MAX_EPISODE_STEPS = 1375
BALL_NAME_CONTACT = "target_ball_contact"
BALL_NAME = "target_ball"
TABLE_NAME = 'table_tennis_table'
FLOOR_NAME = 'floor'
PADDLE_CONTACT_1_NAME = 'bat'
PADDLE_CONTACT_2_NAME = 'bat_back'
RACKET_NAME = 'bat'
# CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.6]])
CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.0]])
CONTEXT_RANGE_BOUNDS_4DIM = np.array([[-1.35, -0.75, -1.25, -0.75], [-0.1, 0.75, -0.1, 0.75]])
class TT_Env_Gym(MujocoEnv, utils.EzPickle):
def __init__(self, ctxt_dim=2):
model_path = os.path.join(os.path.dirname(__file__), "xml", 'table_tennis_env.xml')
self.ctxt_dim = ctxt_dim
if ctxt_dim == 2:
self.context_range_bounds = CONTEXT_RANGE_BOUNDS_2DIM
self.goal = np.zeros(3) # 2 x,y + 1z
elif ctxt_dim == 4:
self.context_range_bounds = CONTEXT_RANGE_BOUNDS_4DIM
self.goal = np.zeros(3)
else:
raise ValueError("either 2 or 4 dimensional Contexts available")
action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64')
self.time_steps = 0
self.init_qpos_tt = np.array([0, 0, 0, 1.5, 0, 0, 1.5, 0, 0, 0])
self.init_qvel_tt = np.zeros(10)
self.reward_func = TT_Reward(self.ctxt_dim)
self.ball_landing_pos = None
self.hited_ball = False
self.ball_contact_after_hit = False
self._ids_set = False
super(TT_Env_Gym, self).__init__(model_path=model_path, frame_skip=1)
self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func.
self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this
self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this
self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
def _set_ids(self):
self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func.
self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this
self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this
self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
self._ids_set = True
def _get_obs(self):
ball_pos = self.sim.data.body_xpos[self.ball_id]
obs = np.concatenate([self.sim.data.qpos[:7].copy(), # 7 joint positions
ball_pos,
self.goal.copy()])
return obs
def sample_context(self):
return np.random.uniform(self.context_range_bounds[0], self.context_range_bounds[1], size=self.ctxt_dim)
def reset_model(self):
self.set_state(self.init_qpos_tt, self.init_qvel_tt) # reset to initial sim state
self.time_steps = 0
self.ball_landing_pos = None
self.hited_ball = False
self.ball_contact_after_hit = False
self.goal = self.sample_context()[:2]
if self.ctxt_dim == 2:
initial_ball_state = ball_init(random=False) # fixed velocity, fixed position
elif self.ctxt_dim == 4:
initial_ball_state = ball_init(random=False)#raise NotImplementedError
self.sim.data.set_joint_qpos('tar:x', initial_ball_state[0])
self.sim.data.set_joint_qpos('tar:y', initial_ball_state[1])
self.sim.data.set_joint_qpos('tar:z', initial_ball_state[2])
self.sim.data.set_joint_qvel('tar:x', initial_ball_state[3])
self.sim.data.set_joint_qvel('tar:y', initial_ball_state[4])
self.sim.data.set_joint_qvel('tar:z', initial_ball_state[5])
z_extended_goal_pos = np.concatenate((self.goal[:2], [0.77]))
self.goal = z_extended_goal_pos
self.sim.model.body_pos[5] = self.goal[:3] # Desired Landing Position, Yellow
self.sim.model.body_pos[3] = np.array([0, 0, 0.5]) # Outgoing Ball Landing Position, Green
self.sim.model.body_pos[4] = np.array([0, 0, 0.5]) # Incoming Ball Landing Position, Red
self.sim.forward()
self.reward_func.reset(self.goal) # reset the reward function
return self._get_obs()
def _contact_checker(self, id_1, id_2):
for coni in range(0, self.sim.data.ncon):
con = self.sim.data.contact[coni]
collision = con.geom1 == id_1 and con.geom2 == id_2
collision_trans = con.geom1 == id_2 and con.geom2 == id_1
if collision or collision_trans:
return True
return False
def step(self, action):
if not self._ids_set:
self._set_ids()
done = False
episode_end = False if self.time_steps+1<MAX_EPISODE_STEPS else True
if not self.hited_ball:
self.hited_ball = self._contact_checker(self.ball_contact_id, self.paddle_contact_id_1) # check for one side
if not self.hited_ball:
self.hited_ball = self._contact_checker(self.ball_contact_id, self.paddle_contact_id_2) # check for other side
if self.hited_ball:
if not self.ball_contact_after_hit:
if self._contact_checker(self.ball_contact_id, self.floor_contact_id): # first check contact with floor
self.ball_contact_after_hit = True
self.ball_landing_pos = self.sim.data.body_xpos[self.ball_id]
elif self._contact_checker(self.ball_contact_id, self.table_contact_id): # second check contact with table
self.ball_contact_after_hit = True
self.ball_landing_pos = self.sim.data.body_xpos[self.ball_id]
c_ball_pos = self.sim.data.body_xpos[self.ball_id]
racket_pos = self.sim.data.geom_xpos[self.racket_id] # TODO: use this to reach out the position of the paddle?
if self.ball_landing_pos is not None:
done = True
episode_end =True
reward = self.reward_func.get_reward(episode_end, c_ball_pos, racket_pos, self.hited_ball, self.ball_landing_pos)
self.time_steps += 1
# gravity compensation on joints:
#action += self.sim.data.qfrc_bias[:7].copy()
try:
self.do_simulation(action, self.frame_skip)
except mujoco_py.MujocoException as e:
print('Simulation got unstable returning')
done = True
reward = -25
ob = self._get_obs()
return ob, reward, done, {"hit_ball":self.hited_ball}# might add some information here ....
def set_context(self, context):
old_state = self.sim.get_state()
qpos = old_state.qpos.copy()
qvel = old_state.qvel.copy()
self.set_state(qpos, qvel)
self.goal = context
z_extended_goal_pos = np.concatenate((self.goal[:self.ctxt_dim], [0.77]))
if self.ctxt_dim == 4:
z_extended_goal_pos = np.concatenate((z_extended_goal_pos, [0.77]))
self.goal = z_extended_goal_pos
self.sim.model.body_pos[5] = self.goal[:3] # TODO: Missing: Setting the desired incomoing landing position
self.sim.forward()
return self._get_obs()

View File

@ -0,0 +1,48 @@
import numpy as np
class TT_Reward:
def __init__(self, ctxt_dim):
self.ctxt_dim = ctxt_dim
self.c_goal = None # current desired landing point
self.c_ball_traj = []
self.c_racket_traj = []
self.constant = 8
def get_reward(self, episode_end, ball_position, racket_pos, hited_ball, ball_landing_pos):
self.c_ball_traj.append(ball_position.copy())
self.c_racket_traj.append(racket_pos.copy())
if not episode_end:
return 0
else:
# # seems to work for episodic case
min_r_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj) - np.array(self.c_racket_traj), axis=1))
if not hited_ball:
return 0.2 * (1- np.tanh(min_r_b_dist**2))
else:
if ball_landing_pos is None:
min_b_des_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj)[:,:2] - self.c_goal[:2], axis=1))
return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + (1 - np.tanh(min_b_des_b_dist**2))
else:
min_b_des_b_land_dist = np.linalg.norm(self.c_goal[:2] - ball_landing_pos[:2])
over_net_bonus = int(ball_landing_pos[0] < 0)
return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus
# if not hited_ball:
# min_r_b_dist = 1 + np.min(np.linalg.norm(np.array(self.c_ball_traj) - np.array(self.c_racket_traj), axis=1))
# return -min_r_b_dist
# else:
# if ball_landing_pos is None:
# dist_to_des_pos = 1-np.power(np.linalg.norm(self.c_goal - ball_position), 0.75)/self.constant
# else:
# dist_to_des_pos = 1-np.power(np.linalg.norm(self.c_goal - ball_landing_pos), 0.75)/self.constant
# if dist_to_des_pos < -0.2:
# dist_to_des_pos = -0.2
# return -dist_to_des_pos
def reset(self, goal):
self.c_goal = goal.copy()
self.c_ball_traj = []
self.c_racket_traj = []

View File

@ -0,0 +1,26 @@
import numpy as np
def ball_init(random=False, context_range=None):
if random:
dx = np.random.uniform(-0.1, 0.1) # TODO: clarify these numbers?
dy = np.random.uniform(-0.1, 0.1) # TODO: clarify these numbers?
dz = np.random.uniform(-0.1, 0.1) # TODO: clarify these numbers?
v_x = np.random.uniform(1.7, 1.8)
v_y = np.random.uniform(0.7, 0.8)
v_z = np.random.uniform(0.1, 0.2)
else:
dx = 1
dy = 0
dz = 0.05
v_x = 2.5
v_y = 2
v_z = 0.5
initial_x = 0 + dx - 1.2
initial_y = -0.2 + dy - 0.6
initial_z = 0.3 + dz + 1.5
initial_ball_state = np.array([initial_x, initial_y, initial_z, v_x, v_y, v_z])
return initial_ball_state

View File

@ -0,0 +1,12 @@
<mujocoinclude>
<actuator>
<motor name="wam/base_motor" joint="wam/base_yaw_joint_right" ctrllimited="true" ctrlrange="-1.0 1.0" gear="150.0"/>
<motor name="wam/shoulder_pitch_motor" joint='wam/shoulder_pitch_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="125.0"/>
<motor name="wam/shoulder_yaw_motor" joint='wam/shoulder_yaw_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="40.0"/>
<motor name="wam/elbow_pitch_motor" joint='wam/elbow_pitch_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="60.0"/>
<motor name="wam/wrist_yaw_motor" joint='wam/wrist_yaw_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0"/>
<motor name="wam/wrist_pitch_motor" joint='wam/wrist_pitch_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0"/>
<motor name="wam/palm_yaw_motor" joint='wam/palm_yaw_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="2.0"/>
</actuator>
</mujocoinclude>

View File

@ -0,0 +1,103 @@
<mujocoinclue>
<body name="wam/base_link_right" pos="2.1 0 2.0" quat="0 0 1 0" childclass="wam" >
<inertial pos="0 0 0" mass="1" diaginertia="0.1 0.1 0.1"/>
<geom name="base_link_fine" class="viz" mesh="base_link_fine" rgba="0.5 0.5 0.5 0"/>
<geom name="base_link_convex" class="col" mesh="base_link_convex" rgba="0.5 0.5 0.5 1"/>
<body name="wam/shoulder_yaw_link_right" pos="0 0 0.346">
<inertial pos="-0.00443422 -0.00066489 -0.128904" quat="0.69566 0.716713 -0.0354863 0.0334839" mass="5"
diaginertia="0.135089 0.113095 0.0904426"/>
<!-- control 0: 1.6-->
<joint name="wam/base_yaw_joint_right" range="-2.6 2.6" damping="1.98"/>
<geom name="shoulder_link_fine" class="viz" mesh="shoulder_link_fine" rgba="1 1 1 0"/>
<geom name="shoulder_link_convex_decomposition_p1" class="col"
mesh="shoulder_link_convex_decomposition_p1"/>
<geom name="shoulder_link_convex_decomposition_p2" class="col"
mesh="shoulder_link_convex_decomposition_p2"/>
<geom name="shoulder_link_convex_decomposition_p3" class="col"
mesh="shoulder_link_convex_decomposition_p3"/>
<body name="wam/shoulder_pitch_link_right" pos="0 0 0" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.00236981 -0.0154211 0.0310561" quat="0.961794 0.273112 -0.0169316 0.00866592"
mass="3.87494" diaginertia="0.0214195 0.0167127 0.0126452"/> <!--seems off-->
<!-- control 1: 0-->
<joint name="wam/shoulder_pitch_joint_right" range="-2 2" damping="0.55"/>
<geom name="shoulder_pitch_link_fine" class="viz" mesh="shoulder_pitch_link_fine" rgba="1 1 1 0"/>
<geom name="shoulder_pitch_link_convex" class="col" mesh="shoulder_pitch_link_convex"/>
<body name="wam/upper_arm_link_right" pos="0 0 0" quat="0.707107 0.707107 0 0">
<inertial pos="0.00683259 3.309e-005 0.392492" quat="0.647136 0.0170822 0.0143038 0.762049"
mass="2.20228" diaginertia="0.0592718 0.0592207 0.00313419"/>
<!-- control 2: 0-->
<joint name="wam/shoulder_yaw_joint_right" range="-2.8 2.8" damping="1.65"/>
<geom name="upper_arm_link_fine" class="viz" mesh="upper_arm_link_fine" rgba="1 1 1 0"/>
<geom name="upper_arm_link_convex_decomposition_p1" class="col"
mesh="upper_arm_link_convex_decomposition_p1" rgba="0.094 0.48 0.804 1"/>
<geom name="upper_arm_link_convex_decomposition_p2" class="col"
mesh="upper_arm_link_convex_decomposition_p2" rgba="0.094 0.48 0.804 1"/>
<body name="wam/forearm_link_right" pos="0.045 0 0.55" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.0400149 -0.142717 -0.00022942"
quat="0.704281 0.706326 0.0180333 0.0690353" mass="0.500168"
diaginertia="0.0151047 0.0148285 0.00275805"/>
<!-- control 3: 2.4-->
<joint name="wam/elbow_pitch_joint_right" range="-0.9 3.1" damping="0.88"/>
<geom name="elbow_link_fine" class="viz" mesh="elbow_link_fine" rgba="1 1 1 0"/>
<geom name="elbow_link_convex" class="col" mesh="elbow_link_convex"/>
<geom name="forearm_link_fine" class="viz" mesh="forearm_link_fine" pos="-.045 -0.0730 0"
euler="1.57 0 0" rgba="1 1 1 0"/>
<geom name="forearm_link_convex_decomposition_p1" class="col"
mesh="forearm_link_convex_decomposition_p1" pos="-0.045 -0.0730 0"
euler="1.57 0 0" rgba="0.094 0.48 0.804 1"/>
<geom name="forearm_link_convex_decomposition_p2" class="col"
mesh="forearm_link_convex_decomposition_p2" pos="-.045 -0.0730 0"
euler="1.57 0 0" rgba="0.094 0.48 0.804 1"/>
<body name="wam/wrist_yaw_link_right" pos="-0.045 -0.3 0" quat="0.707107 0.707107 0 0">
<inertial pos="8.921e-005 0.00435824 -0.00511217"
quat="0.630602 0.776093 0.00401969 -0.002372" mass="1.05376"
diaginertia="0.000555168 0.00046317 0.000234072"/> <!--this is an approximation-->
<!-- control 4: 0-->
<joint name="wam/wrist_yaw_joint_right" range="-4.8 1.3" damping="0.55"/>
<geom name="wrist_yaw_link_fine" class="viz" mesh="wrist_yaw_link_fine" rgba="1 1 1 0"/>
<geom name="wrist_yaw_link_convex_decomposition_p1" class="col"
mesh="wrist_yaw_link_convex_decomposition_p1"/>
<geom name="wrist_yaw_link_convex_decomposition_p2" class="col"
mesh="wrist_yaw_link_convex_decomposition_p2"/>
<body name="wam/wrist_pitch_link_right" pos="0 0 0" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.00012262 -0.0246834 -0.0170319"
quat="0.630602 0.776093 0.00401969 -0.002372" mass="0.517974"
diaginertia="0.000555168 0.00046317 0.000234072"/>
<!-- control 5: 0-->
<joint name="wam/wrist_pitch_joint_right" range="-1.6 1.6" damping="0.11"/>
<geom name="wrist_pitch_link_fine" class="viz" mesh="wrist_pitch_link_fine"
rgba="1 1 1 0"/>
<geom name="wrist_pitch_link_convex_decomposition_p1" rgba="1 0.5 0.313 1"
class="col" mesh="wrist_pitch_link_convex_decomposition_p1"/>
<geom name="wrist_pitch_link_convex_decomposition_p2" rgba="1 0.5 0.313 1"
class="col" mesh="wrist_pitch_link_convex_decomposition_p2"/>
<geom name="wrist_pitch_link_convex_decomposition_p3" rgba="1 0.5 0.313 1"
class="col" mesh="wrist_pitch_link_convex_decomposition_p3"/>
<body name="wam/wrist_palm_link_right" pos="0 0 0" quat="0.707107 0.707107 0 0">
<inertial pos="0 0 0.055" quat="0.707107 0 0 0.707107" mass="0.0828613"
diaginertia="0.00020683 0.00010859 0.00010851"/>
<!-- control 6: 1.8-->
<joint name="wam/palm_yaw_joint_right" range="-2.2 2.2" damping="0.11"/>
<geom name="wrist_palm_link_fine" class="viz" mesh="wrist_palm_link_fine"
rgba="1 1 1 0"/>
<geom name="wrist_palm_link_convex" class="col" mesh="wrist_palm_link_convex"/>
<!-- EE=wam/paddle, configure name to the end effector name-->
<body name="EE" pos="0 0 0.26" childclass="contact_geom">
<geom name="bat" type="cylinder" size="0.075 0.005" rgba="1 0 0 1"
quat="0.71 0 0.71 0"/>
<geom name="bat_back" type="cylinder" size="0.0749 0.0025" rgba="0 1 0 1"
quat="0.71 0 0.71 0" pos="-0.0026 0 0"/>
<geom name="wam/paddle_handle" type="box" size="0.005 0.01 0.05" pos="0 0 -0.08"
rgba="1 1 1 1"/>
<!-- Extract information for sampling goals.-->
<site name="wam/paddle_center" pos="0 0 0" rgba="1 1 1 1" size="0.00001"/>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</mujocoinclue>

View File

@ -0,0 +1,30 @@
<mujocoinclude>
<body name="table_tennis_table" pos="0 0 0">
<geom class="viz" name="table_base_1" pos="1 0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
<geom class="viz" name="table_base_2" pos="1 -0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
<geom class="viz" name="table_base_3" pos="-1 -0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
<geom class="viz" name="table_base_4" pos="-1 0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
<body name="table_top" pos="0 0 0.76">
<geom class="contact_geom" name="table_tennis_table" pos="0 0 0" rgba="0 0 0.5 1" size="1.37 .7625 .01" type="box" />
<site name="left_up_corner" pos="-1.37 .7625 0.01" rgba="1 1 1 1" size="0.00001" />
<site name="middle_up_corner" pos="0 .7625 0.01" rgba="1 1 1 1" size="0.00001" />
<site name="left_down_corner" pos="-1.37 -0.7625 0.01" rgba="1 1 1 1" size="0.00001" />
<site name="middle_down_corner" pos="0 -.7625 0.01" rgba="1 1 1 1" size="0.00001" />
<geom class="contact_geom" material="floor_plane" name="table_te_context_spacennis_net" pos="0 0 0.08625" rgba="0 0 1 0.5" size="0.01 0.915 0.07625" type="box" />
<geom class="viz" name="left_while_line" pos="0 -0.7425 0.01" rgba="1 1 1 1" size="1.37 .02 .001" type="box" />
<geom class="viz" name="center_while_line" pos="0 0 0.01" rgba="1 1 1 1" size="1.37 .01 .001" type="box" />
<geom class="viz" name="right_while_line" pos="0 0.7425 0.01" rgba="1 1 1 1" size="1.37 .02 .001" type="box" />
<geom class="viz" name="right_side_line" pos="1.35 0 0.01" rgba="1 1 1 1" size="0.02 .7625 .001" type="box" />
<geom class="viz" name="left_side_line" pos="-1.35 0 0.01" rgba="1 1 1 1" size="0.02 .7625 .001" type="box" />
</body>
<body name="achieved_pos" pos="0 0 0.5">
<geom class="viz" name="achieved_point_geom" pos="0 0 0" rgba="0 1 0 1" size="0.02 0.001" type="cylinder" />
</body>
<body name="right_achieved_pos" pos="0 0 0.5">
<geom class="viz" name="hitting_achieved_point_geom" pos="0 0 0" rgba="1 0 0 1" size="0.02 0.001" type="cylinder" />
</body>
<body name="target_point" pos="0 0 0.5">
<geom class="viz" name="target_point_geom" pos="0 0 0" rgba="1 1 0 1" size="0.02 0.001" type="cylinder" />
</body>
</body>
</mujocoinclude>

View File

@ -0,0 +1,10 @@
<mujocoinclude>
<body name="target_ball" pos="0 0 0">
<joint axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>
<joint axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>
<joint axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>
<geom size="0.025 0.025 0.025" type="sphere" condim="4" name="target_ball_contact" rgba="1 1 0 1" mass="0.1"
friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="0.1 0.03" priority="1"/>
<site name="target_ball" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"/>
</body>
</mujocoinclude>

View File

@ -0,0 +1,47 @@
<mujocoinclude>
<actuator>
<!-- <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="100.0" />-->
<!-- <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="162.0" />-->
<!-- <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="100.0" />-->
<!-- <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="122.0" />-->
<!-- <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="100.0" />-->
<!-- <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="102.0" />-->
<!-- <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="100.0" />-->
<!-- <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="151.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="125.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="122.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="121.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="103.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="100.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="600.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="122.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="500.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="103.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
<position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="800.0" ctrllimited="true"/>
<position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="800.0" ctrllimited="true"/>
<position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="800.0" ctrllimited="true"/>
<position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="800.0" ctrllimited="true"/>
<position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="100.0" ctrllimited="true"/>
<position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="1000.0" ctrllimited="true"/>
<position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="100.0" ctrllimited="true"/>
<!-- <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="1600.0" ctrllimited="true"/>-->
<!--&lt;!&ndash; <velocity ctrlrange="-50 50" joint="wam/base_yaw_joint_right" kv="100" ctrllimited="true"/>&ndash;&gt;-->
<!-- <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="2000.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="800.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="1200.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="100.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="2000.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="100.0" ctrllimited="true"/>-->
</actuator>
</mujocoinclude>

View File

@ -0,0 +1,46 @@
<mujocoinclude>
<default>
<default class="wam">
<joint type="hinge" limited="true" pos="0 0 0" axis="0 0 1"/>
</default>
<default class="viz">
<geom type="mesh" contype="0" conaffinity="0" group="1" rgba="1 1 1 1"/>
</default>
<default class="col">
<geom type="mesh" contype="0" conaffinity="1" group="0" rgba="1 1 1 1"/>
</default>
<default class="contact_geom">
<geom condim="4" friction="0.1 0.1 0.1" margin="0" solimp="1 1 0" solref="0.1 0.03"/>
</default>
</default>
<asset>
<mesh file="base_link_fine.stl"/>
<mesh file="base_link_convex.stl"/>
<mesh file="shoulder_link_fine.stl"/>
<mesh file="shoulder_link_convex_decomposition_p1.stl"/>
<mesh file="shoulder_link_convex_decomposition_p2.stl"/>
<mesh file="shoulder_link_convex_decomposition_p3.stl"/>
<mesh file="shoulder_pitch_link_fine.stl"/>
<mesh file="shoulder_pitch_link_convex.stl"/>
<mesh file="upper_arm_link_fine.stl"/>
<mesh file="upper_arm_link_convex_decomposition_p1.stl"/>
<mesh file="upper_arm_link_convex_decomposition_p2.stl"/>
<mesh file="elbow_link_fine.stl"/>
<mesh file="elbow_link_convex.stl"/>
<mesh file="forearm_link_fine.stl"/>
<mesh file="forearm_link_convex_decomposition_p1.stl"/>
<mesh file="forearm_link_convex_decomposition_p2.stl"/>
<mesh file="wrist_yaw_link_fine.stl"/>
<mesh file="wrist_yaw_link_convex_decomposition_p1.stl"/>
<mesh file="wrist_yaw_link_convex_decomposition_p2.stl"/>
<mesh file="wrist_pitch_link_fine.stl"/>
<mesh file="wrist_pitch_link_convex_decomposition_p1.stl"/>
<mesh file="wrist_pitch_link_convex_decomposition_p2.stl"/>
<mesh file="wrist_pitch_link_convex_decomposition_p3.stl"/>
<mesh file="wrist_palm_link_fine.stl"/>
<mesh file="wrist_palm_link_convex.stl"/>
<texture builtin="checker" height="512" name="texplane" rgb1=".2 .3 .4" rgb2=".1 0.15 0.2" type="2d"
width="512"/>
<material name="floor_plane" reflectance="0.5" texrepeat="1 1" texture="texplane" texuniform="true"/>
</asset>
</mujocoinclude>

View File

@ -0,0 +1,18 @@
<mujoco model="table_tennis(v0.1)">
<compiler angle="radian" coordinate="local" meshdir="../../meshes/wam" />
<option gravity="0 0 -9.81" timestep="0.002">
<flag warmstart="enable" />
</option>
<custom>
<numeric data="0 0 0 0 0 0 0" name="START_ANGLES" />
</custom>
<include file="shared.xml" />
<worldbody>
<light cutoff="60" diffuse="1 1 1" dir="-.1 -.2 -1.3" directional="true" exponent="1" pos=".1 .2 1.3" specular=".1 .1 .1" />
<geom conaffinity="1" contype="1" material="floor_plane" name="floor" pos="0 0 0" size="10 5 1" type="plane" />
<include file="include_table.xml" />
<include file="include_barrett_wam_7dof_right.xml" />
<include file="include_target_ball.xml" />
</worldbody>
<include file="include_7_motor_actuator.xml" />
</mujoco>

View File

@ -15,10 +15,10 @@ def _spec_to_box(spec):
assert s.dtype == np.float64 or s.dtype == np.float32, f"Only float64 and float32 types are allowed, instead {s.dtype} was found" assert s.dtype == np.float64 or s.dtype == np.float32, f"Only float64 and float32 types are allowed, instead {s.dtype} was found"
dim = int(np.prod(s.shape)) dim = int(np.prod(s.shape))
if type(s) == specs.Array: if type(s) == specs.Array:
bound = np.inf * np.ones(dim, dtype=np.float32) bound = np.inf * np.ones(dim, dtype=s.dtype)
return -bound, bound return -bound, bound
elif type(s) == specs.BoundedArray: elif type(s) == specs.BoundedArray:
zeros = np.zeros(dim, dtype=np.float32) zeros = np.zeros(dim, dtype=s.dtype)
return s.minimum + zeros, s.maximum + zeros return s.minimum + zeros, s.maximum + zeros
mins, maxs = [], [] mins, maxs = [], []
@ -29,7 +29,7 @@ def _spec_to_box(spec):
low = np.concatenate(mins, axis=0) low = np.concatenate(mins, axis=0)
high = np.concatenate(maxs, axis=0) high = np.concatenate(maxs, axis=0)
assert low.shape == high.shape assert low.shape == high.shape
return spaces.Box(low, high, dtype=np.float32) return spaces.Box(low, high, dtype=s.dtype)
def _flatten_obs(obs: collections.MutableMapping): def _flatten_obs(obs: collections.MutableMapping):
@ -113,7 +113,7 @@ class DMCWrapper(core.Env):
if self._channels_first: if self._channels_first:
obs = obs.transpose(2, 0, 1).copy() obs = obs.transpose(2, 0, 1).copy()
else: else:
obs = _flatten_obs(time_step.observation) obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype)
return obs return obs
@property @property