Merge branch 'master' into deprecate_detpmp
# Conflicts: # README.md # alr_envs/alr/__init__.py
This commit is contained in:
commit
083e937e17
17
README.md
17
README.md
@ -1,26 +1,25 @@
|
||||
## ALR Robotics Control Environments
|
||||
|
||||
This project offers a large verity of reinforcement learning environments under a unifying interface base on OpenAI gym.
|
||||
Besides, some custom environments we also provide support for the benchmark suites
|
||||
[OpenAI gym](https://gym.openai.com/),
|
||||
This project offers a large variety of reinforcement learning environments under the unifying interface of [OpenAI gym](https://gym.openai.com/).
|
||||
Besides, we also provide support (under the OpenAI interface) for the benchmark suites
|
||||
[DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control)
|
||||
(DMC), and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environment can be created according
|
||||
(DMC) and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environments can be created according
|
||||
to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md). Unlike existing libraries, we
|
||||
further support to control agents with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP,
|
||||
additionally support to control agents with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP,
|
||||
we only consider the mean usually).
|
||||
|
||||
## Motion Primitive Environments (Episodic environments)
|
||||
|
||||
Unlike step-based environments, motion primitive (MP) environments are closer related to stochastic search, black box
|
||||
optimization and methods that often used in robotics. MP environments are trajectory-based and always execute a full
|
||||
optimization, and methods that are often used in robotics. MP environments are trajectory-based and always execute a full
|
||||
trajectory, which is generated by a Dynamic Motion Primitive (DMP) or a Probabilistic Motion Primitive (ProMP). The
|
||||
generated trajectory is translated into individual step-wise actions by a controller. The exact choice of controller is,
|
||||
however, dependent on the type of environment. We currently support position, velocity, and PD-Controllers for position,
|
||||
velocity and torque control, respectively. The goal of all MP environments is still to learn a policy. Yet, an action
|
||||
velocity, and torque control, respectively. The goal of all MP environments is still to learn a policy. Yet, an action
|
||||
represents the parametrization of the motion primitives to generate a suitable trajectory. Additionally, in this
|
||||
framework we support the above setting for the contextual setting, for which we expose all changing substates of the
|
||||
framework we support all of this also for the contextual setting, for which we expose all changing substates of the
|
||||
task as a single observation in the beginning. This requires to predict a new action/MP parametrization for each
|
||||
trajectory. All environments provide the next to the cumulative episode reward also all collected information from each
|
||||
trajectory. All environments provide next to the cumulative episode reward all collected information from each
|
||||
step as part of the info dictionary. This information should, however, mainly be used for debugging and logging.
|
||||
|
||||
|Key| Description|
|
||||
|
@ -1,3 +1,4 @@
|
||||
import numpy as np
|
||||
from gym import register
|
||||
|
||||
from . import classic_control, mujoco
|
||||
@ -9,6 +10,8 @@ from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
|
||||
from .mujoco.reacher.alr_reacher import ALRReacherEnv
|
||||
from .mujoco.reacher.balancing import BalancingEnv
|
||||
|
||||
from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
|
||||
|
||||
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
|
||||
|
||||
# Classic Control
|
||||
@ -193,6 +196,31 @@ register(
|
||||
}
|
||||
)
|
||||
|
||||
## Table Tennis
|
||||
register(id='TableTennis2DCtxt-v0',
|
||||
entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
|
||||
max_episode_steps=MAX_EPISODE_STEPS,
|
||||
kwargs={'ctxt_dim':2})
|
||||
|
||||
register(id='TableTennis4DCtxt-v0',
|
||||
entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
|
||||
max_episode_steps=MAX_EPISODE_STEPS,
|
||||
kwargs={'ctxt_dim':4})
|
||||
|
||||
## BeerPong
|
||||
difficulties = ["simple", "intermediate", "hard", "hardest"]
|
||||
|
||||
for v, difficulty in enumerate(difficulties):
|
||||
register(
|
||||
id='ALRBeerPong-v{}'.format(v),
|
||||
entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
|
||||
max_episode_steps=600,
|
||||
kwargs={
|
||||
"difficulty": difficulty,
|
||||
"reward_type": "staged",
|
||||
}
|
||||
)
|
||||
|
||||
# Motion Primitive Environments
|
||||
|
||||
## Simple Reacher
|
||||
@ -330,3 +358,59 @@ for _v in _versions:
|
||||
}
|
||||
)
|
||||
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
|
||||
## Beerpong
|
||||
register(
|
||||
id='BeerpongProMP-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||
kwargs={
|
||||
"name": "alr_envs:ALRBeerPong-v0",
|
||||
"wrappers": [mujoco.beerpong.MPWrapper],
|
||||
"mp_kwargs": {
|
||||
"num_dof": 7,
|
||||
"num_basis": 2,
|
||||
"n_zero_bases": 2,
|
||||
"duration": 0.5,
|
||||
"post_traj_time": 2.5,
|
||||
# "width": 0.01,
|
||||
# "off": 0.01,
|
||||
"policy_type": "motor",
|
||||
"weights_scale": 0.08,
|
||||
"zero_start": True,
|
||||
"zero_goal": False,
|
||||
"policy_kwargs": {
|
||||
"p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
|
||||
"d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("BeerpongProMP-v0")
|
||||
|
||||
## Table Tennis
|
||||
register(
|
||||
id='TableTennisProMP-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||
kwargs={
|
||||
"name": "alr_envs:TableTennis4DCtxt-v0",
|
||||
"wrappers": [mujoco.table_tennis.MPWrapper],
|
||||
"mp_kwargs": {
|
||||
"num_dof": 7,
|
||||
"num_basis": 2,
|
||||
"n_zero_bases": 2,
|
||||
"duration": 1.25,
|
||||
"post_traj_time": 4.5,
|
||||
# "width": 0.01,
|
||||
# "off": 0.01,
|
||||
"policy_type": "motor",
|
||||
"weights_scale": 1.0,
|
||||
"zero_start": True,
|
||||
"zero_goal": False,
|
||||
"policy_kwargs": {
|
||||
"p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
|
||||
"d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("TableTennisProMP-v0")
|
||||
|
@ -106,7 +106,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
|
||||
# self._tmp_hole_depth,
|
||||
self.end_effector - self._goal,
|
||||
self._steps
|
||||
])
|
||||
]).astype(np.float32)
|
||||
|
||||
def _get_line_points(self, num_points_per_link=1):
|
||||
theta = self._joint_angles[:, None]
|
||||
|
@ -73,7 +73,7 @@ class SimpleReacherEnv(BaseReacherTorqueEnv):
|
||||
self._angle_velocity,
|
||||
self.end_effector - self._goal,
|
||||
self._steps
|
||||
])
|
||||
]).astype(np.float32)
|
||||
|
||||
def _generate_goal(self):
|
||||
|
||||
|
@ -110,7 +110,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
|
||||
self.end_effector - self._via_point,
|
||||
self.end_effector - self._goal,
|
||||
self._steps
|
||||
])
|
||||
]).astype(np.float32)
|
||||
|
||||
def _check_collisions(self) -> bool:
|
||||
return self._check_self_collision()
|
||||
|
@ -2,3 +2,5 @@ from .reacher.alr_reacher import ALRReacherEnv
|
||||
from .reacher.balancing import BalancingEnv
|
||||
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
|
||||
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
|
||||
from .table_tennis.tt_gym import TT_Env_Gym
|
||||
from .beerpong.beerpong import ALRBeerBongEnv
|
@ -0,0 +1 @@
|
||||
from .mp_wrapper import MPWrapper
|
187
alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
Normal file
187
alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
Normal file
@ -0,0 +1,187 @@
|
||||
<mujoco model="wam(v1.31)">
|
||||
<compiler angle="radian" meshdir="../../meshes/wam/" />
|
||||
<option timestep="0.005" integrator="Euler" />
|
||||
<size njmax="500" nconmax="100" />
|
||||
<default class="main">
|
||||
<joint limited="true" frictionloss="0.001" damping="0.07"/>
|
||||
<default class="viz">
|
||||
<geom type="mesh" contype="0" conaffinity="0" group="1" rgba="0.7 0.7 0.7 1" />
|
||||
</default>
|
||||
<default class="col">
|
||||
<geom type="mesh" contype="0" rgba="0.5 0.6 0.7 1" />
|
||||
</default>
|
||||
</default>
|
||||
<asset>
|
||||
<texture type="2d" name="groundplane" builtin="checker" mark="edge" rgb1="0.25 0.26 0.25" rgb2="0.22 0.22 0.22" markrgb="0.3 0.3 0.3" width="100" height="100" />
|
||||
<material name="MatGnd" texture="groundplane" texrepeat="5 5" specular="1" shininess="0.3" reflectance="1e-05" />
|
||||
<mesh name="base_link_fine" file="base_link_fine.stl" />
|
||||
<mesh name="base_link_convex" file="base_link_convex.stl" />
|
||||
<mesh name="shoulder_link_fine" file="shoulder_link_fine.stl" />
|
||||
<mesh name="shoulder_link_convex_decomposition_p1" file="shoulder_link_convex_decomposition_p1.stl" />
|
||||
<mesh name="shoulder_link_convex_decomposition_p2" file="shoulder_link_convex_decomposition_p2.stl" />
|
||||
<mesh name="shoulder_link_convex_decomposition_p3" file="shoulder_link_convex_decomposition_p3.stl" />
|
||||
<mesh name="shoulder_pitch_link_fine" file="shoulder_pitch_link_fine.stl" />
|
||||
<mesh name="shoulder_pitch_link_convex" file="shoulder_pitch_link_convex.stl" />
|
||||
<mesh name="upper_arm_link_fine" file="upper_arm_link_fine.stl" />
|
||||
<mesh name="upper_arm_link_convex_decomposition_p1" file="upper_arm_link_convex_decomposition_p1.stl" />
|
||||
<mesh name="upper_arm_link_convex_decomposition_p2" file="upper_arm_link_convex_decomposition_p2.stl" />
|
||||
<mesh name="elbow_link_fine" file="elbow_link_fine.stl" />
|
||||
<mesh name="elbow_link_convex" file="elbow_link_convex.stl" />
|
||||
<mesh name="forearm_link_fine" file="forearm_link_fine.stl" />
|
||||
<mesh name="forearm_link_convex_decomposition_p1" file="forearm_link_convex_decomposition_p1.stl" />
|
||||
<mesh name="forearm_link_convex_decomposition_p2" file="forearm_link_convex_decomposition_p2.stl" />
|
||||
<mesh name="wrist_yaw_link_fine" file="wrist_yaw_link_fine.stl" />
|
||||
<mesh name="wrist_yaw_link_convex_decomposition_p1" file="wrist_yaw_link_convex_decomposition_p1.stl" />
|
||||
<mesh name="wrist_yaw_link_convex_decomposition_p2" file="wrist_yaw_link_convex_decomposition_p2.stl" />
|
||||
<mesh name="wrist_pitch_link_fine" file="wrist_pitch_link_fine.stl" />
|
||||
<mesh name="wrist_pitch_link_convex_decomposition_p1" file="wrist_pitch_link_convex_decomposition_p1.stl" />
|
||||
<mesh name="wrist_pitch_link_convex_decomposition_p2" file="wrist_pitch_link_convex_decomposition_p2.stl" />
|
||||
<mesh name="wrist_pitch_link_convex_decomposition_p3" file="wrist_pitch_link_convex_decomposition_p3.stl" />
|
||||
<mesh name="wrist_palm_link_fine" file="wrist_palm_link_fine.stl" />
|
||||
<mesh name="wrist_palm_link_convex" file="wrist_palm_link_convex.stl" />
|
||||
<mesh name="cup1" file="cup_split1.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup2" file="cup_split2.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup3" file="cup_split3.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup4" file="cup_split4.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup5" file="cup_split5.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup6" file="cup_split6.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup7" file="cup_split7.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup8" file="cup_split8.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup9" file="cup_split9.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup10" file="cup_split10.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup11" file="cup_split11.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup12" file="cup_split12.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup13" file="cup_split13.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup14" file="cup_split14.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup15" file="cup_split15.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup16" file="cup_split16.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup17" file="cup_split17.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup18" file="cup_split18.stl" scale="0.001 0.001 0.001" />
|
||||
<mesh name="cup3_table" file="cup_split3.stl" scale="0.00211 0.00211 0.01" />
|
||||
<mesh name="cup4_table" file="cup_split4.stl" scale="0.00211 0.00211 0.01" />
|
||||
<mesh name="cup5_table" file="cup_split5.stl" scale="0.00211 0.00211 0.01" />
|
||||
<mesh name="cup6_table" file="cup_split6.stl" scale="0.00211 0.00211 0.01" />
|
||||
<mesh name="cup7_table" file="cup_split7.stl" scale="0.00211 0.00211 0.01" />
|
||||
<mesh name="cup8_table" file="cup_split8.stl" scale="0.00211 0.00211 0.01" />
|
||||
<mesh name="cup9_table" file="cup_split9.stl" scale="0.00211 0.00211 0.01" />
|
||||
<mesh name="cup10_table" file="cup_split10.stl" scale="0.00211 0.00211 0.01" />
|
||||
<mesh name="cup15_table" file="cup_split15.stl" scale="0.00211 0.00211 0.01" />
|
||||
<mesh name="cup16_table" file="cup_split16.stl" scale="0.00211 0.00211 0.01" />
|
||||
<mesh name="cup17_table" file="cup_split17.stl" scale="0.00211 0.00211 0.01" />
|
||||
<mesh name="cup18_table" file="cup_split18.stl" scale="0.00211 0.00211 0.01" />
|
||||
</asset>
|
||||
|
||||
<worldbody>
|
||||
<geom name="ground" size="5 5 1" type="plane" material="MatGnd" />
|
||||
<light pos="0.1 0.2 1.3" dir="-0.0758098 -0.32162 -0.985527" directional="true" cutoff="60" exponent="1" diffuse="1 1 1" specular="0.1 0.1 0.1" />
|
||||
|
||||
<body name="wam/base_link" pos="0 0 0.6">
|
||||
<inertial pos="6.93764e-06 0.0542887 0.076438" quat="0.496481 0.503509 -0.503703 0.496255" mass="27.5544" diaginertia="0.432537 0.318732 0.219528" />
|
||||
<geom class="viz" quat="0.707107 0 0 -0.707107" mesh="base_link_fine" />
|
||||
<geom class="col" quat="0.707107 0 0 -0.707107" mesh="base_link_convex" name="base_link_convex_geom"/>
|
||||
<body name="wam/shoulder_yaw_link" pos="0 0 0.16" quat="0.707107 0 0 -0.707107">
|
||||
<inertial pos="-0.00443422 -0.00066489 -0.12189" quat="0.999995 0.000984795 0.00270132 0.00136071" mass="10.7677" diaginertia="0.507411 0.462983 0.113271" />
|
||||
<joint name="wam/base_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.6 2.6" />
|
||||
<geom class="viz" pos="0 0 0.186" mesh="shoulder_link_fine" />
|
||||
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p1" name="shoulder_link_convex_decomposition_p1_geom"/>
|
||||
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p2" name="shoulder_link_convex_decomposition_p2_geom"/>
|
||||
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p3" name="shoulder_link_convex_decomposition_p3_geom"/>
|
||||
<body name="wam/shoulder_pitch_link" pos="0 0 0.184" quat="0.707107 -0.707107 0 0">
|
||||
<inertial pos="-0.00236983 -0.0154211 0.0310561" quat="0.961781 -0.272983 0.0167269 0.0133385" mass="3.87494" diaginertia="0.0214207 0.0167101 0.0126465" />
|
||||
<joint name="wam/shoulder_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.985 1.985" />
|
||||
<geom class="viz" mesh="shoulder_pitch_link_fine" />
|
||||
<geom class="col" mesh="shoulder_pitch_link_convex" />
|
||||
<body name="wam/upper_arm_link" pos="0 -0.505 0" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="-0.0382586 3.309e-05 -0.207508" quat="0.705455 0.0381914 0.0383402 0.706686" mass="1.80228" diaginertia="0.0665697 0.0634285 0.00622701" />
|
||||
<joint name="wam/shoulder_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.8 2.8" />
|
||||
<geom class="viz" pos="0 0 -0.505" mesh="upper_arm_link_fine" />
|
||||
<geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p1" name="upper_arm_link_convex_decomposition_p1_geom"/>
|
||||
<geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p2" name="upper_arm_link_convex_decomposition_p2_geom"/>
|
||||
<body name="wam/forearm_link" pos="0.045 0 0.045" quat="0.707107 -0.707107 0 0">
|
||||
<inertial pos="0.00498512 -0.132717 -0.00022942" quat="0.546303 0.447151 -0.548676 0.447842" mass="2.40017" diaginertia="0.0196896 0.0152225 0.00749914" />
|
||||
<joint name="wam/elbow_pitch_joint" pos="0 0 0" axis="0 0 1" range="-0.9 3.14159" />
|
||||
<geom class="viz" mesh="elbow_link_fine" />
|
||||
<geom class="col" mesh="elbow_link_convex" />
|
||||
<geom class="viz" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_fine" />
|
||||
<geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p1" name="forearm_link_convex_decomposition_p1_geom" />
|
||||
<geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p2" name="forearm_link_convex_decomposition_p2_geom" />
|
||||
<body name="wam/wrist_yaw_link" pos="-0.045 0 0" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="8.921e-05 0.00435824 -0.00511217" quat="0.708528 -0.000120667 0.000107481 0.705683" mass="0.12376" diaginertia="0.0112011 0.0111887 7.58188e-05" />
|
||||
<joint name="wam/wrist_yaw_joint" pos="0 0 0" axis="0 0 1" range="-4.55 1.25" />
|
||||
<geom class="viz" pos="0 0 0.3" mesh="wrist_yaw_link_fine" />
|
||||
<geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p1" name="wrist_yaw_link_convex_decomposition_p1_geom" />
|
||||
<geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p2" name="wrist_yaw_link_convex_decomposition_p2_geom" />
|
||||
<body name="wam/wrist_pitch_link" pos="0 0 0.3" quat="0.707107 -0.707107 0 0">
|
||||
<inertial pos="-0.00012262 -0.0246834 -0.0170319" quat="0.994687 -0.102891 0.000824211 -0.00336105" mass="0.417974" diaginertia="0.000555166 0.000463174 0.00023407" />
|
||||
<joint name="wam/wrist_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.5707 1.5707" />
|
||||
<geom class="viz" mesh="wrist_pitch_link_fine" />
|
||||
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p1" name="wrist_pitch_link_convex_decomposition_p1_geom" />
|
||||
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p2" name="wrist_pitch_link_convex_decomposition_p2_geom" />
|
||||
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p3" name="wrist_pitch_link_convex_decomposition_p3_geom" />
|
||||
<body name="wam/wrist_palm_link" pos="0 -0.06 0" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="-7.974e-05 -0.00323552 -0.00016313" quat="0.594752 0.382453 0.382453 0.594752" mass="0.0686475" diaginertia="7.408e-05 3.81466e-05 3.76434e-05" />
|
||||
<joint name="wam/palm_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.7 2.7" />
|
||||
<geom class="viz" pos="0 0 -0.06" mesh="wrist_palm_link_fine" />
|
||||
<geom class="col" pos="0 0 -0.06" mesh="wrist_palm_link_convex" name="wrist_palm_link_convex_geom" />
|
||||
<site name="init_ball_pos_site" size="0.025 0.025 0.025" pos="0.0 0.0 0.035" rgba="0 1 0 1"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
<body name="table_body" pos="0 -1.85 0.4025">
|
||||
<geom name="table" type="box" size="0.4 0.6 0.4" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
|
||||
solref="-10000 -100"/>
|
||||
<geom name="table_contact_geom" type="box" size="0.4 0.6 0.1" pos="0 0 0.31" rgba="1.4 0.8 0.45 1" solimp="0.999 0.999 0.001"
|
||||
solref="-10000 -100"/>
|
||||
</body>
|
||||
<geom name="table_robot" type="box" size="0.1 0.1 0.3" pos="0 0.00 0.3025" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
|
||||
solref="-10000 -100"/>
|
||||
<geom name="wall" type="box" quat="1 0 0 0" size="0.4 0.04 1.1" pos="0. -2.45 1.1" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
|
||||
solref="-10000 -100"/>
|
||||
|
||||
<body name="cup_table" pos="0.32 -1.55 0.84" quat="0.7071068 0.7071068 0 0">
|
||||
<inertial pos="-3.75236e-10 8.27811e-05 0.0947015" quat="0.999945 -0.0104888 0 0" mass="10.132" diaginertia="0.000285643 0.000270485 9.65696e-05" />
|
||||
<geom priority= "1" name="cup_geom_table3" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup3_table" mass="10"/>
|
||||
<geom priority= "1" name="cup_geom_table4" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup4_table" mass="10"/>
|
||||
<geom priority= "1" name="cup_geom_table5" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup5_table" mass="10"/>
|
||||
<geom priority= "1" name="cup_geom_table6" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup6_table" mass="10"/>
|
||||
<geom priority= "1" name="cup_geom_table7" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup7_table" mass="10"/>
|
||||
<geom priority= "1" name="cup_geom_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup8_table" mass="10"/>
|
||||
<geom priority= "1" name="cup_geom_table9" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup9_table" mass="10"/>
|
||||
<geom priority= "1" name="cup_geom_table10" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -010" type="mesh" mesh="cup10_table" mass="10"/>
|
||||
<geom priority= "1" name="cup_base_table" pos="0 -0.035 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.08 0.045" solref="-10000 -100" mass="10"/>
|
||||
<geom priority= "1" name="cup_base_table_contact" pos="0 0.015 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.07 0.01" solref="-10000 -100" rgba="0 0 255 1" mass="10"/>
|
||||
<geom priority= "1" name="cup_geom_table15" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup15_table" mass="10"/>
|
||||
<geom priority= "1" name="cup_geom_table16" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup16_table" mass="10"/>
|
||||
<geom priority= "1" name="cup_geom_table17" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup17_table" mass="10"/>
|
||||
<geom priority= "1" name="cup_geom1_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup18_table" mass="10"/>
|
||||
<site name="cup_goal_table" pos="0 0.11 0.1337249" rgba="255 0 0 1"/>
|
||||
<site name="cup_goal_final_table" pos="0.0 0.025 0.1337249" rgba="0 255 0 1"/>
|
||||
</body>
|
||||
<body name="ball" pos="0 0 0">
|
||||
<joint axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
|
||||
<joint axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
|
||||
<joint axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
|
||||
<geom priority= "1" size="0.025 0.025 0.025" type="sphere" condim="4" name="ball_geom" rgba="0.8 0.2 0.1 1" mass="0.1"
|
||||
friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="-10000 -10"/>
|
||||
<site name="target_ball" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"/>
|
||||
</body>
|
||||
<!-- <camera name="visualization" mode="targetbody" target="wam/wrist_yaw_link" pos="1.5 -0.4 2.2"/>-->
|
||||
<!-- <camera name="experiment" mode="fixed" quat="0.44418059 0.41778323 0.54301123 0.57732103" pos="1.5 -0.3 1.33" />-->
|
||||
<camera name="visualization" mode="fixed" euler="2.35 2.0 -0.75" pos="2.0 -0.0 1.85"/>
|
||||
</worldbody>
|
||||
<actuator>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="150.0" joint="wam/base_yaw_joint"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="wam/shoulder_pitch_joint"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="50.0" joint="wam/shoulder_yaw_joint"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="60.0" joint="wam/elbow_pitch_joint"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_yaw_joint"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_pitch_joint"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="2.0" joint="wam/palm_yaw_joint"/>
|
||||
</actuator>
|
||||
|
||||
</mujoco>
|
@ -1,3 +1,4 @@
|
||||
import mujoco_py.builder
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
@ -5,44 +6,67 @@ from gym import utils
|
||||
from gym.envs.mujoco import MujocoEnv
|
||||
|
||||
|
||||
|
||||
class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
|
||||
def __init__(self, model_path, frame_skip, n_substeps=4, apply_gravity_comp=True, reward_function=None):
|
||||
utils.EzPickle.__init__(**locals())
|
||||
MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip)
|
||||
class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
|
||||
def __init__(self, frame_skip=1, apply_gravity_comp=True, reward_type: str = "staged", noisy=False,
|
||||
context: np.ndarray = None, difficulty='simple'):
|
||||
self._steps = 0
|
||||
|
||||
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
|
||||
"beerpong" + ".xml")
|
||||
|
||||
self.start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
|
||||
self.start_vel = np.zeros(7)
|
||||
|
||||
self._q_pos = []
|
||||
self._q_vel = []
|
||||
# self.weight_matrix_scale = 50
|
||||
self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
|
||||
self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
|
||||
self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
|
||||
"beerpong_wo_cup" + ".xml")
|
||||
|
||||
self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
|
||||
self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
|
||||
|
||||
self.context = None
|
||||
# alr_mujoco_env.AlrMujocoEnv.__init__(self,
|
||||
# self.xml_path,
|
||||
# apply_gravity_comp=apply_gravity_comp,
|
||||
# n_substeps=n_substeps)
|
||||
self.context = context
|
||||
self.apply_gravity_comp = apply_gravity_comp
|
||||
self.add_noise = noisy
|
||||
|
||||
self.sim_time = 8 # seconds
|
||||
self.sim_steps = int(self.sim_time / self.dt)
|
||||
if reward_function is None:
|
||||
from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerpongReward
|
||||
reward_function = BeerpongReward
|
||||
self.reward_function = reward_function(self.sim, self.sim_steps)
|
||||
self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
|
||||
self.ball_id = self.sim.model._body_name2id["ball"]
|
||||
self.cup_table_id = self.sim.model._body_name2id["cup_table"]
|
||||
self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
|
||||
self._start_vel = np.zeros(7)
|
||||
|
||||
self.ball_site_id = 0
|
||||
self.ball_id = 11
|
||||
|
||||
self._release_step = 100 # time step of ball release
|
||||
|
||||
self.sim_time = 4 # seconds
|
||||
self.ep_length = 600 # based on 5 seconds with dt = 0.005 int(self.sim_time / self.dt)
|
||||
self.cup_table_id = 10
|
||||
|
||||
if noisy:
|
||||
self.noise_std = 0.01
|
||||
else:
|
||||
self.noise_std = 0
|
||||
|
||||
if difficulty == 'simple':
|
||||
self.cup_goal_pos = np.array([0, -1.7, 0.840])
|
||||
elif difficulty == 'intermediate':
|
||||
self.cup_goal_pos = np.array([0.3, -1.5, 0.840])
|
||||
elif difficulty == 'hard':
|
||||
self.cup_goal_pos = np.array([-0.3, -2.2, 0.840])
|
||||
elif difficulty == 'hardest':
|
||||
self.cup_goal_pos = np.array([-0.3, -1.2, 0.840])
|
||||
|
||||
if reward_type == "no_context":
|
||||
from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerPongReward
|
||||
reward_function = BeerPongReward
|
||||
elif reward_type == "staged":
|
||||
from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
|
||||
reward_function = BeerPongReward
|
||||
else:
|
||||
raise ValueError("Unknown reward type: {}".format(reward_type))
|
||||
self.reward_function = reward_function()
|
||||
|
||||
MujocoEnv.__init__(self, self.xml_path, frame_skip)
|
||||
utils.EzPickle.__init__(self)
|
||||
|
||||
@property
|
||||
def start_pos(self):
|
||||
return self._start_pos
|
||||
|
||||
@property
|
||||
def start_vel(self):
|
||||
return self._start_vel
|
||||
|
||||
@property
|
||||
def current_pos(self):
|
||||
@ -52,9 +76,9 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
|
||||
def current_vel(self):
|
||||
return self.sim.data.qvel[0:7].copy()
|
||||
|
||||
def configure(self, context):
|
||||
self.context = context
|
||||
self.reward_function.reset(context)
|
||||
def reset(self):
|
||||
self.reward_function.reset(self.add_noise)
|
||||
return super().reset()
|
||||
|
||||
def reset_model(self):
|
||||
init_pos_all = self.init_qpos.copy()
|
||||
@ -62,19 +86,14 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
|
||||
init_vel = np.zeros_like(init_pos_all)
|
||||
|
||||
self._steps = 0
|
||||
self._q_pos = []
|
||||
self._q_vel = []
|
||||
|
||||
start_pos = init_pos_all
|
||||
start_pos[0:7] = init_pos_robot
|
||||
# start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
|
||||
|
||||
self.set_state(start_pos, init_vel)
|
||||
|
||||
ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
|
||||
self.sim.model.body_pos[self.ball_id] = ball_pos.copy()
|
||||
self.sim.model.body_pos[self.cup_table_id] = self.context.copy()
|
||||
|
||||
self.sim.model.body_pos[self.cup_table_id] = self.cup_goal_pos
|
||||
start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
|
||||
self.set_state(start_pos, init_vel)
|
||||
return self._get_obs()
|
||||
|
||||
def step(self, a):
|
||||
@ -82,32 +101,55 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
|
||||
angular_vel = 0.0
|
||||
reward_ctrl = - np.square(a).sum()
|
||||
|
||||
crash = self.do_simulation(a)
|
||||
joint_cons_viol = self.check_traj_in_joint_limits()
|
||||
|
||||
self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
|
||||
self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
|
||||
if self.apply_gravity_comp:
|
||||
a = a + self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0]
|
||||
try:
|
||||
self.do_simulation(a, self.frame_skip)
|
||||
if self._steps < self._release_step:
|
||||
self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
|
||||
self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
|
||||
elif self._steps == self._release_step and self.add_noise:
|
||||
self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
|
||||
crash = False
|
||||
except mujoco_py.builder.MujocoException:
|
||||
crash = True
|
||||
# joint_cons_viol = self.check_traj_in_joint_limits()
|
||||
|
||||
ob = self._get_obs()
|
||||
|
||||
if not crash and not joint_cons_viol:
|
||||
reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps)
|
||||
done = success or self._steps == self.sim_steps - 1 or stop_sim
|
||||
if not crash:
|
||||
reward, reward_infos = self.reward_function.compute_reward(self, a)
|
||||
success = reward_infos['success']
|
||||
is_collided = reward_infos['is_collided']
|
||||
ball_pos = reward_infos['ball_pos']
|
||||
ball_vel = reward_infos['ball_vel']
|
||||
done = is_collided or self._steps == self.ep_length - 1
|
||||
self._steps += 1
|
||||
else:
|
||||
reward = -1000
|
||||
reward = -30
|
||||
success = False
|
||||
is_collided = False
|
||||
done = True
|
||||
ball_pos = np.zeros(3)
|
||||
ball_vel = np.zeros(3)
|
||||
|
||||
return ob, reward, done, dict(reward_dist=reward_dist,
|
||||
reward_ctrl=reward_ctrl,
|
||||
reward=reward,
|
||||
velocity=angular_vel,
|
||||
traj=self._q_pos, is_success=success,
|
||||
is_collided=crash or joint_cons_viol)
|
||||
# traj=self._q_pos,
|
||||
action=a,
|
||||
q_pos=self.sim.data.qpos[0:7].ravel().copy(),
|
||||
q_vel=self.sim.data.qvel[0:7].ravel().copy(),
|
||||
ball_pos=ball_pos,
|
||||
ball_vel=ball_vel,
|
||||
is_success=success,
|
||||
is_collided=is_collided, sim_crash=crash)
|
||||
|
||||
def check_traj_in_joint_limits(self):
|
||||
return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
|
||||
|
||||
# TODO
|
||||
# TODO: extend observation space
|
||||
def _get_obs(self):
|
||||
theta = self.sim.data.qpos.flat[:7]
|
||||
return np.concatenate([
|
||||
@ -118,28 +160,26 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
|
||||
])
|
||||
|
||||
# TODO
|
||||
@property
|
||||
def active_obs(self):
|
||||
pass
|
||||
|
||||
return np.hstack([
|
||||
[False] * 7, # cos
|
||||
[False] * 7, # sin
|
||||
# [True] * 2, # x-y coordinates of target distance
|
||||
[False] # env steps
|
||||
])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
env = ALRBeerpongEnv()
|
||||
ctxt = np.array([0, -2, 0.840]) # initial
|
||||
env = ALRBeerBongEnv(reward_type="no_context", difficulty='hardest')
|
||||
|
||||
env.configure(ctxt)
|
||||
# env.configure(ctxt)
|
||||
env.reset()
|
||||
env.render()
|
||||
for i in range(16000):
|
||||
# test with random actions
|
||||
ac = 0.0 * env.action_space.sample()[0:7]
|
||||
ac[1] = -0.8
|
||||
ac[3] = - 0.3
|
||||
ac[5] = -0.2
|
||||
# ac = env.start_pos
|
||||
# ac[0] += np.pi/2
|
||||
env.render("human")
|
||||
for i in range(800):
|
||||
ac = 10 * env.action_space.sample()[0:7]
|
||||
obs, rew, d, info = env.step(ac)
|
||||
env.render()
|
||||
env.render("human")
|
||||
|
||||
print(rew)
|
||||
|
||||
@ -147,4 +187,3 @@ if __name__ == "__main__":
|
||||
break
|
||||
|
||||
env.close()
|
||||
|
||||
|
@ -1,123 +1,168 @@
|
||||
import numpy as np
|
||||
from alr_envs.alr.mujoco import alr_reward_fct
|
||||
|
||||
|
||||
class BeerpongReward(alr_reward_fct.AlrReward):
|
||||
def __init__(self, sim, sim_time):
|
||||
class BeerPongReward:
|
||||
def __init__(self):
|
||||
|
||||
self.sim = sim
|
||||
self.sim_time = sim_time
|
||||
|
||||
self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
|
||||
self.robot_collision_objects = ["wrist_palm_link_convex_geom",
|
||||
"wrist_pitch_link_convex_decomposition_p1_geom",
|
||||
"wrist_pitch_link_convex_decomposition_p2_geom",
|
||||
"wrist_pitch_link_convex_decomposition_p3_geom",
|
||||
"wrist_yaw_link_convex_decomposition_p1_geom",
|
||||
"wrist_yaw_link_convex_decomposition_p2_geom",
|
||||
"forearm_link_convex_decomposition_p1_geom",
|
||||
"forearm_link_convex_decomposition_p2_geom"]
|
||||
"forearm_link_convex_decomposition_p2_geom",
|
||||
"upper_arm_link_convex_decomposition_p1_geom",
|
||||
"upper_arm_link_convex_decomposition_p2_geom",
|
||||
"shoulder_link_convex_decomposition_p1_geom",
|
||||
"shoulder_link_convex_decomposition_p2_geom",
|
||||
"shoulder_link_convex_decomposition_p3_geom",
|
||||
"base_link_convex_geom", "table_contact_geom"]
|
||||
|
||||
self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
|
||||
"cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
|
||||
# "cup_base_table", "cup_base_table_contact",
|
||||
"cup_geom_table15",
|
||||
"cup_geom_table16",
|
||||
"cup_geom_table17", "cup_geom1_table8",
|
||||
# "cup_base_table_contact",
|
||||
# "cup_base_table"
|
||||
]
|
||||
|
||||
self.ball_id = None
|
||||
self.ball_collision_id = None
|
||||
self.goal_id = None
|
||||
self.goal_final_id = None
|
||||
self.collision_ids = None
|
||||
|
||||
self.ball_traj = None
|
||||
self.dists = None
|
||||
self.dists_ctxt = None
|
||||
self.dists_final = None
|
||||
self.costs = None
|
||||
|
||||
self.action_costs = None
|
||||
self.angle_rewards = None
|
||||
self.cup_angles = None
|
||||
self.cup_z_axes = None
|
||||
self.collision_penalty = 500
|
||||
self.reset(None)
|
||||
|
||||
def reset(self, context):
|
||||
self.ball_traj = np.zeros(shape=(self.sim_time, 3))
|
||||
self.ball_traj = []
|
||||
self.dists = []
|
||||
self.dists_ctxt = []
|
||||
self.dists_final = []
|
||||
self.costs = []
|
||||
self.context = context
|
||||
self.ball_in_cup = False
|
||||
self.dist_ctxt = 5
|
||||
self.action_costs = []
|
||||
self.angle_rewards = []
|
||||
self.cup_angles = []
|
||||
self.cup_z_axes = []
|
||||
self.ball_ground_contact = False
|
||||
self.ball_table_contact = False
|
||||
self.ball_wall_contact = False
|
||||
self.ball_cup_contact = False
|
||||
|
||||
self.ball_id = self.sim.model._body_name2id["ball"]
|
||||
self.ball_collision_id = self.sim.model._geom_name2id["ball_geom"]
|
||||
self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
|
||||
self.goal_id = self.sim.model._site_name2id["cup_goal_table"]
|
||||
self.goal_final_id = self.sim.model._site_name2id["cup_goal_final_table"]
|
||||
self.collision_ids = [self.sim.model._geom_name2id[name] for name in self.collision_objects]
|
||||
self.cup_table_id = self.sim.model._body_name2id["cup_table"]
|
||||
def compute_reward(self, env, action):
|
||||
self.ball_id = env.sim.model._body_name2id["ball"]
|
||||
self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
|
||||
self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
|
||||
self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
|
||||
self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
|
||||
self.cup_table_id = env.sim.model._body_name2id["cup_table"]
|
||||
self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
|
||||
self.wall_collision_id = env.sim.model._geom_name2id["wall"]
|
||||
self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
|
||||
self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
|
||||
self.ground_collision_id = env.sim.model._geom_name2id["ground"]
|
||||
self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
|
||||
|
||||
def compute_reward(self, action, sim, step):
|
||||
action_cost = np.sum(np.square(action))
|
||||
|
||||
stop_sim = False
|
||||
success = False
|
||||
|
||||
if self.check_collision(sim):
|
||||
reward = - 1e-4 * action_cost - 1000
|
||||
stop_sim = True
|
||||
return reward, success, stop_sim
|
||||
|
||||
# Compute the current distance from the ball to the inner part of the cup
|
||||
goal_pos = sim.data.site_xpos[self.goal_id]
|
||||
ball_pos = sim.data.body_xpos[self.ball_id]
|
||||
goal_final_pos = sim.data.site_xpos[self.goal_final_id]
|
||||
goal_pos = env.sim.data.site_xpos[self.goal_id]
|
||||
ball_pos = env.sim.data.body_xpos[self.ball_id]
|
||||
goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
|
||||
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
|
||||
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
|
||||
self.dists_ctxt.append(np.linalg.norm(ball_pos - self.context))
|
||||
self.ball_traj[step, :] = ball_pos
|
||||
|
||||
# Determine the first time when ball is in cup
|
||||
if not self.ball_in_cup:
|
||||
ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
|
||||
self.ball_in_cup = ball_in_cup
|
||||
if ball_in_cup:
|
||||
dist_to_ctxt = np.linalg.norm(ball_pos - self.context)
|
||||
self.dist_ctxt = dist_to_ctxt
|
||||
action_cost = np.sum(np.square(action))
|
||||
self.action_costs.append(action_cost)
|
||||
|
||||
ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
|
||||
self.table_collision_id)
|
||||
|
||||
if ball_table_bounce: # or ball_cup_table_cont or ball_wall_con
|
||||
self.ball_table_contact = True
|
||||
|
||||
ball_cup_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
|
||||
self.cup_collision_ids)
|
||||
if ball_cup_cont:
|
||||
self.ball_cup_contact = True
|
||||
|
||||
ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.wall_collision_id)
|
||||
if ball_wall_cont and not self.ball_table_contact:
|
||||
self.ball_wall_contact = True
|
||||
|
||||
ball_ground_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
|
||||
self.ground_collision_id)
|
||||
if ball_ground_contact and not self.ball_table_contact:
|
||||
self.ball_ground_contact = True
|
||||
|
||||
self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
|
||||
if env._steps == env.ep_length - 1 or self._is_collided:
|
||||
|
||||
if step == self.sim_time - 1:
|
||||
min_dist = np.min(self.dists)
|
||||
dist_final = self.dists_final[-1]
|
||||
# dist_ctxt = self.dists_ctxt[-1]
|
||||
|
||||
# cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
|
||||
cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * self.dist_ctxt)
|
||||
reward = np.exp(-1 * cost) - 1e-4 * action_cost
|
||||
success = dist_final < 0.05 and self.dist_ctxt < 0.05
|
||||
ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.cup_table_collision_id)
|
||||
|
||||
cost_offset = 0
|
||||
|
||||
if self.ball_ground_contact: # or self.ball_wall_contact:
|
||||
cost_offset += 2
|
||||
|
||||
if not self.ball_table_contact:
|
||||
cost_offset += 2
|
||||
|
||||
if not ball_in_cup:
|
||||
cost_offset += 2
|
||||
cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-4 * action_cost # + min_dist ** 2
|
||||
else:
|
||||
if self.ball_cup_contact:
|
||||
cost_offset += 1
|
||||
cost = cost_offset + self.dists_final[-1] ** 2 + 1e-4 * action_cost
|
||||
|
||||
reward = - 1*cost - self.collision_penalty * int(self._is_collided)
|
||||
success = ball_in_cup and not self.ball_ground_contact and not self.ball_wall_contact and not self.ball_cup_contact
|
||||
else:
|
||||
reward = - 1e-4 * action_cost
|
||||
success = False
|
||||
|
||||
return reward, success, stop_sim
|
||||
infos = {}
|
||||
infos["success"] = success
|
||||
infos["is_collided"] = self._is_collided
|
||||
infos["ball_pos"] = ball_pos.copy()
|
||||
infos["action_cost"] = 5e-4 * action_cost
|
||||
|
||||
def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt):
|
||||
if not ball_in_cup:
|
||||
cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2)
|
||||
return reward, infos
|
||||
|
||||
def _check_collision_single_objects(self, sim, id_1, id_2):
|
||||
for coni in range(0, sim.data.ncon):
|
||||
con = sim.data.contact[coni]
|
||||
|
||||
collision = con.geom1 == id_1 and con.geom2 == id_2
|
||||
collision_trans = con.geom1 == id_2 and con.geom2 == id_1
|
||||
|
||||
if collision or collision_trans:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _check_collision_with_itself(self, sim, collision_ids):
|
||||
col_1, col_2 = False, False
|
||||
for j, id in enumerate(collision_ids):
|
||||
col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
|
||||
if j != len(collision_ids) - 1:
|
||||
col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
|
||||
else:
|
||||
cost = 2 * dist_to_ctxt ** 2
|
||||
print('Context Distance:', dist_to_ctxt)
|
||||
return cost
|
||||
col_2 = False
|
||||
collision = True if col_1 or col_2 else False
|
||||
return collision
|
||||
|
||||
def check_ball_in_cup(self, sim, ball_collision_id):
|
||||
cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
|
||||
def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
|
||||
for coni in range(0, sim.data.ncon):
|
||||
con = sim.data.contact[coni]
|
||||
|
||||
collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id
|
||||
collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id
|
||||
|
||||
if collision or collision_trans:
|
||||
return True
|
||||
return False
|
||||
|
||||
def check_collision(self, sim):
|
||||
for coni in range(0, sim.data.ncon):
|
||||
con = sim.data.contact[coni]
|
||||
|
||||
collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
|
||||
collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
|
||||
collision = con.geom1 in id_list and con.geom2 == id_1
|
||||
collision_trans = con.geom1 == id_1 and con.geom2 in id_list
|
||||
|
||||
if collision or collision_trans:
|
||||
return True
|
||||
|
169
alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
Normal file
169
alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
Normal file
@ -0,0 +1,169 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
class BeerPongReward:
|
||||
def __init__(self):
|
||||
|
||||
self.robot_collision_objects = ["wrist_palm_link_convex_geom",
|
||||
"wrist_pitch_link_convex_decomposition_p1_geom",
|
||||
"wrist_pitch_link_convex_decomposition_p2_geom",
|
||||
"wrist_pitch_link_convex_decomposition_p3_geom",
|
||||
"wrist_yaw_link_convex_decomposition_p1_geom",
|
||||
"wrist_yaw_link_convex_decomposition_p2_geom",
|
||||
"forearm_link_convex_decomposition_p1_geom",
|
||||
"forearm_link_convex_decomposition_p2_geom",
|
||||
"upper_arm_link_convex_decomposition_p1_geom",
|
||||
"upper_arm_link_convex_decomposition_p2_geom",
|
||||
"shoulder_link_convex_decomposition_p1_geom",
|
||||
"shoulder_link_convex_decomposition_p2_geom",
|
||||
"shoulder_link_convex_decomposition_p3_geom",
|
||||
"base_link_convex_geom", "table_contact_geom"]
|
||||
|
||||
self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
|
||||
"cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
|
||||
# "cup_base_table", "cup_base_table_contact",
|
||||
"cup_geom_table15",
|
||||
"cup_geom_table16",
|
||||
"cup_geom_table17", "cup_geom1_table8",
|
||||
# "cup_base_table_contact",
|
||||
# "cup_base_table"
|
||||
]
|
||||
|
||||
|
||||
self.ball_traj = None
|
||||
self.dists = None
|
||||
self.dists_final = None
|
||||
self.costs = None
|
||||
self.action_costs = None
|
||||
self.angle_rewards = None
|
||||
self.cup_angles = None
|
||||
self.cup_z_axes = None
|
||||
self.collision_penalty = 500
|
||||
self.reset(None)
|
||||
|
||||
def reset(self, noisy):
|
||||
self.ball_traj = []
|
||||
self.dists = []
|
||||
self.dists_final = []
|
||||
self.costs = []
|
||||
self.action_costs = []
|
||||
self.angle_rewards = []
|
||||
self.cup_angles = []
|
||||
self.cup_z_axes = []
|
||||
self.ball_ground_contact = False
|
||||
self.ball_table_contact = False
|
||||
self.ball_wall_contact = False
|
||||
self.ball_cup_contact = False
|
||||
self.noisy_bp = noisy
|
||||
self._t_min_final_dist = -1
|
||||
|
||||
def compute_reward(self, env, action):
|
||||
self.ball_id = env.sim.model._body_name2id["ball"]
|
||||
self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
|
||||
self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
|
||||
self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
|
||||
self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
|
||||
self.cup_table_id = env.sim.model._body_name2id["cup_table"]
|
||||
self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
|
||||
self.wall_collision_id = env.sim.model._geom_name2id["wall"]
|
||||
self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
|
||||
self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
|
||||
self.ground_collision_id = env.sim.model._geom_name2id["ground"]
|
||||
self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
|
||||
|
||||
goal_pos = env.sim.data.site_xpos[self.goal_id]
|
||||
ball_pos = env.sim.data.body_xpos[self.ball_id]
|
||||
ball_vel = env.sim.data.body_xvelp[self.ball_id]
|
||||
goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
|
||||
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
|
||||
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
|
||||
|
||||
action_cost = np.sum(np.square(action))
|
||||
self.action_costs.append(action_cost)
|
||||
|
||||
self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
|
||||
if env._steps == env.ep_length - 1 or self._is_collided:
|
||||
|
||||
min_dist = np.min(self.dists)
|
||||
ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
|
||||
self.table_collision_id)
|
||||
ball_cup_table_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
|
||||
self.cup_collision_ids)
|
||||
ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id,
|
||||
self.wall_collision_id)
|
||||
ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id,
|
||||
self.cup_table_collision_id)
|
||||
if not ball_in_cup:
|
||||
cost_offset = 2
|
||||
if not ball_cup_table_cont and not ball_table_bounce and not ball_wall_cont:
|
||||
cost_offset += 2
|
||||
cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-7 * action_cost
|
||||
else:
|
||||
cost = self.dists_final[-1] ** 2 + 1.5 * action_cost * 1e-7
|
||||
|
||||
reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
|
||||
success = ball_in_cup
|
||||
crash = self._is_collided
|
||||
else:
|
||||
reward = - 1e-7 * action_cost
|
||||
cost = 0
|
||||
success = False
|
||||
crash = False
|
||||
|
||||
infos = {}
|
||||
infos["success"] = success
|
||||
infos["is_collided"] = self._is_collided
|
||||
infos["ball_pos"] = ball_pos.copy()
|
||||
infos["ball_vel"] = ball_vel.copy()
|
||||
infos["action_cost"] = 5e-4 * action_cost
|
||||
infos["task_cost"] = cost
|
||||
|
||||
return reward, infos
|
||||
|
||||
def get_cost_offset(self):
|
||||
if self.ball_ground_contact:
|
||||
return 200
|
||||
|
||||
if not self.ball_table_contact:
|
||||
return 100
|
||||
|
||||
if not self.ball_in_cup:
|
||||
return 50
|
||||
|
||||
if self.ball_in_cup and self.ball_cup_contact and not self.noisy_bp:
|
||||
return 10
|
||||
|
||||
return 0
|
||||
|
||||
def _check_collision_single_objects(self, sim, id_1, id_2):
|
||||
for coni in range(0, sim.data.ncon):
|
||||
con = sim.data.contact[coni]
|
||||
|
||||
collision = con.geom1 == id_1 and con.geom2 == id_2
|
||||
collision_trans = con.geom1 == id_2 and con.geom2 == id_1
|
||||
|
||||
if collision or collision_trans:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _check_collision_with_itself(self, sim, collision_ids):
|
||||
col_1, col_2 = False, False
|
||||
for j, id in enumerate(collision_ids):
|
||||
col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
|
||||
if j != len(collision_ids) - 1:
|
||||
col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
|
||||
else:
|
||||
col_2 = False
|
||||
collision = True if col_1 or col_2 else False
|
||||
return collision
|
||||
|
||||
def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
|
||||
for coni in range(0, sim.data.ncon):
|
||||
con = sim.data.contact[coni]
|
||||
|
||||
collision = con.geom1 in id_list and con.geom2 == id_1
|
||||
collision_trans = con.geom1 == id_1 and con.geom2 in id_list
|
||||
|
||||
if collision or collision_trans:
|
||||
return True
|
||||
return False
|
39
alr_envs/alr/mujoco/beerpong/mp_wrapper.py
Normal file
39
alr_envs/alr/mujoco/beerpong/mp_wrapper.py
Normal file
@ -0,0 +1,39 @@
|
||||
from typing import Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
|
||||
|
||||
|
||||
class MPWrapper(MPEnvWrapper):
|
||||
|
||||
@property
|
||||
def active_obs(self):
|
||||
# TODO: @Max Filter observations correctly
|
||||
return np.hstack([
|
||||
[False] * 7, # cos
|
||||
[False] * 7, # sin
|
||||
# [True] * 2, # x-y coordinates of target distance
|
||||
[False] # env steps
|
||||
])
|
||||
|
||||
@property
|
||||
def start_pos(self):
|
||||
return self._start_pos
|
||||
|
||||
@property
|
||||
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||
return self.sim.data.qpos[0:7].copy()
|
||||
|
||||
@property
|
||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||
return self.sim.data.qvel[0:7].copy()
|
||||
|
||||
@property
|
||||
def goal_pos(self):
|
||||
# TODO: @Max I think the default value of returning to the start is reasonable here
|
||||
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
|
||||
|
||||
@property
|
||||
def dt(self) -> Union[float, int]:
|
||||
return self.env.dt
|
BIN
alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl
Normal file
BIN
alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl
Normal file
Binary file not shown.
BIN
alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl
Normal file
BIN
alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl
Normal file
Binary file not shown.
BIN
alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl
Normal file
BIN
alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl
Normal file
Binary file not shown.
BIN
alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl
Normal file
BIN
alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl
Normal file
BIN
alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl
Normal file
Binary file not shown.
BIN
alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl
Normal file
BIN
alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl
Normal file
BIN
alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl
Normal file
Binary file not shown.
BIN
alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl
Normal file
BIN
alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl
Normal file
Binary file not shown.
1
alr_envs/alr/mujoco/table_tennis/__init__.py
Normal file
1
alr_envs/alr/mujoco/table_tennis/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
from .mp_wrapper import MPWrapper
|
38
alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
Normal file
38
alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
Normal file
@ -0,0 +1,38 @@
|
||||
from typing import Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
|
||||
|
||||
|
||||
class MPWrapper(MPEnvWrapper):
|
||||
|
||||
@property
|
||||
def active_obs(self):
|
||||
# TODO: @Max Filter observations correctly
|
||||
return np.hstack([
|
||||
[True] * 7, # Joint Pos
|
||||
[True] * 3, # Ball pos
|
||||
[True] * 3 # goal pos
|
||||
])
|
||||
|
||||
@property
|
||||
def start_pos(self):
|
||||
return self.self.init_qpos_tt
|
||||
|
||||
@property
|
||||
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||
return self.sim.data.qpos[:7].copy()
|
||||
|
||||
@property
|
||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||
return self.sim.data.qvel[:7].copy()
|
||||
|
||||
@property
|
||||
def goal_pos(self):
|
||||
# TODO: @Max I think the default value of returning to the start is reasonable here
|
||||
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
|
||||
|
||||
@property
|
||||
def dt(self) -> Union[float, int]:
|
||||
return self.env.dt
|
168
alr_envs/alr/mujoco/table_tennis/tt_gym.py
Normal file
168
alr_envs/alr/mujoco/table_tennis/tt_gym.py
Normal file
@ -0,0 +1,168 @@
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import mujoco_py
|
||||
from gym import utils, spaces
|
||||
from gym.envs.mujoco import MujocoEnv
|
||||
|
||||
from alr_envs.alr.mujoco.table_tennis.tt_utils import ball_init
|
||||
from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
|
||||
|
||||
#TODO: Check for simulation stability. Make sure the code runs even for sim crash
|
||||
|
||||
MAX_EPISODE_STEPS = 1375
|
||||
BALL_NAME_CONTACT = "target_ball_contact"
|
||||
BALL_NAME = "target_ball"
|
||||
TABLE_NAME = 'table_tennis_table'
|
||||
FLOOR_NAME = 'floor'
|
||||
PADDLE_CONTACT_1_NAME = 'bat'
|
||||
PADDLE_CONTACT_2_NAME = 'bat_back'
|
||||
RACKET_NAME = 'bat'
|
||||
# CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.6]])
|
||||
CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.0]])
|
||||
CONTEXT_RANGE_BOUNDS_4DIM = np.array([[-1.35, -0.75, -1.25, -0.75], [-0.1, 0.75, -0.1, 0.75]])
|
||||
|
||||
class TT_Env_Gym(MujocoEnv, utils.EzPickle):
|
||||
|
||||
def __init__(self, ctxt_dim=2):
|
||||
model_path = os.path.join(os.path.dirname(__file__), "xml", 'table_tennis_env.xml')
|
||||
|
||||
self.ctxt_dim = ctxt_dim
|
||||
if ctxt_dim == 2:
|
||||
self.context_range_bounds = CONTEXT_RANGE_BOUNDS_2DIM
|
||||
self.goal = np.zeros(3) # 2 x,y + 1z
|
||||
elif ctxt_dim == 4:
|
||||
self.context_range_bounds = CONTEXT_RANGE_BOUNDS_4DIM
|
||||
self.goal = np.zeros(3)
|
||||
else:
|
||||
raise ValueError("either 2 or 4 dimensional Contexts available")
|
||||
|
||||
action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
|
||||
action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
|
||||
self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64')
|
||||
|
||||
self.time_steps = 0
|
||||
self.init_qpos_tt = np.array([0, 0, 0, 1.5, 0, 0, 1.5, 0, 0, 0])
|
||||
self.init_qvel_tt = np.zeros(10)
|
||||
|
||||
self.reward_func = TT_Reward(self.ctxt_dim)
|
||||
self.ball_landing_pos = None
|
||||
self.hited_ball = False
|
||||
self.ball_contact_after_hit = False
|
||||
self._ids_set = False
|
||||
super(TT_Env_Gym, self).__init__(model_path=model_path, frame_skip=1)
|
||||
self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func.
|
||||
self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
|
||||
self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
|
||||
self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
|
||||
self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this
|
||||
self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this
|
||||
self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
|
||||
|
||||
def _set_ids(self):
|
||||
self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func.
|
||||
self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
|
||||
self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
|
||||
self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this
|
||||
self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this
|
||||
self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
|
||||
self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
|
||||
self._ids_set = True
|
||||
|
||||
def _get_obs(self):
|
||||
ball_pos = self.sim.data.body_xpos[self.ball_id]
|
||||
obs = np.concatenate([self.sim.data.qpos[:7].copy(), # 7 joint positions
|
||||
ball_pos,
|
||||
self.goal.copy()])
|
||||
return obs
|
||||
|
||||
def sample_context(self):
|
||||
return np.random.uniform(self.context_range_bounds[0], self.context_range_bounds[1], size=self.ctxt_dim)
|
||||
|
||||
def reset_model(self):
|
||||
self.set_state(self.init_qpos_tt, self.init_qvel_tt) # reset to initial sim state
|
||||
self.time_steps = 0
|
||||
self.ball_landing_pos = None
|
||||
self.hited_ball = False
|
||||
self.ball_contact_after_hit = False
|
||||
self.goal = self.sample_context()[:2]
|
||||
if self.ctxt_dim == 2:
|
||||
initial_ball_state = ball_init(random=False) # fixed velocity, fixed position
|
||||
elif self.ctxt_dim == 4:
|
||||
initial_ball_state = ball_init(random=False)#raise NotImplementedError
|
||||
|
||||
self.sim.data.set_joint_qpos('tar:x', initial_ball_state[0])
|
||||
self.sim.data.set_joint_qpos('tar:y', initial_ball_state[1])
|
||||
self.sim.data.set_joint_qpos('tar:z', initial_ball_state[2])
|
||||
|
||||
self.sim.data.set_joint_qvel('tar:x', initial_ball_state[3])
|
||||
self.sim.data.set_joint_qvel('tar:y', initial_ball_state[4])
|
||||
self.sim.data.set_joint_qvel('tar:z', initial_ball_state[5])
|
||||
|
||||
z_extended_goal_pos = np.concatenate((self.goal[:2], [0.77]))
|
||||
self.goal = z_extended_goal_pos
|
||||
self.sim.model.body_pos[5] = self.goal[:3] # Desired Landing Position, Yellow
|
||||
self.sim.model.body_pos[3] = np.array([0, 0, 0.5]) # Outgoing Ball Landing Position, Green
|
||||
self.sim.model.body_pos[4] = np.array([0, 0, 0.5]) # Incoming Ball Landing Position, Red
|
||||
self.sim.forward()
|
||||
|
||||
self.reward_func.reset(self.goal) # reset the reward function
|
||||
return self._get_obs()
|
||||
|
||||
def _contact_checker(self, id_1, id_2):
|
||||
for coni in range(0, self.sim.data.ncon):
|
||||
con = self.sim.data.contact[coni]
|
||||
collision = con.geom1 == id_1 and con.geom2 == id_2
|
||||
collision_trans = con.geom1 == id_2 and con.geom2 == id_1
|
||||
if collision or collision_trans:
|
||||
return True
|
||||
return False
|
||||
|
||||
def step(self, action):
|
||||
if not self._ids_set:
|
||||
self._set_ids()
|
||||
done = False
|
||||
episode_end = False if self.time_steps+1<MAX_EPISODE_STEPS else True
|
||||
if not self.hited_ball:
|
||||
self.hited_ball = self._contact_checker(self.ball_contact_id, self.paddle_contact_id_1) # check for one side
|
||||
if not self.hited_ball:
|
||||
self.hited_ball = self._contact_checker(self.ball_contact_id, self.paddle_contact_id_2) # check for other side
|
||||
if self.hited_ball:
|
||||
if not self.ball_contact_after_hit:
|
||||
if self._contact_checker(self.ball_contact_id, self.floor_contact_id): # first check contact with floor
|
||||
self.ball_contact_after_hit = True
|
||||
self.ball_landing_pos = self.sim.data.body_xpos[self.ball_id]
|
||||
elif self._contact_checker(self.ball_contact_id, self.table_contact_id): # second check contact with table
|
||||
self.ball_contact_after_hit = True
|
||||
self.ball_landing_pos = self.sim.data.body_xpos[self.ball_id]
|
||||
c_ball_pos = self.sim.data.body_xpos[self.ball_id]
|
||||
racket_pos = self.sim.data.geom_xpos[self.racket_id] # TODO: use this to reach out the position of the paddle?
|
||||
if self.ball_landing_pos is not None:
|
||||
done = True
|
||||
episode_end =True
|
||||
reward = self.reward_func.get_reward(episode_end, c_ball_pos, racket_pos, self.hited_ball, self.ball_landing_pos)
|
||||
self.time_steps += 1
|
||||
# gravity compensation on joints:
|
||||
#action += self.sim.data.qfrc_bias[:7].copy()
|
||||
try:
|
||||
self.do_simulation(action, self.frame_skip)
|
||||
except mujoco_py.MujocoException as e:
|
||||
print('Simulation got unstable returning')
|
||||
done = True
|
||||
reward = -25
|
||||
ob = self._get_obs()
|
||||
return ob, reward, done, {"hit_ball":self.hited_ball}# might add some information here ....
|
||||
|
||||
def set_context(self, context):
|
||||
old_state = self.sim.get_state()
|
||||
qpos = old_state.qpos.copy()
|
||||
qvel = old_state.qvel.copy()
|
||||
self.set_state(qpos, qvel)
|
||||
self.goal = context
|
||||
z_extended_goal_pos = np.concatenate((self.goal[:self.ctxt_dim], [0.77]))
|
||||
if self.ctxt_dim == 4:
|
||||
z_extended_goal_pos = np.concatenate((z_extended_goal_pos, [0.77]))
|
||||
self.goal = z_extended_goal_pos
|
||||
self.sim.model.body_pos[5] = self.goal[:3] # TODO: Missing: Setting the desired incomoing landing position
|
||||
self.sim.forward()
|
||||
return self._get_obs()
|
48
alr_envs/alr/mujoco/table_tennis/tt_reward.py
Normal file
48
alr_envs/alr/mujoco/table_tennis/tt_reward.py
Normal file
@ -0,0 +1,48 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
class TT_Reward:
|
||||
|
||||
def __init__(self, ctxt_dim):
|
||||
self.ctxt_dim = ctxt_dim
|
||||
self.c_goal = None # current desired landing point
|
||||
self.c_ball_traj = []
|
||||
self.c_racket_traj = []
|
||||
self.constant = 8
|
||||
|
||||
def get_reward(self, episode_end, ball_position, racket_pos, hited_ball, ball_landing_pos):
|
||||
self.c_ball_traj.append(ball_position.copy())
|
||||
self.c_racket_traj.append(racket_pos.copy())
|
||||
if not episode_end:
|
||||
return 0
|
||||
else:
|
||||
# # seems to work for episodic case
|
||||
min_r_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj) - np.array(self.c_racket_traj), axis=1))
|
||||
if not hited_ball:
|
||||
return 0.2 * (1- np.tanh(min_r_b_dist**2))
|
||||
else:
|
||||
if ball_landing_pos is None:
|
||||
min_b_des_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj)[:,:2] - self.c_goal[:2], axis=1))
|
||||
return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + (1 - np.tanh(min_b_des_b_dist**2))
|
||||
else:
|
||||
min_b_des_b_land_dist = np.linalg.norm(self.c_goal[:2] - ball_landing_pos[:2])
|
||||
over_net_bonus = int(ball_landing_pos[0] < 0)
|
||||
return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus
|
||||
|
||||
|
||||
# if not hited_ball:
|
||||
# min_r_b_dist = 1 + np.min(np.linalg.norm(np.array(self.c_ball_traj) - np.array(self.c_racket_traj), axis=1))
|
||||
# return -min_r_b_dist
|
||||
# else:
|
||||
# if ball_landing_pos is None:
|
||||
# dist_to_des_pos = 1-np.power(np.linalg.norm(self.c_goal - ball_position), 0.75)/self.constant
|
||||
# else:
|
||||
# dist_to_des_pos = 1-np.power(np.linalg.norm(self.c_goal - ball_landing_pos), 0.75)/self.constant
|
||||
# if dist_to_des_pos < -0.2:
|
||||
# dist_to_des_pos = -0.2
|
||||
# return -dist_to_des_pos
|
||||
|
||||
def reset(self, goal):
|
||||
self.c_goal = goal.copy()
|
||||
self.c_ball_traj = []
|
||||
self.c_racket_traj = []
|
26
alr_envs/alr/mujoco/table_tennis/tt_utils.py
Normal file
26
alr_envs/alr/mujoco/table_tennis/tt_utils.py
Normal file
@ -0,0 +1,26 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
def ball_init(random=False, context_range=None):
|
||||
if random:
|
||||
dx = np.random.uniform(-0.1, 0.1) # TODO: clarify these numbers?
|
||||
dy = np.random.uniform(-0.1, 0.1) # TODO: clarify these numbers?
|
||||
dz = np.random.uniform(-0.1, 0.1) # TODO: clarify these numbers?
|
||||
|
||||
v_x = np.random.uniform(1.7, 1.8)
|
||||
v_y = np.random.uniform(0.7, 0.8)
|
||||
v_z = np.random.uniform(0.1, 0.2)
|
||||
else:
|
||||
dx = 1
|
||||
dy = 0
|
||||
dz = 0.05
|
||||
|
||||
v_x = 2.5
|
||||
v_y = 2
|
||||
v_z = 0.5
|
||||
|
||||
initial_x = 0 + dx - 1.2
|
||||
initial_y = -0.2 + dy - 0.6
|
||||
initial_z = 0.3 + dz + 1.5
|
||||
initial_ball_state = np.array([initial_x, initial_y, initial_z, v_x, v_y, v_z])
|
||||
return initial_ball_state
|
@ -0,0 +1,12 @@
|
||||
<mujocoinclude>
|
||||
<actuator>
|
||||
<motor name="wam/base_motor" joint="wam/base_yaw_joint_right" ctrllimited="true" ctrlrange="-1.0 1.0" gear="150.0"/>
|
||||
<motor name="wam/shoulder_pitch_motor" joint='wam/shoulder_pitch_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="125.0"/>
|
||||
<motor name="wam/shoulder_yaw_motor" joint='wam/shoulder_yaw_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="40.0"/>
|
||||
<motor name="wam/elbow_pitch_motor" joint='wam/elbow_pitch_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="60.0"/>
|
||||
<motor name="wam/wrist_yaw_motor" joint='wam/wrist_yaw_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0"/>
|
||||
<motor name="wam/wrist_pitch_motor" joint='wam/wrist_pitch_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0"/>
|
||||
<motor name="wam/palm_yaw_motor" joint='wam/palm_yaw_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="2.0"/>
|
||||
</actuator>
|
||||
</mujocoinclude>
|
||||
|
@ -0,0 +1,103 @@
|
||||
<mujocoinclue>
|
||||
<body name="wam/base_link_right" pos="2.1 0 2.0" quat="0 0 1 0" childclass="wam" >
|
||||
<inertial pos="0 0 0" mass="1" diaginertia="0.1 0.1 0.1"/>
|
||||
<geom name="base_link_fine" class="viz" mesh="base_link_fine" rgba="0.5 0.5 0.5 0"/>
|
||||
<geom name="base_link_convex" class="col" mesh="base_link_convex" rgba="0.5 0.5 0.5 1"/>
|
||||
<body name="wam/shoulder_yaw_link_right" pos="0 0 0.346">
|
||||
<inertial pos="-0.00443422 -0.00066489 -0.128904" quat="0.69566 0.716713 -0.0354863 0.0334839" mass="5"
|
||||
diaginertia="0.135089 0.113095 0.0904426"/>
|
||||
<!-- control 0: 1.6-->
|
||||
<joint name="wam/base_yaw_joint_right" range="-2.6 2.6" damping="1.98"/>
|
||||
<geom name="shoulder_link_fine" class="viz" mesh="shoulder_link_fine" rgba="1 1 1 0"/>
|
||||
<geom name="shoulder_link_convex_decomposition_p1" class="col"
|
||||
mesh="shoulder_link_convex_decomposition_p1"/>
|
||||
<geom name="shoulder_link_convex_decomposition_p2" class="col"
|
||||
mesh="shoulder_link_convex_decomposition_p2"/>
|
||||
<geom name="shoulder_link_convex_decomposition_p3" class="col"
|
||||
mesh="shoulder_link_convex_decomposition_p3"/>
|
||||
<body name="wam/shoulder_pitch_link_right" pos="0 0 0" quat="0.707107 -0.707107 0 0">
|
||||
<inertial pos="-0.00236981 -0.0154211 0.0310561" quat="0.961794 0.273112 -0.0169316 0.00866592"
|
||||
mass="3.87494" diaginertia="0.0214195 0.0167127 0.0126452"/> <!--seems off-->
|
||||
<!-- control 1: 0-->
|
||||
<joint name="wam/shoulder_pitch_joint_right" range="-2 2" damping="0.55"/>
|
||||
<geom name="shoulder_pitch_link_fine" class="viz" mesh="shoulder_pitch_link_fine" rgba="1 1 1 0"/>
|
||||
<geom name="shoulder_pitch_link_convex" class="col" mesh="shoulder_pitch_link_convex"/>
|
||||
<body name="wam/upper_arm_link_right" pos="0 0 0" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="0.00683259 3.309e-005 0.392492" quat="0.647136 0.0170822 0.0143038 0.762049"
|
||||
mass="2.20228" diaginertia="0.0592718 0.0592207 0.00313419"/>
|
||||
<!-- control 2: 0-->
|
||||
<joint name="wam/shoulder_yaw_joint_right" range="-2.8 2.8" damping="1.65"/>
|
||||
<geom name="upper_arm_link_fine" class="viz" mesh="upper_arm_link_fine" rgba="1 1 1 0"/>
|
||||
<geom name="upper_arm_link_convex_decomposition_p1" class="col"
|
||||
mesh="upper_arm_link_convex_decomposition_p1" rgba="0.094 0.48 0.804 1"/>
|
||||
<geom name="upper_arm_link_convex_decomposition_p2" class="col"
|
||||
mesh="upper_arm_link_convex_decomposition_p2" rgba="0.094 0.48 0.804 1"/>
|
||||
<body name="wam/forearm_link_right" pos="0.045 0 0.55" quat="0.707107 -0.707107 0 0">
|
||||
<inertial pos="-0.0400149 -0.142717 -0.00022942"
|
||||
quat="0.704281 0.706326 0.0180333 0.0690353" mass="0.500168"
|
||||
diaginertia="0.0151047 0.0148285 0.00275805"/>
|
||||
<!-- control 3: 2.4-->
|
||||
<joint name="wam/elbow_pitch_joint_right" range="-0.9 3.1" damping="0.88"/>
|
||||
<geom name="elbow_link_fine" class="viz" mesh="elbow_link_fine" rgba="1 1 1 0"/>
|
||||
<geom name="elbow_link_convex" class="col" mesh="elbow_link_convex"/>
|
||||
<geom name="forearm_link_fine" class="viz" mesh="forearm_link_fine" pos="-.045 -0.0730 0"
|
||||
euler="1.57 0 0" rgba="1 1 1 0"/>
|
||||
<geom name="forearm_link_convex_decomposition_p1" class="col"
|
||||
mesh="forearm_link_convex_decomposition_p1" pos="-0.045 -0.0730 0"
|
||||
euler="1.57 0 0" rgba="0.094 0.48 0.804 1"/>
|
||||
<geom name="forearm_link_convex_decomposition_p2" class="col"
|
||||
mesh="forearm_link_convex_decomposition_p2" pos="-.045 -0.0730 0"
|
||||
euler="1.57 0 0" rgba="0.094 0.48 0.804 1"/>
|
||||
<body name="wam/wrist_yaw_link_right" pos="-0.045 -0.3 0" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="8.921e-005 0.00435824 -0.00511217"
|
||||
quat="0.630602 0.776093 0.00401969 -0.002372" mass="1.05376"
|
||||
diaginertia="0.000555168 0.00046317 0.000234072"/> <!--this is an approximation-->
|
||||
<!-- control 4: 0-->
|
||||
<joint name="wam/wrist_yaw_joint_right" range="-4.8 1.3" damping="0.55"/>
|
||||
<geom name="wrist_yaw_link_fine" class="viz" mesh="wrist_yaw_link_fine" rgba="1 1 1 0"/>
|
||||
<geom name="wrist_yaw_link_convex_decomposition_p1" class="col"
|
||||
mesh="wrist_yaw_link_convex_decomposition_p1"/>
|
||||
<geom name="wrist_yaw_link_convex_decomposition_p2" class="col"
|
||||
mesh="wrist_yaw_link_convex_decomposition_p2"/>
|
||||
<body name="wam/wrist_pitch_link_right" pos="0 0 0" quat="0.707107 -0.707107 0 0">
|
||||
<inertial pos="-0.00012262 -0.0246834 -0.0170319"
|
||||
quat="0.630602 0.776093 0.00401969 -0.002372" mass="0.517974"
|
||||
diaginertia="0.000555168 0.00046317 0.000234072"/>
|
||||
<!-- control 5: 0-->
|
||||
<joint name="wam/wrist_pitch_joint_right" range="-1.6 1.6" damping="0.11"/>
|
||||
<geom name="wrist_pitch_link_fine" class="viz" mesh="wrist_pitch_link_fine"
|
||||
rgba="1 1 1 0"/>
|
||||
<geom name="wrist_pitch_link_convex_decomposition_p1" rgba="1 0.5 0.313 1"
|
||||
class="col" mesh="wrist_pitch_link_convex_decomposition_p1"/>
|
||||
<geom name="wrist_pitch_link_convex_decomposition_p2" rgba="1 0.5 0.313 1"
|
||||
class="col" mesh="wrist_pitch_link_convex_decomposition_p2"/>
|
||||
<geom name="wrist_pitch_link_convex_decomposition_p3" rgba="1 0.5 0.313 1"
|
||||
class="col" mesh="wrist_pitch_link_convex_decomposition_p3"/>
|
||||
<body name="wam/wrist_palm_link_right" pos="0 0 0" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="0 0 0.055" quat="0.707107 0 0 0.707107" mass="0.0828613"
|
||||
diaginertia="0.00020683 0.00010859 0.00010851"/>
|
||||
<!-- control 6: 1.8-->
|
||||
<joint name="wam/palm_yaw_joint_right" range="-2.2 2.2" damping="0.11"/>
|
||||
<geom name="wrist_palm_link_fine" class="viz" mesh="wrist_palm_link_fine"
|
||||
rgba="1 1 1 0"/>
|
||||
<geom name="wrist_palm_link_convex" class="col" mesh="wrist_palm_link_convex"/>
|
||||
<!-- EE=wam/paddle, configure name to the end effector name-->
|
||||
<body name="EE" pos="0 0 0.26" childclass="contact_geom">
|
||||
<geom name="bat" type="cylinder" size="0.075 0.005" rgba="1 0 0 1"
|
||||
quat="0.71 0 0.71 0"/>
|
||||
<geom name="bat_back" type="cylinder" size="0.0749 0.0025" rgba="0 1 0 1"
|
||||
quat="0.71 0 0.71 0" pos="-0.0026 0 0"/>
|
||||
<geom name="wam/paddle_handle" type="box" size="0.005 0.01 0.05" pos="0 0 -0.08"
|
||||
rgba="1 1 1 1"/>
|
||||
<!-- Extract information for sampling goals.-->
|
||||
<site name="wam/paddle_center" pos="0 0 0" rgba="1 1 1 1" size="0.00001"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</mujocoinclue>
|
30
alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
Normal file
30
alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
Normal file
@ -0,0 +1,30 @@
|
||||
<mujocoinclude>
|
||||
<body name="table_tennis_table" pos="0 0 0">
|
||||
<geom class="viz" name="table_base_1" pos="1 0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
|
||||
<geom class="viz" name="table_base_2" pos="1 -0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
|
||||
<geom class="viz" name="table_base_3" pos="-1 -0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
|
||||
<geom class="viz" name="table_base_4" pos="-1 0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
|
||||
<body name="table_top" pos="0 0 0.76">
|
||||
<geom class="contact_geom" name="table_tennis_table" pos="0 0 0" rgba="0 0 0.5 1" size="1.37 .7625 .01" type="box" />
|
||||
<site name="left_up_corner" pos="-1.37 .7625 0.01" rgba="1 1 1 1" size="0.00001" />
|
||||
<site name="middle_up_corner" pos="0 .7625 0.01" rgba="1 1 1 1" size="0.00001" />
|
||||
<site name="left_down_corner" pos="-1.37 -0.7625 0.01" rgba="1 1 1 1" size="0.00001" />
|
||||
<site name="middle_down_corner" pos="0 -.7625 0.01" rgba="1 1 1 1" size="0.00001" />
|
||||
<geom class="contact_geom" material="floor_plane" name="table_te_context_spacennis_net" pos="0 0 0.08625" rgba="0 0 1 0.5" size="0.01 0.915 0.07625" type="box" />
|
||||
<geom class="viz" name="left_while_line" pos="0 -0.7425 0.01" rgba="1 1 1 1" size="1.37 .02 .001" type="box" />
|
||||
<geom class="viz" name="center_while_line" pos="0 0 0.01" rgba="1 1 1 1" size="1.37 .01 .001" type="box" />
|
||||
<geom class="viz" name="right_while_line" pos="0 0.7425 0.01" rgba="1 1 1 1" size="1.37 .02 .001" type="box" />
|
||||
<geom class="viz" name="right_side_line" pos="1.35 0 0.01" rgba="1 1 1 1" size="0.02 .7625 .001" type="box" />
|
||||
<geom class="viz" name="left_side_line" pos="-1.35 0 0.01" rgba="1 1 1 1" size="0.02 .7625 .001" type="box" />
|
||||
</body>
|
||||
<body name="achieved_pos" pos="0 0 0.5">
|
||||
<geom class="viz" name="achieved_point_geom" pos="0 0 0" rgba="0 1 0 1" size="0.02 0.001" type="cylinder" />
|
||||
</body>
|
||||
<body name="right_achieved_pos" pos="0 0 0.5">
|
||||
<geom class="viz" name="hitting_achieved_point_geom" pos="0 0 0" rgba="1 0 0 1" size="0.02 0.001" type="cylinder" />
|
||||
</body>
|
||||
<body name="target_point" pos="0 0 0.5">
|
||||
<geom class="viz" name="target_point_geom" pos="0 0 0" rgba="1 1 0 1" size="0.02 0.001" type="cylinder" />
|
||||
</body>
|
||||
</body>
|
||||
</mujocoinclude>
|
10
alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
Normal file
10
alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<mujocoinclude>
|
||||
<body name="target_ball" pos="0 0 0">
|
||||
<joint axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>
|
||||
<joint axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>
|
||||
<joint axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>
|
||||
<geom size="0.025 0.025 0.025" type="sphere" condim="4" name="target_ball_contact" rgba="1 1 0 1" mass="0.1"
|
||||
friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="0.1 0.03" priority="1"/>
|
||||
<site name="target_ball" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"/>
|
||||
</body>
|
||||
</mujocoinclude>
|
47
alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
Normal file
47
alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
Normal file
@ -0,0 +1,47 @@
|
||||
<mujocoinclude>
|
||||
<actuator>
|
||||
|
||||
<!-- <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="100.0" />-->
|
||||
<!-- <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="162.0" />-->
|
||||
<!-- <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="100.0" />-->
|
||||
<!-- <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="122.0" />-->
|
||||
<!-- <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="100.0" />-->
|
||||
<!-- <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="102.0" />-->
|
||||
<!-- <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="100.0" />-->
|
||||
|
||||
<!-- <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="151.0" ctrllimited="true"/>-->
|
||||
<!-- <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="125.0" ctrllimited="true"/>-->
|
||||
<!-- <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="122.0" ctrllimited="true"/>-->
|
||||
<!-- <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="121.0" ctrllimited="true"/>-->
|
||||
<!-- <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
|
||||
<!-- <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="103.0" ctrllimited="true"/>-->
|
||||
<!-- <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
|
||||
|
||||
<!-- <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="100.0" ctrllimited="true"/>-->
|
||||
<!-- <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="600.0" ctrllimited="true"/>-->
|
||||
<!-- <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="122.0" ctrllimited="true"/>-->
|
||||
<!-- <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="500.0" ctrllimited="true"/>-->
|
||||
<!-- <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
|
||||
<!-- <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="103.0" ctrllimited="true"/>-->
|
||||
<!-- <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
|
||||
|
||||
<position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="800.0" ctrllimited="true"/>
|
||||
<position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="800.0" ctrllimited="true"/>
|
||||
<position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="800.0" ctrllimited="true"/>
|
||||
<position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="800.0" ctrllimited="true"/>
|
||||
<position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="100.0" ctrllimited="true"/>
|
||||
<position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="1000.0" ctrllimited="true"/>
|
||||
<position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="100.0" ctrllimited="true"/>
|
||||
|
||||
|
||||
<!-- <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="1600.0" ctrllimited="true"/>-->
|
||||
<!--<!– <velocity ctrlrange="-50 50" joint="wam/base_yaw_joint_right" kv="100" ctrllimited="true"/>–>-->
|
||||
|
||||
<!-- <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="2000.0" ctrllimited="true"/>-->
|
||||
<!-- <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="800.0" ctrllimited="true"/>-->
|
||||
<!-- <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="1200.0" ctrllimited="true"/>-->
|
||||
<!-- <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="100.0" ctrllimited="true"/>-->
|
||||
<!-- <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="2000.0" ctrllimited="true"/>-->
|
||||
<!-- <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="100.0" ctrllimited="true"/>-->
|
||||
</actuator>
|
||||
</mujocoinclude>
|
46
alr_envs/alr/mujoco/table_tennis/xml/shared.xml
Normal file
46
alr_envs/alr/mujoco/table_tennis/xml/shared.xml
Normal file
@ -0,0 +1,46 @@
|
||||
<mujocoinclude>
|
||||
<default>
|
||||
<default class="wam">
|
||||
<joint type="hinge" limited="true" pos="0 0 0" axis="0 0 1"/>
|
||||
</default>
|
||||
<default class="viz">
|
||||
<geom type="mesh" contype="0" conaffinity="0" group="1" rgba="1 1 1 1"/>
|
||||
</default>
|
||||
<default class="col">
|
||||
<geom type="mesh" contype="0" conaffinity="1" group="0" rgba="1 1 1 1"/>
|
||||
</default>
|
||||
<default class="contact_geom">
|
||||
<geom condim="4" friction="0.1 0.1 0.1" margin="0" solimp="1 1 0" solref="0.1 0.03"/>
|
||||
</default>
|
||||
</default>
|
||||
<asset>
|
||||
<mesh file="base_link_fine.stl"/>
|
||||
<mesh file="base_link_convex.stl"/>
|
||||
<mesh file="shoulder_link_fine.stl"/>
|
||||
<mesh file="shoulder_link_convex_decomposition_p1.stl"/>
|
||||
<mesh file="shoulder_link_convex_decomposition_p2.stl"/>
|
||||
<mesh file="shoulder_link_convex_decomposition_p3.stl"/>
|
||||
<mesh file="shoulder_pitch_link_fine.stl"/>
|
||||
<mesh file="shoulder_pitch_link_convex.stl"/>
|
||||
<mesh file="upper_arm_link_fine.stl"/>
|
||||
<mesh file="upper_arm_link_convex_decomposition_p1.stl"/>
|
||||
<mesh file="upper_arm_link_convex_decomposition_p2.stl"/>
|
||||
<mesh file="elbow_link_fine.stl"/>
|
||||
<mesh file="elbow_link_convex.stl"/>
|
||||
<mesh file="forearm_link_fine.stl"/>
|
||||
<mesh file="forearm_link_convex_decomposition_p1.stl"/>
|
||||
<mesh file="forearm_link_convex_decomposition_p2.stl"/>
|
||||
<mesh file="wrist_yaw_link_fine.stl"/>
|
||||
<mesh file="wrist_yaw_link_convex_decomposition_p1.stl"/>
|
||||
<mesh file="wrist_yaw_link_convex_decomposition_p2.stl"/>
|
||||
<mesh file="wrist_pitch_link_fine.stl"/>
|
||||
<mesh file="wrist_pitch_link_convex_decomposition_p1.stl"/>
|
||||
<mesh file="wrist_pitch_link_convex_decomposition_p2.stl"/>
|
||||
<mesh file="wrist_pitch_link_convex_decomposition_p3.stl"/>
|
||||
<mesh file="wrist_palm_link_fine.stl"/>
|
||||
<mesh file="wrist_palm_link_convex.stl"/>
|
||||
<texture builtin="checker" height="512" name="texplane" rgb1=".2 .3 .4" rgb2=".1 0.15 0.2" type="2d"
|
||||
width="512"/>
|
||||
<material name="floor_plane" reflectance="0.5" texrepeat="1 1" texture="texplane" texuniform="true"/>
|
||||
</asset>
|
||||
</mujocoinclude>
|
18
alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
Normal file
18
alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
Normal file
@ -0,0 +1,18 @@
|
||||
<mujoco model="table_tennis(v0.1)">
|
||||
<compiler angle="radian" coordinate="local" meshdir="../../meshes/wam" />
|
||||
<option gravity="0 0 -9.81" timestep="0.002">
|
||||
<flag warmstart="enable" />
|
||||
</option>
|
||||
<custom>
|
||||
<numeric data="0 0 0 0 0 0 0" name="START_ANGLES" />
|
||||
</custom>
|
||||
<include file="shared.xml" />
|
||||
<worldbody>
|
||||
<light cutoff="60" diffuse="1 1 1" dir="-.1 -.2 -1.3" directional="true" exponent="1" pos=".1 .2 1.3" specular=".1 .1 .1" />
|
||||
<geom conaffinity="1" contype="1" material="floor_plane" name="floor" pos="0 0 0" size="10 5 1" type="plane" />
|
||||
<include file="include_table.xml" />
|
||||
<include file="include_barrett_wam_7dof_right.xml" />
|
||||
<include file="include_target_ball.xml" />
|
||||
</worldbody>
|
||||
<include file="include_7_motor_actuator.xml" />
|
||||
</mujoco>
|
@ -15,10 +15,10 @@ def _spec_to_box(spec):
|
||||
assert s.dtype == np.float64 or s.dtype == np.float32, f"Only float64 and float32 types are allowed, instead {s.dtype} was found"
|
||||
dim = int(np.prod(s.shape))
|
||||
if type(s) == specs.Array:
|
||||
bound = np.inf * np.ones(dim, dtype=np.float32)
|
||||
bound = np.inf * np.ones(dim, dtype=s.dtype)
|
||||
return -bound, bound
|
||||
elif type(s) == specs.BoundedArray:
|
||||
zeros = np.zeros(dim, dtype=np.float32)
|
||||
zeros = np.zeros(dim, dtype=s.dtype)
|
||||
return s.minimum + zeros, s.maximum + zeros
|
||||
|
||||
mins, maxs = [], []
|
||||
@ -29,7 +29,7 @@ def _spec_to_box(spec):
|
||||
low = np.concatenate(mins, axis=0)
|
||||
high = np.concatenate(maxs, axis=0)
|
||||
assert low.shape == high.shape
|
||||
return spaces.Box(low, high, dtype=np.float32)
|
||||
return spaces.Box(low, high, dtype=s.dtype)
|
||||
|
||||
|
||||
def _flatten_obs(obs: collections.MutableMapping):
|
||||
@ -113,7 +113,7 @@ class DMCWrapper(core.Env):
|
||||
if self._channels_first:
|
||||
obs = obs.transpose(2, 0, 1).copy()
|
||||
else:
|
||||
obs = _flatten_obs(time_step.observation)
|
||||
obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype)
|
||||
return obs
|
||||
|
||||
@property
|
||||
|
Loading…
Reference in New Issue
Block a user