support for contexts, policy classes, pd controller example, breaking changes etc

This commit is contained in:
Maximilian Huettenrauch 2021-02-11 10:49:57 +01:00
parent 07195fa2dc
commit c81378b9e7
9 changed files with 807 additions and 262 deletions

View File

@ -0,0 +1,247 @@
from collections import OrderedDict
import os
from gym import error, spaces
from gym.utils import seeding
import numpy as np
from os import path
import gym
try:
import mujoco_py
except ImportError as e:
raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
DEFAULT_SIZE = 500
def convert_observation_to_space(observation):
if isinstance(observation, dict):
space = spaces.Dict(OrderedDict([
(key, convert_observation_to_space(value))
for key, value in observation.items()
]))
elif isinstance(observation, np.ndarray):
low = np.full(observation.shape, -float('inf'), dtype=np.float32)
high = np.full(observation.shape, float('inf'), dtype=np.float32)
space = spaces.Box(low, high, dtype=observation.dtype)
else:
raise NotImplementedError(type(observation), observation)
return space
class AlrMujocoEnv(gym.Env):
"""
Superclass for all MuJoCo environments.
"""
def __init__(self, model_path, n_substeps, apply_gravity_comp=True):
"""
Args:
model_path: path to xml file
n_substeps: how many steps mujoco does per call to env.step
use_servo: use actuator defined in xml, use False for direct torque control
"""
if model_path.startswith("/"):
fullpath = model_path
else:
fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
if not path.exists(fullpath):
raise IOError("File %s does not exist" % fullpath)
self.n_substeps = n_substeps
self.apply_gravity_comp = apply_gravity_comp
self.model = mujoco_py.load_model_from_path(fullpath)
self.sim = mujoco_py.MjSim(self.model, nsubsteps=n_substeps)
self.data = self.sim.data
self.viewer = None
self._viewers = {}
self.metadata = {
'render.modes': ['human', 'rgb_array', 'depth_array'],
'video.frames_per_second': int(np.round(1.0 / self.dt))
}
self.init_qpos = self.sim.data.qpos.ravel().copy()
self.init_qvel = self.sim.data.qvel.ravel().copy()
self._set_action_space()
# action = self.action_space.sample()
# observation, _reward, done, _info = self.step(action)
# assert not done
observation = self.reset()
self._set_observation_space(observation)
self.seed()
@property
def current_pos(self):
"""
By default returns the joint positions of all simulated objects. May be overriden in subclass.
"""
return self.sim.data.qpos
@property
def current_vel(self):
"""
By default returns the joint velocities of all simulated objects. May be overriden in subclass.
"""
return self.sim.data.qvel
def extend_des_pos(self, des_pos):
"""
In a simplified environment, the actions may only control a subset of all the joints.
Extend the trajectory to match the environments full action space
Args:
des_pos:
Returns:
"""
pass
def extend_des_vel(self, des_vel):
pass
def _set_action_space(self):
bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
low, high = bounds.T
self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
return self.action_space
def _set_observation_space(self, observation):
self.observation_space = convert_observation_to_space(observation)
return self.observation_space
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
# methods to override:
# ----------------------------
def configure(self, *args, **kwargs):
"""
Helper method to set certain environment properties such as contexts in contextual environments since reset()
doesn't take arguments. Should be called before/after reset(). TODO: before or after?
"""
pass
def reset_model(self):
"""
Reset the robot degrees of freedom (qpos and qvel).
Implement this in each subclass.
"""
raise NotImplementedError
def viewer_setup(self):
"""
This method is called when the viewer is initialized.
Optionally implement this method, if you need to tinker with camera position
and so forth.
"""
pass
# -----------------------------
def reset(self):
self.sim.reset()
ob = self.reset_model()
return ob
def set_state(self, qpos, qvel):
assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
old_state = self.sim.get_state()
new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
old_state.act, old_state.udd_state)
self.sim.set_state(new_state)
self.sim.forward()
@property
def dt(self):
return self.model.opt.timestep * self.n_substeps
def do_simulation(self, ctrl):
"""
Additionally returns whether there was an error while stepping the simulation
"""
error_in_sim = False
num_actuations = len(ctrl)
if self.apply_gravity_comp:
self.sim.data.ctrl[:num_actuations] = ctrl + self.sim.data.qfrc_bias[:num_actuations].copy() / self.model.actuator_gear[:, 0]
else:
self.sim.data.ctrl[:num_actuations] = ctrl
try:
self.sim.step()
except mujoco_py.builder.MujocoException as e:
error_in_sim = True
return error_in_sim
def render(self,
mode='human',
width=DEFAULT_SIZE,
height=DEFAULT_SIZE,
camera_id=None,
camera_name=None):
if mode == 'rgb_array' or mode == 'depth_array':
if camera_id is not None and camera_name is not None:
raise ValueError("Both `camera_id` and `camera_name` cannot be"
" specified at the same time.")
no_camera_specified = camera_name is None and camera_id is None
if no_camera_specified:
camera_name = 'track'
if camera_id is None and camera_name in self.model._camera_name2id:
camera_id = self.model.camera_name2id(camera_name)
self._get_viewer(mode).render(width, height, camera_id=camera_id)
if mode == 'rgb_array':
# window size used for old mujoco-py:
data = self._get_viewer(mode).read_pixels(width, height, depth=False)
# original image is upside-down, so flip it
return data[::-1, :, :]
elif mode == 'depth_array':
self._get_viewer(mode).render(width, height)
# window size used for old mujoco-py:
# Extract depth part of the read_pixels() tuple
data = self._get_viewer(mode).read_pixels(width, height, depth=True)[1]
# original image is upside-down, so flip it
return data[::-1, :]
elif mode == 'human':
self._get_viewer(mode).render()
def close(self):
if self.viewer is not None:
# self.viewer.finish()
self.viewer = None
self._viewers = {}
def _get_viewer(self, mode):
self.viewer = self._viewers.get(mode)
if self.viewer is None:
if mode == 'human':
self.viewer = mujoco_py.MjViewer(self.sim)
elif mode == 'rgb_array' or mode == 'depth_array':
self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)
self.viewer_setup()
self._viewers[mode] = self.viewer
return self.viewer
def get_body_com(self, body_name):
return self.data.get_body_xpos(body_name)
def state_vector(self):
return np.concatenate([
self.sim.data.qpos.flat,
self.sim.data.qvel.flat
])

View File

@ -0,0 +1,21 @@
class AlrReward:
"""
A base class for non-Markovian reward functions which may need trajectory information to calculate an episodic
reward. Call the methods in reset() and step() of the environment.
"""
# methods to override:
# ----------------------------
def reset(self, *args, **kwargs):
"""
Reset the reward function, empty state buffers before an episode, set contexts that influence reward, etc.
"""
raise NotImplementedError
def compute_reward(self, *args, **kwargs):
"""
Returns: Useful things to return are reward values, success flags or crash flags
"""
raise NotImplementedError

View File

@ -0,0 +1,360 @@
<mujoco model="wam(v1.31)">
<compiler angle="radian" meshdir="meshes/" />
<option timestep="0.0005" integrator="Euler" />
<size njmax="500" nconmax="100" />
<default class="main">
<joint limited="true" frictionloss="0.001" />
<default class="viz">
<geom type="mesh" contype="0" conaffinity="0" group="1" rgba="0.7 0.7 0.7 1" />
</default>
<default class="col">
<geom type="mesh" contype="0" rgba="0.5 0.6 0.7 1" />
</default>
</default>
<asset>
<texture type="2d" name="groundplane" builtin="checker" mark="edge" rgb1="0.25 0.26 0.25" rgb2="0.22 0.22 0.22" markrgb="0.3 0.3 0.3" width="100" height="100" />
<material name="MatGnd" texture="groundplane" texrepeat="5 5" specular="1" shininess="0.3" reflectance="1e-05" />
<mesh name="base_link_fine" file="base_link_fine.stl" />
<mesh name="base_link_convex" file="base_link_convex.stl" />
<mesh name="shoulder_link_fine" file="shoulder_link_fine.stl" />
<mesh name="shoulder_link_convex_decomposition_p1" file="shoulder_link_convex_decomposition_p1.stl" />
<mesh name="shoulder_link_convex_decomposition_p2" file="shoulder_link_convex_decomposition_p2.stl" />
<mesh name="shoulder_link_convex_decomposition_p3" file="shoulder_link_convex_decomposition_p3.stl" />
<mesh name="shoulder_pitch_link_fine" file="shoulder_pitch_link_fine.stl" />
<mesh name="shoulder_pitch_link_convex" file="shoulder_pitch_link_convex.stl" />
<mesh name="upper_arm_link_fine" file="upper_arm_link_fine.stl" />
<mesh name="upper_arm_link_convex_decomposition_p1" file="upper_arm_link_convex_decomposition_p1.stl" />
<mesh name="upper_arm_link_convex_decomposition_p2" file="upper_arm_link_convex_decomposition_p2.stl" />
<mesh name="elbow_link_fine" file="elbow_link_fine.stl" />
<mesh name="elbow_link_convex" file="elbow_link_convex.stl" />
<mesh name="forearm_link_fine" file="forearm_link_fine.stl" />
<mesh name="forearm_link_convex_decomposition_p1" file="forearm_link_convex_decomposition_p1.stl" />
<mesh name="forearm_link_convex_decomposition_p2" file="forearm_link_convex_decomposition_p2.stl" />
<mesh name="wrist_yaw_link_fine" file="wrist_yaw_link_fine.stl" />
<mesh name="wrist_yaw_link_convex_decomposition_p1" file="wrist_yaw_link_convex_decomposition_p1.stl" />
<mesh name="wrist_yaw_link_convex_decomposition_p2" file="wrist_yaw_link_convex_decomposition_p2.stl" />
<mesh name="wrist_pitch_link_fine" file="wrist_pitch_link_fine.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p1" file="wrist_pitch_link_convex_decomposition_p1.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p2" file="wrist_pitch_link_convex_decomposition_p2.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p3" file="wrist_pitch_link_convex_decomposition_p3.stl" />
<mesh name="wrist_palm_link_fine" file="wrist_palm_link_fine.stl" />
<mesh name="wrist_palm_link_convex" file="wrist_palm_link_convex.stl" />
<mesh name="cup1" file="cup_split1.stl" scale="0.001 0.001 0.001" />
<mesh name="cup2" file="cup_split2.stl" scale="0.001 0.001 0.001" />
<mesh name="cup3" file="cup_split3.stl" scale="0.001 0.001 0.001" />
<mesh name="cup4" file="cup_split4.stl" scale="0.001 0.001 0.001" />
<mesh name="cup5" file="cup_split5.stl" scale="0.001 0.001 0.001" />
<mesh name="cup6" file="cup_split6.stl" scale="0.001 0.001 0.001" />
<mesh name="cup7" file="cup_split7.stl" scale="0.001 0.001 0.001" />
<mesh name="cup8" file="cup_split8.stl" scale="0.001 0.001 0.001" />
<mesh name="cup9" file="cup_split9.stl" scale="0.001 0.001 0.001" />
<mesh name="cup10" file="cup_split10.stl" scale="0.001 0.001 0.001" />
<mesh name="cup11" file="cup_split11.stl" scale="0.001 0.001 0.001" />
<mesh name="cup12" file="cup_split12.stl" scale="0.001 0.001 0.001" />
<mesh name="cup13" file="cup_split13.stl" scale="0.001 0.001 0.001" />
<mesh name="cup14" file="cup_split14.stl" scale="0.001 0.001 0.001" />
<mesh name="cup15" file="cup_split15.stl" scale="0.001 0.001 0.001" />
<mesh name="cup16" file="cup_split16.stl" scale="0.001 0.001 0.001" />
<mesh name="cup17" file="cup_split17.stl" scale="0.001 0.001 0.001" />
<mesh name="cup18" file="cup_split18.stl" scale="0.001 0.001 0.001" />
</asset>
<worldbody>
<geom name="ground" size="1.5 2 1" type="plane" material="MatGnd" />
<light pos="0.1 0.2 1.3" dir="-0.0758098 -0.32162 -0.985527" directional="true" cutoff="60" exponent="1" diffuse="1 1 1" specular="0.1 0.1 0.1" />
<body name="wam/base_link" pos="0 0 0.6">
<inertial pos="6.93764e-06 0.0542887 0.076438" quat="0.496481 0.503509 -0.503703 0.496255" mass="27.5544" diaginertia="0.432537 0.318732 0.219528" />
<geom class="viz" quat="0.707107 0 0 -0.707107" mesh="base_link_fine" />
<geom class="col" quat="0.707107 0 0 -0.707107" mesh="base_link_convex" />
<body name="wam/shoulder_yaw_link" pos="0 0 0.16" quat="0.707107 0 0 -0.707107">
<inertial pos="-0.00443422 -0.00066489 -0.12189" quat="0.999995 0.000984795 0.00270132 0.00136071" mass="10.7677" diaginertia="0.507411 0.462983 0.113271" />
<joint name="wam/base_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.6 2.6" />
<geom class="viz" pos="0 0 0.186" mesh="shoulder_link_fine" />
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p1" />
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p2" />
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p3" />
<body name="wam/shoulder_pitch_link" pos="0 0 0.184" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.00236983 -0.0154211 0.0310561" quat="0.961781 -0.272983 0.0167269 0.0133385" mass="3.87494" diaginertia="0.0214207 0.0167101 0.0126465" />
<joint name="wam/shoulder_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.985 1.985" />
<geom class="viz" mesh="shoulder_pitch_link_fine" />
<geom class="col" mesh="shoulder_pitch_link_convex" />
<body name="wam/upper_arm_link" pos="0 -0.505 0" quat="0.707107 0.707107 0 0">
<inertial pos="-0.0382586 3.309e-05 -0.207508" quat="0.705455 0.0381914 0.0383402 0.706686" mass="1.80228" diaginertia="0.0665697 0.0634285 0.00622701" />
<joint name="wam/shoulder_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.8 2.8" />
<geom class="viz" pos="0 0 -0.505" mesh="upper_arm_link_fine" />
<geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p1" />
<geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p2" />
<body name="wam/forearm_link" pos="0.045 0 0.045" quat="0.707107 -0.707107 0 0">
<inertial pos="0.00498512 -0.132717 -0.00022942" quat="0.546303 0.447151 -0.548676 0.447842" mass="2.40017" diaginertia="0.0196896 0.0152225 0.00749914" />
<joint name="wam/elbow_pitch_joint" pos="0 0 0" axis="0 0 1" range="-0.9 3.14159" />
<geom class="viz" mesh="elbow_link_fine" />
<geom class="col" mesh="elbow_link_convex" />
<geom class="viz" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_fine" />
<geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p1" name="forearm_link_convex_decomposition_p1_geom" />
<geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p2" name="forearm_link_convex_decomposition_p2_geom" />
<body name="wam/wrist_yaw_link" pos="-0.045 0 0" quat="0.707107 0.707107 0 0">
<inertial pos="8.921e-05 0.00435824 -0.00511217" quat="0.708528 -0.000120667 0.000107481 0.705683" mass="0.12376" diaginertia="0.0112011 0.0111887 7.58188e-05" />
<joint name="wam/wrist_yaw_joint" pos="0 0 0" axis="0 0 1" range="-4.55 1.25" />
<geom class="viz" pos="0 0 0.3" mesh="wrist_yaw_link_fine" />
<geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p1" name="wrist_yaw_link_convex_decomposition_p1_geom" />
<geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p2" name="wrist_yaw_link_convex_decomposition_p2_geom" />
<body name="wam/wrist_pitch_link" pos="0 0 0.3" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.00012262 -0.0246834 -0.0170319" quat="0.994687 -0.102891 0.000824211 -0.00336105" mass="0.417974" diaginertia="0.000555166 0.000463174 0.00023407" />
<joint name="wam/wrist_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.5707 1.5707" />
<geom class="viz" mesh="wrist_pitch_link_fine" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p1" name="wrist_pitch_link_convex_decomposition_p1_geom" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p2" name="wrist_pitch_link_convex_decomposition_p2_geom" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p3" name="wrist_pitch_link_convex_decomposition_p3_geom" />
<body name="wam/wrist_palm_link" pos="0 -0.06 0" quat="0.707107 0.707107 0 0">
<inertial pos="-7.974e-05 -0.00323552 -0.00016313" quat="0.594752 0.382453 0.382453 0.594752" mass="0.0686475" diaginertia="7.408e-05 3.81466e-05 3.76434e-05" />
<joint name="wam/palm_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.7 2.7" />
<geom class="viz" pos="0 0 -0.06" mesh="wrist_palm_link_fine" />
<geom class="col" pos="0 0 -0.06" mesh="wrist_palm_link_convex" name="wrist_palm_link_convex_geom" />
<body name="cup" pos="0 0 0" quat="-0.000203673 0 0 1">
<inertial pos="-3.75236e-10 8.27811e-05 0.0947015" quat="0.999945 -0.0104888 0 0" mass="0.132" diaginertia="0.000285643 0.000270485 9.65696e-05" />
<geom name="cup_geom1" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup1" />
<geom name="cup_geom2" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup2" />
<geom name="cup_geom3" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup3" />
<geom name="cup_geom4" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup4" />
<geom name="cup_geom5" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup5" />
<geom name="cup_geom6" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup6" />
<geom name="cup_geom7" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup7" />
<geom name="cup_geom8" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup8" />
<geom name="cup_geom9" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup9" />
<geom name="cup_geom10" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup10" />
<geom name="cup_base" pos="0 -0.035 0.1165" euler="-1.57 0 0" type="cylinder" size="0.038 0.0045" solref="-10000 -100"/>
<!-- <geom name="cup_base_contact" pos="0 -0.025 0.1165" euler="-1.57 0 0" type="cylinder" size="0.03 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>-->
<geom name="cup_base_contact" pos="0 -0.005 0.1165" euler="-1.57 0 0" type="cylinder" size="0.02 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>
<!-- <geom name="cup_geom11" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup11" />-->
<!-- <geom name="cup_geom12" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup12" />-->
<!-- <geom name="cup_geom13" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup13" />-->
<!-- <geom name="cup_geom14" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup14" />-->
<geom name="cup_geom15" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup15" />
<geom name="cup_geom16" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup16" />
<geom name="cup_geom17" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup17" />
<geom name="cup_geom18" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup18" />
<site name="cup_goal" pos="0 0.05 0.1165" rgba="255 0 0 1"/>
<site name="cup_goal_final" pos="0 -0.025 0.1165" rgba="0 255 0 1"/>
<body name="B0" pos="0 -0.045 0.1165" quat="0.707388 0 0 -0.706825">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<geom name="G0" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B1" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_1" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_1" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G1" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B2" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_2" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_2" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G2" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B3" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_3" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_3" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G3" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B4" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_4" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_4" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G4" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B5" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_5" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_5" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G5" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B6" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_6" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_6" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G6" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B7" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_7" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_7" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G7" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B8" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_8" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_8" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G8" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B9" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_9" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_9" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G9" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B10" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_10" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_10" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G10" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B11" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_11" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_11" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G11" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B12" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_12" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_12" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G12" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B13" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_13" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_13" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G13" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B14" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_14" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_14" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G14" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B15" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_15" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_15" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G15" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B16" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_16" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_16" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G16" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B17" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_17" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_17" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G17" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B18" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_18" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_18" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G18" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B19" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_19" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_19" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G19" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B20" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_20" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_20" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G20" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B21" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_21" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_21" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G21" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B22" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_22" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_22" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G22" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B23" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_23" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_23" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G23" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B24" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_24" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_24" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G24" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B25" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_25" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_25" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G25" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B26" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_26" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_26" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G26" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B27" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_27" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_27" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G27" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B28" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_28" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_28" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G28" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="B29" pos="0.0107 0 0">
<inertial pos="0 0 0" quat="0.707107 0 0.707107 0" mass="7.4927e-05" diaginertia="5.87e-10 5.87e-10 3.74635e-11" />
<joint name="J0_29" pos="-0.00535 0 0" axis="0 1 0" group="3" limited="false" damping="0.0001" frictionloss="0" />
<joint name="J1_29" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G29" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="ball">
<geom name="ball_geom" type="sphere" size="0.02" mass="0.021" rgba="0.8 0.2 0.1 1"/>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
<site name="context_point" pos="0 0.05 0.1165" euler="-1.57 0 0" size="0.015" rgba="1 0 0 0.6" type="sphere"/>
<site name="context_point1" pos="-0.5 -0.85 0.8165" euler="-1.57 0 0" size="0.015" rgba="0 1 0 0.3" type="sphere"/>
<site name="context_point2" pos="-0.5 -0.85 1.4165" euler="-1.57 0 0" size="0.015" rgba="0 1 0 0.3" type="sphere"/>
<site name="context_point3" pos="-0.5 -0.35 0.8165" euler="-1.57 0 0" size="0.015" rgba="0 1 0 0.3" type="sphere"/>
<site name="context_point4" pos="-0.5 -0.35 1.4165" euler="-1.57 0 0" size="0.015" rgba="0 1 0 0.3" type="sphere"/>
<site name="context_point5" pos="0.5 -0.85 0.8165" euler="-1.57 0 0" size="0.015" rgba="0 1 0 0.3" type="sphere"/>
<site name="context_point6" pos="0.5 -0.85 1.4165" euler="-1.57 0 0" size="0.015" rgba="0 1 0 0.3" type="sphere"/>
<site name="context_point7" pos="0.5 -0.35 0.8165" euler="-1.57 0 0" size="0.015" rgba="0 1 0 0.3" type="sphere"/>
<site name="context_point8" pos="0.5 -0.35 1.4165" euler="-1.57 0 0" size="0.015" rgba="0 1 0 0.3" type="sphere"/>
<site name="context_space" pos="0 -0.6 1.1165" euler="0 0 0" size="0.5 0.25 0.3" rgba="0 0 1 0.15" type="box"/>
<camera name="visualization" mode="targetbody" target="wam/wrist_yaw_link" pos="1.5 -0.4 2.2"/>
<camera name="experiment" mode="fixed" quat="0.44418059 0.41778323 0.54301123 0.57732103" pos="1.5 -0.3 1.33" />
</worldbody>
<actuator>
<!-- <motor ctrllimited="true" ctrlrange="-150 150" joint="wam/base_yaw_joint"/>-->
<!-- <motor ctrllimited="true" ctrlrange="-125 125" joint="wam/shoulder_pitch_joint"/>-->
<!-- <motor ctrllimited="true" ctrlrange="-40 40" joint="wam/shoulder_yaw_joint"/>-->
<!-- <motor ctrllimited="true" ctrlrange="-60 60" joint="wam/elbow_pitch_joint"/>-->
<!-- <motor ctrllimited="true" ctrlrange="-5 5" joint="wam/wrist_yaw_joint"/>-->
<!-- <motor ctrllimited="true" ctrlrange="-5 5" joint="wam/wrist_pitch_joint"/>-->
<!-- <motor ctrllimited="true" ctrlrange="-2 2" joint="wam/palm_yaw_joint"/>-->
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="150.0" joint="wam/base_yaw_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="125.0" joint="wam/shoulder_pitch_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="40.0" joint="wam/shoulder_yaw_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="60.0" joint="wam/elbow_pitch_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_yaw_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_pitch_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="2.0" joint="wam/palm_yaw_joint"/>
</actuator>
</mujoco>

View File

@ -1,7 +1,8 @@
import numpy as np
from alr_envs.mujoco import alr_reward_fct
class BallInACupReward:
class BallInACupReward(alr_reward_fct.AlrReward):
def __init__(self, sim_time):
self.sim_time = sim_time
@ -14,7 +15,6 @@ class BallInACupReward:
"forearm_link_convex_decomposition_p1_geom",
"forearm_link_convex_decomposition_p2_geom"]
self.ctxt_id = None
self.ball_id = None
self.ball_collision_id = None
self.goal_id = None
@ -34,8 +34,7 @@ class BallInACupReward:
self.dists_final = []
self.costs = []
def compute_reward(self, action, sim, step):
self.ctxt_id = sim.model._site_name2id['context_point']
def compute_reward(self, action, sim, step, context=None):
self.ball_id = sim.model._body_name2id["ball"]
self.ball_collision_id = sim.model._geom_name2id["ball_geom"]
self.goal_id = sim.model._site_name2id["cup_goal"]
@ -50,59 +49,26 @@ class BallInACupReward:
goal_final_pos = sim.data.site_xpos[self.goal_final_id]
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
# dists_ctxt.append(np.linalg.norm(ball_pos - ctxt))
self.ball_traj[step, :] = ball_pos
if self.check_collision(sim):
return -1000, False, True
# self._get_cost(ball_pos, goal_pos, goal_final_pos, action,
# sim.data.get_site_xpos('context_point').copy(), step)
# min_dist = np.min(self.dists)
# dist_final = self.dists_final[-1]
action_cost = np.sum(np.square(action))
# cost = self.get_stage_wise_cost(ball_in_cup, min_dist, self.dists_final[-1]) # , self.dists_ctxt[-1])
if step == self.sim_time - 1:
min_dist = np.min(self.dists)
dist_final = self.dists_final[-1]
cost = 0.5 * min_dist + 0.5 * dist_final
# cost = 3 + 2 * (0.5 * min_dist ** 2 + 0.5 * dist_final ** 2)
reward = np.exp(-2 * min_dist) - 1e-5 * action_cost
success = dist_final < 0.05 and min_dist < 0.05
reward = np.exp(-2 * cost) - 1e-5 * action_cost
success = dist_final < 0.05 and ball_in_cup
else:
cost = 0
reward = - 1e-5 * action_cost
success = False
# action_cost = np.mean(np.sum(np.square(torques), axis=1), axis=0)
return reward, success, False
def get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final): #, dist_to_ctxt):
# stop_sim = False
cost = 3 + 2 * (0.5 * min_dist ** 2 + 0.5 * dist_final ** 2)
# if not ball_in_cup:
# # cost = 3 + 2*(0.5 * min_dist + 0.5 * dist_final)
# cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2)
# else:
# # cost = 2*dist_to_ctxt
# cost = 2*dist_to_ctxt**2
# stop_sim = True
# # print(dist_to_ctxt-0.02)
# print('Context Distance:', dist_to_ctxt)
return cost
def _get_cost(self, ball_pos, goal_pos, goal_pos_final, u, ctxt, t):
cost = 0
if t == self.sim_time*0.8:
dist = 0.5*np.linalg.norm(goal_pos-ball_pos)**2 + 0.5*np.linalg.norm(goal_pos_final-ball_pos)**2
# dist_ctxt = np.linalg.norm(ctxt-goal_pos)**2
cost = dist # +dist_ctxt
return cost
def check_ball_in_cup(self, sim, ball_collision_id):
cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
for coni in range(0, sim.data.ncon):

View File

@ -1,38 +1,41 @@
from gym.envs.mujoco import mujoco_env
from alr_envs.mujoco import alr_mujoco_env
from gym import utils, spaces
import os
import numpy as np
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
import mujoco_py
class ALRBallInACupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self, pd_control=True):
class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
def __init__(self, reward_function=None):
self._steps = 0
self.pd_control = pd_control
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
"ball-in-a-cup_base" + ".xml")
"biac_base" + ".xml")
self.sim_time = 8 # seconds
self.sim_steps = int(self.sim_time / (0.0005 * 4)) # circular dependency.. sim.dt <-> mujocoenv init <-> reward fct
self.reward_function = BallInACupReward(self.sim_steps)
self.reward_function = reward_function(self.sim_steps)
self.start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57])
self.start_vel = np.zeros(7)
# self.start_pos = np.array([0.58760536, 1.36004913, -0.32072943])
self._q_pos = []
self._q_vel = []
# self.weight_matrix_scale = 50
self.p_gains = 1*np.array([200, 300, 100, 100, 10, 10, 2.5])
self.d_gains = 1*np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
self.context = None
utils.EzPickle.__init__(self)
mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", "ball-in-a-cup_base.xml"),
frame_skip=4)
alr_mujoco_env.AlrMujocoEnv.__init__(self,
self.xml_path,
apply_gravity_comp=True,
n_substeps=4)
@property
def current_pos(self):
@ -42,96 +45,68 @@ class ALRBallInACupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def current_vel(self):
return self.sim.data.qvel[0:7].copy()
def configure(self, context):
self.context = context
def reset_model(self):
init_pos_all = self.init_qpos.copy()
init_pos_robot = self.start_pos
init_vel = np.zeros_like(init_pos_all)
ball_id = self.sim.model._body_name2id["ball"]
goal_final_id = self.sim.model._site_name2id["cup_goal_final"]
# self.set_state(start_pos, start_vel)
self._steps = 0
self.reward_function.reset()
self._q_pos = []
self._q_vel = []
start_pos = init_pos_all
start_pos[0:7] = init_pos_robot
self.set_state(start_pos, init_vel)
# Reset the system
self.sim.data.qpos[:] = init_pos_all
self.sim.data.qvel[:] = init_vel
self.sim.data.qpos[0:7] = init_pos_robot
self.sim.step()
self.sim.data.qpos[:] = init_pos_all
self.sim.data.qvel[:] = init_vel
self.sim.data.qpos[0:7] = init_pos_robot
self.sim.data.body_xpos[ball_id, :] = np.copy(self.sim.data.site_xpos[goal_final_id, :]) - np.array([0., 0., 0.329])
# Stabilize the system around the initial position
for i in range(0, 2000):
self.sim.data.qpos[7:] = 0.
self.sim.data.qvel[7:] = 0.
# self.sim.data.qpos[7] = -0.2
cur_pos = self.sim.data.qpos[0:7].copy()
cur_vel = self.sim.data.qvel[0:7].copy()
trq = self.p_gains * (init_pos_robot - cur_pos) + self.d_gains * (np.zeros_like(init_pos_robot) - cur_vel)
self.sim.data.qfrc_applied[0:7] = trq + self.sim.data.qfrc_bias[:7].copy()
self.sim.step()
# self.render()
for i in range(0, 2000):
cur_pos = self.sim.data.qpos[0:7].copy()
cur_vel = self.sim.data.qvel[0:7].copy()
trq = self.p_gains * (init_pos_robot - cur_pos) + self.d_gains * (np.zeros_like(init_pos_robot) - cur_vel)
self.sim.data.qfrc_applied[0:7] = trq + self.sim.data.qfrc_bias[:7].copy()
self.sim.step()
# self.render()
def do_simulation(self, ctrl, n_frames):
# self.sim.data.qpos[:] = init_pos_all
# self.sim.data.qvel[:] = init_vel
# self.sim.data.qpos[0:7] = init_pos_robot
#
# self.sim.step()
#
# self.sim.data.qpos[:] = init_pos_all
# self.sim.data.qvel[:] = init_vel
# self.sim.data.qpos[0:7] = init_pos_robot
# self.sim.data.body_xpos[ball_id, :] = np.copy(self.sim.data.site_xpos[goal_final_id, :]) - np.array([0., 0., 0.329])
#
# # Stabilize the system around the initial position
# for i in range(0, 500):
# self.sim.data.qpos[7:] = 0.
# self.sim.data.qvel[7:] = 0.
# # self.sim.data.qpos[7] = -0.2
# cur_pos = self.sim.data.qpos[0:7].copy()
# cur_vel = self.sim.data.qvel[0:7].copy()
# des_pos = ctrl[:7]
# des_vel = ctrl[7:]
# trq = self.p_gains * (des_pos - cur_pos) + self.d_gains * (des_vel - cur_vel)
if self.pd_control:
self.sim.data.qfrc_applied[0:7] = ctrl + self.sim.data.qfrc_bias[:7].copy()
else:
self.sim.data.ctrl[:] = ctrl
# trq = self.p_gains * (init_pos_robot - cur_pos) + self.d_gains * (np.zeros_like(init_pos_robot) - cur_vel)
# self.sim.data.qfrc_applied[0:7] = trq + self.sim.data.qfrc_bias[:7].copy()
# self.sim.step()
# self.render()
#
# for i in range(0, 500):
# cur_pos = self.sim.data.qpos[0:7].copy()
# cur_vel = self.sim.data.qvel[0:7].copy()
# trq = self.p_gains * (init_pos_robot - cur_pos) + self.d_gains * (np.zeros_like(init_pos_robot) - cur_vel)
# self.sim.data.qfrc_applied[0:7] = trq + self.sim.data.qfrc_bias[:7].copy()
# self.sim.step()
# self.render()
for _ in range(n_frames):
try:
self.sim.step()
except mujoco_py.builder.MujocoException as e:
print("Error in simulation: " + str(e))
# error = True
# Copy the current torque as if it would have been applied until the end of the trajectory
# for i in range(k + 1, sim_time):
# torques.append(trq)
return True
return False
return self._get_obs()
def step(self, a):
# Apply gravity compensation
# if not np.all(self.sim.data.qfrc_applied[:7] == self.sim.data.qfrc_bias[:7]):
# self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7]
reward_dist = 0.0
angular_vel = 0.0
# if self._steps >= self.steps_before_reward:
# vec = self.get_body_com("fingertip") - self.get_body_com("target")
# reward_dist -= self.reward_weight * np.linalg.norm(vec)
# angular_vel -= np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
reward_ctrl = - np.square(a).sum()
# reward_balance = - self.balance_weight * np.abs(
# angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad"))
#
# reward = reward_dist + reward_ctrl + angular_vel + reward_balance
# self.do_simulation(a, self.frame_skip)
crash = self.do_simulation(a)
joint_cons_viol = self.check_traj_in_joint_limits()
crash = self.do_simulation(a, self.frame_skip)
self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
@ -153,17 +128,7 @@ class ALRBallInACupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def check_traj_in_joint_limits(self):
return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
def check_collision(self):
for coni in range(0, sim.data.ncon):
con = sim.data.contact[coni]
collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
if collision or collision_trans:
return True
return False
# TODO: extend observation space
def _get_obs(self):
theta = self.sim.data.qpos.flat[:7]
return np.concatenate([
@ -187,8 +152,9 @@ class ALRBallInACupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
des_vel_full[5] = des_vel[2]
return des_vel_full
if __name__ == "__main__":
env = ALRBallInACupEnv()
env = ALRBallInACupEnv(reward_function=BallInACupReward)
env.reset()
env.render()
for i in range(4000):
@ -196,7 +162,8 @@ if __name__ == "__main__":
# test with random actions
# ac = 0.1 * env.action_space.sample()
# ac = -np.array([i, i, i]) / 10000 + np.array([env.start_pos[1], env.start_pos[3], env.start_pos[5]])
ac = np.array([0., -0.1, 0, 0, 0, 0, 0])
# ac = np.array([0., -10, 0, -1, 0, 1, 0])
ac = np.array([0.,0., 0, 0, 0, 0, 0])
# ac[0] += np.pi/2
obs, rew, d, info = env.step(ac)
env.render()

View File

@ -24,14 +24,14 @@ class DmpAsyncVectorEnv(gym.vector.AsyncVectorEnv):
n=n_samples,
fn=np.zeros)
def __call__(self, params):
return self.rollout(params)
def __call__(self, params, contexts=None):
return self.rollout(params, contexts)
def rollout_async(self, actions):
def rollout_async(self, params, contexts):
"""
Parameters
----------
actions : iterable of samples from `action_space`
params : iterable of samples from `action_space`
List of actions.
"""
self._assert_is_running()
@ -40,11 +40,17 @@ class DmpAsyncVectorEnv(gym.vector.AsyncVectorEnv):
'for a pending call to `{0}` to complete.'.format(
self._state.value), self._state.value)
actions = np.atleast_2d(actions)
split_actions = np.array_split(actions, np.minimum(len(actions), self.num_envs))
for pipe, action in zip(self.parent_pipes, split_actions):
pipe.send(('rollout', action))
for pipe in self.parent_pipes[len(split_actions):]:
params = np.atleast_2d(params)
split_params = np.array_split(params, np.minimum(len(params), self.num_envs))
if contexts is None:
split_contexts = np.array_split([None, ] * len(params), np.minimum(len(params), self.num_envs))
else:
split_contexts = np.array_split(contexts, np.minimum(len(contexts), self.num_envs))
assert np.all([len(p) == len(c) for p, c in zip(split_params, split_contexts)])
for pipe, param, context in zip(self.parent_pipes, split_params, split_contexts):
pipe.send(('rollout', (param, context)))
for pipe in self.parent_pipes[len(split_params):]:
pipe.send(('idle', None))
self._state = AsyncState.WAITING_ROLLOUT
@ -98,8 +104,8 @@ class DmpAsyncVectorEnv(gym.vector.AsyncVectorEnv):
return np.array(rewards), infos
def rollout(self, actions):
self.rollout_async(actions)
def rollout(self, actions, contexts):
self.rollout_async(actions, contexts)
return self.rollout_wait()
@ -123,8 +129,8 @@ def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
rewards = []
dones = []
infos = []
for d in data:
observation, reward, done, info = env.rollout(d)
for p, c in zip(*data):
observation, reward, done, info = env.rollout(p, c)
observations.append(observation)
rewards.append(reward)
dones.append(done)

View File

@ -5,7 +5,19 @@ import numpy as np
import gym
class DmpEnvWrapperBase(gym.Wrapper):
def get_policy_class(policy_type):
if policy_type == "motor":
from alr_envs.utils.policies import PDController
return PDController
elif policy_type == "velocity":
from alr_envs.utils.policies import VelController
return VelController
elif policy_type == "position":
from alr_envs.utils.policies import PosController
return PosController
class DmpEnvWrapper(gym.Wrapper):
def __init__(self,
env,
num_dof,
@ -17,8 +29,9 @@ class DmpEnvWrapperBase(gym.Wrapper):
dt=0.01,
learn_goal=False,
post_traj_time=0.,
policy=None):
super(DmpEnvWrapperBase, self).__init__(env)
policy_type=None,
weights_scale=1.):
super(DmpEnvWrapper, self).__init__(env)
self.num_dof = num_dof
self.num_basis = num_basis
self.dim = num_dof * num_basis
@ -49,17 +62,19 @@ class DmpEnvWrapperBase(gym.Wrapper):
dmp_goal_pos = final_pos
self.dmp.set_weights(dmp_weights, dmp_goal_pos)
self.weights_scale = weights_scale
self.policy = policy
policy_class = get_policy_class(policy_type)
self.policy = policy_class(env)
def __call__(self, params):
def __call__(self, params, contexts=None):
params = np.atleast_2d(params)
observations = []
rewards = []
dones = []
infos = []
for p in params:
observation, reward, done, info = self.rollout(p)
for p, c in zip(params, contexts):
observation, reward, done, info = self.rollout(p, c)
observations.append(observation)
rewards.append(reward)
dones.append(done)
@ -81,82 +96,11 @@ class DmpEnvWrapperBase(gym.Wrapper):
goal_pos = None
weight_matrix = np.reshape(params, [self.num_basis, self.num_dof])
return goal_pos, weight_matrix
return goal_pos, weight_matrix * self.weights_scale
def rollout(self, params, render=False):
def rollout(self, params, context=None, render=False):
""" This function generates a trajectory based on a DMP and then does the usual loop over reset and step"""
raise NotImplementedError
class DmpEnvWrapperPos(DmpEnvWrapperBase):
"""
Wrapper for gym environments which creates a trajectory in joint angle space
"""
def rollout(self, action, render=False):
goal_pos, weight_matrix = self.goal_and_weights(action)
if hasattr(self.env, "weight_matrix_scale"):
weight_matrix = weight_matrix * self.env.weight_matrix_scale
self.dmp.set_weights(weight_matrix, goal_pos)
trajectory, _ = self.dmp.reference_trajectory(self.t)
if self.post_traj_steps > 0:
trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
self._trajectory = trajectory
rews = []
self.env.reset()
for t, traj in enumerate(trajectory):
obs, rew, done, info = self.env.step(traj)
rews.append(rew)
if render:
self.env.render(mode="human")
if done:
break
reward = np.sum(rews)
return obs, reward, done, info
class DmpEnvWrapperVel(DmpEnvWrapperBase):
"""
Wrapper for gym environments which creates a trajectory in joint velocity space
"""
def rollout(self, action, render=False):
goal_pos, weight_matrix = self.goal_and_weights(action)
if hasattr(self.env, "weight_matrix_scale"):
weight_matrix = weight_matrix * self.env.weight_matrix_scale
self.dmp.set_weights(weight_matrix, goal_pos)
_, velocities = self.dmp.reference_trajectory(self.t)
rews = []
infos = []
self.env.reset()
for t, vel in enumerate(velocities):
obs, rew, done, info = self.env.step(vel)
rews.append(rew)
infos.append(info)
if render:
self.env.render(mode="human")
if done:
break
reward = np.sum(rews)
return obs, reward, done, info
class DmpEnvWrapperPD(DmpEnvWrapperBase):
"""
Wrapper for gym environments which creates a trajectory in joint velocity space
"""
def rollout(self, action, render=False):
goal_pos, weight_matrix = self.goal_and_weights(action)
goal_pos, weight_matrix = self.goal_and_weights(params)
if hasattr(self.env, "weight_matrix_scale"):
weight_matrix = weight_matrix * self.env.weight_matrix_scale
self.dmp.set_weights(weight_matrix, goal_pos)
@ -173,9 +117,11 @@ class DmpEnvWrapperPD(DmpEnvWrapperBase):
infos = []
self.env.reset()
if context is not None:
self.env.configure(context)
for t, pos_vel in enumerate(zip(trajectory, velocity)):
ac = self.policy.get_action(self.env, pos_vel[0], pos_vel[1])
ac = self.policy.get_action(pos_vel[0], pos_vel[1])
obs, rew, done, info = self.env.step(ac)
rews.append(rew)
infos.append(info)

View File

@ -1,15 +1,37 @@
class PDController:
def __init__(self, p_gains, d_gains):
self.p_gains = p_gains
self.d_gains = d_gains
from alr_envs.mujoco.alr_mujoco_env import AlrMujocoEnv
def get_action(self, env, des_pos, des_vel):
class BaseController:
def __init__(self, env: AlrMujocoEnv):
self.env = env
def get_action(self, des_pos, des_vel):
raise NotImplementedError
class PosController(BaseController):
def get_action(self, des_pos, des_vel):
return des_pos
class VelController(BaseController):
def get_action(self, des_pos, des_vel):
return des_vel
class PDController(BaseController):
def __init__(self, env):
self.p_gains = env.p_gains
self.d_gains = env.d_gains
super(PDController, self).__init__(env)
def get_action(self, des_pos, des_vel):
# TODO: make standardized ALRenv such that all of them have current_pos/vel attributes
cur_pos = env.current_pos
cur_vel = env.current_vel
cur_pos = self.env.current_pos
cur_vel = self.env.current_vel
if len(des_pos) != len(cur_pos):
des_pos = env.extend_des_pos(des_pos)
des_pos = self.env.extend_des_pos(des_pos)
if len(des_vel) != len(cur_vel):
des_vel = env.extend_des_vel(des_vel)
des_vel = self.env.extend_des_vel(des_vel)
trq = self.p_gains * (des_pos - cur_pos) + self.d_gains * (des_vel - cur_vel)
return trq

View File

@ -1,7 +1,7 @@
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapperPD
from alr_envs.utils.policies import PDController
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker
from alr_envs.mujoco.reacher.alr_reacher import ALRReacherEnv
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_simple import ALRBallInACupEnv
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
import numpy as np
@ -17,32 +17,42 @@ if __name__ == "__main__":
:param rank: (int) index of the subprocess
"""
def _init():
p_gains = np.array([100, 100, 100, 100, 100])
d_gains = np.array([1, 1, 1, 1, 1])
policy = PDController(p_gains, d_gains)
_env = ALRBallInACupEnv(BallInACupReward)
env = ALRReacherEnv()
env = DmpEnvWrapperPD(env,
num_dof=5,
num_basis=5,
duration=4,
dt=env.dt,
_env = DmpEnvWrapper(_env,
num_dof=3,
num_basis=8,
duration=3.5,
alpha_phase=3,
dt=_env.dt,
learn_goal=False,
start_pos=env.init_qpos[:5],
final_pos=env.init_qpos[:5],
alpha_phase=2,
policy=policy
start_pos=_env.start_pos[1::2],
final_pos=_env.start_pos[1::2],
policy_type="motor"
# contextual=True
)
env.seed(seed + rank)
return env
_env.seed(seed + rank)
return _env
return _init
dim = 25
env = make_env(0, 0)()
dim = 24
n_samples = 10
vec_env = DmpAsyncVectorEnv([make_env(i) for i in range(4)],
n_samples=n_samples,
context="spawn",
shared_memory=False,
worker=_worker)
params = 10 * np.random.randn(n_samples, dim)
out = vec_env(params)
non_vec_env = make_env(0, 0)()
params = 10 * np.random.randn(dim)
out = env.rollout(params, render=True)
out2 = non_vec_env.rollout(params, render=True)
print(out)
print(out2)