This commit is contained in:
Maximilian Huettenrauch 2021-02-24 15:37:54 +01:00
parent 60e1673ee1
commit 7e988758fe
52 changed files with 550 additions and 13 deletions

View File

@ -1,5 +1,5 @@
<mujoco model="wam(v1.31)"> <mujoco model="wam(v1.31)">
<compiler angle="radian" meshdir="meshes/" /> <compiler angle="radian" meshdir="../../meshes/wam/" />
<option timestep="0.0005" integrator="Euler" /> <option timestep="0.0005" integrator="Euler" />
<size njmax="500" nconmax="100" /> <size njmax="500" nconmax="100" />
<default class="main"> <default class="main">
@ -286,7 +286,7 @@
<joint name="J1_29" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" /> <joint name="J1_29" pos="-0.00535 0 0" axis="0 0 1" group="3" limited="false" damping="0.0001" frictionloss="0" />
<geom name="G29" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" /> <geom name="G29" size="0.001 0.00427" quat="0.707107 0 0.707107 0" type="capsule" rgba="0.8 0.2 0.1 1" />
<body name="ball"> <body name="ball">
<geom name="ball_geom" type="sphere" size="0.02" mass="0.021" rgba="0.8 0.2 0.1 1"/> <geom name="ball_geom" type="sphere" size="0.02" mass="0.015" rgba="0.8 0.2 0.1 1"/>
</body> </body>
</body> </body>
</body> </body>

View File

@ -2,8 +2,6 @@ from gym import utils
import os import os
import numpy as np import numpy as np
from alr_envs.mujoco import alr_mujoco_env from alr_envs.mujoco import alr_mujoco_env
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
import mujoco_py
class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
@ -116,14 +114,14 @@ if __name__ == "__main__":
env.configure(ctxt) env.configure(ctxt)
env.reset() env.reset()
env.render() # env.render()
for i in range(2000): for i in range(16000):
# test with random actions # test with random actions
ac = 0.01 * env.action_space.sample()[0:7] ac = 0.001 * env.action_space.sample()[0:7]
# ac = env.start_pos # ac = env.start_pos
# ac[0] += np.pi/2 # ac[0] += np.pi/2
obs, rew, d, info = env.step(ac) obs, rew, d, info = env.step(ac)
env.render() # env.render()
print(rew) print(rew)

View File

@ -31,13 +31,15 @@ class BallInACupReward(alr_reward_fct.AlrReward):
def reset(self, context): def reset(self, context):
self.ball_traj = np.zeros(shape=(self.sim_time, 3)) self.ball_traj = np.zeros(shape=(self.sim_time, 3))
self.cup_traj = np.zeros(shape=(self.sim_time, 3))
self.dists = [] self.dists = []
self.dists_ctxt = [] self.dists_ctxt = []
self.dists_final = [] self.dists_final = []
self.costs = [] self.costs = []
self.context = context self.context = context
self.ball_in_cup = False self.ball_in_cup = False
self.dist_ctxt = 5 self.ball_above_threshold = False
self.dist_ctxt = 3
def compute_reward(self, action, sim, step): def compute_reward(self, action, sim, step):
action_cost = np.sum(np.square(action)) action_cost = np.sum(np.square(action))
@ -64,6 +66,7 @@ class BallInACupReward(alr_reward_fct.AlrReward):
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos)) self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
self.dists_ctxt.append(np.linalg.norm(ball_pos - self.context)) self.dists_ctxt.append(np.linalg.norm(ball_pos - self.context))
self.ball_traj[step, :] = ball_pos self.ball_traj[step, :] = ball_pos
self.cup_traj[step, :] = goal_pos
# Determine the first time when ball is in cup # Determine the first time when ball is in cup
if not self.ball_in_cup: if not self.ball_in_cup:
@ -78,9 +81,18 @@ class BallInACupReward(alr_reward_fct.AlrReward):
dist_final = self.dists_final[-1] dist_final = self.dists_final[-1]
# dist_ctxt = self.dists_ctxt[-1] # dist_ctxt = self.dists_ctxt[-1]
# max distance between ball and cup and cup height at that time
ball_to_cup_diff = self.ball_traj[:, 2] - self.cup_traj[:, 2]
t_max_diff = np.argmax(ball_to_cup_diff)
t_max_ball_height = np.argmax(self.ball_traj[:, 2])
max_ball_height = np.max(self.ball_traj[:, 2])
# cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt) # cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * self.dist_ctxt) cost = 0.3 * min_dist + 0.3 * dist_final + 0.3 * np.minimum(self.dist_ctxt, 3)
reward = np.exp(-1 * cost) - 1e-4 * action_cost reward = np.exp(-1 * cost) - 1e-4 * action_cost
if max_ball_height < self.context[2] or ball_to_cup_diff[t_max_ball_height] < 0:
reward -= 1
success = dist_final < 0.05 and self.dist_ctxt < 0.05 success = dist_final < 0.05 and self.dist_ctxt < 0.05
else: else:
reward = - 1e-4 * action_cost reward = - 1e-4 * action_cost

View File

@ -3,7 +3,7 @@ from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_simple import ALRBallInACupEnv as ALRBallInACupEnvSimple from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_simple import ALRBallInACupEnv as ALRBallInACupEnvSimple
def make_env(rank, seed=0): def make_contextual_env(rank, seed=0):
""" """
Utility function for multiprocessed env. Utility function for multiprocessed env.
@ -26,7 +26,41 @@ def make_env(rank, seed=0):
duration=3.5, duration=3.5,
post_traj_time=4.5, post_traj_time=4.5,
dt=env.dt, dt=env.dt,
weights_scale=0.1, weights_scale=0.5,
zero_start=True,
zero_goal=True
)
env.seed(seed + rank)
return env
return _init
def make_env(rank, seed=0):
"""
Utility function for multiprocessed env.
:param env_id: (str) the environment ID
:param num_env: (int) the number of environments you wish to have in subprocesses
:param seed: (int) the initial seed for RNG
:param rank: (int) index of the subprocess
:returns a function that generates an environment
"""
def _init():
env = ALRBallInACupEnvSimple()
env = DetPMPEnvWrapper(env,
num_dof=7,
num_basis=5,
width=0.005,
policy_type="motor",
start_pos=env.start_pos,
duration=3.5,
post_traj_time=4.5,
dt=env.dt,
weights_scale=0.5,
zero_start=True, zero_start=True,
zero_goal=True zero_goal=True
) )
@ -60,7 +94,7 @@ def make_simple_env(rank, seed=0):
duration=3.5, duration=3.5,
post_traj_time=4.5, post_traj_time=4.5,
dt=env.dt, dt=env.dt,
weights_scale=0.1, weights_scale=0.5,
zero_start=True, zero_start=True,
zero_goal=True zero_goal=True
) )

View File

View File

@ -0,0 +1,226 @@
<mujoco model="wam(v1.31)">
<compiler angle="radian" meshdir="../../meshes/wam/" />
<option timestep="0.005" integrator="Euler" />
<size njmax="500" nconmax="100" />
<default class="main">
<joint limited="true" frictionloss="0.001" damping="0.07"/>
<default class="viz">
<geom type="mesh" contype="0" conaffinity="0" group="1" rgba="0.7 0.7 0.7 1" />
</default>
<default class="col">
<geom type="mesh" contype="0" rgba="0.5 0.6 0.7 1" />
</default>
</default>
<asset>
<texture type="2d" name="groundplane" builtin="checker" mark="edge" rgb1="0.25 0.26 0.25" rgb2="0.22 0.22 0.22" markrgb="0.3 0.3 0.3" width="100" height="100" />
<material name="MatGnd" texture="groundplane" texrepeat="5 5" specular="1" shininess="0.3" reflectance="1e-05" />
<mesh name="base_link_fine" file="base_link_fine.stl" />
<mesh name="base_link_convex" file="base_link_convex.stl" />
<mesh name="shoulder_link_fine" file="shoulder_link_fine.stl" />
<mesh name="shoulder_link_convex_decomposition_p1" file="shoulder_link_convex_decomposition_p1.stl" />
<mesh name="shoulder_link_convex_decomposition_p2" file="shoulder_link_convex_decomposition_p2.stl" />
<mesh name="shoulder_link_convex_decomposition_p3" file="shoulder_link_convex_decomposition_p3.stl" />
<mesh name="shoulder_pitch_link_fine" file="shoulder_pitch_link_fine.stl" />
<mesh name="shoulder_pitch_link_convex" file="shoulder_pitch_link_convex.stl" />
<mesh name="upper_arm_link_fine" file="upper_arm_link_fine.stl" />
<mesh name="upper_arm_link_convex_decomposition_p1" file="upper_arm_link_convex_decomposition_p1.stl" />
<mesh name="upper_arm_link_convex_decomposition_p2" file="upper_arm_link_convex_decomposition_p2.stl" />
<mesh name="elbow_link_fine" file="elbow_link_fine.stl" />
<mesh name="elbow_link_convex" file="elbow_link_convex.stl" />
<mesh name="forearm_link_fine" file="forearm_link_fine.stl" />
<mesh name="forearm_link_convex_decomposition_p1" file="forearm_link_convex_decomposition_p1.stl" />
<mesh name="forearm_link_convex_decomposition_p2" file="forearm_link_convex_decomposition_p2.stl" />
<mesh name="wrist_yaw_link_fine" file="wrist_yaw_link_fine.stl" />
<mesh name="wrist_yaw_link_convex_decomposition_p1" file="wrist_yaw_link_convex_decomposition_p1.stl" />
<mesh name="wrist_yaw_link_convex_decomposition_p2" file="wrist_yaw_link_convex_decomposition_p2.stl" />
<mesh name="wrist_pitch_link_fine" file="wrist_pitch_link_fine.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p1" file="wrist_pitch_link_convex_decomposition_p1.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p2" file="wrist_pitch_link_convex_decomposition_p2.stl" />
<mesh name="wrist_pitch_link_convex_decomposition_p3" file="wrist_pitch_link_convex_decomposition_p3.stl" />
<mesh name="wrist_palm_link_fine" file="wrist_palm_link_fine.stl" />
<mesh name="wrist_palm_link_convex" file="wrist_palm_link_convex.stl" />
<mesh name="cup1" file="cup_split1.stl" scale="0.001 0.001 0.001" />
<mesh name="cup2" file="cup_split2.stl" scale="0.001 0.001 0.001" />
<mesh name="cup3" file="cup_split3.stl" scale="0.001 0.001 0.001" />
<mesh name="cup4" file="cup_split4.stl" scale="0.001 0.001 0.001" />
<mesh name="cup5" file="cup_split5.stl" scale="0.001 0.001 0.001" />
<mesh name="cup6" file="cup_split6.stl" scale="0.001 0.001 0.001" />
<mesh name="cup7" file="cup_split7.stl" scale="0.001 0.001 0.001" />
<mesh name="cup8" file="cup_split8.stl" scale="0.001 0.001 0.001" />
<mesh name="cup9" file="cup_split9.stl" scale="0.001 0.001 0.001" />
<mesh name="cup10" file="cup_split10.stl" scale="0.001 0.001 0.001" />
<mesh name="cup11" file="cup_split11.stl" scale="0.001 0.001 0.001" />
<mesh name="cup12" file="cup_split12.stl" scale="0.001 0.001 0.001" />
<mesh name="cup13" file="cup_split13.stl" scale="0.001 0.001 0.001" />
<mesh name="cup14" file="cup_split14.stl" scale="0.001 0.001 0.001" />
<mesh name="cup15" file="cup_split15.stl" scale="0.001 0.001 0.001" />
<mesh name="cup16" file="cup_split16.stl" scale="0.001 0.001 0.001" />
<mesh name="cup17" file="cup_split17.stl" scale="0.001 0.001 0.001" />
<mesh name="cup18" file="cup_split18.stl" scale="0.001 0.001 0.001" />
<mesh name="cup3_table" file="cup_split3.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup4_table" file="cup_split4.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup5_table" file="cup_split5.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup6_table" file="cup_split6.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup7_table" file="cup_split7.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup8_table" file="cup_split8.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup9_table" file="cup_split9.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup10_table" file="cup_split10.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup15_table" file="cup_split15.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup16_table" file="cup_split16.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup17_table" file="cup_split17.stl" scale="0.00211 0.00211 0.01" />
<mesh name="cup18_table" file="cup_split18.stl" scale="0.00211 0.00211 0.01" />
</asset>
<worldbody>
<geom name="ground" size="5 5 1" type="plane" material="MatGnd" />
<light pos="0.1 0.2 1.3" dir="-0.0758098 -0.32162 -0.985527" directional="true" cutoff="60" exponent="1" diffuse="1 1 1" specular="0.1 0.1 0.1" />
<body name="wam/base_link" pos="0 0 0.6">
<inertial pos="6.93764e-06 0.0542887 0.076438" quat="0.496481 0.503509 -0.503703 0.496255" mass="27.5544" diaginertia="0.432537 0.318732 0.219528" />
<geom class="viz" quat="0.707107 0 0 -0.707107" mesh="base_link_fine" />
<geom class="col" quat="0.707107 0 0 -0.707107" mesh="base_link_convex" name="base_link_convex_geom"/>
<body name="wam/shoulder_yaw_link" pos="0 0 0.16" quat="0.707107 0 0 -0.707107">
<inertial pos="-0.00443422 -0.00066489 -0.12189" quat="0.999995 0.000984795 0.00270132 0.00136071" mass="10.7677" diaginertia="0.507411 0.462983 0.113271" />
<joint name="wam/base_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.6 2.6" />
<geom class="viz" pos="0 0 0.186" mesh="shoulder_link_fine" />
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p1" name="shoulder_link_convex_decomposition_p1_geom"/>
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p2" name="shoulder_link_convex_decomposition_p2_geom"/>
<geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p3" name="shoulder_link_convex_decomposition_p3_geom"/>
<body name="wam/shoulder_pitch_link" pos="0 0 0.184" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.00236983 -0.0154211 0.0310561" quat="0.961781 -0.272983 0.0167269 0.0133385" mass="3.87494" diaginertia="0.0214207 0.0167101 0.0126465" />
<joint name="wam/shoulder_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.985 1.985" />
<geom class="viz" mesh="shoulder_pitch_link_fine" />
<geom class="col" mesh="shoulder_pitch_link_convex" />
<body name="wam/upper_arm_link" pos="0 -0.505 0" quat="0.707107 0.707107 0 0">
<inertial pos="-0.0382586 3.309e-05 -0.207508" quat="0.705455 0.0381914 0.0383402 0.706686" mass="1.80228" diaginertia="0.0665697 0.0634285 0.00622701" />
<joint name="wam/shoulder_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.8 2.8" />
<geom class="viz" pos="0 0 -0.505" mesh="upper_arm_link_fine" />
<geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p1" name="upper_arm_link_convex_decomposition_p1_geom"/>
<geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p2" name="upper_arm_link_convex_decomposition_p2_geom"/>
<body name="wam/forearm_link" pos="0.045 0 0.045" quat="0.707107 -0.707107 0 0">
<inertial pos="0.00498512 -0.132717 -0.00022942" quat="0.546303 0.447151 -0.548676 0.447842" mass="2.40017" diaginertia="0.0196896 0.0152225 0.00749914" />
<joint name="wam/elbow_pitch_joint" pos="0 0 0" axis="0 0 1" range="-0.9 3.14159" />
<geom class="viz" mesh="elbow_link_fine" />
<geom class="col" mesh="elbow_link_convex" />
<geom class="viz" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_fine" />
<geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p1" name="forearm_link_convex_decomposition_p1_geom" />
<geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p2" name="forearm_link_convex_decomposition_p2_geom" />
<body name="wam/wrist_yaw_link" pos="-0.045 0 0" quat="0.707107 0.707107 0 0">
<inertial pos="8.921e-05 0.00435824 -0.00511217" quat="0.708528 -0.000120667 0.000107481 0.705683" mass="0.12376" diaginertia="0.0112011 0.0111887 7.58188e-05" />
<joint name="wam/wrist_yaw_joint" pos="0 0 0" axis="0 0 1" range="-4.55 1.25" />
<geom class="viz" pos="0 0 0.3" mesh="wrist_yaw_link_fine" />
<geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p1" name="wrist_yaw_link_convex_decomposition_p1_geom" />
<geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p2" name="wrist_yaw_link_convex_decomposition_p2_geom" />
<body name="wam/wrist_pitch_link" pos="0 0 0.3" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.00012262 -0.0246834 -0.0170319" quat="0.994687 -0.102891 0.000824211 -0.00336105" mass="0.417974" diaginertia="0.000555166 0.000463174 0.00023407" />
<joint name="wam/wrist_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.5707 1.5707" />
<geom class="viz" mesh="wrist_pitch_link_fine" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p1" name="wrist_pitch_link_convex_decomposition_p1_geom" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p2" name="wrist_pitch_link_convex_decomposition_p2_geom" />
<geom class="col" mesh="wrist_pitch_link_convex_decomposition_p3" name="wrist_pitch_link_convex_decomposition_p3_geom" />
<body name="wam/wrist_palm_link" pos="0 -0.06 0" quat="0.707107 0.707107 0 0">
<inertial pos="-7.974e-05 -0.00323552 -0.00016313" quat="0.594752 0.382453 0.382453 0.594752" mass="0.0686475" diaginertia="7.408e-05 3.81466e-05 3.76434e-05" />
<joint name="wam/palm_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.7 2.7" />
<geom class="viz" pos="0 0 -0.06" mesh="wrist_palm_link_fine" />
<geom class="col" pos="0 0 -0.06" mesh="wrist_palm_link_convex" name="wrist_palm_link_convex_geom" />
<body name="cup" pos="0 0 0" quat="-0.000203673 0 0 1">
<inertial pos="-3.75236e-10 8.27811e-05 0.0947015" quat="0.999945 -0.0104888 0 0" mass="0.132" diaginertia="0.000285643 0.000270485 9.65696e-05" />
<geom priority="1" name="cup_geom1" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup1" />
<geom priority="1" name="cup_geom2" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup2" />
<geom priority="1" name="cup_geom3" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup3" />
<geom priority="1" name="cup_geom4" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup4" />
<geom priority="1" name="cup_geom5" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup5" />
<geom priority="1" name="cup_geom6" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup6" />
<geom priority="1" name="cup_geom7" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup7" />
<geom priority="1" name="cup_geom8" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup8" />
<geom priority="1" name="cup_geom9" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup9" />
<geom priority="1" name="cup_geom10" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup10" />
<geom name="cup_base" pos="0 -0.035 0.1165" euler="-1.57 0 0" type="cylinder" size="0.038 0.0375" solref="-10000 -100"/>
<geom name="cup_base_contact" pos="0 0.00215 0.1165" euler="-1.57 0 0" type="cylinder" size="0.03 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>
<!-- <geom name="cup_geom11" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup11" />-->
<!-- <geom name="cup_geom12" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup12" />-->
<!-- <geom name="cup_geom13" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup13" />-->
<!-- <geom name="cup_geom14" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup14" />-->
<geom priority="1" name="cup_geom15" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup15" />
<geom priority="1" name="cup_geom16" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup16" />
<geom priority="1" name="cup_geom17" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup17" />
<geom priority="1" name="cup_geom18" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup18" />
<site name="cup_robot" pos="0 0.05 0.1165" rgba="255 0 0 1"/>
<site name="cup_robot_final" pos="0 -0.025 0.1165" rgba="0 255 0 1"/>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
<!-- <geom name="table" type="box" size="0.4 0.6 0.3" pos="0 -1.75 0.3025" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"-->
<geom name="table" type="box" size="0.4 0.6 0.4" pos="0 -1.85 0.4025" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
<geom name="table_robot" type="box" size="0.1 0.1 0.3" pos="0 0.00 0.3025" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
<geom name="wall" type="box" quat="1 0 0 0" size="0.4 0.04 1.1" pos="0. -2.45 1.1" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
solref="-10000 -100"/>
<!--initial-->
<!-- <body name="cup_table" pos="0 -2 0.840" quat="0.7071068 0.7071068 0 0">-->
<!--first diff-->
<!-- <body name="cup_table" pos="0.1 -1.5 0.840" quat="0.7071068 0.7071068 0 0">-->
<!-- second diff-->
<!-- <body name="cup_table" pos="-0.3 -2.1 0.840" quat="0.7071068 0.7071068 0 0">-->
<body name="cup_table" pos="0.32 -1.2 0.84" quat="0.7071068 0.7071068 0 0">
<inertial pos="-3.75236e-10 8.27811e-05 0.0947015" quat="0.999945 -0.0104888 0 0" mass="0.132" diaginertia="0.000285643 0.000270485 9.65696e-05" />
<geom priority= "1" name="cup_geom_table3" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup3_table" />
<geom priority= "1" name="cup_geom_table4" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup4_table" />
<geom priority= "1" name="cup_geom_table5" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup5_table" />
<geom priority= "1" name="cup_geom_table6" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup6_table" />
<geom priority= "1" name="cup_geom_table7" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup7_table" />
<geom priority= "1" name="cup_geom_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup8_table" />
<geom priority= "1" name="cup_geom_table9" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup9_table" />
<geom priority= "1" name="cup_geom_table10" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup10_table" />
<geom priority= "1" name="cup_base_table" pos="0 -0.035 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.08 0.045" solref="-10000 -100"/>
<geom priority= "1" name="cup_base_table_contact" pos="0 0.015 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.06 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>
<geom priority= "1" name="cup_geom_table15" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup15_table" />
<geom priority= "1" name="cup_geom_table16" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup16_table" />
<geom priority= "1" name="cup_geom_table17" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup17_table" />
<geom priority= "1" name="cup_geom1_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup18_table" />
<site name="cup_goal_table" pos="0 0.11 0.1337249" rgba="255 0 0 1"/>
<site name="cup_goal_final_table" pos="0.0 0.025 0.1337249" rgba="0 255 0 1"/>
<!-- <site name="cup_goal_final_table" pos="0.0 -0.025 0.1337249" rgba="0 255 0 1"/>-->
</body>
<!-- <body name="ball" pos="0.0 -0.813 2.382">-->
<body name="ball" pos="0.0 -0.813 2.382">
<joint axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<joint axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<joint axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
<geom priority= "1" size="0.025 0.025 0.025" type="sphere" condim="4" name="ball_geom" rgba="0.8 0.2 0.1 1" mass="0.1"
friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="-10000 -10"/>
<!-- friction="0.1 0.1 0.1" solimp="1 1 0" solref="0.1 0.03"/>-->
<site name="target_ball" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"/>
</body>
<camera name="visualization" mode="targetbody" target="wam/wrist_yaw_link" pos="1.5 -0.4 2.2"/>
<camera name="experiment" mode="fixed" quat="0.44418059 0.41778323 0.54301123 0.57732103" pos="1.5 -0.3 1.33" />
<site name="test" pos="0.1 0.1 0.1" rgba="0 0 1 1" type="sphere"/>
</worldbody>
<!-- <actuator>-->
<!-- <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint" kp="800"/>-->
<!-- <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint" kp="800"/>-->
<!-- <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint" kp="800"/>-->
<!-- <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint" kp="800"/>-->
<!-- <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint" kp="100"/>-->
<!-- <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint" kp="2000"/>-->
<!-- <position ctrlrange="-2.7 2.7" joint="wam/palm_yaw_joint" kp="100"/>-->
<!-- </actuator>-->
<actuator>
<motor ctrlrange="-150 150" joint="wam/base_yaw_joint"/>
<motor ctrlrange="-125 125" joint="wam/shoulder_pitch_joint"/>
<motor ctrlrange="-40 40" joint="wam/shoulder_yaw_joint"/>
<motor ctrlrange="-60 60" joint="wam/elbow_pitch_joint"/>
<motor ctrlrange="-5 5" joint="wam/wrist_yaw_joint"/>
<motor ctrlrange="-5 5" joint="wam/wrist_pitch_joint"/>
<motor ctrlrange="-2 2" joint="wam/palm_yaw_joint"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,143 @@
from gym import utils
import os
import numpy as np
from alr_envs.mujoco import alr_mujoco_env
class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None):
self._steps = 0
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
"beerpong" + ".xml")
self.start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
self.start_vel = np.zeros(7)
self._q_pos = []
self._q_vel = []
# self.weight_matrix_scale = 50
self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
self.context = None
utils.EzPickle.__init__(self)
alr_mujoco_env.AlrMujocoEnv.__init__(self,
self.xml_path,
apply_gravity_comp=apply_gravity_comp,
n_substeps=n_substeps)
self.sim_time = 8 # seconds
self.sim_steps = int(self.sim_time / self.dt)
if reward_function is None:
from alr_envs.mujoco.beerpong.beerpong_reward import BeerpongReward
reward_function = BeerpongReward
self.reward_function = reward_function(self.sim, self.sim_steps)
self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
@property
def current_pos(self):
return self.sim.data.qpos[0:7].copy()
@property
def current_vel(self):
return self.sim.data.qvel[0:7].copy()
def configure(self, context):
self.context = context
self.reward_function.reset(context)
def reset_model(self):
init_pos_all = self.init_qpos.copy()
init_pos_robot = self.start_pos
init_vel = np.zeros_like(init_pos_all)
self._steps = 0
self._q_pos = []
self._q_vel = []
start_pos = init_pos_all
start_pos[0:7] = init_pos_robot
# start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
self.set_state(start_pos, init_vel)
start_pos = init_pos_all
start_pos[0:7] = init_pos_robot
start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
self.set_state(start_pos, init_vel)
# ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
# self.sim.model.body_pos[self.ball_id] = ball_pos.copy()
return self._get_obs()
def step(self, a):
reward_dist = 0.0
angular_vel = 0.0
reward_ctrl = - np.square(a).sum()
crash = self.do_simulation(a)
joint_cons_viol = self.check_traj_in_joint_limits()
self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
ob = self._get_obs()
if not crash and not joint_cons_viol:
reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps)
done = success or self._steps == self.sim_steps - 1 or stop_sim
self._steps += 1
else:
reward = -1000
success = False
done = True
return ob, reward, done, dict(reward_dist=reward_dist,
reward_ctrl=reward_ctrl,
velocity=angular_vel,
traj=self._q_pos, is_success=success,
is_collided=crash or joint_cons_viol)
def check_traj_in_joint_limits(self):
return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
def _get_obs(self):
theta = self.sim.data.qpos.flat[:7]
return np.concatenate([
np.cos(theta),
np.sin(theta),
# self.get_body_com("target"), # only return target to make problem harder
[self._steps],
])
if __name__ == "__main__":
env = ALRBeerpongEnv()
ctxt = np.array([-0.20869846, -0.66376693, 1.18088501])
env.configure(ctxt)
env.reset()
env.render()
for i in range(16000):
# test with random actions
ac = 0.01 * env.action_space.sample()[0:7]
# ac = env.start_pos
# ac[0] += np.pi/2
obs, rew, d, info = env.step(ac)
env.render()
print(rew)
if d:
break
env.close()

View File

@ -0,0 +1,124 @@
import numpy as np
from alr_envs.mujoco import alr_reward_fct
class BeerpongReward(alr_reward_fct.AlrReward):
def __init__(self, sim, sim_time):
self.sim = sim
self.sim_time = sim_time
self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
"wrist_pitch_link_convex_decomposition_p1_geom",
"wrist_pitch_link_convex_decomposition_p2_geom",
"wrist_pitch_link_convex_decomposition_p3_geom",
"wrist_yaw_link_convex_decomposition_p1_geom",
"wrist_yaw_link_convex_decomposition_p2_geom",
"forearm_link_convex_decomposition_p1_geom",
"forearm_link_convex_decomposition_p2_geom"]
self.ball_id = None
self.ball_collision_id = None
self.goal_id = None
self.goal_final_id = None
self.collision_ids = None
self.ball_traj = None
self.dists = None
self.dists_ctxt = None
self.dists_final = None
self.costs = None
self.reset(None)
def reset(self, context):
self.ball_traj = np.zeros(shape=(self.sim_time, 3))
self.dists = []
self.dists_ctxt = []
self.dists_final = []
self.costs = []
self.context = context
self.ball_in_cup = False
self.dist_ctxt = 5
self.ball_id = self.sim.model._body_name2id["ball"]
self.ball_collision_id = self.sim.model._geom_name2id["ball_geom"]
self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
self.goal_id = self.sim.model._site_name2id["cup_goal_table"]
self.goal_final_id = self.sim.model._site_name2id["cup_goal_final_table"]
self.collision_ids = [self.sim.model._geom_name2id[name] for name in self.collision_objects]
self.cup_table_id = self.sim.model._body_name2id["cup_table"]
def compute_reward(self, action, sim, step):
action_cost = np.sum(np.square(action))
stop_sim = False
success = False
if self.check_collision(sim):
reward = - 1e-4 * action_cost - 1000
stop_sim = True
return reward, success, stop_sim
# Compute the current distance from the ball to the inner part of the cup
goal_pos = sim.data.site_xpos[self.goal_id]
ball_pos = sim.data.body_xpos[self.ball_id]
goal_final_pos = sim.data.site_xpos[self.goal_final_id]
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
self.dists_ctxt.append(np.linalg.norm(ball_pos - self.context))
self.ball_traj[step, :] = ball_pos
# Determine the first time when ball is in cup
if not self.ball_in_cup:
ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
self.ball_in_cup = ball_in_cup
if ball_in_cup:
dist_to_ctxt = np.linalg.norm(ball_pos - self.context)
self.dist_ctxt = dist_to_ctxt
if step == self.sim_time - 1:
min_dist = np.min(self.dists)
dist_final = self.dists_final[-1]
# dist_ctxt = self.dists_ctxt[-1]
# cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * self.dist_ctxt)
reward = np.exp(-1 * cost) - 1e-4 * action_cost
success = dist_final < 0.05 and self.dist_ctxt < 0.05
else:
reward = - 1e-4 * action_cost
success = False
return reward, success, stop_sim
def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt):
if not ball_in_cup:
cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2)
else:
cost = 2 * dist_to_ctxt ** 2
print('Context Distance:', dist_to_ctxt)
return cost
def check_ball_in_cup(self, sim, ball_collision_id):
cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
for coni in range(0, sim.data.ncon):
con = sim.data.contact[coni]
collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id
collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id
if collision or collision_trans:
return True
return False
def check_collision(self, sim):
for coni in range(0, sim.data.ncon):
con = sim.data.contact[coni]
collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
if collision or collision_trans:
return True
return False