updates
This commit is contained in:
parent
1d8b22245d
commit
a0692b1089
@ -83,7 +83,7 @@ class HoleReacher(gym.Env):
|
|||||||
"""
|
"""
|
||||||
a single step with an action in joint velocity space
|
a single step with an action in joint velocity space
|
||||||
"""
|
"""
|
||||||
vel = action
|
vel = action # + 0.01 * np.random.randn(self.num_links)
|
||||||
acc = (vel - self._angle_velocity) / self.dt
|
acc = (vel - self._angle_velocity) / self.dt
|
||||||
self._angle_velocity = vel
|
self._angle_velocity = vel
|
||||||
self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
|
self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
|
||||||
@ -96,20 +96,20 @@ class HoleReacher(gym.Env):
|
|||||||
|
|
||||||
dist_reward = 0
|
dist_reward = 0
|
||||||
if not self._is_collided:
|
if not self._is_collided:
|
||||||
if self._steps == 180:
|
if self._steps == 199:
|
||||||
dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
|
dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
|
||||||
else:
|
else:
|
||||||
dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
|
dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
|
||||||
|
|
||||||
reward = - dist_reward ** 2
|
reward = - dist_reward ** 2
|
||||||
|
|
||||||
reward -= 1e-6 * np.sum(acc**2)
|
reward -= 5e-8 * np.sum(acc**2)
|
||||||
|
|
||||||
if self._steps == 180:
|
# if self._steps == 180:
|
||||||
reward -= 0.1 * np.sum(vel**2) ** 2
|
# reward -= 0.1 * np.sum(vel**2) ** 2
|
||||||
|
|
||||||
if self._is_collided:
|
if self._is_collided:
|
||||||
reward -= self.collision_penalty
|
reward = -self.collision_penalty
|
||||||
|
|
||||||
info = {"is_collided": self._is_collided}
|
info = {"is_collided": self._is_collided}
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@ from alr_envs.classic_control.hole_reacher import HoleReacher
|
|||||||
from alr_envs.classic_control.viapoint_reacher import ViaPointReacher
|
from alr_envs.classic_control.viapoint_reacher import ViaPointReacher
|
||||||
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
|
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
|
||||||
from alr_envs.utils.detpmp_env_wrapper import DetPMPEnvWrapper
|
from alr_envs.utils.detpmp_env_wrapper import DetPMPEnvWrapper
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
def make_viapointreacher_env(rank, seed=0):
|
def make_viapointreacher_env(rank, seed=0):
|
||||||
@ -54,7 +55,7 @@ def make_holereacher_env(rank, seed=0):
|
|||||||
hole_width=0.15,
|
hole_width=0.15,
|
||||||
hole_depth=1,
|
hole_depth=1,
|
||||||
hole_x=1,
|
hole_x=1,
|
||||||
collision_penalty=1000)
|
collision_penalty=100)
|
||||||
|
|
||||||
_env = DmpEnvWrapper(_env,
|
_env = DmpEnvWrapper(_env,
|
||||||
num_dof=5,
|
num_dof=5,
|
||||||
@ -62,10 +63,51 @@ def make_holereacher_env(rank, seed=0):
|
|||||||
duration=2,
|
duration=2,
|
||||||
dt=_env.dt,
|
dt=_env.dt,
|
||||||
learn_goal=True,
|
learn_goal=True,
|
||||||
alpha_phase=2,
|
alpha_phase=3.5,
|
||||||
start_pos=_env.start_pos,
|
start_pos=_env.start_pos,
|
||||||
policy_type="velocity",
|
policy_type="velocity",
|
||||||
weights_scale=100,
|
weights_scale=100,
|
||||||
|
goal_scale=0.1
|
||||||
|
)
|
||||||
|
|
||||||
|
_env.seed(seed + rank)
|
||||||
|
return _env
|
||||||
|
|
||||||
|
return _init
|
||||||
|
|
||||||
|
|
||||||
|
def make_holereacher_fix_goal_env(rank, seed=0):
|
||||||
|
"""
|
||||||
|
Utility function for multiprocessed env.
|
||||||
|
|
||||||
|
:param env_id: (str) the environment ID
|
||||||
|
:param num_env: (int) the number of environments you wish to have in subprocesses
|
||||||
|
:param seed: (int) the initial seed for RNG
|
||||||
|
:param rank: (int) index of the subprocess
|
||||||
|
:returns a function that generates an environment
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _init():
|
||||||
|
_env = HoleReacher(num_links=5,
|
||||||
|
allow_self_collision=False,
|
||||||
|
allow_wall_collision=False,
|
||||||
|
hole_width=0.15,
|
||||||
|
hole_depth=1,
|
||||||
|
hole_x=1,
|
||||||
|
collision_penalty=100)
|
||||||
|
|
||||||
|
_env = DmpEnvWrapper(_env,
|
||||||
|
num_dof=5,
|
||||||
|
num_basis=5,
|
||||||
|
duration=2,
|
||||||
|
dt=_env.dt,
|
||||||
|
learn_goal=False,
|
||||||
|
final_pos=np.array([2.02669572, -1.25966385, -1.51618198, -0.80946476, 0.02012344]),
|
||||||
|
alpha_phase=3.5,
|
||||||
|
start_pos=_env.start_pos,
|
||||||
|
policy_type="velocity",
|
||||||
|
weights_scale=50,
|
||||||
|
goal_scale=1
|
||||||
)
|
)
|
||||||
|
|
||||||
_env.seed(seed + rank)
|
_env.seed(seed + rank)
|
||||||
@ -103,7 +145,7 @@ def make_holereacher_env_pmp(rank, seed=0):
|
|||||||
duration=2,
|
duration=2,
|
||||||
post_traj_time=0,
|
post_traj_time=0,
|
||||||
dt=_env.dt,
|
dt=_env.dt,
|
||||||
weights_scale=0.15,
|
weights_scale=0.25,
|
||||||
zero_start=True,
|
zero_start=True,
|
||||||
zero_goal=False
|
zero_goal=False
|
||||||
)
|
)
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
<mujoco model="wam(v1.31)">
|
<mujoco model="wam(v1.31)">
|
||||||
<compiler angle="radian" meshdir="../../meshes/wam/" />
|
<compiler angle="radian" meshdir="../../meshes/wam/" />
|
||||||
<option timestep="0.005" integrator="Euler" />
|
<option timestep="0.0005" integrator="Euler" />
|
||||||
<size njmax="500" nconmax="100" />
|
<size njmax="500" nconmax="100" />
|
||||||
<default class="main">
|
<default class="main">
|
||||||
<joint limited="true" frictionloss="0.001" damping="0.07"/>
|
<joint limited="true" frictionloss="0.001" damping="0.07"/>
|
||||||
@ -135,12 +135,8 @@
|
|||||||
<geom priority="1" name="cup_geom8" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup8" />
|
<geom priority="1" name="cup_geom8" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup8" />
|
||||||
<geom priority="1" name="cup_geom9" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup9" />
|
<geom priority="1" name="cup_geom9" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup9" />
|
||||||
<geom priority="1" name="cup_geom10" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup10" />
|
<geom priority="1" name="cup_geom10" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup10" />
|
||||||
<geom name="cup_base" pos="0 -0.035 0.1165" euler="-1.57 0 0" type="cylinder" size="0.038 0.0375" solref="-10000 -100"/>
|
<geom name="cup_base" pos="0 -0.05 0.1165" euler="-1.57 0 0" type="cylinder" size="0.038 0.025" solref="-10000 -100"/>
|
||||||
<geom name="cup_base_contact" pos="0 0.00215 0.1165" euler="-1.57 0 0" type="cylinder" size="0.03 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>
|
<geom name="cup_base_contact" pos="0 -0.005 0.1165" euler="-1.57 0 0" type="cylinder" size="0.03 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>
|
||||||
<!-- <geom name="cup_geom11" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup11" />-->
|
|
||||||
<!-- <geom name="cup_geom12" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup12" />-->
|
|
||||||
<!-- <geom name="cup_geom13" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup13" />-->
|
|
||||||
<!-- <geom name="cup_geom14" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup14" />-->
|
|
||||||
|
|
||||||
<geom priority="1" name="cup_geom15" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup15" />
|
<geom priority="1" name="cup_geom15" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup15" />
|
||||||
<geom priority="1" name="cup_geom16" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup16" />
|
<geom priority="1" name="cup_geom16" pos="0 0.015 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup16" />
|
||||||
@ -157,37 +153,44 @@
|
|||||||
</body>
|
</body>
|
||||||
</body>
|
</body>
|
||||||
</body>
|
</body>
|
||||||
<!-- <geom name="table" type="box" size="0.4 0.6 0.3" pos="0 -1.75 0.3025" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"-->
|
<body name="table_body" pos="0 -1.85 0.4025">
|
||||||
<geom name="table" type="box" size="0.4 0.6 0.4" pos="0 -1.85 0.4025" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
|
<geom name="table" type="box" size="0.4 0.6 0.4" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
|
||||||
solref="-10000 -100"/>
|
solref="-10000 -100"/>
|
||||||
|
<geom name="table_contact_geom" type="box" size="0.4 0.6 0.01" pos="0 0 0.41" rgba="1.4 0.8 0.45 1" solimp="0.999 0.999 0.001"
|
||||||
|
solref="-10000 -100"/>
|
||||||
|
</body>
|
||||||
<geom name="table_robot" type="box" size="0.1 0.1 0.3" pos="0 0.00 0.3025" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
|
<geom name="table_robot" type="box" size="0.1 0.1 0.3" pos="0 0.00 0.3025" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
|
||||||
solref="-10000 -100"/>
|
solref="-10000 -100"/>
|
||||||
<geom name="wall" type="box" quat="1 0 0 0" size="0.4 0.04 1.1" pos="0. -2.45 1.1" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
|
<geom name="wall" type="box" quat="1 0 0 0" size="0.4 0.04 1.1" pos="0. -2.45 1.1" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
|
||||||
solref="-10000 -100"/>
|
solref="-10000 -100"/>
|
||||||
<!--initial-->
|
<!-- <geom name="side_wall_left" type="box" quat="1 0 0 0" size="0.04 0.4 0.5" pos="0.45 -2.05 1.1" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"-->
|
||||||
<!-- <body name="cup_table" pos="0 -2 0.840" quat="0.7071068 0.7071068 0 0">-->
|
<!-- solref="-10000 -100"/>-->
|
||||||
<!--first diff-->
|
<!-- <geom name="side_wall_right" type="box" quat="1 0 0 0" size="0.04 0.4 0.5" pos="-0.45 -2.05 1.1" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"-->
|
||||||
<!-- <body name="cup_table" pos="0.1 -1.5 0.840" quat="0.7071068 0.7071068 0 0">-->
|
<!-- solref="-10000 -100"/>-->
|
||||||
<!-- second diff-->
|
|
||||||
<!-- <body name="cup_table" pos="-0.3 -2.1 0.840" quat="0.7071068 0.7071068 0 0">-->
|
<!-- <body name="cup_table" pos="0.32 -1.55 0.84" quat="0.7071068 0.7071068 0 0">-->
|
||||||
<body name="cup_table" pos="0.32 -1.2 0.84" quat="0.7071068 0.7071068 0 0">
|
<!-- <geom priority= "1" type="box" size ="0.1 0.1 0.1" name="cup_base_table" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -10" mass="10"/>-->
|
||||||
<inertial pos="-3.75236e-10 8.27811e-05 0.0947015" quat="0.999945 -0.0104888 0 0" mass="0.132" diaginertia="0.000285643 0.000270485 9.65696e-05" />
|
<!-- </body>-->
|
||||||
<geom priority= "1" name="cup_geom_table3" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup3_table" />
|
|
||||||
<geom priority= "1" name="cup_geom_table4" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup4_table" />
|
<body name="cup_table" pos="0.32 -1.55 0.84" quat="0.7071068 0.7071068 0 0">
|
||||||
<geom priority= "1" name="cup_geom_table5" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup5_table" />
|
<inertial pos="-3.75236e-10 8.27811e-05 0.0947015" quat="0.999945 -0.0104888 0 0" mass="10.132" diaginertia="0.000285643 0.000270485 9.65696e-05" />
|
||||||
<geom priority= "1" name="cup_geom_table6" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup6_table" />
|
<geom priority= "1" name="cup_geom_table3" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup3_table" mass="10"/>
|
||||||
<geom priority= "1" name="cup_geom_table7" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup7_table" />
|
<geom priority= "1" name="cup_geom_table4" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup4_table" mass="10"/>
|
||||||
<geom priority= "1" name="cup_geom_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup8_table" />
|
<geom priority= "1" name="cup_geom_table5" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup5_table" mass="10"/>
|
||||||
<geom priority= "1" name="cup_geom_table9" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup9_table" />
|
<geom priority= "1" name="cup_geom_table6" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup6_table" mass="10"/>
|
||||||
<geom priority= "1" name="cup_geom_table10" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup10_table" />
|
<geom priority= "1" name="cup_geom_table7" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup7_table" mass="10"/>
|
||||||
<geom priority= "1" name="cup_base_table" pos="0 -0.035 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.08 0.045" solref="-10000 -100"/>
|
<geom priority= "1" name="cup_geom_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup8_table" mass="10"/>
|
||||||
<geom priority= "1" name="cup_base_table_contact" pos="0 0.015 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.06 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>
|
<geom priority= "1" name="cup_geom_table9" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup9_table" mass="10"/>
|
||||||
<geom priority= "1" name="cup_geom_table15" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup15_table" />
|
<geom priority= "1" name="cup_geom_table10" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -010" type="mesh" mesh="cup10_table" mass="10"/>
|
||||||
<geom priority= "1" name="cup_geom_table16" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup16_table" />
|
<geom priority= "1" name="cup_base_table" pos="0 -0.035 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.08 0.045" solref="-10000 -100" mass="10"/>
|
||||||
<geom priority= "1" name="cup_geom_table17" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup17_table" />
|
<geom priority= "1" name="cup_base_table_contact" pos="0 0.015 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.06 0.0005" solref="-10000 -100" rgba="0 0 255 1" mass="10"/>
|
||||||
<geom priority= "1" name="cup_geom1_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup18_table" />
|
<geom priority= "1" name="cup_geom_table15" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup15_table" mass="10"/>
|
||||||
|
<geom priority= "1" name="cup_geom_table16" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup16_table" mass="10"/>
|
||||||
|
<geom priority= "1" name="cup_geom_table17" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup17_table" mass="10"/>
|
||||||
|
<geom priority= "1" name="cup_geom1_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup18_table" mass="10"/>
|
||||||
<site name="cup_goal_table" pos="0 0.11 0.1337249" rgba="255 0 0 1"/>
|
<site name="cup_goal_table" pos="0 0.11 0.1337249" rgba="255 0 0 1"/>
|
||||||
<site name="cup_goal_final_table" pos="0.0 0.025 0.1337249" rgba="0 255 0 1"/>
|
<site name="cup_goal_final_table" pos="0.0 0.025 0.1337249" rgba="0 255 0 1"/>
|
||||||
|
<site name="bounce_table" pos="0.0 -0.015 -0.2 " rgba="255 255 0 1"/>
|
||||||
|
|
||||||
<!-- <site name="cup_goal_final_table" pos="0.0 -0.025 0.1337249" rgba="0 255 0 1"/>-->
|
<!-- <site name="cup_goal_final_table" pos="0.0 -0.025 0.1337249" rgba="0 255 0 1"/>-->
|
||||||
</body>
|
</body>
|
||||||
@ -213,14 +216,23 @@
|
|||||||
<!-- <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint" kp="100"/>-->
|
<!-- <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint" kp="100"/>-->
|
||||||
<!-- <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint" kp="2000"/>-->
|
<!-- <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint" kp="2000"/>-->
|
||||||
<!-- <position ctrlrange="-2.7 2.7" joint="wam/palm_yaw_joint" kp="100"/>-->
|
<!-- <position ctrlrange="-2.7 2.7" joint="wam/palm_yaw_joint" kp="100"/>-->
|
||||||
|
<!-- </actuator>-->
|
||||||
|
<!-- <actuator>-->
|
||||||
|
<!-- <motor ctrlrange="-150 150" joint="wam/base_yaw_joint"/>-->
|
||||||
|
<!-- <motor ctrlrange="-125 125" joint="wam/shoulder_pitch_joint"/>-->
|
||||||
|
<!-- <motor ctrlrange="-40 40" joint="wam/shoulder_yaw_joint"/>-->
|
||||||
|
<!-- <motor ctrlrange="-60 60" joint="wam/elbow_pitch_joint"/>-->
|
||||||
|
<!-- <motor ctrlrange="-5 5" joint="wam/wrist_yaw_joint"/>-->
|
||||||
|
<!-- <motor ctrlrange="-5 5" joint="wam/wrist_pitch_joint"/>-->
|
||||||
|
<!-- <motor ctrlrange="-2 2" joint="wam/palm_yaw_joint"/>-->
|
||||||
<!-- </actuator>-->
|
<!-- </actuator>-->
|
||||||
<actuator>
|
<actuator>
|
||||||
<motor ctrlrange="-150 150" joint="wam/base_yaw_joint"/>
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="150.0" joint="wam/base_yaw_joint"/>
|
||||||
<motor ctrlrange="-125 125" joint="wam/shoulder_pitch_joint"/>
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="125.0" joint="wam/shoulder_pitch_joint"/>
|
||||||
<motor ctrlrange="-40 40" joint="wam/shoulder_yaw_joint"/>
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="40.0" joint="wam/shoulder_yaw_joint"/>
|
||||||
<motor ctrlrange="-60 60" joint="wam/elbow_pitch_joint"/>
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="60.0" joint="wam/elbow_pitch_joint"/>
|
||||||
<motor ctrlrange="-5 5" joint="wam/wrist_yaw_joint"/>
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_yaw_joint"/>
|
||||||
<motor ctrlrange="-5 5" joint="wam/wrist_pitch_joint"/>
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_pitch_joint"/>
|
||||||
<motor ctrlrange="-2 2" joint="wam/palm_yaw_joint"/>
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="2.0" joint="wam/palm_yaw_joint"/>
|
||||||
</actuator>
|
</actuator>
|
||||||
</mujoco>
|
</mujoco>
|
||||||
|
@ -39,6 +39,8 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
|
|||||||
reward_function = BeerpongReward
|
reward_function = BeerpongReward
|
||||||
self.reward_function = reward_function(self.sim, self.sim_steps)
|
self.reward_function = reward_function(self.sim, self.sim_steps)
|
||||||
self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
|
self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
|
||||||
|
self.ball_id = self.sim.model._body_name2id["ball"]
|
||||||
|
self.cup_table_id = self.sim.model._body_name2id["cup_table"]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_pos(self):
|
def current_pos(self):
|
||||||
@ -67,14 +69,9 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
self.set_state(start_pos, init_vel)
|
self.set_state(start_pos, init_vel)
|
||||||
|
|
||||||
start_pos = init_pos_all
|
ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
|
||||||
start_pos[0:7] = init_pos_robot
|
self.sim.model.body_pos[self.ball_id] = ball_pos.copy()
|
||||||
start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
|
self.sim.model.body_pos[self.cup_table_id] = self.context.copy()
|
||||||
|
|
||||||
self.set_state(start_pos, init_vel)
|
|
||||||
|
|
||||||
# ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
|
|
||||||
# self.sim.model.body_pos[self.ball_id] = ball_pos.copy()
|
|
||||||
|
|
||||||
return self._get_obs()
|
return self._get_obs()
|
||||||
|
|
||||||
@ -121,14 +118,17 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
env = ALRBeerpongEnv()
|
env = ALRBeerpongEnv()
|
||||||
ctxt = np.array([-0.20869846, -0.66376693, 1.18088501])
|
ctxt = np.array([0, -2, 0.840]) # initial
|
||||||
|
|
||||||
env.configure(ctxt)
|
env.configure(ctxt)
|
||||||
env.reset()
|
env.reset()
|
||||||
env.render()
|
env.render()
|
||||||
for i in range(16000):
|
for i in range(16000):
|
||||||
# test with random actions
|
# test with random actions
|
||||||
ac = 0.01 * env.action_space.sample()[0:7]
|
ac = 0.0 * env.action_space.sample()[0:7]
|
||||||
|
ac[1] = -0.8
|
||||||
|
ac[3] = - 0.3
|
||||||
|
ac[5] = -0.2
|
||||||
# ac = env.start_pos
|
# ac = env.start_pos
|
||||||
# ac[0] += np.pi/2
|
# ac[0] += np.pi/2
|
||||||
obs, rew, d, info = env.step(ac)
|
obs, rew, d, info = env.step(ac)
|
||||||
|
141
alr_envs/mujoco/beerpong/beerpong_reward_simple.py
Normal file
141
alr_envs/mujoco/beerpong/beerpong_reward_simple.py
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
import numpy as np
|
||||||
|
from alr_envs.mujoco import alr_reward_fct
|
||||||
|
|
||||||
|
|
||||||
|
class BeerpongReward(alr_reward_fct.AlrReward):
|
||||||
|
def __init__(self, sim, sim_time):
|
||||||
|
|
||||||
|
self.sim = sim
|
||||||
|
self.sim_time = sim_time
|
||||||
|
|
||||||
|
self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
|
||||||
|
"wrist_pitch_link_convex_decomposition_p1_geom",
|
||||||
|
"wrist_pitch_link_convex_decomposition_p2_geom",
|
||||||
|
"wrist_pitch_link_convex_decomposition_p3_geom",
|
||||||
|
"wrist_yaw_link_convex_decomposition_p1_geom",
|
||||||
|
"wrist_yaw_link_convex_decomposition_p2_geom",
|
||||||
|
"forearm_link_convex_decomposition_p1_geom",
|
||||||
|
"forearm_link_convex_decomposition_p2_geom"]
|
||||||
|
|
||||||
|
self.ball_id = None
|
||||||
|
self.ball_collision_id = None
|
||||||
|
self.goal_id = None
|
||||||
|
self.goal_final_id = None
|
||||||
|
self.collision_ids = None
|
||||||
|
|
||||||
|
self.ball_traj = None
|
||||||
|
self.dists = None
|
||||||
|
self.dists_ctxt = None
|
||||||
|
self.dists_final = None
|
||||||
|
self.costs = None
|
||||||
|
|
||||||
|
self.reset(None)
|
||||||
|
|
||||||
|
def reset(self, context):
|
||||||
|
self.ball_traj = np.zeros(shape=(self.sim_time, 3))
|
||||||
|
self.dists = []
|
||||||
|
self.dists_ctxt = []
|
||||||
|
self.dists_final = []
|
||||||
|
self.costs = []
|
||||||
|
self.action_costs = []
|
||||||
|
self.context = context
|
||||||
|
self.ball_in_cup = False
|
||||||
|
self.dist_ctxt = 5
|
||||||
|
self.bounce_dist = 2
|
||||||
|
self.min_dist = 2
|
||||||
|
self.dist_final = 2
|
||||||
|
self.table_contact = False
|
||||||
|
|
||||||
|
self.ball_id = self.sim.model._body_name2id["ball"]
|
||||||
|
self.ball_collision_id = self.sim.model._geom_name2id["ball_geom"]
|
||||||
|
self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
|
||||||
|
self.goal_id = self.sim.model._site_name2id["cup_goal_table"]
|
||||||
|
self.goal_final_id = self.sim.model._site_name2id["cup_goal_final_table"]
|
||||||
|
self.collision_ids = [self.sim.model._geom_name2id[name] for name in self.collision_objects]
|
||||||
|
self.cup_table_id = self.sim.model._body_name2id["cup_table"]
|
||||||
|
self.bounce_table_id = self.sim.model._site_name2id["bounce_table"]
|
||||||
|
|
||||||
|
def compute_reward(self, action, sim, step):
|
||||||
|
action_cost = np.sum(np.square(action))
|
||||||
|
self.action_costs.append(action_cost)
|
||||||
|
|
||||||
|
stop_sim = False
|
||||||
|
success = False
|
||||||
|
|
||||||
|
if self.check_collision(sim):
|
||||||
|
reward = - 1e-2 * action_cost - 10
|
||||||
|
stop_sim = True
|
||||||
|
return reward, success, stop_sim
|
||||||
|
|
||||||
|
# Compute the current distance from the ball to the inner part of the cup
|
||||||
|
goal_pos = sim.data.site_xpos[self.goal_id]
|
||||||
|
ball_pos = sim.data.body_xpos[self.ball_id]
|
||||||
|
bounce_pos = sim.data.site_xpos[self.bounce_table_id]
|
||||||
|
goal_final_pos = sim.data.site_xpos[self.goal_final_id]
|
||||||
|
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
|
||||||
|
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
|
||||||
|
self.ball_traj[step, :] = ball_pos
|
||||||
|
|
||||||
|
ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
|
||||||
|
table_contact = self.check_ball_table_contact(sim, self.ball_collision_id)
|
||||||
|
|
||||||
|
if table_contact and not self.table_contact:
|
||||||
|
self.bounce_dist = np.minimum((np.linalg.norm(bounce_pos - ball_pos)), 2)
|
||||||
|
self.table_contact = True
|
||||||
|
|
||||||
|
if step == self.sim_time - 1:
|
||||||
|
min_dist = np.min(self.dists)
|
||||||
|
self.min_dist = min_dist
|
||||||
|
dist_final = self.dists_final[-1]
|
||||||
|
self.dist_final = dist_final
|
||||||
|
|
||||||
|
cost = 0.33 * min_dist + 0.33 * dist_final + 0.33 * self.bounce_dist
|
||||||
|
reward = np.exp(-2 * cost) - 1e-2 * action_cost
|
||||||
|
success = self.bounce_dist < 0.05 and dist_final < 0.05 and ball_in_cup
|
||||||
|
else:
|
||||||
|
reward = - 1e-2 * action_cost
|
||||||
|
success = False
|
||||||
|
|
||||||
|
return reward, success, stop_sim
|
||||||
|
|
||||||
|
def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt):
|
||||||
|
if not ball_in_cup:
|
||||||
|
cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2)
|
||||||
|
else:
|
||||||
|
cost = 2 * dist_to_ctxt ** 2
|
||||||
|
print('Context Distance:', dist_to_ctxt)
|
||||||
|
return cost
|
||||||
|
|
||||||
|
def check_ball_table_contact(self, sim, ball_collision_id):
|
||||||
|
table_collision_id = sim.model._geom_name2id["table_contact_geom"]
|
||||||
|
for coni in range(0, sim.data.ncon):
|
||||||
|
con = sim.data.contact[coni]
|
||||||
|
collision = con.geom1 == table_collision_id and con.geom2 == ball_collision_id
|
||||||
|
collision_trans = con.geom1 == ball_collision_id and con.geom2 == table_collision_id
|
||||||
|
|
||||||
|
if collision or collision_trans:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def check_ball_in_cup(self, sim, ball_collision_id):
|
||||||
|
cup_base_collision_id = sim.model._geom_name2id["cup_base_table_contact"]
|
||||||
|
for coni in range(0, sim.data.ncon):
|
||||||
|
con = sim.data.contact[coni]
|
||||||
|
|
||||||
|
collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id
|
||||||
|
collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id
|
||||||
|
|
||||||
|
if collision or collision_trans:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def check_collision(self, sim):
|
||||||
|
for coni in range(0, sim.data.ncon):
|
||||||
|
con = sim.data.contact[coni]
|
||||||
|
|
||||||
|
collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
|
||||||
|
collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
|
||||||
|
|
||||||
|
if collision or collision_trans:
|
||||||
|
return True
|
||||||
|
return False
|
164
alr_envs/mujoco/beerpong/beerpong_simple.py
Normal file
164
alr_envs/mujoco/beerpong/beerpong_simple.py
Normal file
@ -0,0 +1,164 @@
|
|||||||
|
from gym import utils
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
from alr_envs.mujoco import alr_mujoco_env
|
||||||
|
|
||||||
|
|
||||||
|
class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
|
||||||
|
def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None):
|
||||||
|
self._steps = 0
|
||||||
|
|
||||||
|
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
|
||||||
|
"beerpong" + ".xml")
|
||||||
|
|
||||||
|
self.start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
|
||||||
|
self.start_vel = np.zeros(7)
|
||||||
|
|
||||||
|
self._q_pos = []
|
||||||
|
self._q_vel = []
|
||||||
|
# self.weight_matrix_scale = 50
|
||||||
|
self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
|
||||||
|
self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
|
||||||
|
self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
|
||||||
|
|
||||||
|
self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
|
||||||
|
self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
|
||||||
|
|
||||||
|
self.context = None
|
||||||
|
|
||||||
|
utils.EzPickle.__init__(self)
|
||||||
|
alr_mujoco_env.AlrMujocoEnv.__init__(self,
|
||||||
|
self.xml_path,
|
||||||
|
apply_gravity_comp=apply_gravity_comp,
|
||||||
|
n_substeps=n_substeps)
|
||||||
|
|
||||||
|
self.sim_time = 8 # seconds
|
||||||
|
self.sim_steps = int(self.sim_time / self.dt)
|
||||||
|
if reward_function is None:
|
||||||
|
from alr_envs.mujoco.beerpong.beerpong_reward_simple import BeerpongReward
|
||||||
|
reward_function = BeerpongReward
|
||||||
|
self.reward_function = reward_function(self.sim, self.sim_steps)
|
||||||
|
self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
|
||||||
|
self.ball_id = self.sim.model._body_name2id["ball"]
|
||||||
|
self.cup_table_id = self.sim.model._body_name2id["cup_table"]
|
||||||
|
# self.bounce_table_id = self.sim.model._body_name2id["bounce_table"]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_pos(self):
|
||||||
|
return self.sim.data.qpos[0:7].copy()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_vel(self):
|
||||||
|
return self.sim.data.qvel[0:7].copy()
|
||||||
|
|
||||||
|
def configure(self, context):
|
||||||
|
if context is None:
|
||||||
|
context = np.array([0, -2, 0.840])
|
||||||
|
self.context = context
|
||||||
|
self.reward_function.reset(context)
|
||||||
|
|
||||||
|
def reset_model(self):
|
||||||
|
init_pos_all = self.init_qpos.copy()
|
||||||
|
init_pos_robot = self.start_pos
|
||||||
|
init_vel = np.zeros_like(init_pos_all)
|
||||||
|
|
||||||
|
self._steps = 0
|
||||||
|
self._q_pos = []
|
||||||
|
self._q_vel = []
|
||||||
|
|
||||||
|
start_pos = init_pos_all
|
||||||
|
start_pos[0:7] = init_pos_robot
|
||||||
|
# start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
|
||||||
|
|
||||||
|
self.set_state(start_pos, init_vel)
|
||||||
|
|
||||||
|
ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
|
||||||
|
self.sim.model.body_pos[self.ball_id] = ball_pos.copy()
|
||||||
|
self.sim.model.body_pos[self.cup_table_id] = self.context.copy()
|
||||||
|
# self.sim.model.body_pos[self.bounce_table_id] = self.context.copy()
|
||||||
|
|
||||||
|
self.sim.forward()
|
||||||
|
|
||||||
|
return self._get_obs()
|
||||||
|
|
||||||
|
def step(self, a):
|
||||||
|
reward_dist = 0.0
|
||||||
|
angular_vel = 0.0
|
||||||
|
reward_ctrl = - np.square(a).sum()
|
||||||
|
action_cost = np.sum(np.square(a))
|
||||||
|
|
||||||
|
crash = self.do_simulation(a)
|
||||||
|
joint_cons_viol = self.check_traj_in_joint_limits()
|
||||||
|
|
||||||
|
self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
|
||||||
|
self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
|
||||||
|
|
||||||
|
ob = self._get_obs()
|
||||||
|
|
||||||
|
if not crash and not joint_cons_viol:
|
||||||
|
reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps)
|
||||||
|
done = success or self._steps == self.sim_steps - 1 or stop_sim
|
||||||
|
self._steps += 1
|
||||||
|
else:
|
||||||
|
reward = -10 - 1e-2 * action_cost
|
||||||
|
success = False
|
||||||
|
done = True
|
||||||
|
return ob, reward, done, dict(reward_dist=reward_dist,
|
||||||
|
reward_ctrl=reward_ctrl,
|
||||||
|
velocity=angular_vel,
|
||||||
|
traj=self._q_pos, is_success=success,
|
||||||
|
is_collided=crash or joint_cons_viol)
|
||||||
|
|
||||||
|
def check_traj_in_joint_limits(self):
|
||||||
|
return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
|
||||||
|
|
||||||
|
def extend_des_pos(self, des_pos):
|
||||||
|
des_pos_full = self.start_pos.copy()
|
||||||
|
des_pos_full[1] = des_pos[0]
|
||||||
|
des_pos_full[3] = des_pos[1]
|
||||||
|
des_pos_full[5] = des_pos[2]
|
||||||
|
return des_pos_full
|
||||||
|
|
||||||
|
def extend_des_vel(self, des_vel):
|
||||||
|
des_vel_full = self.start_vel.copy()
|
||||||
|
des_vel_full[1] = des_vel[0]
|
||||||
|
des_vel_full[3] = des_vel[1]
|
||||||
|
des_vel_full[5] = des_vel[2]
|
||||||
|
return des_vel_full
|
||||||
|
|
||||||
|
def _get_obs(self):
|
||||||
|
theta = self.sim.data.qpos.flat[:7]
|
||||||
|
return np.concatenate([
|
||||||
|
np.cos(theta),
|
||||||
|
np.sin(theta),
|
||||||
|
# self.get_body_com("target"), # only return target to make problem harder
|
||||||
|
[self._steps],
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
env = ALRBeerpongEnv()
|
||||||
|
ctxt = np.array([0, -2, 0.840]) # initial
|
||||||
|
|
||||||
|
env.configure(ctxt)
|
||||||
|
env.reset()
|
||||||
|
env.render()
|
||||||
|
for i in range(16000):
|
||||||
|
# test with random actions
|
||||||
|
ac = 0.0 * env.action_space.sample()[0:7]
|
||||||
|
ac[1] = -0.01
|
||||||
|
ac[3] = - 0.01
|
||||||
|
ac[5] = -0.01
|
||||||
|
# ac = env.start_pos
|
||||||
|
# ac[0] += np.pi/2
|
||||||
|
obs, rew, d, info = env.step(ac)
|
||||||
|
env.render()
|
||||||
|
|
||||||
|
print(rew)
|
||||||
|
|
||||||
|
if d:
|
||||||
|
break
|
||||||
|
|
||||||
|
env.close()
|
||||||
|
|
105
alr_envs/mujoco/beerpong/utils.py
Normal file
105
alr_envs/mujoco/beerpong/utils.py
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
from alr_envs.utils.detpmp_env_wrapper import DetPMPEnvWrapper
|
||||||
|
from alr_envs.mujoco.beerpong.beerpong import ALRBeerpongEnv
|
||||||
|
from alr_envs.mujoco.beerpong.beerpong_simple import ALRBeerpongEnv as ALRBeerpongEnvSimple
|
||||||
|
|
||||||
|
|
||||||
|
def make_contextual_env(rank, seed=0):
|
||||||
|
"""
|
||||||
|
Utility function for multiprocessed env.
|
||||||
|
|
||||||
|
:param env_id: (str) the environment ID
|
||||||
|
:param num_env: (int) the number of environments you wish to have in subprocesses
|
||||||
|
:param seed: (int) the initial seed for RNG
|
||||||
|
:param rank: (int) index of the subprocess
|
||||||
|
:returns a function that generates an environment
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _init():
|
||||||
|
env = ALRBeerpongEnv()
|
||||||
|
|
||||||
|
env = DetPMPEnvWrapper(env,
|
||||||
|
num_dof=7,
|
||||||
|
num_basis=5,
|
||||||
|
width=0.005,
|
||||||
|
policy_type="motor",
|
||||||
|
start_pos=env.start_pos,
|
||||||
|
duration=3.5,
|
||||||
|
post_traj_time=4.5,
|
||||||
|
dt=env.dt,
|
||||||
|
weights_scale=0.5,
|
||||||
|
zero_start=True,
|
||||||
|
zero_goal=True
|
||||||
|
)
|
||||||
|
|
||||||
|
env.seed(seed + rank)
|
||||||
|
return env
|
||||||
|
|
||||||
|
return _init
|
||||||
|
|
||||||
|
|
||||||
|
def make_env(rank, seed=0):
|
||||||
|
"""
|
||||||
|
Utility function for multiprocessed env.
|
||||||
|
|
||||||
|
:param env_id: (str) the environment ID
|
||||||
|
:param num_env: (int) the number of environments you wish to have in subprocesses
|
||||||
|
:param seed: (int) the initial seed for RNG
|
||||||
|
:param rank: (int) index of the subprocess
|
||||||
|
:returns a function that generates an environment
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _init():
|
||||||
|
env = ALRBeerpongEnvSimple()
|
||||||
|
|
||||||
|
env = DetPMPEnvWrapper(env,
|
||||||
|
num_dof=7,
|
||||||
|
num_basis=5,
|
||||||
|
width=0.005,
|
||||||
|
policy_type="motor",
|
||||||
|
start_pos=env.start_pos,
|
||||||
|
duration=3.5,
|
||||||
|
post_traj_time=4.5,
|
||||||
|
dt=env.dt,
|
||||||
|
weights_scale=0.25,
|
||||||
|
zero_start=True,
|
||||||
|
zero_goal=True
|
||||||
|
)
|
||||||
|
|
||||||
|
env.seed(seed + rank)
|
||||||
|
return env
|
||||||
|
|
||||||
|
return _init
|
||||||
|
|
||||||
|
|
||||||
|
def make_simple_env(rank, seed=0):
|
||||||
|
"""
|
||||||
|
Utility function for multiprocessed env.
|
||||||
|
|
||||||
|
:param env_id: (str) the environment ID
|
||||||
|
:param num_env: (int) the number of environments you wish to have in subprocesses
|
||||||
|
:param seed: (int) the initial seed for RNG
|
||||||
|
:param rank: (int) index of the subprocess
|
||||||
|
:returns a function that generates an environment
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _init():
|
||||||
|
env = ALRBeerpongEnvSimple()
|
||||||
|
|
||||||
|
env = DetPMPEnvWrapper(env,
|
||||||
|
num_dof=3,
|
||||||
|
num_basis=5,
|
||||||
|
width=0.005,
|
||||||
|
policy_type="motor",
|
||||||
|
start_pos=env.start_pos[1::2],
|
||||||
|
duration=3.5,
|
||||||
|
post_traj_time=4.5,
|
||||||
|
dt=env.dt,
|
||||||
|
weights_scale=0.5,
|
||||||
|
zero_start=True,
|
||||||
|
zero_goal=True
|
||||||
|
)
|
||||||
|
|
||||||
|
env.seed(seed + rank)
|
||||||
|
return env
|
||||||
|
|
||||||
|
return _init
|
@ -19,7 +19,9 @@ class DmpEnvWrapper(gym.Wrapper):
|
|||||||
learn_goal=False,
|
learn_goal=False,
|
||||||
post_traj_time=0.,
|
post_traj_time=0.,
|
||||||
policy_type=None,
|
policy_type=None,
|
||||||
weights_scale=1.):
|
weights_scale=1.,
|
||||||
|
goal_scale=1.,
|
||||||
|
):
|
||||||
super(DmpEnvWrapper, self).__init__(env)
|
super(DmpEnvWrapper, self).__init__(env)
|
||||||
self.num_dof = num_dof
|
self.num_dof = num_dof
|
||||||
self.num_basis = num_basis
|
self.num_basis = num_basis
|
||||||
@ -52,6 +54,7 @@ class DmpEnvWrapper(gym.Wrapper):
|
|||||||
|
|
||||||
self.dmp.set_weights(dmp_weights, dmp_goal_pos)
|
self.dmp.set_weights(dmp_weights, dmp_goal_pos)
|
||||||
self.weights_scale = weights_scale
|
self.weights_scale = weights_scale
|
||||||
|
self.goal_scale = goal_scale
|
||||||
|
|
||||||
policy_class = get_policy_class(policy_type)
|
policy_class = get_policy_class(policy_type)
|
||||||
self.policy = policy_class(env)
|
self.policy = policy_class(env)
|
||||||
@ -78,10 +81,11 @@ class DmpEnvWrapper(gym.Wrapper):
|
|||||||
goal_pos = params[0, -self.num_dof:]
|
goal_pos = params[0, -self.num_dof:]
|
||||||
weight_matrix = np.reshape(params[:, :-self.num_dof], [self.num_basis, self.num_dof])
|
weight_matrix = np.reshape(params[:, :-self.num_dof], [self.num_basis, self.num_dof])
|
||||||
else:
|
else:
|
||||||
goal_pos = None
|
goal_pos = self.dmp.dmp_goal_pos.flatten()
|
||||||
|
assert goal_pos is not None
|
||||||
weight_matrix = np.reshape(params, [self.num_basis, self.num_dof])
|
weight_matrix = np.reshape(params, [self.num_basis, self.num_dof])
|
||||||
|
|
||||||
return goal_pos, weight_matrix * self.weights_scale
|
return goal_pos * self.goal_scale, weight_matrix * self.weights_scale
|
||||||
|
|
||||||
def rollout(self, params, context=None, render=False):
|
def rollout(self, params, context=None, render=False):
|
||||||
""" This function generates a trajectory based on a DMP and then does the usual loop over reset and step"""
|
""" This function generates a trajectory based on a DMP and then does the usual loop over reset and step"""
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from alr_envs.classic_control.utils import make_viapointreacher_env
|
from alr_envs.classic_control.utils import make_viapointreacher_env
|
||||||
|
from alr_envs.classic_control.utils import make_holereacher_env, make_holereacher_fix_goal_env
|
||||||
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv
|
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
@ -7,21 +8,20 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
n_samples = 1
|
n_samples = 1
|
||||||
n_cpus = 4
|
n_cpus = 4
|
||||||
dim = 25
|
dim = 30
|
||||||
|
|
||||||
# env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
|
# env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
|
||||||
# n_samples=n_samples)
|
# n_samples=n_samples)
|
||||||
|
|
||||||
test_env = make_viapointreacher_env(0)()
|
test_env = make_holereacher_env(0)()
|
||||||
|
|
||||||
# params = np.random.randn(n_samples, dim)
|
# params = np.random.randn(n_samples, dim)
|
||||||
params = np.array([ 217.54494933, -1.85169983, 24.08414447, 42.23816868,
|
params = np.array([[ 1.386102 , -3.29980525, 4.70402733, 1.3966668 , 0.73774902,
|
||||||
23.32071702, 7.60780651, -31.74777741, 265.50634253,
|
3.14676681, -4.98644416, 6.20303193, 1.30502127, -0.09330522,
|
||||||
463.43822562, 245.93948374, -272.64003621, -45.24999553,
|
7.62656797, -5.76893033, 3.4706711 , -0.6944142 , -3.33442788,
|
||||||
503.21185823, 809.17742517, 393.12387021, -196.54196471,
|
12.31421548, -0.72760271, -6.9090723 , 7.02903814, -8.7236836 ,
|
||||||
6.79327307, 374.82429078, 552.4119579 , 197.3963343 ,
|
1.4805914 , 0.53185824, -5.46626893, 0.69692163, 13.58472666,
|
||||||
243.87357056, -39.56041541, -616.93957463, -710.0772516 ,
|
0.77199316, 2.02906724, -3.0203244 , -1.00533159, -0.57417351]])
|
||||||
-414.21769789])
|
|
||||||
|
|
||||||
# params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
|
# params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user