updates in biac
This commit is contained in:
parent
13a292f0e0
commit
708478c626
@ -2,27 +2,44 @@ from gym import utils
|
||||
import os
|
||||
import numpy as np
|
||||
from alr_envs.mujoco import alr_mujoco_env
|
||||
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward
|
||||
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
|
||||
import mujoco_py
|
||||
|
||||
|
||||
class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
|
||||
def __init__(self, ):
|
||||
def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None):
|
||||
self._steps = 0
|
||||
|
||||
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
|
||||
"ball-in-a-cup_base" + ".xml")
|
||||
|
||||
self.sim_time = 8 # seconds
|
||||
self.sim_steps = int(self.sim_time / (0.0005 * 4)) # circular dependency.. sim.dt <-> mujocoenv init <-> reward fct
|
||||
self.reward_function = BallInACupReward(self.sim_steps)
|
||||
"biac_base" + ".xml")
|
||||
|
||||
self.start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57])
|
||||
self.start_vel = np.zeros(7)
|
||||
|
||||
self._q_pos = []
|
||||
self._q_vel = []
|
||||
# self.weight_matrix_scale = 50
|
||||
self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
|
||||
self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
|
||||
self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
|
||||
|
||||
self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
|
||||
self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
|
||||
|
||||
self.context = None
|
||||
|
||||
utils.EzPickle.__init__(self)
|
||||
alr_mujoco_env.AlrMujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", "ball-in-a-cup_base.xml"),
|
||||
n_substeps=4)
|
||||
alr_mujoco_env.AlrMujocoEnv.__init__(self,
|
||||
self.xml_path,
|
||||
apply_gravity_comp=apply_gravity_comp,
|
||||
n_substeps=n_substeps)
|
||||
|
||||
self.sim_time = 8 # seconds
|
||||
self.sim_steps = int(self.sim_time / self.dt)
|
||||
if reward_function is None:
|
||||
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
|
||||
reward_function = BallInACupReward
|
||||
self.reward_function = reward_function(self.sim_steps)
|
||||
|
||||
def configure(self, context):
    """
    Store the given context on the environment.

    :param context: value kept as ``self.context``; it is not inspected here
    """
    self.context = context
|
||||
|
95
alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py
Normal file
95
alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py
Normal file
@ -0,0 +1,95 @@
|
||||
import numpy as np
|
||||
from alr_envs.mujoco import alr_reward_fct
|
||||
|
||||
|
||||
class BallInACupReward(alr_reward_fct.AlrReward):
    """
    Reward function for the ball-in-a-cup task.

    Every step is charged a small quadratic action cost. A contact between the
    ball and one of the geoms named in ``collision_objects`` yields a large
    penalty and raises the collision flag. Only the last step of an episode
    adds a distance-based term and can report success.
    """

    def __init__(self, sim_time):
        """
        :param sim_time: (int) number of simulation steps per episode. Despite
            its name it is used as a step count: it sizes the ball trajectory
            buffer and ``sim_time - 1`` is treated as the final step index
        """
        self.sim_time = sim_time

        # Names of the geoms the ball must not touch.
        self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
                                  "wrist_pitch_link_convex_decomposition_p1_geom",
                                  "wrist_pitch_link_convex_decomposition_p2_geom",
                                  "wrist_pitch_link_convex_decomposition_p3_geom",
                                  "wrist_yaw_link_convex_decomposition_p1_geom",
                                  "wrist_yaw_link_convex_decomposition_p2_geom",
                                  "forearm_link_convex_decomposition_p1_geom",
                                  "forearm_link_convex_decomposition_p2_geom"]

        # Model ids; they are looked up from the simulation in compute_reward.
        self.ball_id = None
        self.ball_collision_id = None
        self.goal_id = None
        self.goal_final_id = None
        self.collision_ids = None

        # Per-episode buffers; reset() gives them their initial values.
        self.ball_traj = None
        self.dists = None
        self.dists_final = None
        self.costs = None

        self.reset(None)

    def reset(self, context):
        """
        Clear all per-episode buffers and remember the context.

        :param context: value stored as ``self.context``
        """
        self.ball_traj = np.zeros(shape=(self.sim_time, 3))
        self.dists = []
        self.dists_final = []
        self.costs = []
        self.context = context

    def compute_reward(self, action, sim, step, context=None):
        """
        Compute the reward for one simulation step.

        :param action: action applied in this step; its squared sum is the action cost
        :param sim: simulation object providing ``model`` (name -> id maps) and ``data``
        :param step: (int) index of the current step, used to index the trajectory buffer
        :param context: unused
        :returns: tuple ``(reward, success, collision)``
        """
        self.ball_id = sim.model._body_name2id["ball"]
        self.ball_collision_id = sim.model._geom_name2id["ball_geom"]
        self.goal_id = sim.model._site_name2id["cup_goal"]
        self.goal_final_id = sim.model._site_name2id["cup_goal_final"]
        self.collision_ids = [sim.model._geom_name2id[name] for name in self.collision_objects]

        # Record the current distances from the ball to both goal sites
        goal_pos = sim.data.site_xpos[self.goal_id]
        ball_pos = sim.data.body_xpos[self.ball_id]
        goal_final_pos = sim.data.site_xpos[self.goal_final_id]
        self.dists.append(np.linalg.norm(goal_pos - ball_pos))
        self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
        self.ball_traj[step, :] = ball_pos

        action_cost = np.sum(np.square(action))

        if self.check_collision(sim):
            # Forbidden contact: large penalty, no success, collision flag set
            reward = - 1e-5 * action_cost - 1000
            return reward, False, True

        if step == self.sim_time - 1:
            min_dist = np.min(self.dists)
            dist_final = self.dists_final[-1]

            cost = 0.5 * min_dist + 0.5 * dist_final
            reward = np.exp(-2 * cost) - 1e-5 * action_cost
            # The ball-in-cup contact scan is only needed here, so it is not
            # run on every intermediate step.
            success = dist_final < 0.05 and self.check_ball_in_cup(sim, self.ball_collision_id)
        else:
            reward = - 1e-5 * action_cost
            success = False

        return reward, success, False

    @staticmethod
    def _ball_touches(sim, ball_collision_id, geom_ids):
        """
        Tell whether an active contact pairs the ball geom with one of ``geom_ids``.

        :param sim: simulation object; the first ``sim.data.ncon`` contacts are inspected
        :param ball_collision_id: geom id of the ball
        :param geom_ids: container of geom ids to test against
        :returns: (bool) True if such a contact exists, in either geom order
        """
        for coni in range(0, sim.data.ncon):
            con = sim.data.contact[coni]

            collision = con.geom1 in geom_ids and con.geom2 == ball_collision_id
            collision_trans = con.geom1 == ball_collision_id and con.geom2 in geom_ids

            if collision or collision_trans:
                return True
        return False

    def check_ball_in_cup(self, sim, ball_collision_id):
        """
        Tell whether the ball is in contact with the ``cup_base_contact`` geom.

        :param sim: simulation object
        :param ball_collision_id: geom id of the ball
        :returns: (bool) True if the ball touches the cup base
        """
        cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
        return self._ball_touches(sim, ball_collision_id, (cup_base_collision_id,))

    def check_collision(self, sim):
        """
        Tell whether the ball touches any geom listed in ``collision_objects``.

        Uses ``self.collision_ids`` and ``self.ball_collision_id``, which are
        set by ``compute_reward``.

        :param sim: simulation object
        :returns: (bool) True if a forbidden contact exists
        """
        return self._ball_touches(sim, self.ball_collision_id, self.collision_ids)
|
@ -93,7 +93,8 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
|
||||
velocity=angular_vel, # reward_balance=reward_balance,
|
||||
# end_effector=self.get_body_com("fingertip").copy(),
|
||||
goal=self.goal if hasattr(self, "goal") else None,
|
||||
traj=self._q_pos)
|
||||
traj=self._q_pos,
|
||||
is_collided=crash or joint_cons_viol)
|
||||
|
||||
def check_traj_in_joint_limits(self):
    """
    Report whether the current joint position violates the joint limits.

    NOTE: despite the name, the result is True when a limit is *violated*.

    :returns: (bool) True if any entry of ``self.current_pos`` is greater than
        the matching entry of ``self.j_max`` or smaller than that of ``self.j_min``
    """
    position = self.current_pos
    # np.any reduces the comparison in one vectorized call instead of
    # iterating the array element by element with the builtin any().
    return bool(np.any(position > self.j_max) or np.any(position < self.j_min))
|
||||
|
39
alr_envs/mujoco/ball_in_a_cup/utils.py
Normal file
39
alr_envs/mujoco/ball_in_a_cup/utils.py
Normal file
@ -0,0 +1,39 @@
|
||||
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
|
||||
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
|
||||
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_simple import ALRBallInACupEnv as ALRBallInACupEnvSimple
|
||||
|
||||
|
||||
# TODO: add make_env for standard biac
|
||||
|
||||
|
||||
def make_simple_env(rank, seed=0):
    """
    Utility function for multiprocessed env.

    Nothing is constructed here: the environment is only built when the
    returned function is called.

    :param rank: (int) index of the subprocess; it is added to ``seed``
    :param seed: (int) the initial seed for RNG
    :returns: a function that generates an environment
    """
    # Every second entry of start_pos, beginning at index 1, is handed to the
    # DMP wrapper. Presumably these are the actuated joints, matching
    # num_dof=3 -- TODO confirm against the simple env.
    dmp_joints = slice(1, None, 2)

    def _init():
        env = ALRBallInACupEnvSimple()

        # The wrapped movement starts and ends at the same joint positions.
        env = DmpEnvWrapper(env,
                            policy_type="motor",
                            start_pos=env.start_pos[dmp_joints],
                            final_pos=env.start_pos[dmp_joints],
                            num_dof=3,
                            num_basis=8,
                            duration=3.5,
                            alpha_phase=3,
                            post_traj_time=4.5,
                            dt=env.dt,
                            learn_goal=False,
                            weights_scale=50)

        # A different seed per subprocess
        env.seed(seed + rank)
        return env

    return _init
|
Loading…
Reference in New Issue
Block a user