updates in biac

This commit is contained in:
Maximilian Huettenrauch 2021-02-11 16:19:57 +01:00
parent 13a292f0e0
commit 708478c626
4 changed files with 162 additions and 10 deletions

View File

@ -2,27 +2,44 @@ from gym import utils
import os import os
import numpy as np import numpy as np
from alr_envs.mujoco import alr_mujoco_env from alr_envs.mujoco import alr_mujoco_env
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
import mujoco_py import mujoco_py
class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
def __init__(self, ): def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None):
self._steps = 0 self._steps = 0
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
"ball-in-a-cup_base" + ".xml") "biac_base" + ".xml")
self.sim_time = 8 # seconds
self.sim_steps = int(self.sim_time / (0.0005 * 4)) # circular dependency.. sim.dt <-> mujocoenv init <-> reward fct
self.reward_function = BallInACupReward(self.sim_steps)
self.start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57]) self.start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57])
self.start_vel = np.zeros(7)
self._q_pos = [] self._q_pos = []
self._q_vel = []
# self.weight_matrix_scale = 50
self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
self.context = None
utils.EzPickle.__init__(self) utils.EzPickle.__init__(self)
alr_mujoco_env.AlrMujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", "ball-in-a-cup_base.xml"), alr_mujoco_env.AlrMujocoEnv.__init__(self,
n_substeps=4) self.xml_path,
apply_gravity_comp=apply_gravity_comp,
n_substeps=n_substeps)
self.sim_time = 8 # seconds
self.sim_steps = int(self.sim_time / self.dt)
if reward_function is None:
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
reward_function = BallInACupReward
self.reward_function = reward_function(self.sim_steps)
def configure(self, context): def configure(self, context):
self.context = context self.context = context

View File

@ -0,0 +1,95 @@
import numpy as np
from alr_envs.mujoco import alr_reward_fct
class BallInACupReward(alr_reward_fct.AlrReward):
    """Episode reward for the ball-in-a-cup task.

    Every step is charged a small quadratic action cost. A contact between the
    ball and one of the geoms listed in ``collision_objects`` ends the
    evaluation with a large penalty. On the last step of the episode a
    distance-based term is added and task success is evaluated.
    """

    def __init__(self, sim_time):
        """
        :param sim_time: number of simulation steps per episode (despite the
            name it is used as a step count: it sizes the ball trajectory
            buffer and identifies the final step).
        """
        self.sim_time = sim_time

        # Names of the geoms the ball must not touch.
        self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
                                  "wrist_pitch_link_convex_decomposition_p1_geom",
                                  "wrist_pitch_link_convex_decomposition_p2_geom",
                                  "wrist_pitch_link_convex_decomposition_p3_geom",
                                  "wrist_yaw_link_convex_decomposition_p1_geom",
                                  "wrist_yaw_link_convex_decomposition_p2_geom",
                                  "forearm_link_convex_decomposition_p1_geom",
                                  "forearm_link_convex_decomposition_p2_geom"]

        # Model ids; resolved from the simulation on every compute_reward call.
        self.ball_id = None
        self.ball_collision_id = None
        self.goal_id = None
        self.goal_final_id = None
        self.collision_ids = None

        # Per-episode buffers; (re)created by reset().
        self.ball_traj = None
        self.dists = None
        self.dists_final = None
        self.costs = None
        self.context = None

        self.reset(None)

    def reset(self, context):
        """Clear all per-episode buffers and remember ``context``.

        :param context: stored on the instance; this class does not read it.
        """
        self.ball_traj = np.zeros(shape=(self.sim_time, 3))
        self.dists = []
        self.dists_final = []
        self.costs = []
        self.context = context

    def compute_reward(self, action, sim, step, context=None):
        """Compute the reward for one simulation step.

        :param action: action applied in this step; only its sum of squares is used
        :param sim: simulation object providing ``model`` (name -> id maps) and
            ``data`` (positions and contacts)
        :param step: zero-based index of the current step within the episode
        :param context: accepted for interface compatibility; unused here
        :returns: tuple ``(reward, success, collided)``; ``collided`` is True
            only when ``check_collision`` reports a contact, in which case
            ``success`` is always False
        """
        model = sim.model
        self.ball_id = model._body_name2id["ball"]
        self.ball_collision_id = model._geom_name2id["ball_geom"]
        self.goal_id = model._site_name2id["cup_goal"]
        self.goal_final_id = model._site_name2id["cup_goal_final"]
        self.collision_ids = [model._geom_name2id[name] for name in self.collision_objects]

        # Current distances from the ball to both goal sites in the cup.
        goal_pos = sim.data.site_xpos[self.goal_id]
        ball_pos = sim.data.body_xpos[self.ball_id]
        goal_final_pos = sim.data.site_xpos[self.goal_final_id]
        self.dists.append(np.linalg.norm(goal_pos - ball_pos))
        self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
        self.ball_traj[step, :] = ball_pos

        action_cost = np.sum(np.square(action))

        if self.check_collision(sim):
            # Forbidden contact: large penalty, flagged via the third return value.
            reward = - 1e-5 * action_cost - 1000
            return reward, False, True

        if step == self.sim_time - 1:
            # Final step: mix the closest approach to "cup_goal" over the
            # episode with the final distance to "cup_goal_final".
            min_dist = np.min(self.dists)
            dist_final = self.dists_final[-1]

            cost = 0.5 * min_dist + 0.5 * dist_final
            reward = np.exp(-2 * cost) - 1e-5 * action_cost
            # The contact scan is only needed here, so it is not run on
            # every intermediate step.
            ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
            success = dist_final < 0.05 and ball_in_cup
        else:
            reward = - 1e-5 * action_cost
            success = False

        return reward, success, False

    @staticmethod
    def _ball_touches(sim, ball_geom_id, other_geom_ids):
        """Return True if a contact pairs ``ball_geom_id`` with any id in ``other_geom_ids``."""
        # Only the first ``ncon`` contact entries are inspected (presumably the
        # currently active contacts -- confirm against the mujoco_py docs).
        for idx in range(sim.data.ncon):
            con = sim.data.contact[idx]
            # A contact may list the two geoms in either order.
            if con.geom1 == ball_geom_id and con.geom2 in other_geom_ids:
                return True
            if con.geom2 == ball_geom_id and con.geom1 in other_geom_ids:
                return True
        return False

    def check_ball_in_cup(self, sim, ball_collision_id):
        """Return True if the ball geom is in contact with the "cup_base_contact" geom.

        :param sim: simulation object providing ``model`` and ``data``
        :param ball_collision_id: geom id of the ball
        """
        cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
        return self._ball_touches(sim, ball_collision_id, (cup_base_collision_id,))

    def check_collision(self, sim):
        """Return True if the ball touches any geom listed in ``collision_objects``.

        Relies on ``ball_collision_id`` and ``collision_ids`` having been
        resolved by ``compute_reward``.
        """
        return self._ball_touches(sim, self.ball_collision_id, self.collision_ids)

View File

@ -93,7 +93,8 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
velocity=angular_vel, # reward_balance=reward_balance, velocity=angular_vel, # reward_balance=reward_balance,
# end_effector=self.get_body_com("fingertip").copy(), # end_effector=self.get_body_com("fingertip").copy(),
goal=self.goal if hasattr(self, "goal") else None, goal=self.goal if hasattr(self, "goal") else None,
traj=self._q_pos) traj=self._q_pos,
is_collided=crash or joint_cons_viol)
def check_traj_in_joint_limits(self): def check_traj_in_joint_limits(self):
return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min) return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)

View File

@ -0,0 +1,39 @@
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_simple import ALRBallInACupEnv as ALRBallInACupEnvSimple
# TODO: add a make_env factory for the standard (non-simple) ball-in-a-cup (biac) environment
def make_simple_env(rank, seed=0):
    """
    Utility function for multiprocessed env.

    Returns a factory that creates the simple ball-in-a-cup environment
    wrapped in a DmpEnvWrapper and seeds it with ``seed + rank``.

    :param rank: (int) index of the subprocess
    :param seed: (int) the initial seed for RNG
    :returns: a function that generates an environment
    """

    def _init():
        base_env = ALRBallInACupEnvSimple()

        # All wrapper settings are read from the unwrapped env before wrapping.
        # Start and final position are the same slice of the start pose:
        # every second entry beginning at index 1.
        wrapper_settings = dict(
            policy_type="motor",
            start_pos=base_env.start_pos[1::2],
            final_pos=base_env.start_pos[1::2],
            num_dof=3,
            num_basis=8,
            duration=3.5,
            alpha_phase=3,
            post_traj_time=4.5,
            dt=base_env.dt,
            learn_goal=False,
            weights_scale=50,
        )
        wrapped_env = DmpEnvWrapper(base_env, **wrapper_settings)

        # Offset the seed by the rank so each subprocess gets a different one.
        wrapped_env.seed(seed + rank)
        return wrapped_env

    return _init