Merge branch 'master' into fix_metaworld_rendering

commit 3be34048b2
Author: Dominik Moritz Roth
Date: 2024-02-10 08:56:06 +01:00

16 changed files with 438 additions and 101 deletions


@@ -105,17 +105,16 @@ Regular step based environments added by Fancy Gym are added into the `fancy/` namespace.
 import gymnasium as gym
 import fancy_gym

-env = gym.make('fancy/Reacher5d-v0')
-# or env = gym.make('metaworld/reach-v2') # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS
-# or env = gym.make('dm_control/ball_in_cup-catch-v0')
-# or env = gym.make('Reacher-v2')
+env = gym.make('fancy/Reacher5d-v0', render_mode='human')
+# or env = gym.make('metaworld/reach-v2', render_mode='human') # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS
+# or env = gym.make('dm_control/ball_in_cup-catch-v0', render_mode='human')
+# or env = gym.make('Reacher-v2', render_mode='human')

 observation = env.reset(seed=1)
-env.render()

 for i in range(1000):
     action = env.action_space.sample()
     observation, reward, terminated, truncated, info = env.step(action)
-    if i % 5 == 0:
-        env.render()

     if terminated or truncated:
         observation, info = env.reset()
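
With this change the README follows the standard Gymnasium rendering API: the render mode is fixed once in gym.make() instead of being toggled through render() calls inside the loop. A rough sketch of what the new convention also enables (off-screen capture; the rgb_array mode and frame type are standard Gymnasium behavior, not shown in this diff):

    import gymnasium as gym
    import fancy_gym  # noqa: F401 -- importing registers the fancy/ namespace

    # 'rgb_array' yields frames instead of opening a viewer window
    env = gym.make('fancy/Reacher5d-v0', render_mode='rgb_array')
    env.reset(seed=1)
    env.step(env.action_space.sample())
    frame = env.render()  # numpy array of shape (height, width, 3)
    env.close()
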
@@ -149,17 +148,14 @@ Just keep in mind, calling `step()` executes a full trajectory.
 import gymnasium as gym
 import fancy_gym

-env = gym.make('fancy_ProMP/Reacher5d-v0')
-# or env = gym.make('metaworld_ProDMP/reach-v2')
-# or env = gym.make('dm_control_DMP/ball_in_cup-catch-v0')
-# or env = gym.make('gym_ProMP/Reacher-v2') # mp versions of envs added directly by gymnasium are in the gym_<MP-type> NS
-
-# render() can be called once in the beginning with all necessary arguments.
-# To turn it of again just call render() without any arguments.
-env.render(mode='human')
+env = gym.make('fancy_ProMP/Reacher5d-v0', render_mode="human")
+# or env = gym.make('metaworld_ProDMP/reach-v2', render_mode="human")
+# or env = gym.make('dm_control_DMP/ball_in_cup-catch-v0', render_mode="human")
+# or env = gym.make('gym_ProMP/Reacher-v2', render_mode="human") # mp versions of envs added directly by gymnasium are in the gym_<MP-type> NS

 # This returns the context information, not the full state observation
 observation, info = env.reset(seed=1)
-env.render()

 for i in range(5):
     action = env.action_space.sample()
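
The surrounding README text notes that for movement-primitive environments a single step() executes a full trajectory, which is why this snippet samples only 5 actions. A sketch of how the truncated loop would plausibly continue, mirroring the step-based example above:

    for i in range(5):
        action = env.action_space.sample()
        # one step == one full trajectory of the underlying step-based env
        observation, reward, terminated, truncated, info = env.step(action)
        if terminated or truncated:
            observation, info = env.reset()
    env.close()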


@@ -291,7 +291,7 @@ register(
 )

 # Air Hockey environments
-for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend"]:
+for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend", "7dof-hit-airhockit2023", "7dof-defend-airhockit2023"]:
     register(
         id=f'fancy/AirHockey-{env_mode}-v0',
         entry_point='fancy_gym.envs.mujoco:AirHockeyEnv',
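
The two new list entries reuse the existing f-string pattern, so the AirhocKIT2023 variants should be constructible like any other registered ID once fancy_gym is imported (a sketch; reset/step semantics follow the AirHockeyEnv wrapper below):

    import gymnasium as gym
    import fancy_gym  # noqa: F401

    env = gym.make('fancy/AirHockey-7dof-hit-airhockit2023-v0')
    obs = env.reset(seed=0)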


@@ -8,9 +8,9 @@ from fancy_gym.envs.mujoco.air_hockey.utils import robot_to_world
 from mushroom_rl.core import Environment

 class AirHockeyEnv(Environment):
-    metadata = {"render_modes": ["human"], "render_fps": 50}
+    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50}

-    def __init__(self, env_mode=None, interpolation_order=3, render_mode=None, **kwargs):
+    def __init__(self, env_mode=None, interpolation_order=3, render_mode=None, width=1920, height=1080, **kwargs):
         """
         Environment Constructor

@@ -30,7 +30,10 @@ class AirHockeyEnv(Environment):
             "7dof-defend": position.IiwaPositionDefend,
             "3dof-hit": position.PlanarPositionHit,
-            "3dof-defend": position.PlanarPositionDefend
+            "3dof-defend": position.PlanarPositionDefend,
+            "7dof-hit-airhockit2023": position.IiwaPositionHitAirhocKIT2023,
+            "7dof-defend-airhockit2023": position.IiwaPositionDefendAirhocKIT2023,
         }

         if env_mode not in env_dict:

@@ -39,37 +42,53 @@ class AirHockeyEnv(Environment):
         if env_mode == "tournament" and type(interpolation_order) != tuple:
             interpolation_order = (interpolation_order, interpolation_order)

+        self.render_mode = render_mode
+        self.render_human_active = False
+
+        # Determine headless mode based on render_mode
+        headless = self.render_mode == 'rgb_array'
+
+        # Prepare viewer_params
+        viewer_params = kwargs.get('viewer_params', {})
+        viewer_params.update({'headless': headless, 'width': width, 'height': height})
+        kwargs['viewer_params'] = viewer_params
+
         self.base_env = env_dict[env_mode](interpolation_order=interpolation_order, **kwargs)
         self.env_name = env_mode
         self.env_info = self.base_env.env_info

-        single_robot_obs_size = len(self.base_env.info.observation_space.low)
-        if env_mode == "tournament":
-            self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,single_robot_obs_size), dtype=np.float64)
-        else:
-            self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(single_robot_obs_size,), dtype=np.float64)
-        robot_info = self.env_info["robot"]
-        if env_mode != "tournament":
-            if interpolation_order in [1, 2]:
-                self.action_space = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
-            if interpolation_order in [3, 4, -1]:
-                self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
-                                               high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
-            if interpolation_order in [5]:
-                self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
-                                               high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
-        else:
-            acts = [None, None]
-            for i in range(2):
-                if interpolation_order[i] in [1, 2]:
-                    acts[i] = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
-                if interpolation_order[i] in [3, 4, -1]:
-                    acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
-                                         high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
-                if interpolation_order[i] in [5]:
-                    acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
-                                         high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
-            self.action_space = spaces.Tuple((acts[0], acts[1]))
+        if hasattr(self.base_env, "wrapper_obs_space") and hasattr(self.base_env, "wrapper_act_space"):
+            self.observation_space = self.base_env.wrapper_obs_space
+            self.action_space = self.base_env.wrapper_act_space
+        else:
+            single_robot_obs_size = len(self.base_env.info.observation_space.low)
+            if env_mode == "tournament":
+                self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,single_robot_obs_size), dtype=np.float64)
+            else:
+                self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(single_robot_obs_size,), dtype=np.float64)
+            robot_info = self.env_info["robot"]
+            if env_mode != "tournament":
+                if interpolation_order in [1, 2]:
+                    self.action_space = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
+                if interpolation_order in [3, 4, -1]:
+                    self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
+                                                   high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
+                if interpolation_order in [5]:
+                    self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
+                                                   high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
+            else:
+                acts = [None, None]
+                for i in range(2):
+                    if interpolation_order[i] in [1, 2]:
+                        acts[i] = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
+                    if interpolation_order[i] in [3, 4, -1]:
+                        acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
+                                             high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
+                    if interpolation_order[i] in [5]:
+                        acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
+                                             high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
+                self.action_space = spaces.Tuple((acts[0], acts[1]))

         constraint_list = constraints.ConstraintList()
         constraint_list.add(constraints.JointPositionConstraint(self.env_info))

@@ -81,9 +100,6 @@ class AirHockeyEnv(Environment):
         self.env_info['constraints'] = constraint_list
         self.env_info['env_name'] = self.env_name

-        self.render_mode = render_mode
-        self.render_human_active = False
-
         super().__init__(self.base_env.info)

     def step(self, action):

@@ -111,15 +127,21 @@ class AirHockeyEnv(Environment):
         if self.env_info['env_name'] == "tournament":
             obs = np.array(np.split(obs, 2))

         if self.render_human_active:
             self.base_env.render()

         return obs, reward, done, False, info

     def render(self):
-        self.render_human_active = True
-        self.base_env.render()
+        if self.render_mode == 'rgb_array':
+            return self.base_env.render(record = True)
+        elif self.render_mode == 'human':
+            self.render_human_active = True
+            self.base_env.render()
+        else:
+            raise ValueError(f"Unsupported render mode: '{self.render_mode}'")

     def reset(self, seed=None, options={}):
         self.base_env.seed(seed)
         obs = self.base_env.reset()

@@ -177,4 +199,4 @@ if __name__ == "__main__":
     J = 0.
     gamma = 1.
     steps = 0
     env.reset()
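
Taken together: the constructor now stores render_mode up front, runs the underlying mushroom_rl viewer headless (sized by the new width/height arguments) when render_mode='rgb_array', and render() dispatches between returning a recorded frame and activating the interactive viewer. A usage sketch; the exact type returned by base_env.render(record=True) depends on the underlying viewer:

    import gymnasium as gym
    import fancy_gym  # noqa: F401

    env = gym.make('fancy/AirHockey-3dof-hit-v0', render_mode='rgb_array',
                   width=640, height=480)
    env.reset(seed=0)
    env.step(env.action_space.sample())
    frame = env.render()  # delegates to base_env.render(record=True)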


@@ -261,10 +261,14 @@ class PlanarPositionDefend(PositionControlPlanar, three_dof.AirHockeyDefend):
 class IiwaPositionHit(PositionControlIIWA, seven_dof.AirHockeyHit):
     pass

+class IiwaPositionHitAirhocKIT2023(PositionControlIIWA, seven_dof.AirHockeyHitAirhocKIT2023):
+    pass
+
 class IiwaPositionDefend(PositionControlIIWA, seven_dof.AirHockeyDefend):
     pass

+class IiwaPositionDefendAirhocKIT2023(PositionControlIIWA, seven_dof.AirHockeyDefendAirhocKIT2023):
+    pass
+
 class IiwaPositionTournament(PositionControlIIWA, seven_dof.AirHockeyTournament):
     pass


@@ -1,4 +1,4 @@
 from .env_base import AirHockeyBase
 from .tournament import AirHockeyTournament
-from .hit import AirHockeyHit
-from .defend import AirHockeyDefend
+from .hit import AirHockeyHit, AirHockeyHitAirhocKIT2023
+from .defend import AirHockeyDefend, AirHockeyDefendAirhocKIT2023


@@ -0,0 +1,114 @@
+import numpy as np
+from gymnasium import spaces
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
+from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics, forward_kinematics, jacobian
+
+class AirhocKIT2023BaseEnv(AirHockeySingle):
+    def __init__(self, noise=False, **kwargs):
+        super().__init__(**kwargs)
+        obs_low = np.hstack([[-np.inf] * 37])
+        obs_high = np.hstack([[np.inf] * 37])
+        self.wrapper_obs_space = spaces.Box(low=obs_low, high=obs_high, dtype=np.float64)
+        self.wrapper_act_space = spaces.Box(low=np.repeat(-100., 6), high=np.repeat(100., 6))
+        self.noise = noise
+
+    # We don't need puck yaw observations
+    def filter_obs(self, obs):
+        obs = np.hstack([obs[0:2], obs[3:5], obs[6:12], obs[13:19], obs[20:]])
+        return obs
+
+    def add_noise(self, obs):
+        if not self.noise:
+            return
+        obs[self.env_info["puck_pos_ids"]] += np.random.normal(0, 0.001, 3)
+        obs[self.env_info["puck_vel_ids"]] += np.random.normal(0, 0.1, 3)
+
+    def reset(self):
+        self.last_acceleration = np.repeat(0., 6)
+        obs = super().reset()
+        self.add_noise(obs)
+        self.interp_pos = obs[self.env_info["joint_pos_ids"]][:-1]
+        self.interp_vel = obs[self.env_info["joint_vel_ids"]][:-1]
+        self.last_planned_world_pos = self._fk(self.interp_pos)
+        obs = np.hstack([
+            obs, self.interp_pos, self.interp_vel, self.last_acceleration, self.last_planned_world_pos
+        ])
+        return self.filter_obs(obs)
+
+    def step(self, action):
+        action /= 10
+        new_vel = self.interp_vel + action
+        jerk = 2 * (new_vel - self.interp_vel - self.last_acceleration * 0.02) / (0.02 ** 2)
+        new_pos = self.interp_pos + self.interp_vel * 0.02 + (1/2) * self.last_acceleration * (0.02 ** 2) + (1/6) * jerk * (0.02 ** 3)
+        abs_action = np.vstack([np.hstack([new_pos, 0]), np.hstack([new_vel, 0])])
+        self.interp_pos = new_pos
+        self.interp_vel = new_vel
+        self.last_acceleration += jerk * 0.02
+        obs, rew, done, info = super().step(abs_action)
+        self.add_noise(obs)
+        self.last_planned_world_pos = self._fk(self.interp_pos)
+        obs = np.hstack([
+            obs, self.interp_pos, self.interp_vel, self.last_acceleration, self.last_planned_world_pos
+        ])
+        fatal_rew = self.check_fatal(obs)
+        if fatal_rew != 0:
+            return self.filter_obs(obs), fatal_rew, True, info
+        return self.filter_obs(obs), rew, done, info
+
+    def check_constraints(self, constraint_values):
+        fatal_rew = 0
+
+        j_pos_constr = constraint_values["joint_pos_constr"]
+        if j_pos_constr.max() > 0:
+            fatal_rew += j_pos_constr.max()
+
+        j_vel_constr = constraint_values["joint_vel_constr"]
+        if j_vel_constr.max() > 0:
+            fatal_rew += j_vel_constr.max()
+
+        ee_constr = constraint_values["ee_constr"]
+        if ee_constr.max() > 0:
+            fatal_rew += ee_constr.max()
+
+        link_constr = constraint_values["link_constr"]
+        if link_constr.max() > 0:
+            fatal_rew += link_constr.max()
+
+        return -fatal_rew
+
+    def check_fatal(self, obs):
+        fatal_rew = 0
+
+        q = obs[self.env_info["joint_pos_ids"]]
+        qd = obs[self.env_info["joint_vel_ids"]]
+        constraint_values_obs = self.env_info["constraints"].fun(q, qd)
+        fatal_rew += self.check_constraints(constraint_values_obs)
+
+        return -fatal_rew
+
+    def _fk(self, pos):
+        res, _ = forward_kinematics(self.env_info["robot"]["robot_model"],
+                                    self.env_info["robot"]["robot_data"], pos)
+        return res.astype(np.float32)
+
+    def _ik(self, world_pos, init_q=None):
+        success, pos = inverse_kinematics(self.env_info["robot"]["robot_model"],
+                                          self.env_info["robot"]["robot_data"],
+                                          world_pos,
+                                          initial_q=init_q)
+        pos = pos.astype(np.float32)
+        assert success
+        return pos
+
+    def _jacobian(self, pos):
+        return jacobian(self.env_info["robot"]["robot_model"],
+                        self.env_info["robot"]["robot_data"],
+                        pos).astype(np.float32)
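
The step() above integrates each action as a constant-jerk segment over dt = 0.02 s: the jerk is chosen so that the interpolated velocity lands exactly on new_vel at the end of the segment, and position and acceleration follow from the Taylor expansion. A standalone check of that identity (dt and the 6-joint shapes are taken from the code above):

    import numpy as np

    dt = 0.02
    v0 = np.repeat(0.1, 6)              # interp_vel
    a0 = np.repeat(0.05, 6)             # last_acceleration
    new_vel = v0 + np.repeat(0.02, 6)   # interp_vel + action / 10

    jerk = 2 * (new_vel - v0 - a0 * dt) / dt ** 2
    v_end = v0 + a0 * dt + 0.5 * jerk * dt ** 2   # velocity after the segment
    assert np.allclose(v_end, new_vel)            # lands exactly on the commanded velocity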


@@ -1,6 +1,7 @@
 import numpy as np

 from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.airhockit_base_env import AirhocKIT2023BaseEnv

 class AirHockeyDefend(AirHockeySingle):

@@ -10,9 +11,7 @@ class AirHockeyDefend(AirHockeySingle):
     """
     def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
         self.init_velocity_range = (1, 3)
         self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]])  # Table Frame
-        self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame
-
         super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)

     def setup(self, obs):

@@ -32,7 +31,7 @@ class AirHockeyDefend(AirHockeySingle):
         self._write_data("puck_y_vel", puck_vel[1])
         self._write_data("puck_yaw_vel", puck_vel[2])

-        super(AirHockeyDefend, self).setup(obs)
+        super().setup(obs)

     def reward(self, state, action, next_state, absorbing):
         return 0

@@ -46,6 +45,98 @@ class AirHockeyDefend(AirHockeySingle):
             return True
         return super().is_absorbing(state)

+class AirHockeyDefendAirhocKIT2023(AirhocKIT2023BaseEnv):
+    def __init__(self, gamma=0.99, horizon=200, viewer_params={}, **kwargs):
+        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params, **kwargs)
+        self.init_velocity_range = (1, 3)
+        self.start_range = np.array([[0.4, 0.75], [-0.4, 0.4]])  # Table Frame
+        self._setup_metrics()
+
+    def setup(self, obs):
+        self._setup_metrics()
+        puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0]
+
+        lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
+        angle = np.random.uniform(-0.5, 0.5)
+
+        puck_vel = np.zeros(3)
+        puck_vel[0] = -np.cos(angle) * lin_vel
+        puck_vel[1] = np.sin(angle) * lin_vel
+        puck_vel[2] = np.random.uniform(-10, 10)
+
+        self._write_data("puck_x_pos", puck_pos[0])
+        self._write_data("puck_y_pos", puck_pos[1])
+        self._write_data("puck_x_vel", puck_vel[0])
+        self._write_data("puck_y_vel", puck_vel[1])
+        self._write_data("puck_yaw_vel", puck_vel[2])
+
+        super().setup(obs)
+
+    def reset(self, *args):
+        obs = super().reset()
+        self.hit_step_flag = False
+        self.hit_step = False
+        self.received_hit_reward = False
+        self.give_reward_next = False
+        return obs
+
+    def _setup_metrics(self):
+        self.episode_steps = 0
+        self.has_hit = False
+
+    def _simulation_post_step(self):
+        if not self.has_hit:
+            self.has_hit = self._check_collision("puck", "robot_1/ee")
+        super()._simulation_post_step()
+
+    def _step_finalize(self):
+        self.episode_steps += 1
+        return super()._step_finalize()
+
+    def reward(self, state, action, next_state, absorbing):
+        puck_pos, puck_vel = self.get_puck(next_state)
+        ee_pos, _ = self.get_ee()
+
+        rew = 0.01
+        if -0.7 < puck_pos[0] <= -0.2 and np.linalg.norm(puck_vel[:2]) < 0.1:
+            assert absorbing
+            rew += 70
+
+        if self.has_hit and not self.hit_step_flag:
+            self.hit_step_flag = True
+            self.hit_step = True
+        else:
+            self.hit_step = False
+
+        f = lambda puck_vel: 30 + 100 * (100 ** (-0.25 * np.linalg.norm(puck_vel[:2])))
+        if not self.give_reward_next and not self.received_hit_reward and self.hit_step and ee_pos[0] < puck_pos[0]:
+            self.hit_this_step = True
+            if np.linalg.norm(puck_vel[:2]) < 0.1:
+                return rew + f(puck_vel)
+            self.give_reward_next = True
+            return rew
+
+        if not self.received_hit_reward and self.give_reward_next:
+            self.received_hit_reward = True
+            if puck_vel[0] >= -0.2:
+                return rew + f(puck_vel)
+            return rew
+        else:
+            return rew
+
+    def is_absorbing(self, obs):
+        puck_pos, puck_vel = self.get_puck(obs)
+        # If puck is over the middle line and moving towards opponent
+        if puck_pos[0] > 0 and puck_vel[0] > 0:
+            return True
+        if self.episode_steps == self._mdp_info.horizon:
+            return True
+        if np.linalg.norm(puck_vel[:2]) < 0.1:
+            return True
+        return super().is_absorbing(obs)
+
 if __name__ == '__main__':
     env = AirHockeyDefend()
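
For intuition on the defend reward: the hit bonus f(v) = 30 + 100 * 100^(-0.25 * ||v||) decays with the puck's remaining planar speed, so stopping the puck dead pays far more than merely deflecting it. Evaluating the formula directly:

    import numpy as np

    f = lambda speed: 30 + 100 * (100 ** (-0.25 * speed))
    for speed in [0.0, 1.0, 2.0, 4.0]:
        print(speed, round(f(speed), 1))  # 130.0, 61.6, 40.0, 31.0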


@@ -1,6 +1,7 @@
 import numpy as np

 from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.airhockit_base_env import AirhocKIT2023BaseEnv

 class AirHockeyHit(AirHockeySingle):

@@ -14,9 +15,6 @@ class AirHockeyHit(AirHockeySingle):
         opponent_agent(Agent, None): Agent which controls the opponent
         moving_init(bool, False): If true, initialize the puck with inital velocity.
     """
-        self.hit_range = np.array([[-0.65, -0.25], [-0.4, 0.4]])  # Table Frame
-        self.init_velocity_range = (0, 0.5)  # Table Frame
-
         super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
         self.moving_init = moving_init

@@ -58,6 +56,93 @@ class AirHockeyHit(AirHockeySingle):
             return True
         return super(AirHockeyHit, self).is_absorbing(obs)

+class AirHockeyHitAirhocKIT2023(AirhocKIT2023BaseEnv):
+    def __init__(self, gamma=0.99, horizon=500, moving_init=True, viewer_params={}, **kwargs):
+        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params, **kwargs)
+        self.moving_init = moving_init
+        hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
+                    self.env_info['mallet']['radius'] * 2
+        self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]])  # Table Frame
+        self.init_velocity_range = (0, 0.5)  # Table Frame
+        self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame
+        self._setup_metrics()
+
+    def reset(self, *args):
+        obs = super().reset()
+        self.last_ee_pos = self.last_planned_world_pos.copy()
+        self.last_ee_pos[0] -= 1.51
+        return obs
+
+    def setup(self, obs):
+        self._setup_metrics()
+        puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
+
+        self._write_data("puck_x_pos", puck_pos[0])
+        self._write_data("puck_y_pos", puck_pos[1])
+
+        if self.moving_init:
+            lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
+            angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1)
+            puck_vel = np.zeros(3)
+            puck_vel[0] = -np.cos(angle) * lin_vel
+            puck_vel[1] = np.sin(angle) * lin_vel
+            puck_vel[2] = np.random.uniform(-2, 2)
+
+            self._write_data("puck_x_vel", puck_vel[0])
+            self._write_data("puck_y_vel", puck_vel[1])
+            self._write_data("puck_yaw_vel", puck_vel[2])
+
+        super().setup(obs)
+
+    def _setup_metrics(self):
+        self.episode_steps = 0
+        self.has_scored = False
+
+    def _step_finalize(self):
+        cur_obs = self._create_observation(self.obs_helper._build_obs(self._data))
+        puck_pos, _ = self.get_puck(cur_obs)  # world frame [x, y, z] and [x', y', z']
+
+        if not self.has_scored:
+            boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2
+            self.has_scored = np.any(np.abs(puck_pos[:2]) > boundary) and puck_pos[0] > 0
+
+        self.episode_steps += 1
+        return super()._step_finalize()
+
+    def reward(self, state, action, next_state, absorbing):
+        rew = 0
+        puck_pos, puck_vel = self.get_puck(next_state)
+        ee_pos, _ = self.get_ee()
+        ee_vel = (ee_pos - self.last_ee_pos) / 0.02
+        self.last_ee_pos = ee_pos
+
+        if puck_vel[0] < 0.25 and puck_pos[0] < 0:
+            ee_puck_dir = (puck_pos - ee_pos)[:2]
+            ee_puck_dir = ee_puck_dir / np.linalg.norm(ee_puck_dir)
+            rew += 1 * max(0, np.dot(ee_puck_dir, ee_vel[:2]))
+        else:
+            rew += 10 * np.linalg.norm(puck_vel[:2])
+
+        if self.has_scored:
+            rew += 2000 + 5000 * np.linalg.norm(puck_vel[:2])
+
+        return rew
+
+    def is_absorbing(self, obs):
+        puck_pos, puck_vel = self.get_puck(obs)
+        # Stop if the puck bounces back on the opponents wall
+        if puck_pos[0] > 0 and puck_vel[0] < 0:
+            return True
+        if self.has_scored:
+            return True
+        if self.episode_steps == self._mdp_info.horizon:
+            return True
+        return super().is_absorbing(obs)
+
 if __name__ == '__main__':
     env = AirHockeyHit(moving_init=True)
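
The hit reward is phased: while the puck is slow and still on the agent's half, it pays the positive part of the dot product between the normalized ee-to-puck direction and the finite-difference end-effector velocity (an approach reward); otherwise it pays puck speed, plus a large terminal bonus once has_scored is set. The approach term in isolation, with hypothetical positions:

    import numpy as np

    ee_pos = np.array([-0.8, 0.1])
    puck_pos = np.array([-0.5, 0.0])
    ee_vel = np.array([0.6, -0.1])  # (ee_pos - last_ee_pos) / 0.02

    ee_puck_dir = puck_pos - ee_pos
    ee_puck_dir /= np.linalg.norm(ee_puck_dir)
    rew = max(0, np.dot(ee_puck_dir, ee_vel))  # only motion toward the puck is rewarded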


@@ -0,0 +1 @@
+# TODO


@@ -6,21 +6,21 @@ def example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0",
     env = gym.make(env_name)
     env.reset(seed=seed)
     for i in range(iterations):
-        done = False
-        while done is False:
+        while True:
             ac = env.action_space.sample()
             obs, reward, terminated, truncated, info = env.step(ac)
             if render:
                 env.render(mode="human")
             if terminated or truncated:
                 env.reset()
+                break
     env.close()
     del env

 def example_custom_replanning_envs(seed=0, iteration=100, render=True):
     # id for a step-based environment
-    base_env_id = "BoxPushingDense-v0"
+    base_env_id = "fancy/BoxPushingDense-v0"

     wrappers = [fancy_gym.envs.mujoco.box_pushing.mp_wrapper.MPWrapper]

@@ -38,7 +38,8 @@ def example_custom_replanning_envs(seed=0, iteration=100, render=True):
                         'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0,
                         'condition_on_desired': True}

-    env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs=black_box_kwargs,
+    base_env = gym.make(base_env_id)
+    env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs=black_box_kwargs,
                             traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
                             phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
                             seed=seed)

@@ -56,10 +57,12 @@ def example_custom_replanning_envs(seed=0, iteration=100, render=True):
     env.close()
     del env

-if __name__ == "__main__":
+def main(render=False):
     # run a registered replanning environment
-    example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=False)
+    example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=render)

     # run a custom replanning environment
-    example_custom_replanning_envs(seed=0, iteration=8, render=True)
+    example_custom_replanning_envs(seed=0, iteration=8, render=render)
+
+if __name__ == "__main__":
+    main()
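
Note the API shift running through all example files below: fancy_gym.make_bb() now takes an already-constructed environment via env=gym.make(...) rather than an env_id string. A minimal sketch of the new calling convention; the wrapper import is the one used in this example, while the generator/controller settings are illustrative values in the style of other fancy_gym examples, not taken from this diff:

    import gymnasium as gym
    import fancy_gym
    from fancy_gym.envs.mujoco.box_pushing.mp_wrapper import MPWrapper

    base_env = gym.make("fancy/BoxPushingDense-v0")
    env = fancy_gym.make_bb(env=base_env, wrappers=[MPWrapper], black_box_kwargs={},
                            traj_gen_kwargs={'trajectory_generator_type': 'promp'},
                            controller_kwargs={'controller_type': 'position'},
                            phase_kwargs={'phase_generator_type': 'linear'},
                            basis_kwargs={'basis_generator_type': 'rbf', 'num_basis': 5},
                            seed=0)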


@@ -84,7 +84,8 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
     # basis_generator_kwargs = {'basis_generator_type': 'rbf',
     #                           'num_basis': 5
     #                           }
-    env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
+    base_env = gym.make(base_env_id)
+    env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={},
                             traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
                             phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
                             seed=seed)

@@ -114,21 +115,13 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
     env.close()
     del env

-if __name__ == '__main__':
-    # Disclaimer: DMC environments require the seed to be specified in the beginning.
-    # Adjusting it afterwards with env.seed() is not recommended as it does not affect the underlying physics.
-
-    # For rendering DMC
-    # export MUJOCO_GL="osmesa"
-    render = True
-
+def main(render = True):
     # # Standard DMC Suite tasks
     example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render)
     #
     # # Manipulation tasks
     # # Disclaimer: The vision versions are currently not integrated and yield an error
-    example_dmc("dm_control/manipulation-reach_site_features", seed=10, iterations=250, render=render)
+    example_dmc("dm_control/reach_site_features", seed=10, iterations=250, render=render)
     #
     # # Gym + DMC hybrid task provided in the MP framework
     example_dmc("dm_control_ProMP/ball_in_cup-catch-v0", seed=10, iterations=1, render=render)

@@ -136,3 +129,20 @@ if __name__ == '__main__':
     # Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is
     # already registered above
     example_custom_dmc_and_mp(seed=11, iterations=1, render=render)
+
+    # # Standard DMC Suite tasks
+    example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render)
+    #
+    # # Manipulation tasks
+    # # Disclaimer: The vision versions are currently not integrated and yield an error
+    example_dmc("dm_control/reach_site_features", seed=10, iterations=250, render=render)
+    #
+    # # Gym + DMC hybrid task provided in the MP framework
+    example_dmc("dm_control_ProMP/ball_in_cup-catch-v0", seed=10, iterations=1, render=render)
+
+    # Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is
+    # already registered above
+    example_custom_dmc_and_mp(seed=11, iterations=1, render=render)
+
+if __name__ == '__main__':
+    main()


@@ -85,10 +85,7 @@ def example_async(env_id="fancy/HoleReacher-v0", n_cpu=4, seed=int('533D', 16),
     # do not return values above threshold
     return *map(lambda v: np.stack(v)[:n_samples], buffer.values()),

-if __name__ == '__main__':
-    render = True
-
+def main(render = True):
     # Basic gym task
     example_general("Pendulum-v1", seed=10, iterations=200, render=render)

@@ -100,3 +97,6 @@ if __name__ == '__main__':
     # Vectorized multiprocessing environments
     # example_async(env_id="HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)
+
+if __name__ == '__main__':
+    main()


@@ -35,7 +35,7 @@ def example_meta(env_id="fish-swim", seed=1, iterations=1000, render=True):
         if terminated or truncated:
             print(env_id, rewards)
             rewards = 0
-            obs = env.reset()
+            obs = env.reset(seed=seed+i+1)

     env.close()
     del env

@@ -81,7 +81,8 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True):
     basis_generator_kwargs = {'basis_generator_type': 'rbf',
                               'num_basis': 5
                               }
-    env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
+    base_env = gym.make(base_env_id)
+    env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={},
                             traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
                             phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
                             seed=seed)

@@ -92,14 +93,10 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True):
     # It is also possible to change them mode multiple times when
     # e.g. only every nth trajectory should be displayed.
     if render:
-        raise ValueError("Metaworld render interface bug does not allow to render() fixes its interface. "
-                         "A temporary workaround is to alter their code in MujocoEnv render() from "
-                         "`if not offscreen` to `if not offscreen or offscreen == 'human'`.")
-        # TODO: Remove this, when Metaworld fixes its interface.
-        # env.render(mode="human")
+        env.render(mode="human")

     rewards = 0
-    obs = env.reset()
+    obs = env.reset(seed=seed)

     # number of samples/full trajectories (multiple environment steps)
     for i in range(iterations):

@@ -110,25 +107,23 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True):
         if terminated or truncated:
             print(base_env_id, rewards)
             rewards = 0
-            obs = env.reset()
+            obs = env.reset(seed=seed+i+1)

     env.close()
     del env

-if __name__ == '__main__':
-    # Disclaimer: MetaWorld environments require the seed to be specified in the beginning.
-    # Adjusting it afterwards with env.seed() is not recommended as it may not affect the underlying behavior.
-
+def main(render = False):
     # For rendering it might be necessary to specify your OpenGL installation
     # export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so
-    render = False
-
     # # Standard Meta world tasks
     example_meta("metaworld/button-press-v2", seed=10, iterations=500, render=render)

     # # MP + MetaWorld hybrid task provided in the our framework
-    example_meta("metaworld_ProMP/ButtonPress-v2", seed=10, iterations=1, render=render)
+    example_meta("metaworld_ProMP/button-press-v2", seed=10, iterations=1, render=render)
     #
     # # Custom MetaWorld task
     example_custom_meta_and_mp(seed=10, iterations=1, render=render)
+
+if __name__ == '__main__':
+    main()


@@ -26,6 +26,8 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, render=True):
     for i in range(iterations):
         if render and i % 1 == 0:
+            # This renders the full MP trajectory
+            # It is only required to call render() once in the beginning, which renders every consecutive trajectory.
             env.render()

     # Now the action space is not the raw action but the parametrization of the trajectory generator,

@@ -248,8 +250,7 @@ def example_fully_custom_mp_alternative(seed=1, iterations=1, render=True):
     pass

-def main():
-    render = False
+def main(render=False):

     # DMP
     example_mp("fancy_DMP/HoleReacher-v0", seed=10, iterations=5, render=render)


@@ -31,6 +31,8 @@ def example_mp(env_name, seed=1, render=True):
         print(returns)
         obs = env.reset()

+def main(render=True):
+    example_mp("gym_ProMP/Reacher-v2", render=render)

 if __name__ == '__main__':
-    example_mp("gym_ProMP/Reacher-v2")
+    main()

test/test_examples.py (new file, 13 lines)

@@ -0,0 +1,13 @@
+import pytest
+
+from fancy_gym.examples.example_replanning_envs import main as replanning_envs_main
+from fancy_gym.examples.examples_dmc import main as dmc_main
+from fancy_gym.examples.examples_general import main as general_main
+from fancy_gym.examples.examples_metaworld import main as metaworld_main
+from fancy_gym.examples.examples_movement_primitives import main as mp_main
+from fancy_gym.examples.examples_open_ai import main as open_ai_main
+
+@pytest.mark.parametrize('entry', [replanning_envs_main, dmc_main, general_main, metaworld_main, mp_main, open_ai_main])
+@pytest.mark.parametrize('render', [False])
+def test_run_example(entry, render):
+    entry(render=render)
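
The new test simply parametrizes over every example's main() and runs each once without rendering, which is what the main(render=...) refactor across the example files enables. Standard pytest usage to run it locally (not part of the diff):

    # equivalent to the shell command: python -m pytest test/test_examples.py -q
    import pytest
    pytest.main(["-q", "test/test_examples.py"])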