Merge branch 'master' into fix_pyproject_missing_deps
This commit is contained in:
commit
6cb32e6203
24
README.md
24
README.md
@ -105,17 +105,16 @@ Regular step based environments added by Fancy Gym are added into the `fancy/` n
|
|||||||
import gymnasium as gym
|
import gymnasium as gym
|
||||||
import fancy_gym
|
import fancy_gym
|
||||||
|
|
||||||
env = gym.make('fancy/Reacher5d-v0')
|
env = gym.make('fancy/Reacher5d-v0', render_mode='human')
|
||||||
# or env = gym.make('metaworld/reach-v2') # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS
|
# or env = gym.make('metaworld/reach-v2', render_mode='human') # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS
|
||||||
# or env = gym.make('dm_control/ball_in_cup-catch-v0')
|
# or env = gym.make('dm_control/ball_in_cup-catch-v0', render_mode='human')
|
||||||
# or env = gym.make('Reacher-v2')
|
# or env = gym.make('Reacher-v2', render_mode='human')
|
||||||
observation = env.reset(seed=1)
|
observation = env.reset(seed=1)
|
||||||
|
env.render()
|
||||||
|
|
||||||
for i in range(1000):
|
for i in range(1000):
|
||||||
action = env.action_space.sample()
|
action = env.action_space.sample()
|
||||||
observation, reward, terminated, truncated, info = env.step(action)
|
observation, reward, terminated, truncated, info = env.step(action)
|
||||||
if i % 5 == 0:
|
|
||||||
env.render()
|
|
||||||
|
|
||||||
if terminated or truncated:
|
if terminated or truncated:
|
||||||
observation, info = env.reset()
|
observation, info = env.reset()
|
||||||
@ -149,17 +148,14 @@ Just keep in mind, calling `step()` executes a full trajectory.
|
|||||||
import gymnasium as gym
|
import gymnasium as gym
|
||||||
import fancy_gym
|
import fancy_gym
|
||||||
|
|
||||||
env = gym.make('fancy_ProMP/Reacher5d-v0')
|
env = gym.make('fancy_ProMP/Reacher5d-v0', render_mode="human")
|
||||||
# or env = gym.make('metaworld_ProDMP/reach-v2')
|
# or env = gym.make('metaworld_ProDMP/reach-v2', render_mode="human")
|
||||||
# or env = gym.make('dm_control_DMP/ball_in_cup-catch-v0')
|
# or env = gym.make('dm_control_DMP/ball_in_cup-catch-v0', render_mode="human")
|
||||||
# or env = gym.make('gym_ProMP/Reacher-v2') # mp versions of envs added directly by gymnasium are in the gym_<MP-type> NS
|
# or env = gym.make('gym_ProMP/Reacher-v2', render_mode="human") # mp versions of envs added directly by gymnasium are in the gym_<MP-type> NS
|
||||||
|
|
||||||
# render() can be called once in the beginning with all necessary arguments.
|
|
||||||
# To turn it of again just call render() without any arguments.
|
|
||||||
env.render(mode='human')
|
|
||||||
|
|
||||||
# This returns the context information, not the full state observation
|
# This returns the context information, not the full state observation
|
||||||
observation, info = env.reset(seed=1)
|
observation, info = env.reset(seed=1)
|
||||||
|
env.render()
|
||||||
|
|
||||||
for i in range(5):
|
for i in range(5):
|
||||||
action = env.action_space.sample()
|
action = env.action_space.sample()
|
||||||
|
@ -291,7 +291,7 @@ register(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Air Hockey environments
|
# Air Hockey environments
|
||||||
for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend"]:
|
for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend", "7dof-hit-airhockit2023", "7dof-defend-airhockit2023"]:
|
||||||
register(
|
register(
|
||||||
id=f'fancy/AirHockey-{env_mode}-v0',
|
id=f'fancy/AirHockey-{env_mode}-v0',
|
||||||
entry_point='fancy_gym.envs.mujoco:AirHockeyEnv',
|
entry_point='fancy_gym.envs.mujoco:AirHockeyEnv',
|
||||||
|
@ -8,9 +8,9 @@ from fancy_gym.envs.mujoco.air_hockey.utils import robot_to_world
|
|||||||
from mushroom_rl.core import Environment
|
from mushroom_rl.core import Environment
|
||||||
|
|
||||||
class AirHockeyEnv(Environment):
|
class AirHockeyEnv(Environment):
|
||||||
metadata = {"render_modes": ["human"], "render_fps": 50}
|
metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50}
|
||||||
|
|
||||||
def __init__(self, env_mode=None, interpolation_order=3, render_mode=None, **kwargs):
|
def __init__(self, env_mode=None, interpolation_order=3, render_mode=None, width=1920, height=1080, **kwargs):
|
||||||
"""
|
"""
|
||||||
Environment Constructor
|
Environment Constructor
|
||||||
|
|
||||||
@ -30,7 +30,10 @@ class AirHockeyEnv(Environment):
|
|||||||
"7dof-defend": position.IiwaPositionDefend,
|
"7dof-defend": position.IiwaPositionDefend,
|
||||||
|
|
||||||
"3dof-hit": position.PlanarPositionHit,
|
"3dof-hit": position.PlanarPositionHit,
|
||||||
"3dof-defend": position.PlanarPositionDefend
|
"3dof-defend": position.PlanarPositionDefend,
|
||||||
|
|
||||||
|
"7dof-hit-airhockit2023": position.IiwaPositionHitAirhocKIT2023,
|
||||||
|
"7dof-defend-airhockit2023": position.IiwaPositionDefendAirhocKIT2023,
|
||||||
}
|
}
|
||||||
|
|
||||||
if env_mode not in env_dict:
|
if env_mode not in env_dict:
|
||||||
@ -39,37 +42,53 @@ class AirHockeyEnv(Environment):
|
|||||||
if env_mode == "tournament" and type(interpolation_order) != tuple:
|
if env_mode == "tournament" and type(interpolation_order) != tuple:
|
||||||
interpolation_order = (interpolation_order, interpolation_order)
|
interpolation_order = (interpolation_order, interpolation_order)
|
||||||
|
|
||||||
|
self.render_mode = render_mode
|
||||||
|
self.render_human_active = False
|
||||||
|
|
||||||
|
# Determine headless mode based on render_mode
|
||||||
|
headless = self.render_mode == 'rgb_array'
|
||||||
|
|
||||||
|
# Prepare viewer_params
|
||||||
|
viewer_params = kwargs.get('viewer_params', {})
|
||||||
|
viewer_params.update({'headless': headless, 'width': width, 'height': height})
|
||||||
|
kwargs['viewer_params'] = viewer_params
|
||||||
|
|
||||||
self.base_env = env_dict[env_mode](interpolation_order=interpolation_order, **kwargs)
|
self.base_env = env_dict[env_mode](interpolation_order=interpolation_order, **kwargs)
|
||||||
self.env_name = env_mode
|
self.env_name = env_mode
|
||||||
self.env_info = self.base_env.env_info
|
self.env_info = self.base_env.env_info
|
||||||
single_robot_obs_size = len(self.base_env.info.observation_space.low)
|
|
||||||
if env_mode == "tournament":
|
|
||||||
self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,single_robot_obs_size), dtype=np.float64)
|
|
||||||
else:
|
|
||||||
self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(single_robot_obs_size,), dtype=np.float64)
|
|
||||||
robot_info = self.env_info["robot"]
|
|
||||||
|
|
||||||
if env_mode != "tournament":
|
if hasattr(self.base_env, "wrapper_obs_space") and hasattr(self.base_env, "wrapper_act_space"):
|
||||||
if interpolation_order in [1, 2]:
|
self.observation_space = self.base_env.wrapper_obs_space
|
||||||
self.action_space = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
|
self.action_space = self.base_env.wrapper_act_space
|
||||||
if interpolation_order in [3, 4, -1]:
|
|
||||||
self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
|
|
||||||
high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
|
|
||||||
if interpolation_order in [5]:
|
|
||||||
self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
|
|
||||||
high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
|
|
||||||
else:
|
else:
|
||||||
acts = [None, None]
|
single_robot_obs_size = len(self.base_env.info.observation_space.low)
|
||||||
for i in range(2):
|
if env_mode == "tournament":
|
||||||
if interpolation_order[i] in [1, 2]:
|
self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,single_robot_obs_size), dtype=np.float64)
|
||||||
acts[i] = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
|
else:
|
||||||
if interpolation_order[i] in [3, 4, -1]:
|
self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(single_robot_obs_size,), dtype=np.float64)
|
||||||
acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
|
robot_info = self.env_info["robot"]
|
||||||
high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
|
|
||||||
if interpolation_order[i] in [5]:
|
if env_mode != "tournament":
|
||||||
acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
|
if interpolation_order in [1, 2]:
|
||||||
high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
|
self.action_space = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
|
||||||
self.action_space = spaces.Tuple((acts[0], acts[1]))
|
if interpolation_order in [3, 4, -1]:
|
||||||
|
self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
|
||||||
|
high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
|
||||||
|
if interpolation_order in [5]:
|
||||||
|
self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
|
||||||
|
high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
|
||||||
|
else:
|
||||||
|
acts = [None, None]
|
||||||
|
for i in range(2):
|
||||||
|
if interpolation_order[i] in [1, 2]:
|
||||||
|
acts[i] = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
|
||||||
|
if interpolation_order[i] in [3, 4, -1]:
|
||||||
|
acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
|
||||||
|
high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
|
||||||
|
if interpolation_order[i] in [5]:
|
||||||
|
acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
|
||||||
|
high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
|
||||||
|
self.action_space = spaces.Tuple((acts[0], acts[1]))
|
||||||
|
|
||||||
constraint_list = constraints.ConstraintList()
|
constraint_list = constraints.ConstraintList()
|
||||||
constraint_list.add(constraints.JointPositionConstraint(self.env_info))
|
constraint_list.add(constraints.JointPositionConstraint(self.env_info))
|
||||||
@ -81,9 +100,6 @@ class AirHockeyEnv(Environment):
|
|||||||
self.env_info['constraints'] = constraint_list
|
self.env_info['constraints'] = constraint_list
|
||||||
self.env_info['env_name'] = self.env_name
|
self.env_info['env_name'] = self.env_name
|
||||||
|
|
||||||
self.render_mode = render_mode
|
|
||||||
self.render_human_active = False
|
|
||||||
|
|
||||||
super().__init__(self.base_env.info)
|
super().__init__(self.base_env.info)
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action):
|
||||||
@ -111,15 +127,21 @@ class AirHockeyEnv(Environment):
|
|||||||
|
|
||||||
if self.env_info['env_name'] == "tournament":
|
if self.env_info['env_name'] == "tournament":
|
||||||
obs = np.array(np.split(obs, 2))
|
obs = np.array(np.split(obs, 2))
|
||||||
|
|
||||||
if self.render_human_active:
|
if self.render_human_active:
|
||||||
self.base_env.render()
|
self.base_env.render()
|
||||||
|
|
||||||
return obs, reward, done, False, info
|
return obs, reward, done, False, info
|
||||||
|
|
||||||
def render(self):
|
def render(self):
|
||||||
self.render_human_active = True
|
if self.render_mode == 'rgb_array':
|
||||||
|
return self.base_env.render(record = True)
|
||||||
|
elif self.render_mode == 'human':
|
||||||
|
self.render_human_active = True
|
||||||
|
self.base_env.render()
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unsupported render mode: '{self.render_mode}'")
|
||||||
|
|
||||||
def reset(self, seed=None, options={}):
|
def reset(self, seed=None, options={}):
|
||||||
self.base_env.seed(seed)
|
self.base_env.seed(seed)
|
||||||
obs = self.base_env.reset()
|
obs = self.base_env.reset()
|
||||||
@ -177,4 +199,4 @@ if __name__ == "__main__":
|
|||||||
J = 0.
|
J = 0.
|
||||||
gamma = 1.
|
gamma = 1.
|
||||||
steps = 0
|
steps = 0
|
||||||
env.reset()
|
env.reset()
|
||||||
|
@ -261,10 +261,14 @@ class PlanarPositionDefend(PositionControlPlanar, three_dof.AirHockeyDefend):
|
|||||||
class IiwaPositionHit(PositionControlIIWA, seven_dof.AirHockeyHit):
|
class IiwaPositionHit(PositionControlIIWA, seven_dof.AirHockeyHit):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class IiwaPositionHitAirhocKIT2023(PositionControlIIWA, seven_dof.AirHockeyHitAirhocKIT2023):
|
||||||
|
pass
|
||||||
|
|
||||||
class IiwaPositionDefend(PositionControlIIWA, seven_dof.AirHockeyDefend):
|
class IiwaPositionDefend(PositionControlIIWA, seven_dof.AirHockeyDefend):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class IiwaPositionDefendAirhocKIT2023(PositionControlIIWA, seven_dof.AirHockeyDefendAirhocKIT2023):
|
||||||
|
pass
|
||||||
|
|
||||||
class IiwaPositionTournament(PositionControlIIWA, seven_dof.AirHockeyTournament):
|
class IiwaPositionTournament(PositionControlIIWA, seven_dof.AirHockeyTournament):
|
||||||
pass
|
pass
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from .env_base import AirHockeyBase
|
from .env_base import AirHockeyBase
|
||||||
from .tournament import AirHockeyTournament
|
from .tournament import AirHockeyTournament
|
||||||
from .hit import AirHockeyHit
|
from .hit import AirHockeyHit, AirHockeyHitAirhocKIT2023
|
||||||
from .defend import AirHockeyDefend
|
from .defend import AirHockeyDefend, AirHockeyDefendAirhocKIT2023
|
114
fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py
Normal file
114
fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
import numpy as np
|
||||||
|
from gymnasium import spaces
|
||||||
|
|
||||||
|
from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
|
||||||
|
from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics, forward_kinematics, jacobian
|
||||||
|
|
||||||
|
class AirhocKIT2023BaseEnv(AirHockeySingle):
|
||||||
|
def __init__(self, noise=False, **kwargs):
|
||||||
|
super().__init__(**kwargs)
|
||||||
|
obs_low = np.hstack([[-np.inf] * 37])
|
||||||
|
obs_high = np.hstack([[np.inf] * 37])
|
||||||
|
self.wrapper_obs_space = spaces.Box(low=obs_low, high=obs_high, dtype=np.float64)
|
||||||
|
self.wrapper_act_space = spaces.Box(low=np.repeat(-100., 6), high=np.repeat(100., 6))
|
||||||
|
self.noise = noise
|
||||||
|
|
||||||
|
# We don't need puck yaw observations
|
||||||
|
def filter_obs(self, obs):
|
||||||
|
obs = np.hstack([obs[0:2], obs[3:5], obs[6:12], obs[13:19], obs[20:]])
|
||||||
|
return obs
|
||||||
|
|
||||||
|
def add_noise(self, obs):
|
||||||
|
if not self.noise:
|
||||||
|
return
|
||||||
|
obs[self.env_info["puck_pos_ids"]] += np.random.normal(0, 0.001, 3)
|
||||||
|
obs[self.env_info["puck_vel_ids"]] += np.random.normal(0, 0.1, 3)
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
self.last_acceleration = np.repeat(0., 6)
|
||||||
|
obs = super().reset()
|
||||||
|
self.add_noise(obs)
|
||||||
|
self.interp_pos = obs[self.env_info["joint_pos_ids"]][:-1]
|
||||||
|
self.interp_vel = obs[self.env_info["joint_vel_ids"]][:-1]
|
||||||
|
|
||||||
|
self.last_planned_world_pos = self._fk(self.interp_pos)
|
||||||
|
obs = np.hstack([
|
||||||
|
obs, self.interp_pos, self.interp_vel, self.last_acceleration, self.last_planned_world_pos
|
||||||
|
])
|
||||||
|
return self.filter_obs(obs)
|
||||||
|
|
||||||
|
def step(self, action):
|
||||||
|
action /= 10
|
||||||
|
|
||||||
|
new_vel = self.interp_vel + action
|
||||||
|
|
||||||
|
jerk = 2 * (new_vel - self.interp_vel - self.last_acceleration * 0.02) / (0.02 ** 2)
|
||||||
|
new_pos = self.interp_pos + self.interp_vel * 0.02 + (1/2) * self.last_acceleration * (0.02 ** 2) + (1/6) * jerk * (0.02 ** 3)
|
||||||
|
abs_action = np.vstack([np.hstack([new_pos, 0]), np.hstack([new_vel, 0])])
|
||||||
|
|
||||||
|
self.interp_pos = new_pos
|
||||||
|
self.interp_vel = new_vel
|
||||||
|
self.last_acceleration += jerk * 0.02
|
||||||
|
|
||||||
|
obs, rew, done, info = super().step(abs_action)
|
||||||
|
self.add_noise(obs)
|
||||||
|
self.last_planned_world_pos = self._fk(self.interp_pos)
|
||||||
|
obs = np.hstack([
|
||||||
|
obs, self.interp_pos, self.interp_vel, self.last_acceleration, self.last_planned_world_pos
|
||||||
|
])
|
||||||
|
|
||||||
|
fatal_rew = self.check_fatal(obs)
|
||||||
|
if fatal_rew != 0:
|
||||||
|
return self.filter_obs(obs), fatal_rew, True, info
|
||||||
|
|
||||||
|
return self.filter_obs(obs), rew, done, info
|
||||||
|
|
||||||
|
def check_constraints(self, constraint_values):
|
||||||
|
fatal_rew = 0
|
||||||
|
|
||||||
|
j_pos_constr = constraint_values["joint_pos_constr"]
|
||||||
|
if j_pos_constr.max() > 0:
|
||||||
|
fatal_rew += j_pos_constr.max()
|
||||||
|
|
||||||
|
j_vel_constr = constraint_values["joint_vel_constr"]
|
||||||
|
if j_vel_constr.max() > 0:
|
||||||
|
fatal_rew += j_vel_constr.max()
|
||||||
|
|
||||||
|
ee_constr = constraint_values["ee_constr"]
|
||||||
|
if ee_constr.max() > 0:
|
||||||
|
fatal_rew += ee_constr.max()
|
||||||
|
|
||||||
|
link_constr = constraint_values["link_constr"]
|
||||||
|
if link_constr.max() > 0:
|
||||||
|
fatal_rew += link_constr.max()
|
||||||
|
|
||||||
|
return -fatal_rew
|
||||||
|
|
||||||
|
def check_fatal(self, obs):
|
||||||
|
fatal_rew = 0
|
||||||
|
|
||||||
|
q = obs[self.env_info["joint_pos_ids"]]
|
||||||
|
qd = obs[self.env_info["joint_vel_ids"]]
|
||||||
|
constraint_values_obs = self.env_info["constraints"].fun(q, qd)
|
||||||
|
fatal_rew += self.check_constraints(constraint_values_obs)
|
||||||
|
|
||||||
|
return -fatal_rew
|
||||||
|
|
||||||
|
def _fk(self, pos):
|
||||||
|
res, _ = forward_kinematics(self.env_info["robot"]["robot_model"],
|
||||||
|
self.env_info["robot"]["robot_data"], pos)
|
||||||
|
return res.astype(np.float32)
|
||||||
|
|
||||||
|
def _ik(self, world_pos, init_q=None):
|
||||||
|
success, pos = inverse_kinematics(self.env_info["robot"]["robot_model"],
|
||||||
|
self.env_info["robot"]["robot_data"],
|
||||||
|
world_pos,
|
||||||
|
initial_q=init_q)
|
||||||
|
pos = pos.astype(np.float32)
|
||||||
|
assert success
|
||||||
|
return pos
|
||||||
|
|
||||||
|
def _jacobian(self, pos):
|
||||||
|
return jacobian(self.env_info["robot"]["robot_model"],
|
||||||
|
self.env_info["robot"]["robot_data"],
|
||||||
|
pos).astype(np.float32)
|
@ -1,6 +1,7 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
|
from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
|
||||||
|
from fancy_gym.envs.mujoco.air_hockey.seven_dof.airhockit_base_env import AirhocKIT2023BaseEnv
|
||||||
|
|
||||||
|
|
||||||
class AirHockeyDefend(AirHockeySingle):
|
class AirHockeyDefend(AirHockeySingle):
|
||||||
@ -10,9 +11,7 @@ class AirHockeyDefend(AirHockeySingle):
|
|||||||
"""
|
"""
|
||||||
def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
|
def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
|
||||||
self.init_velocity_range = (1, 3)
|
self.init_velocity_range = (1, 3)
|
||||||
|
|
||||||
self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]]) # Table Frame
|
self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]]) # Table Frame
|
||||||
self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame
|
|
||||||
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
|
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
|
||||||
|
|
||||||
def setup(self, obs):
|
def setup(self, obs):
|
||||||
@ -32,7 +31,7 @@ class AirHockeyDefend(AirHockeySingle):
|
|||||||
self._write_data("puck_y_vel", puck_vel[1])
|
self._write_data("puck_y_vel", puck_vel[1])
|
||||||
self._write_data("puck_yaw_vel", puck_vel[2])
|
self._write_data("puck_yaw_vel", puck_vel[2])
|
||||||
|
|
||||||
super(AirHockeyDefend, self).setup(obs)
|
super().setup(obs)
|
||||||
|
|
||||||
def reward(self, state, action, next_state, absorbing):
|
def reward(self, state, action, next_state, absorbing):
|
||||||
return 0
|
return 0
|
||||||
@ -46,6 +45,98 @@ class AirHockeyDefend(AirHockeySingle):
|
|||||||
return True
|
return True
|
||||||
return super().is_absorbing(state)
|
return super().is_absorbing(state)
|
||||||
|
|
||||||
|
class AirHockeyDefendAirhocKIT2023(AirhocKIT2023BaseEnv):
|
||||||
|
def __init__(self, gamma=0.99, horizon=200, viewer_params={}, **kwargs):
|
||||||
|
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params, **kwargs)
|
||||||
|
self.init_velocity_range = (1, 3)
|
||||||
|
self.start_range = np.array([[0.4, 0.75], [-0.4, 0.4]]) # Table Frame
|
||||||
|
self._setup_metrics()
|
||||||
|
|
||||||
|
def setup(self, obs):
|
||||||
|
self._setup_metrics()
|
||||||
|
puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0]
|
||||||
|
|
||||||
|
lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
|
||||||
|
angle = np.random.uniform(-0.5, 0.5)
|
||||||
|
|
||||||
|
puck_vel = np.zeros(3)
|
||||||
|
puck_vel[0] = -np.cos(angle) * lin_vel
|
||||||
|
puck_vel[1] = np.sin(angle) * lin_vel
|
||||||
|
puck_vel[2] = np.random.uniform(-10, 10)
|
||||||
|
|
||||||
|
self._write_data("puck_x_pos", puck_pos[0])
|
||||||
|
self._write_data("puck_y_pos", puck_pos[1])
|
||||||
|
self._write_data("puck_x_vel", puck_vel[0])
|
||||||
|
self._write_data("puck_y_vel", puck_vel[1])
|
||||||
|
self._write_data("puck_yaw_vel", puck_vel[2])
|
||||||
|
|
||||||
|
super().setup(obs)
|
||||||
|
|
||||||
|
def reset(self, *args):
|
||||||
|
obs = super().reset()
|
||||||
|
self.hit_step_flag = False
|
||||||
|
self.hit_step = False
|
||||||
|
self.received_hit_reward = False
|
||||||
|
self.give_reward_next = False
|
||||||
|
return obs
|
||||||
|
|
||||||
|
def _setup_metrics(self):
|
||||||
|
self.episode_steps = 0
|
||||||
|
self.has_hit = False
|
||||||
|
|
||||||
|
def _simulation_post_step(self):
|
||||||
|
if not self.has_hit:
|
||||||
|
self.has_hit = self._check_collision("puck", "robot_1/ee")
|
||||||
|
|
||||||
|
super()._simulation_post_step()
|
||||||
|
|
||||||
|
def _step_finalize(self):
|
||||||
|
self.episode_steps += 1
|
||||||
|
return super()._step_finalize()
|
||||||
|
|
||||||
|
def reward(self, state, action, next_state, absorbing):
|
||||||
|
puck_pos, puck_vel = self.get_puck(next_state)
|
||||||
|
ee_pos, _ = self.get_ee()
|
||||||
|
rew = 0.01
|
||||||
|
if -0.7 < puck_pos[0] <= -0.2 and np.linalg.norm(puck_vel[:2]) < 0.1:
|
||||||
|
assert absorbing
|
||||||
|
rew += 70
|
||||||
|
|
||||||
|
if self.has_hit and not self.hit_step_flag:
|
||||||
|
self.hit_step_flag = True
|
||||||
|
self.hit_step = True
|
||||||
|
else:
|
||||||
|
self.hit_step = False
|
||||||
|
|
||||||
|
f = lambda puck_vel: 30 + 100 * (100 ** (-0.25 * np.linalg.norm(puck_vel[:2])))
|
||||||
|
if not self.give_reward_next and not self.received_hit_reward and self.hit_step and ee_pos[0] < puck_pos[0]:
|
||||||
|
self.hit_this_step = True
|
||||||
|
if np.linalg.norm(puck_vel[:2]) < 0.1:
|
||||||
|
return rew + f(puck_vel)
|
||||||
|
self.give_reward_next = True
|
||||||
|
return rew
|
||||||
|
|
||||||
|
if not self.received_hit_reward and self.give_reward_next:
|
||||||
|
self.received_hit_reward = True
|
||||||
|
if puck_vel[0] >= -0.2:
|
||||||
|
return rew + f(puck_vel)
|
||||||
|
return rew
|
||||||
|
else:
|
||||||
|
return rew
|
||||||
|
|
||||||
|
def is_absorbing(self, obs):
|
||||||
|
puck_pos, puck_vel = self.get_puck(obs)
|
||||||
|
# If puck is over the middle line and moving towards opponent
|
||||||
|
if puck_pos[0] > 0 and puck_vel[0] > 0:
|
||||||
|
return True
|
||||||
|
|
||||||
|
if self.episode_steps == self._mdp_info.horizon:
|
||||||
|
return True
|
||||||
|
|
||||||
|
if np.linalg.norm(puck_vel[:2]) < 0.1:
|
||||||
|
return True
|
||||||
|
return super().is_absorbing(obs)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
env = AirHockeyDefend()
|
env = AirHockeyDefend()
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
|
from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
|
||||||
|
from fancy_gym.envs.mujoco.air_hockey.seven_dof.airhockit_base_env import AirhocKIT2023BaseEnv
|
||||||
|
|
||||||
|
|
||||||
class AirHockeyHit(AirHockeySingle):
|
class AirHockeyHit(AirHockeySingle):
|
||||||
@ -14,9 +15,6 @@ class AirHockeyHit(AirHockeySingle):
|
|||||||
opponent_agent(Agent, None): Agent which controls the opponent
|
opponent_agent(Agent, None): Agent which controls the opponent
|
||||||
moving_init(bool, False): If true, initialize the puck with inital velocity.
|
moving_init(bool, False): If true, initialize the puck with inital velocity.
|
||||||
"""
|
"""
|
||||||
self.hit_range = np.array([[-0.65, -0.25], [-0.4, 0.4]]) # Table Frame
|
|
||||||
self.init_velocity_range = (0, 0.5) # Table Frame
|
|
||||||
|
|
||||||
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
|
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
|
||||||
|
|
||||||
self.moving_init = moving_init
|
self.moving_init = moving_init
|
||||||
@ -58,6 +56,93 @@ class AirHockeyHit(AirHockeySingle):
|
|||||||
return True
|
return True
|
||||||
return super(AirHockeyHit, self).is_absorbing(obs)
|
return super(AirHockeyHit, self).is_absorbing(obs)
|
||||||
|
|
||||||
|
class AirHockeyHitAirhocKIT2023(AirhocKIT2023BaseEnv):
|
||||||
|
def __init__(self, gamma=0.99, horizon=500, moving_init=True, viewer_params={}, **kwargs):
|
||||||
|
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params, **kwargs)
|
||||||
|
|
||||||
|
self.moving_init = moving_init
|
||||||
|
hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
|
||||||
|
self.env_info['mallet']['radius'] * 2
|
||||||
|
self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame
|
||||||
|
self.init_velocity_range = (0, 0.5) # Table Frame
|
||||||
|
self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame
|
||||||
|
self._setup_metrics()
|
||||||
|
|
||||||
|
def reset(self, *args):
|
||||||
|
obs = super().reset()
|
||||||
|
self.last_ee_pos = self.last_planned_world_pos.copy()
|
||||||
|
self.last_ee_pos[0] -= 1.51
|
||||||
|
return obs
|
||||||
|
|
||||||
|
def setup(self, obs):
|
||||||
|
self._setup_metrics()
|
||||||
|
puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
|
||||||
|
|
||||||
|
self._write_data("puck_x_pos", puck_pos[0])
|
||||||
|
self._write_data("puck_y_pos", puck_pos[1])
|
||||||
|
|
||||||
|
if self.moving_init:
|
||||||
|
lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
|
||||||
|
angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1)
|
||||||
|
puck_vel = np.zeros(3)
|
||||||
|
puck_vel[0] = -np.cos(angle) * lin_vel
|
||||||
|
puck_vel[1] = np.sin(angle) * lin_vel
|
||||||
|
puck_vel[2] = np.random.uniform(-2, 2)
|
||||||
|
|
||||||
|
self._write_data("puck_x_vel", puck_vel[0])
|
||||||
|
self._write_data("puck_y_vel", puck_vel[1])
|
||||||
|
self._write_data("puck_yaw_vel", puck_vel[2])
|
||||||
|
|
||||||
|
super().setup(obs)
|
||||||
|
|
||||||
|
def _setup_metrics(self):
|
||||||
|
self.episode_steps = 0
|
||||||
|
self.has_scored = False
|
||||||
|
|
||||||
|
def _step_finalize(self):
|
||||||
|
cur_obs = self._create_observation(self.obs_helper._build_obs(self._data))
|
||||||
|
puck_pos, _ = self.get_puck(cur_obs) # world frame [x, y, z] and [x', y', z']
|
||||||
|
|
||||||
|
if not self.has_scored:
|
||||||
|
boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2
|
||||||
|
self.has_scored = np.any(np.abs(puck_pos[:2]) > boundary) and puck_pos[0] > 0
|
||||||
|
|
||||||
|
self.episode_steps += 1
|
||||||
|
return super()._step_finalize()
|
||||||
|
|
||||||
|
def reward(self, state, action, next_state, absorbing):
|
||||||
|
rew = 0
|
||||||
|
puck_pos, puck_vel = self.get_puck(next_state)
|
||||||
|
ee_pos, _ = self.get_ee()
|
||||||
|
ee_vel = (ee_pos - self.last_ee_pos) / 0.02
|
||||||
|
self.last_ee_pos = ee_pos
|
||||||
|
|
||||||
|
if puck_vel[0] < 0.25 and puck_pos[0] < 0:
|
||||||
|
ee_puck_dir = (puck_pos - ee_pos)[:2]
|
||||||
|
ee_puck_dir = ee_puck_dir / np.linalg.norm(ee_puck_dir)
|
||||||
|
rew += 1 * max(0, np.dot(ee_puck_dir, ee_vel[:2]))
|
||||||
|
else:
|
||||||
|
rew += 10 * np.linalg.norm(puck_vel[:2])
|
||||||
|
|
||||||
|
if self.has_scored:
|
||||||
|
rew += 2000 + 5000 * np.linalg.norm(puck_vel[:2])
|
||||||
|
|
||||||
|
return rew
|
||||||
|
|
||||||
|
def is_absorbing(self, obs):
|
||||||
|
puck_pos, puck_vel = self.get_puck(obs)
|
||||||
|
# Stop if the puck bounces back on the opponents wall
|
||||||
|
if puck_pos[0] > 0 and puck_vel[0] < 0:
|
||||||
|
return True
|
||||||
|
|
||||||
|
if self.has_scored:
|
||||||
|
return True
|
||||||
|
|
||||||
|
if self.episode_steps == self._mdp_info.horizon:
|
||||||
|
return True
|
||||||
|
|
||||||
|
return super().is_absorbing(obs)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
env = AirHockeyHit(moving_init=True)
|
env = AirHockeyHit(moving_init=True)
|
||||||
|
@ -0,0 +1 @@
|
|||||||
|
# TODO
|
@ -6,21 +6,21 @@ def example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0",
|
|||||||
env = gym.make(env_name)
|
env = gym.make(env_name)
|
||||||
env.reset(seed=seed)
|
env.reset(seed=seed)
|
||||||
for i in range(iterations):
|
for i in range(iterations):
|
||||||
done = False
|
while True:
|
||||||
while done is False:
|
|
||||||
ac = env.action_space.sample()
|
ac = env.action_space.sample()
|
||||||
obs, reward, terminated, truncated, info = env.step(ac)
|
obs, reward, terminated, truncated, info = env.step(ac)
|
||||||
if render:
|
if render:
|
||||||
env.render(mode="human")
|
env.render(mode="human")
|
||||||
if terminated or truncated:
|
if terminated or truncated:
|
||||||
env.reset()
|
env.reset()
|
||||||
|
break
|
||||||
env.close()
|
env.close()
|
||||||
del env
|
del env
|
||||||
|
|
||||||
|
|
||||||
def example_custom_replanning_envs(seed=0, iteration=100, render=True):
|
def example_custom_replanning_envs(seed=0, iteration=100, render=True):
|
||||||
# id for a step-based environment
|
# id for a step-based environment
|
||||||
base_env_id = "BoxPushingDense-v0"
|
base_env_id = "fancy/BoxPushingDense-v0"
|
||||||
|
|
||||||
wrappers = [fancy_gym.envs.mujoco.box_pushing.mp_wrapper.MPWrapper]
|
wrappers = [fancy_gym.envs.mujoco.box_pushing.mp_wrapper.MPWrapper]
|
||||||
|
|
||||||
@ -38,7 +38,8 @@ def example_custom_replanning_envs(seed=0, iteration=100, render=True):
|
|||||||
'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0,
|
'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0,
|
||||||
'condition_on_desired': True}
|
'condition_on_desired': True}
|
||||||
|
|
||||||
env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs=black_box_kwargs,
|
base_env = gym.make(base_env_id)
|
||||||
|
env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs=black_box_kwargs,
|
||||||
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
|
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
|
||||||
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
|
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
|
||||||
seed=seed)
|
seed=seed)
|
||||||
@ -56,10 +57,12 @@ def example_custom_replanning_envs(seed=0, iteration=100, render=True):
|
|||||||
env.close()
|
env.close()
|
||||||
del env
|
del env
|
||||||
|
|
||||||
|
def main(render=False):
|
||||||
if __name__ == "__main__":
|
|
||||||
# run a registered replanning environment
|
# run a registered replanning environment
|
||||||
example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=False)
|
example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=render)
|
||||||
|
|
||||||
# run a custom replanning environment
|
# run a custom replanning environment
|
||||||
example_custom_replanning_envs(seed=0, iteration=8, render=True)
|
example_custom_replanning_envs(seed=0, iteration=8, render=render)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
@ -84,7 +84,8 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
|
|||||||
# basis_generator_kwargs = {'basis_generator_type': 'rbf',
|
# basis_generator_kwargs = {'basis_generator_type': 'rbf',
|
||||||
# 'num_basis': 5
|
# 'num_basis': 5
|
||||||
# }
|
# }
|
||||||
env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
|
base_env = gym.make(base_env_id)
|
||||||
|
env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={},
|
||||||
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
|
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
|
||||||
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
|
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
|
||||||
seed=seed)
|
seed=seed)
|
||||||
@ -114,21 +115,13 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
|
|||||||
env.close()
|
env.close()
|
||||||
del env
|
del env
|
||||||
|
|
||||||
|
def main(render = True):
|
||||||
if __name__ == '__main__':
|
|
||||||
# Disclaimer: DMC environments require the seed to be specified in the beginning.
|
|
||||||
# Adjusting it afterwards with env.seed() is not recommended as it does not affect the underlying physics.
|
|
||||||
|
|
||||||
# For rendering DMC
|
|
||||||
# export MUJOCO_GL="osmesa"
|
|
||||||
render = True
|
|
||||||
|
|
||||||
# # Standard DMC Suite tasks
|
# # Standard DMC Suite tasks
|
||||||
example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render)
|
example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render)
|
||||||
#
|
#
|
||||||
# # Manipulation tasks
|
# # Manipulation tasks
|
||||||
# # Disclaimer: The vision versions are currently not integrated and yield an error
|
# # Disclaimer: The vision versions are currently not integrated and yield an error
|
||||||
example_dmc("dm_control/manipulation-reach_site_features", seed=10, iterations=250, render=render)
|
example_dmc("dm_control/reach_site_features", seed=10, iterations=250, render=render)
|
||||||
#
|
#
|
||||||
# # Gym + DMC hybrid task provided in the MP framework
|
# # Gym + DMC hybrid task provided in the MP framework
|
||||||
example_dmc("dm_control_ProMP/ball_in_cup-catch-v0", seed=10, iterations=1, render=render)
|
example_dmc("dm_control_ProMP/ball_in_cup-catch-v0", seed=10, iterations=1, render=render)
|
||||||
@ -136,3 +129,20 @@ if __name__ == '__main__':
|
|||||||
# Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is
|
# Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is
|
||||||
# already registered above
|
# already registered above
|
||||||
example_custom_dmc_and_mp(seed=11, iterations=1, render=render)
|
example_custom_dmc_and_mp(seed=11, iterations=1, render=render)
|
||||||
|
|
||||||
|
# # Standard DMC Suite tasks
|
||||||
|
example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render)
|
||||||
|
#
|
||||||
|
# # Manipulation tasks
|
||||||
|
# # Disclaimer: The vision versions are currently not integrated and yield an error
|
||||||
|
example_dmc("dm_control/reach_site_features", seed=10, iterations=250, render=render)
|
||||||
|
#
|
||||||
|
# # Gym + DMC hybrid task provided in the MP framework
|
||||||
|
example_dmc("dm_control_ProMP/ball_in_cup-catch-v0", seed=10, iterations=1, render=render)
|
||||||
|
|
||||||
|
# Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is
|
||||||
|
# already registered above
|
||||||
|
example_custom_dmc_and_mp(seed=11, iterations=1, render=render)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
@ -85,10 +85,7 @@ def example_async(env_id="fancy/HoleReacher-v0", n_cpu=4, seed=int('533D', 16),
|
|||||||
# do not return values above threshold
|
# do not return values above threshold
|
||||||
return *map(lambda v: np.stack(v)[:n_samples], buffer.values()),
|
return *map(lambda v: np.stack(v)[:n_samples], buffer.values()),
|
||||||
|
|
||||||
|
def main(render = True):
|
||||||
if __name__ == '__main__':
|
|
||||||
render = True
|
|
||||||
|
|
||||||
# Basic gym task
|
# Basic gym task
|
||||||
example_general("Pendulum-v1", seed=10, iterations=200, render=render)
|
example_general("Pendulum-v1", seed=10, iterations=200, render=render)
|
||||||
|
|
||||||
@ -100,3 +97,6 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
# Vectorized multiprocessing environments
|
# Vectorized multiprocessing environments
|
||||||
# example_async(env_id="HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)
|
# example_async(env_id="HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
@ -35,7 +35,7 @@ def example_meta(env_id="fish-swim", seed=1, iterations=1000, render=True):
|
|||||||
if terminated or truncated:
|
if terminated or truncated:
|
||||||
print(env_id, rewards)
|
print(env_id, rewards)
|
||||||
rewards = 0
|
rewards = 0
|
||||||
obs = env.reset()
|
obs = env.reset(seed=seed+i+1)
|
||||||
|
|
||||||
env.close()
|
env.close()
|
||||||
del env
|
del env
|
||||||
@ -81,7 +81,8 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True):
|
|||||||
basis_generator_kwargs = {'basis_generator_type': 'rbf',
|
basis_generator_kwargs = {'basis_generator_type': 'rbf',
|
||||||
'num_basis': 5
|
'num_basis': 5
|
||||||
}
|
}
|
||||||
env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
|
base_env = gym.make(base_env_id)
|
||||||
|
env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={},
|
||||||
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
|
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
|
||||||
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
|
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
|
||||||
seed=seed)
|
seed=seed)
|
||||||
@ -92,14 +93,10 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True):
|
|||||||
# It is also possible to change them mode multiple times when
|
# It is also possible to change them mode multiple times when
|
||||||
# e.g. only every nth trajectory should be displayed.
|
# e.g. only every nth trajectory should be displayed.
|
||||||
if render:
|
if render:
|
||||||
raise ValueError("Metaworld render interface bug does not allow to render() fixes its interface. "
|
env.render(mode="human")
|
||||||
"A temporary workaround is to alter their code in MujocoEnv render() from "
|
|
||||||
"`if not offscreen` to `if not offscreen or offscreen == 'human'`.")
|
|
||||||
# TODO: Remove this, when Metaworld fixes its interface.
|
|
||||||
# env.render(mode="human")
|
|
||||||
|
|
||||||
rewards = 0
|
rewards = 0
|
||||||
obs = env.reset()
|
obs = env.reset(seed=seed)
|
||||||
|
|
||||||
# number of samples/full trajectories (multiple environment steps)
|
# number of samples/full trajectories (multiple environment steps)
|
||||||
for i in range(iterations):
|
for i in range(iterations):
|
||||||
@ -110,25 +107,23 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True):
|
|||||||
if terminated or truncated:
|
if terminated or truncated:
|
||||||
print(base_env_id, rewards)
|
print(base_env_id, rewards)
|
||||||
rewards = 0
|
rewards = 0
|
||||||
obs = env.reset()
|
obs = env.reset(seed=seed+i+1)
|
||||||
|
|
||||||
env.close()
|
env.close()
|
||||||
del env
|
del env
|
||||||
|
|
||||||
|
def main(render = False):
|
||||||
if __name__ == '__main__':
|
|
||||||
# Disclaimer: MetaWorld environments require the seed to be specified in the beginning.
|
|
||||||
# Adjusting it afterwards with env.seed() is not recommended as it may not affect the underlying behavior.
|
|
||||||
|
|
||||||
# For rendering it might be necessary to specify your OpenGL installation
|
# For rendering it might be necessary to specify your OpenGL installation
|
||||||
# export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so
|
# export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so
|
||||||
render = False
|
|
||||||
|
|
||||||
# # Standard Meta world tasks
|
# # Standard Meta world tasks
|
||||||
example_meta("metaworld/button-press-v2", seed=10, iterations=500, render=render)
|
example_meta("metaworld/button-press-v2", seed=10, iterations=500, render=render)
|
||||||
|
|
||||||
# # MP + MetaWorld hybrid task provided in the our framework
|
# # MP + MetaWorld hybrid task provided in the our framework
|
||||||
example_meta("metaworld_ProMP/ButtonPress-v2", seed=10, iterations=1, render=render)
|
example_meta("metaworld_ProMP/button-press-v2", seed=10, iterations=1, render=render)
|
||||||
#
|
#
|
||||||
# # Custom MetaWorld task
|
# # Custom MetaWorld task
|
||||||
example_custom_meta_and_mp(seed=10, iterations=1, render=render)
|
example_custom_meta_and_mp(seed=10, iterations=1, render=render)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
@ -26,6 +26,8 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend
|
|||||||
for i in range(iterations):
|
for i in range(iterations):
|
||||||
|
|
||||||
if render and i % 1 == 0:
|
if render and i % 1 == 0:
|
||||||
|
# This renders the full MP trajectory
|
||||||
|
# It is only required to call render() once in the beginning, which renders every consecutive trajectory.
|
||||||
env.render()
|
env.render()
|
||||||
|
|
||||||
# Now the action space is not the raw action but the parametrization of the trajectory generator,
|
# Now the action space is not the raw action but the parametrization of the trajectory generator,
|
||||||
@ -248,8 +250,7 @@ def example_fully_custom_mp_alternative(seed=1, iterations=1, render=True):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main(render=False):
|
||||||
render = False
|
|
||||||
# DMP
|
# DMP
|
||||||
example_mp("fancy_DMP/HoleReacher-v0", seed=10, iterations=5, render=render)
|
example_mp("fancy_DMP/HoleReacher-v0", seed=10, iterations=5, render=render)
|
||||||
|
|
||||||
|
@ -31,6 +31,8 @@ def example_mp(env_name, seed=1, render=True):
|
|||||||
print(returns)
|
print(returns)
|
||||||
obs = env.reset()
|
obs = env.reset()
|
||||||
|
|
||||||
|
def main(render=True):
|
||||||
|
example_mp("gym_ProMP/Reacher-v2", render=render)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
example_mp("gym_ProMP/Reacher-v2")
|
main()
|
13
test/test_examples.py
Normal file
13
test/test_examples.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from fancy_gym.examples.example_replanning_envs import main as replanning_envs_main
|
||||||
|
from fancy_gym.examples.examples_dmc import main as dmc_main
|
||||||
|
from fancy_gym.examples.examples_general import main as general_main
|
||||||
|
from fancy_gym.examples.examples_metaworld import main as metaworld_main
|
||||||
|
from fancy_gym.examples.examples_movement_primitives import main as mp_main
|
||||||
|
from fancy_gym.examples.examples_open_ai import main as open_ai_main
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('entry', [replanning_envs_main, dmc_main, general_main, metaworld_main, mp_main, open_ai_main])
|
||||||
|
@pytest.mark.parametrize('render', [False])
|
||||||
|
def test_run_example(entry, render):
|
||||||
|
entry(render=render)
|
Loading…
Reference in New Issue
Block a user