Merge pull request #38 from ALRhub/remove_mujoco
Replace mujoco_py bindings with new pybind11-based bindings
This commit is contained in:
commit
d412fb229c
@ -2,7 +2,7 @@ from typing import Tuple, Union, Optional
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from gym.core import ObsType
|
from gym.core import ObsType
|
||||||
from gym.envs.mujoco.ant_v3 import AntEnv
|
from gym.envs.mujoco.ant_v4 import AntEnv
|
||||||
|
|
||||||
MAX_EPISODE_STEPS_ANTJUMP = 200
|
MAX_EPISODE_STEPS_ANTJUMP = 200
|
||||||
|
|
||||||
@ -36,9 +36,15 @@ class AntJumpEnv(AntEnv):
|
|||||||
self.current_step = 0
|
self.current_step = 0
|
||||||
self.max_height = 0
|
self.max_height = 0
|
||||||
self.goal = 0
|
self.goal = 0
|
||||||
super().__init__(xml_file, ctrl_cost_weight, contact_cost_weight, healthy_reward, terminate_when_unhealthy,
|
super().__init__(xml_file=xml_file,
|
||||||
healthy_z_range, contact_force_range, reset_noise_scale,
|
ctrl_cost_weight=ctrl_cost_weight,
|
||||||
exclude_current_positions_from_observation)
|
contact_cost_weight=contact_cost_weight,
|
||||||
|
healthy_reward=healthy_reward,
|
||||||
|
terminate_when_unhealthy=terminate_when_unhealthy,
|
||||||
|
healthy_z_range=healthy_z_range,
|
||||||
|
contact_force_range=contact_force_range,
|
||||||
|
reset_noise_scale=reset_noise_scale,
|
||||||
|
exclude_current_positions_from_observation=exclude_current_positions_from_observation)
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action):
|
||||||
self.current_step += 1
|
self.current_step += 1
|
||||||
@ -90,6 +96,7 @@ class AntJumpEnv(AntEnv):
|
|||||||
|
|
||||||
# reset_model had to be implemented in every env to make it deterministic
|
# reset_model had to be implemented in every env to make it deterministic
|
||||||
def reset_model(self):
|
def reset_model(self):
|
||||||
|
# Todo remove if not needed
|
||||||
noise_low = -self._reset_noise_scale
|
noise_low = -self._reset_noise_scale
|
||||||
noise_high = self._reset_noise_scale
|
noise_high = self._reset_noise_scale
|
||||||
|
|
||||||
|
@ -16,8 +16,8 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def current_pos(self) -> Union[float, int, np.ndarray]:
|
def current_pos(self) -> Union[float, int, np.ndarray]:
|
||||||
return self.env.sim.data.qpos[7:15].copy()
|
return self.data.qpos[7:15].copy()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.env.sim.data.qvel[6:14].copy()
|
return self.data.qvel[6:14].copy()
|
||||||
|
@ -185,4 +185,9 @@
|
|||||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="2.0" joint="wam/palm_yaw_joint"/>
|
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="2.0" joint="wam/palm_yaw_joint"/>
|
||||||
</actuator>
|
</actuator>
|
||||||
|
|
||||||
|
<sensor>
|
||||||
|
<framelinvel name="init_ball_vel" objtype="site" objname="init_ball_pos" />
|
||||||
|
</sensor>
|
||||||
|
|
||||||
|
|
||||||
</mujoco>
|
</mujoco>
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import mujoco_py.builder
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from gym import utils
|
from gym import utils
|
||||||
from gym.envs.mujoco import MujocoEnv
|
from gym.envs.mujoco import MujocoEnv
|
||||||
@ -51,9 +50,9 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
|
|||||||
self.repeat_action = 2
|
self.repeat_action = 2
|
||||||
# TODO: If accessing IDs is easier in the (new) official mujoco bindings, remove this
|
# TODO: If accessing IDs is easier in the (new) official mujoco bindings, remove this
|
||||||
self.model = None
|
self.model = None
|
||||||
self.site_id = lambda x: self.model.site_name2id(x)
|
self.geom_id = lambda x: self._mujoco_bindings.mj_name2id(self.model,
|
||||||
self.body_id = lambda x: self.model.body_name2id(x)
|
self._mujoco_bindings.mjtObj.mjOBJ_GEOM,
|
||||||
self.geom_id = lambda x: self.model.geom_name2id(x)
|
x)
|
||||||
|
|
||||||
# for reward calculation
|
# for reward calculation
|
||||||
self.dists = []
|
self.dists = []
|
||||||
@ -66,7 +65,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
|
|||||||
self.ball_in_cup = False
|
self.ball_in_cup = False
|
||||||
self.dist_ground_cup = -1 # distance floor to cup if first floor contact
|
self.dist_ground_cup = -1 # distance floor to cup if first floor contact
|
||||||
|
|
||||||
MujocoEnv.__init__(self, self.xml_path, frame_skip=1, mujoco_bindings="mujoco_py")
|
MujocoEnv.__init__(self, model_path=self.xml_path, frame_skip=1, mujoco_bindings="mujoco")
|
||||||
utils.EzPickle.__init__(self)
|
utils.EzPickle.__init__(self)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -101,13 +100,13 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
# TODO: Ask Max why we need to set the state twice.
|
# TODO: Ask Max why we need to set the state twice.
|
||||||
self.set_state(start_pos, init_vel)
|
self.set_state(start_pos, init_vel)
|
||||||
start_pos[7::] = self.data.site_xpos[self.site_id("init_ball_pos"), :].copy()
|
start_pos[7::] = self.data.site("init_ball_pos").xpos.copy()
|
||||||
self.set_state(start_pos, init_vel)
|
self.set_state(start_pos, init_vel)
|
||||||
xy = self.np_random.uniform(self._cup_pos_min, self._cup_pos_max)
|
xy = self.np_random.uniform(self._cup_pos_min, self._cup_pos_max)
|
||||||
xyz = np.zeros(3)
|
xyz = np.zeros(3)
|
||||||
xyz[:2] = xy
|
xyz[:2] = xy
|
||||||
xyz[-1] = 0.840
|
xyz[-1] = 0.840
|
||||||
self.model.body_pos[self.body_id("cup_table")] = xyz
|
self.model.body("cup_table").pos[:] = xyz
|
||||||
return self._get_obs()
|
return self._get_obs()
|
||||||
|
|
||||||
def step(self, a):
|
def step(self, a):
|
||||||
@ -118,10 +117,10 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
|
|||||||
self.do_simulation(applied_action, self.frame_skip)
|
self.do_simulation(applied_action, self.frame_skip)
|
||||||
# self.reward_function.check_contacts(self.sim) # I assume this is not important?
|
# self.reward_function.check_contacts(self.sim) # I assume this is not important?
|
||||||
if self._steps < self.release_step:
|
if self._steps < self.release_step:
|
||||||
self.data.qpos[7::] = self.data.site_xpos[self.site_id("init_ball_pos"), :].copy()
|
self.data.qpos[7::] = self.data.site('init_ball_pos').xpos.copy()
|
||||||
self.data.qvel[7::] = self.data.site_xvelp[self.site_id("init_ball_pos"), :].copy()
|
self.data.qvel[7::] = self.data.sensor('init_ball_vel').data.copy()
|
||||||
crash = False
|
crash = False
|
||||||
except mujoco_py.builder.MujocoException:
|
except Exception as e:
|
||||||
crash = True
|
crash = True
|
||||||
|
|
||||||
ob = self._get_obs()
|
ob = self._get_obs()
|
||||||
@ -146,18 +145,18 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
|
|||||||
return ob, reward, done, infos
|
return ob, reward, done, infos
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
theta = self.data.qpos.flat[:7]
|
theta = self.data.qpos.flat[:7].copy()
|
||||||
theta_dot = self.data.qvel.flat[:7]
|
theta_dot = self.data.qvel.flat[:7].copy()
|
||||||
ball_pos = self.data.get_body_xpos("ball").copy()
|
ball_pos = self.data.qpos.flat[7:].copy()
|
||||||
cup_goal_diff_final = ball_pos - self.data.get_site_xpos("cup_goal_final_table").copy()
|
cup_goal_diff_final = ball_pos - self.data.site("cup_goal_final_table").xpos.copy()
|
||||||
cup_goal_diff_top = ball_pos - self.data.get_site_xpos("cup_goal_table").copy()
|
cup_goal_diff_top = ball_pos - self.data.site("cup_goal_table").xpos.copy()
|
||||||
return np.concatenate([
|
return np.concatenate([
|
||||||
np.cos(theta),
|
np.cos(theta),
|
||||||
np.sin(theta),
|
np.sin(theta),
|
||||||
theta_dot,
|
theta_dot,
|
||||||
cup_goal_diff_final,
|
cup_goal_diff_final,
|
||||||
cup_goal_diff_top,
|
cup_goal_diff_top,
|
||||||
self.model.body_pos[self.body_id("cup_table")][:2].copy(),
|
self.model.body("cup_table").pos[:2].copy(),
|
||||||
# [self._steps], # Use TimeAwareObservation Wrapper instead ....
|
# [self._steps], # Use TimeAwareObservation Wrapper instead ....
|
||||||
])
|
])
|
||||||
|
|
||||||
@ -166,10 +165,10 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
|
|||||||
return super(BeerPongEnv, self).dt * self.repeat_action
|
return super(BeerPongEnv, self).dt * self.repeat_action
|
||||||
|
|
||||||
def _get_reward(self, action):
|
def _get_reward(self, action):
|
||||||
goal_pos = self.data.get_site_xpos("cup_goal_table")
|
goal_pos = self.data.site("cup_goal_table").xpos
|
||||||
ball_pos = self.data.get_body_xpos("ball")
|
goal_final_pos = self.data.site("cup_goal_final_table").xpos
|
||||||
ball_vel = self.data.get_body_xvelp("ball")
|
ball_pos = self.data.qpos.flat[7:].copy()
|
||||||
goal_final_pos = self.data.get_site_xpos("cup_goal_final_table")
|
ball_vel = self.data.qvel.flat[7:].copy()
|
||||||
|
|
||||||
self._check_contacts()
|
self._check_contacts()
|
||||||
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
|
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
|
||||||
@ -219,6 +218,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
|
|||||||
return reward, infos
|
return reward, infos
|
||||||
|
|
||||||
def _check_contacts(self):
|
def _check_contacts(self):
|
||||||
|
#TODO proper contact detection using sensors?
|
||||||
if not self.ball_table_contact:
|
if not self.ball_table_contact:
|
||||||
self.ball_table_contact = self._check_collision({self.geom_id("ball_geom")},
|
self.ball_table_contact = self._check_collision({self.geom_id("ball_geom")},
|
||||||
{self.geom_id("table_contact_geom")})
|
{self.geom_id("table_contact_geom")})
|
||||||
|
@ -21,16 +21,16 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.env.data.qpos[0:7].copy()
|
return self.data.qpos[0:7].copy()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.env.data.qvel[0:7].copy()
|
return self.data.qvel[0:7].copy()
|
||||||
|
|
||||||
# TODO: Fix this
|
# TODO: Fix this
|
||||||
def episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
|
def episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
|
||||||
if mp.learn_tau:
|
if mp.learn_tau:
|
||||||
self.env.env.release_step = action[0] / self.env.dt # Tau value
|
self.release_step = action[0] / self.dt # Tau value
|
||||||
return action, None
|
return action, None
|
||||||
else:
|
else:
|
||||||
return action, None
|
return action, None
|
||||||
@ -39,5 +39,5 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
xyz = np.zeros(3)
|
xyz = np.zeros(3)
|
||||||
xyz[:2] = context
|
xyz[:2] = context
|
||||||
xyz[-1] = 0.840
|
xyz[-1] = 0.840
|
||||||
self.env.env.model.body_pos[self.env.env.cup_table_id] = xyz
|
self.model.body_pos[self.cup_table_id] = xyz
|
||||||
return self.get_observation_from_step(self.env.env._get_obs())
|
return self.get_observation_from_step(self.get_obs())
|
||||||
|
@ -2,7 +2,7 @@ import os
|
|||||||
from typing import Tuple, Union, Optional
|
from typing import Tuple, Union, Optional
|
||||||
|
|
||||||
from gym.core import ObsType
|
from gym.core import ObsType
|
||||||
from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
|
from gym.envs.mujoco.half_cheetah_v4 import HalfCheetahEnv
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
MAX_EPISODE_STEPS_HALFCHEETAHJUMP = 100
|
MAX_EPISODE_STEPS_HALFCHEETAHJUMP = 100
|
||||||
@ -27,8 +27,11 @@ class HalfCheetahJumpEnv(HalfCheetahEnv):
|
|||||||
self.goal = 0
|
self.goal = 0
|
||||||
self.context = context
|
self.context = context
|
||||||
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
||||||
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, reset_noise_scale,
|
super().__init__(xml_file=xml_file,
|
||||||
exclude_current_positions_from_observation)
|
forward_reward_weight=forward_reward_weight,
|
||||||
|
ctrl_cost_weight=ctrl_cost_weight,
|
||||||
|
reset_noise_scale=reset_noise_scale,
|
||||||
|
exclude_current_positions_from_observation=exclude_current_positions_from_observation)
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action):
|
||||||
|
|
||||||
@ -74,6 +77,7 @@ class HalfCheetahJumpEnv(HalfCheetahEnv):
|
|||||||
|
|
||||||
# overwrite reset_model to make it deterministic
|
# overwrite reset_model to make it deterministic
|
||||||
def reset_model(self):
|
def reset_model(self):
|
||||||
|
# TODO remove if not needed!
|
||||||
noise_low = -self._reset_noise_scale
|
noise_low = -self._reset_noise_scale
|
||||||
noise_high = self._reset_noise_scale
|
noise_high = self._reset_noise_scale
|
||||||
|
|
||||||
|
@ -15,8 +15,8 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def current_pos(self) -> Union[float, int, np.ndarray]:
|
def current_pos(self) -> Union[float, int, np.ndarray]:
|
||||||
return self.env.sim.data.qpos[3:9].copy()
|
return self.data.qpos[3:9].copy()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.env.sim.data.qvel[3:9].copy()
|
return self.data.qvel[3:9].copy()
|
||||||
|
@ -2,8 +2,7 @@ import copy
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from gym.envs.mujoco.hopper_v3 import HopperEnv
|
from gym.envs.mujoco.hopper_v4 import HopperEnv
|
||||||
|
|
||||||
MAX_EPISODE_STEPS_HOPPERJUMP = 250
|
MAX_EPISODE_STEPS_HOPPERJUMP = 250
|
||||||
|
|
||||||
|
|
||||||
@ -50,9 +49,16 @@ class HopperJumpEnv(HopperEnv):
|
|||||||
self.contact_dist = None
|
self.contact_dist = None
|
||||||
|
|
||||||
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
||||||
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
|
super().__init__(xml_file=xml_file,
|
||||||
healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
|
forward_reward_weight=forward_reward_weight,
|
||||||
exclude_current_positions_from_observation)
|
ctrl_cost_weight=ctrl_cost_weight,
|
||||||
|
healthy_reward=healthy_reward,
|
||||||
|
terminate_when_unhealthy=terminate_when_unhealthy,
|
||||||
|
healthy_state_range=healthy_state_range,
|
||||||
|
healthy_z_range=healthy_z_range,
|
||||||
|
healthy_angle_range=healthy_angle_range,
|
||||||
|
reset_noise_scale=reset_noise_scale,
|
||||||
|
exclude_current_positions_from_observation=exclude_current_positions_from_observation)
|
||||||
|
|
||||||
# increase initial height
|
# increase initial height
|
||||||
self.init_qpos[1] = 1.5
|
self.init_qpos[1] = 1.5
|
||||||
@ -67,7 +73,8 @@ class HopperJumpEnv(HopperEnv):
|
|||||||
self.do_simulation(action, self.frame_skip)
|
self.do_simulation(action, self.frame_skip)
|
||||||
|
|
||||||
height_after = self.get_body_com("torso")[2]
|
height_after = self.get_body_com("torso")[2]
|
||||||
site_pos_after = self.data.get_site_xpos('foot_site')
|
#site_pos_after = self.data.get_site_xpos('foot_site')
|
||||||
|
site_pos_after = self.data.site('foot_site').xpos
|
||||||
self.max_height = max(height_after, self.max_height)
|
self.max_height = max(height_after, self.max_height)
|
||||||
|
|
||||||
has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
|
has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
|
||||||
@ -111,7 +118,8 @@ class HopperJumpEnv(HopperEnv):
|
|||||||
return observation, reward, done, info
|
return observation, reward, done, info
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
goal_dist = self.data.get_site_xpos('foot_site') - self.goal
|
# goal_dist = self.data.get_site_xpos('foot_site') - self.goal
|
||||||
|
goal_dist = self.data.site('foot_site').xpos - self.goal
|
||||||
return np.concatenate((super(HopperJumpEnv, self)._get_obs(), goal_dist.copy(), self.goal[:1]))
|
return np.concatenate((super(HopperJumpEnv, self)._get_obs(), goal_dist.copy(), self.goal[:1]))
|
||||||
|
|
||||||
def reset_model(self):
|
def reset_model(self):
|
||||||
@ -119,7 +127,8 @@ class HopperJumpEnv(HopperEnv):
|
|||||||
|
|
||||||
# self.goal = self.np_random.uniform(0.3, 1.35, 1)[0]
|
# self.goal = self.np_random.uniform(0.3, 1.35, 1)[0]
|
||||||
self.goal = np.concatenate([self.np_random.uniform(0.3, 1.35, 1), np.zeros(2, )])
|
self.goal = np.concatenate([self.np_random.uniform(0.3, 1.35, 1), np.zeros(2, )])
|
||||||
self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = self.goal
|
# self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = self.goal
|
||||||
|
self.data.body('goal_site_body').xpos[:] = np.copy(self.goal)
|
||||||
self.max_height = 0
|
self.max_height = 0
|
||||||
self._steps = 0
|
self._steps = 0
|
||||||
|
|
||||||
@ -153,8 +162,15 @@ class HopperJumpEnv(HopperEnv):
|
|||||||
return observation
|
return observation
|
||||||
|
|
||||||
def _is_floor_foot_contact(self):
|
def _is_floor_foot_contact(self):
|
||||||
floor_geom_id = self.model.geom_name2id('floor')
|
# floor_geom_id = self.model.geom_name2id('floor')
|
||||||
foot_geom_id = self.model.geom_name2id('foot_geom')
|
# foot_geom_id = self.model.geom_name2id('foot_geom')
|
||||||
|
# TODO: do this properly over a sensor in the xml file, see dmc hopper
|
||||||
|
floor_geom_id = self._mujoco_bindings.mj_name2id(self.model,
|
||||||
|
self._mujoco_bindings.mjtObj.mjOBJ_GEOM,
|
||||||
|
'floor')
|
||||||
|
foot_geom_id = self._mujoco_bindings.mj_name2id(self.model,
|
||||||
|
self._mujoco_bindings.mjtObj.mjOBJ_GEOM,
|
||||||
|
'foot_geom')
|
||||||
for i in range(self.data.ncon):
|
for i in range(self.data.ncon):
|
||||||
contact = self.data.contact[i]
|
contact = self.data.contact[i]
|
||||||
collision = contact.geom1 == floor_geom_id and contact.geom2 == foot_geom_id
|
collision = contact.geom1 == floor_geom_id and contact.geom2 == foot_geom_id
|
||||||
@ -163,7 +179,7 @@ class HopperJumpEnv(HopperEnv):
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# TODO is that needed? if so test it
|
||||||
class HopperJumpStepEnv(HopperJumpEnv):
|
class HopperJumpStepEnv(HopperJumpEnv):
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@ -193,7 +209,7 @@ class HopperJumpStepEnv(HopperJumpEnv):
|
|||||||
self.do_simulation(action, self.frame_skip)
|
self.do_simulation(action, self.frame_skip)
|
||||||
|
|
||||||
height_after = self.get_body_com("torso")[2]
|
height_after = self.get_body_com("torso")[2]
|
||||||
site_pos_after = self.data.get_site_xpos('foot_site')
|
site_pos_after = self.data.site('foot_site').xpos.copy()
|
||||||
self.max_height = max(height_after, self.max_height)
|
self.max_height = max(height_after, self.max_height)
|
||||||
|
|
||||||
ctrl_cost = self.control_cost(action)
|
ctrl_cost = self.control_cost(action)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from gym.envs.mujoco.hopper_v3 import HopperEnv
|
from gym.envs.mujoco.hopper_v4 import HopperEnv
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import os
|
import os
|
||||||
@ -46,7 +46,7 @@ class HopperJumpOnBoxEnv(HopperEnv):
|
|||||||
foot_pos = self.get_body_com("foot")
|
foot_pos = self.get_body_com("foot")
|
||||||
self.max_height = max(height_after, self.max_height)
|
self.max_height = max(height_after, self.max_height)
|
||||||
|
|
||||||
vx, vz, vangle = self.sim.data.qvel[0:3]
|
vx, vz, vangle = self.data.qvel[0:3]
|
||||||
|
|
||||||
s = self.state_vector()
|
s = self.state_vector()
|
||||||
fell_over = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and (height_after > .7))
|
fell_over = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and (height_after > .7))
|
||||||
@ -67,7 +67,8 @@ class HopperJumpOnBoxEnv(HopperEnv):
|
|||||||
is_on_box_y = True # is y always true because he can only move in x and z direction?
|
is_on_box_y = True # is y always true because he can only move in x and z direction?
|
||||||
is_on_box_z = box_height - 0.02 <= foot_pos[2] <= box_height + 0.02
|
is_on_box_z = box_height - 0.02 <= foot_pos[2] <= box_height + 0.02
|
||||||
is_on_box = is_on_box_x and is_on_box_y and is_on_box_z
|
is_on_box = is_on_box_x and is_on_box_y and is_on_box_z
|
||||||
if is_on_box: self.hopper_on_box = True
|
if is_on_box:
|
||||||
|
self.hopper_on_box = True
|
||||||
|
|
||||||
ctrl_cost = self.control_cost(action)
|
ctrl_cost = self.control_cost(action)
|
||||||
|
|
||||||
@ -133,9 +134,8 @@ class HopperJumpOnBoxEnv(HopperEnv):
|
|||||||
self.current_step = 0
|
self.current_step = 0
|
||||||
self.hopper_on_box = False
|
self.hopper_on_box = False
|
||||||
if self.context:
|
if self.context:
|
||||||
box_id = self.sim.model.body_name2id("box")
|
|
||||||
self.box_x = self.np_random.uniform(1, 3, 1)
|
self.box_x = self.np_random.uniform(1, 3, 1)
|
||||||
self.sim.model.body_pos[box_id] = [self.box_x, 0, 0]
|
self.model.body("box").pos = [self.box_x[0], 0, 0]
|
||||||
return super().reset()
|
return super().reset()
|
||||||
|
|
||||||
# overwrite reset_model to make it deterministic
|
# overwrite reset_model to make it deterministic
|
||||||
|
@ -20,8 +20,8 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.sim.data.qpos[3:6].copy()
|
return self.data.qpos[3:6].copy()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.sim.data.qvel[3:6].copy()
|
return self.data.qvel[3:6].copy()
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from gym.envs.mujoco.hopper_v3 import HopperEnv
|
from gym.envs.mujoco.hopper_v4 import HopperEnv
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
MAX_EPISODE_STEPS_HOPPERTHROW = 250
|
MAX_EPISODE_STEPS_HOPPERTHROW = 250
|
||||||
@ -36,9 +36,16 @@ class HopperThrowEnv(HopperEnv):
|
|||||||
self.max_episode_steps = max_episode_steps
|
self.max_episode_steps = max_episode_steps
|
||||||
self.context = context
|
self.context = context
|
||||||
self.goal = 0
|
self.goal = 0
|
||||||
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
|
super().__init__(xml_file=xml_file,
|
||||||
healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
|
forward_reward_weight=forward_reward_weight,
|
||||||
exclude_current_positions_from_observation)
|
ctrl_cost_weight=ctrl_cost_weight,
|
||||||
|
healthy_reward=healthy_reward,
|
||||||
|
terminate_when_unhealthy=terminate_when_unhealthy,
|
||||||
|
healthy_angle_range=healthy_state_range,
|
||||||
|
healthy_z_range=healthy_z_range,
|
||||||
|
healthy_state_range=healthy_angle_range,
|
||||||
|
reset_noise_scale=reset_noise_scale,
|
||||||
|
exclude_current_positions_from_observation=exclude_current_positions_from_observation)
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action):
|
||||||
self.current_step += 1
|
self.current_step += 1
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from gym.envs.mujoco.hopper_v3 import HopperEnv
|
from gym.envs.mujoco.hopper_v4 import HopperEnv
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
@ -44,9 +44,16 @@ class HopperThrowInBasketEnv(HopperEnv):
|
|||||||
self.penalty = penalty
|
self.penalty = penalty
|
||||||
self.basket_x = 5
|
self.basket_x = 5
|
||||||
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
||||||
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
|
super().__init__(xml_file=xml_file,
|
||||||
healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
|
forward_reward_weight=forward_reward_weight,
|
||||||
exclude_current_positions_from_observation)
|
ctrl_cost_weight=ctrl_cost_weight,
|
||||||
|
healthy_reward=healthy_reward,
|
||||||
|
terminate_when_unhealthy=terminate_when_unhealthy,
|
||||||
|
healthy_state_range=healthy_state_range,
|
||||||
|
healthy_z_range=healthy_z_range,
|
||||||
|
healthy_angle_range=healthy_angle_range,
|
||||||
|
reset_noise_scale=reset_noise_scale,
|
||||||
|
exclude_current_positions_from_observation=exclude_current_positions_from_observation)
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action):
|
||||||
|
|
||||||
@ -109,13 +116,13 @@ class HopperThrowInBasketEnv(HopperEnv):
|
|||||||
self.current_step = 0
|
self.current_step = 0
|
||||||
self.ball_in_basket = False
|
self.ball_in_basket = False
|
||||||
if self.context:
|
if self.context:
|
||||||
basket_id = self.sim.model.body_name2id("basket_ground")
|
self.basket_x = self.np_random.uniform(low=3, high=7, size=1)
|
||||||
self.basket_x = self.np_random.uniform(3, 7, 1)
|
self.model.body("basket_ground").pos[:] = [self.basket_x[0], 0, 0]
|
||||||
self.sim.model.body_pos[basket_id] = [self.basket_x, 0, 0]
|
|
||||||
return super().reset()
|
return super().reset()
|
||||||
|
|
||||||
# overwrite reset_model to make it deterministic
|
# overwrite reset_model to make it deterministic
|
||||||
def reset_model(self):
|
def reset_model(self):
|
||||||
|
# Todo remove if not needed!
|
||||||
noise_low = -self._reset_noise_scale
|
noise_low = -self._reset_noise_scale
|
||||||
noise_high = self._reset_noise_scale
|
noise_high = self._reset_noise_scale
|
||||||
|
|
||||||
|
@ -16,8 +16,8 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def current_pos(self) -> Union[float, int, np.ndarray]:
|
def current_pos(self) -> Union[float, int, np.ndarray]:
|
||||||
return self.env.data.qpos[3:6].copy()
|
return self.data.qpos[3:6].copy()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.env.data.qvel[3:6].copy()
|
return self.data.qvel[3:6].copy()
|
||||||
|
@ -10,18 +10,18 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def context_mask(self):
|
def context_mask(self):
|
||||||
return np.concatenate([[False] * self.env.n_links, # cos
|
return np.concatenate([[False] * self.n_links, # cos
|
||||||
[False] * self.env.n_links, # sin
|
[False] * self.n_links, # sin
|
||||||
[True] * 2, # goal position
|
[True] * 2, # goal position
|
||||||
[False] * self.env.n_links, # angular velocity
|
[False] * self.n_links, # angular velocity
|
||||||
[False] * 3, # goal distance
|
[False] * 3, # goal distance
|
||||||
# [False], # step
|
# [False], # step
|
||||||
])
|
])
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.env.data.qpos.flat[:self.env.n_links]
|
return self.data.qpos.flat[:self.n_links]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.env.data.qvel.flat[:self.env.n_links]
|
return self.data.qvel.flat[:self.n_links]
|
||||||
|
@ -23,7 +23,10 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
file_name = f'reacher_{n_links}links.xml'
|
file_name = f'reacher_{n_links}links.xml'
|
||||||
|
|
||||||
MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", file_name), 2)
|
MujocoEnv.__init__(self,
|
||||||
|
model_path=os.path.join(os.path.dirname(__file__), "assets", file_name),
|
||||||
|
frame_skip=2,
|
||||||
|
mujoco_bindings="mujoco")
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action):
|
||||||
self._steps += 1
|
self._steps += 1
|
||||||
|
@ -16,8 +16,8 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def current_pos(self) -> Union[float, int, np.ndarray]:
|
def current_pos(self) -> Union[float, int, np.ndarray]:
|
||||||
return self.env.data.qpos[3:9].copy()
|
return self.data.qpos[3:9].copy()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
return self.env.data.qvel[3:9].copy()
|
return self.data.qvel[3:9].copy()
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from gym.envs.mujoco.walker2d_v3 import Walker2dEnv
|
from gym.envs.mujoco.walker2d_v4 import Walker2dEnv
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
MAX_EPISODE_STEPS_WALKERJUMP = 300
|
MAX_EPISODE_STEPS_WALKERJUMP = 300
|
||||||
@ -36,9 +36,15 @@ class Walker2dJumpEnv(Walker2dEnv):
|
|||||||
self._penalty = penalty
|
self._penalty = penalty
|
||||||
self.goal = 0
|
self.goal = 0
|
||||||
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
||||||
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
|
super().__init__(xml_file=xml_file,
|
||||||
healthy_z_range, healthy_angle_range, reset_noise_scale,
|
forward_reward_weight=forward_reward_weight,
|
||||||
exclude_current_positions_from_observation)
|
ctrl_cost_weight=ctrl_cost_weight,
|
||||||
|
healthy_reward=healthy_reward,
|
||||||
|
terminate_when_unhealthy=terminate_when_unhealthy,
|
||||||
|
healthy_z_range=healthy_z_range,
|
||||||
|
healthy_angle_range=healthy_angle_range,
|
||||||
|
reset_noise_scale=reset_noise_scale,
|
||||||
|
exclude_current_positions_from_observation=exclude_current_positions_from_observation)
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action):
|
||||||
self.current_step += 1
|
self.current_step += 1
|
||||||
|
1
setup.py
1
setup.py
@ -28,7 +28,6 @@ setup(
|
|||||||
extras_require=extras,
|
extras_require=extras,
|
||||||
install_requires=[
|
install_requires=[
|
||||||
'gym>=0.24.0',
|
'gym>=0.24.0',
|
||||||
"mujoco_py<2.2,>=2.1",
|
|
||||||
],
|
],
|
||||||
packages=[package for package in find_packages() if package.startswith("alr_envs")],
|
packages=[package for package in find_packages() if package.startswith("alr_envs")],
|
||||||
# packages=['alr_envs', 'alr_envs.envs', 'alr_envs.open_ai', 'alr_envs.dmc', 'alr_envs.meta', 'alr_envs.utils'],
|
# packages=['alr_envs', 'alr_envs.envs', 'alr_envs.open_ai', 'alr_envs.dmc', 'alr_envs.meta', 'alr_envs.utils'],
|
||||||
|
Loading…
Reference in New Issue
Block a user