From b04e7c1dcd813e1ad5238a38f54a05fe1969811e Mon Sep 17 00:00:00 2001
From: philipp
Date: Wed, 13 Jul 2022 13:28:39 +0200
Subject: [PATCH] removed dependency on mujoco-py

---
 alr_envs/envs/mujoco/ant_jump/ant_jump.py    | 15 +++++--
 alr_envs/envs/mujoco/ant_jump/mp_wrapper.py  |  4 +-
 .../assets/beerpong_wo_cup_big_table.xml     |  5 +++
 alr_envs/envs/mujoco/beerpong/beerpong.py    | 39 +++++++++---------
 alr_envs/envs/mujoco/beerpong/mp_wrapper.py  | 10 ++---
 .../half_cheetah_jump/half_cheetah_jump.py   | 10 +++--
 .../mujoco/half_cheetah_jump/mp_wrapper.py   |  4 +-
 .../envs/mujoco/hopper_jump/hopper_jump.py   | 40 +++++++++++++------
 .../mujoco/hopper_jump/hopper_jump_on_box.py | 10 ++---
 .../envs/mujoco/hopper_jump/mp_wrapper.py    |  4 +-
 .../envs/mujoco/hopper_throw/hopper_throw.py | 15 +++++--
 .../hopper_throw/hopper_throw_in_basket.py   | 21 ++++++----
 .../envs/mujoco/hopper_throw/mp_wrapper.py   |  4 +-
 alr_envs/envs/mujoco/reacher/mp_wrapper.py   | 10 ++---
 alr_envs/envs/mujoco/reacher/reacher.py      |  5 ++-
 .../envs/mujoco/walker_2d_jump/mp_wrapper.py |  4 +-
 .../mujoco/walker_2d_jump/walker_2d_jump.py  | 14 +++++--
 17 files changed, 134 insertions(+), 80 deletions(-)

diff --git a/alr_envs/envs/mujoco/ant_jump/ant_jump.py b/alr_envs/envs/mujoco/ant_jump/ant_jump.py
index 74a66a3..7c00225 100644
--- a/alr_envs/envs/mujoco/ant_jump/ant_jump.py
+++ b/alr_envs/envs/mujoco/ant_jump/ant_jump.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union, Optional
 
 import numpy as np
 from gym.core import ObsType
-from gym.envs.mujoco.ant_v3 import AntEnv
+from gym.envs.mujoco.ant_v4 import AntEnv
 
 MAX_EPISODE_STEPS_ANTJUMP = 200
 
@@ -36,9 +36,15 @@ class AntJumpEnv(AntEnv):
         self.current_step = 0
         self.max_height = 0
         self.goal = 0
-        super().__init__(xml_file, ctrl_cost_weight, contact_cost_weight, healthy_reward, terminate_when_unhealthy,
-                         healthy_z_range, contact_force_range, reset_noise_scale,
-                         exclude_current_positions_from_observation)
+        super().__init__(xml_file=xml_file,
+                         ctrl_cost_weight=ctrl_cost_weight,
+                         contact_cost_weight=contact_cost_weight,
+                         healthy_reward=healthy_reward,
+                         terminate_when_unhealthy=terminate_when_unhealthy,
+                         healthy_z_range=healthy_z_range,
+                         contact_force_range=contact_force_range,
+                         reset_noise_scale=reset_noise_scale,
+                         exclude_current_positions_from_observation=exclude_current_positions_from_observation)
 
     def step(self, action):
         self.current_step += 1
@@ -90,6 +96,7 @@ class AntJumpEnv(AntEnv):
 
     # reset_model had to be implemented in every env to make it deterministic
     def reset_model(self):
+        # Todo remove if not needed
         noise_low = -self._reset_noise_scale
         noise_high = self._reset_noise_scale
 
diff --git a/alr_envs/envs/mujoco/ant_jump/mp_wrapper.py b/alr_envs/envs/mujoco/ant_jump/mp_wrapper.py
index a481d6f..c1dfbde 100644
--- a/alr_envs/envs/mujoco/ant_jump/mp_wrapper.py
+++ b/alr_envs/envs/mujoco/ant_jump/mp_wrapper.py
@@ -16,8 +16,8 @@ class MPWrapper(RawInterfaceWrapper):
 
     @property
     def current_pos(self) -> Union[float, int, np.ndarray]:
-        return self.env.sim.data.qpos[7:15].copy()
+        return self.data.qpos[7:15].copy()
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.sim.data.qvel[6:14].copy()
+        return self.data.qvel[6:14].copy()
diff --git a/alr_envs/envs/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml b/alr_envs/envs/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml
index 756e3d2..4497818 100644
--- a/alr_envs/envs/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml
+++ b/alr_envs/envs/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml
@@ -185,4 +185,9 @@
+
+
+
+
+
diff --git a/alr_envs/envs/mujoco/beerpong/beerpong.py b/alr_envs/envs/mujoco/beerpong/beerpong.py
index b49c180..60d442a 100644
--- a/alr_envs/envs/mujoco/beerpong/beerpong.py
+++ b/alr_envs/envs/mujoco/beerpong/beerpong.py
@@ -1,7 +1,6 @@
 import os
 from typing import Optional
 
-import mujoco_py.builder
 import numpy as np
 from gym import utils
 from gym.envs.mujoco import MujocoEnv
@@ -50,9 +49,9 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
         self.repeat_action = frame_skip
         # TODO: If accessing IDs is easier in the (new) official mujoco bindings, remove this
         self.model = None
-        self.site_id = lambda x: self.model.site_name2id(x)
-        self.body_id = lambda x: self.model.body_name2id(x)
-        self.geom_id = lambda x: self.model.geom_name2id(x)
+        self.geom_id = lambda x: self._mujoco_bindings.mj_name2id(self.model,
+                                                                  self._mujoco_bindings.mjtObj.mjOBJ_GEOM,
+                                                                  x)
 
         # for reward calculation
         self.dists = []
@@ -65,7 +64,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
         self.ball_in_cup = False
         self.dist_ground_cup = -1  # distance floor to cup if first floor contact
 
-        MujocoEnv.__init__(self, self.xml_path, frame_skip=1, mujoco_bindings="mujoco_py")
+        MujocoEnv.__init__(self, model_path=self.xml_path, frame_skip=1, mujoco_bindings="mujoco")
         utils.EzPickle.__init__(self)
 
     @property
@@ -100,13 +99,13 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
 
         # TODO: Ask Max why we need to set the state twice.
         self.set_state(start_pos, init_vel)
-        start_pos[7::] = self.data.site_xpos[self.site_id("init_ball_pos"), :].copy()
+        start_pos[7::] = self.data.site("init_ball_pos").xpos.copy()
         self.set_state(start_pos, init_vel)
         xy = self.np_random.uniform(self._cup_pos_min, self._cup_pos_max)
         xyz = np.zeros(3)
         xyz[:2] = xy
         xyz[-1] = 0.840
-        self.model.body_pos[self.body_id("cup_table")] = xyz
+        self.model.body("cup_table").pos[:] = xyz
         return self._get_obs()
 
     def step(self, a):
@@ -117,10 +116,10 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
             self.do_simulation(applied_action, self.frame_skip)
             # self.reward_function.check_contacts(self.sim)  # I assume this is not important?
             if self._steps < self.release_step:
-                self.data.qpos[7::] = self.data.site_xpos[self.site_id("init_ball_pos"), :].copy()
-                self.data.qvel[7::] = self.data.site_xvelp[self.site_id("init_ball_pos"), :].copy()
+                self.data.qpos[7::] = self.data.site('init_ball_pos').xpos.copy()
+                self.data.qvel[7::] = self.data.sensor('init_ball_vel').data.copy()
             crash = False
-        except mujoco_py.builder.MujocoException:
+        except Exception as e:
             crash = True
 
         ob = self._get_obs()
@@ -145,18 +144,18 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
         return ob, reward, done, infos
 
     def _get_obs(self):
-        theta = self.data.qpos.flat[:7]
-        theta_dot = self.data.qvel.flat[:7]
-        ball_pos = self.data.get_body_xpos("ball").copy()
-        cup_goal_diff_final = ball_pos - self.data.get_site_xpos("cup_goal_final_table").copy()
-        cup_goal_diff_top = ball_pos - self.data.get_site_xpos("cup_goal_table").copy()
+        theta = self.data.qpos.flat[:7].copy()
+        theta_dot = self.data.qvel.flat[:7].copy()
+        ball_pos = self.data.qpos.flat[7:].copy()
+        cup_goal_diff_final = ball_pos - self.data.site("cup_goal_final_table").xpos.copy()
+        cup_goal_diff_top = ball_pos - self.data.site("cup_goal_table").xpos.copy()
         return np.concatenate([
             np.cos(theta),
             np.sin(theta),
             theta_dot,
             cup_goal_diff_final,
             cup_goal_diff_top,
-            self.model.body_pos[self.body_id("cup_table")][:2].copy(),
+            self.model.body("cup_table").pos[:2].copy(),
             # [self._steps], # Use TimeAwareObservation Wrapper instead ....
         ])
 
@@ -165,10 +164,10 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
         return super(BeerPongEnv, self).dt * self.repeat_action
 
     def _get_reward(self, action):
-        goal_pos = self.data.get_site_xpos("cup_goal_table")
-        ball_pos = self.data.get_body_xpos("ball")
-        ball_vel = self.data.get_body_xvelp("ball")
-        goal_final_pos = self.data.get_site_xpos("cup_goal_final_table")
+        goal_pos = self.data.site("cup_goal_table").xpos
+        goal_final_pos = self.data.site("cup_goal_final_table").xpos
+        ball_pos = self.data.qpos.flat[7:].copy()
+        ball_vel = self.data.qvel.flat[7:].copy()
 
         self._check_contacts()
         self.dists.append(np.linalg.norm(goal_pos - ball_pos))
diff --git a/alr_envs/envs/mujoco/beerpong/mp_wrapper.py b/alr_envs/envs/mujoco/beerpong/mp_wrapper.py
index 5b53e77..22c50f7 100644
--- a/alr_envs/envs/mujoco/beerpong/mp_wrapper.py
+++ b/alr_envs/envs/mujoco/beerpong/mp_wrapper.py
@@ -21,16 +21,16 @@ class MPWrapper(RawInterfaceWrapper):
 
     @property
    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.data.qpos[0:7].copy()
+        return self.data.qpos[0:7].copy()
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.data.qvel[0:7].copy()
+        return self.data.qvel[0:7].copy()
 
     # TODO: Fix this
     def _episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
         if mp.learn_tau:
-            self.env.env.release_step = action[0] / self.env.dt  # Tau value
+            self.release_step = action[0] / self.dt  # Tau value
             return action, None
         else:
             return action, None
@@ -39,5 +39,5 @@
         xyz = np.zeros(3)
         xyz[:2] = context
         xyz[-1] = 0.840
-        self.env.env.model.body_pos[self.env.env.cup_table_id] = xyz
-        return self.get_observation_from_step(self.env.env._get_obs())
+        self.model.body_pos[self.cup_table_id] = xyz
+        return self.get_observation_from_step(self.get_obs())
diff --git a/alr_envs/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py b/alr_envs/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py
index 7916d0c..3d948ac 100644
--- a/alr_envs/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py
+++ b/alr_envs/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py
@@ -2,7 +2,7 @@ import os
 from typing import Tuple, Union, Optional
 
 from gym.core import ObsType
-from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
+from gym.envs.mujoco.half_cheetah_v4 import HalfCheetahEnv
 import numpy as np
 
 MAX_EPISODE_STEPS_HALFCHEETAHJUMP = 100
@@ -27,8 +27,11 @@ class HalfCheetahJumpEnv(HalfCheetahEnv):
         self.goal = 0
         self.context = context
         xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
-        super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, reset_noise_scale,
-                         exclude_current_positions_from_observation)
+        super().__init__(xml_file=xml_file,
+                         forward_reward_weight=forward_reward_weight,
+                         ctrl_cost_weight=ctrl_cost_weight,
+                         reset_noise_scale=reset_noise_scale,
+                         exclude_current_positions_from_observation=exclude_current_positions_from_observation)
 
     def step(self, action):
 
@@ -74,6 +77,7 @@ class HalfCheetahJumpEnv(HalfCheetahEnv):
 
     # overwrite reset_model to make it deterministic
     def reset_model(self):
+        # TODO remove if not needed!
         noise_low = -self._reset_noise_scale
         noise_high = self._reset_noise_scale
 
diff --git a/alr_envs/envs/mujoco/half_cheetah_jump/mp_wrapper.py b/alr_envs/envs/mujoco/half_cheetah_jump/mp_wrapper.py
index 298bf53..9681b3a 100644
--- a/alr_envs/envs/mujoco/half_cheetah_jump/mp_wrapper.py
+++ b/alr_envs/envs/mujoco/half_cheetah_jump/mp_wrapper.py
@@ -15,8 +15,8 @@ class MPWrapper(RawInterfaceWrapper):
 
     @property
     def current_pos(self) -> Union[float, int, np.ndarray]:
-        return self.env.sim.data.qpos[3:9].copy()
+        return self.data.qpos[3:9].copy()
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.sim.data.qvel[3:9].copy()
+        return self.data.qvel[3:9].copy()
diff --git a/alr_envs/envs/mujoco/hopper_jump/hopper_jump.py b/alr_envs/envs/mujoco/hopper_jump/hopper_jump.py
index 74643f3..5a334a2 100644
--- a/alr_envs/envs/mujoco/hopper_jump/hopper_jump.py
+++ b/alr_envs/envs/mujoco/hopper_jump/hopper_jump.py
@@ -2,8 +2,7 @@ import copy
 import os
 
 import numpy as np
-from gym.envs.mujoco.hopper_v3 import HopperEnv
-
+from gym.envs.mujoco.hopper_v4 import HopperEnv
 
 MAX_EPISODE_STEPS_HOPPERJUMP = 250
 
@@ -50,9 +49,16 @@ class HopperJumpEnv(HopperEnv):
         self.contact_dist = None
 
         xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
-        super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
-                         healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
-                         exclude_current_positions_from_observation)
+        super().__init__(xml_file=xml_file,
+                         forward_reward_weight=forward_reward_weight,
+                         ctrl_cost_weight=ctrl_cost_weight,
+                         healthy_reward=healthy_reward,
+                         terminate_when_unhealthy=terminate_when_unhealthy,
+                         healthy_state_range=healthy_state_range,
+                         healthy_z_range=healthy_z_range,
+                         healthy_angle_range=healthy_angle_range,
+                         reset_noise_scale=reset_noise_scale,
+                         exclude_current_positions_from_observation=exclude_current_positions_from_observation)
 
         # increase initial height
         self.init_qpos[1] = 1.5
@@ -67,7 +73,8 @@ class HopperJumpEnv(HopperEnv):
         self.do_simulation(action, self.frame_skip)
         height_after = self.get_body_com("torso")[2]
-        site_pos_after = self.data.get_site_xpos('foot_site')
+        # site_pos_after = self.data.get_site_xpos('foot_site')
+        site_pos_after = self.data.site('foot_site').xpos
         self.max_height = max(height_after, self.max_height)
 
         has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
@@ -111,7 +118,8 @@ class HopperJumpEnv(HopperEnv):
         return observation, reward, done, info
 
     def _get_obs(self):
-        goal_dist = self.data.get_site_xpos('foot_site') - self.goal
+        # goal_dist = self.data.get_site_xpos('foot_site') - self.goal
+        goal_dist = self.data.site('foot_site').xpos - self.goal
         return np.concatenate((super(HopperJumpEnv, self)._get_obs(), goal_dist.copy(), self.goal[:1]))
 
     def reset_model(self):
@@ -119,7 +127,8 @@
         # self.goal = self.np_random.uniform(0.3, 1.35, 1)[0]
         self.goal = np.concatenate([self.np_random.uniform(0.3, 1.35, 1), np.zeros(2, )])
-        self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = self.goal
+        # self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = self.goal
+        self.model.body('goal_site_body').pos[:] = np.copy(self.goal)
         self.max_height = 0
         self._steps = 0
 
@@ -153,8 +162,15 @@ class HopperJumpEnv(HopperEnv):
         return observation
 
     def _is_floor_foot_contact(self):
-        floor_geom_id = self.model.geom_name2id('floor')
-        foot_geom_id = self.model.geom_name2id('foot_geom')
+        # floor_geom_id = self.model.geom_name2id('floor')
+        # foot_geom_id = self.model.geom_name2id('foot_geom')
+        # TODO: do this properly over a sensor in the xml file, see dmc hopper
+        floor_geom_id = self._mujoco_bindings.mj_name2id(self.model,
+                                                         self._mujoco_bindings.mjtObj.mjOBJ_GEOM,
+                                                         'floor')
+        foot_geom_id = self._mujoco_bindings.mj_name2id(self.model,
+                                                        self._mujoco_bindings.mjtObj.mjOBJ_GEOM,
+                                                        'foot_geom')
         for i in range(self.data.ncon):
             contact = self.data.contact[i]
             collision = contact.geom1 == floor_geom_id and contact.geom2 == foot_geom_id
@@ -163,7 +179,7 @@
                 return True
         return False
 
-
+# TODO is that needed? if so test it
 class HopperJumpStepEnv(HopperJumpEnv):
 
     def __init__(self,
@@ -193,7 +209,7 @@
         self.do_simulation(action, self.frame_skip)
         height_after = self.get_body_com("torso")[2]
-        site_pos_after = self.data.get_site_xpos('foot_site')
+        site_pos_after = self.data.site('foot_site').xpos.copy()
         self.max_height = max(height_after, self.max_height)
 
         ctrl_cost = self.control_cost(action)
diff --git a/alr_envs/envs/mujoco/hopper_jump/hopper_jump_on_box.py b/alr_envs/envs/mujoco/hopper_jump/hopper_jump_on_box.py
index 845edaa..e184723 100644
--- a/alr_envs/envs/mujoco/hopper_jump/hopper_jump_on_box.py
+++ b/alr_envs/envs/mujoco/hopper_jump/hopper_jump_on_box.py
@@ -1,4 +1,4 @@
-from gym.envs.mujoco.hopper_v3 import HopperEnv
+from gym.envs.mujoco.hopper_v4 import HopperEnv
 import numpy as np
 import os
 
@@ -46,7 +46,7 @@
         foot_pos = self.get_body_com("foot")
         self.max_height = max(height_after, self.max_height)
 
-        vx, vz, vangle = self.sim.data.qvel[0:3]
+        vx, vz, vangle = self.data.qvel[0:3]
 
         s = self.state_vector()
         fell_over = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and (height_after > .7))
@@ -67,7 +67,8 @@
         is_on_box_y = True  # is y always true because he can only move in x and z direction?
         is_on_box_z = box_height - 0.02 <= foot_pos[2] <= box_height + 0.02
         is_on_box = is_on_box_x and is_on_box_y and is_on_box_z
-        if is_on_box: self.hopper_on_box = True
+        if is_on_box:
+            self.hopper_on_box = True
 
         ctrl_cost = self.control_cost(action)
 
@@ -133,9 +134,8 @@
         self.current_step = 0
         self.hopper_on_box = False
         if self.context:
-            box_id = self.sim.model.body_name2id("box")
             self.box_x = self.np_random.uniform(1, 3, 1)
-            self.sim.model.body_pos[box_id] = [self.box_x, 0, 0]
+            self.model.body("box").pos = [self.box_x[0], 0, 0]
         return super().reset()
 
     # overwrite reset_model to make it deterministic
diff --git a/alr_envs/envs/mujoco/hopper_jump/mp_wrapper.py b/alr_envs/envs/mujoco/hopper_jump/mp_wrapper.py
index 8e91bea..0f5e8a8 100644
--- a/alr_envs/envs/mujoco/hopper_jump/mp_wrapper.py
+++ b/alr_envs/envs/mujoco/hopper_jump/mp_wrapper.py
@@ -20,8 +20,8 @@ class MPWrapper(RawInterfaceWrapper):
 
     @property
     def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.sim.data.qpos[3:6].copy()
+        return self.data.qpos[3:6].copy()
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.sim.data.qvel[3:6].copy()
+        return self.data.qvel[3:6].copy()
diff --git a/alr_envs/envs/mujoco/hopper_throw/hopper_throw.py b/alr_envs/envs/mujoco/hopper_throw/hopper_throw.py
index c2bda19..3b2ce46 100644
--- a/alr_envs/envs/mujoco/hopper_throw/hopper_throw.py
+++ b/alr_envs/envs/mujoco/hopper_throw/hopper_throw.py
@@ -1,7 +1,7 @@
 import os
 from typing import Optional
 
-from gym.envs.mujoco.hopper_v3 import HopperEnv
+from gym.envs.mujoco.hopper_v4 import HopperEnv
 import numpy as np
 
 MAX_EPISODE_STEPS_HOPPERTHROW = 250
@@ -36,9 +36,16 @@ class HopperThrowEnv(HopperEnv):
         self.max_episode_steps = max_episode_steps
         self.context = context
         self.goal = 0
-        super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
-                         healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
-                         exclude_current_positions_from_observation)
+        super().__init__(xml_file=xml_file,
+                         forward_reward_weight=forward_reward_weight,
+                         ctrl_cost_weight=ctrl_cost_weight,
+                         healthy_reward=healthy_reward,
+                         terminate_when_unhealthy=terminate_when_unhealthy,
+                         healthy_state_range=healthy_state_range,
+                         healthy_z_range=healthy_z_range,
+                         healthy_angle_range=healthy_angle_range,
+                         reset_noise_scale=reset_noise_scale,
+                         exclude_current_positions_from_observation=exclude_current_positions_from_observation)
 
     def step(self, action):
         self.current_step += 1
diff --git a/alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py b/alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py
index b1fa3ef..fb46809 100644
--- a/alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py
+++ b/alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py
@@ -1,7 +1,7 @@
 import os
 from typing import Optional
 
-from gym.envs.mujoco.hopper_v3 import HopperEnv
+from gym.envs.mujoco.hopper_v4 import HopperEnv
 import numpy as np
 
 
@@ -44,9 +44,16 @@ class HopperThrowInBasketEnv(HopperEnv):
         self.penalty = penalty
         self.basket_x = 5
         xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
-        super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
-                         healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
-                         exclude_current_positions_from_observation)
+        super().__init__(xml_file=xml_file,
+                         forward_reward_weight=forward_reward_weight,
+                         ctrl_cost_weight=ctrl_cost_weight,
+                         healthy_reward=healthy_reward,
+                         terminate_when_unhealthy=terminate_when_unhealthy,
+                         healthy_state_range=healthy_state_range,
+                         healthy_z_range=healthy_z_range,
+                         healthy_angle_range=healthy_angle_range,
+                         reset_noise_scale=reset_noise_scale,
+                         exclude_current_positions_from_observation=exclude_current_positions_from_observation)
 
     def step(self, action):
 
@@ -109,13 +116,13 @@ class HopperThrowInBasketEnv(HopperEnv):
         self.current_step = 0
         self.ball_in_basket = False
         if self.context:
-            basket_id = self.sim.model.body_name2id("basket_ground")
-            self.basket_x = self.np_random.uniform(3, 7, 1)
-            self.sim.model.body_pos[basket_id] = [self.basket_x, 0, 0]
+            self.basket_x = self.np_random.uniform(low=3, high=7, size=1)
+            self.model.body("basket_ground").pos[:] = [self.basket_x[0], 0, 0]
         return super().reset()
 
     # overwrite reset_model to make it deterministic
     def reset_model(self):
+        # Todo remove if not needed!
         noise_low = -self._reset_noise_scale
         noise_high = self._reset_noise_scale
 
diff --git a/alr_envs/envs/mujoco/hopper_throw/mp_wrapper.py b/alr_envs/envs/mujoco/hopper_throw/mp_wrapper.py
index 78c2f51..247668f 100644
--- a/alr_envs/envs/mujoco/hopper_throw/mp_wrapper.py
+++ b/alr_envs/envs/mujoco/hopper_throw/mp_wrapper.py
@@ -16,8 +16,8 @@ class MPWrapper(RawInterfaceWrapper):
 
     @property
     def current_pos(self) -> Union[float, int, np.ndarray]:
-        return self.env.data.qpos[3:6].copy()
+        return self.data.qpos[3:6].copy()
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.data.qvel[3:6].copy()
+        return self.data.qvel[3:6].copy()
diff --git a/alr_envs/envs/mujoco/reacher/mp_wrapper.py b/alr_envs/envs/mujoco/reacher/mp_wrapper.py
index 2ef7c78..1c1e2f1 100644
--- a/alr_envs/envs/mujoco/reacher/mp_wrapper.py
+++ b/alr_envs/envs/mujoco/reacher/mp_wrapper.py
@@ -10,18 +10,18 @@ class MPWrapper(RawInterfaceWrapper):
 
     @property
     def context_mask(self):
-        return np.concatenate([[False] * self.env.n_links,  # cos
-                               [False] * self.env.n_links,  # sin
+        return np.concatenate([[False] * self.n_links,  # cos
+                               [False] * self.n_links,  # sin
                                [True] * 2,  # goal position
-                               [False] * self.env.n_links,  # angular velocity
+                               [False] * self.n_links,  # angular velocity
                                [False] * 3,  # goal distance
                                # [False],  # step
                                ])
 
     @property
     def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.data.qpos.flat[:self.env.n_links]
+        return self.data.qpos.flat[:self.n_links]
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.data.qvel.flat[:self.env.n_links]
+        return self.data.qvel.flat[:self.n_links]
diff --git a/alr_envs/envs/mujoco/reacher/reacher.py b/alr_envs/envs/mujoco/reacher/reacher.py
index 3ebc6e8..3d52be7 100644
--- a/alr_envs/envs/mujoco/reacher/reacher.py
+++ b/alr_envs/envs/mujoco/reacher/reacher.py
@@ -23,7 +23,10 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
 
         file_name = f'reacher_{n_links}links.xml'
 
-        MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", file_name), 2)
+        MujocoEnv.__init__(self,
+                           model_path=os.path.join(os.path.dirname(__file__), "assets", file_name),
+                           frame_skip=2,
+                           mujoco_bindings="mujoco")
 
     def step(self, action):
         self._steps += 1
diff --git a/alr_envs/envs/mujoco/walker_2d_jump/mp_wrapper.py b/alr_envs/envs/mujoco/walker_2d_jump/mp_wrapper.py
index 63432a7..f91d494 100644
--- a/alr_envs/envs/mujoco/walker_2d_jump/mp_wrapper.py
+++ b/alr_envs/envs/mujoco/walker_2d_jump/mp_wrapper.py
@@ -16,8 +16,8 @@
 
     @property
     def current_pos(self) -> Union[float, int, np.ndarray]:
-        return self.env.data.qpos[3:9].copy()
+        return self.data.qpos[3:9].copy()
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.data.qvel[3:9].copy()
+        return self.data.qvel[3:9].copy()
diff --git a/alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py b/alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py
index 76cb688..dadf8fd 100644
--- a/alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py
+++ b/alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py
@@ -1,7 +1,7 @@
 import os
 from typing import Optional
 
-from gym.envs.mujoco.walker2d_v3 import Walker2dEnv
+from gym.envs.mujoco.walker2d_v4 import Walker2dEnv
 import numpy as np
 
 MAX_EPISODE_STEPS_WALKERJUMP = 300
@@ -36,9 +36,15 @@ class Walker2dJumpEnv(Walker2dEnv):
         self._penalty = penalty
         self.goal = 0
         xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
-        super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
-                         healthy_z_range, healthy_angle_range, reset_noise_scale,
-                         exclude_current_positions_from_observation)
+        super().__init__(xml_file=xml_file,
+                         forward_reward_weight=forward_reward_weight,
+                         ctrl_cost_weight=ctrl_cost_weight,
+                         healthy_reward=healthy_reward,
+                         terminate_when_unhealthy=terminate_when_unhealthy,
+                         healthy_z_range=healthy_z_range,
+                         healthy_angle_range=healthy_angle_range,
+                         reset_noise_scale=reset_noise_scale,
+                         exclude_current_positions_from_observation=exclude_current_positions_from_observation)
 
     def step(self, action):
         self.current_step += 1
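
Migration notes (illustrative only, not part of the diff above): the patch swaps the mujoco-py accessors for the official mujoco bindings that gym's MujocoEnv loads when mujoco_bindings="mujoco" is passed. The sketch below shows the three recurring patterns on a minimal stand-in model; the XML string and the names "ball", "ball_geom", "ball_site" and "floor" are placeholders for illustration and are not taken from the alr_envs assets.

    # Standalone sketch of the binding changes applied above (assumed toy model,
    # placeholder names; not one of the alr_envs assets).
    import mujoco
    import numpy as np

    XML = """
    <mujoco>
      <worldbody>
        <geom name="floor" type="plane" size="1 1 0.1"/>
        <body name="ball" pos="0 0 0.5">
          <freejoint/>
          <geom name="ball_geom" type="sphere" size="0.05"/>
          <site name="ball_site"/>
        </body>
      </worldbody>
    </mujoco>
    """

    model = mujoco.MjModel.from_xml_string(XML)
    data = mujoco.MjData(model)
    mujoco.mj_forward(model, data)

    # mujoco-py: sim.data.get_site_xpos("ball_site") / sim.data.get_body_xpos("ball")
    # mujoco:    named views on MjData
    site_pos = data.site("ball_site").xpos.copy()
    body_pos = data.body("ball").xpos.copy()

    # mujoco-py: sim.model.body_pos[sim.model.body_name2id("ball")] = ...
    # mujoco:    named views on MjModel (model fields survive mj_forward)
    model.body("ball").pos[:] = [0.1, 0.0, 0.5]

    # mujoco-py: sim.model.geom_name2id("floor")
    # mujoco:    mj_name2id with an explicit object type
    floor_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_GEOM, "floor")
    ball_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_GEOM, "ball_geom")

    def floor_ball_contact(data: mujoco.MjData) -> bool:
        # contact scan in the style of HopperJumpEnv._is_floor_foot_contact
        for i in range(data.ncon):
            con = data.contact[i]
            if {con.geom1, con.geom2} == {floor_id, ball_id}:
                return True
        return False

    print(site_pos, body_pos, floor_id, ball_id, floor_ball_contact(data))

Relocating bodies (cup_table, box, basket_ground, goal_site_body) goes through model.body(name).pos rather than fields on data, because derived quantities such as xpos are recomputed from the model and qpos on every mj_forward / set_state call.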