diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py
index 119d923..4e3b160 100644
--- a/fancy_gym/black_box/black_box_wrapper.py
+++ b/fancy_gym/black_box/black_box_wrapper.py
@@ -67,23 +67,29 @@ class BlackBoxWrapper(gym.ObservationWrapper):

     def observation(self, observation):
         # return context space if we are
-        obs = observation[self.env.context_mask] if self.return_context_observation else observation
+        if self.return_context_observation:
+            observation = observation[self.env.context_mask]
         # cast dtype because metaworld returns incorrect that throws gym error
-        return obs.astype(self.observation_space.dtype)
+        return observation.astype(self.observation_space.dtype)

     def get_trajectory(self, action: np.ndarray) -> Tuple:
         clipped_params = np.clip(action, self.traj_gen_action_space.low, self.traj_gen_action_space.high)
         self.traj_gen.set_params(clipped_params)
-        # TODO: is this correct for replanning? Do we need to adjust anything here?
-        self.traj_gen.set_boundary_conditions(
-            bc_time=np.array(0) if not self.do_replanning else np.array([self.current_traj_steps * self.dt]),
-            bc_pos=self.current_pos, bc_vel=self.current_vel)
-        # TODO remove the - self.dt after Bruces fix.
-        self.traj_gen.set_duration(None if self.learn_sub_trajectories else self.duration - self.dt, self.dt)
-        traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True)
-        trajectory_tensor, velocity_tensor = traj_dict['pos'], traj_dict['vel']
+        bc_time = np.array(0 if not self.do_replanning else self.current_traj_steps * self.dt)
+        # TODO we could think about initializing with the previous desired value in order to have a smooth transition
+        # at least from the planning point of view.
+        self.traj_gen.set_boundary_conditions(bc_time, self.current_pos, self.current_vel)
+        duration = None if self.learn_sub_trajectories else self.duration
+        self.traj_gen.set_duration(duration, self.dt)
+        # traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True)
+        trajectory = get_numpy(self.traj_gen.get_traj_pos())
+        velocity = get_numpy(self.traj_gen.get_traj_vel())

-        return get_numpy(trajectory_tensor), get_numpy(velocity_tensor)
+        # Remove first element of trajectory as this is the current position and velocity
+        # trajectory = trajectory[1:]
+        # velocity = velocity[1:]
+
+        return trajectory, velocity

     def _get_traj_gen_action_space(self):
         """This function can be used to set up an individual space for the parameters of the traj_gen."""
@@ -105,13 +111,13 @@ class BlackBoxWrapper(gym.ObservationWrapper):
             return self._get_traj_gen_action_space()

     def _get_observation_space(self):
-        mask = self.env.context_mask
-        if not self.return_context_observation:
+        if self.return_context_observation:
+            mask = self.env.context_mask
             # return full observation
-            mask = np.ones_like(mask, dtype=bool)
-        min_obs_bound = self.env.observation_space.low[mask]
-        max_obs_bound = self.env.observation_space.high[mask]
-        return spaces.Box(low=min_obs_bound, high=max_obs_bound, dtype=self.env.observation_space.dtype)
+            min_obs_bound = self.env.observation_space.low[mask]
+            max_obs_bound = self.env.observation_space.high[mask]
+            return spaces.Box(low=min_obs_bound, high=max_obs_bound, dtype=self.env.observation_space.dtype)
+        return self.env.observation_space

     def step(self, action: np.ndarray):
         """ This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
@@ -152,7 +158,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
                                               t + 1 + self.current_traj_steps):
                     break

-        infos.update({k: v[:t + 1] for k, v in infos.items()})
+        infos.update({k: v[:t] for k, v in infos.items()})
         self.current_traj_steps += t + 1

         if self.verbose >= 2:
diff --git a/fancy_gym/black_box/raw_interface_wrapper.py b/fancy_gym/black_box/raw_interface_wrapper.py
index 7b2e996..02945a1 100644
--- a/fancy_gym/black_box/raw_interface_wrapper.py
+++ b/fancy_gym/black_box/raw_interface_wrapper.py
@@ -1,4 +1,3 @@
-from abc import abstractmethod
 from typing import Union, Tuple

 import gym
@@ -23,7 +22,6 @@ class RawInterfaceWrapper(gym.Wrapper):
         return np.ones(self.env.observation_space.shape[0], dtype=bool)

     @property
-    @abstractmethod
     def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
         """
            Returns the current position of the action/control dimension.
@@ -32,10 +30,9 @@ class RawInterfaceWrapper(gym.Wrapper):
            it should, however, be implemented regardless.
            E.g. The joint positions that are directly or indirectly controlled by the action.
        """
-        raise NotImplementedError()
+        raise NotImplementedError

     @property
-    @abstractmethod
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
         """
            Returns the current velocity of the action/control dimension.
@@ -44,7 +41,7 @@ class RawInterfaceWrapper(gym.Wrapper):
            it should, however, be implemented regardless.
            E.g. The joint velocities that are directly or indirectly controlled by the action.
        """
-        raise NotImplementedError()
+        raise NotImplementedError

     @property
     def dt(self) -> float:
diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py
index a00570c..9f0299e 100644
--- a/fancy_gym/envs/__init__.py
+++ b/fancy_gym/envs/__init__.py
@@ -126,7 +126,7 @@ for _dims in [5, 7]:
     register(
         id=f'Reacher{_dims}dSparse-v0',
         entry_point='fancy_gym.envs.mujoco:ReacherEnv',
-        max_episode_steps=200,
+        max_episode_steps=MAX_EPISODE_STEPS_REACHER,
         kwargs={
             "sparse": True,
             'reward_weight': 200,
diff --git a/fancy_gym/envs/classic_control/base_reacher/base_reacher.py b/fancy_gym/envs/classic_control/base_reacher/base_reacher.py
index abfe2ca..f2ba135 100644
--- a/fancy_gym/envs/classic_control/base_reacher/base_reacher.py
+++ b/fancy_gym/envs/classic_control/base_reacher/base_reacher.py
@@ -1,4 +1,3 @@
-from abc import ABC, abstractmethod
 from typing import Union, Tuple, Optional

 import gym
@@ -10,7 +9,7 @@ from gym.utils import seeding
 from fancy_gym.envs.classic_control.utils import intersect


-class BaseReacherEnv(gym.Env, ABC):
+class BaseReacherEnv(gym.Env):
     """
     Base class for all reaching environments.
     """
@@ -87,13 +86,6 @@ class BaseReacherEnv(gym.Env, ABC):

         return self._get_obs().copy()

-    @abstractmethod
-    def step(self, action: np.ndarray):
-        """
-        A single step with action in angular velocity space
-        """
-        raise NotImplementedError
-
     def _update_joints(self):
         """
         update joints to get new end-effector position. The other links are only required for rendering.
@@ -120,27 +112,24 @@ class BaseReacherEnv(gym.Env, ABC):
                 return True
         return False

-    @abstractmethod
     def _get_reward(self, action: np.ndarray) -> (float, dict):
-        pass
+        raise NotImplementedError

-    @abstractmethod
     def _get_obs(self) -> np.ndarray:
-        pass
+        raise NotImplementedError

-    @abstractmethod
     def _check_collisions(self) -> bool:
-        pass
+        raise NotImplementedError

-    @abstractmethod
     def _terminate(self, info) -> bool:
-        return False
+        raise NotImplementedError

     def seed(self, seed=None):
         self.np_random, seed = seeding.np_random(seed)
         return [seed]

     def close(self):
+        super(BaseReacherEnv, self).close()
         del self.fig

     @property
diff --git a/fancy_gym/envs/classic_control/base_reacher/base_reacher_direct.py b/fancy_gym/envs/classic_control/base_reacher/base_reacher_direct.py
index 0e44033..ab21b39 100644
--- a/fancy_gym/envs/classic_control/base_reacher/base_reacher_direct.py
+++ b/fancy_gym/envs/classic_control/base_reacher/base_reacher_direct.py
@@ -1,12 +1,10 @@
-from abc import ABC
-
 import numpy as np
 from gym import spaces

 from fancy_gym.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv


-class BaseReacherDirectEnv(BaseReacherEnv, ABC):
+class BaseReacherDirectEnv(BaseReacherEnv):
     """
     Base class for directly controlled reaching environments
     """
diff --git a/fancy_gym/envs/classic_control/base_reacher/base_reacher_torque.py b/fancy_gym/envs/classic_control/base_reacher/base_reacher_torque.py
index 79f3005..7364948 100644
--- a/fancy_gym/envs/classic_control/base_reacher/base_reacher_torque.py
+++ b/fancy_gym/envs/classic_control/base_reacher/base_reacher_torque.py
@@ -1,12 +1,10 @@
-from abc import ABC
-
 import numpy as np
 from gym import spaces

 from fancy_gym.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv


-class BaseReacherTorqueEnv(BaseReacherEnv, ABC):
+class BaseReacherTorqueEnv(BaseReacherEnv):
     """
     Base class for torque controlled reaching environments
     """
diff --git a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py
index 63f40c8..da9ac4d 100644
--- a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py
+++ b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py
@@ -98,7 +98,7 @@ class HopperJumpEnv(HopperEnv):
         if not self.sparse or (self.sparse and self._steps >= MAX_EPISODE_STEPS_HOPPERJUMP):
             healthy_reward = self.healthy_reward
             distance_reward = -goal_dist * self._dist_weight
-            height_reward = (self.max_height if self.sparse else self.get_body_com("torso")[2]) * self._height_weight
+            height_reward = (self.max_height if self.sparse else height_after) * self._height_weight
             contact_reward = -(self.contact_dist or 5) * self._contact_weight
             rewards = self._forward_reward_weight * (distance_reward + height_reward + contact_reward + healthy_reward)

@@ -123,7 +123,7 @@ class HopperJumpEnv(HopperEnv):
         return np.concatenate((super(HopperJumpEnv, self)._get_obs(), goal_dist.copy(), self.goal[:1]))

     def reset_model(self):
-        super(HopperJumpEnv, self).reset_model()
+        # super(HopperJumpEnv, self).reset_model()

         # self.goal = self.np_random.uniform(0.3, 1.35, 1)[0]
         self.goal = np.concatenate([self.np_random.uniform(0.3, 1.35, 1), np.zeros(2, )])
@@ -176,76 +176,76 @@ class HopperJumpEnv(HopperEnv):
             return True
         return False

-# TODO is that needed? if so test it
-class HopperJumpStepEnv(HopperJumpEnv):
-
-    def __init__(self,
-                 xml_file='hopper_jump.xml',
-                 forward_reward_weight=1.0,
-                 ctrl_cost_weight=1e-3,
-                 healthy_reward=1.0,
-                 height_weight=3,
-                 dist_weight=3,
-                 terminate_when_unhealthy=False,
-                 healthy_state_range=(-100.0, 100.0),
-                 healthy_z_range=(0.5, float('inf')),
-                 healthy_angle_range=(-float('inf'), float('inf')),
-                 reset_noise_scale=5e-3,
-                 exclude_current_positions_from_observation=False
-                 ):
-
-        self._height_weight = height_weight
-        self._dist_weight = dist_weight
-        super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
-                         healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
-                         exclude_current_positions_from_observation)
-
-    def step(self, action):
-        self._steps += 1
-
-        self.do_simulation(action, self.frame_skip)
-
-        height_after = self.get_body_com("torso")[2]
-        site_pos_after = self.data.site('foot_site').xpos.copy()
-        self.max_height = max(height_after, self.max_height)
-
-        ctrl_cost = self.control_cost(action)
-        healthy_reward = self.healthy_reward
-        height_reward = self._height_weight * height_after
-        goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0]))
-        goal_dist_reward = -self._dist_weight * goal_dist
-        dist_reward = self._forward_reward_weight * (goal_dist_reward + height_reward)
-
-        rewards = dist_reward + healthy_reward
-        costs = ctrl_cost
-        done = False
-
-        # This is only for logging the distance to goal when first having the contact
-        has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
-
-        if not self.init_floor_contact:
-            self.init_floor_contact = has_floor_contact
-        if self.init_floor_contact and not self.has_left_floor:
-            self.has_left_floor = not has_floor_contact
-        if not self.contact_with_floor and self.has_left_floor:
-            self.contact_with_floor = has_floor_contact
-
-        if self.contact_dist is None and self.contact_with_floor:
-            self.contact_dist = goal_dist
-
-        ##############################################################
-
-        observation = self._get_obs()
-        reward = rewards - costs
-        info = {
-            'height': height_after,
-            'x_pos': site_pos_after,
-            'max_height': copy.copy(self.max_height),
-            'goal': copy.copy(self.goal),
-            'goal_dist': goal_dist,
-            'height_rew': height_reward,
-            'healthy_reward': healthy_reward,
-            'healthy': copy.copy(self.is_healthy),
-            'contact_dist': copy.copy(self.contact_dist) or 0
-        }
-        return observation, reward, done, info
+# # TODO is that needed? if so test it
+# class HopperJumpStepEnv(HopperJumpEnv):
+#
+#     def __init__(self,
+#                  xml_file='hopper_jump.xml',
+#                  forward_reward_weight=1.0,
+#                  ctrl_cost_weight=1e-3,
+#                  healthy_reward=1.0,
+#                  height_weight=3,
+#                  dist_weight=3,
+#                  terminate_when_unhealthy=False,
+#                  healthy_state_range=(-100.0, 100.0),
+#                  healthy_z_range=(0.5, float('inf')),
+#                  healthy_angle_range=(-float('inf'), float('inf')),
+#                  reset_noise_scale=5e-3,
+#                  exclude_current_positions_from_observation=False
+#                  ):
+#
+#         self._height_weight = height_weight
+#         self._dist_weight = dist_weight
+#         super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
+#                          healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
+#                          exclude_current_positions_from_observation)
+#
+#     def step(self, action):
+#         self._steps += 1
+#
+#         self.do_simulation(action, self.frame_skip)
+#
+#         height_after = self.get_body_com("torso")[2]
+#         site_pos_after = self.data.site('foot_site').xpos.copy()
+#         self.max_height = max(height_after, self.max_height)
+#
+#         ctrl_cost = self.control_cost(action)
+#         healthy_reward = self.healthy_reward
+#         height_reward = self._height_weight * height_after
+#         goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0]))
+#         goal_dist_reward = -self._dist_weight * goal_dist
+#         dist_reward = self._forward_reward_weight * (goal_dist_reward + height_reward)
+#
+#         rewards = dist_reward + healthy_reward
+#         costs = ctrl_cost
+#         done = False
+#
+#         # This is only for logging the distance to goal when first having the contact
+#         has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
+#
+#         if not self.init_floor_contact:
+#             self.init_floor_contact = has_floor_contact
+#         if self.init_floor_contact and not self.has_left_floor:
+#             self.has_left_floor = not has_floor_contact
+#         if not self.contact_with_floor and self.has_left_floor:
+#             self.contact_with_floor = has_floor_contact
+#
+#         if self.contact_dist is None and self.contact_with_floor:
+#             self.contact_dist = goal_dist
+#
+#         ##############################################################
+#
+#         observation = self._get_obs()
+#         reward = rewards - costs
+#         info = {
+#             'height': height_after,
+#             'x_pos': site_pos_after,
+#             'max_height': copy.copy(self.max_height),
+#             'goal': copy.copy(self.goal),
+#             'goal_dist': goal_dist,
+#             'height_rew': height_reward,
+#             'healthy_reward': healthy_reward,
+#             'healthy': copy.copy(self.is_healthy),
+#             'contact_dist': copy.copy(self.contact_dist) or 0
+#         }
+#         return observation, reward, done, info
diff --git a/fancy_gym/envs/mujoco/hopper_jump/mp_wrapper.py b/fancy_gym/envs/mujoco/hopper_jump/mp_wrapper.py
index d46fa92..ed95b3d 100644
--- a/fancy_gym/envs/mujoco/hopper_jump/mp_wrapper.py
+++ b/fancy_gym/envs/mujoco/hopper_jump/mp_wrapper.py
@@ -14,7 +14,7 @@ class MPWrapper(RawInterfaceWrapper):
             [False] * (2 + int(not self.exclude_current_positions_from_observation)),  # position
             [True] * 3,  # set to true if randomize initial pos
             [False] * 6,  # velocity
-            [True] * 3,  # goal distance
+            [False] * 3,  # goal distance
             [True]  # goal
         ])

diff --git a/fancy_gym/meta/base_metaworld_mp_wrapper.py b/fancy_gym/meta/base_metaworld_mp_wrapper.py
index ae800ff..4029e28 100644
--- a/fancy_gym/meta/base_metaworld_mp_wrapper.py
+++ b/fancy_gym/meta/base_metaworld_mp_wrapper.py
@@ -1,4 +1,3 @@
-from abc import ABC
 from typing import Tuple, Union

 import numpy as np
@@ -6,7 +5,7 @@ import numpy as np
 from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper


-class BaseMetaworldMPWrapper(RawInterfaceWrapper, ABC):
+class BaseMetaworldMPWrapper(RawInterfaceWrapper):
     @property
     def current_pos(self) -> Union[float, int, np.ndarray]:
         r_close = self.env.data.get_joint_qpos("r_close")
diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py
index b0bfe8b..5221423 100644
--- a/fancy_gym/utils/make_env_helpers.py
+++ b/fancy_gym/utils/make_env_helpers.py
@@ -1,3 +1,4 @@
+import logging
 import re
 import uuid
 from collections.abc import MutableMapping
@@ -148,9 +149,9 @@
         raise ValueError('Cannot used sub-trajectory learning and replanning together.')

     # add time_step observation when replanning
-    if (learn_sub_trajs or do_replanning) and not any(issubclass(w, TimeAwareObservation) for w in kwargs['wrappers']):
+    if (learn_sub_trajs or do_replanning) and not any(issubclass(w, TimeAwareObservation) for w in wrappers):
         # Add as first wrapper in order to alter observation
-        kwargs['wrappers'].insert(0, TimeAwareObservation)
+        wrappers.insert(0, TimeAwareObservation)

     env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs)

@@ -310,7 +311,11 @@ def make_gym(env_id, seed, **kwargs):
     """
     # Getting the existing keywords to allow for nested dict updates for BB envs
     # gym only allows for non nested updates.
-    all_kwargs = deepcopy(registry.get(env_id).kwargs)
+    try:
+        all_kwargs = deepcopy(registry.get(env_id).kwargs)
+    except AttributeError as e:
+        logging.error(f'The gym environment with id {env_id} could not be found.')
+        raise e
     nested_update(all_kwargs, kwargs)
     kwargs = all_kwargs

diff --git a/fancy_gym/utils/time_aware_observation.py b/fancy_gym/utils/time_aware_observation.py
index 4fa0d74..b2cbc78 100644
--- a/fancy_gym/utils/time_aware_observation.py
+++ b/fancy_gym/utils/time_aware_observation.py
@@ -40,9 +40,10 @@ class TimeAwareObservation(gym.ObservationWrapper):
         high = np.append(self.observation_space.high, 1.0)
         self.observation_space = Box(low, high, dtype=self.observation_space.dtype)
         self.t = 0
+        self._max_episode_steps = env.spec.max_episode_steps

     def observation(self, observation):
-        """Adds to the observation with the current time step.
+        """Adds to the observation with the current time step normalized with max steps.

         Args:
             observation: The observation to add the time step to
@@ -50,7 +51,7 @@ class TimeAwareObservation(gym.ObservationWrapper):
         Returns:
             The observation with the time step appended to
         """
-        return np.append(observation, self.t/self.env.spec.max_episode_steps)
+        return np.append(observation, self.t / self._max_episode_steps)

     def step(self, action):
         """Steps through the environment, incrementing the time step.
diff --git a/setup.py b/setup.py
index 9c539da..1148e85 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,9 @@ from setuptools import setup, find_packages
 extras = {
     "dmc": ["dm_control>=1.0.1"],
     "metaworld": ["metaworld @ git+https://github.com/rlworkgroup/metaworld.git@master#egg=metaworld",
-                  'mujoco-py<2.2,>=2.1'],
+                  'mujoco-py<2.2,>=2.1',
+                  'scipy'
+                  ],
 }

 # All dependencies
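
Note (not part of the patch): a minimal sketch of the context-masking behaviour that the reworked BlackBoxWrapper.observation and _get_observation_space implement when return_context_observation is set. The 5-dimensional space and the mask selecting the last two entries are made up for illustration; only numpy and gym.spaces are needed.

    import numpy as np
    from gym import spaces

    full_space = spaces.Box(low=-np.inf, high=np.inf, shape=(5,), dtype=np.float64)
    context_mask = np.array([False, False, False, True, True])

    # Masked space, analogous to the return_context_observation branch of _get_observation_space.
    masked_space = spaces.Box(low=full_space.low[context_mask],
                              high=full_space.high[context_mask],
                              dtype=full_space.dtype)

    observation = np.arange(5.0)
    # Analogous to BlackBoxWrapper.observation: select the context dims, then cast the dtype.
    context_obs = observation[context_mask].astype(masked_space.dtype)
    print(masked_space.shape, context_obs)  # (2,) [3. 4.]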
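
Note (not part of the patch): with the cached _max_episode_steps, the feature appended by TimeAwareObservation is t / max_episode_steps. A small usage sketch, assuming the pre-0.26 gym API used by this code base (reset returns only the observation, step returns four values) and the wrapper's usual reset/step bookkeeping; Pendulum-v1 and its 200-step limit are only an example.

    import gym

    from fancy_gym.utils.time_aware_observation import TimeAwareObservation

    env = TimeAwareObservation(gym.make('Pendulum-v1'))  # spec.max_episode_steps == 200
    obs = env.reset()
    print(obs[-1])  # 0.0 at the start of an episode
    obs, reward, done, info = env.step(env.action_space.sample())
    print(obs[-1])  # 0.005, i.e. 1 / 200 after the first step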