""" Environment wrapper for Gym environments (MuJoCo locomotion tasks) with state observations. """ import numpy as np import gym class MujocoLocomotionLowdimWrapper(gym.Env): def __init__( self, env, normalization_path, ): self.env = env # setup spaces self.action_space = env.action_space self.observation_space = env.observation_space normalization = np.load(normalization_path) self.obs_min = normalization["obs_min"] self.obs_max = normalization["obs_max"] self.action_min = normalization["action_min"] self.action_max = normalization["action_max"] def seed(self, seed=None): if seed is not None: np.random.seed(seed=seed) else: np.random.seed() def reset(self, **kwargs): """Ignore passed-in arguments like seed""" options = kwargs.get("options", {}) new_seed = options.get("seed", None) if new_seed is not None: self.seed(seed=new_seed) raw_obs = self.env.reset() # normalize obs = self.normalize_obs(raw_obs) return obs def normalize_obs(self, obs): return 2 * ((obs - self.obs_min) / (self.obs_max - self.obs_min + 1e-6) - 0.5) def unnormaliza_action(self, action): action = (action + 1) / 2 # [-1, 1] -> [0, 1] return action * (self.action_max - self.action_min) + self.action_min def step(self, action): raw_action = self.unnormaliza_action(action) raw_obs, reward, done, info = self.env.step(raw_action) # normalize obs = self.normalize_obs(raw_obs) return obs, reward, done, info def render(self, **kwargs): return self.env.render()