From 268b74e5bd526c6944537bd9f9e077b1f6c7f6b6 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Tue, 5 Dec 2023 18:14:12 +0100
Subject: [PATCH] Create 'Guide' from README

---
 docs/source/guide/basic_usage.rst    | 127 +++++++++++++++++++++++++
 docs/source/guide/episodic_rl.rst    |  50 ++++++++++
 docs/source/guide/installation.rst   |  72 ++++++++++++++
 docs/source/guide/upgrading_envs.rst | 136 +++++++++++++++++++++++++++
 4 files changed, 385 insertions(+)
 create mode 100644 docs/source/guide/basic_usage.rst
 create mode 100644 docs/source/guide/episodic_rl.rst
 create mode 100644 docs/source/guide/installation.rst
 create mode 100644 docs/source/guide/upgrading_envs.rst

diff --git a/docs/source/guide/basic_usage.rst b/docs/source/guide/basic_usage.rst
new file mode 100644
index 0000000..7bb4100
--- /dev/null
+++ b/docs/source/guide/basic_usage.rst
@@ -0,0 +1,127 @@
Basic Usage
-----------

We will only show the basics here and have prepared `multiple examples `__
for a more detailed look.

Step-Based Environments
~~~~~~~~~~~~~~~~~~~~~~~

Regular step-based environments added by Fancy Gym are registered under the
``fancy/`` namespace.

.. note::
    Legacy versions of Fancy Gym used ``fancy_gym.make(...)``. This is no
    longer supported and will raise an Exception on newer versions.

.. code:: python

    import gymnasium as gym
    import fancy_gym

    env = gym.make('fancy/Reacher5d-v0')
    # or env = gym.make('metaworld/reach-v2')  # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS
    # or env = gym.make('dm_control/ball_in_cup-catch-v0')
    # or env = gym.make('Reacher-v2')
    observation, info = env.reset(seed=1)

    for i in range(1000):
        action = env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(action)
        if i % 5 == 0:
            env.render()

        if terminated or truncated:
            observation, info = env.reset()

Black-box Environments
~~~~~~~~~~~~~~~~~~~~~~

By default, all black-box environments provide the cumulative episode reward;
this can, however, be changed if necessary. Optionally, each environment
returns all collected information from each step as part of the infos. This
information is mainly meant for debugging and logging, not for training.
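
The snippet below is a small sketch of how this information can be accessed; the individual keys are described in the table that follows. The environment id is only an example, and creating MP-variants like this is explained later in this section.

.. code:: python

    import gymnasium as gym
    import fancy_gym

    env = gym.make('fancy_ProMP/Reacher5d-v0')
    observation, info = env.reset(seed=1)

    # A single step executes the whole trajectory and returns the cumulative episode reward.
    action = env.action_space.sample()
    observation, episode_reward, terminated, truncated, info = env.step(action)

    print(info['trajectory_length'])   # always provided
    print(info.get('step_rewards'))    # optional step-wise information, may be absent
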

+---------------------+------------------------------------------------------------------------+----------+
| Key                 | Description                                                            | Type     |
+=====================+========================================================================+==========+
| `positions`         | Position trajectory generated by the MP                                | Optional |
+---------------------+------------------------------------------------------------------------+----------+
| `velocities`        | Velocity trajectory generated by the MP                                | Optional |
+---------------------+------------------------------------------------------------------------+----------+
| `step_actions`      | Step-wise executed action based on controller output                   | Optional |
+---------------------+------------------------------------------------------------------------+----------+
| `step_observations` | Step-wise intermediate observations                                    | Optional |
+---------------------+------------------------------------------------------------------------+----------+
| `step_rewards`      | Step-wise rewards                                                      | Optional |
+---------------------+------------------------------------------------------------------------+----------+
| `trajectory_length` | Total number of environment interactions                               | Always   |
+---------------------+------------------------------------------------------------------------+----------+
| `other`             | All other information from the underlying environment is returned as a | Always   |
|                     | list with length `trajectory_length` maintaining the original key. In  |          |
|                     | case some information is not provided at every time step, the missing  |          |
|                     | values are filled with `None`.                                         |          |
+---------------------+------------------------------------------------------------------------+----------+

Existing MP tasks can be created the same way as above. The namespace of
the MP-variant of an environment is given by
``<original namespace>_<MP name>/<original name>``. Just keep in mind that
calling ``step()`` executes a full trajectory.

.. note::
    Currently, we are also in the process of enabling replanning as well as
    learning of sub-trajectories. This allows splitting the episode into
    multiple trajectories and is a hybrid setting between step-based and
    black-box learning. While this is already implemented, it is still in
    beta and requires further testing. Feel free to try it and open an issue
    with any problems that occur.

.. code:: python

    import gymnasium as gym
    import fancy_gym

    env = gym.make('fancy_ProMP/Reacher5d-v0')
    # or env = gym.make('metaworld_ProDMP/reach-v2')
    # or env = gym.make('dm_control_DMP/ball_in_cup-catch-v0')
    # or env = gym.make('gym_ProMP/Reacher-v2')  # MP versions of envs added directly by gymnasium are in the gym_ NS

    # render() can be called once in the beginning with all necessary arguments.
    # To turn it off again, just call render() without any arguments.
    env.render(mode='human')

    # This returns the context information, not the full state observation
    observation, info = env.reset(seed=1)

    for i in range(5):
        action = env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(action)

        # terminated or truncated is always True, as we are working on the episode level, hence we always reset()
        observation, info = env.reset()

To show all available environments, we provide some additional convenience
variables. All of them return a dictionary with the keys ``DMP``, ``ProMP``,
``ProDMP`` and ``all`` that store a list of available environment ids.

.. code:: python

    import fancy_gym

    print("All Black-box tasks:")
    print(fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS)

    print("Fancy Black-box tasks:")
    print(fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS)

    print("OpenAI Gym Black-box tasks:")
    print(fancy_gym.ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS)

    print("Deepmind Control Black-box tasks:")
    print(fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS)

    print("MetaWorld Black-box tasks:")
    print(fancy_gym.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS)

    print("If you add custom envs, their MP versions will be found in:")
    print(fancy_gym.MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['<my_custom_namespace>'])

diff --git a/docs/source/guide/episodic_rl.rst b/docs/source/guide/episodic_rl.rst
new file mode 100644
index 0000000..4c0b7bc
--- /dev/null
+++ b/docs/source/guide/episodic_rl.rst
@@ -0,0 +1,50 @@
What is Episodic RL?
--------------------

Movement primitive (MP) environments differ from traditional step-based
environments. They align more with concepts from stochastic search,
black-box optimization, and methods commonly found in classical robotics
and control. Instead of individual steps, MP environments operate on an
episode basis, executing complete trajectories. These trajectories are
produced by trajectory generators like Dynamic Movement Primitives (DMP),
Probabilistic Movement Primitives (ProMP) or Probabilistic Dynamic
Movement Primitives (ProDMP).
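
As a toy illustration of what such a trajectory generator does (this is a simplified sketch, not the MP implementation used internally), a handful of weights can parametrize a whole trajectory via basis functions:

.. code:: python

    import numpy as np

    def rbf_basis(t, num_basis, width=0.05):
        # Normalized radial basis functions over the phase variable t in [0, 1]
        centers = np.linspace(0, 1, num_basis)
        features = np.exp(-0.5 * (t[:, None] - centers[None, :]) ** 2 / width)
        return features / features.sum(axis=1, keepdims=True)

    t = np.linspace(0, 1, 100)                        # phase over one episode
    weights = np.random.randn(5)                      # the "action": a few MP parameters
    trajectory = rbf_basis(t, num_basis=5) @ weights  # desired positions for one joint over the episode
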


Once generated, these trajectories are converted into step-by-step actions
using a trajectory tracking controller. The specific controller chosen
depends on the environment's requirements. Currently, we support position,
velocity, and PD controllers tailored for position, velocity, and torque
control. Additionally, we have a specialized controller designed for the
MetaWorld control suite.
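
As a rough sketch of what such a tracking controller computes at every step (the gains and dimensions below are made up for illustration; real values are environment-specific and configured via the MP wrapper), a PD controller for torque control looks like this:

.. code:: python

    import numpy as np

    # Illustrative gains for a 7-DoF arm; real environments define their own.
    p_gains = np.full(7, 1.0)
    d_gains = np.full(7, 0.1)

    # One point of the generated trajectory (desired) vs. the robot's current state.
    desired_pos, desired_vel = np.zeros(7), np.zeros(7)
    current_pos, current_vel = np.full(7, 0.05), np.zeros(7)

    # PD tracking law: the resulting torque is what gets applied in the underlying step-based env.
    torque = p_gains * (desired_pos - current_pos) + d_gains * (desired_vel - current_vel)
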


While the overarching objective of MP environments remains the learning of
an optimal policy, the actions here represent the parametrization of motion
primitives to craft the right trajectory. Our framework further enhances
this by accommodating a contextual setting. At the episode's onset, we
present the context space, a subset of the observation space. This demands
the prediction of a new action or MP parametrization for every unique
context.
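
To make the contextual setting concrete, the short sketch below (using an environment from this package as an example) compares the context the policy observes with the MP parametrization it has to predict:

.. code:: python

    import gymnasium as gym
    import fancy_gym

    env = gym.make('fancy_ProMP/Reacher5d-v0')

    # The observation is the context and is presented once at the start of the episode.
    context, info = env.reset(seed=1)
    print("Context shape:", context.shape)

    # The action is the MP parametrization: one vector that defines the whole trajectory.
    print("MP parameter shape:", env.action_space.shape)
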

diff --git a/docs/source/guide/installation.rst b/docs/source/guide/installation.rst
new file mode 100644
index 0000000..5885d43
--- /dev/null
+++ b/docs/source/guide/installation.rst
@@ -0,0 +1,72 @@
Installation
------------

We recommend installing ``fancy_gym`` into a virtual environment as provided
by `venv `__. Third-party alternatives to venv, like `Poetry `__ or
`Conda `__, can also be used.

Installation from PyPI (recommended)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Install ``fancy_gym`` via

.. code:: bash

    pip install fancy_gym

We have a few optional dependencies. If you also want to install those, use

.. code:: bash

    # to install all optional dependencies
    pip install 'fancy_gym[all]'

    # or choose only those you want
    pip install 'fancy_gym[dmc,box2d,mujoco-legacy,jax,testing]'

Pip cannot automatically install up-to-date versions of metaworld, since they
are not available on PyPI yet. Install metaworld via

.. code:: bash

    pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld

Installation from master
~~~~~~~~~~~~~~~~~~~~~~~~

1. Clone the repository

.. code:: bash

    git clone git@github.com:ALRhub/fancy_gym.git

2. Go to the folder

.. code:: bash

    cd fancy_gym

3. Install with

.. code:: bash

    pip install -e .

We have a few optional dependencies. If you also want to install those, use

.. code:: bash

    # to install all optional dependencies
    pip install -e '.[all]'

    # or choose only those you want
    pip install -e '.[dmc,box2d,mujoco-legacy,jax,testing]'

Metaworld has to be installed manually with

.. code:: bash

    pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld

diff --git a/docs/source/guide/upgrading_envs.rst b/docs/source/guide/upgrading_envs.rst
new file mode 100644
index 0000000..f04e8a0
--- /dev/null
+++ b/docs/source/guide/upgrading_envs.rst
@@ -0,0 +1,136 @@
Creating new MP Environments
----------------------------

In case a required task is not yet supported in the MP framework, it can be
created relatively easily. For the task at hand, the following
`interface `__ needs to be implemented.

.. code:: python

    from abc import abstractmethod
    from typing import Union, Tuple

    import gymnasium as gym
    import numpy as np


    class RawInterfaceWrapper(gym.Wrapper):
        mp_config = {
            'ProMP': {},
            'DMP': {},
            'ProDMP': {},
        }

        @property
        def context_mask(self) -> np.ndarray:
            """
            Returns a boolean mask of the same shape as the observation space.
            It determines whether the observation is returned for the contextual case or not.
            This effectively allows filtering unwanted or unnecessary observations from the full step-based case.
            E.g. velocities starting at 0 only change after the first action. Given we only receive the
            context/part of the first observation, the velocities are not necessary in the observation for the task.
            Returns:
                bool array representing the indices of the observations
            """
            return np.ones(self.env.observation_space.shape[0], dtype=bool)

        @property
        @abstractmethod
        def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
            """
            Returns the current position of the action/control dimension.
            The dimensionality has to match the action/control dimension.
            This is not required when exclusively using velocity control,
            it should, however, be implemented regardless.
            E.g. the joint positions that are directly or indirectly controlled by the action.
            """
            raise NotImplementedError()

        @property
        @abstractmethod
        def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
            """
            Returns the current velocity of the action/control dimension.
            The dimensionality has to match the action/control dimension.
            This is not required when exclusively using position control,
            it should, however, be implemented regardless.
            E.g. the joint velocities that are directly or indirectly controlled by the action.
            """
            raise NotImplementedError()

Default configurations for MPs can be overwritten by defining attributes in
``mp_config``. Available parameters are documented in the `MP_PyTorch
Userguide `__.

.. code:: python

    class RawInterfaceWrapper(gym.Wrapper):
        mp_config = {
            'ProMP': {
                'phase_generator_kwargs': {
                    'phase_generator_type': 'linear'
                    # When selecting another generator type, the default configuration will not be merged for the attribute.
                },
                'controller_kwargs': {
                    'p_gains': 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
                    'd_gains': 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]),
                },
                'basis_generator_kwargs': {
                    'num_basis': 3,
                    'num_basis_zero_start': 1,
                    'num_basis_zero_goal': 1,
                },
            },
            'DMP': {},
            'ProDMP': {},
        }

        [...]

If you created a new task wrapper, feel free to open a PR so we can integrate
it for others to use as well. Even without the integration, the task can
still be used. A rough outline is shown below; for more details, we recommend
having a look at the `examples `__.

If the step-based environment is already registered with gym, you can simply
do the following:

.. code:: python

    fancy_gym.upgrade(
        id='custom/cool_new_env-v0',
        mp_wrapper=my_custom_MPWrapper
    )

If the step-based environment is not yet registered with gym, we can add both
the step-based and MP-versions via

.. code:: python

    fancy_gym.register(
        id='custom/cool_new_env-v0',
        entry_point=my_custom_env,
        mp_wrapper=my_custom_MPWrapper
    )

From this point on, you can access the MP-versions of your environment via

.. code:: python

    env = gym.make('custom_ProDMP/cool_new_env-v0')

    rewards = 0
    observation, info = env.reset()

    # number of samples/full trajectories (multiple environment steps)
    for i in range(5):
        ac = env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(ac)
        rewards += reward

        if terminated or truncated:
            print(rewards)
            rewards = 0
            observation, info = env.reset()
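
To tie the pieces above together, here is a minimal, hypothetical wrapper sketch. The environment name, the observation layout, and the ``data.qpos``/``data.qvel`` access are made up for illustration (they assume a MuJoCo-style env), and the import path of ``RawInterfaceWrapper`` is an assumption based on the interface shown at the top of this page.

.. code:: python

    import numpy as np
    import fancy_gym
    from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper  # assumed import path


    class MyRobotMPWrapper(RawInterfaceWrapper):
        mp_config = {
            'ProMP': {},
            'DMP': {},
            'ProDMP': {},
        }

        @property
        def context_mask(self) -> np.ndarray:
            # Hypothetical layout: only the first five entries (joint positions) form the context.
            mask = np.zeros(self.env.observation_space.shape[0], dtype=bool)
            mask[:5] = True
            return mask

        @property
        def current_pos(self) -> np.ndarray:
            # Hypothetical MuJoCo-style access to the controlled joint positions.
            return self.env.unwrapped.data.qpos[:5].copy()

        @property
        def current_vel(self) -> np.ndarray:
            return self.env.unwrapped.data.qvel[:5].copy()


    # Assuming 'custom/MyRobot-v0' is already registered as a step-based env:
    fancy_gym.upgrade(id='custom/MyRobot-v0', mp_wrapper=MyRobotMPWrapper)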