2024-01-23 17:16:12 +01:00
<!DOCTYPE html>
< html class = "writer-html5" lang = "en" >
< head >
< meta charset = "utf-8" / > < meta name = "generator" content = "Docutils 0.19: https://docutils.sourceforge.io/" / >
< meta name = "viewport" content = "width=device-width, initial-scale=1.0" / >
2024-03-14 15:58:08 +01:00
< title > Creating new MP Environments — Fancy Gym 0.3.0 documentation< / title >
2024-01-23 17:16:12 +01:00
< link rel = "stylesheet" href = "../_static/pygments.css" type = "text/css" / >
< link rel = "stylesheet" href = "../_static/css/theme.css" type = "text/css" / >
< link rel = "stylesheet" href = "../_static/style.css" type = "text/css" / >
< link rel = "shortcut icon" href = "../_static/icon.svg" / >
<!-- [if lt IE 9]>
< script src = "../_static/js/html5shiv.min.js" > < / script >
<![endif]-->
< script data-url_root = "../" id = "documentation_options" src = "../_static/documentation_options.js" > < / script >
< script src = "../_static/jquery.js" > < / script >
< script src = "../_static/underscore.js" > < / script >
< script src = "../_static/_sphinx_javascript_frameworks_compat.js" > < / script >
< script src = "../_static/doctools.js" > < / script >
< script src = "../_static/sphinx_highlight.js" > < / script >
< script src = "../_static/js/theme.js" > < / script >
< link rel = "index" title = "Index" href = "../genindex.html" / >
< link rel = "search" title = "Search" href = "../search.html" / >
< link rel = "next" title = "Fancy" href = "../envs/fancy/index.html" / >
< link rel = "prev" title = "Basic Usage" href = "basic_usage.html" / >
< / head >
< body class = "wy-body-for-nav" >
< div class = "wy-grid-for-nav" >
< nav data-toggle = "wy-nav-shift" class = "wy-nav-side" >
< div class = "wy-side-scroll" >
< div class = "wy-side-nav-search" >
< a href = "../index.html" class = "icon icon-home" >
Fancy Gym
< img src = "../_static/icon.svg" class = "logo" alt = "Logo" / >
< / a >
< div class = "version" >
2024-03-14 15:58:08 +01:00
0.3.0
2024-01-23 17:16:12 +01:00
< / div >
< div role = "search" >
< form id = "rtd-search-form" class = "wy-form" action = "../search.html" method = "get" >
< input type = "text" name = "q" placeholder = "Search docs" aria-label = "Search docs" / >
< input type = "hidden" name = "check_keywords" value = "yes" / >
< input type = "hidden" name = "area" value = "default" / >
< / form >
< / div >
< / div > < div class = "wy-menu wy-menu-vertical" data-spy = "affix" role = "navigation" aria-label = "Navigation menu" >
< p class = "caption" role = "heading" > < span class = "caption-text" > User Guide< / span > < / p >
< ul class = "current" >
< li class = "toctree-l1" > < a class = "reference internal" href = "installation.html" > Installation< / a > < / li >
< li class = "toctree-l1" > < a class = "reference internal" href = "episodic_rl.html" > What is Episodic RL?< / a > < / li >
< li class = "toctree-l1" > < a class = "reference internal" href = "basic_usage.html" > Basic Usage< / a > < / li >
< li class = "toctree-l1 current" > < a class = "current reference internal" href = "#" > Creating new MP Environments< / a > < / li >
< / ul >
< p class = "caption" role = "heading" > < span class = "caption-text" > Environments< / span > < / p >
< ul >
< li class = "toctree-l1" > < a class = "reference internal" href = "../envs/fancy/index.html" > Fancy< / a > < / li >
< li class = "toctree-l1" > < a class = "reference internal" href = "../envs/dmc.html" > DeepMind Control (DMC)< / a > < / li >
< li class = "toctree-l1" > < a class = "reference internal" href = "../envs/meta.html" > Metaworld< / a > < / li >
< li class = "toctree-l1" > < a class = "reference internal" href = "../envs/open_ai.html" > Gymnasium< / a > < / li >
< / ul >
< p class = "caption" role = "heading" > < span class = "caption-text" > Examples< / span > < / p >
< ul >
< li class = "toctree-l1" > < a class = "reference internal" href = "../examples/general.html" > General Usage Examples< / a > < / li >
< li class = "toctree-l1" > < a class = "reference internal" href = "../examples/dmc.html" > DeepMind Control Examples< / a > < / li >
< li class = "toctree-l1" > < a class = "reference internal" href = "../examples/metaworld.html" > Metaworld Examples< / a > < / li >
< li class = "toctree-l1" > < a class = "reference internal" href = "../examples/open_ai.html" > OpenAI Envs Examples< / a > < / li >
< li class = "toctree-l1" > < a class = "reference internal" href = "../examples/movement_primitives.html" > Movement Primitives Examples< / a > < / li >
< li class = "toctree-l1" > < a class = "reference internal" href = "../examples/mp_params_tuning.html" > MP Params Tuning Example< / a > < / li >
< li class = "toctree-l1" > < a class = "reference internal" href = "../examples/pd_control_gain_tuning.html" > PD Control Gain Tuning Example< / a > < / li >
< li class = "toctree-l1" > < a class = "reference internal" href = "../examples/replanning_envs.html" > Replanning Example< / a > < / li >
< / ul >
< p class = "caption" role = "heading" > < span class = "caption-text" > API< / span > < / p >
< ul >
< li class = "toctree-l1" > < a class = "reference internal" href = "../api.html" > API< / a > < / li >
< / ul >
< / div >
< / div >
< / nav >
< section data-toggle = "wy-nav-shift" class = "wy-nav-content-wrap" > < nav class = "wy-nav-top" aria-label = "Mobile navigation menu" >
< i data-toggle = "wy-nav-top" class = "fa fa-bars" > < / i >
< a href = "../index.html" > Fancy Gym< / a >
< / nav >
< div class = "wy-nav-content" >
< div class = "rst-content" >
< div role = "navigation" aria-label = "Page navigation" >
< ul class = "wy-breadcrumbs" >
< li > < a href = "../index.html" class = "icon icon-home" aria-label = "Home" > < / a > < / li >
< li class = "breadcrumb-item active" > Creating new MP Environments< / li >
< li class = "wy-breadcrumbs-aside" >
< a href = "https://github.com/ALRhub/fancy_gym/blob/release/docs/source/guide/upgrading_envs.rst" class = "fa fa-github" > Edit on GitHub< / a >
< / li >
< / ul >
< hr / >
< / div >
< div role = "main" class = "document" itemscope = "itemscope" itemtype = "http://schema.org/Article" >
< div itemprop = "articleBody" >
< section id = "creating-new-mp-environments" >
< h1 > Creating new MP Environments< a class = "headerlink" href = "#creating-new-mp-environments" title = "Permalink to this heading" > < / a > < / h1 >
< p > This guide will explain to you how to upgrade an existing step-based Gymnasium environment into one that supports Movement Primitives (MPs). If you are looking for a guide to build such a Gymnasium environment instead, please have a look at < a class = "reference external" href = "https://gymnasium.farama.org/tutorials/gymnasium_basics/environment_creation/" > this guide< / a > .< / p >
< p > In case a required task is not supported yet in the MP framework, it can
be created relatively easily. For the task at hand, the following
< a class = "reference external" href = "https://github.com/ALRhub/fancy_gym/tree/master/fancy_gym/black_box/raw_interface_wrapper.py" > interface< / a >
needs to be implemented.< / p >
< div class = "highlight-python notranslate" > < div class = "highlight" > < pre > < span > < / span > < span class = "kn" > from< / span > < span class = "nn" > abc< / span > < span class = "kn" > import< / span > < span class = "n" > abstractmethod< / span >
< span class = "kn" > from< / span > < span class = "nn" > typing< / span > < span class = "kn" > import< / span > < span class = "n" > Union< / span > < span class = "p" > ,< / span > < span class = "n" > Tuple< / span >
< span class = "kn" > import< / span > < span class = "nn" > gymnasium< / span > < span class = "k" > as< / span > < span class = "nn" > gym< / span >
< span class = "kn" > import< / span > < span class = "nn" > numpy< / span > < span class = "k" > as< / span > < span class = "nn" > np< / span >
< span class = "k" > class< / span > < span class = "nc" > RawInterfaceWrapper< / span > < span class = "p" > (< / span > < span class = "n" > gym< / span > < span class = "o" > .< / span > < span class = "n" > Wrapper< / span > < span class = "p" > ):< / span >
< span class = "n" > mp_config< / span > < span class = "o" > =< / span > < span class = "p" > {< / span >
< span class = "s1" > ' ProMP' < / span > < span class = "p" > :< / span > < span class = "p" > {},< / span >
< span class = "s1" > ' DMP' < / span > < span class = "p" > :< / span > < span class = "p" > {},< / span >
< span class = "s1" > ' ProDMP' < / span > < span class = "p" > :< / span > < span class = "p" > {},< / span >
< span class = "p" > }< / span >
< span class = "nd" > @property< / span >
< span class = "k" > def< / span > < span class = "nf" > context_mask< / span > < span class = "p" > (< / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "o" > -> < / span > < span class = "n" > np< / span > < span class = "o" > .< / span > < span class = "n" > ndarray< / span > < span class = "p" > :< / span >
< span class = "w" > < / span > < span class = "sd" > " " " < / span >
< span class = "sd" > Returns boolean mask of the same shape as the observation space.< / span >
< span class = "sd" > It determines whether the observation is returned for the contextual case or not.< / span >
< span class = "sd" > This effectively allows to filter unwanted or unnecessary observations from the full step-based case.< / span >
< span class = "sd" > E.g. Velocities starting at 0 are only changing after the first action. Given we only receive the< / span >
< span class = "sd" > context/part of the first observation, the velocities are not necessary in the observation for the task.< / span >
< span class = "sd" > Returns:< / span >
< span class = "sd" > bool array representing the indices of the observations< / span >
< span class = "sd" > " " " < / span >
< span class = "k" > return< / span > < span class = "n" > np< / span > < span class = "o" > .< / span > < span class = "n" > ones< / span > < span class = "p" > (< / span > < span class = "bp" > self< / span > < span class = "o" > .< / span > < span class = "n" > env< / span > < span class = "o" > .< / span > < span class = "n" > observation_space< / span > < span class = "o" > .< / span > < span class = "n" > shape< / span > < span class = "p" > [< / span > < span class = "mi" > 0< / span > < span class = "p" > ],< / span > < span class = "n" > dtype< / span > < span class = "o" > =< / span > < span class = "nb" > bool< / span > < span class = "p" > )< / span >
< span class = "nd" > @property< / span >
< span class = "nd" > @abstractmethod< / span >
< span class = "k" > def< / span > < span class = "nf" > current_pos< / span > < span class = "p" > (< / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "o" > -> < / span > < span class = "n" > Union< / span > < span class = "p" > [< / span > < span class = "nb" > float< / span > < span class = "p" > ,< / span > < span class = "nb" > int< / span > < span class = "p" > ,< / span > < span class = "n" > np< / span > < span class = "o" > .< / span > < span class = "n" > ndarray< / span > < span class = "p" > ,< / span > < span class = "n" > Tuple< / span > < span class = "p" > ]:< / span >
< span class = "w" > < / span > < span class = "sd" > " " " < / span >
< span class = "sd" > Returns the current position of the action/control dimension.< / span >
< span class = "sd" > The dimensionality has to match the action/control dimension.< / span >
< span class = "sd" > This is not required when exclusively using velocity control,< / span >
< span class = "sd" > it should, however, be implemented regardless.< / span >
< span class = "sd" > E.g. The joint positions that are directly or indirectly controlled by the action.< / span >
< span class = "sd" > " " " < / span >
< span class = "k" > raise< / span > < span class = "ne" > NotImplementedError< / span > < span class = "p" > ()< / span >
< span class = "nd" > @property< / span >
< span class = "nd" > @abstractmethod< / span >
< span class = "k" > def< / span > < span class = "nf" > current_vel< / span > < span class = "p" > (< / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "o" > -> < / span > < span class = "n" > Union< / span > < span class = "p" > [< / span > < span class = "nb" > float< / span > < span class = "p" > ,< / span > < span class = "nb" > int< / span > < span class = "p" > ,< / span > < span class = "n" > np< / span > < span class = "o" > .< / span > < span class = "n" > ndarray< / span > < span class = "p" > ,< / span > < span class = "n" > Tuple< / span > < span class = "p" > ]:< / span >
< span class = "w" > < / span > < span class = "sd" > " " " < / span >
< span class = "sd" > Returns the current velocity of the action/control dimension.< / span >
< span class = "sd" > The dimensionality has to match the action/control dimension.< / span >
< span class = "sd" > This is not required when exclusively using position control,< / span >
< span class = "sd" > it should, however, be implemented regardless.< / span >
< span class = "sd" > E.g. The joint velocities that are directly or indirectly controlled by the action.< / span >
< span class = "sd" > " " " < / span >
< span class = "k" > raise< / span > < span class = "ne" > NotImplementedError< / span > < span class = "p" > ()< / span >
< / pre > < / div >
< / div >
< p > Default configurations for MPs can be overwritten by defining attributes
in mp_config. Available parameters are documented in the < a class = "reference external" href = "https://github.com/ALRhub/MP_PyTorch/blob/main/doc/README.md" > MP_PyTorch
Userguide< / a > .< / p >
< div class = "highlight-python notranslate" > < div class = "highlight" > < pre > < span > < / span > < span class = "k" > class< / span > < span class = "nc" > RawInterfaceWrapper< / span > < span class = "p" > (< / span > < span class = "n" > gym< / span > < span class = "o" > .< / span > < span class = "n" > Wrapper< / span > < span class = "p" > ):< / span >
< span class = "n" > mp_config< / span > < span class = "o" > =< / span > < span class = "p" > {< / span >
< span class = "s1" > ' ProMP' < / span > < span class = "p" > :< / span > < span class = "p" > {< / span >
< span class = "s1" > ' phase_generator_kwargs' < / span > < span class = "p" > :< / span > < span class = "p" > {< / span >
< span class = "s1" > ' phase_generator_type' < / span > < span class = "p" > :< / span > < span class = "s1" > ' linear' < / span >
< span class = "c1" > # When selecting another generator type, the default configuration will not be merged for the attribute.< / span >
< span class = "p" > },< / span >
< span class = "s1" > ' controller_kwargs' < / span > < span class = "p" > :< / span > < span class = "p" > {< / span >
< span class = "s1" > ' p_gains' < / span > < span class = "p" > :< / span > < span class = "mf" > 0.5< / span > < span class = "o" > *< / span > < span class = "n" > np< / span > < span class = "o" > .< / span > < span class = "n" > array< / span > < span class = "p" > ([< / span > < span class = "mf" > 1.0< / span > < span class = "p" > ,< / span > < span class = "mf" > 4.0< / span > < span class = "p" > ,< / span > < span class = "mf" > 2.0< / span > < span class = "p" > ,< / span > < span class = "mf" > 4.0< / span > < span class = "p" > ,< / span > < span class = "mf" > 1.0< / span > < span class = "p" > ,< / span > < span class = "mf" > 4.0< / span > < span class = "p" > ,< / span > < span class = "mf" > 1.0< / span > < span class = "p" > ]),< / span >
< span class = "s1" > ' d_gains' < / span > < span class = "p" > :< / span > < span class = "mf" > 0.5< / span > < span class = "o" > *< / span > < span class = "n" > np< / span > < span class = "o" > .< / span > < span class = "n" > array< / span > < span class = "p" > ([< / span > < span class = "mf" > 0.1< / span > < span class = "p" > ,< / span > < span class = "mf" > 0.4< / span > < span class = "p" > ,< / span > < span class = "mf" > 0.2< / span > < span class = "p" > ,< / span > < span class = "mf" > 0.4< / span > < span class = "p" > ,< / span > < span class = "mf" > 0.1< / span > < span class = "p" > ,< / span > < span class = "mf" > 0.4< / span > < span class = "p" > ,< / span > < span class = "mf" > 0.1< / span > < span class = "p" > ]),< / span >
< span class = "p" > },< / span >
< span class = "s1" > ' basis_generator_kwargs' < / span > < span class = "p" > :< / span > < span class = "p" > {< / span >
< span class = "s1" > ' num_basis' < / span > < span class = "p" > :< / span > < span class = "mi" > 3< / span > < span class = "p" > ,< / span >
< span class = "s1" > ' num_basis_zero_start' < / span > < span class = "p" > :< / span > < span class = "mi" > 1< / span > < span class = "p" > ,< / span >
< span class = "s1" > ' num_basis_zero_goal' < / span > < span class = "p" > :< / span > < span class = "mi" > 1< / span > < span class = "p" > ,< / span >
< span class = "p" > },< / span >
< span class = "p" > },< / span >
< span class = "s1" > ' DMP' < / span > < span class = "p" > :< / span > < span class = "p" > {},< / span >
< span class = "s1" > ' ProDMP' < / span > < span class = "p" > :< / span > < span class = "p" > {},< / span >
< span class = "p" > }< / span >
< span class = "p" > [< / span > < span class = "o" > ...< / span > < span class = "p" > ]< / span >
< / pre > < / div >
< / div >
< p > If you created a new task wrapper, feel free to open a PR, so we can
integrate it for others to use as well. Without the integration the task
can still be used. A rough outline is shown here; for more details
we recommend having a look at the
< a class = "reference internal" href = "../examples/movement_primitives.html#example-mp" > < span class = "std std-ref" > multiple examples< / span > < / a > .< / p >
< p > If the step-based environment is already registered with gym, you can simply do the
following:< / p >
< div class = "highlight-python notranslate" > < div class = "highlight" > < pre > < span > < / span > < span class = "n" > fancy_gym< / span > < span class = "o" > .< / span > < span class = "n" > upgrade< / span > < span class = "p" > (< / span >
< span class = "nb" > id< / span > < span class = "o" > =< / span > < span class = "s1" > ' custom/cool_new_env-v0' < / span > < span class = "p" > ,< / span >
< span class = "n" > mp_wrapper< / span > < span class = "o" > =< / span > < span class = "n" > my_custom_MPWrapper< / span >
< span class = "p" > )< / span >
< / pre > < / div >
< / div >
< p > If the step-based environment is not yet registered with gym, we can add both the
step-based and MP-versions via< / p >
< div class = "highlight-python notranslate" > < div class = "highlight" > < pre > < span > < / span > < span class = "n" > fancy_gym< / span > < span class = "o" > .< / span > < span class = "n" > register< / span > < span class = "p" > (< / span >
< span class = "nb" > id< / span > < span class = "o" > =< / span > < span class = "s1" > ' custom/cool_new_env-v0' < / span > < span class = "p" > ,< / span >
< span class = "n" > entry_point< / span > < span class = "o" > =< / span > < span class = "n" > my_custom_env< / span > < span class = "p" > ,< / span >
< span class = "n" > mp_wrapper< / span > < span class = "o" > =< / span > < span class = "n" > my_custom_MPWrapper< / span >
< span class = "p" > )< / span >
< / pre > < / div >
< / div >
< p > From this point on, you can access the MP-versions of your environments via< / p >
< div class = "highlight-python notranslate" > < div class = "highlight" > < pre > < span > < / span > < span class = "n" > env< / span > < span class = "o" > =< / span > < span class = "n" > gym< / span > < span class = "o" > .< / span > < span class = "n" > make< / span > < span class = "p" > (< / span > < span class = "s1" > ' custom_ProDMP/cool_new_env-v0' < / span > < span class = "p" > )< / span >
< span class = "n" > rewards< / span > < span class = "o" > =< / span > < span class = "mi" > 0< / span >
< span class = "n" > observation< / span > < span class = "p" > ,< / span > < span class = "n" > info< / span > < span class = "o" > =< / span > < span class = "n" > env< / span > < span class = "o" > .< / span > < span class = "n" > reset< / span > < span class = "p" > ()< / span >
< span class = "c1" > # number of samples/full trajectories (multiple environment steps)< / span >
< span class = "k" > for< / span > < span class = "n" > i< / span > < span class = "ow" > in< / span > < span class = "nb" > range< / span > < span class = "p" > (< / span > < span class = "mi" > 5< / span > < span class = "p" > ):< / span >
< span class = "n" > ac< / span > < span class = "o" > =< / span > < span class = "n" > env< / span > < span class = "o" > .< / span > < span class = "n" > action_space< / span > < span class = "o" > .< / span > < span class = "n" > sample< / span > < span class = "p" > ()< / span >
< span class = "n" > observation< / span > < span class = "p" > ,< / span > < span class = "n" > reward< / span > < span class = "p" > ,< / span > < span class = "n" > terminated< / span > < span class = "p" > ,< / span > < span class = "n" > truncated< / span > < span class = "p" > ,< / span > < span class = "n" > info< / span > < span class = "o" > =< / span > < span class = "n" > env< / span > < span class = "o" > .< / span > < span class = "n" > step< / span > < span class = "p" > (< / span > < span class = "n" > ac< / span > < span class = "p" > )< / span >
< span class = "n" > rewards< / span > < span class = "o" > +=< / span > < span class = "n" > reward< / span >
< span class = "k" > if< / span > < span class = "n" > terminated< / span > < span class = "ow" > or< / span > < span class = "n" > truncated< / span > < span class = "p" > :< / span >
< span class = "nb" > print< / span > < span class = "p" > (< / span > < span class = "n" > rewards< / span > < span class = "p" > )< / span >
< span class = "n" > rewards< / span > < span class = "o" > =< / span > < span class = "mi" > 0< / span >
< span class = "n" > observation< / span > < span class = "p" > ,< / span > < span class = "n" > info< / span > < span class = "o" > =< / span > < span class = "n" > env< / span > < span class = "o" > .< / span > < span class = "n" > reset< / span > < span class = "p" > ()< / span >
< / pre > < / div >
< / div >
< / section >
< / div >
< / div >
< footer > < div class = "rst-footer-buttons" role = "navigation" aria-label = "Footer" >
< a href = "basic_usage.html" class = "btn btn-neutral float-left" title = "Basic Usage" accesskey = "p" rel = "prev" > < span class = "fa fa-arrow-circle-left" aria-hidden = "true" > < / span > Previous< / a >
< a href = "../envs/fancy/index.html" class = "btn btn-neutral float-right" title = "Fancy" accesskey = "n" rel = "next" > Next < span class = "fa fa-arrow-circle-right" aria-hidden = "true" > < / span > < / a >
< / div >
< hr / >
< div role = "contentinfo" >
< p > © Copyright 2020-2024, Fabian Otto, Onur Celik, Dominik Roth, Hongyi Zhou.< / p >
< / div >
Built with < a href = "https://www.sphinx-doc.org/" > Sphinx< / a > using a
< a href = "https://github.com/readthedocs/sphinx_rtd_theme" > theme< / a >
provided by < a href = "https://readthedocs.org" > Read the Docs< / a > .
< / footer >
< / div >
< / div >
< / section >
< / div >
< script >
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
< / script >
< / body >
< / html >