fancy_gym/docs/build/html/guide/basic_usage.html

284 lines
19 KiB
HTML
Raw Permalink Normal View History

<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Basic Usage &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
<link rel="shortcut icon" href="../_static/icon.svg"/>
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
<script src="../_static/jquery.js"></script>
<script src="../_static/underscore.js"></script>
<script src="../_static/_sphinx_javascript_frameworks_compat.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/sphinx_highlight.js"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Creating new MP Environments" href="upgrading_envs.html" />
<link rel="prev" title="What is Episodic RL?" href="episodic_rl.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home">
Fancy Gym
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">User Guide</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="episodic_rl.html">What is Episodic RL?</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Basic Usage</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#step-based-environments">Step-Based Environments</a></li>
<li class="toctree-l2"><a class="reference internal" href="#black-box-environments">Black-Box Environments</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="upgrading_envs.html">Creating new MP Environments</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Environments</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../envs/fancy/index.html">Fancy</a></li>
<li class="toctree-l1"><a class="reference internal" href="../envs/dmc.html">DeepMind Control (DMC)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../envs/meta.html">Metaworld</a></li>
<li class="toctree-l1"><a class="reference internal" href="../envs/open_ai.html">Gymnasium</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Examples</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../examples/general.html">General Usage Examples</a></li>
<li class="toctree-l1"><a class="reference internal" href="../examples/dmc.html">DeepMind Control Examples</a></li>
<li class="toctree-l1"><a class="reference internal" href="../examples/metaworld.html">Metaworld Examples</a></li>
<li class="toctree-l1"><a class="reference internal" href="../examples/open_ai.html">OpenAI Envs Examples</a></li>
<li class="toctree-l1"><a class="reference internal" href="../examples/movement_primitives.html">Movement Primitives Examples</a></li>
<li class="toctree-l1"><a class="reference internal" href="../examples/mp_params_tuning.html">MP Params Tuning Example</a></li>
<li class="toctree-l1"><a class="reference internal" href="../examples/pd_control_gain_tuning.html">PD Control Gain Tuning Example</a></li>
<li class="toctree-l1"><a class="reference internal" href="../examples/replanning_envs.html">Replanning Example</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../api.html">API</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">Fancy Gym</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">Basic Usage</li>
<li class="wy-breadcrumbs-aside">
<a href="https://github.com/ALRhub/fancy_gym/blob/release/docs/source/guide/basic_usage.rst" class="fa fa-github"> Edit on GitHub</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="basic-usage">
<h1>Basic Usage<a class="headerlink" href="#basic-usage" title="Permalink to this heading"></a></h1>
<p>We only show the basics here; for a more detailed look, see the <a class="reference internal" href="../examples/general.html#example-general"><span class="std std-ref">multiple examples</span></a> we have prepared.</p>
<section id="step-based-environments">
<h2>Step-Based Environments<a class="headerlink" href="#step-based-environments" title="Permalink to this heading"></a></h2>
<p>Regular step-based environments provided by Fancy Gym are registered in the
<code class="docutils literal notranslate"><span class="pre">fancy/</span></code> namespace.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Legacy versions of Fancy Gym used <code class="docutils literal notranslate"><span class="pre">fancy_gym.make(...)</span></code>. This is no longer supported and will raise an Exception in newer versions.</p>
</div>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">gymnasium</span> <span class="k">as</span> <span class="nn">gym</span>
<span class="kn">import</span> <span class="nn">fancy_gym</span>
<span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s1">&#39;fancy/Reacher5d-v0&#39;</span><span class="p">)</span>
<span class="c1"># or env = gym.make(&#39;metaworld/reach-v2&#39;) # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS</span>
<span class="c1"># or env = gym.make(&#39;dm_control/ball_in_cup-catch-v0&#39;)</span>
<span class="c1"># or env = gym.make(&#39;Reacher-v2&#39;)</span>
<span class="n">observation</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1000</span><span class="p">):</span>
    <span class="n">action</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
    <span class="n">observation</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">action</span><span class="p">)</span>
    <span class="k">if</span> <span class="n">i</span> <span class="o">%</span> <span class="mi">5</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
        <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
    <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
        <span class="n">observation</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
</pre></div>
</div>
</section>
<section id="black-box-environments">
<h2>Black-Box Environments<a class="headerlink" href="#black-box-environments" title="Permalink to this heading"></a></h2>
<p>By default, all environments return the cumulative episode reward; this
can, however, be changed if necessary. Optionally, each environment
returns all collected information from each step as part of the infos.
This information is, however, mainly meant for debugging as well as
logging and not for training.</p>
<table class="docutils align-default">
<thead>
<tr class="row-odd"><th class="head"><p>Key</p></th>
<th class="head"><p>Description</p></th>
<th class="head"><p>Type</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><cite>positions</cite></p></td>
<td><p>Generated trajectory from MP</p></td>
<td><p>Optional</p></td>
</tr>
<tr class="row-odd"><td><p><cite>velocities</cite></p></td>
<td><p>Generated trajectory from MP</p></td>
<td><p>Optional</p></td>
</tr>
<tr class="row-even"><td><p><cite>step_actions</cite></p></td>
<td><p>Step-wise executed action based on controller output</p></td>
<td><p>Optional</p></td>
</tr>
<tr class="row-odd"><td><p><cite>step_observations</cite></p></td>
<td><p>Step-wise intermediate observations</p></td>
<td><p>Optional</p></td>
</tr>
<tr class="row-even"><td><p><cite>step_rewards</cite></p></td>
<td><p>Step-wise rewards</p></td>
<td><p>Optional</p></td>
</tr>
<tr class="row-odd"><td><p><cite>trajectory_length</cite></p></td>
<td><p>Total number of environment interactions</p></td>
<td><p>Always</p></td>
</tr>
<tr class="row-even"><td><p><cite>other</cite></p></td>
<td><p>All other information from the underlying environment is returned as a list with length <cite>trajectory_length</cite> maintaining the original key.
If some information is not provided at every time step, the missing values are filled with <cite>None</cite>.</p></td>
<td><p>Always</p></td>
</tr>
</tbody>
</table>
<p>Existing MP tasks can be created the same way as above. The namespace of
an MP-variant of an environment is given by
<code class="docutils literal notranslate"><span class="pre">&lt;original</span> <span class="pre">namespace&gt;_&lt;MP</span> <span class="pre">name&gt;/</span></code>. Just keep in mind that calling
<code class="docutils literal notranslate"><span class="pre">step()</span></code> executes a full trajectory.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Currently, we are also in the process of enabling replanning as
well as learning of sub-trajectories. This allows splitting the
episode into multiple trajectories and is a hybrid setting between
step-based and black-box learning. While this is already
implemented, it is still in beta and requires further testing. Feel
free to try it and open an issue with any problems that occur.</p>
</div>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">gymnasium</span> <span class="k">as</span> <span class="nn">gym</span>
<span class="kn">import</span> <span class="nn">fancy_gym</span>
<span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s1">&#39;fancy_ProMP/Reacher5d-v0&#39;</span><span class="p">)</span>
<span class="c1"># or env = gym.make(&#39;metaworld_ProDMP/reach-v2&#39;)</span>
<span class="c1"># or env = gym.make(&#39;dm_control_DMP/ball_in_cup-catch-v0&#39;)</span>
<span class="c1"># or env = gym.make(&#39;gym_ProMP/Reacher-v2&#39;) # mp versions of envs added directly by gymnasium are in the gym_&lt;MP-type&gt; NS</span>
<span class="c1"># render() can be called once in the beginning with all necessary arguments.</span>
<span class="c1"># To turn it off again just call render() without any arguments.</span>
<span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span><span class="p">)</span>
<span class="c1"># This returns the context information, not the full state observation</span>
<span class="n">observation</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">5</span><span class="p">):</span>
    <span class="n">action</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
    <span class="n">observation</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">action</span><span class="p">)</span>
    <span class="c1"># terminated or truncated is always True as we are working on the episode level, hence we always reset()</span>
    <span class="n">observation</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
</pre></div>
</div>
<p>To show all available environments, we provide some additional
convenience variables. Each of them is a dictionary with the keys
<code class="docutils literal notranslate"><span class="pre">DMP</span></code>, <code class="docutils literal notranslate"><span class="pre">ProMP</span></code>, <code class="docutils literal notranslate"><span class="pre">ProDMP</span></code> and <code class="docutils literal notranslate"><span class="pre">all</span></code>, each of which stores a list of
available environment ids.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">fancy_gym</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;All Black-box tasks:&quot;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Fancy Black-box tasks:&quot;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;OpenAI Gym Black-box tasks:&quot;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Deepmind Control Black-box tasks:&quot;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;MetaWorld Black-box tasks:&quot;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;If you add custom envs, their mp versions will be found in:&quot;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS</span><span class="p">[</span><span class="s1">&#39;&lt;my_custom_namespace&gt;&#39;</span><span class="p">])</span>
</pre></div>
</div>
</section>
</section>
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="episodic_rl.html" class="btn btn-neutral float-left" title="What is Episodic RL?" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<a href="upgrading_envs.html" class="btn btn-neutral float-right" title="Creating new MP Environments" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2020-2024, Fabian Otto, Onur Celik, Dominik Roth, Hongyi Zhou.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>