INITIAL

2020-05-21 19:05:06 +09:00 · 2020-05-21 19:05:06 +09:00 · 034bd64990
commit 034bd64990
13 changed files with 1354 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,7 @@
+*.pyc
+**/__pycache__/
+**/.pytest_cache/
+**/dist/
+**/build/
+**/*.egg-info/
+**/.mypy_cache/
--- a/LICENSE
+++ b/LICENSE
@ -0,0 +1,201 @@
+                              Apache License
+                        Version 2.0, January 2004
+                     http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+   "License" shall mean the terms and conditions for use, reproduction,
+   and distribution as defined by Sections 1 through 9 of this document.
+
+   "Licensor" shall mean the copyright owner or entity authorized by
+   the copyright owner that is granting the License.
+
+   "Legal Entity" shall mean the union of the acting entity and all
+   other entities that control, are controlled by, or are under common
+   control with that entity. For the purposes of this definition,
+   "control" means (i) the power, direct or indirect, to cause the
+   direction or management of such entity, whether by contract or
+   otherwise, or (ii) ownership of fifty percent (50%) or more of the
+   outstanding shares, or (iii) beneficial ownership of such entity.
+
+   "You" (or "Your") shall mean an individual or Legal Entity
+   exercising permissions granted by this License.
+
+   "Source" form shall mean the preferred form for making modifications,
+   including but not limited to software source code, documentation
+   source, and configuration files.
+
+   "Object" form shall mean any form resulting from mechanical
+   transformation or translation of a Source form, including but
+   not limited to compiled object code, generated documentation,
+   and conversions to other media types.
+
+   "Work" shall mean the work of authorship, whether in Source or
+   Object form, made available under the License, as indicated by a
+   copyright notice that is included in or attached to the work
+   (an example is provided in the Appendix below).
+
+   "Derivative Works" shall mean any work, whether in Source or Object
+   form, that is based on (or derived from) the Work and for which the
+   editorial revisions, annotations, elaborations, or other modifications
+   represent, as a whole, an original work of authorship. For the purposes
+   of this License, Derivative Works shall not include works that remain
+   separable from, or merely link (or bind by name) to the interfaces of,
+   the Work and Derivative Works thereof.
+
+   "Contribution" shall mean any work of authorship, including
+   the original version of the Work and any modifications or additions
+   to that Work or Derivative Works thereof, that is intentionally
+   submitted to Licensor for inclusion in the Work by the copyright owner
+   or by an individual or Legal Entity authorized to submit on behalf of
+   the copyright owner. For the purposes of this definition, "submitted"
+   means any form of electronic, verbal, or written communication sent
+   to the Licensor or its representatives, including but not limited to
+   communication on electronic mailing lists, source code control systems,
+   and issue tracking systems that are managed by, or on behalf of, the
+   Licensor for the purpose of discussing and improving the Work, but
+   excluding communication that is conspicuously marked or otherwise
+   designated in writing by the copyright owner as "Not a Contribution."
+
+   "Contributor" shall mean Licensor and any individual or Legal Entity
+   on behalf of whom a Contribution has been received by Licensor and
+   subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   copyright license to reproduce, prepare Derivative Works of,
+   publicly display, publicly perform, sublicense, and distribute the
+   Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   (except as stated in this section) patent license to make, have made,
+   use, offer to sell, sell, import, and otherwise transfer the Work,
+   where such license applies only to those patent claims licensable
+   by such Contributor that are necessarily infringed by their
+   Contribution(s) alone or by combination of their Contribution(s)
+   with the Work to which such Contribution(s) was submitted. If You
+   institute patent litigation against any entity (including a
+   cross-claim or counterclaim in a lawsuit) alleging that the Work
+   or a Contribution incorporated within the Work constitutes direct
+   or contributory patent infringement, then any patent licenses
+   granted to You under this License for that Work shall terminate
+   as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+   Work or Derivative Works thereof in any medium, with or without
+   modifications, and in Source or Object form, provided that You
+   meet the following conditions:
+
+   (a) You must give any other recipients of the Work or
+       Derivative Works a copy of this License; and
+
+   (b) You must cause any modified files to carry prominent notices
+       stating that You changed the files; and
+
+   (c) You must retain, in the Source form of any Derivative Works
+       that You distribute, all copyright, patent, trademark, and
+       attribution notices from the Source form of the Work,
+       excluding those notices that do not pertain to any part of
+       the Derivative Works; and
+
+   (d) If the Work includes a "NOTICE" text file as part of its
+       distribution, then any Derivative Works that You distribute must
+       include a readable copy of the attribution notices contained
+       within such NOTICE file, excluding those notices that do not
+       pertain to any part of the Derivative Works, in at least one
+       of the following places: within a NOTICE text file distributed
+       as part of the Derivative Works; within the Source form or
+       documentation, if provided along with the Derivative Works; or,
+       within a display generated by the Derivative Works, if and
+       wherever such third-party notices normally appear. The contents
+       of the NOTICE file are for informational purposes only and
+       do not modify the License. You may add Your own attribution
+       notices within Derivative Works that You distribute, alongside
+       or as an addendum to the NOTICE text from the Work, provided
+       that such additional attribution notices cannot be construed
+       as modifying the License.
+
+   You may add Your own copyright statement to Your modifications and
+   may provide additional or different license terms and conditions
+   for use, reproduction, or distribution of Your modifications, or
+   for any such Derivative Works as a whole, provided Your use,
+   reproduction, and distribution of the Work otherwise complies with
+   the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+   any Contribution intentionally submitted for inclusion in the Work
+   by You to the Licensor shall be under the terms and conditions of
+   this License, without any additional terms or conditions.
+   Notwithstanding the above, nothing herein shall supersede or modify
+   the terms of any separate license agreement you may have executed
+   with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+   names, trademarks, service marks, or product names of the Licensor,
+   except as required for reasonable and customary use in describing the
+   origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+   agreed to in writing, Licensor provides the Work (and each
+   Contributor provides its Contributions) on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+   implied, including, without limitation, any warranties or conditions
+   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+   PARTICULAR PURPOSE. You are solely responsible for determining the
+   appropriateness of using or redistributing the Work and assume any
+   risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+   whether in tort (including negligence), contract, or otherwise,
+   unless required by applicable law (such as deliberate and grossly
+   negligent acts) or agreed to in writing, shall any Contributor be
+   liable to You for damages, including any direct, indirect, special,
+   incidental, or consequential damages of any character arising as a
+   result of this License or out of the use or inability to use the
+   Work (including but not limited to damages for loss of goodwill,
+   work stoppage, computer failure or malfunction, or any and all
+   other commercial damages or losses), even if such Contributor
+   has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+   the Work or Derivative Works thereof, You may choose to offer,
+   and charge a fee for, acceptance of support, warranty, indemnity,
+   or other liability obligations and/or rights consistent with this
+   License. However, in accepting such obligations, You may act only
+   on Your own behalf and on Your sole responsibility, not on behalf
+   of any other Contributor, and only if You agree to indemnify,
+   defend, and hold each Contributor harmless for any liability
+   incurred by, or claims asserted against, such Contributor by reason
+   of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+   To apply the Apache License to your work, attach the following
+   boilerplate notice, with the fields enclosed by brackets "[]"
+   replaced with your own identifying information. (Don't include
+   the brackets!)  The text should be enclosed in the appropriate
+   comment syntax for the file format. We also recommend that a
+   file or class name and description of purpose be included on the
+   same "printed page" as the copyright notice for easier
+   identification within third-party archives.
+
+Copyright 2020 Yuji Kanagawa
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
--- a/README.md
+++ b/README.md
@ -0,0 +1,17 @@
+# mujoco-maze
+
+Some maze environments for reinforcement learning (RL) using [mujoco-py] and
+[openai gym][gym].
+
+This project gratefully builds on code from [tensorflow/models][models], [rllab],
+and [deep-skill-chaining][dsc].
+
+## License
+This project is licensed under the Apache License, Version 2.0
+([LICENSE-APACHE](LICENSE) or http://www.apache.org/licenses/LICENSE-2.0).
+
+[dsc]: https://github.com/deep-skill-chaining/deep-skill-chaining
+[gym]: https://github.com/openai/gym
+[models]: https://github.com/tensorflow/models/tree/master/research/efficient-hrl
+[mujoco-py]: https://github.com/openai/mujoco-py
+[rllab]: https://github.com/rll/rllab
--- a/mujoco_maze/__init__.py
+++ b/mujoco_maze/__init__.py
@ -0,0 +1 @@
+
--- a/mujoco_maze/ant.py
+++ b/mujoco_maze/ant.py
@ -0,0 +1,156 @@
+# Copyright 2018 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Wrapper for creating the ant environment in gym_mujoco."""
+
+import math
+import numpy as np
+import mujoco_py
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+
+def q_inv(a):
+    return [a[0], -a[1], -a[2], -a[3]]
+
+
+def q_mult(a, b):  # multiply two quaternion
+    w = a[0] * b[0] - a[1] * b[1] - a[2] * b[2] - a[3] * b[3]
+    i = a[0] * b[1] + a[1] * b[0] + a[2] * b[3] - a[3] * b[2]
+    j = a[0] * b[2] - a[1] * b[3] + a[2] * b[0] + a[3] * b[1]
+    k = a[0] * b[3] + a[1] * b[2] - a[2] * b[1] + a[3] * b[0]
+    return [w, i, j, k]
+
+
+class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    FILE = "ant.xml"
+    ORI_IND = 3
+
+    def __init__(
+        self,
+        file_path=None,
+        expose_all_qpos=True,
+        expose_body_coms=None,
+        expose_body_comvels=None,
+    ):
+        self._expose_all_qpos = expose_all_qpos
+        self._expose_body_coms = expose_body_coms
+        self._expose_body_comvels = expose_body_comvels
+        self._body_com_indices = {}
+        self._body_comvel_indices = {}
+
+        mujoco_env.MujocoEnv.__init__(self, file_path, 5)
+        utils.EzPickle.__init__(self)
+
+    @property
+    def physics(self):
+        # check mujoco version is greater than version 1.50 to call correct physics
+        # model containing PyMjData object for getting and setting position/velocity
+        # check https://github.com/openai/mujoco-py/issues/80 for updates to api
+        if mujoco_py.get_version() >= "1.50":
+            return self.sim
+        else:
+            return self.model
+
+    def _step(self, a):
+        return self.step(a)
+
+    def step(self, a):
+        xposbefore = self.get_body_com("torso")[0]
+        self.do_simulation(a, self.frame_skip)
+        xposafter = self.get_body_com("torso")[0]
+        forward_reward = (xposafter - xposbefore) / self.dt
+        ctrl_cost = 0.5 * np.square(a).sum()
+        survive_reward = 1.0
+        reward = forward_reward - ctrl_cost + survive_reward
+        _ = self.state_vector()
+        done = False
+        ob = self._get_obs()
+        return (
+            ob,
+            reward,
+            done,
+            dict(
+                reward_forward=forward_reward,
+                reward_ctrl=-ctrl_cost,
+                reward_survive=survive_reward,
+            ),
+        )
+
+    def _get_obs(self):
+        # No cfrc observation
+        if self._expose_all_qpos:
+            obs = np.concatenate(
+                [
+                    self.physics.data.qpos.flat[:15],  # Ensures only ant obs.
+                    self.physics.data.qvel.flat[:14],
+                ]
+            )
+        else:
+            obs = np.concatenate(
+                [self.physics.data.qpos.flat[2:15], self.physics.data.qvel.flat[:14],]
+            )
+
+        if self._expose_body_coms is not None:
+            for name in self._expose_body_coms:
+                com = self.get_body_com(name)
+                if name not in self._body_com_indices:
+                    indices = range(len(obs), len(obs) + len(com))
+                    self._body_com_indices[name] = indices
+                obs = np.concatenate([obs, com])
+
+        if self._expose_body_comvels is not None:
+            for name in self._expose_body_comvels:
+                comvel = self.get_body_comvel(name)
+                if name not in self._body_comvel_indices:
+                    indices = range(len(obs), len(obs) + len(comvel))
+                    self._body_comvel_indices[name] = indices
+                obs = np.concatenate([obs, comvel])
+        return obs
+
+    def reset_model(self):
+        qpos = self.init_qpos + self.np_random.uniform(
+            size=self.model.nq, low=-0.1, high=0.1
+        )
+        qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
+
+        # Set everything other than ant to original position and 0 velocity.
+        qpos[15:] = self.init_qpos[15:]
+        qvel[14:] = 0.0
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def viewer_setup(self):
+        self.viewer.cam.distance = self.model.stat.extent * 0.5
+
+    def get_ori(self):
+        ori = [0, 1, 0, 0]
+        rot = self.physics.data.qpos[
+            self.__class__.ORI_IND : self.__class__.ORI_IND + 4
+        ]  # take the quaternion
+        ori = q_mult(q_mult(rot, ori), q_inv(rot))[1:3]  # project onto x-y plane
+        ori = math.atan2(ori[1], ori[0])
+        return ori
+
+    def set_xy(self, xy):
+        qpos = np.copy(self.physics.data.qpos)
+        qpos[0] = xy[0]
+        qpos[1] = xy[1]
+
+        qvel = self.physics.data.qvel
+        self.set_state(qpos, qvel)
+
+    def get_xy(self):
+        return self.physics.data.qpos[:2]
--- a/mujoco_maze/ant_maze_env.py
+++ b/mujoco_maze/ant_maze_env.py
@ -0,0 +1,21 @@
+# Copyright 2018 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from environments.maze_env import MazeEnv
+from environments.ant import AntEnv
+
+
+class AntMazeEnv(MazeEnv):
+    """Maze environment that uses :class:`AntEnv` as its robot model."""
+
+    # Robot model class; MazeEnv.__init__ reads it via ``MODEL_CLASS`` and uses
+    # its ``FILE`` attribute to locate the MuJoCo XML.
+    MODEL_CLASS = AntEnv
--- a/mujoco_maze/assets/ant.xml
+++ b/mujoco_maze/assets/ant.xml
@ -0,0 +1,81 @@
+<mujoco model="ant">
+  <compiler inertiafromgeom="true" angle="degree" coordinate="local" />
+  <option timestep="0.02" integrator="RK4" />
+  <custom>
+    <numeric name="init_qpos" data="0.0 0.0 0.55 1.0 0.0 0.0 0.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0 1.0" />
+  </custom>
+  <default>
+    <joint limited="true" armature="1" damping="1" />
+    <geom condim="3" conaffinity="0" margin="0.01" friction="1 0.5 0.5" solref=".02 1" solimp=".8 .8 .01" rgba="0.8 0.6 0.4 1" density="5.0" />
+  </default>
+  <asset>
+    <texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
+    <texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
+    <texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
+    <material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1"  reflectance="0.5" />
+    <material name='geom' texture="texgeom" texuniform="true" />
+  </asset>
+  <worldbody>
+    <light directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular=".1 .1 .1" pos="0 0 1.3" dir="-0 0 -1.3" />
+    <geom name='floor' pos='0 0 0' size='40 40 40' type='plane' conaffinity='1' rgba='0.8 0.9 0.8 1' condim='3' />
+    <body name="torso" pos="0 0 0.75">
+      <geom name="torso_geom" type="sphere" size="0.25" pos="0 0 0" />
+      <joint name="root" type="free" limited="false" pos="0 0 0" axis="0 0 1" margin="0.01" armature="0" damping="0" />
+      <body name="front_left_leg" pos="0 0 0">
+        <geom name="aux_1_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 0.2 0.2 0.0" />
+        <body name="aux_1" pos="0.2 0.2 0">
+          <joint name="hip_1" type="hinge" pos="0.0 0.0 0.0" axis="0 0 1" range="-30 30" />
+          <geom name="left_leg_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 0.2 0.2 0.0" />
+          <body pos="0.2 0.2 0">
+            <joint name="ankle_1" type="hinge" pos="0.0 0.0 0.0" axis="-1 1 0" range="30 70" />
+            <geom name="left_ankle_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 0.4 0.4 0.0" />
+          </body>
+        </body>
+      </body>
+      <body name="front_right_leg" pos="0 0 0">
+        <geom name="aux_2_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 -0.2 0.2 0.0" />
+        <body name="aux_2" pos="-0.2 0.2 0">
+          <joint name="hip_2" type="hinge" pos="0.0 0.0 0.0" axis="0 0 1" range="-30 30" />
+          <geom name="right_leg_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 -0.2 0.2 0.0" />
+          <body pos="-0.2 0.2 0">
+            <joint name="ankle_2" type="hinge" pos="0.0 0.0 0.0" axis="1 1 0" range="-70 -30" />
+            <geom name="right_ankle_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 -0.4 0.4 0.0" />
+          </body>
+        </body>
+      </body>
+      <body name="back_leg" pos="0 0 0">
+        <geom name="aux_3_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" />
+        <body name="aux_3" pos="-0.2 -0.2 0">
+          <joint name="hip_3" type="hinge" pos="0.0 0.0 0.0" axis="0 0 1" range="-30 30" />
+          <geom name="back_leg_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" />
+          <body pos="-0.2 -0.2 0">
+            <joint name="ankle_3" type="hinge" pos="0.0 0.0 0.0" axis="-1 1 0" range="-70 -30" />
+            <geom name="third_ankle_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 -0.4 -0.4 0.0" />
+          </body>
+        </body>
+      </body>
+      <body name="right_back_leg" pos="0 0 0">
+        <geom name="aux_4_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 0.2 -0.2 0.0" />
+        <body name="aux_4" pos="0.2 -0.2 0">
+          <joint name="hip_4" type="hinge" pos="0.0 0.0 0.0" axis="0 0 1" range="-30 30" />
+          <geom name="rightback_leg_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 0.2 -0.2 0.0" />
+          <body pos="0.2 -0.2 0">
+            <joint name="ankle_4" type="hinge" pos="0.0 0.0 0.0" axis="1 1 0" range="30 70" />
+            <geom name="fourth_ankle_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 0.4 -0.4 0.0" />
+          </body>
+        </body>
+      </body>
+    </body>
+
+  </worldbody>
+  <actuator>
+    <motor joint="hip_4" ctrlrange="-30.0 30.0" ctrllimited="true" />
+    <motor joint="ankle_4" ctrlrange="-30.0 30.0" ctrllimited="true" />
+    <motor joint="hip_1" ctrlrange="-30.0 30.0" ctrllimited="true" />
+    <motor joint="ankle_1" ctrlrange="-30.0 30.0" ctrllimited="true" />
+    <motor joint="hip_2" ctrlrange="-30.0 30.0" ctrllimited="true" />
+    <motor joint="ankle_2" ctrlrange="-30.0 30.0" ctrllimited="true" />
+    <motor joint="hip_3" ctrlrange="-30.0 30.0" ctrllimited="true" />
+    <motor joint="ankle_3" ctrlrange="-30.0 30.0" ctrllimited="true" />
+  </actuator>
+</mujoco>
--- a/mujoco_maze/maze_env.py
+++ b/mujoco_maze/maze_env.py
@ -0,0 +1,550 @@
+# Copyright 2018 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Adapted from rllab maze_env.py."""
+
+import os
+import tempfile
+import xml.etree.ElementTree as ET
+import math
+import numpy as np
+import gym
+
+from environments import maze_env_utils
+
+# Directory that contains mujoco xml files.
+MODEL_DIR = "environments/assets"
+
+
+class MazeEnv(gym.Env):
+    MODEL_CLASS = None
+
+    MAZE_HEIGHT = None
+    MAZE_SIZE_SCALING = None
+
+    def __init__(
+        self,
+        maze_id=None,
+        maze_height=0.5,
+        maze_size_scaling=8,
+        n_bins=0,
+        sensor_range=3.0,
+        sensor_span=2 * math.pi,
+        observe_blocks=False,
+        put_spin_near_agent=False,
+        top_down_view=False,
+        manual_collision=False,
+        *args,
+        **kwargs,
+    ):
+        self._maze_id = maze_id
+
+        model_cls = self.__class__.MODEL_CLASS
+        if model_cls is None:
+            raise "MODEL_CLASS unspecified!"
+        xml_path = os.path.join(MODEL_DIR, model_cls.FILE)
+        tree = ET.parse(xml_path)
+        worldbody = tree.find(".//worldbody")
+
+        self.MAZE_HEIGHT = height = maze_height
+        self.MAZE_SIZE_SCALING = size_scaling = maze_size_scaling
+        self._n_bins = n_bins
+        self._sensor_range = sensor_range * size_scaling
+        self._sensor_span = sensor_span
+        self._observe_blocks = observe_blocks
+        self._put_spin_near_agent = put_spin_near_agent
+        self._top_down_view = top_down_view
+        self._manual_collision = manual_collision
+
+        self.MAZE_STRUCTURE = structure = maze_env_utils.construct_maze(
+            maze_id=self._maze_id
+        )
+        self.elevated = any(
+            -1 in row for row in structure
+        )  # Elevate the maze to allow for falling.
+        self.blocks = any(
+            any(maze_env_utils.can_move(r) for r in row) for row in structure
+        )  # Are there any movable blocks?
+
+        torso_x, torso_y = self._find_robot()
+        self._init_torso_x = torso_x
+        self._init_torso_y = torso_y
+        self._init_positions = [
+            (x - torso_x, y - torso_y) for x, y in self._find_all_robots()
+        ]
+
+        self._xy_to_rowcol = lambda x, y: (
+            2 + (y + size_scaling / 2) / size_scaling,
+            2 + (x + size_scaling / 2) / size_scaling,
+        )
+        self._view = np.zeros(
+            [5, 5, 3]
+        )  # walls (immovable), chasms (fall), movable blocks
+
+        height_offset = 0.0
+        if self.elevated:
+            # Increase initial z-pos of ant.
+            height_offset = height * size_scaling
+            torso = tree.find(".//body[@name='torso']")
+            torso.set("pos", "0 0 %.2f" % (0.75 + height_offset))
+        if self.blocks:
+            # If there are movable blocks, change simulation settings to perform
+            # better contact detection.
+            default = tree.find(".//default")
+            default.find(".//geom").set("solimp", ".995 .995 .01")
+
+        self.movable_blocks = []
+        for i in range(len(structure)):
+            for j in range(len(structure[0])):
+                struct = structure[i][j]
+                if struct == "r" and self._put_spin_near_agent:
+                    struct = maze_env_utils.Move.SpinXY
+                if self.elevated and struct not in [-1]:
+                    # Create elevated platform.
+                    ET.SubElement(
+                        worldbody,
+                        "geom",
+                        name="elevated_%d_%d" % (i, j),
+                        pos="%f %f %f"
+                        % (
+                            j * size_scaling - torso_x,
+                            i * size_scaling - torso_y,
+                            height / 2 * size_scaling,
+                        ),
+                        size="%f %f %f"
+                        % (
+                            0.5 * size_scaling,
+                            0.5 * size_scaling,
+                            height / 2 * size_scaling,
+                        ),
+                        type="box",
+                        material="",
+                        contype="1",
+                        conaffinity="1",
+                        rgba="0.9 0.9 0.9 1",
+                    )
+                if struct == 1:  # Unmovable block.
+                    # Offset all coordinates so that robot starts at the origin.
+                    ET.SubElement(
+                        worldbody,
+                        "geom",
+                        name="block_%d_%d" % (i, j),
+                        pos="%f %f %f"
+                        % (
+                            j * size_scaling - torso_x,
+                            i * size_scaling - torso_y,
+                            height_offset + height / 2 * size_scaling,
+                        ),
+                        size="%f %f %f"
+                        % (
+                            0.5 * size_scaling,
+                            0.5 * size_scaling,
+                            height / 2 * size_scaling,
+                        ),
+                        type="box",
+                        material="",
+                        contype="1",
+                        conaffinity="1",
+                        rgba="0.4 0.4 0.4 1",
+                    )
+                elif maze_env_utils.can_move(struct):  # Movable block.
+                    # The "falling" blocks are shrunk slightly and increased in mass to
+                    # ensure that it can fall easily through a gap in the platform blocks.
+                    name = "movable_%d_%d" % (i, j)
+                    self.movable_blocks.append((name, struct))
+                    falling = maze_env_utils.can_move_z(struct)
+                    spinning = maze_env_utils.can_spin(struct)
+                    x_offset = 0.25 * size_scaling if spinning else 0.0
+                    y_offset = 0.0
+                    shrink = 0.1 if spinning else 0.99 if falling else 1.0
+                    height_shrink = 0.1 if spinning else 1.0
+                    movable_body = ET.SubElement(
+                        worldbody,
+                        "body",
+                        name=name,
+                        pos="%f %f %f"
+                        % (
+                            j * size_scaling - torso_x + x_offset,
+                            i * size_scaling - torso_y + y_offset,
+                            height_offset + height / 2 * size_scaling * height_shrink,
+                        ),
+                    )
+                    ET.SubElement(
+                        movable_body,
+                        "geom",
+                        name="block_%d_%d" % (i, j),
+                        pos="0 0 0",
+                        size="%f %f %f"
+                        % (
+                            0.5 * size_scaling * shrink,
+                            0.5 * size_scaling * shrink,
+                            height / 2 * size_scaling * height_shrink,
+                        ),
+                        type="box",
+                        material="",
+                        mass="0.001" if falling else "0.0002",
+                        contype="1",
+                        conaffinity="1",
+                        rgba="0.9 0.1 0.1 1",
+                    )
+                    if maze_env_utils.can_move_x(struct):
+                        ET.SubElement(
+                            movable_body,
+                            "joint",
+                            armature="0",
+                            axis="1 0 0",
+                            damping="0.0",
+                            limited="true" if falling else "false",
+                            range="%f %f" % (-size_scaling, size_scaling),
+                            margin="0.01",
+                            name="movable_x_%d_%d" % (i, j),
+                            pos="0 0 0",
+                            type="slide",
+                        )
+                    if maze_env_utils.can_move_y(struct):
+                        ET.SubElement(
+                            movable_body,
+                            "joint",
+                            armature="0",
+                            axis="0 1 0",
+                            damping="0.0",
+                            limited="true" if falling else "false",
+                            range="%f %f" % (-size_scaling, size_scaling),
+                            margin="0.01",
+                            name="movable_y_%d_%d" % (i, j),
+                            pos="0 0 0",
+                            type="slide",
+                        )
+                    if maze_env_utils.can_move_z(struct):
+                        ET.SubElement(
+                            movable_body,
+                            "joint",
+                            armature="0",
+                            axis="0 0 1",
+                            damping="0.0",
+                            limited="true",
+                            range="%f 0" % (-height_offset),
+                            margin="0.01",
+                            name="movable_z_%d_%d" % (i, j),
+                            pos="0 0 0",
+                            type="slide",
+                        )
+                    if maze_env_utils.can_spin(struct):
+                        ET.SubElement(
+                            movable_body,
+                            "joint",
+                            armature="0",
+                            axis="0 0 1",
+                            damping="0.0",
+                            limited="false",
+                            name="spinable_%d_%d" % (i, j),
+                            pos="0 0 0",
+                            type="ball",
+                        )
+
+        torso = tree.find(".//body[@name='torso']")
+        geoms = torso.findall(".//geom")
+        for geom in geoms:
+            if "name" not in geom.attrib:
+                raise Exception("Every geom of the torso must have a name " "defined")
+
+        _, file_path = tempfile.mkstemp(text=True, suffix=".xml")
+        tree.write(file_path)
+
+        self.wrapped_env = model_cls(*args, file_path=file_path, **kwargs)
+
+    def get_ori(self):
+        """Return the orientation reported by the wrapped environment."""
+        orientation = self.wrapped_env.get_ori()
+        return orientation
+
+    def get_top_down_view(self):
+        """Rasterize the maze into ``self._view`` and return it.
+
+        The view is reset to zeros and then filled channel by channel:
+        channel 0 receives walls (structure value 1), channel 1 chasms
+        (structure value -1) and channel 2 the movable blocks.  Every object
+        is splatted onto the cell given by ``self._xy_to_rowcol`` and its 8
+        neighbours, with weights derived from the fractional part of its
+        (row, col) position; the nine weights sum to 1, and contributions
+        falling outside the view are dropped.
+
+        Side effects: replaces ``self._view`` and refreshes ``self._robot_x``,
+        ``self._robot_y`` and ``self._robot_ori``.
+
+        Returns:
+            The freshly filled ``self._view`` array.
+        """
+        self._view = np.zeros_like(self._view)
+        n_rows, n_cols = self._view.shape[0], self._view.shape[1]
+
+        def axis_weights(frac):
+            # Share of a unit-wide footprint that lands on the previous, the
+            # containing and the next cell along one axis (offsets -1, 0, +1).
+            return (
+                (-1, max(0.0, 0.5 - frac)),
+                (0, min(1.0, frac + 0.5) - max(0.0, frac - 0.5)),
+                (1, max(0.0, frac - 0.5)),
+            )
+
+        def update_view(x, y, d):
+            # Positions arrive in world coordinates and are made relative to the
+            # robot before being converted to (fractional) grid coordinates.
+            row, col = self._xy_to_rowcol(x - self._robot_x, y - self._robot_y)
+            # ``% 1`` never yields a negative value, so no sign fix-up is needed.
+            # NOTE(review): int() truncates toward zero while ``% 1`` floors, so
+            # the two disagree for negative coordinates - confirm this is intended.
+            row, row_frac, col, col_frac = int(row), row % 1, int(col), col % 1
+            col_weights = axis_weights(col_frac)
+            for d_row, row_weight in axis_weights(row_frac):
+                for d_col, col_weight in col_weights:
+                    r, c = row + d_row, col + d_col
+                    if 0 <= r < n_rows and 0 <= c < n_cols:
+                        self._view[r, c, d] += row_weight * col_weight
+
+        # Record the robot pose; object positions are taken relative to it.
+        robot_x, robot_y = self.wrapped_env.get_body_com("torso")[:2]
+        self._robot_x = robot_x
+        self._robot_y = robot_y
+        self._robot_ori = self.get_ori()
+
+        structure = self.MAZE_STRUCTURE
+        size_scaling = self.MAZE_SIZE_SCALING
+
+        # Draw immovable blocks and chasms.
+        for i in range(len(structure)):
+            for j in range(len(structure[0])):
+                cell = structure[i][j]
+                if cell == 1:  # Wall.
+                    channel = 0
+                elif cell == -1:  # Chasm.
+                    channel = 1
+                else:
+                    continue
+                update_view(
+                    j * size_scaling - self._init_torso_x,
+                    i * size_scaling - self._init_torso_y,
+                    channel,
+                )
+
+        # Draw movable blocks.
+        for block_name, _ in self.movable_blocks:
+            block_x, block_y = self.wrapped_env.get_body_com(block_name)[:2]
+            update_view(block_x, block_y, 2)
+
+        return self._view
+
+    def get_range_sensor_obs(self):
+        """Returns egocentric range sensor observations of maze.
+
+        ``self._n_bins`` rays are cast from the torso position, evenly spread
+        over ``self._sensor_span`` and centred on the current orientation.
+        Each ray is tested against the outlines of all walls and drop-offs and
+        of every movable block whose vertical extent contains the torso height.
+
+        Returns:
+            Array of shape ``(self._n_bins, 3)``.  The columns stand for wall,
+            drop-off and movable block.  For each ray, the column of the
+            nearest hit holds ``(range - distance) / range`` when that hit lies
+            within ``self._sensor_range``; all other entries are 0.
+        """
+        robot_x, robot_y, robot_z = self.wrapped_env.get_body_com("torso")[:3]
+        ori = self.get_ori()
+
+        structure = self.MAZE_STRUCTURE
+        size_scaling = self.MAZE_SIZE_SCALING
+        height = self.MAZE_HEIGHT
+        half_size = 0.5 * size_scaling
+
+        def square_outline(cx, cy):
+            # Four edges of the axis-aligned, one-cell-wide square centred at
+            # (cx, cy).
+            x1, x2 = cx - half_size, cx + half_size
+            y1, y2 = cy - half_size, cy + half_size
+            return [
+                ((x1, y1), (x2, y1)),
+                ((x2, y1), (x2, y2)),
+                ((x2, y2), (x1, y2)),
+                ((x1, y2), (x1, y1)),
+            ]
+
+        # (segment, type) pairs for everything the sensor can see.
+        segments = []
+        # Outer boundary of each immovable block or drop-off.
+        for i in range(len(structure)):
+            for j in range(len(structure[0])):
+                if structure[i][j] in [1, -1]:  # There's a wall or drop-off.
+                    outline = square_outline(
+                        j * size_scaling - self._init_torso_x,
+                        i * size_scaling - self._init_torso_y,
+                    )
+                    segments.extend((seg, structure[i][j]) for seg in outline)
+        # Outer boundary of each movable block within the agent's z-view.
+        for block_name, block_type in self.movable_blocks:
+            block_x, block_y, block_z = self.wrapped_env.get_body_com(block_name)[:3]
+            if (
+                block_z + height * size_scaling / 2 >= robot_z
+                and robot_z >= block_z - height * size_scaling / 2
+            ):  # Block in view.
+                outline = square_outline(block_x, block_y)
+                segments.extend((seg, block_type) for seg in outline)
+
+        origin = (robot_x, robot_y)
+        sensor_readings = np.zeros((self._n_bins, 3))  # 3 for wall, drop-off, block
+        for ray_idx in range(self._n_bins):
+            ray_ori = (
+                ori
+                - self._sensor_span * 0.5
+                + (2 * ray_idx + 1.0) / (2 * self._n_bins) * self._sensor_span
+            )
+            # Track the closest intersected segment; on ties the earliest
+            # segment in the list wins.
+            nearest_distance = None
+            nearest_type = None
+            for segment, seg_type in segments:
+                p = maze_env_utils.ray_segment_intersect(
+                    ray=(origin, ray_ori), segment=segment
+                )
+                if p is None:
+                    continue
+                distance = maze_env_utils.point_distance(p, origin)
+                if nearest_distance is None or distance < nearest_distance:
+                    nearest_distance = distance
+                    nearest_type = seg_type
+            if nearest_distance is None:
+                continue
+
+            if nearest_type == 1:  # Wall.
+                idx = 0
+            elif nearest_type == -1:  # Drop-off.
+                idx = 1
+            elif maze_env_utils.can_move(nearest_type):  # Block.
+                idx = 2
+            else:
+                # Unknown type: indexing the row with None would overwrite all
+                # three channels, so such hits are ignored.
+                continue
+            if nearest_distance <= self._sensor_range:
+                sensor_readings[ray_idx][idx] = (
+                    self._sensor_range - nearest_distance
+                ) / self._sensor_range
+
+        return sensor_readings
+
+    def _get_obs(self):
+        """Assemble the flat observation vector of the maze environment.
+
+        Layout: the wrapped environment's observation (with the centre of mass
+        of every movable block spliced in after its first three entries when
+        ``self._observe_blocks`` is set), the flattened range-sensor readings,
+        the flattened top-down view when ``self._top_down_view`` is set, and
+        finally the step counter scaled by 0.001.
+        """
+        inner_obs = self.wrapped_env._get_obs()
+        view = [self.get_top_down_view().flat] if self._top_down_view else []
+
+        if self._observe_blocks:
+            block_coms = [
+                self.wrapped_env.get_body_com(name) for name, _ in self.movable_blocks
+            ]
+            inner_obs = np.concatenate([inner_obs[:3]] + block_coms + [inner_obs[3:]])
+
+        sensor_obs = self.get_range_sensor_obs()
+        pieces = [inner_obs, sensor_obs.flat] + view + [[self.t * 0.001]]
+        return np.concatenate(pieces)
+
+    def reset(self):
+        """Reset the episode state and return the first observation.
+
+        Clears the step counter and trajectory, resets the wrapped environment
+        and, when more than one initial position is configured, moves the
+        robot to one of them chosen at random.
+        """
+        self.t, self.trajectory = 0, []
+        self.wrapped_env.reset()
+        candidates = self._init_positions
+        if len(candidates) > 1:
+            self.wrapped_env.set_xy(random.choice(candidates))
+        return self._get_obs()
+
+    @property
+    def viewer(self):
+        """Viewer object exposed by the wrapped environment."""
+        inner_env = self.wrapped_env
+        return inner_env.viewer
+
+    def render(self, *args, **kwargs):
+        """Forward the call, with all its arguments, to the wrapped environment."""
+        render_fn = self.wrapped_env.render
+        return render_fn(*args, **kwargs)
+
+    @property
+    def observation_space(self):
+        """Unbounded ``gym.spaces.Box`` shaped like the current observation.
+
+        The shape is obtained by computing an observation on every access.
+        """
+        obs_shape = self._get_obs().shape
+        upper = np.full(obs_shape, np.inf)
+        return gym.spaces.Box(-upper, upper)
+
+    @property
+    def action_space(self):
+        """Action space of the wrapped environment, passed through unchanged."""
+        inner_env = self.wrapped_env
+        return inner_env.action_space
+
+    def _find_robot(self):
+        """Return the scaled (x, y) position of the first "r" cell of the maze.
+
+        Cells are scanned row by row; a cell in row ``i`` and column ``j`` maps
+        to ``(j * MAZE_SIZE_SCALING, i * MAZE_SIZE_SCALING)``.
+
+        Raises:
+            AssertionError: If no cell of ``MAZE_STRUCTURE`` is marked "r".
+        """
+        size_scaling = self.MAZE_SIZE_SCALING
+        for i, row in enumerate(self.MAZE_STRUCTURE):
+            for j, cell in enumerate(row):
+                if cell == "r":
+                    return j * size_scaling, i * size_scaling
+        # Raised explicitly instead of `assert False` so that the check is not
+        # stripped (and None silently returned) when running under `python -O`.
+        raise AssertionError("No robot in maze specification.")
+
+    def _find_all_robots(self):
+        """List the scaled (x, y) position of every "r" cell, in row-major order."""
+        maze = self.MAZE_STRUCTURE
+        scale = self.MAZE_SIZE_SCALING
+        return [
+            (col * scale, row * scale)
+            for row in range(len(maze))
+            for col in range(len(maze[0]))
+            if maze[row][col] == "r"
+        ]
+
+    def _is_in_collision(self, pos):
+        """Tell whether the point ``pos`` lies inside (or on the edge of) a wall.
+
+        Only cells with value 1 are considered; each one covers a square of
+        side ``MAZE_SIZE_SCALING`` expressed relative to the initial torso
+        position.
+        """
+        x, y = pos
+        maze = self.MAZE_STRUCTURE
+        scale = self.MAZE_SIZE_SCALING
+        for i, cells in enumerate(maze):
+            for j in range(len(maze[0])):
+                if cells[j] == 1:
+                    left = j * scale - scale * 0.5 - self._init_torso_x
+                    right = j * scale + scale * 0.5 - self._init_torso_x
+                    bottom = i * scale - scale * 0.5 - self._init_torso_y
+                    top = i * scale + scale * 0.5 - self._init_torso_y
+                    if left <= x <= right and bottom <= y <= top:
+                        return True
+        return False
+
+    def step(self, action):
+        """Advance the wrapped environment by one step.
+
+        With ``self._manual_collision`` set, a move that ends inside a wall is
+        undone by restoring the previous xy position.  The wrapped
+        environment's observation and ``done`` flag are discarded: the
+        observation is rebuilt by ``self._get_obs()`` and ``done`` is always
+        False.
+
+        Returns:
+            Tuple ``(observation, reward, done, info)``.
+        """
+        self.t += 1
+        check_collision = self._manual_collision
+        if check_collision:
+            previous_xy = self.wrapped_env.get_xy()
+        _, inner_reward, _, info = self.wrapped_env.step(action)
+        if check_collision and self._is_in_collision(self.wrapped_env.get_xy()):
+            self.wrapped_env.set_xy(previous_xy)
+        return self._get_obs(), inner_reward, False, info
--- a/mujoco_maze/maze_env_utils.py
+++ b/mujoco_maze/maze_env_utils.py
@ -0,0 +1,162 @@
+# Copyright 2018 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Adapted from rllab maze_env_utils.py."""
+import math
+
+
+class Move:
+    """Integer codes marking a maze cell as a movable block.
+
+    The can_* helpers of this module test a code by membership to decide
+    along which axes the block may move.
+    """
+
+    # Single axis.
+    X = 11
+    Y = 12
+    Z = 13
+    # Two axes.
+    XY = 14
+    XZ = 15
+    YZ = 16
+    # All three axes.
+    XYZ = 17
+    # Accepted by can_move_x, can_move_y and can_spin.
+    SpinXY = 18
+
+
+def can_move_x(movable):
+    """Tell whether ``movable`` is a Move code that allows motion along x."""
+    x_codes = (Move.X, Move.XY, Move.XZ, Move.XYZ, Move.SpinXY)
+    return movable in x_codes
+
+
+def can_move_y(movable):
+    """Tell whether ``movable`` is a Move code that allows motion along y."""
+    y_codes = (Move.Y, Move.XY, Move.YZ, Move.XYZ, Move.SpinXY)
+    return movable in y_codes
+
+
+def can_move_z(movable):
+    """Tell whether ``movable`` is a Move code that allows motion along z."""
+    z_codes = (Move.Z, Move.XZ, Move.YZ, Move.XYZ)
+    return movable in z_codes
+
+
+def can_spin(movable):
+    """Tell whether ``movable`` is the spinning Move code."""
+    spin_codes = (Move.SpinXY,)
+    return movable in spin_codes
+
+
+def can_move(movable):
+    """Tell whether ``movable`` allows motion along at least one of x, y, z."""
+    axis_checks = (can_move_x, can_move_y, can_move_z)
+    return any(check(movable) for check in axis_checks)
+
+
+def construct_maze(maze_id="Maze"):
+    """Build the grid layout of the maze called ``maze_id``.
+
+    The result is a fresh list of rows on every call.  In the maze
+    environment, cell value 1 is treated as a wall, -1 as a chasm and "r" as
+    the robot's start cell; ``Move`` codes mark movable blocks.
+
+    Raises:
+        NotImplementedError: If ``maze_id`` is not one of "Maze", "Push",
+            "Fall", "Block" or "BlockMaze".
+    """
+    robot = "r"
+
+    if maze_id == "Maze":
+        return [
+            [1, 1, 1, 1, 1],
+            [1, robot, 0, 0, 1],
+            [1, 1, 1, 0, 1],
+            [1, 0, 0, 0, 1],
+            [1, 1, 1, 1, 1],
+        ]
+
+    if maze_id == "Push":
+        return [
+            [1, 1, 1, 1, 1],
+            [1, 0, robot, 1, 1],
+            [1, 0, Move.XY, 0, 1],
+            [1, 1, 0, 1, 1],
+            [1, 1, 1, 1, 1],
+        ]
+
+    if maze_id == "Fall":
+        return [
+            [1, 1, 1, 1],
+            [1, robot, 0, 1],
+            [1, 0, Move.YZ, 1],
+            [1, -1, -1, 1],
+            [1, 0, 0, 1],
+            [1, 1, 1, 1],
+        ]
+
+    if maze_id == "Block":
+        return [
+            [1, 1, 1, 1, 1],
+            [1, robot, 0, 0, 1],
+            [1, 0, 0, 0, 1],
+            [1, 0, 0, 0, 1],
+            [1, 1, 1, 1, 1],
+        ]
+
+    if maze_id == "BlockMaze":
+        return [
+            [1, 1, 1, 1],
+            [1, robot, 0, 1],
+            [1, 1, 0, 1],
+            [1, 0, 0, 1],
+            [1, 1, 1, 1],
+        ]
+
+    raise NotImplementedError("The provided MazeId %s is not recognized" % maze_id)
+
+
+def line_intersect(pt1, pt2, ptA, ptB):
+    """Intersect the infinite lines through (pt1, pt2) and (ptA, ptB).
+
+    Taken from https://www.cs.hmc.edu/ACM/lectures/intersections.html
+
+    Returns:
+        Tuple ``(xi, yi, valid, r, s)``.  ``valid`` is 1 when the lines cross
+        and 0 when they are (nearly) parallel, in which case every entry is 0.
+        ``r`` and ``s`` locate the intersection on each line:
+        ``pt1 + r * (pt2 - pt1)`` and ``ptA + s * (ptB - ptA)``.
+    """
+    det_tolerance = 0.00000001
+
+    x1, y1 = pt1
+    x2, y2 = pt2
+    ax, ay = ptA
+    bx, by = ptB
+
+    # Direction of the first line, then of the second one.
+    dx1, dy1 = x2 - x1, y2 - y1
+    dx, dy = bx - ax, by - ay
+
+    det = -dx1 * dy + dy1 * dx
+    if math.fabs(det) < det_tolerance:
+        # Parallel (or degenerate) lines: no single intersection point.
+        return (0, 0, 0, 0, 0)
+
+    inv_det = 1.0 / det
+    off_x, off_y = ax - x1, ay - y1
+
+    # Position of the intersection along each of the two lines.
+    r = inv_det * (-dy * off_x + dx * off_y)
+    s = inv_det * (-dy1 * off_x + dx1 * off_y)
+
+    # Average the two parametric descriptions of the same point.
+    xi = (x1 + r * dx1 + ax + s * dx) / 2.0
+    yi = (y1 + r * dy1 + ay + s * dy) / 2.0
+    return (xi, yi, 1, r, s)
+
+
+def ray_segment_intersect(ray, segment):
+    """Intersect a ray with a line segment.
+
+    ``ray`` is ``((x, y), theta)``: an origin and a direction angle.
+    ``segment`` is a pair of end points ``((x1, y1), (x2, y2))``.
+
+    Returns:
+        The intersection point ``(x, y)``, or None when the ray misses the
+        segment or is parallel to it.
+    """
+    (x, y), theta = ray
+    ray_length = 1
+    ray_start = (x, y)
+    ray_tip = (x + ray_length * math.cos(theta), y + ray_length * math.sin(theta))
+    xo, yo, valid, r, s = line_intersect(ray_start, ray_tip, *segment)
+    # r >= 0 keeps hits in front of the origin; 0 <= s <= 1 keeps them on the
+    # segment itself rather than on its extension.
+    hit = valid and r >= 0 and 0 <= s <= 1
+    return (xo, yo) if hit else None
+
+
+def point_distance(p1, p2):
+    """Return the Euclidean distance between the 2-D points ``p1`` and ``p2``."""
+    (ax, ay), (bx, by) = p1, p2
+    squared = (ax - bx) ** 2 + (ay - by) ** 2
+    return squared ** 0.5
--- a/mujoco_maze/point.py
+++ b/mujoco_maze/point.py
@ -0,0 +1,101 @@
+# Copyright 2018 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Wrapper for creating the point environment in gym_mujoco."""
+
+import math
+import numpy as np
+import mujoco_py
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+
+class PointEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    """Point robot steered by a forward displacement and a turn increment.
+
+    The first three ``qpos`` entries are used as x, y and orientation; any
+    further entries are left to other bodies and are restored to their
+    initial values by ``reset_model``.
+    """
+
+    FILE = "point.xml"
+    # Index of the orientation entry in qpos (see get_ori).
+    ORI_IND = 2
+
+    def __init__(self, file_path=None, expose_all_qpos=True):
+        """Load the model at ``file_path``.
+
+        Args:
+            file_path: Path of the MuJoCo XML model handed to ``MujocoEnv``.
+            expose_all_qpos: When true, observations contain x, y and the
+                orientation; otherwise only the orientation.
+        """
+        self._expose_all_qpos = expose_all_qpos
+
+        # NOTE(review): the second argument is presumably MujocoEnv's
+        # frame_skip - confirm against the installed gym version.
+        mujoco_env.MujocoEnv.__init__(self, file_path, 1)
+        utils.EzPickle.__init__(self)
+
+    @staticmethod
+    def _version_tuple(version):
+        """Turn a dotted version string into a tuple of its leading integers."""
+        parts = []
+        for token in str(version).split("."):
+            digits = ""
+            for char in token:
+                if not char.isdigit():
+                    break
+                digits += char
+            if not digits:
+                break
+            parts.append(int(digits))
+        return tuple(parts)
+
+    @property
+    def physics(self):
+        """Object holding ``data`` and ``step`` for the installed mujoco-py."""
+        # check mujoco version is greater than version 1.50 to call correct physics
+        # model containing PyMjData object for getting and setting position/velocity
+        # check https://github.com/openai/mujoco-py/issues/80 for updates to api
+        # Versions are compared numerically: comparing the raw strings is
+        # lexicographic and would, e.g., rank "1.100" below "1.50".
+        if self._version_tuple(mujoco_py.get_version()) >= (1, 50):
+            return self.sim
+        return self.model
+
+    def _step(self, a):
+        """Alias of ``step``."""
+        return self.step(a)
+
+    def step(self, action):
+        """Turn by ``action[1]``, then move ``0.2 * action[0]`` along the heading.
+
+        The caller's ``action`` is not modified.
+
+        Returns:
+            Tuple ``(observation, 0, False, {})``.
+        """
+        forward = 0.2 * action[0]
+        turn = action[1]
+        qpos = np.copy(self.physics.data.qpos)
+        qpos[2] += turn
+        ori = qpos[2]
+        # compute increment in each direction
+        dx = math.cos(ori) * forward
+        dy = math.sin(ori) * forward
+        # ensure that the robot is within reasonable range
+        qpos[0] = np.clip(qpos[0] + dx, -100, 100)
+        qpos[1] = np.clip(qpos[1] + dy, -100, 100)
+        qvel = self.physics.data.qvel
+        self.set_state(qpos, qvel)
+        for _ in range(0, self.frame_skip):
+            self.physics.step()
+        next_obs = self._get_obs()
+        reward = 0
+        done = False
+        info = {}
+        return next_obs, reward, done, info
+
+    def _get_obs(self):
+        """Concatenate the point's position entries and its first 3 velocities."""
+        if self._expose_all_qpos:
+            return np.concatenate(
+                [
+                    self.physics.data.qpos.flat[:3],  # Only point-relevant coords.
+                    self.physics.data.qvel.flat[:3],
+                ]
+            )
+        return np.concatenate(
+            [self.physics.data.qpos.flat[2:3], self.physics.data.qvel.flat[:3]]
+        )
+
+    def reset_model(self):
+        """Reset to a slightly perturbed initial state and return the observation."""
+        qpos = self.init_qpos + self.np_random.uniform(
+            size=self.physics.model.nq, low=-0.1, high=0.1
+        )
+        qvel = self.init_qvel + self.np_random.randn(self.physics.model.nv) * 0.1
+
+        # Set everything other than point to original position and 0 velocity.
+        qpos[3:] = self.init_qpos[3:]
+        qvel[3:] = 0.0
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def get_ori(self):
+        """Return the orientation entry of qpos."""
+        return self.physics.data.qpos[self.__class__.ORI_IND]
+
+    def set_xy(self, xy):
+        """Move the point to ``xy``, keeping all other state entries."""
+        qpos = np.copy(self.physics.data.qpos)
+        qpos[0] = xy[0]
+        qpos[1] = xy[1]
+
+        qvel = self.physics.data.qvel
+        # Without this call the new position was computed but never applied.
+        self.set_state(qpos, qvel)
+
+    def get_xy(self):
+        """Return a copy of the point's current (x, y) position."""
+        return np.copy(self.physics.data.qpos[:2])
--- a/mujoco_maze/point_maze_env.py
+++ b/mujoco_maze/point_maze_env.py
@ -0,0 +1,21 @@
+# Copyright 2018 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from environments.maze_env import MazeEnv
+from environments.point import PointEnv
+
+
+class PointMazeEnv(MazeEnv):
+    """Maze environment whose robot model class is ``PointEnv``.
+
+    Only ``MODEL_CLASS`` is overridden; everything else is inherited from
+    ``MazeEnv``.
+    """
+
+    # NOTE(review): presumably instantiated by MazeEnv as the wrapped
+    # environment - confirm against MazeEnv.__init__.
+    MODEL_CLASS = PointEnv
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,18 @@
+[tool.black]
+line-length = 88
+target-version = ['py36']
+include = '\.pyi?$'
+exclude = '''
+
+(
+  /(
+      \.eggs
+    | \.git
+    | \.mypy_cache
+    | _build
+    | buck-out
+    | build
+    | dist
+  )/
+)
+'''
--- a/setup.cfg
+++ b/setup.cfg
@ -0,0 +1,18 @@
+[flake8]
+max-line-length = 88
+ignore = W391, W503, F821, E203, E231
+
+[mypy]
+python_version = 3.6
+ignore_missing_imports = True
+warn_unused_configs = True
+
+[isort]
+# Line length is kept equal to the one configured for black and flake8.
+multi_line_output=3
+include_trailing_comma=True
+force_grid_wrap=0
+use_parentheses=True
+line_length=88
+lines_between_types=0
+combine_as_imports=True
+# First-party packages of this repository (the package directory is mujoco_maze).
+known_first_party=mujoco_maze, tests