From 034bd64990f090d1cfdef9af06819670269e465c Mon Sep 17 00:00:00 2001 From: kngwyu Date: Thu, 21 May 2020 19:05:06 +0900 Subject: [PATCH] INITIAL --- .gitignore | 7 + LICENSE | 201 +++++++++++++ README.md | 17 ++ mujoco_maze/__init__.py | 1 + mujoco_maze/ant.py | 156 ++++++++++ mujoco_maze/ant_maze_env.py | 21 ++ mujoco_maze/assets/ant.xml | 81 +++++ mujoco_maze/maze_env.py | 550 ++++++++++++++++++++++++++++++++++ mujoco_maze/maze_env_utils.py | 162 ++++++++++ mujoco_maze/point.py | 101 +++++++ mujoco_maze/point_maze_env.py | 21 ++ pyproject.toml | 18 ++ setup.cfg | 18 ++ 13 files changed, 1354 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 mujoco_maze/__init__.py create mode 100644 mujoco_maze/ant.py create mode 100644 mujoco_maze/ant_maze_env.py create mode 100755 mujoco_maze/assets/ant.xml create mode 100644 mujoco_maze/maze_env.py create mode 100644 mujoco_maze/maze_env_utils.py create mode 100644 mujoco_maze/point.py create mode 100644 mujoco_maze/point_maze_env.py create mode 100644 pyproject.toml create mode 100644 setup.cfg diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..98d91ff --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +*.pyc +**/__pycache__/ +**/.pytest_cache/ +**/dist/ +**/build/ +**/*.egg-info/ +**/.mypy_cache/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e21b3d1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright 2020 Yuji Kanagawa + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..244a93f --- /dev/null +++ b/README.md @@ -0,0 +1,17 @@ +# mujoco-maze + +Some maze environments for reinforcement learning(RL) using [mujoco-py] and +[openai gym][gym]. + +Thankfully, this project is based on the code from [tensorflow/models][models], [rllab] +and [deep-skill-chaining][dsc]. + +## License +This project is licensed under Apache License, Version 2.0 +([LICENSE-APACHE](LICENSE) or http://www.apache.org/licenses/LICENSE-2.0). 
+ +[dsc]: https://github.com/deep-skill-chaining/deep-skill-chaining +[gym]: https://github.com/openai/gym +[models]: https://github.com/tensorflow/models/tree/master/research/efficient-hrl +[mujoco-py]: https://github.com/openai/mujoco-py +[rllab]: https://github.com/rll/rllab diff --git a/mujoco_maze/__init__.py b/mujoco_maze/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/mujoco_maze/__init__.py @@ -0,0 +1 @@ + diff --git a/mujoco_maze/ant.py b/mujoco_maze/ant.py new file mode 100644 index 0000000..f3fd44a --- /dev/null +++ b/mujoco_maze/ant.py @@ -0,0 +1,156 @@ +# Copyright 2018 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# ==============================================================================

"""Wrapper for creating the ant environment in gym_mujoco."""

import math
import numpy as np
import mujoco_py
from gym import utils
from gym.envs.mujoco import mujoco_env


def q_inv(a):
    """Return the conjugate ``[w, -i, -j, -k]`` of quaternion ``a``.

    For a unit quaternion the conjugate is also its inverse.
    """
    return [a[0], -a[1], -a[2], -a[3]]


def q_mult(a, b):
    """Return the Hamilton product ``a * b`` of two quaternions.

    Both arguments and the result are ``[w, i, j, k]`` sequences.
    """
    w = a[0] * b[0] - a[1] * b[1] - a[2] * b[2] - a[3] * b[3]
    i = a[0] * b[1] + a[1] * b[0] + a[2] * b[3] - a[3] * b[2]
    j = a[0] * b[2] - a[1] * b[3] + a[2] * b[0] + a[3] * b[1]
    k = a[0] * b[3] + a[1] * b[2] - a[2] * b[1] + a[3] * b[0]
    return [w, i, j, k]


def _version_tuple(version):
    """Turn a dotted version string such as ``"1.50.1"`` into ``(1, 50, 1)``.

    Parsing stops at the first component that does not start with a digit, so
    suffixes like ``"2.0rc1"`` still yield a comparable numeric prefix.
    """
    numbers = []
    for piece in str(version).split("."):
        digits = ""
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        if not digits:
            break
        numbers.append(int(digits))
    return tuple(numbers)


class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """Ant agent built on gym's ``MujocoEnv``.

    Args:
        file_path: Model XML path forwarded to ``MujocoEnv.__init__``.
        expose_all_qpos: If true the observation starts at ``qpos[0]``;
            otherwise the first two ``qpos`` entries are left out.
        expose_body_coms: Optional iterable of body names whose centres of
            mass are appended to the observation.
        expose_body_comvels: Optional iterable of body names whose
            centre-of-mass velocities are appended to the observation.
    """

    FILE = "ant.xml"
    # Index in qpos at which the 4-element orientation quaternion starts.
    ORI_IND = 3

    def __init__(
        self,
        file_path=None,
        expose_all_qpos=True,
        expose_body_coms=None,
        expose_body_comvels=None,
    ):
        self._expose_all_qpos = expose_all_qpos
        self._expose_body_coms = expose_body_coms
        self._expose_body_comvels = expose_body_comvels
        # Filled lazily by _get_obs(): body name -> observation index range.
        self._body_com_indices = {}
        self._body_comvel_indices = {}

        mujoco_env.MujocoEnv.__init__(self, file_path, 5)
        utils.EzPickle.__init__(self)

    @property
    def physics(self):
        """Return the object exposing ``.data.qpos`` / ``.data.qvel``.

        mujoco-py 1.50 and later keep the state on ``self.sim``; older
        releases keep it on ``self.model``.
        See https://github.com/openai/mujoco-py/issues/80 for API updates.
        """
        # Compare numerically: as plain strings "1.100" would sort before
        # "1.50" and select the wrong attribute.
        if _version_tuple(mujoco_py.get_version()) >= (1, 50):
            return self.sim
        return self.model

    def _step(self, a):
        """Alias of :meth:`step`."""
        return self.step(a)

    def step(self, a):
        """Apply action ``a`` and return ``(obs, reward, done, info)``.

        The reward is forward (x) velocity of the torso minus a quadratic
        control cost plus a constant survival bonus; ``done`` is always False.
        """
        xposbefore = self.get_body_com("torso")[0]
        self.do_simulation(a, self.frame_skip)
        xposafter = self.get_body_com("torso")[0]
        forward_reward = (xposafter - xposbefore) / self.dt
        ctrl_cost = 0.5 * np.square(a).sum()
        survive_reward = 1.0
        reward = forward_reward - ctrl_cost + survive_reward
        done = False
        ob = self._get_obs()
        info = dict(
            reward_forward=forward_reward,
            reward_ctrl=-ctrl_cost,
            reward_survive=survive_reward,
        )
        return ob, reward, done, info

    def _get_obs(self):
        """Build the observation vector (no contact-force terms)."""
        # No cfrc observation
        qpos = self.physics.data.qpos
        qvel = self.physics.data.qvel
        # The [:15] / [:14] bounds ensure only ant obs. are included.
        first_qpos = 0 if self._expose_all_qpos else 2
        obs = np.concatenate([qpos.flat[first_qpos:15], qvel.flat[:14]])

        if self._expose_body_coms is not None:
            for name in self._expose_body_coms:
                com = self.get_body_com(name)
                if name not in self._body_com_indices:
                    # Remember where this body's entries sit in the observation.
                    indices = range(len(obs), len(obs) + len(com))
                    self._body_com_indices[name] = indices
                obs = np.concatenate([obs, com])

        if self._expose_body_comvels is not None:
            for name in self._expose_body_comvels:
                comvel = self.get_body_comvel(name)
                if name not in self._body_comvel_indices:
                    indices = range(len(obs), len(obs) + len(comvel))
                    self._body_comvel_indices[name] = indices
                obs = np.concatenate([obs, comvel])
        return obs

    def reset_model(self):
        """Reset to the initial state plus small random noise; return the obs."""
        qpos = self.init_qpos + self.np_random.uniform(
            size=self.model.nq, low=-0.1, high=0.1
        )
        qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1

        # Set everything other than ant to original position and 0 velocity.
        qpos[15:] = self.init_qpos[15:]
        qvel[14:] = 0.0
        self.set_state(qpos, qvel)
        return self._get_obs()

    def viewer_setup(self):
        """Place the camera at half of the model extent."""
        self.viewer.cam.distance = self.model.stat.extent * 0.5

    def get_ori(self):
        """Return the heading angle (radians) of the body's x axis in the x-y plane."""
        start = self.__class__.ORI_IND
        rot = self.physics.data.qpos[start : start + 4]  # take the quaternion
        x_axis = [0, 1, 0, 0]  # unit x vector written as a pure quaternion
        # Rotate the x axis by `rot`, then project onto the x-y plane.
        rotated = q_mult(q_mult(rot, x_axis), q_inv(rot))[1:3]
        return math.atan2(rotated[1], rotated[0])

    def set_xy(self, xy):
        """Move the agent to ``xy`` while keeping the rest of the state."""
        qpos = np.copy(self.physics.data.qpos)
        qpos[0] = xy[0]
        qpos[1] = xy[1]

        qvel = self.physics.data.qvel
        self.set_state(qpos, qvel)

    def get_xy(self):
        """Return a copy of the agent's current ``(x, y)`` position."""
        # Copy so the result is a snapshot: a plain slice of qpos may be a view
        # of live simulator memory that changes on the next step.
        return np.copy(self.physics.data.qpos[:2])


# diff --git a/mujoco_maze/ant_maze_env.py b/mujoco_maze/ant_maze_env.py
# new file mode 100644
# index 0000000..69a1066
# --- /dev/null
# +++ b/mujoco_maze/ant_maze_env.py
# @@ -0,0 +1,21 @@
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# The package created by this patch is `mujoco_maze`; there is no
# `environments` package to import from.
from mujoco_maze.maze_env import MazeEnv
from mujoco_maze.ant import AntEnv


class AntMazeEnv(MazeEnv):
    """Maze environment whose agent model is ``AntEnv``."""

    MODEL_CLASS = AntEnv


# diff --git a/mujoco_maze/assets/ant.xml b/mujoco_maze/assets/ant.xml
# new file mode 100755
# index 0000000..5a49d7f
# --- /dev/null
# +++ b/mujoco_maze/assets/ant.xml
# @@ -0,0 +1,81 @@
#
# diff --git a/mujoco_maze/maze_env.py b/mujoco_maze/maze_env.py
# new file mode 100644
# index 0000000..0fac835
# --- /dev/null
# +++ b/mujoco_maze/maze_env.py
# @@ -0,0 +1,550 @@
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Adapted from rllab maze_env.py."""

import os
import random  # used by MazeEnv.reset() to pick a start position
import tempfile
import xml.etree.ElementTree as ET
import math
import numpy as np
import gym

from mujoco_maze import maze_env_utils

# Directory that contains mujoco xml files.
+MODEL_DIR = "environments/assets" + + +class MazeEnv(gym.Env): + MODEL_CLASS = None + + MAZE_HEIGHT = None + MAZE_SIZE_SCALING = None + + def __init__( + self, + maze_id=None, + maze_height=0.5, + maze_size_scaling=8, + n_bins=0, + sensor_range=3.0, + sensor_span=2 * math.pi, + observe_blocks=False, + put_spin_near_agent=False, + top_down_view=False, + manual_collision=False, + *args, + **kwargs, + ): + self._maze_id = maze_id + + model_cls = self.__class__.MODEL_CLASS + if model_cls is None: + raise "MODEL_CLASS unspecified!" + xml_path = os.path.join(MODEL_DIR, model_cls.FILE) + tree = ET.parse(xml_path) + worldbody = tree.find(".//worldbody") + + self.MAZE_HEIGHT = height = maze_height + self.MAZE_SIZE_SCALING = size_scaling = maze_size_scaling + self._n_bins = n_bins + self._sensor_range = sensor_range * size_scaling + self._sensor_span = sensor_span + self._observe_blocks = observe_blocks + self._put_spin_near_agent = put_spin_near_agent + self._top_down_view = top_down_view + self._manual_collision = manual_collision + + self.MAZE_STRUCTURE = structure = maze_env_utils.construct_maze( + maze_id=self._maze_id + ) + self.elevated = any( + -1 in row for row in structure + ) # Elevate the maze to allow for falling. + self.blocks = any( + any(maze_env_utils.can_move(r) for r in row) for row in structure + ) # Are there any movable blocks? + + torso_x, torso_y = self._find_robot() + self._init_torso_x = torso_x + self._init_torso_y = torso_y + self._init_positions = [ + (x - torso_x, y - torso_y) for x, y in self._find_all_robots() + ] + + self._xy_to_rowcol = lambda x, y: ( + 2 + (y + size_scaling / 2) / size_scaling, + 2 + (x + size_scaling / 2) / size_scaling, + ) + self._view = np.zeros( + [5, 5, 3] + ) # walls (immovable), chasms (fall), movable blocks + + height_offset = 0.0 + if self.elevated: + # Increase initial z-pos of ant. 
+ height_offset = height * size_scaling + torso = tree.find(".//body[@name='torso']") + torso.set("pos", "0 0 %.2f" % (0.75 + height_offset)) + if self.blocks: + # If there are movable blocks, change simulation settings to perform + # better contact detection. + default = tree.find(".//default") + default.find(".//geom").set("solimp", ".995 .995 .01") + + self.movable_blocks = [] + for i in range(len(structure)): + for j in range(len(structure[0])): + struct = structure[i][j] + if struct == "r" and self._put_spin_near_agent: + struct = maze_env_utils.Move.SpinXY + if self.elevated and struct not in [-1]: + # Create elevated platform. + ET.SubElement( + worldbody, + "geom", + name="elevated_%d_%d" % (i, j), + pos="%f %f %f" + % ( + j * size_scaling - torso_x, + i * size_scaling - torso_y, + height / 2 * size_scaling, + ), + size="%f %f %f" + % ( + 0.5 * size_scaling, + 0.5 * size_scaling, + height / 2 * size_scaling, + ), + type="box", + material="", + contype="1", + conaffinity="1", + rgba="0.9 0.9 0.9 1", + ) + if struct == 1: # Unmovable block. + # Offset all coordinates so that robot starts at the origin. + ET.SubElement( + worldbody, + "geom", + name="block_%d_%d" % (i, j), + pos="%f %f %f" + % ( + j * size_scaling - torso_x, + i * size_scaling - torso_y, + height_offset + height / 2 * size_scaling, + ), + size="%f %f %f" + % ( + 0.5 * size_scaling, + 0.5 * size_scaling, + height / 2 * size_scaling, + ), + type="box", + material="", + contype="1", + conaffinity="1", + rgba="0.4 0.4 0.4 1", + ) + elif maze_env_utils.can_move(struct): # Movable block. + # The "falling" blocks are shrunk slightly and increased in mass to + # ensure that it can fall easily through a gap in the platform blocks. 
+ name = "movable_%d_%d" % (i, j) + self.movable_blocks.append((name, struct)) + falling = maze_env_utils.can_move_z(struct) + spinning = maze_env_utils.can_spin(struct) + x_offset = 0.25 * size_scaling if spinning else 0.0 + y_offset = 0.0 + shrink = 0.1 if spinning else 0.99 if falling else 1.0 + height_shrink = 0.1 if spinning else 1.0 + movable_body = ET.SubElement( + worldbody, + "body", + name=name, + pos="%f %f %f" + % ( + j * size_scaling - torso_x + x_offset, + i * size_scaling - torso_y + y_offset, + height_offset + height / 2 * size_scaling * height_shrink, + ), + ) + ET.SubElement( + movable_body, + "geom", + name="block_%d_%d" % (i, j), + pos="0 0 0", + size="%f %f %f" + % ( + 0.5 * size_scaling * shrink, + 0.5 * size_scaling * shrink, + height / 2 * size_scaling * height_shrink, + ), + type="box", + material="", + mass="0.001" if falling else "0.0002", + contype="1", + conaffinity="1", + rgba="0.9 0.1 0.1 1", + ) + if maze_env_utils.can_move_x(struct): + ET.SubElement( + movable_body, + "joint", + armature="0", + axis="1 0 0", + damping="0.0", + limited="true" if falling else "false", + range="%f %f" % (-size_scaling, size_scaling), + margin="0.01", + name="movable_x_%d_%d" % (i, j), + pos="0 0 0", + type="slide", + ) + if maze_env_utils.can_move_y(struct): + ET.SubElement( + movable_body, + "joint", + armature="0", + axis="0 1 0", + damping="0.0", + limited="true" if falling else "false", + range="%f %f" % (-size_scaling, size_scaling), + margin="0.01", + name="movable_y_%d_%d" % (i, j), + pos="0 0 0", + type="slide", + ) + if maze_env_utils.can_move_z(struct): + ET.SubElement( + movable_body, + "joint", + armature="0", + axis="0 0 1", + damping="0.0", + limited="true", + range="%f 0" % (-height_offset), + margin="0.01", + name="movable_z_%d_%d" % (i, j), + pos="0 0 0", + type="slide", + ) + if maze_env_utils.can_spin(struct): + ET.SubElement( + movable_body, + "joint", + armature="0", + axis="0 0 1", + damping="0.0", + limited="false", + 
name="spinable_%d_%d" % (i, j), + pos="0 0 0", + type="ball", + ) + + torso = tree.find(".//body[@name='torso']") + geoms = torso.findall(".//geom") + for geom in geoms: + if "name" not in geom.attrib: + raise Exception("Every geom of the torso must have a name " "defined") + + _, file_path = tempfile.mkstemp(text=True, suffix=".xml") + tree.write(file_path) + + self.wrapped_env = model_cls(*args, file_path=file_path, **kwargs) + + def get_ori(self): + return self.wrapped_env.get_ori() + + def get_top_down_view(self): + self._view = np.zeros_like(self._view) + + def valid(row, col): + return self._view.shape[0] > row >= 0 and self._view.shape[1] > col >= 0 + + def update_view(x, y, d, row=None, col=None): + if row is None or col is None: + x = x - self._robot_x + y = y - self._robot_y + th = self._robot_ori + + row, col = self._xy_to_rowcol(x, y) + update_view(x, y, d, row=row, col=col) + return + + row, row_frac, col, col_frac = int(row), row % 1, int(col), col % 1 + if row_frac < 0: + row_frac += 1 + if col_frac < 0: + col_frac += 1 + + if valid(row, col): + self._view[row, col, d] += ( + min(1.0, row_frac + 0.5) - max(0.0, row_frac - 0.5) + ) * (min(1.0, col_frac + 0.5) - max(0.0, col_frac - 0.5)) + if valid(row - 1, col): + self._view[row - 1, col, d] += (max(0.0, 0.5 - row_frac)) * ( + min(1.0, col_frac + 0.5) - max(0.0, col_frac - 0.5) + ) + if valid(row + 1, col): + self._view[row + 1, col, d] += (max(0.0, row_frac - 0.5)) * ( + min(1.0, col_frac + 0.5) - max(0.0, col_frac - 0.5) + ) + if valid(row, col - 1): + self._view[row, col - 1, d] += ( + min(1.0, row_frac + 0.5) - max(0.0, row_frac - 0.5) + ) * (max(0.0, 0.5 - col_frac)) + if valid(row, col + 1): + self._view[row, col + 1, d] += ( + min(1.0, row_frac + 0.5) - max(0.0, row_frac - 0.5) + ) * (max(0.0, col_frac - 0.5)) + if valid(row - 1, col - 1): + self._view[row - 1, col - 1, d] += (max(0.0, 0.5 - row_frac)) * max( + 0.0, 0.5 - col_frac + ) + if valid(row - 1, col + 1): + self._view[row - 1, col + 1, 
d] += (max(0.0, 0.5 - row_frac)) * max( + 0.0, col_frac - 0.5 + ) + if valid(row + 1, col + 1): + self._view[row + 1, col + 1, d] += (max(0.0, row_frac - 0.5)) * max( + 0.0, col_frac - 0.5 + ) + if valid(row + 1, col - 1): + self._view[row + 1, col - 1, d] += (max(0.0, row_frac - 0.5)) * max( + 0.0, 0.5 - col_frac + ) + + # Draw ant. + robot_x, robot_y = self.wrapped_env.get_body_com("torso")[:2] + self._robot_x = robot_x + self._robot_y = robot_y + self._robot_ori = self.get_ori() + + structure = self.MAZE_STRUCTURE + size_scaling = self.MAZE_SIZE_SCALING + height = self.MAZE_HEIGHT + + # Draw immovable blocks and chasms. + for i in range(len(structure)): + for j in range(len(structure[0])): + if structure[i][j] == 1: # Wall. + update_view( + j * size_scaling - self._init_torso_x, + i * size_scaling - self._init_torso_y, + 0, + ) + if structure[i][j] == -1: # Chasm. + update_view( + j * size_scaling - self._init_torso_x, + i * size_scaling - self._init_torso_y, + 1, + ) + + # Draw movable blocks. + for block_name, block_type in self.movable_blocks: + block_x, block_y = self.wrapped_env.get_body_com(block_name)[:2] + update_view(block_x, block_y, 2) + + return self._view + + def get_range_sensor_obs(self): + """Returns egocentric range sensor observations of maze.""" + robot_x, robot_y, robot_z = self.wrapped_env.get_body_com("torso")[:3] + ori = self.get_ori() + + structure = self.MAZE_STRUCTURE + size_scaling = self.MAZE_SIZE_SCALING + height = self.MAZE_HEIGHT + + segments = [] + # Get line segments (corresponding to outer boundary) of each immovable + # block or drop-off. + for i in range(len(structure)): + for j in range(len(structure[0])): + if structure[i][j] in [1, -1]: # There's a wall or drop-off. 
+ cx = j * size_scaling - self._init_torso_x + cy = i * size_scaling - self._init_torso_y + x1 = cx - 0.5 * size_scaling + x2 = cx + 0.5 * size_scaling + y1 = cy - 0.5 * size_scaling + y2 = cy + 0.5 * size_scaling + struct_segments = [ + ((x1, y1), (x2, y1)), + ((x2, y1), (x2, y2)), + ((x2, y2), (x1, y2)), + ((x1, y2), (x1, y1)), + ] + for seg in struct_segments: + segments.append(dict(segment=seg, type=structure[i][j],)) + # Get line segments (corresponding to outer boundary) of each movable + # block within the agent's z-view. + for block_name, block_type in self.movable_blocks: + block_x, block_y, block_z = self.wrapped_env.get_body_com(block_name)[:3] + if ( + block_z + height * size_scaling / 2 >= robot_z + and robot_z >= block_z - height * size_scaling / 2 + ): # Block in view. + x1 = block_x - 0.5 * size_scaling + x2 = block_x + 0.5 * size_scaling + y1 = block_y - 0.5 * size_scaling + y2 = block_y + 0.5 * size_scaling + struct_segments = [ + ((x1, y1), (x2, y1)), + ((x2, y1), (x2, y2)), + ((x2, y2), (x1, y2)), + ((x1, y2), (x1, y1)), + ] + for seg in struct_segments: + segments.append(dict(segment=seg, type=block_type,)) + + sensor_readings = np.zeros((self._n_bins, 3)) # 3 for wall, drop-off, block + for ray_idx in range(self._n_bins): + ray_ori = ( + ori + - self._sensor_span * 0.5 + + (2 * ray_idx + 1.0) / (2 * self._n_bins) * self._sensor_span + ) + ray_segments = [] + # Get all segments that intersect with ray. + for seg in segments: + p = maze_env_utils.ray_segment_intersect( + ray=((robot_x, robot_y), ray_ori), segment=seg["segment"] + ) + if p is not None: + ray_segments.append( + dict( + segment=seg["segment"], + type=seg["type"], + ray_ori=ray_ori, + distance=maze_env_utils.point_distance( + p, (robot_x, robot_y) + ), + ) + ) + if len(ray_segments) > 0: + # Find out which segment is intersected first. + first_seg = sorted(ray_segments, key=lambda x: x["distance"])[0] + seg_type = first_seg["type"] + idx = ( + 0 + if seg_type == 1 + else 1 # Wall. 
+ if seg_type == -1 + else 2 # Drop-off. + if maze_env_utils.can_move(seg_type) + else None # Block. + ) + if first_seg["distance"] <= self._sensor_range: + sensor_readings[ray_idx][idx] = ( + self._sensor_range - first_seg["distance"] + ) / self._sensor_range + + return sensor_readings + + def _get_obs(self): + wrapped_obs = self.wrapped_env._get_obs() + if self._top_down_view: + view = [self.get_top_down_view().flat] + else: + view = [] + + if self._observe_blocks: + additional_obs = [] + for block_name, block_type in self.movable_blocks: + additional_obs.append(self.wrapped_env.get_body_com(block_name)) + wrapped_obs = np.concatenate( + [wrapped_obs[:3]] + additional_obs + [wrapped_obs[3:]] + ) + + range_sensor_obs = self.get_range_sensor_obs() + return np.concatenate( + [wrapped_obs, range_sensor_obs.flat] + view + [[self.t * 0.001]] + ) + + def reset(self): + self.t = 0 + self.trajectory = [] + self.wrapped_env.reset() + if len(self._init_positions) > 1: + xy = random.choice(self._init_positions) + self.wrapped_env.set_xy(xy) + return self._get_obs() + + @property + def viewer(self): + return self.wrapped_env.viewer + + def render(self, *args, **kwargs): + return self.wrapped_env.render(*args, **kwargs) + + @property + def observation_space(self): + shape = self._get_obs().shape + high = np.inf * np.ones(shape) + low = -high + return gym.spaces.Box(low, high) + + @property + def action_space(self): + return self.wrapped_env.action_space + + def _find_robot(self): + structure = self.MAZE_STRUCTURE + size_scaling = self.MAZE_SIZE_SCALING + for i in range(len(structure)): + for j in range(len(structure[0])): + if structure[i][j] == "r": + return j * size_scaling, i * size_scaling + assert False, "No robot in maze specification." 
# NOTE(review): the three functions below take ``self`` and read maze-environment
# attributes, so they are presumably methods of the environment class defined in
# mujoco_maze/maze_env.py; its ``class`` header is outside this chunk -- confirm
# and re-indent under that class.


def _find_all_robots(self):
    """Return the unshifted (x, y) position of every robot start cell.

    Scans ``self.MAZE_STRUCTURE`` row by row and, for each cell equal to
    ``"r"``, records ``(column * MAZE_SIZE_SCALING, row * MAZE_SIZE_SCALING)``.

    Returns:
        A list of ``(x, y)`` tuples in row-major order; empty when the maze
        contains no ``"r"`` cell.
    """
    size_scaling = self.MAZE_SIZE_SCALING
    # Iterating each row directly (instead of range(len(structure[0]))) also
    # copes with rows whose length differs from the first row.
    return [
        (j * size_scaling, i * size_scaling)
        for i, row in enumerate(self.MAZE_STRUCTURE)
        for j, cell in enumerate(row)
        if cell == "r"
    ]


def _is_in_collision(self, pos):
    """Return True if ``pos`` lies inside, or on the edge of, a wall cell.

    Args:
        pos: ``(x, y)`` pair expressed in the same frame as the wall boxes
            built here, i.e. cell centres shifted by
            ``(self._init_torso_x, self._init_torso_y)``.

    Returns:
        ``True`` when the point falls within the axis-aligned square of any
        cell whose value is ``1``; ``False`` otherwise.
    """
    x, y = pos
    size_scaling = self.MAZE_SIZE_SCALING
    half = size_scaling * 0.5  # Loop-invariant half width of one cell.
    for i, row in enumerate(self.MAZE_STRUCTURE):
        for j, cell in enumerate(row):
            if cell != 1:
                continue  # Only cells marked 1 are treated as walls here.
            minx = j * size_scaling - half - self._init_torso_x
            maxx = j * size_scaling + half - self._init_torso_x
            miny = i * size_scaling - half - self._init_torso_y
            maxy = i * size_scaling + half - self._init_torso_y
            if minx <= x <= maxx and miny <= y <= maxy:
                return True
    return False


def step(self, action):
    """Advance the wrapped environment by one step.

    Increments ``self.t`` and steps ``self.wrapped_env``. When
    ``self._manual_collision`` is set, the xy position recorded before the
    step is restored if the new position is inside a wall cell.

    Args:
        action: Passed unchanged to ``self.wrapped_env.step``.

    Returns:
        ``(next_obs, inner_reward, done, info)`` where ``next_obs`` comes from
        ``self._get_obs()``, ``inner_reward`` and ``info`` come from the
        wrapped environment, and ``done`` is always ``False`` (the wrapped
        environment's own done flag is discarded).
    """
    self.t += 1
    manual_collision = self._manual_collision
    if manual_collision:
        # Remember where we were so an illegal move can be undone.
        old_pos = self.wrapped_env.get_xy()
    _, inner_reward, _, info = self.wrapped_env.step(action)
    if manual_collision and self._is_in_collision(self.wrapped_env.get_xy()):
        self.wrapped_env.set_xy(old_pos)
    next_obs = self._get_obs()
    return next_obs, inner_reward, False, info


# ---- patch metadata for the next file, kept verbatim as comments ----
# diff --git a/mujoco_maze/maze_env_utils.py b/mujoco_maze/maze_env_utils.py
# new file mode 100644
# index 0000000..5a7667f
# --- /dev/null
# +++ b/mujoco_maze/maze_env_utils.py
# @@ -0,0 +1,162 @@
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Adapted from rllab maze_env_utils.py."""
import math


class Move(object):
    """Integer tags that mark a maze cell as a movable block.

    Each constant names the axes along which the block may move. ``SpinXY``
    is accepted by both ``can_move_x`` and ``can_move_y`` and is the only tag
    accepted by ``can_spin``.
    """

    X = 11
    Y = 12
    Z = 13
    XY = 14
    XZ = 15
    YZ = 16
    XYZ = 17
    SpinXY = 18


# Tag groups are built once instead of on every call.
_X_TAGS = (Move.X, Move.XY, Move.XZ, Move.XYZ, Move.SpinXY)
_Y_TAGS = (Move.Y, Move.XY, Move.YZ, Move.XYZ, Move.SpinXY)
_Z_TAGS = (Move.Z, Move.XZ, Move.YZ, Move.XYZ)
_SPIN_TAGS = (Move.SpinXY,)


def can_move_x(movable):
    """Return True if the cell tag ``movable`` allows motion along x."""
    return movable in _X_TAGS


def can_move_y(movable):
    """Return True if the cell tag ``movable`` allows motion along y."""
    return movable in _Y_TAGS


def can_move_z(movable):
    """Return True if the cell tag ``movable`` allows motion along z."""
    return movable in _Z_TAGS


def can_spin(movable):
    """Return True if the cell tag ``movable`` is ``Move.SpinXY``."""
    return movable in _SPIN_TAGS


def can_move(movable):
    """Return True if the cell tag ``movable`` allows motion along any axis."""
    return can_move_x(movable) or can_move_y(movable) or can_move_z(movable)


def construct_maze(maze_id="Maze"):
    """Build the cell grid for one of the predefined mazes.

    Cell values used by the layouts: ``1`` and ``0`` (``1`` is the value other
    code in this patch treats as a wall), ``-1`` (labelled "drop-off" by the
    range sensor), ``"r"`` for the robot start cell, and ``Move`` tags for
    movable blocks.

    Args:
        maze_id: One of ``"Maze"``, ``"Push"``, ``"Fall"``, ``"Block"`` or
            ``"BlockMaze"``.

    Returns:
        A freshly built list of rows (lists), so callers may mutate it.

    Raises:
        NotImplementedError: If ``maze_id`` is not one of the known ids.
    """
    if maze_id == "Maze":
        structure = [
            [1, 1, 1, 1, 1],
            [1, "r", 0, 0, 1],
            [1, 1, 1, 0, 1],
            [1, 0, 0, 0, 1],
            [1, 1, 1, 1, 1],
        ]
    elif maze_id == "Push":
        structure = [
            [1, 1, 1, 1, 1],
            [1, 0, "r", 1, 1],
            [1, 0, Move.XY, 0, 1],
            [1, 1, 0, 1, 1],
            [1, 1, 1, 1, 1],
        ]
    elif maze_id == "Fall":
        structure = [
            [1, 1, 1, 1],
            [1, "r", 0, 1],
            [1, 0, Move.YZ, 1],
            [1, -1, -1, 1],
            [1, 0, 0, 1],
            [1, 1, 1, 1],
        ]
    elif maze_id == "Block":
        structure = [
            [1, 1, 1, 1, 1],
            [1, "r", 0, 0, 1],
            [1, 0, 0, 0, 1],
            [1, 0, 0, 0, 1],
            [1, 1, 1, 1, 1],
        ]
    elif maze_id == "BlockMaze":
        structure = [
            [1, 1, 1, 1],
            [1, "r", 0, 1],
            [1, 1, 0, 1],
            [1, 0, 0, 1],
            [1, 1, 1, 1],
        ]
    else:
        raise NotImplementedError("The provided MazeId %s is not recognized" % maze_id)

    return structure


def line_intersect(pt1, pt2, ptA, ptB):
    """Intersect the infinite lines through (pt1, pt2) and (ptA, ptB).

    Taken from https://www.cs.hmc.edu/ACM/lectures/intersections.html

    Args:
        pt1, pt2: Two ``(x, y)`` points on the first line.
        ptA, ptB: Two ``(x, y)`` points on the second line.

    Returns:
        ``(xi, yi, valid, r, s)``. ``(xi, yi)`` is the intersection point,
        ``r`` the parameter along ``pt1 + r * (pt2 - pt1)`` and ``s`` the
        parameter along ``ptA + s * (ptB - ptA)``. ``valid`` is ``1`` on
        success; when the lines are parallel within tolerance the result is
        the all-zero tuple ``(0, 0, 0, 0, 0)``.
    """
    det_tolerance = 1e-8

    # The first line is pt1 + r * (pt2 - pt1), in component form:
    x1, y1 = pt1
    x2, y2 = pt2
    dx1 = x2 - x1
    dy1 = y2 - y1

    # The second line is ptA + s * (ptB - ptA).
    x, y = ptA
    xB, yB = ptB
    dx = xB - x
    dy = yB - y

    det = -dx1 * dy + dy1 * dx
    if math.fabs(det) < det_tolerance:
        return (0, 0, 0, 0, 0)

    det_inv = 1.0 / det

    # Scalar position along the first line.
    r = det_inv * (-dy * (x - x1) + dx * (y - y1))
    # Scalar position along the second line.
    s = det_inv * (-dy1 * (x - x1) + dx1 * (y - y1))

    # Average the two descriptions of the same point.
    xi = (x1 + r * dx1 + x + s * dx) / 2.0
    yi = (y1 + r * dy1 + y + s * dy) / 2.0
    return (xi, yi, 1, r, s)


def ray_segment_intersect(ray, segment):
    """Intersect a ray with a line segment.

    Args:
        ray: ``((x, y), theta)`` -- origin and direction angle in radians.
        segment: ``((x1, y1), (x2, y2))`` end points of the segment.

    Returns:
        The intersection point ``(x, y)`` if the ray (``r >= 0``) hits the
        segment (``0 <= s <= 1``); ``None`` otherwise, including when ray and
        segment are parallel.
    """
    (x, y), theta = ray
    origin = (x, y)
    # A second point at unit distance along the ray fixes its direction.
    ahead = (x + math.cos(theta), y + math.sin(theta))
    xo, yo, valid, r, s = line_intersect(origin, ahead, *segment)
    if valid and r >= 0 and 0 <= s <= 1:
        return (xo, yo)
    return None


def point_distance(p1, p2):
    """Return the Euclidean distance between 2-D points ``p1`` and ``p2``."""
    x1, y1 = p1
    x2, y2 = p2
    return math.hypot(x1 - x2, y1 - y2)


# ---- patch metadata for the next file, kept verbatim as comments ----
# diff --git a/mujoco_maze/point.py b/mujoco_maze/point.py
# new file mode 100644
# index 0000000..3a4fd3f
# --- /dev/null
# +++ b/mujoco_maze/point.py
# @@ -0,0 +1,101 @@
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Wrapper for creating the point environment in gym_mujoco."""

import math
import numpy as np
import mujoco_py
from gym import utils
from gym.envs.mujoco import mujoco_env


def _version_tuple(version):
    """Parse the leading numeric components of a dotted version string.

    ``"1.50.1.68"`` becomes ``(1, 50, 1, 68)``. Parsing stops at the first
    component that does not start with a digit; trailing non-digit characters
    inside a component (``"50rc1"``) are ignored.
    """
    parts = []
    for piece in str(version).split("."):
        digits = ""
        for ch in piece:
            if not ch.isdigit():
                break
            digits += ch
        if not digits:
            break
        parts.append(int(digits))
    return tuple(parts)


class PointEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """Point robot whose pose is integrated directly from the action.

    The first three ``qpos`` entries are used as x, y and orientation
    (``ORI_IND = 2``).
    """

    FILE = "point.xml"
    ORI_IND = 2

    def __init__(self, file_path=None, expose_all_qpos=True):
        """Create the environment.

        Args:
            file_path: Model file handed to ``MujocoEnv.__init__``.
            expose_all_qpos: When true, observations include x, y and
                orientation; otherwise only the orientation entry of qpos.
        """
        self._expose_all_qpos = expose_all_qpos

        # NOTE(review): the literal 1 is presumably gym's ``frame_skip``
        # argument -- confirm against the installed gym version.
        mujoco_env.MujocoEnv.__init__(self, file_path, 1)
        utils.EzPickle.__init__(self)

    @property
    def physics(self):
        """Return the object exposing ``data``/``model`` for this mujoco-py."""
        # check mujoco version is greater than version 1.50 to call correct physics
        # model containing PyMjData object for getting and setting position/velocity
        # check https://github.com/openai/mujoco-py/issues/80 for updates to api
        # Compare numerically: a plain string comparison orders versions
        # lexicographically (e.g. "1.100" would sort before "1.50").
        if _version_tuple(mujoco_py.get_version()) >= (1, 50):
            return self.sim
        return self.model

    def _step(self, a):
        """Alias that forwards to ``step``."""
        return self.step(a)

    def step(self, action):
        """Move the point and advance the simulation.

        Args:
            action: Indexable pair; ``action[0]`` is a forward displacement
                (scaled by 0.2) and ``action[1]`` is added to the orientation.
                The caller's object is not modified.

        Returns:
            ``(next_obs, reward, done, info)`` with ``reward = 0``,
            ``done = False`` and an empty ``info`` dict.
        """
        # Scale into a local instead of writing back into ``action``, so the
        # caller's array is left untouched.
        forward = 0.2 * action[0]
        qpos = np.copy(self.physics.data.qpos)
        qpos[2] += action[1]
        ori = qpos[2]
        # compute increment in each direction
        dx = math.cos(ori) * forward
        dy = math.sin(ori) * forward
        # ensure that the robot is within reasonable range
        qpos[0] = np.clip(qpos[0] + dx, -100, 100)
        qpos[1] = np.clip(qpos[1] + dy, -100, 100)
        qvel = self.physics.data.qvel
        self.set_state(qpos, qvel)
        for _ in range(self.frame_skip):
            self.physics.step()
        next_obs = self._get_obs()
        reward = 0
        done = False
        info = {}
        return next_obs, reward, done, info

    def _get_obs(self):
        """Return the observation vector.

        With ``expose_all_qpos`` the result is the first three qpos entries
        followed by the first three qvel entries; otherwise only qpos[2] is
        kept from qpos.
        """
        if self._expose_all_qpos:
            return np.concatenate(
                [
                    self.physics.data.qpos.flat[:3],  # Only point-relevant coords.
                    self.physics.data.qvel.flat[:3],
                ]
            )
        return np.concatenate(
            [self.physics.data.qpos.flat[2:3], self.physics.data.qvel.flat[:3]]
        )

    def reset_model(self):
        """Reset to a randomly perturbed initial state and return the obs."""
        qpos = self.init_qpos + self.np_random.uniform(
            size=self.physics.model.nq, low=-0.1, high=0.1
        )
        qvel = self.init_qvel + self.np_random.randn(self.physics.model.nv) * 0.1

        # Set everything other than point to original position and 0 velocity.
        qpos[3:] = self.init_qpos[3:]
        qvel[3:] = 0.0
        self.set_state(qpos, qvel)
        return self._get_obs()

    def get_ori(self):
        """Return the orientation entry of qpos (index ``ORI_IND``)."""
        return self.physics.data.qpos[self.__class__.ORI_IND]

    def set_xy(self, xy):
        """Move the point to ``xy`` while keeping all other state unchanged."""
        qpos = np.copy(self.physics.data.qpos)
        qpos[0] = xy[0]
        qpos[1] = xy[1]

        qvel = self.physics.data.qvel
        # The new pose was previously computed and then dropped; it has to be
        # written back for the call to have any effect.
        self.set_state(qpos, qvel)

    def get_xy(self):
        """Return a copy of the current ``(x, y)`` entries of qpos.

        NOTE(review): added because the maze wrapper in this patch calls
        ``wrapped_env.get_xy()`` next to ``set_xy`` -- confirm this class is
        used as that wrapped environment.
        """
        return np.copy(self.physics.data.qpos[:2])


# ---- patch metadata for the next file, kept verbatim as comments ----
# diff --git a/mujoco_maze/point_maze_env.py b/mujoco_maze/point_maze_env.py
# new file mode 100644
# index 0000000..8d6b819
# --- /dev/null
# +++ b/mujoco_maze/point_maze_env.py
# @@ -0,0 +1,21 @@
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
+# ============================================================================== + +from environments.maze_env import MazeEnv +from environments.point import PointEnv + + +class PointMazeEnv(MazeEnv): + MODEL_CLASS = PointEnv diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..8ca620b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,18 @@ +[tool.black] +line-length = 88 +target-version = ['py36'] +include = '\.pyi?$' +exclude = ''' + +( + /( + \.eggs + | \.git + | \.mypy_cache + | _build + | buck-out + | build + | dist + )/ +) +''' \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..42a26d5 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,18 @@ +[flake8] +max-line-length = 88 +ignore = W391, W503, F821, E203, E231 + +[mypy] +python_version = 3.6 +ignore_missing_imports = True +warn_unused_configs = True + +[isort] +multi_line_output=3 +include_trailing_comma=True +force_grid_wrap=0 +use_parentheses=True +line_length=88 +lines_between_types=0 +combine_as_imports=True +known_first_party=rainy, tests