From 31156cec4d78bd7d9ed4a6539560fc9a17cd63ce Mon Sep 17 00:00:00 2001 From: ottofabian Date: Fri, 28 Aug 2020 18:31:06 +0200 Subject: [PATCH] added simple reacher task --- .gitignore | 111 ++++++++++++ .idea/test.iml | 8 - README.md | 2 +- __init__.py | 0 alr_envs/__init__.py | 16 ++ alr_envs/classic_control/__init__.py | 1 + alr_envs/classic_control/simple_reacher.py | 166 ++++++++++++++++++ alr_envs/mujoco/__init__.py | 1 + .../mujoco/alr_reacher.py | 12 +- .../mujoco/assets}/reacher_5links.xml | 0 example.py | 18 +- reacher.egg-info/SOURCES.txt | 1 + reacher/__init__.py | 6 - reacher/__pycache__/__init__.cpython-37.pyc | Bin 271 -> 0 bytes reacher/envs/__init__.py | 1 - .../envs/__pycache__/__init__.cpython-37.pyc | Bin 201 -> 0 bytes .../__pycache__/reacher_env.cpython-37.pyc | Bin 2011 -> 0 bytes setup.py | 2 +- 18 files changed, 315 insertions(+), 30 deletions(-) create mode 100644 .gitignore delete mode 100644 .idea/test.iml create mode 100644 __init__.py create mode 100644 alr_envs/__init__.py create mode 100644 alr_envs/classic_control/__init__.py create mode 100644 alr_envs/classic_control/simple_reacher.py create mode 100644 alr_envs/mujoco/__init__.py rename reacher/envs/reacher_env.py => alr_envs/mujoco/alr_reacher.py (76%) rename {reacher/envs => alr_envs/mujoco/assets}/reacher_5links.xml (100%) delete mode 100644 reacher/__init__.py delete mode 100644 reacher/__pycache__/__init__.cpython-37.pyc delete mode 100644 reacher/envs/__init__.py delete mode 100644 reacher/envs/__pycache__/__init__.cpython-37.pyc delete mode 100644 reacher/envs/__pycache__/reacher_env.cpython-37.pyc diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..766e68d --- /dev/null +++ b/.gitignore @@ -0,0 +1,111 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# pycharm +.DS_Store +/.idea + +#configs +/configs/db.cfg diff --git a/.idea/test.iml b/.idea/test.iml deleted file mode 100644 index 02f1ba9..0000000 --- a/.idea/test.iml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/README.md b/README.md index 9f103bd..5d903d0 100644 --- a/README.md +++ b/README.md @@ -10,4 +10,4 @@ - Install: go to "../reacher_5_links" ``` pip install -e reacher_5_links ``` - Use (see example.py): - ``` env = gym.make('reacher:ReacherALREnv-v0')``` \ No newline at end of file + ``` env = gym.make('reacher:ALRReacherEnv-v0')``` \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py new file mode 100644 index 0000000..86b06c3 --- /dev/null +++ b/alr_envs/__init__.py @@ -0,0 +1,16 @@ +from gym.envs.registration import register + +register( + id='ALRReacher-v0', + entry_point='alr_envs.mujoco:ALRReacherEnv', + max_episode_steps=1000, +) + +register( + id='SimpleReacher-v0', + entry_point='alr_envs.classic_control:SimpleReacherEnv', + max_episode_steps=200, + kwargs={ + "n_links": 5, + } +) diff --git a/alr_envs/classic_control/__init__.py b/alr_envs/classic_control/__init__.py new file mode 100644 index 0000000..53e2448 --- /dev/null +++ b/alr_envs/classic_control/__init__.py @@ -0,0 +1 @@ +from alr_envs.classic_control.simple_reacher import SimpleReacherEnv diff --git a/alr_envs/classic_control/simple_reacher.py b/alr_envs/classic_control/simple_reacher.py new file mode 100644 index 0000000..37319bb --- /dev/null +++ b/alr_envs/classic_control/simple_reacher.py @@ -0,0 +1,166 @@ +import gym +import numpy as np +from gym import spaces, utils +from gym.utils import seeding + +import matplotlib as mpl +import matplotlib.pyplot as plt + +mpl.use('Qt5Agg') # or can use 'TkAgg', whatever you have/prefer + + +class SimpleReacherEnv(gym.Env, utils.EzPickle): + """ + Simple Reaching Task without any physics simulation. + Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions + towards the end of the trajectory. + """ + + def __init__(self, n_links): + super().__init__() + self.link_lengths = np.ones(n_links) + self.n_links = n_links + self.dt = 0.1 + + self._goal_pos = None + + self.joints = None + self._joint_angle = None + self._angle_velocity = None + + self.max_torque = 1 # 10 + + action_bound = np.ones((self.n_links,)) + state_bound = np.hstack([ + [np.pi] * self.n_links, + [np.inf] * self.n_links, + [np.inf], + [np.inf] # TODO: Maybe + ]) + self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) + self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) + + self.fig = None + self.metadata = {'render.modes': ["human"]} + + self._steps = 0 + self.seed() + + def step(self, action): + + action = self._scale_action(action) + + self._angle_velocity = self._angle_velocity + self.dt * action + self._joint_angle = angle_normalize(self._joint_angle + self.dt * self._angle_velocity) + self._update_joints() + self._steps += 1 + + reward = self._get_reward(action) + + # done = np.abs(self.end_effector - self._goal_pos) < 0.1 + done = False + + return self._get_obs().copy(), reward, done, {} + + def _scale_action(self, action): + """ + scale actions back in order to provide normalized actions \in [0,1] + + Args: + action: action to scale + + Returns: action according to self.max_torque + + """ + + ub = self.max_torque + lb = -self.max_torque + + action = lb + (action + 1.) * 0.5 * (ub - lb) + return np.clip(action, lb, ub) + + def _get_obs(self): + return [self._joint_angle, self._angle_velocity, self.end_effector - self._goal_pos, self._steps] + + def _update_joints(self): + """ + update joints to get new end effector position. The other links are only required for rendering. + Returns: + + """ + angles = np.cumsum(self._joint_angle) + x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)]) + self.joints[1:] = self.joints[0] + np.cumsum(x.T, axis=0) + + def _get_reward(self, action): + diff = self.end_effector - self._goal_pos + distance = 0 + + # TODO: Is this the best option + if self._steps > 150: + distance = np.exp(-0.1 * diff ** 2).mean() + # distance -= (diff ** 2).mean() + + # distance -= action ** 2 + return distance + + def reset(self): + + # TODO: maybe do initialisation more random? + # Sample only orientation of first link, i.e. the arm is always straight. + self._joint_angle = np.hstack([[self.np_random.uniform(-np.pi, np.pi)], np.zeros(self.n_links - 1)]) + self._angle_velocity = np.zeros(self.n_links) + self.joints = np.zeros((self.n_links + 1, 2)) + self._update_joints() + + self._goal_pos = self._get_random_goal() + return self._get_obs().copy() + + def _get_random_goal(self): + center = self.joints[0] + + # Sample uniformly in circle with radius R around center of reacher. + R = np.sum(self.link_lengths) + r = R * np.sqrt(self.np_random.uniform()) + theta = self.np_random.uniform() * 2 * np.pi + return center + r * np.stack([np.cos(theta), np.sin(theta)]) + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def render(self, mode='human'): # pragma: no cover + if self.fig is None: + self.fig = plt.figure() + plt.ion() + plt.show() + else: + plt.figure(self.fig.number) + + plt.cla() + + # Arm + plt.plot(self.joints[:, 0], self.joints[:, 1], 'ro-', markerfacecolor='k') + + # goal + goal_pos = self._goal_pos.T + plt.plot(goal_pos[0], goal_pos[1], 'gx') + # distance between end effector and goal + plt.plot([self.end_effector[0], goal_pos[0]], [self.end_effector[1], goal_pos[1]], 'g--') + + lim = np.sum(self.link_lengths) + 0.5 + plt.xlim([-lim, lim]) + plt.ylim([-lim, lim]) + plt.draw() + plt.pause(0.0001) + + def close(self): + del self.fig + + @property + def end_effector(self): + return self.joints[self.n_links].T + + +def angle_normalize(x): + return ((x + np.pi) % (2 * np.pi)) - np.pi diff --git a/alr_envs/mujoco/__init__.py b/alr_envs/mujoco/__init__.py new file mode 100644 index 0000000..77588f7 --- /dev/null +++ b/alr_envs/mujoco/__init__.py @@ -0,0 +1 @@ +from alr_envs.mujoco.alr_reacher import ALRReacherEnv \ No newline at end of file diff --git a/reacher/envs/reacher_env.py b/alr_envs/mujoco/alr_reacher.py similarity index 76% rename from reacher/envs/reacher_env.py rename to alr_envs/mujoco/alr_reacher.py index 2647413..95e667e 100644 --- a/reacher/envs/reacher_env.py +++ b/alr_envs/mujoco/alr_reacher.py @@ -1,14 +1,16 @@ import numpy as np +import os from gym import utils from gym.envs.mujoco import mujoco_env -class ReacherALREnv(mujoco_env.MujocoEnv, utils.EzPickle): + +class ALRReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__(self): utils.EzPickle.__init__(self) - mujoco_env.MujocoEnv.__init__(self, '/home/vien/git/reacher_test/reacher/envs/reacher_5links.xml', 2) + mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", 'reacher_5links.xml'), 2) def step(self, a): - vec = self.get_body_com("fingertip")-self.get_body_com("target") + vec = self.get_body_com("fingertip") - self.get_body_com("target") reward_dist = - np.linalg.norm(vec) reward_ctrl = - np.square(a).sum() reward = reward_dist + reward_ctrl @@ -38,7 +40,7 @@ class ReacherALREnv(mujoco_env.MujocoEnv, utils.EzPickle): return np.concatenate([ np.cos(theta), np.sin(theta), - self.sim.data.qpos.flat[5:], # this is goal position - self.sim.data.qvel.flat[:5], # this is angular velocity + self.sim.data.qpos.flat[5:], # this is goal position + self.sim.data.qvel.flat[:5], # this is angular velocity self.get_body_com("fingertip") - self.get_body_com("target") ]) diff --git a/reacher/envs/reacher_5links.xml b/alr_envs/mujoco/assets/reacher_5links.xml similarity index 100% rename from reacher/envs/reacher_5links.xml rename to alr_envs/mujoco/assets/reacher_5links.xml diff --git a/example.py b/example.py index 4cb210b..fda066a 100644 --- a/example.py +++ b/example.py @@ -1,13 +1,15 @@ import gym +if __name__ == '__main__': -if __name__ == "__main__": - env = gym.make('reacher:ReacherALREnv-v0') - #env = gym.make('Hopper-v2') - env.reset() + # env = gym.make('alr_envs:ALRReacher-v0') + env = gym.make('alr_envs:SimpleReacher-v0') + state = env.reset() for i in range(10000): - action = env.action_space.sample() - obs = env.step(action) - print("step",i) - env.render() + state, reward, done, info = env.step(env.action_space.sample()) + if i % 5 == 0: + env.render() + + if done: + state = env.reset() diff --git a/reacher.egg-info/SOURCES.txt b/reacher.egg-info/SOURCES.txt index 147b0db..b771181 100644 --- a/reacher.egg-info/SOURCES.txt +++ b/reacher.egg-info/SOURCES.txt @@ -1,3 +1,4 @@ +README.md setup.py reacher.egg-info/PKG-INFO reacher.egg-info/SOURCES.txt diff --git a/reacher/__init__.py b/reacher/__init__.py deleted file mode 100644 index 2044486..0000000 --- a/reacher/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from gym.envs.registration import register - -register( - id='ReacherALREnv-v0', - entry_point='reacher.envs:ReacherALREnv', -) diff --git a/reacher/__pycache__/__init__.cpython-37.pyc b/reacher/__pycache__/__init__.cpython-37.pyc deleted file mode 100644 index 7e115b85dc12560388e1ba71ad9014722f91d8cd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 271 zcmZ?b<>g`kf-nC(;v0eVV-N=hSbz)%ATE{x5-AKRj5!Rsj8Tk?45^GMOexGMEWJ#O z3@NO^44P~&fhrg@8EVm!uX|2?V7kCTFA;Ir;>-=9TG|8B|FXK}7UY^U8{? zP^2}PZZT!1+~Q8nD=DgsFUZf#EAa!0iKbWPf>rB5j4etm$;{6yVglN=lA(wVNP&r8 z`uZ99xvBbPnW=gD>6s<^P>bSAQj6gn{rLFIyv&mLc)fzkTO2mI`6;D2sdkJ&Gm1e5 L@h~wlGBE-GCHPA2 diff --git a/reacher/envs/__init__.py b/reacher/envs/__init__.py deleted file mode 100644 index 00340ff..0000000 --- a/reacher/envs/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from reacher.envs.reacher_env import ReacherALREnv diff --git a/reacher/envs/__pycache__/__init__.cpython-37.pyc b/reacher/envs/__pycache__/__init__.cpython-37.pyc deleted file mode 100644 index 6409b90c8a0b20adc90be48808264326df88b2b0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 201 zcmZ?b<>g`kg519z@y0;^&I8M%_7h#5$MiC-rA8Tq-X z`em7^dHU&@CHgSMC8@=5jy}j3{rLFIyv&mLc)fzkTO2mI`6;D2sdkJ&gFXW>0{}DI BG#dZ_ diff --git a/reacher/envs/__pycache__/reacher_env.cpython-37.pyc b/reacher/envs/__pycache__/reacher_env.cpython-37.pyc deleted file mode 100644 index ccdf81522871adba0a9a86466bad4c2dd0e3f523..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2011 zcmb7FJC7Vi5bo}I?Ci@n$H~UX7y<$Yp}Pcx1Qs?RV@eM2`IzIzvSF7YHz4&Fr4<3<)E3b#-@Db@f+|`D!@qF?`Q@-^P!8 z#{Q(m`t!it!Z4qKNG5sCB92+)qHrRI)1H&Ng%^38-C@#|-eV>`tH? z9T{NuWmonvcVr)z4eaoqO441Wzx?jrZJ6h7dRw=q<64>1#Wo&cPES6`vvSXDJuLDQdQ9kPnrv`; zb~j7+a%F=!&dRKgW9`sE_xlD3cnVltqw<|!nWmWh_38h<8;)%4AL$OX5ld!V5XJ|5 zcKIA6TWb$ea*e(KgR~lKcEQZq0$410Gm9mEiye!aPsGBRW7TKJPR$qY91;s}$*^|k z&fLAijy-7jbLdSvOTo!buw=)+bdEc7f9}mYb(bWi`&8_>z}Oqv?oL*Ym9DdW>(_}M ztNNQz*uGW|5-nqynfi2*);bS+b^w|9ekG6Mv?{D9p$OEId~Dsa(uMWS!8FmzI%Zl# zLs`WpE2eo;XH^;Xc63sx*zDoDASOfAePdmjrF9tSF5Yf=Vx2>k>I+!ti?rjn&R9|1 zhiFx*CPFRglYnb#wNFtqmp~Zb=ItyreiZ*wdAg&_iy~AopLgf1*r{8T zoRmb3TR($oouUE(Cnl)Irw%v@I)C`l>a?lN_9XUdf6`e5IPsQTdV5S?zsnA;|45d+ zIY;%fC*lUJx-0EK5+^1-vH-h6_U7W1+@g=f605WtED4iO zQY-R`N~$~(5n;`4+LzN>%4{aA=h)tc;N?*ot=(Ne^ zk^$=;KRknOde5sO zY+!vKTZXELYS2aX1wd-!-yLvO_@gkOhFnrhh;1(>u%|ikgE&SL$=j2rrRsO+x_60u zNQ9zz#)ZZiY6hCxF1^~&7Cdl+Uf=}7W(v&UM86OqR4