From 31156cec4d78bd7d9ed4a6539560fc9a17cd63ce Mon Sep 17 00:00:00 2001
From: ottofabian <fabi.otto@t-online.de>
Date: Fri, 28 Aug 2020 18:31:06 +0200
Subject: [PATCH] added simple reacher task

---
 .gitignore                                    | 111 ++++++++++++
 .idea/test.iml                                |   8 -
 README.md                                     |   2 +-
 __init__.py                                   |   0
 alr_envs/__init__.py                          |  16 ++
 alr_envs/classic_control/__init__.py          |   1 +
 alr_envs/classic_control/simple_reacher.py    | 166 ++++++++++++++++++
 alr_envs/mujoco/__init__.py                   |   1 +
 .../mujoco/alr_reacher.py                     |  12 +-
 .../mujoco/assets}/reacher_5links.xml         |   0
 example.py                                    |  18 +-
 reacher.egg-info/SOURCES.txt                  |   1 +
 reacher/__init__.py                           |   6 -
 reacher/__pycache__/__init__.cpython-37.pyc   | Bin 271 -> 0 bytes
 reacher/envs/__init__.py                      |   1 -
 .../envs/__pycache__/__init__.cpython-37.pyc  | Bin 201 -> 0 bytes
 .../__pycache__/reacher_env.cpython-37.pyc    | Bin 2011 -> 0 bytes
 setup.py                                      |   2 +-
 18 files changed, 315 insertions(+), 30 deletions(-)
 create mode 100644 .gitignore
 delete mode 100644 .idea/test.iml
 create mode 100644 __init__.py
 create mode 100644 alr_envs/__init__.py
 create mode 100644 alr_envs/classic_control/__init__.py
 create mode 100644 alr_envs/classic_control/simple_reacher.py
 create mode 100644 alr_envs/mujoco/__init__.py
 rename reacher/envs/reacher_env.py => alr_envs/mujoco/alr_reacher.py (76%)
 rename {reacher/envs => alr_envs/mujoco/assets}/reacher_5links.xml (100%)
 delete mode 100644 reacher/__init__.py
 delete mode 100644 reacher/__pycache__/__init__.cpython-37.pyc
 delete mode 100644 reacher/envs/__init__.py
 delete mode 100644 reacher/envs/__pycache__/__init__.cpython-37.pyc
 delete mode 100644 reacher/envs/__pycache__/reacher_env.cpython-37.pyc
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..766e68d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,111 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+# macOS / PyCharm
+.DS_Store
+/.idea
+
+#configs
+/configs/db.cfg
diff --git a/.idea/test.iml b/.idea/test.iml
deleted file mode 100644
index 02f1ba9..0000000
--- a/.idea/test.iml
+++ /dev/null
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module type="PYTHON_MODULE" version="4">
-  <component name="NewModuleRootManager">
-    <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Python 3.7 (trustpo)" jdkType="Python SDK" />
-    <orderEntry type="sourceFolder" forTests="false" />
-  </component>
-</module>
\ No newline at end of file
diff --git a/README.md b/README.md
index 9f103bd..5d903d0 100644
--- a/README.md
+++ b/README.md
@@ -10,4 +10,4 @@
     - Install: go to "../reacher_5_links"        
         ``` pip install -e reacher_5_links ```
     - Use (see example.py): 
-        ``` env = gym.make('reacher:ReacherALREnv-v0')```
\ No newline at end of file
+        ``` env = gym.make('alr_envs:ALRReacher-v0')```
\ No newline at end of file
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py
new file mode 100644
index 0000000..86b06c3
--- /dev/null
+++ b/alr_envs/__init__.py
@@ -0,0 +1,16 @@
+from gym.envs.registration import register
+
+register(
+    id='ALRReacher-v0',  # MuJoCo reacher, model file: mujoco/assets/reacher_5links.xml
+    entry_point='alr_envs.mujoco:ALRReacherEnv',
+    max_episode_steps=1000,
+)
+
+register(
+    id='SimpleReacher-v0',  # reacher without physics simulation
+    entry_point='alr_envs.classic_control:SimpleReacherEnv',
+    max_episode_steps=200,  # SimpleReacherEnv only returns a reward after step 150
+    kwargs={
+        "n_links": 5,  # passed to SimpleReacherEnv.__init__
+    }
+)
diff --git a/alr_envs/classic_control/__init__.py b/alr_envs/classic_control/__init__.py
new file mode 100644
index 0000000..53e2448
--- /dev/null
+++ b/alr_envs/classic_control/__init__.py
@@ -0,0 +1 @@
+from alr_envs.classic_control.simple_reacher import SimpleReacherEnv
diff --git a/alr_envs/classic_control/simple_reacher.py b/alr_envs/classic_control/simple_reacher.py
new file mode 100644
index 0000000..37319bb
--- /dev/null
+++ b/alr_envs/classic_control/simple_reacher.py
@@ -0,0 +1,166 @@
+import gym
+import numpy as np
+from gym import spaces, utils
+from gym.utils import seeding
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+
+mpl.use('Qt5Agg')  # or can use 'TkAgg', whatever you have/prefer
+
+
+class SimpleReacherEnv(gym.Env, utils.EzPickle):
+    """
+    Simple Reaching Task without any physics simulation.
+    Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions
+    towards the end of the trajectory.
+    """
+
+    def __init__(self, n_links):
+        """Create an arm of n_links links of length 1. reset() has to be called before the first step()."""
+        # Hand the constructor argument to EzPickle, which re-creates the environment from it when unpickling.
+        utils.EzPickle.__init__(self, n_links)
+        self.link_lengths = np.ones(n_links)
+        self.n_links = n_links
+        self.dt = 0.1
+
+        self._goal_pos = None
+        self.joints = None
+        self._joint_angle = None
+        self._angle_velocity = None
+
+        self.max_torque = 1  # 10
+
+        action_bound = np.ones((self.n_links,))
+        state_bound = np.hstack([
+            [np.pi] * self.n_links,  # joint angles
+            [np.inf] * self.n_links,  # angular velocities
+            [np.inf] * 2,  # offset between end effector and goal (x, y)
+            [np.inf]  # number of steps taken in the episode
+        ])
+        self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape)
+        self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
+
+        self.fig = None
+        self.metadata = {'render.modes': ["human"]}
+        self._steps = 0
+        self.seed()
+
+    def step(self, action):
+        """Advance the arm by one time step of length dt, the scaled action acts as angular acceleration."""
+        action = self._scale_action(action)
+
+        self._angle_velocity = self._angle_velocity + self.dt * action
+        self._joint_angle = angle_normalize(self._joint_angle + self.dt * self._angle_velocity)
+        self._update_joints()
+        self._steps += 1
+
+        reward = self._get_reward(action)
+
+        # The environment never ends an episode itself, i.e. done is always False.
+        done = False
+
+        return self._get_obs(), reward, done, {}
+
+    def _scale_action(self, action):
+        """
+        Map normalized actions in [-1, 1] linearly to [-self.max_torque, self.max_torque].
+
+        Args:
+            action: action to scale
+
+        Returns: action according to self.max_torque, clipped to the valid range
+        """
+        ub = self.max_torque
+        lb = -self.max_torque
+        action = lb + (action + 1.) * 0.5 * (ub - lb)
+        return np.clip(action, lb, ub)
+
+    def _get_obs(self):
+        """Return joint angles, angular velocities, end effector - goal offset and step count as one flat array."""
+        return np.hstack([self._joint_angle, self._angle_velocity, self.end_effector - self._goal_pos, self._steps])
+
+    def _update_joints(self):
+        """Update joints to get the new end effector position. The other links are only required for rendering."""
+        # Joint angles are relative to the previous link, their cumulative sum is the orientation of each link.
+        angles = np.cumsum(self._joint_angle)
+        x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)])
+        self.joints[1:] = self.joints[0] + np.cumsum(x.T, axis=0)
+
+    def _get_reward(self, action):
+        """Return 0 during the first 150 steps, afterwards exp(-0.1 * offset ** 2) averaged over x and y."""
+        reward = 0
+        # TODO: Is this the best option
+        if self._steps > 150:
+            diff = self.end_effector - self._goal_pos
+            reward = np.exp(-0.1 * diff ** 2).mean()
+
+        return reward
+
+    def reset(self):
+        """Start a new episode with a straight arm, a random orientation and a random goal."""
+        # The reward is gated on the step count, so the counter has to restart with every episode.
+        self._steps = 0
+
+        # TODO: maybe do initialisation more random?
+        # Sample only orientation of first link, i.e. the arm is always straight.
+        self._joint_angle = np.hstack([[self.np_random.uniform(-np.pi, np.pi)], np.zeros(self.n_links - 1)])
+        self._angle_velocity = np.zeros(self.n_links)
+        self.joints = np.zeros((self.n_links + 1, 2))
+        self._update_joints()
+
+        self._goal_pos = self._get_random_goal()
+        return self._get_obs()
+
+    def _get_random_goal(self):
+        """Sample a goal position the fully stretched arm is able to reach."""
+        center = self.joints[0]
+        # Sample uniformly in circle with radius R around center of reacher.
+        R = np.sum(self.link_lengths)
+        r = R * np.sqrt(self.np_random.uniform())
+        theta = self.np_random.uniform() * 2 * np.pi
+        return center + r * np.stack([np.cos(theta), np.sin(theta)])
+
+    def seed(self, seed=None):
+        """Seed the random number generator of the environment and return the seed that is used in a list."""
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def render(self, mode='human'):  # pragma: no cover
+        """Draw the arm, the goal and the line between end effector and goal in an interactive figure."""
+        if self.fig is None:
+            self.fig = plt.figure()
+            plt.ion()
+            plt.show()
+        else:
+            plt.figure(self.fig.number)
+        plt.cla()
+
+        # Arm
+        plt.plot(self.joints[:, 0], self.joints[:, 1], 'ro-', markerfacecolor='k')
+        # goal
+        goal_pos = self._goal_pos.T
+        plt.plot(goal_pos[0], goal_pos[1], 'gx')
+        # distance between end effector and goal
+        plt.plot([self.end_effector[0], goal_pos[0]], [self.end_effector[1], goal_pos[1]], 'g--')
+
+        lim = np.sum(self.link_lengths) + 0.5
+        plt.xlim([-lim, lim])
+        plt.ylim([-lim, lim])
+        plt.draw()
+        plt.pause(0.0001)
+
+    def close(self):
+        """Close the figure, if there is one. render() opens a new figure when it is called again."""
+        if self.fig is not None:
+            plt.close(self.fig)
+            self.fig = None
+
+    @property
+    def end_effector(self):
+        """Position of the tip of the last link."""
+        return self.joints[self.n_links].T
+
+
+def angle_normalize(x):  # x: angle(s) in radians
+    return ((x + np.pi) % (2 * np.pi)) - np.pi  # wrapped into the half-open interval [-pi, pi)
diff --git a/alr_envs/mujoco/__init__.py b/alr_envs/mujoco/__init__.py
new file mode 100644
index 0000000..77588f7
--- /dev/null
+++ b/alr_envs/mujoco/__init__.py
@@ -0,0 +1 @@
+from alr_envs.mujoco.alr_reacher import ALRReacherEnv
\ No newline at end of file
diff --git a/reacher/envs/reacher_env.py b/alr_envs/mujoco/alr_reacher.py
similarity index 76%
rename from reacher/envs/reacher_env.py
rename to alr_envs/mujoco/alr_reacher.py
index 2647413..95e667e 100644
--- a/reacher/envs/reacher_env.py
+++ b/alr_envs/mujoco/alr_reacher.py
@@ -1,14 +1,16 @@
 import numpy as np
+import os
 from gym import utils
 from gym.envs.mujoco import mujoco_env
 
-class ReacherALREnv(mujoco_env.MujocoEnv, utils.EzPickle):
+
+class ALRReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
     def __init__(self):
         utils.EzPickle.__init__(self)
-        mujoco_env.MujocoEnv.__init__(self, '/home/vien/git/reacher_test/reacher/envs/reacher_5links.xml', 2)
+        mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", 'reacher_5links.xml'), 2)
 
     def step(self, a):
-        vec = self.get_body_com("fingertip")-self.get_body_com("target")
+        vec = self.get_body_com("fingertip") - self.get_body_com("target")
         reward_dist = - np.linalg.norm(vec)
         reward_ctrl = - np.square(a).sum()
         reward = reward_dist + reward_ctrl
@@ -38,7 +40,7 @@ class ReacherALREnv(mujoco_env.MujocoEnv, utils.EzPickle):
         return np.concatenate([
             np.cos(theta),
             np.sin(theta),
-            self.sim.data.qpos.flat[5:], # this is goal position
-            self.sim.data.qvel.flat[:5], # this is angular velocity
+            self.sim.data.qpos.flat[5:],  # this is goal position
+            self.sim.data.qvel.flat[:5],  # this is angular velocity
             self.get_body_com("fingertip") - self.get_body_com("target")
         ])
diff --git a/reacher/envs/reacher_5links.xml b/alr_envs/mujoco/assets/reacher_5links.xml
similarity index 100%
rename from reacher/envs/reacher_5links.xml
rename to alr_envs/mujoco/assets/reacher_5links.xml
diff --git a/example.py b/example.py
index 4cb210b..fda066a 100644
--- a/example.py
+++ b/example.py
@@ -1,13 +1,15 @@
 import gym
 
+if __name__ == '__main__':
 
-if __name__ == "__main__":
-    env = gym.make('reacher:ReacherALREnv-v0')
-    #env = gym.make('Hopper-v2')
-    env.reset()
+    # env = gym.make('alr_envs:ALRReacher-v0')
+    env = gym.make('alr_envs:SimpleReacher-v0')
+    state = env.reset()
 
     for i in range(10000):
-        action = env.action_space.sample()
-        obs = env.step(action)
-        print("step",i)
-        env.render()
+        state, reward, done, info = env.step(env.action_space.sample())
+        if i % 5 == 0:
+            env.render()
+
+        if done:
+            state = env.reset()
diff --git a/reacher.egg-info/SOURCES.txt b/reacher.egg-info/SOURCES.txt
index 147b0db..b771181 100644
--- a/reacher.egg-info/SOURCES.txt
+++ b/reacher.egg-info/SOURCES.txt
@@ -1,3 +1,4 @@
+README.md
 setup.py
 reacher.egg-info/PKG-INFO
 reacher.egg-info/SOURCES.txt
diff --git a/reacher/__init__.py b/reacher/__init__.py
deleted file mode 100644
index 2044486..0000000
--- a/reacher/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from gym.envs.registration import register
-
-register(
-    id='ReacherALREnv-v0',
-    entry_point='reacher.envs:ReacherALREnv',
-)
diff --git a/reacher/__pycache__/__init__.cpython-37.pyc b/reacher/__pycache__/__init__.cpython-37.pyc
deleted file mode 100644
index 7e115b85dc12560388e1ba71ad9014722f91d8cd..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 271
zcmZ?b<>g`kf-nC(;v0eVV-N=hSbz)%ATE{x5-AKRj5!Rsj8Tk?45^GMOexGMEWJ#O
z3@NO^44P~&fhrg@8E<hErKV>Vm!uX|2?V7kCTFA;Ir;>-=9TG|8B|FXK}7UY^U8{?
zP^2}PZZT!1+~Q8nD=DgsFUZf#EAa!0iKbWPf>rB5j4etm$;{6yVglN=lA(wVNP&r8
z`uZ99xvBbPnW=gD>6s<^P>bSAQj6gn{rLFIyv&mLc)fzkTO2mI`6;D2sdkJ&Gm1e5
L@h~wlGBE-GCHPA2

diff --git a/reacher/envs/__init__.py b/reacher/envs/__init__.py
deleted file mode 100644
index 00340ff..0000000
--- a/reacher/envs/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from reacher.envs.reacher_env import ReacherALREnv
diff --git a/reacher/envs/__pycache__/__init__.cpython-37.pyc b/reacher/envs/__pycache__/__init__.cpython-37.pyc
deleted file mode 100644
index 6409b90c8a0b20adc90be48808264326df88b2b0..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 201
zcmZ?b<>g`kg519z@y0;<F^B^LOhASM5Elyoi4=wu#vF!R#wbQch7_h?22JLdKv4!w
z##_8Wsfo!MsYQ-HL9Tgaews{C5=9UZz0|z2Vm&A)9>^&I8M%_7h#5$MiC-rA8Tq-X
z`em7^dHU&@CHgSMC8@=5jy}j3{rLFIyv&mLc)fzkTO2mI`6;D2sdkJ&gFXW>0{}DI
BG#dZ_

diff --git a/reacher/envs/__pycache__/reacher_env.cpython-37.pyc b/reacher/envs/__pycache__/reacher_env.cpython-37.pyc
deleted file mode 100644
index ccdf81522871adba0a9a86466bad4c2dd0e3f523..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 2011
zcmb7FJC7Vi5bo}I?Ci@n$H~UX7y<$Yp}Pcx1Qs?RV<ExHG6B*gqw#d#-t5ieu6y?E
zTlON`p)(R3?S>@eM2`IzIzvSF7YHz4&Fr4<3<)E3b#-@Db@f+|`D!@qF?`Q@-^P!8
z#{Q(m`t!it!Z4qKNG5sCB92+)qHrRI)1H&Ng%^38-C@#|-eV>`<xPC$OgfL;D3JUI
z7Wya9XQPl?Z(3)$0n;m{lPaxZRUW2vxc&r~TNvhJ5Y=243CX2+%pylR(#4Exq>tH?
z9T{NuWmonvcVr)z4eaoqO441Wzx?jrZJ6h<L?6xcqOUQ`dmuHN@P(MOCFiVmCNA-G
z)v|dROgW?3=i9qgp|%e*Rc?>7dRw=q<64>1#Wo&cPES6`vvSXDJuLDQdQ9kPnrv`;
zb~j7+a%F=!&dRKgW9`sE_xlD3cnVltqw<|!nWmWh_38h<8;)%4AL$OX5ld!V5XJ|5
zcKIA6TWb$ea*e(KgR~lKcEQZq0$410Gm9mEiye!aPsGBRW7TKJPR$qY91;s}$*^|k
z&fLAijy-7jbLdSvOTo!buw=)+bdEc7f9}mYb(bWi`&8_>z}Oqv?oL*Ym9DdW>(_}M
ztNNQz*uGW|5-nqynfi2*);bS+b^w|9ekG6Mv?{D9p$OEId~Dsa(uMWS!8FmzI%Zl#
zLs`WpE2eo;XH^;Xc63sx*zDoDASOfAePdmjrF9tSF5Yf=Vx2>k>I+!ti?rjn&R9|1
zhiFx*CPFRglYnb#wNFtqmp~Zb<QMoQ9)Jei<+F|R_>=ItyreiZ*wdAg&_iy<LNXvE
zMw(JLbgYjwe4upXq)8F=Yn`Nf1VSc5-Z=icQ-fAMHmaWPZ^DLY;>~AopLgf1*r{8T
zoRmb3TR($oouUE(Cnl)Irw%v@I)C`l>a?lN_9XUdf6`e5IPsQTdV5S?zsnA;|45d+
zIY;%fC*lUJx-0EK5+^1-vH-h6_U7W1+@g=f605WtED4<cX#DIK`hI`DW^Mv0C$An@
zcQ+gFA~M-bt<GMog<jm$)Cx}ie);m{_@~X^?))*UpRbvlE#5=hE%#%cloC1CnU>iO
zQY-R`N~$~(<w4YK%J^WvGLbv3l3c$5lX`>5n;`4+LzN>%4{aA=h)tc;N?*ot=(Ne^
zk^$=;K<xGSp#(^$x$z2A48@S~Yg{0|Hn@-Mx`yf=h}qTi0X?I48czy6C1?{*38uxo
zi6m0)iMWb5*6zZ?Kmjj_@6dck4$YtBRyzT5V5NyN@1k*1yXZ=X+Qk?#M||AT2?4DK
zM1~+?FLF?gsG<b5{sDHZyMxkfz_b0dDpMd-Vpt~yFc2<vNZM3bp=d$#>RknOde5sO
zY+!vKTZXELYS2aX1wd-!-yLvO_@gkOhFnrhh;1(>u%|ikgE&SL$=j2rrRsO+x_60u
zNQ9zz#)ZZiY6hCxF1^~&7Cdl+Uf=}7W<Ts&ubdX>(v&UM86OqR4<fn<Zj1gTTkWs0
e`8#N8%$mxuQU0&nY;dFX`3ve2hGc^#DgFWbWcOqM

diff --git a/setup.py b/setup.py
index d8affaa..9f6b4f1 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,6 @@
 from setuptools import setup
 
-setup(name='reacher',
+setup(name='alr_envs',
       version='0.0.1',
-      install_requires=['gym']  # And any other dependencies foo needs
+      install_requires=['gym', 'numpy', 'matplotlib']  # gym plus the packages imported by alr_envs
 )
\ No newline at end of file