added simple reacher task

ottofabian 2020-08-28 18:31:06 +02:00
parent 96d1f93bda
commit 31156cec4d
18 changed files with 315 additions and 30 deletions

111
.gitignore vendored Normal file
View File

@@ -0,0 +1,111 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# pycharm
.DS_Store
/.idea
#configs
/configs/db.cfg

View File

@@ -1,8 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="jdk" jdkName="Python 3.7 (trustpo)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

README.md
View File

@@ -10,4 +10,4 @@
- Install: go to "../reacher_5_links"
``` pip install -e reacher_5_links ```
- Use (see example.py):
``` env = gym.make('reacher:ReacherALREnv-v0')```
``` env = gym.make('reacher:ALRReacherEnv-v0')```

0
__init__.py Normal file
View File

16
alr_envs/__init__.py Normal file
View File

@@ -0,0 +1,16 @@
from gym.envs.registration import register

register(
    id='ALRReacher-v0',
    entry_point='alr_envs.mujoco:ALRReacherEnv',
    max_episode_steps=1000,
)

register(
    id='SimpleReacher-v0',
    entry_point='alr_envs.classic_control:SimpleReacherEnv',
    max_episode_steps=200,
    kwargs={
        "n_links": 5,
    }
)

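A minimal usage sketch for the IDs registered above (illustration only, assuming the pre-0.26 gym API used throughout this repo; the `alr_envs:` prefix makes gym import the package before looking up the ID, as example.py further down does):

```python
import gym

# 'SimpleReacher-v0' is registered with kwargs n_links=5 and a 200-step TimeLimit.
env = gym.make("alr_envs:SimpleReacher-v0")
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
```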
alr_envs/classic_control/__init__.py Normal file
View File

@@ -0,0 +1 @@
from alr_envs.classic_control.simple_reacher import SimpleReacherEnv

alr_envs/classic_control/simple_reacher.py Normal file
View File

@@ -0,0 +1,166 @@
import gym
import numpy as np
from gym import spaces, utils
from gym.utils import seeding
import matplotlib as mpl
import matplotlib.pyplot as plt

mpl.use('Qt5Agg')  # or can use 'TkAgg', whatever you have/prefer


class SimpleReacherEnv(gym.Env, utils.EzPickle):
    """
    Simple reaching task without any physics simulation.
    Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions
    towards the end of the trajectory.
    """

    def __init__(self, n_links):
        super().__init__()
        self.link_lengths = np.ones(n_links)
        self.n_links = n_links
        self.dt = 0.1

        self._goal_pos = None
        self.joints = None
        self._joint_angle = None
        self._angle_velocity = None

        self.max_torque = 1  # 10

        action_bound = np.ones((self.n_links,))
        state_bound = np.hstack([
            [np.pi] * self.n_links,
            [np.inf] * self.n_links,
            [np.inf],
            [np.inf]  # TODO: Maybe
        ])
        self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape)
        self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)

        self.fig = None
        self.metadata = {'render.modes': ["human"]}

        self._steps = 0
        self.seed()

    def step(self, action):
        action = self._scale_action(action)

        # Simple integration: torques update the angular velocities, which update the joint angles.
        self._angle_velocity = self._angle_velocity + self.dt * action
        self._joint_angle = angle_normalize(self._joint_angle + self.dt * self._angle_velocity)

        self._update_joints()
        self._steps += 1

        reward = self._get_reward(action)

        # done = np.abs(self.end_effector - self._goal_pos) < 0.1
        done = False

        return self._get_obs().copy(), reward, done, {}

    def _scale_action(self, action):
        """
        Scale normalized actions from [-1, 1] back to the torque range [-max_torque, max_torque].

        Args:
            action: action to scale

        Returns: action according to self.max_torque
        """
        ub = self.max_torque
        lb = -self.max_torque

        action = lb + (action + 1.) * 0.5 * (ub - lb)
        return np.clip(action, lb, ub)

    def _get_obs(self):
        return [self._joint_angle, self._angle_velocity, self.end_effector - self._goal_pos, self._steps]

    def _update_joints(self):
        """
        Update joints to get the new end effector position. The other links are only required for rendering.
        """
        angles = np.cumsum(self._joint_angle)
        x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)])
        self.joints[1:] = self.joints[0] + np.cumsum(x.T, axis=0)

    def _get_reward(self, action):
        diff = self.end_effector - self._goal_pos
        distance = 0

        # TODO: Is this the best option?
        if self._steps > 150:
            distance = np.exp(-0.1 * diff ** 2).mean()
            # distance -= (diff ** 2).mean()
            # distance -= action ** 2

        return distance

    def reset(self):
        # TODO: maybe do initialisation more random?
        # Sample only the orientation of the first link, i.e. the arm is always straight.
        self._joint_angle = np.hstack([[self.np_random.uniform(-np.pi, np.pi)], np.zeros(self.n_links - 1)])
        self._angle_velocity = np.zeros(self.n_links)
        self.joints = np.zeros((self.n_links + 1, 2))
        self._update_joints()
        self._goal_pos = self._get_random_goal()

        return self._get_obs().copy()

    def _get_random_goal(self):
        center = self.joints[0]

        # Sample uniformly in a circle with radius R around the center of the reacher.
        R = np.sum(self.link_lengths)
        r = R * np.sqrt(self.np_random.uniform())
        theta = self.np_random.uniform() * 2 * np.pi
        return center + r * np.stack([np.cos(theta), np.sin(theta)])

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def render(self, mode='human'):  # pragma: no cover
        if self.fig is None:
            self.fig = plt.figure()
            plt.ion()
            plt.show()
        else:
            plt.figure(self.fig.number)

        plt.cla()

        # Arm
        plt.plot(self.joints[:, 0], self.joints[:, 1], 'ro-', markerfacecolor='k')

        # Goal
        goal_pos = self._goal_pos.T
        plt.plot(goal_pos[0], goal_pos[1], 'gx')
        # Distance between end effector and goal
        plt.plot([self.end_effector[0], goal_pos[0]], [self.end_effector[1], goal_pos[1]], 'g--')

        lim = np.sum(self.link_lengths) + 0.5
        plt.xlim([-lim, lim])
        plt.ylim([-lim, lim])
        plt.draw()
        plt.pause(0.0001)

    def close(self):
        del self.fig

    @property
    def end_effector(self):
        return self.joints[self.n_links].T


def angle_normalize(x):
    return ((x + np.pi) % (2 * np.pi)) - np.pi

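A quick, hedged check of the delayed-reward scheme described in the class docstring (a sketch only; it assumes the module path alr_envs/classic_control/simple_reacher.py used above and a working matplotlib install, since the module selects the Qt5Agg backend at import time):

```python
from alr_envs.classic_control.simple_reacher import SimpleReacherEnv

env = SimpleReacherEnv(n_links=5)
env.reset()

rewards = []
for t in range(200):
    _, reward, _, _ = env.step(env.action_space.sample())
    rewards.append(reward)

# No reward for the first 150 steps; afterwards exp(-0.1 * diff ** 2).mean() lies in (0, 1].
assert all(r == 0 for r in rewards[:150])
assert all(0 < r <= 1 for r in rewards[150:])
```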
alr_envs/mujoco/__init__.py Normal file
View File

@@ -0,0 +1 @@
from alr_envs.mujoco.alr_reacher import ALRReacherEnv

alr_envs/mujoco/alr_reacher.py
View File

@@ -1,14 +1,16 @@
import numpy as np
import os

from gym import utils
from gym.envs.mujoco import mujoco_env


class ReacherALREnv(mujoco_env.MujocoEnv, utils.EzPickle):
class ALRReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        utils.EzPickle.__init__(self)
        mujoco_env.MujocoEnv.__init__(self, '/home/vien/git/reacher_test/reacher/envs/reacher_5links.xml', 2)
        mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", 'reacher_5links.xml'), 2)

    def step(self, a):
        vec = self.get_body_com("fingertip")-self.get_body_com("target")
        vec = self.get_body_com("fingertip") - self.get_body_com("target")
        reward_dist = - np.linalg.norm(vec)
        reward_ctrl = - np.square(a).sum()
        reward = reward_dist + reward_ctrl
@@ -38,7 +40,7 @@ class ReacherALREnv(mujoco_env.MujocoEnv, utils.EzPickle):
        return np.concatenate([
            np.cos(theta),
            np.sin(theta),
            self.sim.data.qpos.flat[5:], # this is goal position
            self.sim.data.qvel.flat[:5], # this is angular velocity
            self.sim.data.qpos.flat[5:],  # this is goal position
            self.sim.data.qvel.flat[:5],  # this is angular velocity
            self.get_body_com("fingertip") - self.get_body_com("target")
        ])

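For orientation, a sketch of rolling out the renamed MuJoCo environment through the ID registered in alr_envs/__init__.py (assumes mujoco-py is installed and that reacher_5links.xml lives under alr_envs/mujoco/assets/, as the new constructor path implies):

```python
import gym

env = gym.make("alr_envs:ALRReacher-v0")  # wrapped in a 1000-step TimeLimit per the registration
obs = env.reset()
for _ in range(5):
    obs, reward, done, info = env.step(env.action_space.sample())
    # reward = reward_dist + reward_ctrl; both terms are non-positive, so reward <= 0
    print(reward)
```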
example.py
View File

@@ -1,13 +1,15 @@
import gym

if __name__ == '__main__':
if __name__ == "__main__":
    env = gym.make('reacher:ReacherALREnv-v0')
    #env = gym.make('Hopper-v2')
    env.reset()
    # env = gym.make('alr_envs:ALRReacher-v0')
    env = gym.make('alr_envs:SimpleReacher-v0')
    state = env.reset()

    for i in range(10000):
        action = env.action_space.sample()
        obs = env.step(action)
        print("step",i)
        env.render()
        state, reward, done, info = env.step(env.action_space.sample())

        if i % 5 == 0:
            env.render()

        if done:
            state = env.reset()

reacher.egg-info/SOURCES.txt
View File

@@ -1,3 +1,4 @@
README.md
setup.py
reacher.egg-info/PKG-INFO
reacher.egg-info/SOURCES.txt

reacher/__init__.py
View File

@@ -1,6 +0,0 @@
from gym.envs.registration import register

register(
    id='ReacherALREnv-v0',
    entry_point='reacher.envs:ReacherALREnv',
)

reacher/envs/__init__.py
View File

@@ -1 +0,0 @@
from reacher.envs.reacher_env import ReacherALREnv

setup.py
View File

@@ -1,6 +1,6 @@
from setuptools import setup

setup(name='reacher',
setup(name='alr_envs',
      version='0.0.1',
      install_requires=['gym']  # And any other dependencies foo needs
      )
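After installing the renamed package (e.g. `pip install -e .` from the repository root), a small sanity-check sketch, assuming a pre-0.26 gym where the registry exposes env_specs as a dict:

```python
import gym
import alr_envs  # importing the package runs the register() calls from alr_envs/__init__.py

ids = [env_id for env_id in gym.envs.registry.env_specs if "Reacher" in env_id]
print(ids)  # expected to include 'ALRReacher-v0' and 'SimpleReacher-v0'
```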