From e1617c34a67bb904f5eee5ef9777021b31a49619 Mon Sep 17 00:00:00 2001
From: Onur
Date: Thu, 27 Jan 2022 15:38:37 +0100
Subject: [PATCH 001/104] started working on contextual bp
---
setup.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/setup.py b/setup.py
index b5aa424..6122e90 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@ setup(
'gym',
'PyQt5',
'matplotlib',
- 'mp_env_api @ git+https://github.com/ALRhub/motion_primitive_env_api.git',
+ #'mp_env_api @ git+https://github.com/ALRhub/motion_primitive_env_api.git',
# 'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git',
'mujoco-py<2.1,>=2.0',
'dm_control',
From be5b287ae19d8803c2e6c9bb7adc67ad095de90d Mon Sep 17 00:00:00 2001
From: Onur
Date: Fri, 28 Jan 2022 19:24:34 +0100
Subject: [PATCH 002/104] randomized cup
---
alr_envs/alr/__init__.py | 24 +++++++---
alr_envs/alr/mujoco/beerpong/beerpong.py | 56 ++++++++++------------
alr_envs/alr/mujoco/beerpong/mp_wrapper.py | 4 +-
3 files changed, 44 insertions(+), 40 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 90ec78c..67031f1 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -213,16 +213,26 @@ register(id='TableTennis4DCtxt-v0',
kwargs={'ctxt_dim': 4})
## BeerPong
-difficulties = ["simple", "intermediate", "hard", "hardest"]
-
-for v, difficulty in enumerate(difficulties):
- register(
- id='ALRBeerPong-v{}'.format(v),
+# fixed goal cup position
+register(
+ id='ALRBeerPong-v0',
entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
max_episode_steps=600,
kwargs={
- "difficulty": difficulty,
- "reward_type": "staged",
+ "rndm_goal": False,
+ "cup_goal_pos": [-0.3, -1.2]
+ }
+ )
+
+
+# random goal cup position
+register(
+ id='ALRBeerPong-v1',
+ entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+ max_episode_steps=600,
+ kwargs={
+ "rndm_goal": True,
+ "cup_goal_pos": [-0.3, -1.2]
}
)
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 755710a..2fb435f 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -4,11 +4,16 @@ import os
import numpy as np
from gym import utils
from gym.envs.mujoco import MujocoEnv
+from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
+
+
+CUP_POS_MIN = np.array([-0.32, -2.2])
+CUP_POS_MAX = np.array([0.32, -1.2])
class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
- def __init__(self, frame_skip=1, apply_gravity_comp=True, reward_type: str = "staged", noisy=False,
- context: np.ndarray = None, difficulty='simple'):
+ def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
+ rndm_goal=False, cup_goal_pos=[-0.3, -1.2]):
self._steps = 0
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
@@ -17,7 +22,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
- self.context = context
+ self.rndm_goal = rndm_goal
self.apply_gravity_comp = apply_gravity_comp
self.add_noise = noisy
@@ -38,23 +43,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
else:
self.noise_std = 0
- if difficulty == 'simple':
- self.cup_goal_pos = np.array([0, -1.7, 0.840])
- elif difficulty == 'intermediate':
- self.cup_goal_pos = np.array([0.3, -1.5, 0.840])
- elif difficulty == 'hard':
- self.cup_goal_pos = np.array([-0.3, -2.2, 0.840])
- elif difficulty == 'hardest':
- self.cup_goal_pos = np.array([-0.3, -1.2, 0.840])
-
- if reward_type == "no_context":
- from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerPongReward
- reward_function = BeerPongReward
- elif reward_type == "staged":
- from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
- reward_function = BeerPongReward
- else:
- raise ValueError("Unknown reward type: {}".format(reward_type))
+ self.cup_goal_pos = np.array(cup_goal_pos.append(0.840))
+ reward_function = BeerPongReward
self.reward_function = reward_function()
MujocoEnv.__init__(self, self.xml_path, frame_skip)
@@ -94,6 +84,12 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.sim.model.body_pos[self.cup_table_id] = self.cup_goal_pos
start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
self.set_state(start_pos, init_vel)
+ if self.rndm_goal:
+ xy = np.random.uniform(CUP_POS_MIN, CUP_POS_MAX)
+ xyz = np.zeros(3)
+ xyz[:2] = xy
+ xyz[-1] = 0.840
+ self.sim.model.body_pos[self.cup_table_id] = xyz
return self._get_obs()
def step(self, a):
@@ -153,13 +149,12 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def check_traj_in_joint_limits(self):
return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
- # TODO: extend observation space
def _get_obs(self):
theta = self.sim.data.qpos.flat[:7]
return np.concatenate([
np.cos(theta),
np.sin(theta),
- # self.get_body_com("target"), # only return target to make problem harder
+ self.sim.model.body_pos[self.cup_table_id][:2].copy(),
[self._steps],
])
@@ -169,25 +164,26 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
return np.hstack([
[False] * 7, # cos
[False] * 7, # sin
- # [True] * 2, # x-y coordinates of target distance
+ [True] * 2, # xy position of cup
[False] # env steps
])
if __name__ == "__main__":
- env = ALRBeerBongEnv(reward_type="staged", difficulty='hardest')
-
- # env.configure(ctxt)
+ env = ALRBeerBongEnv(rndm_goal=True)
+ import time
env.reset()
env.render("human")
- for i in range(800):
- ac = 10 * env.action_space.sample()[0:7]
+ for i in range(1500):
+ # ac = 10 * env.action_space.sample()[0:7]
+ ac = np.zeros(7)
obs, rew, d, info = env.step(ac)
env.render("human")
print(rew)
if d:
- break
-
+ print('RESETTING')
+ env.reset()
+ time.sleep(1)
env.close()
diff --git a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
index 87c6b7e..11af9a5 100644
--- a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
@@ -9,11 +9,10 @@ class MPWrapper(MPEnvWrapper):
@property
def active_obs(self):
- # TODO: @Max Filter observations correctly
return np.hstack([
[False] * 7, # cos
[False] * 7, # sin
- # [True] * 2, # x-y coordinates of target distance
+ [True] * 2, # xy position of cup
[False] # env steps
])
@@ -31,7 +30,6 @@ class MPWrapper(MPEnvWrapper):
@property
def goal_pos(self):
- # TODO: @Max I think the default value of returning to the start is reasonable here
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property
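Editor's sketch for the random-goal variant registered above: the rndm_goal branch added to reset_model reduces to sampling an x-y cup position inside the CUP_POS_MIN/CUP_POS_MAX box and placing the cup body there at table height. A minimal standalone version of that sampling step (NumPy only; the helper name is illustrative, the bounds are copied from the patch):

    import numpy as np

    CUP_POS_MIN = np.array([-0.32, -2.2])
    CUP_POS_MAX = np.array([0.32, -1.2])

    def sample_cup_goal(rng=np.random):
        # mirrors the rndm_goal branch in reset_model: uniform x-y, fixed z = 0.840 (table height)
        xyz = np.zeros(3)
        xyz[:2] = rng.uniform(CUP_POS_MIN, CUP_POS_MAX)
        xyz[-1] = 0.840
        return xyz

    print(sample_cup_goal())  # e.g. [ 0.12 -1.83  0.84]

Note that the constructor line introduced in this patch, self.cup_goal_pos = np.array(cup_goal_pos.append(0.840)), is buggy: list.append returns None, so np.array(None) is stored instead of the goal. Patch 003 replaces that line.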
From bcebf1077c2aa82e850a96185f63c8e0fbe03806 Mon Sep 17 00:00:00 2001
From: Onur
Date: Mon, 31 Jan 2022 17:03:49 +0100
Subject: [PATCH 003/104] contextual bp working
---
alr_envs/alr/__init__.py | 2 +-
alr_envs/alr/mujoco/beerpong/beerpong.py | 12 +++++++++---
2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 67031f1..0181f62 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -375,7 +375,7 @@ for _v in _versions:
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
## Beerpong
-_versions = ["v0", "v1", "v2", "v3"]
+_versions = ["v0", "v1"]
for _v in _versions:
_env_id = f'BeerpongProMP-{_v}'
register(
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 2fb435f..9092ef1 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -13,9 +13,15 @@ CUP_POS_MAX = np.array([0.32, -1.2])
class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
- rndm_goal=False, cup_goal_pos=[-0.3, -1.2]):
- self._steps = 0
+ rndm_goal=False, cup_goal_pos=None):
+ if cup_goal_pos is None:
+ cup_goal_pos = [-0.3, -1.2, 0.840]
+ elif len(cup_goal_pos)==2:
+ cup_goal_pos = np.array(cup_goal_pos)
+ cup_goal_pos = np.insert(cup_goal_pos, 2, 0.80)
+ self.cup_goal_pos = np.array(cup_goal_pos)
+ self._steps = 0
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
"beerpong_wo_cup" + ".xml")
@@ -43,7 +49,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
else:
self.noise_std = 0
- self.cup_goal_pos = np.array(cup_goal_pos.append(0.840))
+
reward_function = BeerPongReward
self.reward_function = reward_function()
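The constructor logic introduced here normalizes a 2-D goal to 3-D by inserting the table height as the z coordinate. Note that this patch inserts 0.80 while the default goal uses 0.840; patch 011 later unifies both to 0.840. A minimal sketch of the normalization as it stands after that later fix (function name is illustrative):

    import numpy as np

    def normalize_cup_goal(cup_goal_pos=None):
        # default goal, otherwise append table height z = 0.840 to a 2-D x-y goal
        cup_goal_pos = np.array(cup_goal_pos if cup_goal_pos is not None else [-0.3, -1.2, 0.840], dtype=float)
        if cup_goal_pos.shape[0] == 2:
            cup_goal_pos = np.insert(cup_goal_pos, 2, 0.840)
        return cup_goal_pos

    print(normalize_cup_goal([-0.3, -1.2]))  # [-0.3  -1.2   0.84]
    print(normalize_cup_goal())              # [-0.3  -1.2   0.84]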
From 66be0b1e02a8741b945b5eba488ed50e03d4eb8d Mon Sep 17 00:00:00 2001
From: Onur
Date: Tue, 1 Feb 2022 16:02:33 +0100
Subject: [PATCH 004/104] adjusted reward function
---
alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index e94b470..40b181b 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -97,14 +97,14 @@ class BeerPongReward:
# encourage bounce before falling into cup
if not ball_in_cup:
if not self.ball_table_contact:
- reward = 0.2 * (1 - np.tanh(min_dist ** 2)) + 0.1 * (1 - np.tanh(final_dist ** 2))
+ reward = 0.2 * (1 - np.tanh(0.5*min_dist)) + 0.1 * (1 - np.tanh(0.5*final_dist))
else:
- reward = (1 - np.tanh(min_dist ** 2)) + 0.5 * (1 - np.tanh(final_dist ** 2))
+ reward = (1 - np.tanh(0.5*min_dist)) + 0.5 * (1 - np.tanh(0.5*final_dist))
else:
if not self.ball_table_contact:
- reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 1
+ reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 1
else:
- reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 3
+ reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 3
# reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
success = ball_in_cup
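The change above swaps the distance shaping from 1 - tanh(d**2) to 1 - tanh(0.5*d). One reading of the effect (my interpretation, not stated in the commit): the squared form saturates to nearly zero already around d ≈ 1.5 m, while the linear-argument form keeps a usable gradient at larger distances. A quick comparison:

    import numpy as np

    for d in (0.1, 0.5, 1.0, 2.0, 3.0):
        old = 1 - np.tanh(d ** 2)      # pre-patch shaping
        new = 1 - np.tanh(0.5 * d)     # shaping introduced in this patch
        print(f"d={d:3.1f}  old={old:.3f}  new={new:.3f}")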
From 2a27f59e506893bd8ac657fbbfba3bb9f3505d41 Mon Sep 17 00:00:00 2001
From: Onur
Date: Tue, 8 Feb 2022 09:50:01 +0100
Subject: [PATCH 005/104] fix tt issues -> context + traj.length
---
alr_envs/alr/__init__.py | 39 ++++++++++++++++++-
.../beerpong/assets/beerpong_wo_cup.xml | 11 +++---
alr_envs/alr/mujoco/beerpong/beerpong.py | 7 +++-
.../alr/mujoco/table_tennis/mp_wrapper.py | 6 +--
alr_envs/alr/mujoco/table_tennis/tt_gym.py | 8 ++--
setup.py | 2 +-
6 files changed, 58 insertions(+), 15 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 0181f62..026f1a7 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -236,6 +236,17 @@ register(
}
)
+# Beerpong devel big table
+register(
+ id='ALRBeerPong-v3',
+ entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+ max_episode_steps=600,
+ kwargs={
+ "rndm_goal": True,
+ "cup_goal_pos": [-0.3, -1.2]
+ }
+ )
+
# Motion Primitive Environments
## Simple Reacher
@@ -402,6 +413,32 @@ for _v in _versions:
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+## Beerpong- Big table devel
+
+register(
+ id='BeerpongProMP-v3',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": f"alr_envs:ALRBeerPong-v3",
+ "wrappers": [mujoco.beerpong.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 7,
+ "num_basis": 5,
+ "duration": 1,
+ "post_traj_time": 2,
+ "policy_type": "motor",
+ "weights_scale": 1,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
+ "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+ }
+ }
+ }
+ )
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('BeerpongProMP-v3')
+
## Table Tennis
ctxt_dim = [2, 4]
for _v, cd in enumerate(ctxt_dim):
@@ -416,7 +453,7 @@ for _v, cd in enumerate(ctxt_dim):
"num_dof": 7,
"num_basis": 2,
"duration": 1.25,
- "post_traj_time": 4.5,
+ "post_traj_time": 1.5,
"policy_type": "motor",
"weights_scale": 1.0,
"zero_start": True,
diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
index e96d2bc..436b36c 100644
--- a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
+++ b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
@@ -132,18 +132,19 @@
[XML diff body not preserved: the changed element definitions in this hunk were stripped during extraction]
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 9092ef1..99d1a23 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -7,8 +7,11 @@ from gym.envs.mujoco import MujocoEnv
from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
-CUP_POS_MIN = np.array([-0.32, -2.2])
-CUP_POS_MAX = np.array([0.32, -1.2])
+# CUP_POS_MIN = np.array([-0.32, -2.2])
+# CUP_POS_MAX = np.array([0.32, -1.2])
+
+CUP_POS_MIN = np.array([-1.42, -4.05])
+CUP_POS_MAX = np.array([1.42, -1.25])
class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
diff --git a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
index 86d5a00..473583f 100644
--- a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
@@ -11,9 +11,9 @@ class MPWrapper(MPEnvWrapper):
def active_obs(self):
# TODO: @Max Filter observations correctly
return np.hstack([
- [True] * 7, # Joint Pos
- [True] * 3, # Ball pos
- [True] * 3 # goal pos
+ [False] * 7, # Joint Pos
+ [True] * 2, # Ball pos
+ [True] * 2 # goal pos
])
@property
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_gym.py b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
index d1c2dc3..e88bbc5 100644
--- a/alr_envs/alr/mujoco/table_tennis/tt_gym.py
+++ b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
@@ -10,7 +10,8 @@ from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
#TODO: Check for simulation stability. Make sure the code runs even for sim crash
-MAX_EPISODE_STEPS = 1750
+# MAX_EPISODE_STEPS = 1750
+MAX_EPISODE_STEPS = 1375
BALL_NAME_CONTACT = "target_ball_contact"
BALL_NAME = "target_ball"
TABLE_NAME = 'table_tennis_table'
@@ -76,10 +77,11 @@ class TTEnvGym(MujocoEnv, utils.EzPickle):
self._ids_set = True
def _get_obs(self):
- ball_pos = self.sim.data.body_xpos[self.ball_id]
+ ball_pos = self.sim.data.body_xpos[self.ball_id][:2].copy()
+ goal_pos = self.goal[:2].copy()
obs = np.concatenate([self.sim.data.qpos[:7].copy(), # 7 joint positions
ball_pos,
- self.goal.copy()])
+ goal_pos])
return obs
def sample_context(self):
diff --git a/setup.py b/setup.py
index 6122e90..796c569 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ setup(
install_requires=[
'gym',
'PyQt5',
- 'matplotlib',
+ #'matplotlib',
#'mp_env_api @ git+https://github.com/ALRhub/motion_primitive_env_api.git',
# 'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git',
'mujoco-py<2.1,>=2.0',
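Earlier in this patch the table-tennis wrapper marks only the 2-D ball and goal positions as active context ([False]*7 for the joints), matching the trimmed _get_obs in tt_gym.py. How such a boolean active_obs mask picks the context out of a flat observation is sketched below; this is a generic illustration under my own naming, not the actual mp_env_api implementation:

    import numpy as np

    active_obs = np.hstack([
        [False] * 7,   # joint positions
        [True] * 2,    # ball x-y
        [True] * 2,    # goal x-y
    ])

    obs = np.arange(11, dtype=float)   # stand-in for the 11-D table-tennis observation
    context = obs[active_obs]          # 4-D context: ball x-y and goal x-y
    print(context)                     # [ 7.  8.  9. 10.]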
From 209eac352c724f0181705b0cd333f881ee80f4be Mon Sep 17 00:00:00 2001
From: Onur
Date: Thu, 10 Feb 2022 19:22:34 +0100
Subject: [PATCH 006/104] big table xml
---
.../beerpong/assets/beerpong_wo_cup.xml | 13 +-
.../assets/beerpong_wo_cup_big_table.xml | 188 ++++++++++++++++++
2 files changed, 194 insertions(+), 7 deletions(-)
create mode 100644 alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml
diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
index 436b36c..d89e03c 100644
--- a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
+++ b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
@@ -132,19 +132,18 @@
[XML diff body not preserved: the changed element definitions in this hunk were stripped during extraction]
@@ -185,4 +184,4 @@
[XML diff body not preserved: stripped during extraction; the new final line carries no trailing newline]
diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml
new file mode 100644
index 0000000..436b36c
--- /dev/null
+++ b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml
@@ -0,0 +1,188 @@
[XML file body not preserved: the 188-line body of beerpong_wo_cup_big_table.xml added here was stripped during extraction]
From 855f0f1c7bbc869cb0a6d6625ac8516cdf5ed12e Mon Sep 17 00:00:00 2001
From: Onur
Date: Tue, 5 Apr 2022 10:47:01 +0200
Subject: [PATCH 007/104] fix tt issues -> context + traj.length
---
alr_envs/alr/__init__.py | 88 +++++++---------------
alr_envs/alr/mujoco/table_tennis/tt_gym.py | 3 +-
2 files changed, 28 insertions(+), 63 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 026f1a7..917e102 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -236,16 +236,6 @@ register(
}
)
-# Beerpong devel big table
-register(
- id='ALRBeerPong-v3',
- entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
- max_episode_steps=600,
- kwargs={
- "rndm_goal": True,
- "cup_goal_pos": [-0.3, -1.2]
- }
- )
# Motion Primitive Environments
@@ -413,32 +403,6 @@ for _v in _versions:
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-## Beerpong- Big table devel
-
-register(
- id='BeerpongProMP-v3',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": f"alr_envs:ALRBeerPong-v3",
- "wrappers": [mujoco.beerpong.MPWrapper],
- "mp_kwargs": {
- "num_dof": 7,
- "num_basis": 5,
- "duration": 1,
- "post_traj_time": 2,
- "policy_type": "motor",
- "weights_scale": 1,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
- "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
- }
- }
- }
- )
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('BeerpongProMP-v3')
-
## Table Tennis
ctxt_dim = [2, 4]
for _v, cd in enumerate(ctxt_dim):
@@ -453,7 +417,7 @@ for _v, cd in enumerate(ctxt_dim):
"num_dof": 7,
"num_basis": 2,
"duration": 1.25,
- "post_traj_time": 1.5,
+ "post_traj_time": 4.5,
"policy_type": "motor",
"weights_scale": 1.0,
"zero_start": True,
@@ -467,28 +431,28 @@ for _v, cd in enumerate(ctxt_dim):
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-register(
- id='TableTennisProMP-v2',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": "alr_envs:TableTennis2DCtxt-v1",
- "wrappers": [mujoco.table_tennis.MPWrapper],
- "mp_kwargs": {
- "num_dof": 7,
- "num_basis": 2,
- "duration": 1.,
- "post_traj_time": 2.5,
- "policy_type": "motor",
- "weights_scale": 1,
- "off": -0.05,
- "bandwidth_factor": 3.5,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
- "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
- }
- }
- }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("TableTennisProMP-v2")
+# register(
+# id='TableTennisProMP-v2',
+# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+# kwargs={
+# "name": "alr_envs:TableTennis2DCtxt-v1",
+# "wrappers": [mujoco.table_tennis.MPWrapper],
+# "mp_kwargs": {
+# "num_dof": 7,
+# "num_basis": 2,
+# "duration": 1.25,
+# "post_traj_time": 4.5,
+# #"width": 0.01,
+# #"off": 0.01,
+# "policy_type": "motor",
+# "weights_scale": 1.0,
+# "zero_start": True,
+# "zero_goal": False,
+# "policy_kwargs": {
+# "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
+# "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
+# }
+# }
+# }
+# )
+# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("TableTennisProMP-v2")
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_gym.py b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
index e88bbc5..c93cd26 100644
--- a/alr_envs/alr/mujoco/table_tennis/tt_gym.py
+++ b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
@@ -11,7 +11,8 @@ from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
#TODO: Check for simulation stability. Make sure the code runs even for sim crash
# MAX_EPISODE_STEPS = 1750
-MAX_EPISODE_STEPS = 1375
+# MAX_EPISODE_STEPS = 1375
+MAX_EPISODE_STEPS = 2875
BALL_NAME_CONTACT = "target_ball_contact"
BALL_NAME = "target_ball"
TABLE_NAME = 'table_tennis_table'
From 8ef00f73432d64ca600263b5ad2840ff454433d6 Mon Sep 17 00:00:00 2001
From: Onur
Date: Thu, 7 Apr 2022 10:20:10 +0200
Subject: [PATCH 008/104] shorter TT simulation time
---
alr_envs/alr/__init__.py | 2 +-
alr_envs/alr/mujoco/table_tennis/tt_gym.py | 9 ++++++---
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 917e102..931e3bb 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -417,7 +417,7 @@ for _v, cd in enumerate(ctxt_dim):
"num_dof": 7,
"num_basis": 2,
"duration": 1.25,
- "post_traj_time": 4.5,
+ "post_traj_time": 1.5,
"policy_type": "motor",
"weights_scale": 1.0,
"zero_start": True,
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_gym.py b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
index c93cd26..7079a1b 100644
--- a/alr_envs/alr/mujoco/table_tennis/tt_gym.py
+++ b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
@@ -10,9 +10,7 @@ from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
#TODO: Check for simulation stability. Make sure the code runs even for sim crash
-# MAX_EPISODE_STEPS = 1750
-# MAX_EPISODE_STEPS = 1375
-MAX_EPISODE_STEPS = 2875
+MAX_EPISODE_STEPS = 1375 # (1.25 + 1.5)/0.002
BALL_NAME_CONTACT = "target_ball_contact"
BALL_NAME = "target_ball"
TABLE_NAME = 'table_tennis_table'
@@ -58,6 +56,7 @@ class TTEnvGym(MujocoEnv, utils.EzPickle):
self.hit_ball = False
self.ball_contact_after_hit = False
self._ids_set = False
+ self.n_step = 0
super(TTEnvGym, self).__init__(model_path=model_path, frame_skip=1)
self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func.
self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
@@ -67,6 +66,7 @@ class TTEnvGym(MujocoEnv, utils.EzPickle):
self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this
self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
+
def _set_ids(self):
self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func.
self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
@@ -119,6 +119,7 @@ class TTEnvGym(MujocoEnv, utils.EzPickle):
self.sim.forward()
self.reward_func.reset(self.goal) # reset the reward function
+ self.n_step = 0
return self._get_obs()
def _contact_checker(self, id_1, id_2):
@@ -166,6 +167,8 @@ class TTEnvGym(MujocoEnv, utils.EzPickle):
info = {"hit_ball": self.hit_ball,
"q_pos": np.copy(self.sim.data.qpos[:7]),
"ball_pos": np.copy(self.sim.data.qpos[7:])}
+ self.n_step += 1
+ print(self.n_step)
return ob, reward, done, info # might add some information here ....
def set_context(self, context):
From 33c79f31d2262afbce8328111e557eebe5b156ff Mon Sep 17 00:00:00 2001
From: Onur
Date: Thu, 7 Apr 2022 10:24:47 +0200
Subject: [PATCH 009/104] remove unnecessary print
---
alr_envs/alr/mujoco/table_tennis/tt_gym.py | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_gym.py b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
index 7079a1b..33db936 100644
--- a/alr_envs/alr/mujoco/table_tennis/tt_gym.py
+++ b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
@@ -10,7 +10,7 @@ from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
#TODO: Check for simulation stability. Make sure the code runs even for sim crash
-MAX_EPISODE_STEPS = 1375 # (1.25 + 1.5)/0.002
+MAX_EPISODE_STEPS = 1375 # (1.25 + 1.5) /0.002
BALL_NAME_CONTACT = "target_ball_contact"
BALL_NAME = "target_ball"
TABLE_NAME = 'table_tennis_table'
@@ -56,7 +56,6 @@ class TTEnvGym(MujocoEnv, utils.EzPickle):
self.hit_ball = False
self.ball_contact_after_hit = False
self._ids_set = False
- self.n_step = 0
super(TTEnvGym, self).__init__(model_path=model_path, frame_skip=1)
self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func.
self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
@@ -168,7 +167,6 @@ class TTEnvGym(MujocoEnv, utils.EzPickle):
"q_pos": np.copy(self.sim.data.qpos[:7]),
"ball_pos": np.copy(self.sim.data.qpos[7:])}
self.n_step += 1
- print(self.n_step)
return ob, reward, done, info # might add some information here ....
def set_context(self, context):
From 04b6b314cf6d8930daf2d0cff247e9cd4e4e0716 Mon Sep 17 00:00:00 2001
From: ottofabian
Date: Thu, 7 Apr 2022 14:57:53 +0200
Subject: [PATCH 010/104] Update beerpong_wo_cup.xml
---
alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
index d89e03c..e96d2bc 100644
--- a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
+++ b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
@@ -132,7 +132,7 @@
[XML diff body not preserved: stripped during extraction; this hunk restores the trailing newline dropped in patch 006]
From eb7dd3a18fc98f8007cfb22bc07f7ace0193031f Mon Sep 17 00:00:00 2001
From: Onur
Date: Thu, 7 Apr 2022 18:49:44 +0200
Subject: [PATCH 011/104] fixing beerpong rewards
---
alr_envs/alr/__init__.py | 28 +----
alr_envs/alr/mujoco/beerpong/beerpong.py | 18 +--
.../mujoco/beerpong/beerpong_reward_staged.py | 104 +++++++++++++++---
3 files changed, 94 insertions(+), 56 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 931e3bb..7e7bca1 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -429,30 +429,4 @@ for _v, cd in enumerate(ctxt_dim):
}
}
)
- ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-# register(
-# id='TableTennisProMP-v2',
-# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-# kwargs={
-# "name": "alr_envs:TableTennis2DCtxt-v1",
-# "wrappers": [mujoco.table_tennis.MPWrapper],
-# "mp_kwargs": {
-# "num_dof": 7,
-# "num_basis": 2,
-# "duration": 1.25,
-# "post_traj_time": 4.5,
-# #"width": 0.01,
-# #"off": 0.01,
-# "policy_type": "motor",
-# "weights_scale": 1.0,
-# "zero_start": True,
-# "zero_goal": False,
-# "policy_kwargs": {
-# "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
-# "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
-# }
-# }
-# }
-# )
-# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("TableTennisProMP-v2")
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 99d1a23..886b924 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -7,21 +7,16 @@ from gym.envs.mujoco import MujocoEnv
from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
-# CUP_POS_MIN = np.array([-0.32, -2.2])
-# CUP_POS_MAX = np.array([0.32, -1.2])
-
-CUP_POS_MIN = np.array([-1.42, -4.05])
-CUP_POS_MAX = np.array([1.42, -1.25])
+CUP_POS_MIN = np.array([-0.32, -2.2])
+CUP_POS_MAX = np.array([0.32, -1.2])
class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
rndm_goal=False, cup_goal_pos=None):
- if cup_goal_pos is None:
- cup_goal_pos = [-0.3, -1.2, 0.840]
- elif len(cup_goal_pos)==2:
- cup_goal_pos = np.array(cup_goal_pos)
- cup_goal_pos = np.insert(cup_goal_pos, 2, 0.80)
+ cup_goal_pos = np.array(cup_goal_pos if cup_goal_pos is not None else [-0.3, -1.2, 0.840])
+ if cup_goal_pos.shape[0]==2:
+ cup_goal_pos = np.insert(cup_goal_pos, 2, 0.840)
self.cup_goal_pos = np.array(cup_goal_pos)
self._steps = 0
@@ -52,7 +47,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
else:
self.noise_std = 0
-
reward_function = BeerPongReward
self.reward_function = reward_function()
@@ -94,7 +88,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
self.set_state(start_pos, init_vel)
if self.rndm_goal:
- xy = np.random.uniform(CUP_POS_MIN, CUP_POS_MAX)
+ xy = self.np_random.uniform(CUP_POS_MIN, CUP_POS_MAX)
xyz = np.zeros(3)
xyz[:2] = xy
xyz[-1] = 0.840
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index 40b181b..6edc7ee 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -54,6 +54,7 @@ class BeerPongReward:
self.ball_table_contact = False
self.ball_wall_contact = False
self.ball_cup_contact = False
+ self.ball_in_cup = False
self.noisy_bp = noisy
self._t_min_final_dist = -1
@@ -80,39 +81,94 @@ class BeerPongReward:
action_cost = np.sum(np.square(action))
self.action_costs.append(action_cost)
+ # ##################### Reward function which forces to bounce once on the table (tanh) ########################
+ # if not self.ball_table_contact:
+ # self.ball_table_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+ # self.table_collision_id)
- if not self.ball_table_contact:
- self.ball_table_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
- self.table_collision_id)
+ # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+ # if env._steps == env.ep_length - 1 or self._is_collided:
+ # min_dist = np.min(self.dists)
+ # final_dist = self.dists_final[-1]
+ #
+ # ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+ # self.cup_table_collision_id)
+ #
+ # # encourage bounce before falling into cup
+ # if not ball_in_cup:
+ # if not self.ball_table_contact:
+ # reward = 0.2 * (1 - np.tanh(0.5*min_dist)) + 0.1 * (1 - np.tanh(0.5*final_dist))
+ # else:
+ # reward = (1 - np.tanh(0.5*min_dist)) + 0.5 * (1 - np.tanh(0.5*final_dist))
+ # else:
+ # if not self.ball_table_contact:
+ # reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 1
+ # else:
+ # reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 3
+ #
+ # # reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
+ # success = ball_in_cup
+ # crash = self._is_collided
+ # else:
+ # reward = - 1e-2 * action_cost
+ # success = False
+ # crash = False
+ # ################################################################################################################
+ # ##################### Reward function which does not force to bounce once on the table (tanh) ################
+ # self._check_contacts(env.sim)
+ # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+ # if env._steps == env.ep_length - 1 or self._is_collided:
+ # min_dist = np.min(self.dists)
+ # final_dist = self.dists_final[-1]
+ #
+ # # encourage bounce before falling into cup
+ # if not self.ball_in_cup:
+ # if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact:
+ # min_dist_coeff, final_dist_coeff, rew_offset = 0.2, 0.1, 0
+ # # reward = 0.2 * (1 - np.tanh(0.5*min_dist)) + 0.1 * (1 - np.tanh(0.5*final_dist))
+ # else:
+ # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, 0
+ # # reward = (1 - np.tanh(0.5*min_dist)) + 0.5 * (1 - np.tanh(0.5*final_dist))
+ # else:
+ # min_dist_coeff, final_dist_coeff, rew_offset = 1, 2, 3
+ # # reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 3
+ #
+ # reward = final_dist_coeff * (1 - np.tanh(0.5 * final_dist)) + min_dist_coeff * (1 - np.tanh(0.5 * min_dist)) \
+ # + rew_offset
+ # success = self.ball_in_cup
+ # crash = self._is_collided
+ # else:
+ # reward = - 1e-2 * action_cost
+ # success = False
+ # crash = False
+ # ################################################################################################################
+
+ # ##################### Reward function which does not force to bounce once on the table (quad dist) ############
+ self._check_contacts(env.sim)
self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
if env._steps == env.ep_length - 1 or self._is_collided:
-
min_dist = np.min(self.dists)
final_dist = self.dists_final[-1]
- ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id,
- self.cup_table_collision_id)
-
# encourage bounce before falling into cup
- if not ball_in_cup:
- if not self.ball_table_contact:
- reward = 0.2 * (1 - np.tanh(0.5*min_dist)) + 0.1 * (1 - np.tanh(0.5*final_dist))
+ if not self.ball_in_cup:
+ if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact:
+ min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -4
else:
- reward = (1 - np.tanh(0.5*min_dist)) + 0.5 * (1 - np.tanh(0.5*final_dist))
+ min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -2
else:
- if not self.ball_table_contact:
- reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 1
- else:
- reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 3
+ min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
- # reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
- success = ball_in_cup
+ reward = rew_offset - min_dist_coeff * min_dist**2 - final_dist_coeff * final_dist**2 - \
+ 1e-4*np.mean(action_cost)
+ success = self.ball_in_cup
crash = self._is_collided
else:
reward = - 1e-2 * action_cost
success = False
crash = False
+ # ################################################################################################################
infos = {}
infos["success"] = success
@@ -124,6 +180,20 @@ class BeerPongReward:
return reward, infos
+ def _check_contacts(self, sim):
+ if not self.ball_table_contact:
+ self.ball_table_contact = self._check_collision_single_objects(sim, self.ball_collision_id,
+ self.table_collision_id)
+ if not self.ball_cup_contact:
+ self.ball_cup_contact = self._check_collision_with_set_of_objects(sim, self.ball_collision_id,
+ self.cup_collision_ids)
+ if not self.ball_wall_contact:
+ self.ball_wall_contact = self._check_collision_single_objects(sim, self.ball_collision_id,
+ self.wall_collision_id)
+ if not self.ball_in_cup:
+ self.ball_in_cup = self._check_collision_single_objects(sim, self.ball_collision_id,
+ self.cup_table_collision_id)
+
def _check_collision_single_objects(self, sim, id_1, id_2):
for coni in range(0, sim.data.ncon):
con = sim.data.contact[coni]
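The active branch of the rewritten reward is the quadratic-distance variant at the bottom of the method; the two tanh variants are kept as commented-out reference. Its episode-end computation can be summarized as a standalone function (a sketch with illustrative argument names; in the real code the contact flags come from _check_contacts):

    import numpy as np

    def staged_quadratic_reward(min_dist, final_dist, action_cost, ball_in_cup, any_contact):
        # coefficients as introduced in this patch:
        #   no table/cup/wall contact -> strong negative offset (-4)
        #   some contact              -> milder offset (-2)
        #   ball in cup               -> only the final distance counts, no offset
        if not ball_in_cup:
            min_c, final_c, offset = (1, 0.5, -4) if not any_contact else (1, 0.5, -2)
        else:
            min_c, final_c, offset = 0, 1, 0
        return offset - min_c * min_dist ** 2 - final_c * final_dist ** 2 - 1e-4 * np.mean(action_cost)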
From 7ffe94dcfd68d420a5af1da449cb82e0116e1dbd Mon Sep 17 00:00:00 2001
From: Onur
Date: Fri, 8 Apr 2022 17:32:53 +0200
Subject: [PATCH 012/104] working bp version, tested with CMORE on a smaller
context with 1 seed
---
alr_envs/alr/__init__.py | 1 +
alr_envs/alr/mujoco/beerpong/beerpong.py | 7 ++++++-
.../alr/mujoco/beerpong/beerpong_reward_staged.py | 14 +++++++-------
3 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 7e7bca1..38abbf1 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -391,6 +391,7 @@ for _v in _versions:
"duration": 1,
"post_traj_time": 2,
"policy_type": "motor",
+ # "weights_scale": 0.15,
"weights_scale": 1,
"zero_start": True,
"zero_goal": False,
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 886b924..d885e78 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -10,6 +10,10 @@ from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
CUP_POS_MIN = np.array([-0.32, -2.2])
CUP_POS_MAX = np.array([0.32, -1.2])
+# smaller context space -> Easier task
+# CUP_POS_MIN = np.array([-0.16, -2.2])
+# CUP_POS_MAX = np.array([0.16, -1.7])
+
class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
@@ -36,7 +40,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.ball_site_id = 0
self.ball_id = 11
- self._release_step = 175 # time step of ball release
+ # self._release_step = 175 # time step of ball release
+ self._release_step = 130 # time step of ball release
self.sim_time = 3 # seconds
self.ep_length = 600 # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index 6edc7ee..c9ed451 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -85,7 +85,7 @@ class BeerPongReward:
# if not self.ball_table_contact:
# self.ball_table_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
# self.table_collision_id)
-
+ #
# self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
# if env._steps == env.ep_length - 1 or self._is_collided:
# min_dist = np.min(self.dists)
@@ -115,7 +115,7 @@ class BeerPongReward:
# crash = False
# ################################################################################################################
- # ##################### Reward function which does not force to bounce once on the table (tanh) ################
+ ##################### Reward function which does not force to bounce once on the table (tanh) ################
# self._check_contacts(env.sim)
# self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
# if env._steps == env.ep_length - 1 or self._is_collided:
@@ -142,9 +142,9 @@ class BeerPongReward:
# reward = - 1e-2 * action_cost
# success = False
# crash = False
- # ################################################################################################################
+ ################################################################################################################
- # ##################### Reward function which does not force to bounce once on the table (quad dist) ############
+ # # ##################### Reward function which does not force to bounce once on the table (quad dist) ############
self._check_contacts(env.sim)
self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
if env._steps == env.ep_length - 1 or self._is_collided:
@@ -162,12 +162,12 @@ class BeerPongReward:
reward = rew_offset - min_dist_coeff * min_dist**2 - final_dist_coeff * final_dist**2 - \
1e-4*np.mean(action_cost)
+ # 1e-7*np.mean(action_cost)
success = self.ball_in_cup
- crash = self._is_collided
else:
- reward = - 1e-2 * action_cost
+ # reward = - 1e-2 * action_cost
+ reward = - 1e-4 * action_cost
success = False
- crash = False
# ################################################################################################################
infos = {}
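For context on the release-step change above: the beerpong environment runs at dt = 0.005 s (ep_length 600 corresponds to the stated 3 s), so moving _release_step from 175 to 130 moves the ball release from 0.875 s to 0.65 s into the 1 s ProMP trajectory. A quick check:

    dt = 0.005  # beerpong simulation step, per the env comments
    for release_step in (175, 130):
        print(release_step, "steps ->", release_step * dt, "s")
    # 175 steps -> 0.875 s
    # 130 steps -> 0.65 s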
From 092525889be79c43350b3e37e729a4348aab71ac Mon Sep 17 00:00:00 2001
From: Onur
Date: Wed, 13 Apr 2022 17:28:25 +0200
Subject: [PATCH 013/104] Added environments from Paul + Marc
---
alr_envs/alr/__init__.py | 273 +++++++++++++++++-
alr_envs/alr/mujoco/__init__.py | 9 +-
alr_envs/alr/mujoco/ant_jump/__init__.py | 1 +
alr_envs/alr/mujoco/ant_jump/ant_jump.py | 117 ++++++++
alr_envs/alr/mujoco/ant_jump/assets/ant.xml | 81 ++++++
alr_envs/alr/mujoco/ant_jump/mp_wrapper.py | 31 ++
.../alr/mujoco/half_cheetah_jump/__init__.py | 1 +
.../half_cheetah_jump/assets/cheetah.xml | 62 ++++
.../half_cheetah_jump/half_cheetah_jump.py | 100 +++++++
.../mujoco/half_cheetah_jump/mp_wrapper.py | 30 ++
alr_envs/alr/mujoco/hopper_jump/__init__.py | 1 +
.../alr/mujoco/hopper_jump/assets/hopper.xml | 48 +++
.../hopper_jump/assets/hopper_jump_on_box.xml | 51 ++++
.../alr/mujoco/hopper_jump/hopper_jump.py | 110 +++++++
.../mujoco/hopper_jump/hopper_jump_on_box.py | 170 +++++++++++
alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py | 31 ++
alr_envs/alr/mujoco/hopper_throw/__init__.py | 1 +
.../hopper_throw/assets/hopper_throw.xml | 56 ++++
.../assets/hopper_throw_in_basket.xml | 132 +++++++++
.../alr/mujoco/hopper_throw/hopper_throw.py | 113 ++++++++
.../hopper_throw/hopper_throw_in_basket.py | 143 +++++++++
.../alr/mujoco/hopper_throw/mp_wrapper.py | 30 ++
.../alr/mujoco/walker_2d_jump/__init__.py | 1 +
.../mujoco/walker_2d_jump/assets/walker2d.xml | 62 ++++
.../alr/mujoco/walker_2d_jump/mp_wrapper.py | 30 ++
.../mujoco/walker_2d_jump/walker_2d_jump.py | 113 ++++++++
26 files changed, 1795 insertions(+), 2 deletions(-)
create mode 100644 alr_envs/alr/mujoco/ant_jump/__init__.py
create mode 100644 alr_envs/alr/mujoco/ant_jump/ant_jump.py
create mode 100644 alr_envs/alr/mujoco/ant_jump/assets/ant.xml
create mode 100644 alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
create mode 100644 alr_envs/alr/mujoco/half_cheetah_jump/__init__.py
create mode 100644 alr_envs/alr/mujoco/half_cheetah_jump/assets/cheetah.xml
create mode 100644 alr_envs/alr/mujoco/half_cheetah_jump/half_cheetah_jump.py
create mode 100644 alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py
create mode 100644 alr_envs/alr/mujoco/hopper_jump/__init__.py
create mode 100644 alr_envs/alr/mujoco/hopper_jump/assets/hopper.xml
create mode 100644 alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump_on_box.xml
create mode 100644 alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
create mode 100644 alr_envs/alr/mujoco/hopper_jump/hopper_jump_on_box.py
create mode 100644 alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
create mode 100644 alr_envs/alr/mujoco/hopper_throw/__init__.py
create mode 100644 alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw.xml
create mode 100644 alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml
create mode 100644 alr_envs/alr/mujoco/hopper_throw/hopper_throw.py
create mode 100644 alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py
create mode 100644 alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
create mode 100644 alr_envs/alr/mujoco/walker_2d_jump/__init__.py
create mode 100644 alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml
create mode 100644 alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
create mode 100644 alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 38abbf1..e435fa6 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -11,6 +11,13 @@ from .mujoco.reacher.alr_reacher import ALRReacherEnv
from .mujoco.reacher.balancing import BalancingEnv
from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
+from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP
+from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP
+from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP
+from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX
+from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
+from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
+from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
@@ -185,6 +192,69 @@ register(
}
)
+
+register(
+ id='ALRAntJump-v0',
+ entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP
+ }
+)
+
+register(
+ id='ALRHalfCheetahJump-v0',
+ entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP
+ }
+)
+
+register(
+ id='ALRHopperJump-v0',
+ entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP
+ }
+)
+
+register(
+ id='ALRHopperJumpOnBox-v0',
+ entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX
+ }
+)
+
+register(
+ id='ALRHopperThrow-v0',
+ entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW
+ }
+)
+
+register(
+ id='ALRHopperThrowInBasket-v0',
+ entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
+ }
+)
+
+register(
+ id='ALRWalker2DJump-v0',
+ entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP
+ }
+)
## Balancing Reacher
register(
@@ -430,4 +500,205 @@ for _v, cd in enumerate(ctxt_dim):
}
}
)
- ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
\ No newline at end of file
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+
+## AntJump
+for _v, cd in enumerate(ctxt_dim):
+ _env_id = f'TableTennisProMP-v{_v}'
+ register(
+ id=_env_id,
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:TableTennis{}DCtxt-v0".format(cd),
+ "wrappers": [mujoco.table_tennis.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 7,
+ "num_basis": 2,
+ "duration": 1.25,
+ "post_traj_time": 1.5,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
+ "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
+ }
+ }
+ }
+ )
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+
+register(
+ id='ALRAntJumpProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRAntJump-v0",
+ "wrappers": [mujoco.ant_jump.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 8,
+ "num_basis": 5,
+ "duration": 10,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(8),
+ "d_gains": 0.1*np.ones(8)
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRAntJumpProMP-v0')
+
+register(
+ id='ALRHalfCheetahJumpProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRHalfCheetahJump-v0",
+ "wrappers": [mujoco.half_cheetah_jump.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 6,
+ "num_basis": 5,
+ "duration": 5,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(6),
+ "d_gains": 0.1*np.ones(6)
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHalfCheetahJumpProMP-v0')
+
+
+register(
+ id='ALRHopperJumpProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRHopperJump-v0",
+ "wrappers": [mujoco.hopper_jump.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 3,
+ "num_basis": 5,
+ "duration": 2,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(3),
+ "d_gains": 0.1*np.ones(3)
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperJumpProMP-v0')
+
+register(
+ id='ALRHopperJumpOnBoxProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRHopperJumpOnBox-v0",
+ "wrappers": [mujoco.hopper_jump.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 3,
+ "num_basis": 5,
+ "duration": 2,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(3),
+ "d_gains": 0.1*np.ones(3)
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperJumpOnBoxProMP-v0')
+
+
+register(
+ id='ALRHopperThrowProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRHopperThrow-v0",
+ "wrappers": [mujoco.hopper_throw.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 3,
+ "num_basis": 5,
+ "duration": 2,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(3),
+ "d_gains": 0.1*np.ones(3)
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperThrowProMP-v0')
+
+
+register(
+ id='ALRHopperThrowInBasketProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRHopperThrowInBasket-v0",
+ "wrappers": [mujoco.hopper_throw.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 3,
+ "num_basis": 5,
+ "duration": 2,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(3),
+ "d_gains": 0.1*np.ones(3)
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperThrowInBasketProMP-v0')
+
+
+register(
+ id='ALRWalker2DJumpProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRWalker2DJump-v0",
+ "wrappers": [mujoco.walker_2d_jump.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 6,
+ "num_basis": 5,
+ "duration": 2.4,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(6),
+ "d_gains": 0.1*np.ones(6)
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRWalker2DJumpProMP-v0')
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index cdb3cde..8935db4 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -3,4 +3,11 @@ from .reacher.balancing import BalancingEnv
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
from .table_tennis.tt_gym import TTEnvGym
-from .beerpong.beerpong import ALRBeerBongEnv
\ No newline at end of file
+from .beerpong.beerpong import ALRBeerBongEnv
+from .ant_jump.ant_jump import ALRAntJumpEnv
+from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
+from .hopper_jump.hopper_jump import ALRHopperJumpEnv
+from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
+from .hopper_throw.hopper_throw import ALRHopperThrowEnv
+from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
+from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/ant_jump/__init__.py b/alr_envs/alr/mujoco/ant_jump/__init__.py
new file mode 100644
index 0000000..c5e6d2f
--- /dev/null
+++ b/alr_envs/alr/mujoco/ant_jump/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/ant_jump/ant_jump.py b/alr_envs/alr/mujoco/ant_jump/ant_jump.py
new file mode 100644
index 0000000..09c623d
--- /dev/null
+++ b/alr_envs/alr/mujoco/ant_jump/ant_jump.py
@@ -0,0 +1,117 @@
+import numpy as np
+from gym.envs.mujoco.ant_v3 import AntEnv
+
+MAX_EPISODE_STEPS_ANTJUMP = 200
+
+
+class ALRAntJumpEnv(AntEnv):
+ """
+ Initialization changes to normal Ant:
+ - healthy_reward: 1.0 -> 0.01 -> 0.0 no healthy reward needed - Paul and Marc
+ - ctrl_cost_weight 0.5 -> 0.0
+ - contact_cost_weight: 5e-4 -> 0.0
+ - healthy_z_range: (0.2, 1.0) -> (0.3, float('inf')) !!!!! Does that make sense, limiting height?
+ """
+
+ def __init__(self,
+ xml_file='ant.xml',
+ ctrl_cost_weight=0.0,
+ contact_cost_weight=0.0,
+ healthy_reward=0.0,
+ terminate_when_unhealthy=True,
+ healthy_z_range=(0.3, float('inf')),
+ contact_force_range=(-1.0, 1.0),
+ reset_noise_scale=0.1,
+ context=True, # variable to decide if context is used or not
+ exclude_current_positions_from_observation=True,
+ max_episode_steps=200):
+ self.current_step = 0
+ self.max_height = 0
+ self.context = context
+ self.max_episode_steps = max_episode_steps
+ self.goal = 0 # goal when training with context
+ super().__init__(xml_file, ctrl_cost_weight, contact_cost_weight, healthy_reward, terminate_when_unhealthy,
+ healthy_z_range, contact_force_range, reset_noise_scale,
+ exclude_current_positions_from_observation)
+
+ def step(self, action):
+
+ self.current_step += 1
+ self.do_simulation(action, self.frame_skip)
+
+ height = self.get_body_com("torso")[2].copy()
+
+ self.max_height = max(height, self.max_height)
+
+ rewards = 0
+
+ ctrl_cost = self.control_cost(action)
+ contact_cost = self.contact_cost
+
+ costs = ctrl_cost + contact_cost
+
+ done = height < 0.3 # fall over -> is the 0.3 value from healthy_z_range? TODO change 0.3 to the value of healthy z angle
+
+ if self.current_step == self.max_episode_steps or done:
+ if self.context:
+ # -10 for scaling the value of the distance between the max_height and the goal height; only used when context is enabled
+ # height_reward = -10 * (np.linalg.norm(self.max_height - self.goal))
+ height_reward = -10*np.linalg.norm(self.max_height - self.goal)
+ # no healthy reward when using context, because we optimize a negative value
+ healthy_reward = 0
+ else:
+ height_reward = self.max_height - 0.7
+ healthy_reward = self.healthy_reward * self.current_step
+
+ rewards = height_reward + healthy_reward
+
+ obs = self._get_obs()
+ reward = rewards - costs
+
+ info = {
+ 'height': height,
+ 'max_height': self.max_height,
+ 'goal': self.goal
+ }
+
+ return obs, reward, done, info
+
+ def _get_obs(self):
+ return np.append(super()._get_obs(), self.goal)
+
+ def reset(self):
+ self.current_step = 0
+ self.max_height = 0
+ self.goal = np.random.uniform(1.0, 2.5,
+ 1) # goal heights from 1.0 to 2.5; can be increased, but didn't work well with CMORE
+ return super().reset()
+
+ # reset_model had to be implemented in every env to make it deterministic
+ def reset_model(self):
+ noise_low = -self._reset_noise_scale
+ noise_high = self._reset_noise_scale
+
+ qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+
+ self.set_state(qpos, qvel)
+
+ observation = self._get_obs()
+ return observation
+
+if __name__ == '__main__':
+ render_mode = "human" # "human" or "partial" or "final"
+ env = ALRAntJumpEnv()
+ obs = env.reset()
+
+ for i in range(2000):
+ # objective.load_result("/tmp/cma")
+ # test with random actions
+ ac = env.action_space.sample()
+ obs, rew, d, info = env.step(ac)
+ if i % 10 == 0:
+ env.render(mode=render_mode)
+ if d:
+ env.reset()
+
+ env.close()
\ No newline at end of file
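The new jumping environments follow the pattern visible in ant_jump.py above: a goal height is sampled at reset, appended to the observation, and at episode end the reward is the negative, scaled distance between the maximum reached height and that goal. The core of that terminal reward, as a sketch (function name is illustrative):

    import numpy as np

    def terminal_height_reward(max_height, goal, context=True):
        # contextual case: -10 * |max_height - goal|, no healthy reward
        # non-contextual case (per ant_jump.py): max_height - 0.7
        return -10 * np.linalg.norm(max_height - goal) if context else max_height - 0.7

    print(terminal_height_reward(1.8, goal=2.0))  # -2.0 (0.2 m short of the goal)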
diff --git a/alr_envs/alr/mujoco/ant_jump/assets/ant.xml b/alr_envs/alr/mujoco/ant_jump/assets/ant.xml
new file mode 100644
index 0000000..ee4d679
--- /dev/null
+++ b/alr_envs/alr/mujoco/ant_jump/assets/ant.xml
@@ -0,0 +1,81 @@
[XML file body not preserved: the 81-line body of ant.xml added here was stripped during extraction]
diff --git a/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py b/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
new file mode 100644
index 0000000..4967b64
--- /dev/null
+++ b/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
@@ -0,0 +1,31 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+
+ @property
+ def active_obs(self):
+ return np.hstack([
+ [False] * 111, # ant has 111 dimensional observation space !!
+ [True] # goal height
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray]:
+ return self.env.sim.data.qpos[7:15].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel[6:14].copy()
+
+ @property
+ def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/__init__.py b/alr_envs/alr/mujoco/half_cheetah_jump/__init__.py
new file mode 100644
index 0000000..c5e6d2f
--- /dev/null
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/assets/cheetah.xml b/alr_envs/alr/mujoco/half_cheetah_jump/assets/cheetah.xml
new file mode 100644
index 0000000..41daadf
--- /dev/null
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/assets/cheetah.xml
@@ -0,0 +1,62 @@
[XML file body not preserved: the 62-line body of cheetah.xml added here was stripped during extraction]
diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/half_cheetah_jump.py b/alr_envs/alr/mujoco/half_cheetah_jump/half_cheetah_jump.py
new file mode 100644
index 0000000..4b53a5d
--- /dev/null
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/half_cheetah_jump.py
@@ -0,0 +1,100 @@
+import os
+from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
+import numpy as np
+
+MAX_EPISODE_STEPS_HALFCHEETAHJUMP = 100
+
+
+class ALRHalfCheetahJumpEnv(HalfCheetahEnv):
+ """
+    Initialization changes to normal HalfCheetah:
+    - ctrl_cost_weight: 0.1 -> 0.0
+ """
+
+ def __init__(self,
+ xml_file='cheetah.xml',
+ forward_reward_weight=1.0,
+ ctrl_cost_weight=0.0,
+ reset_noise_scale=0.1,
+ context=True,
+ exclude_current_positions_from_observation=True,
+ max_episode_steps=100):
+ self.current_step = 0
+ self.max_height = 0
+ self.max_episode_steps = max_episode_steps
+ self.goal = 0
+ self.context = context
+ xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
+ super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, reset_noise_scale,
+ exclude_current_positions_from_observation)
+
+ def step(self, action):
+
+ self.current_step += 1
+ self.do_simulation(action, self.frame_skip)
+
+ height_after = self.get_body_com("torso")[2]
+ self.max_height = max(height_after, self.max_height)
+
+        ## Didn't use fell_over, because the base env also has no done condition - Paul and Marc
+        # fell_over = abs(self.sim.data.qpos[2]) > 2.5  # how to detect that the cheetah fell over? is 2.5 ok?
+        # TODO: Should a fall-over be checked here?
+ done = False
+
+ ctrl_cost = self.control_cost(action)
+ costs = ctrl_cost
+
+ if self.current_step == self.max_episode_steps:
+ height_goal_distance = -10*np.linalg.norm(self.max_height - self.goal) + 1e-8 if self.context \
+ else self.max_height
+ rewards = self._forward_reward_weight * height_goal_distance
+ else:
+ rewards = 0
+
+ observation = self._get_obs()
+ reward = rewards - costs
+ info = {
+ 'height': height_after,
+ 'max_height': self.max_height
+ }
+
+ return observation, reward, done, info
+
+ def _get_obs(self):
+ return np.append(super()._get_obs(), self.goal)
+
+ def reset(self):
+ self.max_height = 0
+ self.current_step = 0
+        self.goal = np.random.uniform(1.1, 1.6, 1)  # goal heights from 1.1 to 1.6
+ return super().reset()
+
+ # overwrite reset_model to make it deterministic
+ def reset_model(self):
+ noise_low = -self._reset_noise_scale
+ noise_high = self._reset_noise_scale
+
+ qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+
+ self.set_state(qpos, qvel)
+
+ observation = self._get_obs()
+ return observation
+
+if __name__ == '__main__':
+ render_mode = "human" # "human" or "partial" or "final"
+ env = ALRHalfCheetahJumpEnv()
+ obs = env.reset()
+
+ for i in range(2000):
+ # objective.load_result("/tmp/cma")
+ # test with random actions
+ ac = env.action_space.sample()
+ obs, rew, d, info = env.step(ac)
+ if i % 10 == 0:
+ env.render(mode=render_mode)
+ if d:
+ print('After ', i, ' steps, done: ', d)
+ env.reset()
+
+ env.close()
\ No newline at end of file
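The terminal reward in ALRHalfCheetahJumpEnv.step is written as one conditional expression; since the conditional binds loosest, the + 1e-8 term belongs only to the contextual branch. A small check with illustrative values, written with explicit parentheses:

import numpy as np

max_height, goal, context = 1.2, np.array([1.5]), True

# parenthesized form of the expression used in step():
height_goal_distance = (-10 * np.linalg.norm(max_height - goal) + 1e-8) if context \
    else max_height
print(height_goal_distance)   # ~ -3.0 for these values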
diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py b/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py
new file mode 100644
index 0000000..6179b07
--- /dev/null
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py
@@ -0,0 +1,30 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+ @property
+ def active_obs(self):
+ return np.hstack([
+ [False] * 17,
+ [True] # goal height
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray]:
+ return self.env.sim.data.qpos[3:9].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel[3:9].copy()
+
+ @property
+ def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/mujoco/hopper_jump/__init__.py b/alr_envs/alr/mujoco/hopper_jump/__init__.py
new file mode 100644
index 0000000..c5e6d2f
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_jump/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/hopper_jump/assets/hopper.xml b/alr_envs/alr/mujoco/hopper_jump/assets/hopper.xml
new file mode 100644
index 0000000..f18bc46
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_jump/assets/hopper.xml
@@ -0,0 +1,48 @@
[hopper.xml: 48 lines of MuJoCo model XML; markup not preserved in this excerpt]
diff --git a/alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump_on_box.xml b/alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump_on_box.xml
new file mode 100644
index 0000000..69d78ff
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump_on_box.xml
@@ -0,0 +1,51 @@
[hopper_jump_on_box.xml: 51 lines of MuJoCo model XML; markup not preserved in this excerpt]
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
new file mode 100644
index 0000000..7ce8196
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
@@ -0,0 +1,110 @@
+from gym.envs.mujoco.hopper_v3 import HopperEnv
+import numpy as np
+
+MAX_EPISODE_STEPS_HOPPERJUMP = 250
+
+
+class ALRHopperJumpEnv(HopperEnv):
+ """
+ Initialization changes to normal Hopper:
+ - healthy_reward: 1.0 -> 0.1 -> 0
+ - healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
+ - healthy_z_range: (0.7, float('inf')) -> (0.5, float('inf'))
+
+ """
+
+ def __init__(self,
+ xml_file='hopper.xml',
+ forward_reward_weight=1.0,
+ ctrl_cost_weight=1e-3,
+ healthy_reward=0.0,
+ penalty=0.0,
+ context=True,
+ terminate_when_unhealthy=True,
+ healthy_state_range=(-100.0, 100.0),
+ healthy_z_range=(0.5, float('inf')),
+ healthy_angle_range=(-float('inf'), float('inf')),
+ reset_noise_scale=5e-3,
+ exclude_current_positions_from_observation=True,
+ max_episode_steps=250):
+ self.current_step = 0
+ self.max_height = 0
+ self.max_episode_steps = max_episode_steps
+ self.penalty = penalty
+ self.goal = 0
+ self.context = context
+ self.exclude_current_positions_from_observation = exclude_current_positions_from_observation
+ super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
+ healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
+ exclude_current_positions_from_observation)
+
+ def step(self, action):
+
+ self.current_step += 1
+ self.do_simulation(action, self.frame_skip)
+ height_after = self.get_body_com("torso")[2]
+ self.max_height = max(height_after, self.max_height)
+
+ ctrl_cost = self.control_cost(action)
+ costs = ctrl_cost
+ done = False
+
+ if self.current_step >= self.max_episode_steps:
+            height_goal_distance = -10 * np.linalg.norm(self.max_height - self.goal) if self.context else self.max_height
+            healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
+            # maybe move the reward calculation into the if-structure and define two different
+            # _forward_reward_weight variables for the contextual and episodic cases separately
+            height_reward = self._forward_reward_weight * height_goal_distance
+ rewards = height_reward + healthy_reward
+
+ else:
+ # penalty for wrong start direction of first two joints; not needed, could be removed
+ rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0
+
+ observation = self._get_obs()
+ reward = rewards - costs
+ info = {
+ 'height' : height_after,
+ 'max_height': self.max_height,
+ 'goal' : self.goal
+ }
+
+ return observation, reward, done, info
+
+ def _get_obs(self):
+ return np.append(super()._get_obs(), self.goal)
+
+ def reset(self):
+ self.goal = np.random.uniform(1.4, 2.3, 1) # 1.3 2.3
+ self.max_height = 0
+ self.current_step = 0
+ return super().reset()
+
+ # overwrite reset_model to make it deterministic
+ def reset_model(self):
+ noise_low = -self._reset_noise_scale
+ noise_high = self._reset_noise_scale
+
+ qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+
+ self.set_state(qpos, qvel)
+
+ observation = self._get_obs()
+ return observation
+
+if __name__ == '__main__':
+ render_mode = "human" # "human" or "partial" or "final"
+ env = ALRHopperJumpEnv()
+ obs = env.reset()
+
+ for i in range(2000):
+ # objective.load_result("/tmp/cma")
+ # test with random actions
+ ac = env.action_space.sample()
+ obs, rew, d, info = env.step(ac)
+ if i % 10 == 0:
+ env.render(mode=render_mode)
+ if d:
+ print('After ', i, ' steps, done: ', d)
+ env.reset()
+
+ env.close()
\ No newline at end of file
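The non-terminal branch of ALRHopperJumpEnv.step only applies the optional start-direction penalty during the first ten steps, and only to positive actions of the first two joints. A small numeric illustration of that term (the penalty value is made up for the example):

import numpy as np

action = np.array([0.4, -0.2, 0.7])   # only the first two joints are penalized
penalty = 0.1
current_step = 3

early_penalty = ((action[:2] > 0) * penalty).sum() if current_step < 10 else 0
print(early_penalty)   # 0.1, because only action[0] is positive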
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump_on_box.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump_on_box.py
new file mode 100644
index 0000000..e5e363e
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump_on_box.py
@@ -0,0 +1,170 @@
+from gym.envs.mujoco.hopper_v3 import HopperEnv
+
+import numpy as np
+import os
+
+MAX_EPISODE_STEPS_HOPPERJUMPONBOX = 250
+
+
+class ALRHopperJumpOnBoxEnv(HopperEnv):
+ """
+ Initialization changes to normal Hopper:
+ - healthy_reward: 1.0 -> 0.01 -> 0.001
+ - healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
+ """
+
+ def __init__(self,
+ xml_file='hopper_jump_on_box.xml',
+ forward_reward_weight=1.0,
+ ctrl_cost_weight=1e-3,
+ healthy_reward=0.001,
+ terminate_when_unhealthy=True,
+ healthy_state_range=(-100.0, 100.0),
+ healthy_z_range=(0.7, float('inf')),
+ healthy_angle_range=(-float('inf'), float('inf')),
+ reset_noise_scale=5e-3,
+ context=True,
+ exclude_current_positions_from_observation=True,
+ max_episode_steps=250):
+ self.current_step = 0
+ self.max_height = 0
+ self.max_episode_steps = max_episode_steps
+        self.min_distance = 5000  # large initial value; tracks the minimum foot-box distance. TODO: what value is appropriate?
+ self.hopper_on_box = False
+ self.context = context
+ self.box_x = 1
+ xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
+ super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
+ healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
+ exclude_current_positions_from_observation)
+
+ def step(self, action):
+
+ self.current_step += 1
+ self.do_simulation(action, self.frame_skip)
+ height_after = self.get_body_com("torso")[2]
+ foot_pos = self.get_body_com("foot")
+ self.max_height = max(height_after, self.max_height)
+
+ vx, vz, vangle = self.sim.data.qvel[0:3]
+
+ s = self.state_vector()
+ fell_over = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and (height_after > .7))
+
+ box_pos = self.get_body_com("box")
+ box_size = 0.3
+ box_height = 0.3
+ box_center = (box_pos[0] + (box_size / 2), box_pos[1], box_height)
+ foot_length = 0.3
+ foot_center = foot_pos[0] - (foot_length / 2)
+
+ dist = np.linalg.norm(foot_pos - box_center)
+
+ self.min_distance = min(dist, self.min_distance)
+
+ # check if foot is on box
+ is_on_box_x = box_pos[0] <= foot_center <= box_pos[0] + box_size
+        is_on_box_y = True  # always true, since the hopper can only move in the x and z directions
+ is_on_box_z = box_height - 0.02 <= foot_pos[2] <= box_height + 0.02
+ is_on_box = is_on_box_x and is_on_box_y and is_on_box_z
+ if is_on_box: self.hopper_on_box = True
+
+ ctrl_cost = self.control_cost(action)
+
+ costs = ctrl_cost
+
+ done = fell_over or self.hopper_on_box
+
+ if self.current_step >= self.max_episode_steps or done:
+ done = False
+
+ max_height = self.max_height.copy()
+ min_distance = self.min_distance.copy()
+
+ alive_bonus = self._healthy_reward * self.current_step
+ box_bonus = 0
+ rewards = 0
+
+ # TODO explain what we did here for the calculation of the reward
+ if is_on_box:
+ if self.context:
+ rewards -= 100 * vx ** 2 if 100 * vx ** 2 < 1 else 1
+ else:
+ box_bonus = 10
+ rewards += box_bonus
+                    # rewards -= dist * dist  # unclear why this should still apply once the hopper is on the box
+                    # reward -= 90 - abs(angle)
+ rewards -= 100 * vx ** 2 if 100 * vx ** 2 < 1 else 1
+ rewards += max_height * 3
+ rewards += alive_bonus
+
+ else:
+ if self.context:
+ rewards = -10 - min_distance
+ rewards += max_height * 3
+ else:
+ # reward -= (dist*dist)
+ rewards -= min_distance * min_distance
+ # rewards -= dist / self.max_distance
+ rewards += max_height
+ rewards += alive_bonus
+
+ else:
+ rewards = 0
+
+ observation = self._get_obs()
+ reward = rewards - costs
+ info = {
+ 'height': height_after,
+ 'max_height': self.max_height.copy(),
+ 'min_distance': self.min_distance,
+ 'goal': self.box_x,
+ }
+
+ return observation, reward, done, info
+
+ def _get_obs(self):
+ return np.append(super()._get_obs(), self.box_x)
+
+ def reset(self):
+
+ self.max_height = 0
+ self.min_distance = 5000
+ self.current_step = 0
+ self.hopper_on_box = False
+ if self.context:
+ box_id = self.sim.model.body_name2id("box")
+ self.box_x = np.random.uniform(1, 3, 1)
+ self.sim.model.body_pos[box_id] = [self.box_x, 0, 0]
+ return super().reset()
+
+ # overwrite reset_model to make it deterministic
+ def reset_model(self):
+ noise_low = -self._reset_noise_scale
+ noise_high = self._reset_noise_scale
+
+ qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+
+ self.set_state(qpos, qvel)
+
+ observation = self._get_obs()
+ return observation
+
+if __name__ == '__main__':
+ render_mode = "human" # "human" or "partial" or "final"
+ env = ALRHopperJumpOnBoxEnv()
+ obs = env.reset()
+
+ for i in range(2000):
+ # objective.load_result("/tmp/cma")
+ # test with random actions
+ ac = env.action_space.sample()
+ obs, rew, d, info = env.step(ac)
+ if i % 10 == 0:
+ env.render(mode=render_mode)
+ if d:
+ print('After ', i, ' steps, done: ', d)
+ env.reset()
+
+ env.close()
\ No newline at end of file
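The on-box test in ALRHopperJumpOnBoxEnv.step combines three axis-wise checks. A standalone sketch of that geometric check, using the box and foot dimensions hard-coded above (the function name is illustrative):

import numpy as np

def foot_on_box(foot_pos, box_pos, box_size=0.3, box_height=0.3, foot_length=0.3, tol=0.02):
    foot_center_x = foot_pos[0] - foot_length / 2
    on_box_x = box_pos[0] <= foot_center_x <= box_pos[0] + box_size
    on_box_y = True  # planar hopper: no motion along the y-axis
    on_box_z = box_height - tol <= foot_pos[2] <= box_height + tol
    return on_box_x and on_box_y and on_box_z

print(foot_on_box(np.array([1.3, 0.0, 0.31]), np.array([1.0, 0.0, 0.0])))  # True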
diff --git a/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py b/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
new file mode 100644
index 0000000..0d6768c
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
@@ -0,0 +1,31 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+ @property
+ def active_obs(self):
+ return np.hstack([
+ [False] * (5 + int(not self.exclude_current_positions_from_observation)), # position
+ [False] * 6, # velocity
+ [True]
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray]:
+ return self.env.sim.data.qpos[3:6].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel[3:6].copy()
+
+ @property
+ def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/mujoco/hopper_throw/__init__.py b/alr_envs/alr/mujoco/hopper_throw/__init__.py
new file mode 100644
index 0000000..989b5a9
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_throw/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw.xml b/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw.xml
new file mode 100644
index 0000000..1c39602
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw.xml
@@ -0,0 +1,56 @@
[hopper_throw.xml: 56 lines of MuJoCo model XML; markup not preserved in this excerpt]
diff --git a/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml b/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml
new file mode 100644
index 0000000..b4f0342
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml
@@ -0,0 +1,132 @@
[hopper_throw_in_basket.xml: 132 lines of MuJoCo model XML; markup not preserved in this excerpt]
diff --git a/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py b/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py
new file mode 100644
index 0000000..03553f2
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py
@@ -0,0 +1,113 @@
+import os
+
+from gym.envs.mujoco.hopper_v3 import HopperEnv
+import numpy as np
+
+MAX_EPISODE_STEPS_HOPPERTHROW = 250
+
+
+class ALRHopperThrowEnv(HopperEnv):
+ """
+ Initialization changes to normal Hopper:
+ - healthy_reward: 1.0 -> 0.0 -> 0.1
+ - forward_reward_weight -> 5.0
+ - healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
+
+ Reward changes to normal Hopper:
+ - velocity: (x_position_after - x_position_before) -> self.get_body_com("ball")[0]
+ """
+
+ def __init__(self,
+ xml_file='hopper_throw.xml',
+ forward_reward_weight=5.0,
+ ctrl_cost_weight=1e-3,
+ healthy_reward=0.1,
+ terminate_when_unhealthy=True,
+ healthy_state_range=(-100.0, 100.0),
+ healthy_z_range=(0.7, float('inf')),
+ healthy_angle_range=(-float('inf'), float('inf')),
+ reset_noise_scale=5e-3,
+ context = True,
+ exclude_current_positions_from_observation=True,
+ max_episode_steps=250):
+ xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
+ self.current_step = 0
+ self.max_episode_steps = max_episode_steps
+ self.context = context
+ self.goal = 0
+ super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
+ healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
+ exclude_current_positions_from_observation)
+
+ def step(self, action):
+
+ self.current_step += 1
+ self.do_simulation(action, self.frame_skip)
+        # abs(self.get_body_com("ball")[0])  # use x and y to get a point and use the Euclidean distance as reward?
+        ball_pos_after = self.get_body_com("ball")[0]
+        ball_pos_after_y = self.get_body_com("ball")[2]  # note: index 2 is the z (height) coordinate
+
+        # done = self.done  # TODO: we should use this; not sure why there is no other termination.
+        # ball_landed should be enough, because we only look at the throw itself - Paul and Marc
+ ball_landed = self.get_body_com("ball")[2] <= 0.05
+ done = ball_landed
+
+ ctrl_cost = self.control_cost(action)
+ costs = ctrl_cost
+
+ rewards = 0
+
+ if self.current_step >= self.max_episode_steps or done:
+ distance_reward = -np.linalg.norm(ball_pos_after - self.goal) if self.context else \
+ self._forward_reward_weight * ball_pos_after
+ healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
+
+ rewards = distance_reward + healthy_reward
+
+ observation = self._get_obs()
+ reward = rewards - costs
+ info = {
+ 'ball_pos': ball_pos_after,
+ 'ball_pos_y': ball_pos_after_y,
+ 'current_step' : self.current_step,
+ 'goal' : self.goal,
+ }
+
+ return observation, reward, done, info
+
+ def _get_obs(self):
+ return np.append(super()._get_obs(), self.goal)
+
+ def reset(self):
+ self.current_step = 0
+        self.goal = np.random.uniform(2.0, 6.0, 1)  # 0.5 8.0
+ return super().reset()
+
+ # overwrite reset_model to make it deterministic
+ def reset_model(self):
+ noise_low = -self._reset_noise_scale
+ noise_high = self._reset_noise_scale
+
+ qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+
+ self.set_state(qpos, qvel)
+
+ observation = self._get_obs()
+ return observation
+
+if __name__ == '__main__':
+ render_mode = "human" # "human" or "partial" or "final"
+ env = ALRHopperThrowEnv()
+ obs = env.reset()
+
+ for i in range(2000):
+ # objective.load_result("/tmp/cma")
+ # test with random actions
+ ac = env.action_space.sample()
+ obs, rew, d, info = env.step(ac)
+ if i % 10 == 0:
+ env.render(mode=render_mode)
+ if d:
+ print('After ', i, ' steps, done: ', d)
+ env.reset()
+
+ env.close()
\ No newline at end of file
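ALRHopperThrowEnv ends an episode as soon as the ball drops to 0.05 m and then scores the throw either by the distance of the landing x-position to the sampled goal (contextual) or by the throwing distance itself. A compact sketch of that terminal scoring rule with illustrative values (the function name is made up):

import numpy as np

def throw_score(ball_x, goal_x, context=True, forward_reward_weight=5.0):
    # contextual: negative distance between landing position and goal
    # non-contextual: reward the throwing distance, scaled by forward_reward_weight
    return -np.linalg.norm(ball_x - goal_x) if context else forward_reward_weight * ball_x

print(throw_score(3.2, 4.0, context=True))    # ~ -0.8
print(throw_score(3.2, 4.0, context=False))   # 16.0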
diff --git a/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py b/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py
new file mode 100644
index 0000000..74a5b21
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py
@@ -0,0 +1,143 @@
+import os
+from gym.envs.mujoco.hopper_v3 import HopperEnv
+
+import numpy as np
+
+
+MAX_EPISODE_STEPS_HOPPERTHROWINBASKET = 250
+
+
+class ALRHopperThrowInBasketEnv(HopperEnv):
+ """
+ Initialization changes to normal Hopper:
+ - healthy_reward: 1.0 -> 0.0
+ - healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
+ - hit_basket_reward: - -> 10
+
+ Reward changes to normal Hopper:
+ - velocity: (x_position_after - x_position_before) -> (ball_position_after - ball_position_before)
+ """
+
+ def __init__(self,
+ xml_file='hopper_throw_in_basket.xml',
+ forward_reward_weight=1.0,
+ ctrl_cost_weight=1e-3,
+ healthy_reward=0.0,
+ hit_basket_reward=10,
+ basket_size=0.3,
+ terminate_when_unhealthy=True,
+ healthy_state_range=(-100.0, 100.0),
+ healthy_z_range=(0.7, float('inf')),
+ healthy_angle_range=(-float('inf'), float('inf')),
+ reset_noise_scale=5e-3,
+ context=True,
+ penalty=0.0,
+ exclude_current_positions_from_observation=True,
+ max_episode_steps = 250):
+ self.hit_basket_reward = hit_basket_reward
+ self.current_step = 0
+ self.max_episode_steps = max_episode_steps
+ self.ball_in_basket = False
+ self.basket_size = basket_size
+ self.context = context
+ self.penalty = penalty
+ self.basket_x = 5
+ xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
+ super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
+ healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
+ exclude_current_positions_from_observation)
+
+ def step(self, action):
+
+ self.current_step += 1
+ self.do_simulation(action, self.frame_skip)
+ ball_pos = self.get_body_com("ball")
+
+ basket_pos = self.get_body_com("basket_ground")
+ basket_center = (basket_pos[0] + 0.5, basket_pos[1], basket_pos[2])
+
+ is_in_basket_x = ball_pos[0] >= basket_pos[0] and ball_pos[0] <= basket_pos[0] + self.basket_size
+ is_in_basket_y = ball_pos[1] >= basket_pos[1] - (self.basket_size/2) and ball_pos[1] <= basket_pos[1] + (self.basket_size/2)
+ is_in_basket_z = ball_pos[2] < 0.1
+ is_in_basket = is_in_basket_x and is_in_basket_y and is_in_basket_z
+ if is_in_basket: self.ball_in_basket = True
+
+ ball_landed = self.get_body_com("ball")[2] <= 0.05
+ done = ball_landed or is_in_basket
+
+ rewards = 0
+
+ ctrl_cost = self.control_cost(action)
+
+ costs = ctrl_cost
+
+ if self.current_step >= self.max_episode_steps or done:
+
+ if is_in_basket:
+ if not self.context:
+ rewards += self.hit_basket_reward
+ else:
+ dist = np.linalg.norm(ball_pos-basket_center)
+ if self.context:
+ rewards = -10 * dist
+ else:
+ rewards -= (dist*dist)
+ else:
+ # penalty not needed
+            rewards += ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0  # too much of a penalty?
+
+ observation = self._get_obs()
+ reward = rewards - costs
+ info = {
+ 'ball_pos': ball_pos[0],
+ }
+
+ return observation, reward, done, info
+
+ def _get_obs(self):
+ return np.append(super()._get_obs(), self.basket_x)
+
+ def reset(self):
+ if self.max_episode_steps == 10:
+ # We have to initialize this here, because the spec is only added after creating the env.
+ self.max_episode_steps = self.spec.max_episode_steps
+
+ self.current_step = 0
+ self.ball_in_basket = False
+ if self.context:
+ basket_id = self.sim.model.body_name2id("basket_ground")
+ self.basket_x = np.random.uniform(3, 7, 1)
+ self.sim.model.body_pos[basket_id] = [self.basket_x, 0, 0]
+ return super().reset()
+
+ # overwrite reset_model to make it deterministic
+ def reset_model(self):
+ noise_low = -self._reset_noise_scale
+ noise_high = self._reset_noise_scale
+
+ qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+
+ self.set_state(qpos, qvel)
+
+ observation = self._get_obs()
+ return observation
+
+
+if __name__ == '__main__':
+ render_mode = "human" # "human" or "partial" or "final"
+ env = ALRHopperThrowInBasketEnv()
+ obs = env.reset()
+
+ for i in range(2000):
+ # objective.load_result("/tmp/cma")
+ # test with random actions
+ ac = env.action_space.sample()
+ obs, rew, d, info = env.step(ac)
+ if i % 10 == 0:
+ env.render(mode=render_mode)
+ if d:
+ print('After ', i, ' steps, done: ', d)
+ env.reset()
+
+ env.close()
\ No newline at end of file
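Analogously to the box check, ALRHopperThrowInBasketEnv tests whether the ball lies inside the basket footprint and close to the ground. A minimal sketch of that containment test, with basket_size as in the constructor above (the function name is illustrative):

import numpy as np

def ball_in_basket(ball_pos, basket_pos, basket_size=0.3):
    in_x = basket_pos[0] <= ball_pos[0] <= basket_pos[0] + basket_size
    in_y = basket_pos[1] - basket_size / 2 <= ball_pos[1] <= basket_pos[1] + basket_size / 2
    in_z = ball_pos[2] < 0.1
    return in_x and in_y and in_z

print(ball_in_basket(np.array([5.1, 0.05, 0.05]), np.array([5.0, 0.0, 0.0])))  # True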
diff --git a/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py b/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
new file mode 100644
index 0000000..e5e9486
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
@@ -0,0 +1,30 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+ @property
+ def active_obs(self):
+ return np.hstack([
+ [False] * 17,
+ [True] # goal pos
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray]:
+ return self.env.sim.data.qpos[3:6].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel[3:6].copy()
+
+ @property
+ def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/__init__.py b/alr_envs/alr/mujoco/walker_2d_jump/__init__.py
new file mode 100644
index 0000000..989b5a9
--- /dev/null
+++ b/alr_envs/alr/mujoco/walker_2d_jump/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml b/alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml
new file mode 100644
index 0000000..1b48e36
--- /dev/null
+++ b/alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml
@@ -0,0 +1,62 @@
[walker2d.xml: 62 lines of MuJoCo model XML; markup not preserved in this excerpt]
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py b/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
new file mode 100644
index 0000000..445fa40
--- /dev/null
+++ b/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
@@ -0,0 +1,30 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+ @property
+ def active_obs(self):
+ return np.hstack([
+ [False] * 17,
+ [True] # goal pos
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray]:
+ return self.env.sim.data.qpos[3:9].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel[3:9].copy()
+
+ @property
+ def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py b/alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py
new file mode 100644
index 0000000..009dd9d
--- /dev/null
+++ b/alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py
@@ -0,0 +1,113 @@
+import os
+from gym.envs.mujoco.walker2d_v3 import Walker2dEnv
+import numpy as np
+
+MAX_EPISODE_STEPS_WALKERJUMP = 300
+
+
+class ALRWalker2dJumpEnv(Walker2dEnv):
+ """
+    Initialization changes to normal Walker2d:
+    - healthy_reward: 1.0 -> 0.005 -> 0.0025 (not from Alex)
+    - penalty: 10 -> 0 (not from Alex)
+ """
+
+ def __init__(self,
+ xml_file='walker2d.xml',
+ forward_reward_weight=1.0,
+ ctrl_cost_weight=1e-3,
+ healthy_reward=0.0025,
+ terminate_when_unhealthy=True,
+ healthy_z_range=(0.8, 2.0),
+ healthy_angle_range=(-1.0, 1.0),
+ reset_noise_scale=5e-3,
+ penalty=0,
+ context=True,
+ exclude_current_positions_from_observation=True,
+ max_episode_steps=300):
+ self.current_step = 0
+ self.max_episode_steps = max_episode_steps
+ self.max_height = 0
+ self._penalty = penalty
+ self.context = context
+ self.goal = 0
+ xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
+ super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
+ healthy_z_range, healthy_angle_range, reset_noise_scale,
+ exclude_current_positions_from_observation)
+
+ def step(self, action):
+ self.current_step += 1
+ self.do_simulation(action, self.frame_skip)
+ #pos_after = self.get_body_com("torso")[0]
+ height = self.get_body_com("torso")[2]
+
+ self.max_height = max(height, self.max_height)
+
+ fell_over = height < 0.2
+ done = fell_over
+
+ ctrl_cost = self.control_cost(action)
+ costs = ctrl_cost
+
+ if self.current_step >= self.max_episode_steps or done:
+ done = True
+ height_goal_distance = -10 * (np.linalg.norm(self.max_height - self.goal)) if self.context else self.max_height
+ healthy_reward = self.healthy_reward * self.current_step
+
+ rewards = height_goal_distance + healthy_reward
+ else:
+ # penalty not needed
+ rewards = 0
+ rewards += ((action[:2] > 0) * self._penalty).sum() if self.current_step < 4 else 0
+ rewards += ((action[3:5] > 0) * self._penalty).sum() if self.current_step < 4 else 0
+
+
+ observation = self._get_obs()
+ reward = rewards - costs
+ info = {
+ 'height': height,
+ 'max_height': self.max_height,
+ 'goal' : self.goal,
+ }
+
+ return observation, reward, done, info
+
+ def _get_obs(self):
+ return np.append(super()._get_obs(), self.goal)
+
+ def reset(self):
+ self.current_step = 0
+ self.max_height = 0
+ self.goal = np.random.uniform(1.5, 2.5, 1) # 1.5 3.0
+ return super().reset()
+
+ # overwrite reset_model to make it deterministic
+ def reset_model(self):
+ noise_low = -self._reset_noise_scale
+ noise_high = self._reset_noise_scale
+
+ qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+
+ self.set_state(qpos, qvel)
+
+ observation = self._get_obs()
+ return observation
+
+if __name__ == '__main__':
+ render_mode = "human" # "human" or "partial" or "final"
+ env = ALRWalker2dJumpEnv()
+ obs = env.reset()
+
+ for i in range(2000):
+ # objective.load_result("/tmp/cma")
+ # test with random actions
+ ac = env.action_space.sample()
+ obs, rew, d, info = env.step(ac)
+ if i % 10 == 0:
+ env.render(mode=render_mode)
+ if d:
+ print('After ', i, ' steps, done: ', d)
+ env.reset()
+
+ env.close()
\ No newline at end of file
From 1c9019ab083622e200732711faba94b5da9f2be3 Mon Sep 17 00:00:00 2001
From: Onur
Date: Wed, 20 Apr 2022 10:50:41 +0200
Subject: [PATCH 014/104] added non-contextual envs
---
alr_envs/alr/__init__.py | 430 +++++++++++++++++++++++----------------
1 file changed, 253 insertions(+), 177 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index e435fa6..1b3f118 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -192,69 +192,150 @@ register(
}
)
-
+# CtxtFree are v0, Contextual are v1
register(
id='ALRAntJump-v0',
entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP
+ "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP,
+ "context": False
}
)
+# CtxtFree are v0, Contextual are v1
+register(
+ id='ALRAntJump-v1',
+ entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP,
+ "context": True
+ }
+)
+
+# CtxtFree are v0, Contextual are v1
register(
id='ALRHalfCheetahJump-v0',
entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP
+ "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
+ "context": False
}
)
-
+# CtxtFree are v0, Contextual are v1
+register(
+ id='ALRHalfCheetahJump-v1',
+ entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
+ "context": True
+ }
+)
+# CtxtFree are v0, Contextual are v1
register(
id='ALRHopperJump-v0',
entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
+ "context": False
}
)
-
+register(
+ id='ALRHopperJump-v1',
+ entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
+ "context": True
+ }
+)
+# CtxtFree are v0, Contextual are v1
register(
id='ALRHopperJumpOnBox-v0',
entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
+ "context": False
}
)
+# CtxtFree are v0, Contextual are v1
+register(
+ id='ALRHopperJumpOnBox-v1',
+ entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
+ "context": True
+ }
+)
+# CtxtFree are v0, Contextual are v1
register(
id='ALRHopperThrow-v0',
entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW,
+ "context": False
}
)
+# CtxtFree are v0, Contextual are v1
+register(
+ id='ALRHopperThrow-v1',
+ entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW,
+ "context": True
+ }
+)
+# CtxtFree are v0, Contextual are v1
register(
id='ALRHopperThrowInBasket-v0',
entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
+ "context": False
}
)
-
+# CtxtFree are v0, Contextual are v1
+register(
+ id='ALRHopperThrowInBasket-v1',
+ entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
+ "context": True
+ }
+)
+# CtxtFree are v0, Contextual are v1
register(
id='ALRWalker2DJump-v0',
entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP
+ "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP,
+ "context": False
}
)
+# CtxtFree are v0, Contextual are v1
+register(
+ id='ALRWalker2DJump-v1',
+ entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP,
+ "context": True
+ }
+)
+
## Balancing Reacher
register(
@@ -502,203 +583,198 @@ for _v, cd in enumerate(ctxt_dim):
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
## AntJump
-for _v, cd in enumerate(ctxt_dim):
- _env_id = f'TableTennisProMP-v{_v}'
+_versions = ["v0", "v1"]
+for _v in _versions:
+ _env_id = f'ALRAntJumpProMP-{_v}'
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
- "name": "alr_envs:TableTennis{}DCtxt-v0".format(cd),
- "wrappers": [mujoco.table_tennis.MPWrapper],
+ "name": f"alr_envs:ALRAntJump-{_v}",
+ "wrappers": [mujoco.ant_jump.MPWrapper],
"mp_kwargs": {
- "num_dof": 7,
- "num_basis": 2,
- "duration": 1.25,
- "post_traj_time": 1.5,
+ "num_dof": 8,
+ "num_basis": 5,
+ "duration": 10,
+ "post_traj_time": 0,
"policy_type": "motor",
"weights_scale": 1.0,
"zero_start": True,
"zero_goal": False,
"policy_kwargs": {
- "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
- "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
+ "p_gains": np.ones(8),
+ "d_gains": 0.1*np.ones(8)
}
}
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-register(
- id='ALRAntJumpProMP-v0',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": "alr_envs:ALRAntJump-v0",
- "wrappers": [mujoco.ant_jump.MPWrapper],
- "mp_kwargs": {
- "num_dof": 8,
- "num_basis": 5,
- "duration": 10,
- "post_traj_time": 0,
- "policy_type": "motor",
- "weights_scale": 1.0,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": np.ones(8),
- "d_gains": 0.1*np.ones(8)
+## HalfCheetahJump
+_versions = ["v0", "v1"]
+for _v in _versions:
+ _env_id = f'ALRHalfCheetahJumpProMP-{_v}'
+ register(
+ id=_env_id,
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": f"alr_envs:ALRHalfCheetahJump-{_v}",
+ "wrappers": [mujoco.half_cheetah_jump.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 6,
+ "num_basis": 5,
+ "duration": 5,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(6),
+ "d_gains": 0.1*np.ones(6)
+ }
}
}
- }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRAntJumpProMP-v0')
+ )
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-register(
- id='ALRHalfCheetahJumpProMP-v0',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": "alr_envs:ALRHalfCheetahJump-v0",
- "wrappers": [mujoco.half_cheetah_jump.MPWrapper],
- "mp_kwargs": {
- "num_dof": 6,
- "num_basis": 5,
- "duration": 5,
- "post_traj_time": 0,
- "policy_type": "motor",
- "weights_scale": 1.0,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": np.ones(6),
- "d_gains": 0.1*np.ones(6)
+## HopperJump
+_versions = ["v0", "v1"]
+for _v in _versions:
+ _env_id = f'ALRHopperJumpProMP-{_v}'
+ register(
+ id= _env_id,
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": f"alr_envs:ALRHopperJump-{_v}",
+ "wrappers": [mujoco.hopper_jump.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 3,
+ "num_basis": 5,
+ "duration": 2,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(3),
+ "d_gains": 0.1*np.ones(3)
+ }
}
}
- }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHalfCheetahJumpProMP-v0')
+ )
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-register(
- id='ALRHopperJumpProMP-v0',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": "alr_envs:ALRHopperJump-v0",
- "wrappers": [mujoco.hopper_jump.MPWrapper],
- "mp_kwargs": {
- "num_dof": 3,
- "num_basis": 5,
- "duration": 2,
- "post_traj_time": 0,
- "policy_type": "motor",
- "weights_scale": 1.0,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": np.ones(3),
- "d_gains": 0.1*np.ones(3)
+## HopperJumpOnBox
+_versions = ["v0", "v1"]
+for _v in _versions:
+ _env_id = f'ALRHopperJumpOnBoxProMP-{_v}'
+ register(
+ id=_env_id,
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": f"alr_envs:ALRHopperJumpOnBox-{_v}",
+ "wrappers": [mujoco.hopper_jump.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 3,
+ "num_basis": 5,
+ "duration": 2,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(3),
+ "d_gains": 0.1*np.ones(3)
+ }
}
}
- }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperJumpProMP-v0')
+ )
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-register(
- id='ALRHopperJumpOnBoxProMP-v0',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": "alr_envs:ALRHopperJumpOnBox-v0",
- "wrappers": [mujoco.hopper_jump.MPWrapper],
- "mp_kwargs": {
- "num_dof": 3,
- "num_basis": 5,
- "duration": 2,
- "post_traj_time": 0,
- "policy_type": "motor",
- "weights_scale": 1.0,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": np.ones(3),
- "d_gains": 0.1*np.ones(3)
+#HopperThrow
+_versions = ["v0", "v1"]
+for _v in _versions:
+ _env_id = f'ALRHopperThrowProMP-{_v}'
+ register(
+ id=_env_id,
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": f"alr_envs:ALRHopperThrow-{_v}",
+ "wrappers": [mujoco.hopper_throw.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 3,
+ "num_basis": 5,
+ "duration": 2,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(3),
+ "d_gains": 0.1*np.ones(3)
+ }
}
}
- }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperJumpOnBoxProMP-v0')
+ )
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-register(
- id='ALRHopperThrowProMP-v0',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": "alr_envs:ALRHopperThrow-v0",
- "wrappers": [mujoco.hopper_throw.MPWrapper],
- "mp_kwargs": {
- "num_dof": 3,
- "num_basis": 5,
- "duration": 2,
- "post_traj_time": 0,
- "policy_type": "motor",
- "weights_scale": 1.0,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": np.ones(3),
- "d_gains": 0.1*np.ones(3)
+## HopperThrowInBasket
+_versions = ["v0", "v1"]
+for _v in _versions:
+ _env_id = f'ALRHopperThrowInBasketProMP-{_v}'
+ register(
+ id=_env_id,
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": f"alr_envs:ALRHopperThrowInBasket-{_v}",
+ "wrappers": [mujoco.hopper_throw.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 3,
+ "num_basis": 5,
+ "duration": 2,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(3),
+ "d_gains": 0.1*np.ones(3)
+ }
}
}
- }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperThrowProMP-v0')
+ )
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-register(
- id='ALRHopperThrowInBasketProMP-v0',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": "alr_envs:ALRHopperThrowInBasket-v0",
- "wrappers": [mujoco.hopper_throw.MPWrapper],
- "mp_kwargs": {
- "num_dof": 3,
- "num_basis": 5,
- "duration": 2,
- "post_traj_time": 0,
- "policy_type": "motor",
- "weights_scale": 1.0,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": np.ones(3),
- "d_gains": 0.1*np.ones(3)
+## Walker2DJump
+_versions = ["v0", "v1"]
+for _v in _versions:
+ _env_id = f'ALRWalker2DJumpProMP-{_v}'
+ register(
+ id=_env_id,
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": f"alr_envs:ALRWalker2DJump-{_v}",
+ "wrappers": [mujoco.walker_2d_jump.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 6,
+ "num_basis": 5,
+ "duration": 2.4,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(6),
+ "d_gains": 0.1*np.ones(6)
+ }
}
}
- }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRHopperThrowInBasketProMP-v0')
-
-
-register(
- id='ALRWalker2DJumpProMP-v0',
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": "alr_envs:ALRWalker2DJump-v0",
- "wrappers": [mujoco.walker_2d_jump.MPWrapper],
- "mp_kwargs": {
- "num_dof": 6,
- "num_basis": 5,
- "duration": 2.4,
- "post_traj_time": 0,
- "policy_type": "motor",
- "weights_scale": 1.0,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": np.ones(6),
- "d_gains": 0.1*np.ones(6)
- }
- }
- }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append('ALRWalker2DJumpProMP-v0')
+ )
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
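With this patch the v0 IDs are context-free and the v1 IDs are contextual, while the loops above register a matching ProMP variant for each. A short usage sketch, assuming the alr_envs package from this repository is installed so that these registrations run on import:

import gym

# step-based variants: v0 = context-free, v1 = contextual (goal appended to the observation)
env_free = gym.make("alr_envs:ALRHopperJump-v0")
env_ctxt = gym.make("alr_envs:ALRHopperJump-v1")

obs = env_ctxt.reset()
print("sampled goal height:", obs[-1])

# the motion-primitive IDs follow the same pattern,
# e.g. "alr_envs:ALRHopperJumpProMP-v0" / "alr_envs:ALRHopperJumpProMP-v1"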
From 855ddcee4b119c25f43a657a50d547f86018e3a0 Mon Sep 17 00:00:00 2001
From: Onur
Date: Wed, 20 Apr 2022 14:50:02 +0200
Subject: [PATCH 015/104] hopperjump-v2 -> init pos randomized
---
alr_envs/alr/__init__.py | 34 +++++++++++++
alr_envs/alr/mujoco/__init__.py | 2 +-
alr_envs/alr/mujoco/hopper_jump/__init__.py | 2 +-
.../alr/mujoco/hopper_jump/hopper_jump.py | 48 +++++++++++++++++++
alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py | 26 ++++++++++
5 files changed, 110 insertions(+), 2 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 1b3f118..91881f0 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -253,6 +253,15 @@ register(
"context": True
}
)
+
+register(
+ id='ALRHopperJump-v2',
+ entry_point='alr_envs.alr.mujoco:ALRHopperJumpRndmPosEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP
+ }
+)
# CtxtFree are v0, Contextual are v1
register(
id='ALRHopperJumpOnBox-v0',
@@ -667,6 +676,31 @@ for _v in _versions:
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+## HopperJump
+register(
+ id= "ALRHopperJumpProMP-v2",
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": f"alr_envs:ALRHopperJump-v2",
+ "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper],
+ "mp_kwargs": {
+ "num_dof": 3,
+ "num_basis": 5,
+ "duration": 2,
+ "post_traj_time": 0,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.ones(3),
+ "d_gains": 0.1*np.ones(3)
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")
+
## HopperJumpOnBox
_versions = ["v0", "v1"]
for _v in _versions:
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index 8935db4..df71924 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -6,7 +6,7 @@ from .table_tennis.tt_gym import TTEnvGym
from .beerpong.beerpong import ALRBeerBongEnv
from .ant_jump.ant_jump import ALRAntJumpEnv
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
-from .hopper_jump.hopper_jump import ALRHopperJumpEnv
+from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv
from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import ALRHopperThrowEnv
from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
diff --git a/alr_envs/alr/mujoco/hopper_jump/__init__.py b/alr_envs/alr/mujoco/hopper_jump/__init__.py
index c5e6d2f..c6db9c2 100644
--- a/alr_envs/alr/mujoco/hopper_jump/__init__.py
+++ b/alr_envs/alr/mujoco/hopper_jump/__init__.py
@@ -1 +1 @@
-from .mp_wrapper import MPWrapper
+from .mp_wrapper import MPWrapper, HighCtxtMPWrapper
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
index 7ce8196..4047d04 100644
--- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
@@ -91,6 +91,54 @@ class ALRHopperJumpEnv(HopperEnv):
observation = self._get_obs()
return observation
+
+class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
+ def __init__(self, max_episode_steps=250):
+ super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False,
+ reset_noise_scale=5e-1,
+ max_episode_steps=max_episode_steps)
+ def reset_model(self):
+ noise_low = -self._reset_noise_scale
+ noise_high = self._reset_noise_scale
+
+ qpos = self.init_qpos + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ qvel = self.init_qvel #+ self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+
+ self.set_state(qpos, qvel)
+
+ observation = self._get_obs()
+ return observation
+
+ def step(self, action):
+
+ self.current_step += 1
+ self.do_simulation(action, self.frame_skip)
+ height_after = self.get_body_com("torso")[2]
+ self.max_height = max(height_after, self.max_height)
+
+ ctrl_cost = self.control_cost(action)
+ costs = ctrl_cost
+ done = False
+
+ if self.current_step >= self.max_episode_steps:
+ healthy_reward = 0
+            # maybe move the reward calculation into the if-structure and define two different
+            # _forward_reward_weight variables for the contextual and episodic cases separately
+            height_reward = self._forward_reward_weight * self.max_height
+ rewards = height_reward + healthy_reward
+
+ else:
+ # penalty for wrong start direction of first two joints; not needed, could be removed
+ rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0
+
+ observation = self._get_obs()
+ reward = rewards - costs
+ info = {
+ 'height': height_after,
+ 'max_height': self.max_height,
+ 'goal': self.goal
+ }
+
+ return observation, reward, done, info
+
if __name__ == '__main__':
render_mode = "human" # "human" or "partial" or "final"
env = ALRHopperJumpEnv()
diff --git a/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py b/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
index 0d6768c..36b7158 100644
--- a/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
@@ -29,3 +29,29 @@ class MPWrapper(MPEnvWrapper):
@property
def dt(self) -> Union[float, int]:
return self.env.dt
+
+
+class HighCtxtMPWrapper(MPWrapper):
+ @property
+ def active_obs(self):
+ return np.hstack([
+ [True] * (5 + int(not self.exclude_current_positions_from_observation)), # position
+ [False] * 6, # velocity
+ [False]
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray]:
+ return self.env.sim.data.qpos[3:6].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel[3:6].copy()
+
+ @property
+ def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
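HighCtxtMPWrapper inverts which dimensions count as context: since ALRHopperJumpRndmPosEnv keeps the current positions in the observation, the randomized position block becomes the context and the goal entry is ignored. A sketch of the two masks side by side (dimension counts follow the active_obs definitions above):

import numpy as np

# MPWrapper with exclude_current_positions_from_observation=True:
# only the appended goal height is context
mask_goal_ctxt = np.hstack([[False] * 5, [False] * 6, [True]])

# HighCtxtMPWrapper with exclude_current_positions_from_observation=False:
# the randomized position block is the context, the goal entry is not
mask_pos_ctxt = np.hstack([[True] * 6, [False] * 6, [False]])

print(int(mask_goal_ctxt.sum()), int(mask_pos_ctxt.sum()))   # 1 vs. 6 context dimensions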
From 77927e91573a9e517acbaf1c1c4a1459787116b0 Mon Sep 17 00:00:00 2001
From: Onur
Date: Wed, 20 Apr 2022 17:51:43 +0200
Subject: [PATCH 016/104] improve randomization hopperjump-v2
---
alr_envs/alr/mujoco/hopper_jump/hopper_jump.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
index 4047d04..a1e8990 100644
--- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
@@ -100,8 +100,11 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
-
- qpos = self.init_qpos + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+        rnd_vec[2] *= 0.05  # the angle around the y-axis shouldn't be too large, as the agent then
+        # falls down quickly and cannot recover
+ rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3)
+ qpos = self.init_qpos + rnd_vec
qvel = self.init_qvel #+ self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
self.set_state(qpos, qvel)
From c0502cf1d4b2f37e7adcc27ab1607681be45f426 Mon Sep 17 00:00:00 2001
From: Onur
Date: Thu, 21 Apr 2022 08:44:13 +0200
Subject: [PATCH 017/104] corrected reward for hopperjumprndminit + ALRReacher
for iLQR
---
alr_envs/alr/__init__.py | 11 +++++++
alr_envs/alr/mujoco/__init__.py | 2 +-
.../alr/mujoco/hopper_jump/hopper_jump.py | 29 ++++++++++++++++---
alr_envs/alr/mujoco/reacher/alr_reacher.py | 21 ++++++++++++++
4 files changed, 58 insertions(+), 5 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 91881f0..8f37f78 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -148,6 +148,17 @@ register(
}
)
+register(
+ id='ALRReacherSparseOptCtrl-v0',
+ entry_point='alr_envs.alr.mujoco:ALRReacherOptCtrlEnv',
+ max_episode_steps=200,
+ kwargs={
+ "steps_before_reward": 200,
+ "n_links": 5,
+ "balance": False,
+ }
+)
+
register(
id='ALRReacherSparseBalanced-v0',
entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index df71924..9c7eecf 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -1,4 +1,4 @@
-from .reacher.alr_reacher import ALRReacherEnv
+from .reacher.alr_reacher import ALRReacherEnv, ALRReacherOptCtrlEnv
from .reacher.balancing import BalancingEnv
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
index a1e8990..e76cf4f 100644
--- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
@@ -94,10 +94,16 @@ class ALRHopperJumpEnv(HopperEnv):
class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
def __init__(self, max_episode_steps=250):
+ self.contact_with_floor = False
+ self._floor_geom_id = None
+ self._foot_geom_id = None
super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False,
reset_noise_scale=5e-1,
max_episode_steps=max_episode_steps)
+
def reset_model(self):
+ self._floor_geom_id = self.model.geom_name2id('floor')
+ self._foot_geom_id = self.model.geom_name2id('foot_geom')
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
@@ -116,8 +122,12 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
self.current_step += 1
self.do_simulation(action, self.frame_skip)
+
+ self.contact_with_floor = self._contact_checker(self._floor_geom_id, self._foot_geom_id) if not \
+ self.contact_with_floor else True
+
height_after = self.get_body_com("torso")[2]
- self.max_height = max(height_after, self.max_height)
+ self.max_height = max(height_after, self.max_height) if self.contact_with_floor else 0
ctrl_cost = self.control_cost(action)
costs = ctrl_cost
@@ -142,9 +152,19 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
return observation, reward, done, info
+ def _contact_checker(self, id_1, id_2):
+ for coni in range(0, self.sim.data.ncon):
+ con = self.sim.data.contact[coni]
+ collision = con.geom1 == id_1 and con.geom2 == id_2
+ collision_trans = con.geom1 == id_2 and con.geom2 == id_1
+ if collision or collision_trans:
+ return True
+ return False
+
if __name__ == '__main__':
render_mode = "human" # "human" or "partial" or "final"
- env = ALRHopperJumpEnv()
+ # env = ALRHopperJumpEnv()
+ env = ALRHopperJumpRndmPosEnv()
obs = env.reset()
for i in range(2000):
@@ -152,8 +172,9 @@ if __name__ == '__main__':
# test with random actions
ac = env.action_space.sample()
obs, rew, d, info = env.step(ac)
- if i % 10 == 0:
- env.render(mode=render_mode)
+ # if i % 10 == 0:
+ # env.render(mode=render_mode)
+ env.render(mode=render_mode)
if d:
print('After ', i, ' steps, done: ', d)
env.reset()
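The _contact_checker added in this patch walks over sim.data.contact and accepts a collision in either geom order. A standalone sketch of the same pattern against the mujoco-py API these environments use, with the geom names taken from the hopper model:

def has_contact(sim, geom_name_1, geom_name_2):
    # resolve geom names to ids and scan the active contacts
    id_1 = sim.model.geom_name2id(geom_name_1)
    id_2 = sim.model.geom_name2id(geom_name_2)
    for i in range(sim.data.ncon):
        con = sim.data.contact[i]
        if (con.geom1 == id_1 and con.geom2 == id_2) or \
           (con.geom1 == id_2 and con.geom2 == id_1):
            return True
    return False

# e.g. inside step(): has_contact(self.sim, 'floor', 'foot_geom')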
diff --git a/alr_envs/alr/mujoco/reacher/alr_reacher.py b/alr_envs/alr/mujoco/reacher/alr_reacher.py
index 2d122d2..42fe5a7 100644
--- a/alr_envs/alr/mujoco/reacher/alr_reacher.py
+++ b/alr_envs/alr/mujoco/reacher/alr_reacher.py
@@ -87,6 +87,27 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
[self._steps],
])
+class ALRReacherOptCtrlEnv(ALRReacherEnv):
+ def __init__(self, steps_before_reward=200, n_links=5, balance=False):
+ super(ALRReacherOptCtrlEnv, self).__init__(steps_before_reward, n_links, balance)
+ self.goal = np.array([0.1, 0.1])
+
+ def _get_obs(self):
+ theta = self.sim.data.qpos.flat[:self.n_links]
+ return np.concatenate([
+ theta,
+ self.sim.data.qvel.flat[:self.n_links], # this is angular velocity
+ ])
+
+ def reset_model(self):
+ qpos = self.init_qpos
+ qpos[-2:] = self.goal
+ qvel = self.init_qvel
+ qvel[-2:] = 0
+ self.set_state(qpos, qvel)
+ self._steps = 0
+
+ return self._get_obs()
if __name__ == '__main__':
nl = 5
From a9460f15fd7737eb7649e5f6710a78d3cc63c4ec Mon Sep 17 00:00:00 2001
From: Onur
Date: Fri, 22 Apr 2022 15:36:19 +0200
Subject: [PATCH 018/104] fix small reacher issues for optimal control
---
alr_envs/alr/mujoco/reacher/alr_reacher.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/alr_envs/alr/mujoco/reacher/alr_reacher.py b/alr_envs/alr/mujoco/reacher/alr_reacher.py
index 42fe5a7..74cff4d 100644
--- a/alr_envs/alr/mujoco/reacher/alr_reacher.py
+++ b/alr_envs/alr/mujoco/reacher/alr_reacher.py
@@ -89,12 +89,14 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
class ALRReacherOptCtrlEnv(ALRReacherEnv):
def __init__(self, steps_before_reward=200, n_links=5, balance=False):
- super(ALRReacherOptCtrlEnv, self).__init__(steps_before_reward, n_links, balance)
self.goal = np.array([0.1, 0.1])
+ super(ALRReacherOptCtrlEnv, self).__init__(steps_before_reward, n_links, balance)
def _get_obs(self):
theta = self.sim.data.qpos.flat[:self.n_links]
+ tip_pos = self.get_body_com("fingertip")
return np.concatenate([
+ tip_pos[:2],
theta,
self.sim.data.qvel.flat[:self.n_links], # this is angular velocity
])
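Taken together, patches 017 and 018 give ALRReacherOptCtrlEnv a deterministic reset (neutral pose, goal fixed at [0.1, 0.1]) and an observation made of fingertip xy, joint angles and joint velocities. A minimal rollout sketch, purely illustrative (class name and constructor arguments are taken from the diffs above):

    from alr_envs.alr.mujoco.reacher.alr_reacher import ALRReacherOptCtrlEnv

    env = ALRReacherOptCtrlEnv(steps_before_reward=200, n_links=5, balance=False)
    obs = env.reset()  # [tip_xy (2), theta (n_links), dtheta (n_links)]
    for _ in range(200):
        obs, rew, done, info = env.step(env.action_space.sample())
    env.close()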
From 7f64c975cd061159db1b1c365b3b513bf0ac8db3 Mon Sep 17 00:00:00 2001
From: Onur
Date: Wed, 27 Apr 2022 16:15:17 +0200
Subject: [PATCH 019/104] reduce the number of release steps for beerpong
---
alr_envs/alr/__init__.py | 10 ++--
alr_envs/alr/mujoco/beerpong/beerpong.py | 7 ++-
.../mujoco/beerpong/beerpong_reward_staged.py | 53 +++++++++++++++----
3 files changed, 55 insertions(+), 15 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 8f37f78..ac885fa 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -559,11 +559,13 @@ for _v in _versions:
"mp_kwargs": {
"num_dof": 7,
"num_basis": 2,
- "duration": 1,
- "post_traj_time": 2,
+ # "duration": 1,
+ "duration": 0.5,
+ # "post_traj_time": 2,
+ "post_traj_time": 2.5,
"policy_type": "motor",
- # "weights_scale": 0.15,
- "weights_scale": 1,
+ "weights_scale": 0.14,
+ # "weights_scale": 1,
"zero_start": True,
"zero_goal": False,
"policy_kwargs": {
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index d885e78..f5d2bd8 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -41,9 +41,9 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.ball_id = 11
# self._release_step = 175 # time step of ball release
- self._release_step = 130 # time step of ball release
+ # self._release_step = 130 # time step of ball release
+ self._release_step = 100 # time step of ball release
- self.sim_time = 3 # seconds
self.ep_length = 600 # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
self.cup_table_id = 10
@@ -54,6 +54,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
reward_function = BeerPongReward
self.reward_function = reward_function()
+ self.n_table_bounces_first = 0
MujocoEnv.__init__(self, self.xml_path, frame_skip)
utils.EzPickle.__init__(self)
@@ -75,6 +76,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
return self.sim.data.qvel[0:7].copy()
def reset(self):
+ print(not self.reward_function.ball_ground_contact_first)
+ self.n_table_bounces_first += int(not self.reward_function.ball_ground_contact_first)
self.reward_function.reset(self.add_noise)
return super().reset()
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index c9ed451..24c48be 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -50,7 +50,7 @@ class BeerPongReward:
self.angle_rewards = []
self.cup_angles = []
self.cup_z_axes = []
- self.ball_ground_contact = False
+ self.ball_ground_contact_first = False
self.ball_table_contact = False
self.ball_wall_contact = False
self.ball_cup_contact = False
@@ -150,8 +150,9 @@ class BeerPongReward:
if env._steps == env.ep_length - 1 or self._is_collided:
min_dist = np.min(self.dists)
final_dist = self.dists_final[-1]
-
- # encourage bounce before falling into cup
+ # if self.ball_ground_contact_first:
+ # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -6
+ # else:
if not self.ball_in_cup:
if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact:
min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -4
@@ -159,17 +160,47 @@ class BeerPongReward:
min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -2
else:
min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
-
- reward = rew_offset - min_dist_coeff * min_dist**2 - final_dist_coeff * final_dist**2 - \
- 1e-4*np.mean(action_cost)
- # 1e-7*np.mean(action_cost)
+ reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
+ 1e-4 * np.mean(action_cost)
+ # 1e-7*np.mean(action_cost)
success = self.ball_in_cup
else:
- # reward = - 1e-2 * action_cost
- reward = - 1e-4 * action_cost
+ reward = - 1e-2 * action_cost
+ # reward = - 1e-4 * action_cost
+ # reward = 0
success = False
# ################################################################################################################
+ # # # ##################### Reward function which does not force to bounce once on the table (quad dist) ############
+ # self._check_contacts(env.sim)
+ # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+ # if env._steps == env.ep_length - 1 or self._is_collided:
+ # min_dist = np.min(self.dists)
+ # final_dist = self.dists_final[-1]
+ #
+ # if not self.ball_in_cup:
+ # if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact:
+ # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -6
+ # else:
+ # if self.ball_ground_contact_first:
+ # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -4
+ # else:
+ # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -2
+ # else:
+ # if self.ball_ground_contact_first:
+ # min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, -1
+ # else:
+ # min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
+ # reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
+ # 1e-7 * np.mean(action_cost)
+ # # 1e-4*np.mean(action_cost)
+ # success = self.ball_in_cup
+ # else:
+ # # reward = - 1e-2 * action_cost
+ # # reward = - 1e-4 * action_cost
+ # reward = 0
+ # success = False
+ # ################################################################################################################
infos = {}
infos["success"] = success
infos["is_collided"] = self._is_collided
@@ -193,6 +224,10 @@ class BeerPongReward:
if not self.ball_in_cup:
self.ball_in_cup = self._check_collision_single_objects(sim, self.ball_collision_id,
self.cup_table_collision_id)
+ if not self.ball_ground_contact_first:
+ if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact and not self.ball_in_cup:
+ self.ball_ground_contact_first = self._check_collision_single_objects(sim, self.ball_collision_id,
+ self.ground_collision_id)
def _check_collision_single_objects(self, sim, id_1, id_2):
for coni in range(0, sim.data.ncon):
From cd33e82d3c78f86950324c19c005f837e2ec6503 Mon Sep 17 00:00:00 2001
From: Onur
Date: Thu, 28 Apr 2022 09:05:28 +0200
Subject: [PATCH 020/104] start integrating mp_pytorch lib
---
alr_envs/alr/__init__.py | 7 +
.../alr/mujoco/beerpong/new_mp_wrapper.py | 20 +++
alr_envs/alr/mujoco/reacher/new_mp_wrapper.py | 24 +++
mp_wrapper.py | 170 ++++++++++++++++++
policies.py | 129 +++++++++++++
5 files changed, 350 insertions(+)
create mode 100644 alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
create mode 100644 alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
create mode 100644 mp_wrapper.py
create mode 100644 policies.py
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index ac885fa..3182b72 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -825,3 +825,10 @@ for _v in _versions:
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+
+# --------------------- Testing new mp wrapper -----------------------------------------------------
+
+# register(
+# id='ALRReacherProMP-v0'
+# )
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
new file mode 100644
index 0000000..ca2e17e
--- /dev/null
+++ b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
@@ -0,0 +1,20 @@
+from mp_wrapper import BaseMPWrapper
+from typing import Union, Tuple
+import numpy as np
+
+
+class MPWrapper(BaseMPWrapper):
+
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qpos[0:7].copy()
+
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel[0:7].copy()
+
+ def set_active_obs(self):
+ return np.hstack([
+ [False] * 7, # cos
+ [False] * 7, # sin
+ [True] * 2, # xy position of cup
+ [False] # env steps
+ ])
diff --git a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
new file mode 100644
index 0000000..43c8a51
--- /dev/null
+++ b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
@@ -0,0 +1,24 @@
+from mp_wrapper import BaseMPWrapper
+from typing import Union, Tuple
+import numpy as np
+
+
+class MPWrapper(BaseMPWrapper):
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qpos.flat[:self.env.n_links]
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel.flat[:self.env.n_links]
+
+ def set_active_obs(self):
+ return np.concatenate([
+ [False] * self.env.n_links, # cos
+ [False] * self.env.n_links, # sin
+ [True] * 2, # goal position
+ [False] * self.env.n_links, # angular velocity
+ [False] * 3, # goal distance
+ # self.get_body_com("target"), # only return target to make problem harder
+ [False], # step
+ ])
diff --git a/mp_wrapper.py b/mp_wrapper.py
new file mode 100644
index 0000000..202082a
--- /dev/null
+++ b/mp_wrapper.py
@@ -0,0 +1,170 @@
+from abc import ABC, abstractmethod
+from typing import Union, Tuple
+
+import gym
+import numpy as np
+
+from gym import spaces
+from gym.envs.mujoco import MujocoEnv
+from policies import get_policy_class, BaseController
+from mp_pytorch.mp.mp_interfaces import MPInterface
+
+
+class BaseMPWrapper(gym.Env, ABC):
+ """
+ Base class for movement primitive based gym.Wrapper implementations.
+
+ Args:
+ env: The (wrapped) environment this wrapper is applied on
+ num_dof: Dimension of the action space of the wrapped env
+ num_basis: Number of basis functions per dof
+ duration: Length of the trajectory of the movement primitive in seconds
+ post_traj_time: Time for which the last position of the trajectory is fed to the environment to continue
+ simulation in seconds
+ policy_type: Type or object defining the policy that is used to generate action based on the trajectory
+ weight_scale: Scaling parameter for the actions given to this wrapper
+ render_mode: Equivalent to gym render mode
+
+ """
+
+ def __init__(self,
+ env: MujocoEnv,
+ mp: MPInterface,
+ duration: float,
+ policy_type: Union[str, BaseController] = None,
+ render_mode: str = None,
+ **mp_kwargs
+ ):
+ super().__init__()
+
+ assert env.dt is not None
+ self.env = env
+ self.dt = env.dt
+ self.duration = duration
+ self.traj_steps = int(duration / self.dt)
+ self.post_traj_steps = self.env.spec.max_episode_steps - self.traj_steps
+
+ if isinstance(policy_type, str):
+ # pop policy kwargs here such that they are not passed to the initialize_mp method
+ self.policy = get_policy_class(policy_type, self, **mp_kwargs.pop('policy_kwargs', {}))
+ else:
+ self.policy = policy_type
+
+ self.mp = mp
+ self.env = env
+
+ # rendering
+ self.render_mode = render_mode
+ self.render_kwargs = {}
+ self.time_steps = np.linspace(0, self.duration, self.traj_steps + 1)
+ self.mp.set_mp_times(self.time_steps)
+
+ # TODO: put action bounds in mp wrapper (e.g. time bound for traj. length ...), otherwis learning the durations
+ # might not work
+ # action_bounds = np.inf * np.ones((np.prod(self.mp.num_params)))
+ min_action_bounds, max_action_bounds = mp.get_param_bounds()
+ self.action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
+ dtype=np.float32)
+
+ self.active_obs = self.set_active_obs()
+ self.observation_space = spaces.Box(low=self.env.observation_space.low[self.active_obs],
+ high=self.env.observation_space.high[self.active_obs],
+ dtype=self.env.observation_space.dtype)
+
+ def get_trajectory(self, action: np.ndarray) -> Tuple:
+ self.mp.set_params(action)
+ traj_dict = self.mp.get_mp_trajs(get_pos = True, get_vel = True)
+ trajectory_tensor, velocity_tensor = traj_dict['pos'], traj_dict['vel']
+
+ trajectory = trajectory_tensor.numpy()
+ velocity = velocity_tensor.numpy()
+ if self.post_traj_steps > 0:
+ trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
+ velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.num_dof))])
+
+ return trajectory, velocity
+
+ @abstractmethod
+ def set_active_obs(self):
+ pass
+
+ @property
+ @abstractmethod
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ """
+ Returns the current position of the action/control dimension.
+ The dimensionality has to match the action/control dimension.
+        This is not required when exclusively using velocity control;
+ it should, however, be implemented regardless.
+ E.g. The joint positions that are directly or indirectly controlled by the action.
+ """
+ raise NotImplementedError()
+
+ @property
+ @abstractmethod
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ """
+ Returns the current velocity of the action/control dimension.
+ The dimensionality has to match the action/control dimension.
+        This is not required when exclusively using position control;
+ it should, however, be implemented regardless.
+ E.g. The joint velocities that are directly or indirectly controlled by the action.
+ """
+ raise NotImplementedError()
+
+ def step(self, action: np.ndarray):
+ """ This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
+
+
+ trajectory, velocity = self.get_trajectory(action)
+
+ trajectory_length = len(trajectory)
+ actions = np.zeros(shape=(trajectory_length,) + self.env.action_space.shape)
+ if isinstance(self.env.observation_space, spaces.Dict): # For goal environments
+ observations = np.zeros(shape=(trajectory_length,) + self.env.observation_space["observation"].shape,
+ dtype=self.env.observation_space.dtype)
+ else:
+ observations = np.zeros(shape=(trajectory_length,) + self.env.observation_space.shape,
+ dtype=self.env.observation_space.dtype)
+ rewards = np.zeros(shape=(trajectory_length,))
+ trajectory_return = 0
+
+ infos = dict()
+
+ for t, pos_vel in enumerate(zip(trajectory, velocity)):
+ ac = self.policy.get_action(pos_vel[0], pos_vel[1])
+ actions[t, :] = np.clip(ac, self.env.action_space.low, self.env.action_space.high)
+ obs, rewards[t], done, info = self.env.step(actions[t, :])
+ observations[t, :] = obs["observation"] if isinstance(self.env.observation_space, spaces.Dict) else obs
+ trajectory_return += rewards[t]
+ for k, v in info.items():
+ elems = infos.get(k, [None] * trajectory_length)
+ elems[t] = v
+ infos[k] = elems
+ # infos['step_infos'].append(info)
+ if self.render_mode:
+ self.render(mode=self.render_mode, **self.render_kwargs)
+ if done:
+ break
+ infos.update({k: v[:t + 1] for k, v in infos.items()})
+ infos['trajectory'] = trajectory
+ infos['step_actions'] = actions[:t + 1]
+ infos['step_observations'] = observations[:t + 1]
+ infos['step_rewards'] = rewards[:t + 1]
+ infos['trajectory_length'] = t + 1
+ done = True
+ return self.get_observation_from_step(observations[t]), trajectory_return, done, infos
+
+ def reset(self):
+ return self.get_observation_from_step(self.env.reset())
+
+ def render(self, mode='human', **kwargs):
+ """Only set render options here, such that they can be used during the rollout.
+ This only needs to be called once"""
+ self.render_mode = mode
+ self.render_kwargs = kwargs
+ # self.env.render(mode=self.render_mode, **self.render_kwargs)
+ self.env.render(mode=self.render_mode)
+
+ def get_observation_from_step(self, observation: np.ndarray) -> np.ndarray:
+ return observation[self.active_obs]
diff --git a/policies.py b/policies.py
new file mode 100644
index 0000000..fd89f9e
--- /dev/null
+++ b/policies.py
@@ -0,0 +1,129 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+
+
+class BaseController:
+ def __init__(self, env, **kwargs):
+ self.env = env
+
+ def get_action(self, des_pos, des_vel):
+ raise NotImplementedError
+
+
+class PosController(BaseController):
+ """
+ A Position Controller. The controller calculates a response only based on the desired position.
+ """
+ def get_action(self, des_pos, des_vel):
+ return des_pos
+
+
+class VelController(BaseController):
+ """
+ A Velocity Controller. The controller calculates a response only based on the desired velocity.
+ """
+ def get_action(self, des_pos, des_vel):
+ return des_vel
+
+
+class PDController(BaseController):
+ """
+ A PD-Controller. Using position and velocity information from a provided environment,
+ the controller calculates a response based on the desired position and velocity
+
+ :param env: A position environment
+ :param p_gains: Factors for the proportional gains
+ :param d_gains: Factors for the differential gains
+ """
+
+ def __init__(self,
+ env,
+ p_gains: Union[float, Tuple],
+ d_gains: Union[float, Tuple]):
+ self.p_gains = p_gains
+ self.d_gains = d_gains
+ super(PDController, self).__init__(env)
+
+ def get_action(self, des_pos, des_vel):
+ cur_pos = self.env.current_pos
+ cur_vel = self.env.current_vel
+ assert des_pos.shape == cur_pos.shape, \
+ f"Mismatch in dimension between desired position {des_pos.shape} and current position {cur_pos.shape}"
+ assert des_vel.shape == cur_vel.shape, \
+ f"Mismatch in dimension between desired velocity {des_vel.shape} and current velocity {cur_vel.shape}"
+ trq = self.p_gains * (des_pos - cur_pos) + self.d_gains * (des_vel - cur_vel)
+ return trq
+
+
+class MetaWorldController(BaseController):
+ """
+ A Metaworld Controller. Using position and velocity information from a provided environment,
+ the controller calculates a response based on the desired position and velocity.
+ Unlike the other Controllers, this is a special controller for MetaWorld environments.
+ They use a position delta for the xyz coordinates and a raw position for the gripper opening.
+
+ :param env: A position environment
+ """
+
+ def __init__(self,
+ env
+ ):
+ super(MetaWorldController, self).__init__(env)
+
+ def get_action(self, des_pos, des_vel):
+ gripper_pos = des_pos[-1]
+
+ cur_pos = self.env.current_pos[:-1]
+ xyz_pos = des_pos[:-1]
+
+ assert xyz_pos.shape == cur_pos.shape, \
+ f"Mismatch in dimension between desired position {xyz_pos.shape} and current position {cur_pos.shape}"
+ trq = np.hstack([(xyz_pos - cur_pos), gripper_pos])
+ return trq
+
+
+#TODO: Do we need this class?
+class PDControllerExtend(BaseController):
+ """
+ A PD-Controller. Using position and velocity information from a provided positional environment,
+ the controller calculates a response based on the desired position and velocity
+
+ :param env: A position environment
+ :param p_gains: Factors for the proportional gains
+ :param d_gains: Factors for the differential gains
+ """
+
+ def __init__(self,
+ env,
+ p_gains: Union[float, Tuple],
+ d_gains: Union[float, Tuple]):
+
+ self.p_gains = p_gains
+ self.d_gains = d_gains
+ super(PDControllerExtend, self).__init__(env)
+
+ def get_action(self, des_pos, des_vel):
+ cur_pos = self.env.current_pos
+ cur_vel = self.env.current_vel
+ if len(des_pos) != len(cur_pos):
+ des_pos = self.env.extend_des_pos(des_pos)
+ if len(des_vel) != len(cur_vel):
+ des_vel = self.env.extend_des_vel(des_vel)
+ trq = self.p_gains * (des_pos - cur_pos) + self.d_gains * (des_vel - cur_vel)
+ return trq
+
+
+def get_policy_class(policy_type, env, **kwargs):
+ if policy_type == "motor":
+ return PDController(env, **kwargs)
+ elif policy_type == "velocity":
+ return VelController(env)
+ elif policy_type == "position":
+ return PosController(env)
+ elif policy_type == "metaworld":
+ return MetaWorldController(env)
+ else:
+ raise ValueError(f"Invalid controller type {policy_type} provided. Only 'motor', 'velocity', 'position' "
+                         f"and 'metaworld' are currently supported controllers.")
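To make the intended use of the new mp_wrapper.py / policies.py pair concrete: a concrete wrapper only has to provide current_pos, current_vel and the observation mask; trajectory generation, PD control and the step loop live in the base class. A minimal sketch, assuming a registered step-based gym env `env` (it needs env.dt and env.spec.max_episode_steps), a 7-DoF MuJoCo model and an already constructed MPInterface instance `mp`; the gains and the observation mask are placeholders, not tuned values:

    import numpy as np
    from mp_wrapper import BaseMPWrapper

    class MyMPWrapper(BaseMPWrapper):
        @property
        def current_pos(self):
            return self.env.sim.data.qpos[0:7].copy()

        @property
        def current_vel(self):
            return self.env.sim.data.qvel[0:7].copy()

        def set_active_obs(self):
            # boolean mask; its length has to match the wrapped env's observation
            return np.hstack([[False] * 14, [True] * 2, [False]])

    wrapped = MyMPWrapper(env, mp, duration=0.5, policy_type="motor",
                          policy_kwargs={"p_gains": 1.0, "d_gains": 0.1})
    obs = wrapped.reset()
    obs, episode_return, done, infos = wrapped.step(wrapped.action_space.sample())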
From 137eb726eb4f2924eed8bd867cd83588c7e90f76 Mon Sep 17 00:00:00 2001
From: Onur
Date: Fri, 29 Apr 2022 18:46:09 +0200
Subject: [PATCH 021/104] mp_pytorch now running with zero start/goal promp,
but delay is not working
---
alr_envs/alr/mujoco/reacher/alr_reacher.py | 9 +++--
alr_envs/alr/mujoco/reacher/new_mp_wrapper.py | 3 ++
mp_wrapper.py | 38 ++++++++++++++++---
3 files changed, 41 insertions(+), 9 deletions(-)
diff --git a/alr_envs/alr/mujoco/reacher/alr_reacher.py b/alr_envs/alr/mujoco/reacher/alr_reacher.py
index 74cff4d..4477a66 100644
--- a/alr_envs/alr/mujoco/reacher/alr_reacher.py
+++ b/alr_envs/alr/mujoco/reacher/alr_reacher.py
@@ -56,19 +56,22 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl,
velocity=angular_vel, reward_balance=reward_balance,
end_effector=self.get_body_com("fingertip").copy(),
- goal=self.goal if hasattr(self, "goal") else None)
+ goal=self.goal if hasattr(self, "goal") else None,
+ joint_pos = self.sim.data.qpos.flat[:self.n_links].copy(),
+ joint_vel = self.sim.data.qvel.flat[:self.n_links].copy())
def viewer_setup(self):
self.viewer.cam.trackbodyid = 0
def reset_model(self):
- qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
+ # qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
+ qpos = self.init_qpos
while True:
self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
if np.linalg.norm(self.goal) < self.n_links / 10:
break
qpos[-2:] = self.goal
- qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
+ qvel = self.init_qvel# + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
qvel[-2:] = 0
self.set_state(qpos, qvel)
self._steps = 0
diff --git a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
index 43c8a51..37499e6 100644
--- a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
@@ -22,3 +22,6 @@ class MPWrapper(BaseMPWrapper):
# self.get_body_com("target"), # only return target to make problem harder
[False], # step
])
+
+ def _step_callback(self, action):
+ pass
\ No newline at end of file
diff --git a/mp_wrapper.py b/mp_wrapper.py
index 202082a..82b23bd 100644
--- a/mp_wrapper.py
+++ b/mp_wrapper.py
@@ -6,6 +6,7 @@ import numpy as np
from gym import spaces
from gym.envs.mujoco import MujocoEnv
+
from policies import get_policy_class, BaseController
from mp_pytorch.mp.mp_interfaces import MPInterface
@@ -24,7 +25,6 @@ class BaseMPWrapper(gym.Env, ABC):
policy_type: Type or object defining the policy that is used to generate action based on the trajectory
weight_scale: Scaling parameter for the actions given to this wrapper
render_mode: Equivalent to gym render mode
-
"""
def __init__(self,
@@ -44,6 +44,7 @@ class BaseMPWrapper(gym.Env, ABC):
self.traj_steps = int(duration / self.dt)
self.post_traj_steps = self.env.spec.max_episode_steps - self.traj_steps
+ # TODO: move to constructer, use policy factory instead what Fabian already coded
if isinstance(policy_type, str):
# pop policy kwargs here such that they are not passed to the initialize_mp method
self.policy = get_policy_class(policy_type, self, **mp_kwargs.pop('policy_kwargs', {}))
@@ -56,11 +57,10 @@ class BaseMPWrapper(gym.Env, ABC):
# rendering
self.render_mode = render_mode
self.render_kwargs = {}
- self.time_steps = np.linspace(0, self.duration, self.traj_steps + 1)
+ # self.time_steps = np.linspace(0, self.duration, self.traj_steps + 1)
+ self.time_steps = np.linspace(0, self.duration, self.traj_steps)
self.mp.set_mp_times(self.time_steps)
- # TODO: put action bounds in mp wrapper (e.g. time bound for traj. length ...), otherwis learning the durations
- # might not work
# action_bounds = np.inf * np.ones((np.prod(self.mp.num_params)))
min_action_bounds, max_action_bounds = mp.get_param_bounds()
self.action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
@@ -73,11 +73,13 @@ class BaseMPWrapper(gym.Env, ABC):
def get_trajectory(self, action: np.ndarray) -> Tuple:
self.mp.set_params(action)
+ self.mp.set_boundary_conditions(bc_time=self.time_steps[:1], bc_pos=self.current_pos, bc_vel=self.current_vel)
traj_dict = self.mp.get_mp_trajs(get_pos = True, get_vel = True)
trajectory_tensor, velocity_tensor = traj_dict['pos'], traj_dict['vel']
trajectory = trajectory_tensor.numpy()
velocity = velocity_tensor.numpy()
+
if self.post_traj_steps > 0:
trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.num_dof))])
@@ -112,10 +114,16 @@ class BaseMPWrapper(gym.Env, ABC):
"""
raise NotImplementedError()
+ @abstractmethod
+ def _step_callback(self, action):
+ pass
+
def step(self, action: np.ndarray):
""" This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
-
-
+ # TODO: Think about sequencing
+ # TODO: put in a callback function here which every environment can implement. Important for e.g. BP to allow the
+ # TODO: Reward Function rather here?
+ # agent to learn when to release the ball
trajectory, velocity = self.get_trajectory(action)
trajectory_length = len(trajectory)
@@ -148,6 +156,7 @@ class BaseMPWrapper(gym.Env, ABC):
break
infos.update({k: v[:t + 1] for k, v in infos.items()})
infos['trajectory'] = trajectory
+ # TODO: remove step information? Might be relevant for debugging -> return only in debug mode (verbose)?
infos['step_actions'] = actions[:t + 1]
infos['step_observations'] = observations[:t + 1]
infos['step_rewards'] = rewards[:t + 1]
@@ -168,3 +177,20 @@ class BaseMPWrapper(gym.Env, ABC):
def get_observation_from_step(self, observation: np.ndarray) -> np.ndarray:
return observation[self.active_obs]
+
+ def plot_trajs(self, des_trajs, des_vels):
+ import matplotlib.pyplot as plt
+ import matplotlib
+ matplotlib.use('TkAgg')
+ pos_fig = plt.figure('positions')
+ vel_fig = plt.figure('velocities')
+ for i in range(des_trajs.shape[1]):
+ plt.figure(pos_fig.number)
+ plt.subplot(des_trajs.shape[1], 1, i + 1)
+ plt.plot(np.ones(des_trajs.shape[0])*self.current_pos[i])
+ plt.plot(des_trajs[:, i])
+
+ plt.figure(vel_fig.number)
+ plt.subplot(des_vels.shape[1], 1, i + 1)
+ plt.plot(np.ones(des_trajs.shape[0])*self.current_vel[i])
+ plt.plot(des_vels[:, i])
From f33996c27afcb5592b6ce325c58c9f7373864c1b Mon Sep 17 00:00:00 2001
From: Onur
Date: Mon, 2 May 2022 15:06:21 +0200
Subject: [PATCH 022/104] include callback in step
---
alr_envs/alr/mujoco/reacher/new_mp_wrapper.py | 5 +----
mp_wrapper.py | 21 +++++++++++--------
2 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
index 37499e6..5475366 100644
--- a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
@@ -21,7 +21,4 @@ class MPWrapper(BaseMPWrapper):
[False] * 3, # goal distance
# self.get_body_com("target"), # only return target to make problem harder
[False], # step
- ])
-
- def _step_callback(self, action):
- pass
\ No newline at end of file
+ ])
\ No newline at end of file
diff --git a/mp_wrapper.py b/mp_wrapper.py
index 82b23bd..3defe07 100644
--- a/mp_wrapper.py
+++ b/mp_wrapper.py
@@ -33,6 +33,7 @@ class BaseMPWrapper(gym.Env, ABC):
duration: float,
policy_type: Union[str, BaseController] = None,
render_mode: str = None,
+ verbose=2,
**mp_kwargs
):
super().__init__()
@@ -44,7 +45,7 @@ class BaseMPWrapper(gym.Env, ABC):
self.traj_steps = int(duration / self.dt)
self.post_traj_steps = self.env.spec.max_episode_steps - self.traj_steps
- # TODO: move to constructer, use policy factory instead what Fabian already coded
+ # TODO: move to constructor, use policy factory instead what Fabian already coded
if isinstance(policy_type, str):
# pop policy kwargs here such that they are not passed to the initialize_mp method
self.policy = get_policy_class(policy_type, self, **mp_kwargs.pop('policy_kwargs', {}))
@@ -53,6 +54,7 @@ class BaseMPWrapper(gym.Env, ABC):
self.mp = mp
self.env = env
+ self.verbose = verbose
# rendering
self.render_mode = render_mode
@@ -114,14 +116,12 @@ class BaseMPWrapper(gym.Env, ABC):
"""
raise NotImplementedError()
- @abstractmethod
- def _step_callback(self, action):
+ def _step_callback(self, t, action):
pass
def step(self, action: np.ndarray):
""" This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
# TODO: Think about sequencing
- # TODO: put in a callback function here which every environment can implement. Important for e.g. BP to allow the
# TODO: Reward Function rather here?
# agent to learn when to release the ball
trajectory, velocity = self.get_trajectory(action)
@@ -141,6 +141,9 @@ class BaseMPWrapper(gym.Env, ABC):
for t, pos_vel in enumerate(zip(trajectory, velocity)):
ac = self.policy.get_action(pos_vel[0], pos_vel[1])
+ callback_action = self._step_callback(t, action)
+ if callback_action is not None:
+ ac = np.concatenate((callback_action, ac)) # include callback action at first pos of vector
actions[t, :] = np.clip(ac, self.env.action_space.low, self.env.action_space.high)
obs, rewards[t], done, info = self.env.step(actions[t, :])
observations[t, :] = obs["observation"] if isinstance(self.env.observation_space, spaces.Dict) else obs
@@ -156,11 +159,11 @@ class BaseMPWrapper(gym.Env, ABC):
break
infos.update({k: v[:t + 1] for k, v in infos.items()})
infos['trajectory'] = trajectory
- # TODO: remove step information? Might be relevant for debugging -> return only in debug mode (verbose)?
- infos['step_actions'] = actions[:t + 1]
- infos['step_observations'] = observations[:t + 1]
- infos['step_rewards'] = rewards[:t + 1]
- infos['trajectory_length'] = t + 1
+ if self.verbose == 2:
+ infos['step_actions'] = actions[:t + 1]
+ infos['step_observations'] = observations[:t + 1]
+ infos['step_rewards'] = rewards[:t + 1]
+ infos['trajectory_length'] = t + 1
done = True
return self.get_observation_from_step(observations[t]), trajectory_return, done, infos
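The practical effect of the new verbose flag: the step-level arrays are only attached to the info dict when verbose == 2, which keeps the per-episode payload small during training. A short usage sketch, reusing the hypothetical MyMPWrapper from the sketch after patch 020:

    wrapped = MyMPWrapper(env, mp, duration=0.5, policy_type="motor", verbose=2)
    _, episode_return, done, infos = wrapped.step(wrapped.action_space.sample())
    # these keys are only present because verbose == 2
    infos["step_actions"], infos["step_observations"], infos["step_rewards"], infos["trajectory_length"]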
From 2fbde9fbb1e618e6734c81dd76dffc03c6226196 Mon Sep 17 00:00:00 2001
From: Onur
Date: Tue, 3 May 2022 19:51:54 +0200
Subject: [PATCH 023/104] working bp version
---
alr_envs/alr/__init__.py | 80 +++++++---
alr_envs/alr/mujoco/beerpong/__init__.py | 3 +-
alr_envs/alr/mujoco/beerpong/beerpong.py | 28 ++--
.../mujoco/beerpong/beerpong_reward_staged.py | 4 +
.../alr/mujoco/beerpong/new_mp_wrapper.py | 30 +++-
alr_envs/alr/mujoco/reacher/new_mp_wrapper.py | 4 +-
alr_envs/examples/pd_control_gain_tuning.py | 2 +-
alr_envs/mp/__init__.py | 0
alr_envs/mp/basis_generator_factory.py | 17 +++
alr_envs/mp/controllers/__init__.py | 0
alr_envs/mp/controllers/base_controller.py | 4 +
alr_envs/mp/controllers/controller_factory.py | 21 +++
.../mp/controllers/meta_world_controller.py | 25 ++++
alr_envs/mp/controllers/pd_controller.py | 28 ++++
alr_envs/mp/controllers/pos_controller.py | 9 ++
alr_envs/mp/controllers/vel_controller.py | 9 ++
.../mp/episodic_wrapper.py | 137 +++++++++++-------
alr_envs/mp/mp_factory.py | 22 +++
alr_envs/mp/phase_generator_factory.py | 20 +++
alr_envs/utils/make_env_helpers.py | 112 +++++++++++++-
policies.py | 129 -----------------
21 files changed, 460 insertions(+), 224 deletions(-)
create mode 100644 alr_envs/mp/__init__.py
create mode 100644 alr_envs/mp/basis_generator_factory.py
create mode 100644 alr_envs/mp/controllers/__init__.py
create mode 100644 alr_envs/mp/controllers/base_controller.py
create mode 100644 alr_envs/mp/controllers/controller_factory.py
create mode 100644 alr_envs/mp/controllers/meta_world_controller.py
create mode 100644 alr_envs/mp/controllers/pd_controller.py
create mode 100644 alr_envs/mp/controllers/pos_controller.py
create mode 100644 alr_envs/mp/controllers/vel_controller.py
rename mp_wrapper.py => alr_envs/mp/episodic_wrapper.py (57%)
create mode 100644 alr_envs/mp/mp_factory.py
create mode 100644 alr_envs/mp/phase_generator_factory.py
delete mode 100644 policies.py
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 3182b72..f91e004 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -391,7 +391,8 @@ register(
max_episode_steps=600,
kwargs={
"rndm_goal": False,
- "cup_goal_pos": [-0.3, -1.2]
+ "cup_goal_pos": [0.1, -2.0],
+ "learn_release_step": True
}
)
@@ -403,7 +404,8 @@ register(
max_episode_steps=600,
kwargs={
"rndm_goal": True,
- "cup_goal_pos": [-0.3, -1.2]
+ "cup_goal_pos": [-0.3, -1.2],
+ "learn_release_step": True
}
)
@@ -546,34 +548,72 @@ for _v in _versions:
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+# ## Beerpong
+# _versions = ["v0", "v1"]
+# for _v in _versions:
+# _env_id = f'BeerpongProMP-{_v}'
+# register(
+# id=_env_id,
+# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+# kwargs={
+# "name": f"alr_envs:ALRBeerPong-{_v}",
+# "wrappers": [mujoco.beerpong.MPWrapper],
+# "mp_kwargs": {
+# "num_dof": 7,
+# "num_basis": 2,
+# # "duration": 1,
+# "duration": 0.5,
+# # "post_traj_time": 2,
+# "post_traj_time": 2.5,
+# "policy_type": "motor",
+# "weights_scale": 0.14,
+# # "weights_scale": 1,
+# "zero_start": True,
+# "zero_goal": False,
+# "policy_kwargs": {
+# "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
+# "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+# }
+# }
+# }
+# )
+# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
## Beerpong
_versions = ["v0", "v1"]
for _v in _versions:
_env_id = f'BeerpongProMP-{_v}'
register(
id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
kwargs={
"name": f"alr_envs:ALRBeerPong-{_v}",
- "wrappers": [mujoco.beerpong.MPWrapper],
- "mp_kwargs": {
- "num_dof": 7,
- "num_basis": 2,
- # "duration": 1,
- "duration": 0.5,
- # "post_traj_time": 2,
- "post_traj_time": 2.5,
- "policy_type": "motor",
- "weights_scale": 0.14,
- # "weights_scale": 1,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
- "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+ "wrappers": [mujoco.beerpong.NewMPWrapper],
+ "ep_wrapper_kwargs": {
+ "weight_scale": 1
+ },
+ "movement_primitives_kwargs": {
+ 'movement_primitives_type': 'promp',
+ 'num_dof': 7
+ },
+ "phase_generator_kwargs": {
+ 'phase_generator_type': 'linear',
+ 'delay': 0,
+ 'tau': 0.8, # initial value
+ 'learn_tau': True,
+ 'learn_delay': False
+ },
+ "controller_kwargs": {
+ 'controller_type': 'motor',
+ "p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
+ "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]),
+ },
+ "basis_generator_kwargs": {
+ 'basis_generator_type': 'zero_rbf',
+ 'num_basis': 2,
+ 'num_basis_zero_start': 2
}
}
- }
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
diff --git a/alr_envs/alr/mujoco/beerpong/__init__.py b/alr_envs/alr/mujoco/beerpong/__init__.py
index 989b5a9..18e33ce 100644
--- a/alr_envs/alr/mujoco/beerpong/__init__.py
+++ b/alr_envs/alr/mujoco/beerpong/__init__.py
@@ -1 +1,2 @@
-from .mp_wrapper import MPWrapper
\ No newline at end of file
+from .mp_wrapper import MPWrapper
+from .new_mp_wrapper import NewMPWrapper
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index f5d2bd8..661d10c 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -3,6 +3,7 @@ import os
import numpy as np
from gym import utils
+from gym import spaces
from gym.envs.mujoco import MujocoEnv
from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
@@ -17,7 +18,7 @@ CUP_POS_MAX = np.array([0.32, -1.2])
class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
- rndm_goal=False, cup_goal_pos=None):
+ rndm_goal=False, learn_release_step=True, cup_goal_pos=None):
cup_goal_pos = np.array(cup_goal_pos if cup_goal_pos is not None else [-0.3, -1.2, 0.840])
if cup_goal_pos.shape[0]==2:
cup_goal_pos = np.insert(cup_goal_pos, 2, 0.840)
@@ -51,10 +52,9 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.noise_std = 0.01
else:
self.noise_std = 0
-
+ self.learn_release_step = learn_release_step
reward_function = BeerPongReward
self.reward_function = reward_function()
- self.n_table_bounces_first = 0
MujocoEnv.__init__(self, self.xml_path, frame_skip)
utils.EzPickle.__init__(self)
@@ -63,6 +63,13 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def start_pos(self):
return self._start_pos
+ def _set_action_space(self):
+ bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
+ bounds = np.concatenate((bounds, [[50, self.ep_length*0.333]]), axis=0)
+ low, high = bounds.T
+ self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
+ return self.action_space
+
@property
def start_vel(self):
return self._start_vel
@@ -76,8 +83,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
return self.sim.data.qvel[0:7].copy()
def reset(self):
- print(not self.reward_function.ball_ground_contact_first)
- self.n_table_bounces_first += int(not self.reward_function.ball_ground_contact_first)
self.reward_function.reset(self.add_noise)
return super().reset()
@@ -104,14 +109,17 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
return self._get_obs()
def step(self, a):
+        if self.learn_release_step:
+            self._release_step = np.clip(a[-1], self.action_space.low[-1],
+                                         self.action_space.high[-1])
reward_dist = 0.0
angular_vel = 0.0
- reward_ctrl = - np.square(a).sum()
-
+ applied_action = a[:a.shape[0]-int(self.learn_release_step)]
+ reward_ctrl = - np.square(applied_action).sum()
if self.apply_gravity_comp:
- a = a + self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0]
+ applied_action += self.sim.data.qfrc_bias[:len(applied_action)].copy() / self.model.actuator_gear[:, 0]
try:
- self.do_simulation(a, self.frame_skip)
+ self.do_simulation(applied_action, self.frame_skip)
if self._steps < self._release_step:
self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
@@ -125,7 +133,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
ob = self._get_obs()
if not crash:
- reward, reward_infos = self.reward_function.compute_reward(self, a)
+ reward, reward_infos = self.reward_function.compute_reward(self, applied_action)
success = reward_infos['success']
is_collided = reward_infos['is_collided']
ball_pos = reward_infos['ball_pos']
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index 24c48be..910763a 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -162,6 +162,10 @@ class BeerPongReward:
min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
1e-4 * np.mean(action_cost)
+ if env.learn_release_step and not self.ball_in_cup:
+                too_small = (env._release_step < 50) * (env._release_step - 50) ** 2
+                too_big = (env._release_step > 200) * 0.2 * (env._release_step - 200) ** 2
+                reward = reward - too_small - too_big
# 1e-7*np.mean(action_cost)
success = self.ball_in_cup
else:
diff --git a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
index ca2e17e..9cd6374 100644
--- a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
@@ -1,13 +1,15 @@
-from mp_wrapper import BaseMPWrapper
+from alr_envs.mp.episodic_wrapper import EpisodicWrapper
from typing import Union, Tuple
import numpy as np
+import gym
-class MPWrapper(BaseMPWrapper):
-
+class NewMPWrapper(EpisodicWrapper):
+ @property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qpos[0:7].copy()
+ @property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qvel[0:7].copy()
@@ -18,3 +20,25 @@ class MPWrapper(BaseMPWrapper):
[True] * 2, # xy position of cup
[False] # env steps
])
+
+ def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
+ if self.env.learn_release_step:
+ return np.concatenate((step_action, np.atleast_1d(env_spec_params)))
+ else:
+ return step_action
+
+ def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
+ if self.env.learn_release_step:
+ return action[:-1], action[-1] # mp_params, release step
+ else:
+ return action, None
+
+ def set_action_space(self):
+ if self.env.learn_release_step:
+ min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
+ min_action_bounds = np.concatenate((min_action_bounds.numpy(), [self.env.action_space.low[-1]]))
+ max_action_bounds = np.concatenate((max_action_bounds.numpy(), [self.env.action_space.high[-1]]))
+ self.mp_action_space = gym.spaces.Box(low=min_action_bounds, high=max_action_bounds, dtype=np.float32)
+ return self.mp_action_space
+ else:
+ return super(NewMPWrapper, self).set_action_space()
diff --git a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
index 5475366..02dc1d8 100644
--- a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
@@ -1,9 +1,9 @@
-from mp_wrapper import BaseMPWrapper
+from alr_envs.mp.episodic_wrapper import EpisodicWrapper
from typing import Union, Tuple
import numpy as np
-class MPWrapper(BaseMPWrapper):
+class MPWrapper(EpisodicWrapper):
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
diff --git a/alr_envs/examples/pd_control_gain_tuning.py b/alr_envs/examples/pd_control_gain_tuning.py
index 90aac11..465042c 100644
--- a/alr_envs/examples/pd_control_gain_tuning.py
+++ b/alr_envs/examples/pd_control_gain_tuning.py
@@ -39,7 +39,7 @@ actual_vel = np.zeros((len(pos), *base_shape))
act = np.zeros((len(pos), *base_shape))
for t, pos_vel in enumerate(zip(pos, vel)):
- actions = env.policy.get_action(pos_vel[0], pos_vel[1])
+    actions = env.policy.get_action(pos_vel[0], pos_vel[1], env.current_pos, env.current_vel)
actions = np.clip(actions, env.full_action_space.low, env.full_action_space.high)
_, _, _, _ = env.env.step(actions)
act[t, :] = actions
diff --git a/alr_envs/mp/__init__.py b/alr_envs/mp/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/alr_envs/mp/basis_generator_factory.py b/alr_envs/mp/basis_generator_factory.py
new file mode 100644
index 0000000..610ad20
--- /dev/null
+++ b/alr_envs/mp/basis_generator_factory.py
@@ -0,0 +1,17 @@
+from mp_pytorch import PhaseGenerator, NormalizedRBFBasisGenerator, ZeroStartNormalizedRBFBasisGenerator
+from mp_pytorch.basis_gn.rhytmic_basis import RhythmicBasisGenerator
+
+ALL_TYPES = ["rbf", "zero_rbf", "rhythmic"]
+
+
+def get_basis_generator(basis_generator_type: str, phase_generator: PhaseGenerator, **kwargs):
+ basis_generator_type = basis_generator_type.lower()
+ if basis_generator_type == "rbf":
+ return NormalizedRBFBasisGenerator(phase_generator, **kwargs)
+ elif basis_generator_type == "zero_rbf":
+ return ZeroStartNormalizedRBFBasisGenerator(phase_generator, **kwargs)
+ elif basis_generator_type == "rhythmic":
+ return RhythmicBasisGenerator(phase_generator, **kwargs)
+ else:
+ raise ValueError(f"Specified basis generator type {basis_generator_type} not supported, "
+ f"please choose one of {ALL_TYPES}.")
diff --git a/alr_envs/mp/controllers/__init__.py b/alr_envs/mp/controllers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/alr_envs/mp/controllers/base_controller.py b/alr_envs/mp/controllers/base_controller.py
new file mode 100644
index 0000000..1ac1522
--- /dev/null
+++ b/alr_envs/mp/controllers/base_controller.py
@@ -0,0 +1,4 @@
+class BaseController:
+
+ def get_action(self, des_pos, des_vel, c_pos, c_vel):
+ raise NotImplementedError
diff --git a/alr_envs/mp/controllers/controller_factory.py b/alr_envs/mp/controllers/controller_factory.py
new file mode 100644
index 0000000..1c1cfec
--- /dev/null
+++ b/alr_envs/mp/controllers/controller_factory.py
@@ -0,0 +1,21 @@
+from alr_envs.mp.controllers.meta_world_controller import MetaWorldController
+from alr_envs.mp.controllers.pd_controller import PDController
+from alr_envs.mp.controllers.vel_controller import VelController
+from alr_envs.mp.controllers.pos_controller import PosController
+
+ALL_TYPES = ["motor", "velocity", "position", "metaworld"]
+
+
+def get_controller(controller_type: str, **kwargs):
+ controller_type = controller_type.lower()
+ if controller_type == "motor":
+ return PDController(**kwargs)
+ elif controller_type == "velocity":
+ return VelController()
+ elif controller_type == "position":
+ return PosController()
+ elif controller_type == "metaworld":
+ return MetaWorldController()
+ else:
+ raise ValueError(f"Specified controller type {controller_type} not supported, "
+ f"please choose one of {ALL_TYPES}.")
\ No newline at end of file
diff --git a/alr_envs/mp/controllers/meta_world_controller.py b/alr_envs/mp/controllers/meta_world_controller.py
new file mode 100644
index 0000000..07988e5
--- /dev/null
+++ b/alr_envs/mp/controllers/meta_world_controller.py
@@ -0,0 +1,25 @@
+import numpy as np
+
+from alr_envs.mp.controllers.base_controller import BaseController
+
+
+class MetaWorldController(BaseController):
+ """
+ A Metaworld Controller. Using position and velocity information from a provided environment,
+ the controller calculates a response based on the desired position and velocity.
+ Unlike the other Controllers, this is a special controller for MetaWorld environments.
+ They use a position delta for the xyz coordinates and a raw position for the gripper opening.
+
+ :param env: A position environment
+ """
+
+ def get_action(self, des_pos, des_vel, c_pos, c_vel):
+ gripper_pos = des_pos[-1]
+
+        cur_pos = c_pos[:-1]  # use the passed-in current position; no env reference is available here
+ xyz_pos = des_pos[:-1]
+
+ assert xyz_pos.shape == cur_pos.shape, \
+ f"Mismatch in dimension between desired position {xyz_pos.shape} and current position {cur_pos.shape}"
+ trq = np.hstack([(xyz_pos - cur_pos), gripper_pos])
+ return trq
diff --git a/alr_envs/mp/controllers/pd_controller.py b/alr_envs/mp/controllers/pd_controller.py
new file mode 100644
index 0000000..d22f6b4
--- /dev/null
+++ b/alr_envs/mp/controllers/pd_controller.py
@@ -0,0 +1,28 @@
+from typing import Union, Tuple
+
+from alr_envs.mp.controllers.base_controller import BaseController
+
+
+class PDController(BaseController):
+ """
+ A PD-Controller. Using position and velocity information from a provided environment,
+ the controller calculates a response based on the desired position and velocity
+
+ :param env: A position environment
+ :param p_gains: Factors for the proportional gains
+ :param d_gains: Factors for the differential gains
+ """
+
+ def __init__(self,
+ p_gains: Union[float, Tuple] = 1,
+ d_gains: Union[float, Tuple] = 0.5):
+ self.p_gains = p_gains
+ self.d_gains = d_gains
+
+ def get_action(self, des_pos, des_vel, c_pos, c_vel):
+ assert des_pos.shape == c_pos.shape, \
+ f"Mismatch in dimension between desired position {des_pos.shape} and current position {c_pos.shape}"
+ assert des_vel.shape == c_vel.shape, \
+ f"Mismatch in dimension between desired velocity {des_vel.shape} and current velocity {c_vel.shape}"
+ trq = self.p_gains * (des_pos - c_pos) + self.d_gains * (des_vel - c_vel)
+ return trq
diff --git a/alr_envs/mp/controllers/pos_controller.py b/alr_envs/mp/controllers/pos_controller.py
new file mode 100644
index 0000000..bec3c68
--- /dev/null
+++ b/alr_envs/mp/controllers/pos_controller.py
@@ -0,0 +1,9 @@
+from alr_envs.mp.controllers.base_controller import BaseController
+
+
+class PosController(BaseController):
+ """
+ A Position Controller. The controller calculates a response only based on the desired position.
+ """
+ def get_action(self, des_pos, des_vel, c_pos, c_vel):
+ return des_pos
diff --git a/alr_envs/mp/controllers/vel_controller.py b/alr_envs/mp/controllers/vel_controller.py
new file mode 100644
index 0000000..38128be
--- /dev/null
+++ b/alr_envs/mp/controllers/vel_controller.py
@@ -0,0 +1,9 @@
+from alr_envs.mp.controllers.base_controller import BaseController
+
+
+class VelController(BaseController):
+ """
+ A Velocity Controller. The controller calculates a response only based on the desired velocity.
+ """
+ def get_action(self, des_pos, des_vel, c_pos, c_vel):
+ return des_vel
diff --git a/mp_wrapper.py b/alr_envs/mp/episodic_wrapper.py
similarity index 57%
rename from mp_wrapper.py
rename to alr_envs/mp/episodic_wrapper.py
index 3defe07..ef0723e 100644
--- a/mp_wrapper.py
+++ b/alr_envs/mp/episodic_wrapper.py
@@ -3,15 +3,13 @@ from typing import Union, Tuple
import gym
import numpy as np
-
from gym import spaces
-from gym.envs.mujoco import MujocoEnv
-
-from policies import get_policy_class, BaseController
from mp_pytorch.mp.mp_interfaces import MPInterface
+from alr_envs.mp.controllers.base_controller import BaseController
-class BaseMPWrapper(gym.Env, ABC):
+
+class EpisodicWrapper(gym.Env, ABC):
"""
Base class for movement primitive based gym.Wrapper implementations.
@@ -20,41 +18,34 @@ class BaseMPWrapper(gym.Env, ABC):
num_dof: Dimension of the action space of the wrapped env
num_basis: Number of basis functions per dof
duration: Length of the trajectory of the movement primitive in seconds
- post_traj_time: Time for which the last position of the trajectory is fed to the environment to continue
- simulation in seconds
- policy_type: Type or object defining the policy that is used to generate action based on the trajectory
+ controller: Type or object defining the policy that is used to generate action based on the trajectory
weight_scale: Scaling parameter for the actions given to this wrapper
render_mode: Equivalent to gym render mode
"""
-
def __init__(self,
- env: MujocoEnv,
+ env: gym.Env,
mp: MPInterface,
+ controller: BaseController,
duration: float,
- policy_type: Union[str, BaseController] = None,
render_mode: str = None,
- verbose=2,
- **mp_kwargs
- ):
+ verbose: int = 1,
+ weight_scale: float = 1):
super().__init__()
- assert env.dt is not None
self.env = env
- self.dt = env.dt
+ try:
+ self.dt = env.dt
+ except AttributeError:
+            raise AttributeError("step-based environment needs to expose a 'dt' attribute")
self.duration = duration
self.traj_steps = int(duration / self.dt)
self.post_traj_steps = self.env.spec.max_episode_steps - self.traj_steps
- # TODO: move to constructor, use policy factory instead what Fabian already coded
- if isinstance(policy_type, str):
- # pop policy kwargs here such that they are not passed to the initialize_mp method
- self.policy = get_policy_class(policy_type, self, **mp_kwargs.pop('policy_kwargs', {}))
- else:
- self.policy = policy_type
-
+ self.controller = controller
self.mp = mp
self.env = env
self.verbose = verbose
+ self.weight_scale = weight_scale
# rendering
self.render_mode = render_mode
@@ -62,19 +53,24 @@ class BaseMPWrapper(gym.Env, ABC):
# self.time_steps = np.linspace(0, self.duration, self.traj_steps + 1)
self.time_steps = np.linspace(0, self.duration, self.traj_steps)
self.mp.set_mp_times(self.time_steps)
-
# action_bounds = np.inf * np.ones((np.prod(self.mp.num_params)))
min_action_bounds, max_action_bounds = mp.get_param_bounds()
- self.action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
+ self.mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
dtype=np.float32)
+ self.action_space = self.set_action_space()
self.active_obs = self.set_active_obs()
self.observation_space = spaces.Box(low=self.env.observation_space.low[self.active_obs],
high=self.env.observation_space.high[self.active_obs],
dtype=self.env.observation_space.dtype)
def get_trajectory(self, action: np.ndarray) -> Tuple:
- self.mp.set_params(action)
+        # TODO: this follows the implementation of the mp_pytorch library which includes the parameters tau and delay at
+ # the beginning of the array.
+ ignore_indices = int(self.mp.learn_tau) + int(self.mp.learn_delay)
+ scaled_mp_params = action.copy()
+ scaled_mp_params[ignore_indices:] *= self.weight_scale
+ self.mp.set_params(scaled_mp_params)
self.mp.set_boundary_conditions(bc_time=self.time_steps[:1], bc_pos=self.current_pos, bc_vel=self.current_vel)
traj_dict = self.mp.get_mp_trajs(get_pos = True, get_vel = True)
trajectory_tensor, velocity_tensor = traj_dict['pos'], traj_dict['vel']
@@ -88,9 +84,54 @@ class BaseMPWrapper(gym.Env, ABC):
return trajectory, velocity
+ def set_action_space(self):
+ """
+ This function can be used to modify the action space for considering actions which are not learned via motion
+ primitives. E.g. ball releasing time for the beer pong task. By default, it is the parameter space of the
+ motion primitive.
+ Only needs to be overwritten if the action space needs to be modified.
+ """
+ return self.mp_action_space
+
+ def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
+ """
+ Used to extract the parameters for the motion primitive and other parameters from an action array which might
+ include other actions like ball releasing time for the beer pong environment.
+ This only needs to be overwritten if the action space is modified.
+ Args:
+ action: a vector instance of the whole action space, includes mp parameters and additional parameters if
+ specified, else only mp parameters
+
+ Returns:
+ Tuple: mp_arguments and other arguments
+ """
+ return action, None
+
+ def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
+ """
+ This function can be used to modify the step_action with additional parameters e.g. releasing the ball in the
+ Beerpong env. The parameters used should not be part of the motion primitive parameters.
+ Returns step_action by default, can be overwritten in individual mp_wrappers.
+ Args:
+ t: the current time step of the episode
+            env_spec_params: the environment-specific parameter, as defined in the function _episode_callback
+ (e.g. ball release time in Beer Pong)
+ step_action: the current step-based action
+
+ Returns:
+ modified step action
+ """
+ return step_action
+
@abstractmethod
- def set_active_obs(self):
- pass
+ def set_active_obs(self) -> np.ndarray:
+ """
+        This function defines the context, i.e. which parts of the observation are exposed to the agent.
+        Returns:
+            boolean array masking the observation indices that are returned
+
+ """
+ return np.ones(self.env.observation_space.shape[0], dtype=bool)
@property
@abstractmethod
@@ -116,38 +157,34 @@ class BaseMPWrapper(gym.Env, ABC):
"""
raise NotImplementedError()
- def _step_callback(self, t, action):
- pass
-
def step(self, action: np.ndarray):
""" This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
# TODO: Think about sequencing
# TODO: Reward Function rather here?
# agent to learn when to release the ball
- trajectory, velocity = self.get_trajectory(action)
+ mp_params, env_spec_params = self._episode_callback(action)
+ trajectory, velocity = self.get_trajectory(mp_params)
trajectory_length = len(trajectory)
- actions = np.zeros(shape=(trajectory_length,) + self.env.action_space.shape)
- if isinstance(self.env.observation_space, spaces.Dict): # For goal environments
- observations = np.zeros(shape=(trajectory_length,) + self.env.observation_space["observation"].shape,
- dtype=self.env.observation_space.dtype)
- else:
+        if self.verbose >= 2:
+ actions = np.zeros(shape=(trajectory_length,) + self.env.action_space.shape)
observations = np.zeros(shape=(trajectory_length,) + self.env.observation_space.shape,
- dtype=self.env.observation_space.dtype)
- rewards = np.zeros(shape=(trajectory_length,))
+ dtype=self.env.observation_space.dtype)
+ rewards = np.zeros(shape=(trajectory_length,))
trajectory_return = 0
infos = dict()
for t, pos_vel in enumerate(zip(trajectory, velocity)):
- ac = self.policy.get_action(pos_vel[0], pos_vel[1])
- callback_action = self._step_callback(t, action)
- if callback_action is not None:
- ac = np.concatenate((callback_action, ac)) # include callback action at first pos of vector
- actions[t, :] = np.clip(ac, self.env.action_space.low, self.env.action_space.high)
- obs, rewards[t], done, info = self.env.step(actions[t, :])
- observations[t, :] = obs["observation"] if isinstance(self.env.observation_space, spaces.Dict) else obs
- trajectory_return += rewards[t]
+ step_action = self.controller.get_action(pos_vel[0], pos_vel[1], self.current_pos, self.current_vel)
+ step_action = self._step_callback(t, env_spec_params, step_action) # include possible callback info
+ c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
+ obs, c_reward, done, info = self.env.step(c_action)
+ if self.verbose >= 2:
+ actions[t, :] = c_action
+ rewards[t] = c_reward
+ observations[t, :] = obs
+ trajectory_return += c_reward
for k, v in info.items():
elems = infos.get(k, [None] * trajectory_length)
elems[t] = v
@@ -158,14 +195,14 @@ class BaseMPWrapper(gym.Env, ABC):
if done:
break
infos.update({k: v[:t + 1] for k, v in infos.items()})
- infos['trajectory'] = trajectory
- if self.verbose == 2:
+ if self.verbose >= 2:
+ infos['trajectory'] = trajectory
infos['step_actions'] = actions[:t + 1]
infos['step_observations'] = observations[:t + 1]
infos['step_rewards'] = rewards[:t + 1]
infos['trajectory_length'] = t + 1
done = True
- return self.get_observation_from_step(observations[t]), trajectory_return, done, infos
+ return self.get_observation_from_step(obs), trajectory_return, done, infos
def reset(self):
return self.get_observation_from_step(self.env.reset())
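With this change a single step() call covers a whole episode. A rough usage sketch (not part of the patch), where env is assumed to be built with the MP helpers introduced below:

    # `env` is assumed to be an episodically wrapped environment, e.g. created via the
    # make_mp_env_helper / make_mp_from_kwargs helpers added further down.
    obs = env.reset()
    mp_params = env.action_space.sample()                   # one MP parameter vector per episode
    obs, episode_return, done, info = env.step(mp_params)   # rolls out the whole trajectory internally
    assert done                                             # the wrapper terminates after a single rollout
    print(info['trajectory_length'])                        # number of executed low-level steps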
diff --git a/alr_envs/mp/mp_factory.py b/alr_envs/mp/mp_factory.py
new file mode 100644
index 0000000..5cf7231
--- /dev/null
+++ b/alr_envs/mp/mp_factory.py
@@ -0,0 +1,22 @@
+from mp_pytorch.mp.dmp import DMP
+from mp_pytorch.mp.promp import ProMP
+from mp_pytorch.mp.idmp import IDMP
+
+from mp_pytorch.basis_gn.basis_generator import BasisGenerator
+
+ALL_TYPES = ["promp", "dmp", "idmp"]
+
+
+def get_movement_primitive(
+ movement_primitives_type: str, action_dim: int, basis_generator: BasisGenerator, **kwargs
+ ):
+ movement_primitives_type = movement_primitives_type.lower()
+ if movement_primitives_type == "promp":
+ return ProMP(basis_generator, action_dim, **kwargs)
+ elif movement_primitives_type == "dmp":
+ return DMP(basis_generator, action_dim, **kwargs)
+ elif movement_primitives_type == 'idmp':
+ return IDMP(basis_generator, action_dim, **kwargs)
+ else:
+ raise ValueError(f"Specified movement primitive type {movement_primitives_type} not supported, "
+ f"please choose one of {ALL_TYPES}.")
\ No newline at end of file
diff --git a/alr_envs/mp/phase_generator_factory.py b/alr_envs/mp/phase_generator_factory.py
new file mode 100644
index 0000000..45cfdc1
--- /dev/null
+++ b/alr_envs/mp/phase_generator_factory.py
@@ -0,0 +1,20 @@
+from mp_pytorch import LinearPhaseGenerator, ExpDecayPhaseGenerator
+from mp_pytorch.phase_gn.rhythmic_phase_generator import RhythmicPhaseGenerator
+from mp_pytorch.phase_gn.smooth_phase_generator import SmoothPhaseGenerator
+
+ALL_TYPES = ["linear", "exp", "rhythmic", "smooth"]
+
+
+def get_phase_generator(phase_generator_type, **kwargs):
+ phase_generator_type = phase_generator_type.lower()
+ if phase_generator_type == "linear":
+ return LinearPhaseGenerator(**kwargs)
+ elif phase_generator_type == "exp":
+ return ExpDecayPhaseGenerator(**kwargs)
+ elif phase_generator_type == "rhythmic":
+ return RhythmicPhaseGenerator(**kwargs)
+ elif phase_generator_type == "smooth":
+ return SmoothPhaseGenerator(**kwargs)
+ else:
+ raise ValueError(f"Specified phase generator type {phase_generator_type} not supported, "
+ f"please choose one of {ALL_TYPES}.")
\ No newline at end of file
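Together with the basis generator factory imported in make_env_helpers below, these factories are driven as in the following sketch; it mirrors make_mp_from_kwargs, with keyword values copied from the ProMP registrations in this series (whether mp_pytorch accepts exactly these keywords is assumed from that code):

    from alr_envs.mp.phase_generator_factory import get_phase_generator
    from alr_envs.mp.basis_generator_factory import get_basis_generator
    from alr_envs.mp.mp_factory import get_movement_primitive

    phase_gen = get_phase_generator(phase_generator_type='linear', delay=0, tau=2,
                                    learn_tau=False, learn_delay=False)
    basis_gen = get_basis_generator(phase_generator=phase_gen, basis_generator_type='zero_rbf',
                                    num_basis=5, num_basis_zero_start=2)
    mp = get_movement_primitive(movement_primitives_type='promp', action_dim=7,
                                basis_generator=basis_gen)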
diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py
index 4300439..0c06906 100644
--- a/alr_envs/utils/make_env_helpers.py
+++ b/alr_envs/utils/make_env_helpers.py
@@ -1,13 +1,20 @@
import warnings
-from typing import Iterable, Type, Union
+from typing import Iterable, Type, Union, Mapping, MutableMapping
import gym
import numpy as np
from gym.envs.registration import EnvSpec
-from mp_env_api import MPEnvWrapper
from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper
from mp_env_api.mp_wrappers.promp_wrapper import ProMPWrapper
+from mp_pytorch import MPInterface
+
+from alr_envs.mp.basis_generator_factory import get_basis_generator
+from alr_envs.mp.controllers.base_controller import BaseController
+from alr_envs.mp.controllers.controller_factory import get_controller
+from alr_envs.mp.mp_factory import get_movement_primitive
+from alr_envs.mp.episodic_wrapper import EpisodicWrapper
+from alr_envs.mp.phase_generator_factory import get_phase_generator
def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
@@ -92,7 +99,8 @@ def make(env_id: str, seed, **kwargs):
return env
-def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs):
+def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], mp: MPInterface, controller: BaseController,
+ ep_wrapper_kwargs: Mapping, seed=1, **kwargs):
"""
Helper function for creating a wrapped gym environment using MPs.
It adds all provided wrappers to the specified environment and verifies at least one MPEnvWrapper is
@@ -108,14 +116,102 @@ def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1
"""
# _env = gym.make(env_id)
_env = make(env_id, seed, **kwargs)
-
- assert any(issubclass(w, MPEnvWrapper) for w in wrappers), \
- "At least one MPEnvWrapper is required in order to leverage motion primitive environments."
+ has_episodic_wrapper = False
for w in wrappers:
- _env = w(_env)
-
+ # only wrap the environment if not EpisodicWrapper, e.g. for vision
+ if not issubclass(w, EpisodicWrapper):
+ _env = w(_env)
+ else: # if EpisodicWrapper, use specific constructor
+ has_episodic_wrapper = True
+ _env = w(env=_env, mp=mp, controller=controller, **ep_wrapper_kwargs)
+ assert has_episodic_wrapper, \
+ "At least one MPEnvWrapper is required in order to leverage motion primitive environments."
return _env
+def make_mp_from_kwargs(
+ env_id: str, wrappers: Iterable, ep_wrapper_kwargs: MutableMapping, mp_kwargs: MutableMapping,
+ controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed=1,
+ sequenced=False, **kwargs
+ ):
+    """
+    This can also be used standalone for manually building a custom MP environment.
+    Args:
+        ep_wrapper_kwargs: kwargs passed to the EpisodicWrapper (e.g. weight_scale, duration)
+        basis_kwargs: kwargs for the basis generator factory
+        phase_kwargs: kwargs for the phase generator factory
+        controller_kwargs: kwargs for the controller factory
+        env_id: base_env_name,
+        wrappers: list of wrappers (at least an EpisodicWrapper),
+        seed: seed of environment
+        sequenced: when True, multiple ProMPs can be sequenced by specifying the duration of each sub-trajectory;
+            this behavior is much closer to step-based learning.
+        mp_kwargs: dict of at least {num_dof: int, num_basis: int} for the movement primitive
+
+    Returns: MP wrapped gym env
+
+    """
+ _verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None))
+ dummy_env = make(env_id, seed)
+ if ep_wrapper_kwargs.get('duration', None) is None:
+ ep_wrapper_kwargs['duration'] = dummy_env.spec.max_episode_steps*dummy_env.dt
+ if phase_kwargs.get('tau', None) is None:
+ phase_kwargs['tau'] = ep_wrapper_kwargs['duration']
+ action_dim = mp_kwargs.pop('num_dof', None)
+ action_dim = action_dim if action_dim is not None else np.prod(dummy_env.action_space.shape).item()
+ phase_gen = get_phase_generator(**phase_kwargs)
+ basis_gen = get_basis_generator(phase_generator=phase_gen, **basis_kwargs)
+ controller = get_controller(**controller_kwargs)
+ mp = get_movement_primitive(action_dim=action_dim, basis_generator=basis_gen, **mp_kwargs)
+ _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, mp=mp, controller=controller,
+ ep_wrapper_kwargs=ep_wrapper_kwargs, seed=seed, **kwargs)
+ return _env
+
+
+def make_mp_env_helper(**kwargs):
+ """
+    Helper function for registering MP gym environments.
+ Args:
+ **kwargs: expects at least the following:
+ {
+ "name": base environment name.
+ "wrappers": list of wrappers (at least an EpisodicWrapper is required),
+ "movement_primitives_kwargs": {
+ "movement_primitives_type": type_of_your_movement_primitive,
+ non default arguments for the movement primitive instance
+ ...
+ }
+ "controller_kwargs": {
+ "controller_type": type_of_your_controller,
+ non default arguments for the controller instance
+ ...
+ },
+ "basis_generator_kwargs": {
+ "basis_generator_type": type_of_your_basis_generator,
+ non default arguments for the basis generator instance
+ ...
+ },
+ "phase_generator_kwargs": {
+ "phase_generator_type": type_of_your_phase_generator,
+ non default arguments for the phase generator instance
+ ...
+ },
+ }
+
+ Returns: MP wrapped gym env
+
+ """
+ seed = kwargs.pop("seed", None)
+ wrappers = kwargs.pop("wrappers")
+
+ mp_kwargs = kwargs.pop("movement_primitives_kwargs")
+ ep_wrapper_kwargs = kwargs.pop('ep_wrapper_kwargs')
+ contr_kwargs = kwargs.pop("controller_kwargs")
+ phase_kwargs = kwargs.pop("phase_generator_kwargs")
+ basis_kwargs = kwargs.pop("basis_generator_kwargs")
+
+ return make_mp_from_kwargs(env_id=kwargs.pop("name"), wrappers=wrappers, ep_wrapper_kwargs=ep_wrapper_kwargs,
+ mp_kwargs=mp_kwargs, controller_kwargs=contr_kwargs, phase_kwargs=phase_kwargs,
+ basis_kwargs=basis_kwargs, **kwargs, seed=seed)
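For reference, a sketch of the kwargs layout this helper consumes, mirroring the HopperJump ProMP registration added later in this series (values are illustrative and the base environment must be registered for this to run):

    import numpy as np

    from alr_envs.alr.mujoco.hopper_jump.new_mp_wrapper import NewMPWrapper
    from alr_envs.utils.make_env_helpers import make_mp_env_helper

    env = make_mp_env_helper(
        name="alr_envs:ALRHopperJump-v0",
        wrappers=[NewMPWrapper],  # at least one EpisodicWrapper subclass is required
        seed=1,
        ep_wrapper_kwargs={"weight_scale": 1},
        movement_primitives_kwargs={"movement_primitives_type": "promp", "action_dim": 3},
        phase_generator_kwargs={"phase_generator_type": "linear", "delay": 0, "tau": 2,
                                "learn_tau": False, "learn_delay": False},
        controller_kwargs={"controller_type": "motor",
                           "p_gains": np.ones(3), "d_gains": 0.1 * np.ones(3)},
        basis_generator_kwargs={"basis_generator_type": "zero_rbf",
                                "num_basis": 5, "num_basis_zero_start": 2},
    )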
def make_dmp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwargs):
"""
diff --git a/policies.py b/policies.py
deleted file mode 100644
index fd89f9e..0000000
--- a/policies.py
+++ /dev/null
@@ -1,129 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-
-
-class BaseController:
- def __init__(self, env, **kwargs):
- self.env = env
-
- def get_action(self, des_pos, des_vel):
- raise NotImplementedError
-
-
-class PosController(BaseController):
- """
- A Position Controller. The controller calculates a response only based on the desired position.
- """
- def get_action(self, des_pos, des_vel):
- return des_pos
-
-
-class VelController(BaseController):
- """
- A Velocity Controller. The controller calculates a response only based on the desired velocity.
- """
- def get_action(self, des_pos, des_vel):
- return des_vel
-
-
-class PDController(BaseController):
- """
- A PD-Controller. Using position and velocity information from a provided environment,
- the controller calculates a response based on the desired position and velocity
-
- :param env: A position environment
- :param p_gains: Factors for the proportional gains
- :param d_gains: Factors for the differential gains
- """
-
- def __init__(self,
- env,
- p_gains: Union[float, Tuple],
- d_gains: Union[float, Tuple]):
- self.p_gains = p_gains
- self.d_gains = d_gains
- super(PDController, self).__init__(env)
-
- def get_action(self, des_pos, des_vel):
- cur_pos = self.env.current_pos
- cur_vel = self.env.current_vel
- assert des_pos.shape == cur_pos.shape, \
- f"Mismatch in dimension between desired position {des_pos.shape} and current position {cur_pos.shape}"
- assert des_vel.shape == cur_vel.shape, \
- f"Mismatch in dimension between desired velocity {des_vel.shape} and current velocity {cur_vel.shape}"
- trq = self.p_gains * (des_pos - cur_pos) + self.d_gains * (des_vel - cur_vel)
- return trq
-
-
-class MetaWorldController(BaseController):
- """
- A Metaworld Controller. Using position and velocity information from a provided environment,
- the controller calculates a response based on the desired position and velocity.
- Unlike the other Controllers, this is a special controller for MetaWorld environments.
- They use a position delta for the xyz coordinates and a raw position for the gripper opening.
-
- :param env: A position environment
- """
-
- def __init__(self,
- env
- ):
- super(MetaWorldController, self).__init__(env)
-
- def get_action(self, des_pos, des_vel):
- gripper_pos = des_pos[-1]
-
- cur_pos = self.env.current_pos[:-1]
- xyz_pos = des_pos[:-1]
-
- assert xyz_pos.shape == cur_pos.shape, \
- f"Mismatch in dimension between desired position {xyz_pos.shape} and current position {cur_pos.shape}"
- trq = np.hstack([(xyz_pos - cur_pos), gripper_pos])
- return trq
-
-
-#TODO: Do we need this class?
-class PDControllerExtend(BaseController):
- """
- A PD-Controller. Using position and velocity information from a provided positional environment,
- the controller calculates a response based on the desired position and velocity
-
- :param env: A position environment
- :param p_gains: Factors for the proportional gains
- :param d_gains: Factors for the differential gains
- """
-
- def __init__(self,
- env,
- p_gains: Union[float, Tuple],
- d_gains: Union[float, Tuple]):
-
- self.p_gains = p_gains
- self.d_gains = d_gains
- super(PDControllerExtend, self).__init__(env)
-
- def get_action(self, des_pos, des_vel):
- cur_pos = self.env.current_pos
- cur_vel = self.env.current_vel
- if len(des_pos) != len(cur_pos):
- des_pos = self.env.extend_des_pos(des_pos)
- if len(des_vel) != len(cur_vel):
- des_vel = self.env.extend_des_vel(des_vel)
- trq = self.p_gains * (des_pos - cur_pos) + self.d_gains * (des_vel - cur_vel)
- return trq
-
-
-def get_policy_class(policy_type, env, **kwargs):
- if policy_type == "motor":
- return PDController(env, **kwargs)
- elif policy_type == "velocity":
- return VelController(env)
- elif policy_type == "position":
- return PosController(env)
- elif policy_type == "metaworld":
- return MetaWorldController(env)
- else:
- raise ValueError(f"Invalid controller type {policy_type} provided. Only 'motor', 'velocity', 'position' "
- f"and 'metaworld are currently supported controllers.")
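The controllers removed here live on under alr_envs/mp/controllers (see the new imports in make_env_helpers above). A sketch of the replacement path, with keyword names taken from the controller_kwargs used in the registrations of this series:

    import numpy as np

    from alr_envs.mp.controllers.controller_factory import get_controller

    # PD gains as used for the 7-DoF beer pong setup in this series
    controller = get_controller(controller_type='motor',
                                p_gains=np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
                                d_gains=np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]))
    # The episodic wrapper then queries it once per low-level step:
    #   step_action = controller.get_action(des_pos, des_vel, current_pos, current_vel)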
From 640f3b2d909656325a213e1a24592bbedf3c922c Mon Sep 17 00:00:00 2001
From: Onur
Date: Tue, 3 May 2022 21:34:39 +0200
Subject: [PATCH 024/104] fix action space bugs for bp
---
alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py | 4 ++--
alr_envs/mp/episodic_wrapper.py | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
index 9cd6374..267e76e 100644
--- a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
@@ -38,7 +38,7 @@ class NewMPWrapper(EpisodicWrapper):
min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
min_action_bounds = np.concatenate((min_action_bounds.numpy(), [self.env.action_space.low[-1]]))
max_action_bounds = np.concatenate((max_action_bounds.numpy(), [self.env.action_space.high[-1]]))
- self.mp_action_space = gym.spaces.Box(low=min_action_bounds, high=max_action_bounds, dtype=np.float32)
- return self.mp_action_space
+ self.action_space = gym.spaces.Box(low=min_action_bounds, high=max_action_bounds, dtype=np.float32)
+ return self.action_space
else:
return super(NewMPWrapper, self).set_action_space()
diff --git a/alr_envs/mp/episodic_wrapper.py b/alr_envs/mp/episodic_wrapper.py
index ef0723e..092f1bc 100644
--- a/alr_envs/mp/episodic_wrapper.py
+++ b/alr_envs/mp/episodic_wrapper.py
@@ -70,7 +70,7 @@ class EpisodicWrapper(gym.Env, ABC):
ignore_indices = int(self.mp.learn_tau) + int(self.mp.learn_delay)
scaled_mp_params = action.copy()
scaled_mp_params[ignore_indices:] *= self.weight_scale
- self.mp.set_params(scaled_mp_params)
+ self.mp.set_params(np.clip(scaled_mp_params, self.mp_action_space.low, self.mp_action_space.high))
self.mp.set_boundary_conditions(bc_time=self.time_steps[:1], bc_pos=self.current_pos, bc_vel=self.current_vel)
traj_dict = self.mp.get_mp_trajs(get_pos = True, get_vel = True)
trajectory_tensor, velocity_tensor = traj_dict['pos'], traj_dict['vel']
From 1881c14a48b74b142f939a7a9edd1c47832614ac Mon Sep 17 00:00:00 2001
From: Fabian
Date: Thu, 5 May 2022 16:48:59 +0200
Subject: [PATCH 025/104] reacher adjustments
---
alr_envs/alr/__init__.py | 5 +++--
alr_envs/alr/mujoco/reacher/alr_reacher.py | 8 ++++++--
2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 8a7140d..2e23d44 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -97,6 +97,7 @@ register(
"hole_depth": 1,
"hole_x": None,
"collision_penalty": 100,
+ "rew_fct": "unbounded"
}
)
@@ -354,7 +355,7 @@ for _v in _versions:
"wrappers": [classic_control.hole_reacher.MPWrapper],
"mp_kwargs": {
"num_dof": 5,
- "num_basis": 5,
+ "num_basis": 3,
"duration": 2,
"policy_type": "velocity",
"weights_scale": 5,
@@ -402,7 +403,7 @@ for _v in _versions:
"wrappers": [mujoco.reacher.MPWrapper],
"mp_kwargs": {
"num_dof": 5 if "long" not in _v.lower() else 7,
- "num_basis": 1,
+ "num_basis": 2,
"duration": 4,
"policy_type": "motor",
"weights_scale": 5,
diff --git a/alr_envs/alr/mujoco/reacher/alr_reacher.py b/alr_envs/alr/mujoco/reacher/alr_reacher.py
index c2b5f18..b436fdd 100644
--- a/alr_envs/alr/mujoco/reacher/alr_reacher.py
+++ b/alr_envs/alr/mujoco/reacher/alr_reacher.py
@@ -39,14 +39,18 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
reward_dist = 0.0
angular_vel = 0.0
reward_balance = 0.0
+ is_delayed = self.steps_before_reward > 0
+ reward_ctrl = - np.square(a).sum()
if self._steps >= self.steps_before_reward:
vec = self.get_body_com("fingertip") - self.get_body_com("target")
reward_dist -= self.reward_weight * np.linalg.norm(vec)
- if self.steps_before_reward > 0:
+ if is_delayed:
# avoid giving this penalty for normal step based case
# angular_vel -= 10 * np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
angular_vel -= 10 * np.square(self.sim.data.qvel.flat[:self.n_links]).sum()
- reward_ctrl = - 10 * np.square(a).sum()
+ if is_delayed:
+ # Higher control penalty for sparse reward per timestep
+ reward_ctrl *= 10
if self.balance:
reward_balance -= self.balance_weight * np.abs(
From ad30e732c84f4fb29263a68443f6ba74a6b00730 Mon Sep 17 00:00:00 2001
From: Fabian
Date: Thu, 5 May 2022 16:53:56 +0200
Subject: [PATCH 026/104] reacher adjustments
---
alr_envs/alr/mujoco/reacher/alr_reacher.py | 82 ++++++++++++----------
1 file changed, 46 insertions(+), 36 deletions(-)
diff --git a/alr_envs/alr/mujoco/reacher/alr_reacher.py b/alr_envs/alr/mujoco/reacher/alr_reacher.py
index 4477a66..b436fdd 100644
--- a/alr_envs/alr/mujoco/reacher/alr_reacher.py
+++ b/alr_envs/alr/mujoco/reacher/alr_reacher.py
@@ -39,11 +39,18 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
reward_dist = 0.0
angular_vel = 0.0
reward_balance = 0.0
+ is_delayed = self.steps_before_reward > 0
+ reward_ctrl = - np.square(a).sum()
if self._steps >= self.steps_before_reward:
vec = self.get_body_com("fingertip") - self.get_body_com("target")
reward_dist -= self.reward_weight * np.linalg.norm(vec)
- angular_vel -= np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
- reward_ctrl = - np.square(a).sum()
+ if is_delayed:
+ # avoid giving this penalty for normal step based case
+ # angular_vel -= 10 * np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
+ angular_vel -= 10 * np.square(self.sim.data.qvel.flat[:self.n_links]).sum()
+ if is_delayed:
+ # Higher control penalty for sparse reward per timestep
+ reward_ctrl *= 10
if self.balance:
reward_balance -= self.balance_weight * np.abs(
@@ -56,63 +63,66 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl,
velocity=angular_vel, reward_balance=reward_balance,
end_effector=self.get_body_com("fingertip").copy(),
- goal=self.goal if hasattr(self, "goal") else None,
- joint_pos = self.sim.data.qpos.flat[:self.n_links].copy(),
- joint_vel = self.sim.data.qvel.flat[:self.n_links].copy())
+ goal=self.goal if hasattr(self, "goal") else None)
def viewer_setup(self):
self.viewer.cam.trackbodyid = 0
+ # def reset_model(self):
+ # qpos = self.init_qpos
+ # if not hasattr(self, "goal"):
+ # self.goal = np.array([-0.25, 0.25])
+ # # self.goal = self.init_qpos.copy()[:2] + 0.05
+ # qpos[-2:] = self.goal
+ # qvel = self.init_qvel
+ # qvel[-2:] = 0
+ # self.set_state(qpos, qvel)
+ # self._steps = 0
+ #
+ # return self._get_obs()
+
def reset_model(self):
- # qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
- qpos = self.init_qpos
+ qpos = self.init_qpos.copy()
while True:
self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
+ # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
+ # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
if np.linalg.norm(self.goal) < self.n_links / 10:
break
qpos[-2:] = self.goal
- qvel = self.init_qvel# + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
+ qvel = self.init_qvel.copy()
qvel[-2:] = 0
self.set_state(qpos, qvel)
self._steps = 0
return self._get_obs()
+ # def reset_model(self):
+ # qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
+ # while True:
+ # self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
+ # if np.linalg.norm(self.goal) < self.n_links / 10:
+ # break
+ # qpos[-2:] = self.goal
+ # qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
+ # qvel[-2:] = 0
+ # self.set_state(qpos, qvel)
+ # self._steps = 0
+ #
+ # return self._get_obs()
+
def _get_obs(self):
theta = self.sim.data.qpos.flat[:self.n_links]
+ target = self.get_body_com("target")
return np.concatenate([
np.cos(theta),
np.sin(theta),
- self.sim.data.qpos.flat[self.n_links:], # this is goal position
- self.sim.data.qvel.flat[:self.n_links], # this is angular velocity
- self.get_body_com("fingertip") - self.get_body_com("target"),
- # self.get_body_com("target"), # only return target to make problem harder
+ target[:2], # x-y of goal position
+ self.sim.data.qvel.flat[:self.n_links], # angular velocity
+ self.get_body_com("fingertip") - target, # goal distance
[self._steps],
])
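As a quick sanity check of the new observation layout (not part of the patch), for the default n_links = 5:

    # cos(theta): 5, sin(theta): 5, target x-y: 2, joint velocities: 5,
    # fingertip - target: 3, step counter: 1
    obs_dim = 5 + 5 + 2 + 5 + 3 + 1  # = 21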
-class ALRReacherOptCtrlEnv(ALRReacherEnv):
- def __init__(self, steps_before_reward=200, n_links=5, balance=False):
- self.goal = np.array([0.1, 0.1])
- super(ALRReacherOptCtrlEnv, self).__init__(steps_before_reward, n_links, balance)
-
- def _get_obs(self):
- theta = self.sim.data.qpos.flat[:self.n_links]
- tip_pos = self.get_body_com("fingertip")
- return np.concatenate([
- tip_pos[:2],
- theta,
- self.sim.data.qvel.flat[:self.n_links], # this is angular velocity
- ])
-
- def reset_model(self):
- qpos = self.init_qpos
- qpos[-2:] = self.goal
- qvel = self.init_qvel
- qvel[-2:] = 0
- self.set_state(qpos, qvel)
- self._steps = 0
-
- return self._get_obs()
if __name__ == '__main__':
nl = 5
@@ -130,4 +140,4 @@ if __name__ == '__main__':
if d:
env.reset()
- env.close()
\ No newline at end of file
+ env.close()
From bd4632af8431faa19e698f2f572f6ea87ee0c54e Mon Sep 17 00:00:00 2001
From: Fabian
Date: Thu, 5 May 2022 16:54:39 +0200
Subject: [PATCH 027/104] hole_reacher update
---
.../hole_reacher/hole_reacher.py | 3 +
.../hole_reacher/hr_unbounded_reward.py | 60 +++++++++++++++++++
2 files changed, 63 insertions(+)
create mode 100644 alr_envs/alr/classic_control/hole_reacher/hr_unbounded_reward.py
diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
index 208f005..883be8c 100644
--- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
+++ b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
@@ -45,6 +45,9 @@ class HoleReacherEnv(BaseReacherDirectEnv):
elif rew_fct == "vel_acc":
from alr_envs.alr.classic_control.hole_reacher.hr_dist_vel_acc_reward import HolereacherReward
self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty)
+ elif rew_fct == "unbounded":
+ from alr_envs.alr.classic_control.hole_reacher.hr_unbounded_reward import HolereacherReward
+ self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision)
else:
raise ValueError("Unknown reward function {}".format(rew_fct))
diff --git a/alr_envs/alr/classic_control/hole_reacher/hr_unbounded_reward.py b/alr_envs/alr/classic_control/hole_reacher/hr_unbounded_reward.py
new file mode 100644
index 0000000..7ed13a1
--- /dev/null
+++ b/alr_envs/alr/classic_control/hole_reacher/hr_unbounded_reward.py
@@ -0,0 +1,60 @@
+import numpy as np
+
+
+class HolereacherReward:
+ def __init__(self, allow_self_collision, allow_wall_collision):
+
+ # collision
+ self.allow_self_collision = allow_self_collision
+ self.allow_wall_collision = allow_wall_collision
+ self._is_collided = False
+
+ self.reward_factors = np.array((1, -5e-6))
+
+ def reset(self):
+ self._is_collided = False
+
+ def get_reward(self, env):
+ dist_reward = 0
+ success = False
+
+ self_collision = False
+ wall_collision = False
+
+ if not self.allow_self_collision:
+ self_collision = env._check_self_collision()
+
+ if not self.allow_wall_collision:
+ wall_collision = env.check_wall_collision()
+
+ self._is_collided = self_collision or wall_collision
+
+ if env._steps == 180 or self._is_collided:
+ self.end_eff_pos = np.copy(env.end_effector)
+
+ if env._steps == 199 or self._is_collided:
+ # return reward only in last time step
+ # Episode also terminates when colliding, hence return reward
+ dist = np.linalg.norm(self.end_eff_pos - env._goal)
+
+ if self._is_collided:
+ dist_reward = 0.25 * np.exp(- dist)
+ else:
+ if env.end_effector[1] > 0:
+ dist_reward = np.exp(- dist)
+ else:
+ dist_reward = 1 - self.end_eff_pos[1]
+
+ success = not self._is_collided
+
+ info = {"is_success": success,
+ "is_collided": self._is_collided,
+ "end_effector": np.copy(env.end_effector),
+ "joints": np.copy(env.current_pos)}
+
+ acc_cost = np.sum(env._acc ** 2)
+
+ reward_features = np.array((dist_reward, acc_cost))
+ reward = np.dot(reward_features, self.reward_factors)
+
+ return reward, info
\ No newline at end of file
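A small worked example of the weighting above (illustrative numbers): reaching the goal without collision and with the end effector above the hole at a final distance of 0.05, while the squared-acceleration cost at that step is 1e4, gives

    import numpy as np

    dist_reward = np.exp(-0.05)   # ~0.951, no collision and end effector above the hole
    acc_cost = 1e4                # sum of squared accelerations at this step
    reward = np.dot(np.array((dist_reward, acc_cost)), np.array((1, -5e-6)))
    # reward ~= 0.951 - 0.05 = 0.90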
From 2cc1ab759c25f5068a5fdebbac4c8092a877c105 Mon Sep 17 00:00:00 2001
From: Onur
Date: Thu, 5 May 2022 18:50:20 +0200
Subject: [PATCH 028/104] commit last version of
---
alr_envs/alr/__init__.py | 150 +++++++++++++-----
alr_envs/alr/mujoco/__init__.py | 4 +-
alr_envs/alr/mujoco/beerpong/beerpong.py | 29 ++--
.../mujoco/beerpong/beerpong_reward_staged.py | 12 +-
.../alr/mujoco/beerpong/new_mp_wrapper.py | 43 +++--
alr_envs/alr/mujoco/hopper_jump/__init__.py | 1 +
.../alr/mujoco/hopper_jump/hopper_jump.py | 49 +++---
.../alr/mujoco/hopper_jump/new_mp_wrapper.py | 29 ++++
alr_envs/alr/mujoco/reacher/alr_reacher.py | 42 ++---
alr_envs/mp/episodic_wrapper.py | 15 +-
alr_envs/utils/make_env_helpers.py | 5 +-
11 files changed, 250 insertions(+), 129 deletions(-)
create mode 100644 alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 552adfa..fa9b71b 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -391,8 +391,7 @@ register(
max_episode_steps=600,
kwargs={
"rndm_goal": False,
- "cup_goal_pos": [0.1, -2.0],
- "learn_release_step": True
+ "cup_goal_pos": [0.1, -2.0]
}
)
@@ -404,8 +403,7 @@ register(
max_episode_steps=600,
kwargs={
"rndm_goal": True,
- "cup_goal_pos": [-0.3, -1.2],
- "learn_release_step": True
+ "cup_goal_pos": [-0.3, -1.2]
}
)
@@ -646,7 +644,7 @@ for _v in _versions:
},
"movement_primitives_kwargs": {
'movement_primitives_type': 'promp',
- 'num_dof': 7
+ 'action_dim': 7
},
"phase_generator_kwargs": {
'phase_generator_type': 'linear',
@@ -658,7 +656,9 @@ for _v in _versions:
"controller_kwargs": {
'controller_type': 'motor',
"p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
+ # "p_gains": 0.125*np.array([200, 300, 100, 100, 10, 10, 2.5]),
"d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]),
+ # "d_gains": 0.025*np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05]),
},
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
@@ -753,29 +753,92 @@ for _v in _versions:
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+# ## HopperJump
+# _versions = ["v0", "v1"]
+# for _v in _versions:
+# _env_id = f'ALRHopperJumpProMP-{_v}'
+# register(
+# id= _env_id,
+# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+# kwargs={
+# "name": f"alr_envs:ALRHopperJump-{_v}",
+# "wrappers": [mujoco.hopper_jump.MPWrapper],
+# "mp_kwargs": {
+# "num_dof": 3,
+# "num_basis": 5,
+# "duration": 2,
+# "post_traj_time": 0,
+# "policy_type": "motor",
+# "weights_scale": 1.0,
+# "zero_start": True,
+# "zero_goal": False,
+# "policy_kwargs": {
+# "p_gains": np.ones(3),
+# "d_gains": 0.1*np.ones(3)
+# }
+# }
+# }
+# )
+# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+# ## HopperJump
+# register(
+# id= "ALRHopperJumpProMP-v2",
+# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+# kwargs={
+# "name": f"alr_envs:ALRHopperJump-v2",
+# "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper],
+# "mp_kwargs": {
+# "num_dof": 3,
+# "num_basis": 5,
+# "duration": 2,
+# "post_traj_time": 0,
+# "policy_type": "motor",
+# "weights_scale": 1.0,
+# "zero_start": True,
+# "zero_goal": False,
+# "policy_kwargs": {
+# "p_gains": np.ones(3),
+# "d_gains": 0.1*np.ones(3)
+# }
+# }
+# }
+# )
+# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")
+
## HopperJump
_versions = ["v0", "v1"]
for _v in _versions:
_env_id = f'ALRHopperJumpProMP-{_v}'
register(
id= _env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
kwargs={
"name": f"alr_envs:ALRHopperJump-{_v}",
- "wrappers": [mujoco.hopper_jump.MPWrapper],
- "mp_kwargs": {
- "num_dof": 3,
- "num_basis": 5,
- "duration": 2,
- "post_traj_time": 0,
- "policy_type": "motor",
- "weights_scale": 1.0,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": np.ones(3),
- "d_gains": 0.1*np.ones(3)
- }
+ "wrappers": [mujoco.hopper_jump.NewMPWrapper],
+ "ep_wrapper_kwargs": {
+ "weight_scale": 1
+ },
+ "movement_primitives_kwargs": {
+ 'movement_primitives_type': 'promp',
+ 'action_dim': 3
+ },
+ "phase_generator_kwargs": {
+ 'phase_generator_type': 'linear',
+ 'delay': 0,
+ 'tau': 2, # initial value
+ 'learn_tau': False,
+ 'learn_delay': False
+ },
+ "controller_kwargs": {
+ 'controller_type': 'motor',
+ "p_gains": np.ones(3),
+ "d_gains": 0.1*np.ones(3),
+ },
+ "basis_generator_kwargs": {
+ 'basis_generator_type': 'zero_rbf',
+ 'num_basis': 5,
+ 'num_basis_zero_start': 2
}
}
)
@@ -784,25 +847,35 @@ for _v in _versions:
## HopperJump
register(
id= "ALRHopperJumpProMP-v2",
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
kwargs={
"name": f"alr_envs:ALRHopperJump-v2",
- "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper],
- "mp_kwargs": {
- "num_dof": 3,
- "num_basis": 5,
- "duration": 2,
- "post_traj_time": 0,
- "policy_type": "motor",
- "weights_scale": 1.0,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
+ "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper],
+ "ep_wrapper_kwargs": {
+ "weight_scale": 1
+ },
+ "movement_primitives_kwargs": {
+ 'movement_primitives_type': 'promp',
+ 'action_dim': 3
+ },
+ "phase_generator_kwargs": {
+ 'phase_generator_type': 'linear',
+ 'delay': 0,
+ 'tau': 2, # initial value
+ 'learn_tau': False,
+ 'learn_delay': False
+ },
+ "controller_kwargs": {
+ 'controller_type': 'motor',
"p_gains": np.ones(3),
- "d_gains": 0.1*np.ones(3)
+ "d_gains": 0.1*np.ones(3),
+ },
+ "basis_generator_kwargs": {
+ 'basis_generator_type': 'zero_rbf',
+ 'num_basis': 5,
+ 'num_basis_zero_start': 2
}
}
- }
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")
@@ -916,11 +989,4 @@ for _v in _versions:
}
}
)
- ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-
-# --------------------- Testing new mp wrapper -----------------------------------------------------
-
-# register(
-# id='ALRReacherProMP-v0'
-# )
\ No newline at end of file
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index 9c7eecf..fb86186 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -1,4 +1,3 @@
-from .reacher.alr_reacher import ALRReacherEnv, ALRReacherOptCtrlEnv
from .reacher.balancing import BalancingEnv
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
@@ -10,4 +9,5 @@ from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv
from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import ALRHopperThrowEnv
from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
-from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv
\ No newline at end of file
+from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv
+from .reacher.alr_reacher import ALRReacherEnv
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 661d10c..36998b9 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -3,7 +3,6 @@ import os
import numpy as np
from gym import utils
-from gym import spaces
from gym.envs.mujoco import MujocoEnv
from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
@@ -18,7 +17,7 @@ CUP_POS_MAX = np.array([0.32, -1.2])
class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
- rndm_goal=False, learn_release_step=True, cup_goal_pos=None):
+ rndm_goal=False, cup_goal_pos=None):
cup_goal_pos = np.array(cup_goal_pos if cup_goal_pos is not None else [-0.3, -1.2, 0.840])
if cup_goal_pos.shape[0]==2:
cup_goal_pos = np.insert(cup_goal_pos, 2, 0.840)
@@ -43,7 +42,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
# self._release_step = 175 # time step of ball release
# self._release_step = 130 # time step of ball release
- self._release_step = 100 # time step of ball release
+ self.release_step = 100 # time step of ball release
self.ep_length = 600 # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
self.cup_table_id = 10
@@ -52,7 +51,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.noise_std = 0.01
else:
self.noise_std = 0
- self.learn_release_step = learn_release_step
reward_function = BeerPongReward
self.reward_function = reward_function()
@@ -63,13 +61,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def start_pos(self):
return self._start_pos
- def _set_action_space(self):
- bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
- bounds = np.concatenate((bounds, [[50, self.ep_length*0.333]]), axis=0)
- low, high = bounds.T
- self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
- return self.action_space
-
@property
def start_vel(self):
return self._start_vel
@@ -109,21 +100,22 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
return self._get_obs()
def step(self, a):
- self._release_step = a[-1] if self.learn_release_step else self._release_step
- self._release_step = np.clip(self._release_step, self.action_space.low[-1], self.action_space.high[-1]) \
- if self.learn_release_step else self._release_step
+ # if a.shape[0] == 8: # we learn also when to release the ball
+ # self._release_step = a[-1]
+ # self._release_step = np.clip(self._release_step, 50, 250)
+ # self.release_step = 0.5/self.dt
reward_dist = 0.0
angular_vel = 0.0
- applied_action = a[:a.shape[0]-int(self.learn_release_step)]
+ applied_action = a
reward_ctrl = - np.square(applied_action).sum()
if self.apply_gravity_comp:
applied_action += self.sim.data.qfrc_bias[:len(applied_action)].copy() / self.model.actuator_gear[:, 0]
try:
self.do_simulation(applied_action, self.frame_skip)
- if self._steps < self._release_step:
+ if self._steps < self.release_step:
self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
- elif self._steps == self._release_step and self.add_noise:
+ elif self._steps == self.release_step and self.add_noise:
self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
crash = False
except mujoco_py.builder.MujocoException:
@@ -160,7 +152,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
ball_pos=ball_pos,
ball_vel=ball_vel,
success=success,
- is_collided=is_collided, sim_crash=crash)
+ is_collided=is_collided, sim_crash=crash,
+ table_contact_first=int(not self.reward_function.ball_ground_contact_first))
infos.update(reward_infos)
return ob, reward, done, infos
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index 910763a..bb5dd3f 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -162,12 +162,16 @@ class BeerPongReward:
min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
1e-4 * np.mean(action_cost)
- if env.learn_release_step and not self.ball_in_cup:
- too_small = (env._release_step<50)*(env._release_step-50)**2
- too_big = (env._release_step>200)*0.2*(env._release_step-200)**2
- reward = reward - too_small -too_big
# 1e-7*np.mean(action_cost)
+ # release step punishment
+ min_time_bound = 0.1
+ max_time_bound = 1.0
+ release_time = env.release_step*env.dt
+            release_time_rew = int(release_time<min_time_bound)*(-30-10*(release_time-min_time_bound)**2) + int(release_time>max_time_bound)*(-30-10*(release_time-max_time_bound)**2)
+ reward += release_time_rew
success = self.ball_in_cup
+ # print('release time :', release_time)
else:
reward = - 1e-2 * action_cost
# reward = - 1e-4 * action_cost
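For intuition, and assuming the release-time penalty as reconstructed above, a release outside [min_time_bound, max_time_bound] is penalised quadratically:

    min_time_bound, max_time_bound = 0.1, 1.0
    release_time = 1.2  # seconds, i.e. the ball is released too late
    penalty = int(release_time < min_time_bound) * (-30 - 10 * (release_time - min_time_bound) ** 2) \
        + int(release_time > max_time_bound) * (-30 - 10 * (release_time - max_time_bound) ** 2)
    # penalty == -30.4; releases inside [0.1 s, 1.0 s] add no penalty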
diff --git a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
index 267e76e..7c4ee9d 100644
--- a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
@@ -21,24 +21,35 @@ class NewMPWrapper(EpisodicWrapper):
[False] # env steps
])
- def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
- if self.env.learn_release_step:
- return np.concatenate((step_action, np.atleast_1d(env_spec_params)))
- else:
- return step_action
+ # def set_mp_action_space(self):
+ # min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
+ # if self.mp.learn_tau:
+ # min_action_bounds[0] = 20*self.env.dt
+ # max_action_bounds[0] = 260*self.env.dt
+ # mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
+ # dtype=np.float32)
+ # return mp_action_space
+
+ # def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
+ # if self.mp.learn_tau:
+ # return np.concatenate((step_action, np.atleast_1d(env_spec_params)))
+ # else:
+ # return step_action
def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
- if self.env.learn_release_step:
- return action[:-1], action[-1] # mp_params, release step
+ if self.mp.learn_tau:
+ self.env.env.release_step = action[0]/self.env.dt # Tau value
+ # self.env.env.release_step = np.clip(action[0]/self.env.dt, 20, 260) # Tau value
+ return action, None
else:
return action, None
- def set_action_space(self):
- if self.env.learn_release_step:
- min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
- min_action_bounds = np.concatenate((min_action_bounds.numpy(), [self.env.action_space.low[-1]]))
- max_action_bounds = np.concatenate((max_action_bounds.numpy(), [self.env.action_space.high[-1]]))
- self.action_space = gym.spaces.Box(low=min_action_bounds, high=max_action_bounds, dtype=np.float32)
- return self.action_space
- else:
- return super(NewMPWrapper, self).set_action_space()
+ # def set_action_space(self):
+ # if self.mp.learn_tau:
+ # min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
+ # min_action_bounds = np.concatenate((min_action_bounds.numpy(), [self.env.action_space.low[-1]]))
+ # max_action_bounds = np.concatenate((max_action_bounds.numpy(), [self.env.action_space.high[-1]]))
+ # self.action_space = gym.spaces.Box(low=min_action_bounds, high=max_action_bounds, dtype=np.float32)
+ # return self.action_space
+ # else:
+ # return super(NewMPWrapper, self).set_action_space()
diff --git a/alr_envs/alr/mujoco/hopper_jump/__init__.py b/alr_envs/alr/mujoco/hopper_jump/__init__.py
index c6db9c2..fbffe48 100644
--- a/alr_envs/alr/mujoco/hopper_jump/__init__.py
+++ b/alr_envs/alr/mujoco/hopper_jump/__init__.py
@@ -1 +1,2 @@
from .mp_wrapper import MPWrapper, HighCtxtMPWrapper
+from .new_mp_wrapper import NewMPWrapper, NewHighCtxtMPWrapper
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
index e76cf4f..9a74bb0 100644
--- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
@@ -80,8 +80,6 @@ class ALRHopperJumpEnv(HopperEnv):
# overwrite reset_model to make it deterministic
def reset_model(self):
- noise_low = -self._reset_noise_scale
- noise_high = self._reset_noise_scale
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
@@ -104,14 +102,26 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
def reset_model(self):
self._floor_geom_id = self.model.geom_name2id('floor')
self._foot_geom_id = self.model.geom_name2id('foot_geom')
- noise_low = -self._reset_noise_scale
- noise_high = self._reset_noise_scale
+ noise_low = -np.ones(self.model.nq)*self._reset_noise_scale
+ noise_low[1] = 0
+ noise_low[2] = -0.3
+ noise_low[3] = -0.1
+ noise_low[4] = -1.1
+ noise_low[5] = -0.785
+
+ noise_high = np.ones(self.model.nq)*self._reset_noise_scale
+ noise_high[1] = 0
+ noise_high[2] = 0.3
+ noise_high[3] = 0
+ noise_high[4] = 0
+ noise_high[5] = 0.785
+
rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
- rnd_vec[2] *= 0.05 # the angle around the y axis shouldn't be too high as the agent then falls down quickly and
+ # rnd_vec[2] *= 0.05 # the angle around the y axis shouldn't be too high as the agent then falls down quickly and
# can not recover
- rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3)
+ # rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3)
qpos = self.init_qpos + rnd_vec
- qvel = self.init_qvel #+ self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+ qvel = self.init_qvel
self.set_state(qpos, qvel)
@@ -167,16 +177,19 @@ if __name__ == '__main__':
env = ALRHopperJumpRndmPosEnv()
obs = env.reset()
- for i in range(2000):
- # objective.load_result("/tmp/cma")
- # test with random actions
- ac = env.action_space.sample()
- obs, rew, d, info = env.step(ac)
- # if i % 10 == 0:
- # env.render(mode=render_mode)
- env.render(mode=render_mode)
- if d:
- print('After ', i, ' steps, done: ', d)
- env.reset()
+ for k in range(10):
+ obs = env.reset()
+ print('observation :', obs[:6])
+ for i in range(200):
+ # objective.load_result("/tmp/cma")
+ # test with random actions
+ ac = env.action_space.sample()
+ obs, rew, d, info = env.step(ac)
+ # if i % 10 == 0:
+ # env.render(mode=render_mode)
+ env.render(mode=render_mode)
+ if d:
+ print('After ', i, ' steps, done: ', d)
+ env.reset()
env.close()
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
new file mode 100644
index 0000000..9932403
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
@@ -0,0 +1,29 @@
+from alr_envs.mp.episodic_wrapper import EpisodicWrapper
+from typing import Union, Tuple
+import numpy as np
+
+
+class NewMPWrapper(EpisodicWrapper):
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qpos[3:6].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel[3:6].copy()
+
+ def set_active_obs(self):
+ return np.hstack([
+ [False] * (5 + int(not self.env.exclude_current_positions_from_observation)), # position
+ [False] * 6, # velocity
+ [True]
+ ])
+
+
+class NewHighCtxtMPWrapper(NewMPWrapper):
+ def set_active_obs(self):
+ return np.hstack([
+ [True] * (5 + int(not self.env.exclude_current_positions_from_observation)), # position
+ [False] * 6, # velocity
+ [False]
+ ])
\ No newline at end of file
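For orientation (assuming the standard MuJoCo Hopper joint ordering), the slices above pick out exactly the three actuated joints that the 3-dimensional ProMP controls:

    # qpos: [root_x, root_z, root_rot, thigh, leg, foot] -> actuated joints are qpos[3:6]
    # qvel uses the same ordering                        -> actuated velocities are qvel[3:6]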
diff --git a/alr_envs/alr/mujoco/reacher/alr_reacher.py b/alr_envs/alr/mujoco/reacher/alr_reacher.py
index b436fdd..6b36407 100644
--- a/alr_envs/alr/mujoco/reacher/alr_reacher.py
+++ b/alr_envs/alr/mujoco/reacher/alr_reacher.py
@@ -68,35 +68,35 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
def viewer_setup(self):
self.viewer.cam.trackbodyid = 0
- # def reset_model(self):
- # qpos = self.init_qpos
- # if not hasattr(self, "goal"):
- # self.goal = np.array([-0.25, 0.25])
- # # self.goal = self.init_qpos.copy()[:2] + 0.05
- # qpos[-2:] = self.goal
- # qvel = self.init_qvel
- # qvel[-2:] = 0
- # self.set_state(qpos, qvel)
- # self._steps = 0
- #
- # return self._get_obs()
-
def reset_model(self):
- qpos = self.init_qpos.copy()
- while True:
- self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
- # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
- # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
- if np.linalg.norm(self.goal) < self.n_links / 10:
- break
+ qpos = self.init_qpos
+ if not hasattr(self, "goal"):
+ self.goal = np.array([-0.25, 0.25])
+ # self.goal = self.init_qpos.copy()[:2] + 0.05
qpos[-2:] = self.goal
- qvel = self.init_qvel.copy()
+ qvel = self.init_qvel
qvel[-2:] = 0
self.set_state(qpos, qvel)
self._steps = 0
return self._get_obs()
+ # def reset_model(self):
+ # qpos = self.init_qpos.copy()
+ # while True:
+ # self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
+ # # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
+ # # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
+ # if np.linalg.norm(self.goal) < self.n_links / 10:
+ # break
+ # qpos[-2:] = self.goal
+ # qvel = self.init_qvel.copy()
+ # qvel[-2:] = 0
+ # self.set_state(qpos, qvel)
+ # self._steps = 0
+ #
+ # return self._get_obs()
+
# def reset_model(self):
# qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
# while True:
diff --git a/alr_envs/mp/episodic_wrapper.py b/alr_envs/mp/episodic_wrapper.py
index 092f1bc..7426c15 100644
--- a/alr_envs/mp/episodic_wrapper.py
+++ b/alr_envs/mp/episodic_wrapper.py
@@ -50,13 +50,10 @@ class EpisodicWrapper(gym.Env, ABC):
# rendering
self.render_mode = render_mode
self.render_kwargs = {}
- # self.time_steps = np.linspace(0, self.duration, self.traj_steps + 1)
self.time_steps = np.linspace(0, self.duration, self.traj_steps)
self.mp.set_mp_times(self.time_steps)
# action_bounds = np.inf * np.ones((np.prod(self.mp.num_params)))
- min_action_bounds, max_action_bounds = mp.get_param_bounds()
- self.mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
- dtype=np.float32)
+ self.mp_action_space = self.set_mp_action_space()
self.action_space = self.set_action_space()
self.active_obs = self.set_active_obs()
@@ -65,7 +62,7 @@ class EpisodicWrapper(gym.Env, ABC):
dtype=self.env.observation_space.dtype)
def get_trajectory(self, action: np.ndarray) -> Tuple:
- # TODO: this follows the implementation of the mp_pytorch library which includes the paramters tau and delay at
+ # TODO: this follows the implementation of the mp_pytorch library which includes the parameters tau and delay at
# the beginning of the array.
ignore_indices = int(self.mp.learn_tau) + int(self.mp.learn_delay)
scaled_mp_params = action.copy()
@@ -84,6 +81,13 @@ class EpisodicWrapper(gym.Env, ABC):
return trajectory, velocity
+ def set_mp_action_space(self):
+ """This function can be used to set up an individual space for the parameters of the mp."""
+ min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
+ mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
+ dtype=np.float32)
+ return mp_action_space
+
def set_action_space(self):
"""
This function can be used to modify the action space for considering actions which are not learned via motion
@@ -179,6 +183,7 @@ class EpisodicWrapper(gym.Env, ABC):
step_action = self.controller.get_action(pos_vel[0], pos_vel[1], self.current_pos, self.current_vel)
step_action = self._step_callback(t, env_spec_params, step_action) # include possible callback info
c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
+ # print('step/clipped action ratio: ', step_action/c_action)
obs, c_reward, done, info = self.env.step(c_action)
if self.verbose >= 2:
actions[t, :] = c_action
diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py
index 0c06906..f22e0d7 100644
--- a/alr_envs/utils/make_env_helpers.py
+++ b/alr_envs/utils/make_env_helpers.py
@@ -156,12 +156,11 @@ def make_mp_from_kwargs(
ep_wrapper_kwargs['duration'] = dummy_env.spec.max_episode_steps*dummy_env.dt
if phase_kwargs.get('tau', None) is None:
phase_kwargs['tau'] = ep_wrapper_kwargs['duration']
- action_dim = mp_kwargs.pop('num_dof', None)
- action_dim = action_dim if action_dim is not None else np.prod(dummy_env.action_space.shape).item()
+ mp_kwargs['action_dim'] = mp_kwargs.get('action_dim', np.prod(dummy_env.action_space.shape).item())
phase_gen = get_phase_generator(**phase_kwargs)
basis_gen = get_basis_generator(phase_generator=phase_gen, **basis_kwargs)
controller = get_controller(**controller_kwargs)
- mp = get_movement_primitive(action_dim=action_dim, basis_generator=basis_gen, **mp_kwargs)
+ mp = get_movement_primitive(basis_generator=basis_gen, **mp_kwargs)
_env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, mp=mp, controller=controller,
ep_wrapper_kwargs=ep_wrapper_kwargs, seed=seed, **kwargs)
return _env
From 647f086a8d3545a96f5018ebeed489625a08f01c Mon Sep 17 00:00:00 2001
From: Onur
Date: Tue, 17 May 2022 10:31:47 +0200
Subject: [PATCH 029/104] remove unnecessary code lines
---
alr_envs/alr/__init__.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index fa9b71b..1101020 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -656,9 +656,7 @@ for _v in _versions:
"controller_kwargs": {
'controller_type': 'motor',
"p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
- # "p_gains": 0.125*np.array([200, 300, 100, 100, 10, 10, 2.5]),
"d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]),
- # "d_gains": 0.025*np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05]),
},
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
From f1a96c055b7787831f494cbd41933a221dd46c97 Mon Sep 17 00:00:00 2001
From: Onur
Date: Sun, 29 May 2022 11:58:01 +0200
Subject: [PATCH 030/104] safety
---
alr_envs/alr/__init__.py | 90 ++++++++-
alr_envs/alr/mujoco/__init__.py | 2 +-
alr_envs/alr/mujoco/ant_jump/__init__.py | 1 +
.../alr/mujoco/ant_jump/new_mp_wrapper.py | 21 +++
.../assets/beerpong_wo_cup_big_table.xml | 2 +-
alr_envs/alr/mujoco/beerpong/beerpong.py | 29 ++-
.../mujoco/beerpong/beerpong_reward_staged.py | 154 ++++-----------
.../alr/mujoco/beerpong/new_mp_wrapper.py | 6 +
.../assets/{hopper.xml => hopper_jump.xml} | 4 +
.../alr/mujoco/hopper_jump/hopper_jump.py | 178 +++++++++++++++---
.../alr/mujoco/hopper_jump/new_mp_wrapper.py | 12 +-
alr_envs/mp/episodic_wrapper.py | 2 +-
12 files changed, 333 insertions(+), 168 deletions(-)
create mode 100644 alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py
rename alr_envs/alr/mujoco/hopper_jump/assets/{hopper.xml => hopper_jump.xml} (92%)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 1101020..9dc31ae 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -252,7 +252,8 @@ register(
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
- "context": False
+ "context": False,
+        "healthy_reward": 1.0
}
)
register(
@@ -273,6 +274,17 @@ register(
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP
}
)
+
+register(
+ id='ALRHopperJump-v3',
+ entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
+ "context": True,
+ "healthy_reward": 1.0
+ }
+)
# CtxtFree are v0, Contextual are v1
register(
id='ALRHopperJumpOnBox-v0',
@@ -723,6 +735,46 @@ for _v in _versions:
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+## AntJump
+_versions = ["v0", "v1"]
+for _v in _versions:
+ _env_id = f'ALRAntJumpProMP-{_v}'
+ register(
+ id= _env_id,
+ entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+ kwargs={
+ "name": f"alr_envs:ALRAntJump-{_v}",
+ "wrappers": [mujoco.ant_jump.NewMPWrapper],
+ "ep_wrapper_kwargs": {
+ "weight_scale": 1
+ },
+ "movement_primitives_kwargs": {
+ 'movement_primitives_type': 'promp',
+ 'action_dim': 8
+ },
+ "phase_generator_kwargs": {
+ 'phase_generator_type': 'linear',
+ 'delay': 0,
+ 'tau': 10, # initial value
+ 'learn_tau': False,
+ 'learn_delay': False
+ },
+ "controller_kwargs": {
+ 'controller_type': 'motor',
+ "p_gains": np.ones(8),
+ "d_gains": 0.1*np.ones(8),
+ },
+ "basis_generator_kwargs": {
+ 'basis_generator_type': 'zero_rbf',
+ 'num_basis': 5,
+ 'num_basis_zero_start': 2
+ }
+ }
+ )
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+
+
## HalfCheetahJump
_versions = ["v0", "v1"]
for _v in _versions:
@@ -877,6 +929,42 @@ register(
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")
+
+## HopperJump
+register(
+ id= "ALRHopperJumpProMP-v3",
+ entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+ kwargs={
+ "name": f"alr_envs:ALRHopperJump-v3",
+ "wrappers": [mujoco.hopper_jump.NewMPWrapper],
+ "ep_wrapper_kwargs": {
+ "weight_scale": 1
+ },
+ "movement_primitives_kwargs": {
+ 'movement_primitives_type': 'promp',
+ 'action_dim': 3
+ },
+ "phase_generator_kwargs": {
+ 'phase_generator_type': 'linear',
+ 'delay': 0,
+ 'tau': 2, # initial value
+ 'learn_tau': False,
+ 'learn_delay': False
+ },
+ "controller_kwargs": {
+ 'controller_type': 'motor',
+ "p_gains": np.ones(3),
+ "d_gains": 0.1*np.ones(3),
+ },
+ "basis_generator_kwargs": {
+ 'basis_generator_type': 'zero_rbf',
+ 'num_basis': 5,
+ 'num_basis_zero_start': 2
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v3")
+
## HopperJumpOnBox
_versions = ["v0", "v1"]
for _v in _versions:
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index fb86186..5c04b03 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -5,7 +5,7 @@ from .table_tennis.tt_gym import TTEnvGym
from .beerpong.beerpong import ALRBeerBongEnv
from .ant_jump.ant_jump import ALRAntJumpEnv
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
-from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv
+from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv
from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import ALRHopperThrowEnv
from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
diff --git a/alr_envs/alr/mujoco/ant_jump/__init__.py b/alr_envs/alr/mujoco/ant_jump/__init__.py
index c5e6d2f..5d15867 100644
--- a/alr_envs/alr/mujoco/ant_jump/__init__.py
+++ b/alr_envs/alr/mujoco/ant_jump/__init__.py
@@ -1 +1,2 @@
from .mp_wrapper import MPWrapper
+from .new_mp_wrapper import NewMPWrapper
diff --git a/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py
new file mode 100644
index 0000000..c33048f
--- /dev/null
+++ b/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py
@@ -0,0 +1,21 @@
+from alr_envs.mp.episodic_wrapper import EpisodicWrapper
+from typing import Union, Tuple
+import numpy as np
+
+
+
+class NewMPWrapper(EpisodicWrapper):
+
+ def set_active_obs(self):
+ return np.hstack([
+ [False] * 111, # ant has 111 dimensional observation space !!
+ [True] # goal height
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray]:
+ return self.env.sim.data.qpos[7:15].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel[6:14].copy()
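Analogously (assuming the standard MuJoCo Ant layout), qpos[0:7] is the free-joint root (3-D position plus a quaternion) and qpos[7:15] are the 8 actuated hip/ankle joints, matching the action_dim of 8 in the AntJump registration above:

    # qpos: [x, y, z, quat_w, quat_x, quat_y, quat_z, 8 joint angles]  -> qpos[7:15]
    # qvel: [3 linear + 3 angular root velocities, 8 joint velocities] -> qvel[6:14]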
diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml
index 436b36c..78e2c32 100644
--- a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml
+++ b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml
@@ -144,7 +144,7 @@
solref="-10000 -100"/>
-
+
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 36998b9..017bfcd 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -7,8 +7,12 @@ from gym.envs.mujoco import MujocoEnv
from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
-CUP_POS_MIN = np.array([-0.32, -2.2])
-CUP_POS_MAX = np.array([0.32, -1.2])
+CUP_POS_MIN = np.array([-1.42, -4.05])
+CUP_POS_MAX = np.array([1.42, -1.25])
+
+
+# CUP_POS_MIN = np.array([-0.32, -2.2])
+# CUP_POS_MAX = np.array([0.32, -1.2])
# smaller context space -> Easier task
# CUP_POS_MIN = np.array([-0.16, -2.2])
@@ -24,8 +28,10 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.cup_goal_pos = np.array(cup_goal_pos)
self._steps = 0
+ # self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
+ # "beerpong_wo_cup" + ".xml")
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
- "beerpong_wo_cup" + ".xml")
+ "beerpong_wo_cup_big_table" + ".xml")
self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
@@ -100,10 +106,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
return self._get_obs()
def step(self, a):
- # if a.shape[0] == 8: # we learn also when to release the ball
- # self._release_step = a[-1]
- # self._release_step = np.clip(self._release_step, 50, 250)
- # self.release_step = 0.5/self.dt
reward_dist = 0.0
angular_vel = 0.0
applied_action = a
@@ -170,16 +172,11 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
[self._steps],
])
- # TODO
- @property
- def active_obs(self):
- return np.hstack([
- [False] * 7, # cos
- [False] * 7, # sin
- [True] * 2, # xy position of cup
- [False] # env steps
- ])
+class ALRBeerPongStepEnv(ALRBeerBongEnv):
+ def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
+ rndm_goal=False, cup_goal_pos=None):
+ super(ALRBeerPongStepEnv, self).__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
if __name__ == "__main__":
env = ALRBeerBongEnv(rndm_goal=True)
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index bb5dd3f..cb75127 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -40,6 +40,7 @@ class BeerPongReward:
self.cup_z_axes = None
self.collision_penalty = 500
self.reset(None)
+ self.is_initialized = False
def reset(self, noisy):
self.ball_traj = []
@@ -55,113 +56,60 @@ class BeerPongReward:
self.ball_wall_contact = False
self.ball_cup_contact = False
self.ball_in_cup = False
+        self.dist_ground_cup = -1  # distance from floor to cup, recorded at the first floor contact
self.noisy_bp = noisy
self._t_min_final_dist = -1
def compute_reward(self, env, action):
- self.ball_id = env.sim.model._body_name2id["ball"]
- self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
- self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
- self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
- self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
- self.cup_table_id = env.sim.model._body_name2id["cup_table"]
- self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
- self.wall_collision_id = env.sim.model._geom_name2id["wall"]
- self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
- self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
- self.ground_collision_id = env.sim.model._geom_name2id["ground"]
- self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
+
+ if not self.is_initialized:
+ self.is_initialized = True
+ self.ball_id = env.sim.model._body_name2id["ball"]
+ self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
+ self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
+ self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
+ self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
+ self.cup_table_id = env.sim.model._body_name2id["cup_table"]
+ self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
+ self.wall_collision_id = env.sim.model._geom_name2id["wall"]
+ self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
+ self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
+ self.ground_collision_id = env.sim.model._geom_name2id["ground"]
+ self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
goal_pos = env.sim.data.site_xpos[self.goal_id]
ball_pos = env.sim.data.body_xpos[self.ball_id]
ball_vel = env.sim.data.body_xvelp[self.ball_id]
goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
+
+ self._check_contacts(env.sim)
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
-
+ self.dist_ground_cup = np.linalg.norm(ball_pos-goal_pos) \
+ if self.ball_ground_contact_first and self.dist_ground_cup == -1 else self.dist_ground_cup
action_cost = np.sum(np.square(action))
self.action_costs.append(action_cost)
- # ##################### Reward function which forces to bounce once on the table (tanh) ########################
- # if not self.ball_table_contact:
- # self.ball_table_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
- # self.table_collision_id)
- #
- # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
- # if env._steps == env.ep_length - 1 or self._is_collided:
- # min_dist = np.min(self.dists)
- # final_dist = self.dists_final[-1]
- #
- # ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id,
- # self.cup_table_collision_id)
- #
- # # encourage bounce before falling into cup
- # if not ball_in_cup:
- # if not self.ball_table_contact:
- # reward = 0.2 * (1 - np.tanh(0.5*min_dist)) + 0.1 * (1 - np.tanh(0.5*final_dist))
- # else:
- # reward = (1 - np.tanh(0.5*min_dist)) + 0.5 * (1 - np.tanh(0.5*final_dist))
- # else:
- # if not self.ball_table_contact:
- # reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 1
- # else:
- # reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 3
- #
- # # reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
- # success = ball_in_cup
- # crash = self._is_collided
- # else:
- # reward = - 1e-2 * action_cost
- # success = False
- # crash = False
- # ################################################################################################################
-
- ##################### Reward function which does not force to bounce once on the table (tanh) ################
- # self._check_contacts(env.sim)
- # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
- # if env._steps == env.ep_length - 1 or self._is_collided:
- # min_dist = np.min(self.dists)
- # final_dist = self.dists_final[-1]
- #
- # # encourage bounce before falling into cup
- # if not self.ball_in_cup:
- # if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact:
- # min_dist_coeff, final_dist_coeff, rew_offset = 0.2, 0.1, 0
- # # reward = 0.2 * (1 - np.tanh(0.5*min_dist)) + 0.1 * (1 - np.tanh(0.5*final_dist))
- # else:
- # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, 0
- # # reward = (1 - np.tanh(0.5*min_dist)) + 0.5 * (1 - np.tanh(0.5*final_dist))
- # else:
- # min_dist_coeff, final_dist_coeff, rew_offset = 1, 2, 3
- # # reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 3
- #
- # reward = final_dist_coeff * (1 - np.tanh(0.5 * final_dist)) + min_dist_coeff * (1 - np.tanh(0.5 * min_dist)) \
- # + rew_offset
- # success = self.ball_in_cup
- # crash = self._is_collided
- # else:
- # reward = - 1e-2 * action_cost
- # success = False
- # crash = False
- ################################################################################################################
-
# # ##################### Reward function which does not force to bounce once on the table (quad dist) ############
- self._check_contacts(env.sim)
+
self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+
if env._steps == env.ep_length - 1 or self._is_collided:
min_dist = np.min(self.dists)
final_dist = self.dists_final[-1]
- # if self.ball_ground_contact_first:
- # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -6
- # else:
- if not self.ball_in_cup:
- if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact:
- min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -4
- else:
- min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -2
+ if self.ball_ground_contact_first:
+                min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 2, -4  # relative rew offset when first bouncing on ground
+ # min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 0, -6 # absolute rew offset when first bouncing on ground
else:
- min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
+ if not self.ball_in_cup:
+ if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact:
+ min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 0, -4
+ else:
+ min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 0, -2
+ else:
+                    min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 0, 1, 0, 0
+ # dist_ground_cup = 1 * self.dist_ground_cup
reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
- 1e-4 * np.mean(action_cost)
+ 1e-4 * np.mean(action_cost) - ground_contact_dist_coeff*self.dist_ground_cup ** 2
# 1e-7*np.mean(action_cost)
# release step punishment
min_time_bound = 0.1
@@ -172,43 +120,13 @@ class BeerPongReward:
reward += release_time_rew
success = self.ball_in_cup
# print('release time :', release_time)
+ # print('dist_ground_cup :', dist_ground_cup)
else:
reward = - 1e-2 * action_cost
# reward = - 1e-4 * action_cost
# reward = 0
success = False
# ################################################################################################################
-
- # # # ##################### Reward function which does not force to bounce once on the table (quad dist) ############
- # self._check_contacts(env.sim)
- # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
- # if env._steps == env.ep_length - 1 or self._is_collided:
- # min_dist = np.min(self.dists)
- # final_dist = self.dists_final[-1]
- #
- # if not self.ball_in_cup:
- # if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact:
- # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -6
- # else:
- # if self.ball_ground_contact_first:
- # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -4
- # else:
- # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -2
- # else:
- # if self.ball_ground_contact_first:
- # min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, -1
- # else:
- # min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
- # reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
- # 1e-7 * np.mean(action_cost)
- # # 1e-4*np.mean(action_cost)
- # success = self.ball_in_cup
- # else:
- # # reward = - 1e-2 * action_cost
- # # reward = - 1e-4 * action_cost
- # reward = 0
- # success = False
- # ################################################################################################################
infos = {}
infos["success"] = success
infos["is_collided"] = self._is_collided
diff --git a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
index 7c4ee9d..9683687 100644
--- a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
@@ -44,6 +44,12 @@ class NewMPWrapper(EpisodicWrapper):
else:
return action, None
+ def set_context(self, context):
+ xyz = np.zeros(3)
+ xyz[:2] = context
+ xyz[-1] = 0.840
+ self.env.env.model.body_pos[self.env.env.cup_table_id] = xyz
+ return self.get_observation_from_step(self.env.env._get_obs())
# def set_action_space(self):
# if self.mp.learn_tau:
# min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
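set_context above lifts a 2D cup context into the 3D body position used by MuJoCo, reusing the fixed cup height of 0.840 that appears throughout these patches. A standalone sketch of that mapping on plain numpy arrays, without the simulation:

    import numpy as np

    def context_to_cup_xyz(context_xy, cup_height=0.840):
        xyz = np.zeros(3)
        xyz[:2] = context_xy    # sampled goal cup position on the table
        xyz[-1] = cup_height    # fixed height of the cup base
        return xyz

    print(context_to_cup_xyz(np.array([-0.3, -1.2])))  # [-0.3  -1.2   0.84]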
diff --git a/alr_envs/alr/mujoco/hopper_jump/assets/hopper.xml b/alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump.xml
similarity index 92%
rename from alr_envs/alr/mujoco/hopper_jump/assets/hopper.xml
rename to alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump.xml
index f18bc46..3348bab 100644
--- a/alr_envs/alr/mujoco/hopper_jump/assets/hopper.xml
+++ b/alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump.xml
@@ -25,12 +25,16 @@
+
+
+
+
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
index 9a74bb0..979e224 100644
--- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
@@ -1,5 +1,6 @@
from gym.envs.mujoco.hopper_v3 import HopperEnv
import numpy as np
+import os
MAX_EPISODE_STEPS_HOPPERJUMP = 250
@@ -14,13 +15,13 @@ class ALRHopperJumpEnv(HopperEnv):
"""
def __init__(self,
- xml_file='hopper.xml',
+ xml_file='hopper_jump.xml',
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=0.0,
penalty=0.0,
context=True,
- terminate_when_unhealthy=True,
+ terminate_when_unhealthy=False,
healthy_state_range=(-100.0, 100.0),
healthy_z_range=(0.5, float('inf')),
healthy_angle_range=(-float('inf'), float('inf')),
@@ -34,6 +35,13 @@ class ALRHopperJumpEnv(HopperEnv):
self.goal = 0
self.context = context
self.exclude_current_positions_from_observation = exclude_current_positions_from_observation
+ self._floor_geom_id = None
+ self._foot_geom_id = None
+ self.contact_with_floor = False
+ self.init_floor_contact = False
+ self.has_left_floor = False
+ self.contact_dist = None
+ xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
exclude_current_positions_from_observation)
@@ -43,28 +51,36 @@ class ALRHopperJumpEnv(HopperEnv):
self.current_step += 1
self.do_simulation(action, self.frame_skip)
height_after = self.get_body_com("torso")[2]
+ site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
self.max_height = max(height_after, self.max_height)
ctrl_cost = self.control_cost(action)
costs = ctrl_cost
done = False
-
+ rewards = 0
if self.current_step >= self.max_episode_steps:
hight_goal_distance = -10*np.linalg.norm(self.max_height - self.goal) if self.context else self.max_height
- healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
+ healthy_reward = 0 if self.context else self.healthy_reward * 2 # self.current_step
height_reward = self._forward_reward_weight * hight_goal_distance # maybe move the reward calculation into the if branch and define two different _forward_reward_weight variables for the contextual and episodic cases separately
rewards = height_reward + healthy_reward
- else:
- # penalty for wrong start direction of first two joints; not needed, could be removed
- rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0
+ # else:
+ # # penalty for wrong start direction of first two joints; not needed, could be removed
+ # rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0
observation = self._get_obs()
reward = rewards - costs
+ # info = {
+ # 'height' : height_after,
+ # 'max_height': self.max_height,
+ # 'goal' : self.goal
+ # }
info = {
- 'height' : height_after,
+ 'height': height_after,
+ 'x_pos': site_pos_after,
'max_height': self.max_height,
- 'goal' : self.goal
+ 'height_rew': self.max_height,
+ 'healthy_reward': self.healthy_reward * 2
}
return observation, reward, done, info
@@ -73,7 +89,7 @@ class ALRHopperJumpEnv(HopperEnv):
return np.append(super()._get_obs(), self.goal)
def reset(self):
- self.goal = np.random.uniform(1.4, 2.3, 1) # 1.3 2.3
+ self.goal = np.random.uniform(1.4, 2.16, 1) # 1.3 2.3
self.max_height = 0
self.current_step = 0
return super().reset()
@@ -87,14 +103,124 @@ class ALRHopperJumpEnv(HopperEnv):
self.set_state(qpos, qvel)
observation = self._get_obs()
+ self.has_left_floor = False
+ self.contact_with_floor = False
+ self.init_floor_contact = False
+ self.contact_dist = None
return observation
+ def _contact_checker(self, id_1, id_2):
+ for coni in range(0, self.sim.data.ncon):
+ con = self.sim.data.contact[coni]
+ collision = con.geom1 == id_1 and con.geom2 == id_2
+ collision_trans = con.geom1 == id_2 and con.geom2 == id_1
+ if collision or collision_trans:
+ return True
+ return False
+
+
+class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
+ # The goal here is the desired x-Position of the Torso
+
+ def step(self, action):
+
+ self._floor_geom_id = self.model.geom_name2id('floor')
+ self._foot_geom_id = self.model.geom_name2id('foot_geom')
+
+ self.current_step += 1
+ self.do_simulation(action, self.frame_skip)
+ height_after = self.get_body_com("torso")[2]
+ site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
+ self.max_height = max(height_after, self.max_height)
+
+ # floor_contact = self._contact_checker(self._floor_geom_id, self._foot_geom_id) if not self.contact_with_floor else False
+ # self.init_floor_contact = floor_contact if not self.init_floor_contact else self.init_floor_contact
+ # self.has_left_floor = not floor_contact if self.init_floor_contact and not self.has_left_floor else self.has_left_floor
+ # self.contact_with_floor = floor_contact if not self.contact_with_floor and self.has_left_floor else self.contact_with_floor
+
+ floor_contact = self._contact_checker(self._floor_geom_id,
+ self._foot_geom_id) if not self.contact_with_floor else False
+ if not self.init_floor_contact:
+ self.init_floor_contact = floor_contact
+ if self.init_floor_contact and not self.has_left_floor:
+ self.has_left_floor = not floor_contact
+ if not self.contact_with_floor and self.has_left_floor:
+ self.contact_with_floor = floor_contact
+
+ if self.contact_dist is None and self.contact_with_floor:
+ self.contact_dist = np.linalg.norm(self.sim.data.site_xpos[self.model.site_name2id('foot_site')]
+ - np.array([self.goal, 0, 0], dtype=object))[0]
+
+ ctrl_cost = self.control_cost(action)
+ costs = ctrl_cost
+ done = False
+ goal_dist = 0
+ rewards = 0
+ if self.current_step >= self.max_episode_steps:
+ # healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
+ healthy_reward = self.healthy_reward * 2 #* self.current_step
+ goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object))[0]
+ contact_dist = self.contact_dist if self.contact_dist is not None else 5
+ dist_reward = self._forward_reward_weight * (-3*goal_dist + 10*self.max_height - 2*contact_dist)
+ rewards = dist_reward + healthy_reward
+
+ # else:
+ ## penalty for wrong start direction of first two joints; not needed, could be removed
+ # rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0
+
+ observation = self._get_obs()
+ reward = rewards - costs
+ info = {
+ 'height': height_after,
+ 'x_pos': site_pos_after,
+ 'max_height': self.max_height,
+ 'goal': self.goal,
+ 'dist_rew': goal_dist,
+ 'height_rew': self.max_height,
+ 'healthy_reward': self.healthy_reward * 2
+ }
+
+ return observation, reward, done, info
+
+ def reset_model(self):
+ self.init_qpos[1] = 1.5
+ self._floor_geom_id = self.model.geom_name2id('floor')
+ self._foot_geom_id = self.model.geom_name2id('foot_geom')
+ noise_low = -np.zeros(self.model.nq)
+ noise_low[3] = -0.5
+ noise_low[4] = -0.2
+ noise_low[5] = 0
+
+ noise_high = np.zeros(self.model.nq)
+ noise_high[3] = 0
+ noise_high[4] = 0
+ noise_high[5] = 0.785
+
+ rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ qpos = self.init_qpos + rnd_vec
+ qvel = self.init_qvel
+
+ self.set_state(qpos, qvel)
+
+ observation = self._get_obs()
+ self.has_left_floor = False
+ self.contact_with_floor = False
+ self.init_floor_contact = False
+ self.contact_dist = None
+
+ return observation
+
+ def reset(self):
+ super().reset()
+ # self.goal = np.random.uniform(-1.5, 1.5, 1)
+ self.goal = np.random.uniform(0, 1.5, 1)
+ # self.goal = np.array([1.5])
+ self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = np.array([self.goal, 0, 0], dtype=object)
+ return self.reset_model()
+
class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
def __init__(self, max_episode_steps=250):
- self.contact_with_floor = False
- self._floor_geom_id = None
- self._foot_geom_id = None
super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False,
reset_noise_scale=5e-1,
max_episode_steps=max_episode_steps)
@@ -104,17 +230,17 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
self._foot_geom_id = self.model.geom_name2id('foot_geom')
noise_low = -np.ones(self.model.nq)*self._reset_noise_scale
noise_low[1] = 0
- noise_low[2] = -0.3
- noise_low[3] = -0.1
- noise_low[4] = -1.1
- noise_low[5] = -0.785
+ noise_low[2] = 0
+ noise_low[3] = -0.2
+ noise_low[4] = -0.2
+ noise_low[5] = -0.1
noise_high = np.ones(self.model.nq)*self._reset_noise_scale
noise_high[1] = 0
- noise_high[2] = 0.3
+ noise_high[2] = 0
noise_high[3] = 0
noise_high[4] = 0
- noise_high[5] = 0.785
+ noise_high[5] = 0.1
rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
# rnd_vec[2] *= 0.05 # the angle around the y axis shouldn't be too high as the agent then falls down quickly and
@@ -162,24 +288,18 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
return observation, reward, done, info
- def _contact_checker(self, id_1, id_2):
- for coni in range(0, self.sim.data.ncon):
- con = self.sim.data.contact[coni]
- collision = con.geom1 == id_1 and con.geom2 == id_2
- collision_trans = con.geom1 == id_2 and con.geom2 == id_1
- if collision or collision_trans:
- return True
- return False
+
if __name__ == '__main__':
render_mode = "human" # "human" or "partial" or "final"
# env = ALRHopperJumpEnv()
- env = ALRHopperJumpRndmPosEnv()
+ env = ALRHopperXYJumpEnv()
+ # env = ALRHopperJumpRndmPosEnv()
obs = env.reset()
for k in range(10):
obs = env.reset()
- print('observation :', obs[:6])
+ print('observation :', obs[:])
for i in range(200):
# objective.load_result("/tmp/cma")
# test with random actions
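_contact_checker above scans the active MuJoCo contacts for one geom pair in either order. The same symmetric check, sketched over a plain list of (geom1, geom2) tuples instead of sim.data.contact:

    def pair_in_contacts(contacts, id_1, id_2):
        # a contact between two geoms may be reported as (id_1, id_2) or (id_2, id_1)
        return any({geom1, geom2} == {id_1, id_2} for geom1, geom2 in contacts)

    print(pair_in_contacts([(3, 7), (2, 5)], 5, 2))  # True
    print(pair_in_contacts([(3, 7), (2, 5)], 5, 7))  # False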
diff --git a/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
index 9932403..31be6be 100644
--- a/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
@@ -12,9 +12,19 @@ class NewMPWrapper(EpisodicWrapper):
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qvel[3:6].copy()
+ # # random goal
+ # def set_active_obs(self):
+ # return np.hstack([
+ # [False] * (5 + int(not self.env.exclude_current_positions_from_observation)), # position
+ # [False] * 6, # velocity
+ # [True]
+ # ])
+
+ # Random x goal + random init pos
def set_active_obs(self):
return np.hstack([
- [False] * (5 + int(not self.env.exclude_current_positions_from_observation)), # position
+ [False] * (2 + int(not self.env.exclude_current_positions_from_observation)), # position
+ [True] * 3, # set to true if randomize initial pos
[False] * 6, # velocity
[True]
])
diff --git a/alr_envs/mp/episodic_wrapper.py b/alr_envs/mp/episodic_wrapper.py
index 7426c15..21fb035 100644
--- a/alr_envs/mp/episodic_wrapper.py
+++ b/alr_envs/mp/episodic_wrapper.py
@@ -205,7 +205,7 @@ class EpisodicWrapper(gym.Env, ABC):
infos['step_actions'] = actions[:t + 1]
infos['step_observations'] = observations[:t + 1]
infos['step_rewards'] = rewards[:t + 1]
- infos['trajectory_length'] = t + 1
+ infos['trajectory_length'] = t + 1
done = True
return self.get_observation_from_step(obs), trajectory_return, done, infos
From 863ef77e5ee178cdc9975c151d05d38fe53cd88e Mon Sep 17 00:00:00 2001
From: Onur
Date: Sun, 29 May 2022 11:59:02 +0200
Subject: [PATCH 031/104] safety
---
alr_envs/alr/mujoco/beerpong/beerpong.py | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 017bfcd..6790523 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -106,6 +106,10 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
return self._get_obs()
def step(self, a):
+ # if a.shape[0] == 8: # we learn also when to release the ball
+ # self._release_step = a[-1]
+ # self._release_step = np.clip(self._release_step, 50, 250)
+ # self.release_step = 0.5/self.dt
reward_dist = 0.0
angular_vel = 0.0
applied_action = a
@@ -172,11 +176,16 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
[self._steps],
])
+ # TODO
+ @property
+ def active_obs(self):
+ return np.hstack([
+ [False] * 7, # cos
+ [False] * 7, # sin
+ [True] * 2, # xy position of cup
+ [False] # env steps
+ ])
-class ALRBeerPongStepEnv(ALRBeerBongEnv):
- def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
- rndm_goal=False, cup_goal_pos=None):
- super(ALRBeerPongStepEnv, self).__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
if __name__ == "__main__":
env = ALRBeerBongEnv(rndm_goal=True)
From 3cc1cd1456ad927d1f1838f83766dad466fd9a3c Mon Sep 17 00:00:00 2001
From: Onur
Date: Sun, 29 May 2022 12:15:04 +0200
Subject: [PATCH 032/104] bp frameskip version
---
alr_envs/alr/__init__.py | 10 ++--
alr_envs/alr/mujoco/beerpong/beerpong.py | 57 +++++++++----------
.../mujoco/beerpong/beerpong_reward_staged.py | 8 ++-
3 files changed, 38 insertions(+), 37 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 9dc31ae..3d42b9c 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -400,10 +400,11 @@ register(id='TableTennis4DCtxt-v0',
register(
id='ALRBeerPong-v0',
entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
- max_episode_steps=600,
+ max_episode_steps=150,
kwargs={
"rndm_goal": False,
- "cup_goal_pos": [0.1, -2.0]
+ "cup_goal_pos": [0.1, -2.0],
+ "frameskip": 4
}
)
@@ -412,10 +413,11 @@ register(
register(
id='ALRBeerPong-v1',
entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
- max_episode_steps=600,
+ max_episode_steps=150,
kwargs={
"rndm_goal": True,
- "cup_goal_pos": [-0.3, -1.2]
+ "cup_goal_pos": [-0.3, -1.2],
+ "frameskip": 4
}
)
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 6790523..4f7b85f 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -50,7 +50,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
# self._release_step = 130 # time step of ball release
self.release_step = 100 # time step of ball release
- self.ep_length = 600 # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
+ self.ep_length = 600//frame_skip
self.cup_table_id = 10
if noisy:
@@ -59,8 +59,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.noise_std = 0
reward_function = BeerPongReward
self.reward_function = reward_function()
-
- MujocoEnv.__init__(self, self.xml_path, frame_skip)
+ self.repeat_action = frame_skip
+ MujocoEnv.__init__(self, self.xml_path, frame_skip=1)
utils.EzPickle.__init__(self)
@property
@@ -106,26 +106,26 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
return self._get_obs()
def step(self, a):
- # if a.shape[0] == 8: # we learn also when to release the ball
- # self._release_step = a[-1]
- # self._release_step = np.clip(self._release_step, 50, 250)
- # self.release_step = 0.5/self.dt
reward_dist = 0.0
angular_vel = 0.0
- applied_action = a
- reward_ctrl = - np.square(applied_action).sum()
- if self.apply_gravity_comp:
- applied_action += self.sim.data.qfrc_bias[:len(applied_action)].copy() / self.model.actuator_gear[:, 0]
- try:
- self.do_simulation(applied_action, self.frame_skip)
- if self._steps < self.release_step:
- self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
- self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
- elif self._steps == self.release_step and self.add_noise:
- self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
- crash = False
- except mujoco_py.builder.MujocoException:
- crash = True
+
+ for _ in range(self.repeat_action):
+ if self.apply_gravity_comp:
+ applied_action = a + self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0]
+ else:
+ applied_action = a
+ try:
+ self.do_simulation(applied_action, self.frame_skip)
+ self.reward_function.initialize(self)
+ self.reward_function.check_contacts(self.sim)
+ if self._steps < self.release_step:
+ self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
+ self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
+ elif self._steps == self.release_step and self.add_noise:
+ self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
+ crash = False
+ except mujoco_py.builder.MujocoException:
+ crash = True
# joint_cons_viol = self.check_traj_in_joint_limits()
ob = self._get_obs()
@@ -148,7 +148,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
ball_vel = np.zeros(3)
infos = dict(reward_dist=reward_dist,
- reward_ctrl=reward_ctrl,
reward=reward,
velocity=angular_vel,
# traj=self._q_pos,
@@ -176,16 +175,14 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
[self._steps],
])
- # TODO
@property
- def active_obs(self):
- return np.hstack([
- [False] * 7, # cos
- [False] * 7, # sin
- [True] * 2, # xy position of cup
- [False] # env steps
- ])
+ def dt(self):
+ return super(ALRBeerBongEnv, self).dt()*self.repeat_action
+class ALRBeerPongStepEnv(ALRBeerBongEnv):
+ def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
+ rndm_goal=False, cup_goal_pos=None):
+ super(ALRBeerPongStepEnv, self).__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
if __name__ == "__main__":
env = ALRBeerBongEnv(rndm_goal=True)
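Inside the new action-repeat loop, gravity compensation adds the bias torques (scaled by the actuator gear) to the commanded action before every sub-step. A sketch of that single line on plain arrays, with made-up values standing in for sim.data.qfrc_bias and model.actuator_gear:

    import numpy as np

    def gravity_compensated(action, qfrc_bias, actuator_gear):
        # only the first len(action) bias entries belong to the actuated joints
        return action + qfrc_bias[:len(action)] / actuator_gear

    qfrc_bias = np.array([0.5, -0.2, 0.1, 9.81])   # hypothetical bias forces (last entry: ball joint)
    actuator_gear = np.ones(3)
    print(gravity_compensated(np.zeros(3), qfrc_bias, actuator_gear))  # [ 0.5 -0.2  0.1]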
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index cb75127..3ef8b7b 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -60,7 +60,7 @@ class BeerPongReward:
self.noisy_bp = noisy
self._t_min_final_dist = -1
- def compute_reward(self, env, action):
+ def initialize(self, env):
if not self.is_initialized:
self.is_initialized = True
@@ -77,12 +77,14 @@ class BeerPongReward:
self.ground_collision_id = env.sim.model._geom_name2id["ground"]
self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
+ def compute_reward(self, env, action):
+
goal_pos = env.sim.data.site_xpos[self.goal_id]
ball_pos = env.sim.data.body_xpos[self.ball_id]
ball_vel = env.sim.data.body_xvelp[self.ball_id]
goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
- self._check_contacts(env.sim)
+ self.check_contacts(env.sim)
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
self.dist_ground_cup = np.linalg.norm(ball_pos-goal_pos) \
@@ -137,7 +139,7 @@ class BeerPongReward:
return reward, infos
- def _check_contacts(self, sim):
+ def check_contacts(self, sim):
if not self.ball_table_contact:
self.ball_table_contact = self._check_collision_single_objects(sim, self.ball_collision_id,
self.table_collision_id)
From 9b4a0f89b6dd9ff7fa5a16679cabf146a8994af6 Mon Sep 17 00:00:00 2001
From: Onur
Date: Sun, 29 May 2022 13:32:45 +0200
Subject: [PATCH 033/104] bp frameskip sanity check
---
alr_envs/alr/__init__.py | 8 ++++----
alr_envs/alr/mujoco/beerpong/beerpong.py | 7 +------
alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py | 1 -
3 files changed, 5 insertions(+), 11 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 3d42b9c..9b0fb2f 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -400,11 +400,11 @@ register(id='TableTennis4DCtxt-v0',
register(
id='ALRBeerPong-v0',
entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
- max_episode_steps=150,
+ max_episode_steps=600,
kwargs={
"rndm_goal": False,
"cup_goal_pos": [0.1, -2.0],
- "frameskip": 4
+ "frame_skip": 1
}
)
@@ -413,11 +413,11 @@ register(
register(
id='ALRBeerPong-v1',
entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
- max_episode_steps=150,
+ max_episode_steps=600,
kwargs={
"rndm_goal": True,
"cup_goal_pos": [-0.3, -1.2],
- "frameskip": 4
+ "frame_skip": 1
}
)
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 4f7b85f..508cfba 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -177,12 +177,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
@property
def dt(self):
- return super(ALRBeerBongEnv, self).dt()*self.repeat_action
-
-class ALRBeerPongStepEnv(ALRBeerBongEnv):
- def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
- rndm_goal=False, cup_goal_pos=None):
- super(ALRBeerPongStepEnv, self).__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
+ return super(ALRBeerBongEnv, self).dt*self.repeat_action
if __name__ == "__main__":
env = ALRBeerBongEnv(rndm_goal=True)
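With the corrected dt property, the effective control timestep is the MuJoCo model timestep times the frame skip handed to MujocoEnv (now always 1) times the extra action repeat. A small arithmetic sketch with assumed numbers, using the frame skip of 2 that the following patch registers; the model's real timestep is not shown in these patches:

    sim_timestep = 0.002       # assumed MuJoCo model timestep
    mujoco_frame_skip = 1      # MujocoEnv.__init__ is now always called with frame_skip=1
    repeat_action = 2          # the registered frame_skip becomes this action repeat in step()

    control_dt = sim_timestep * mujoco_frame_skip * repeat_action
    episode_length = 600 // repeat_action
    print(control_dt, episode_length)  # 0.004 300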
diff --git a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
index 9683687..2bdc11a 100644
--- a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
@@ -39,7 +39,6 @@ class NewMPWrapper(EpisodicWrapper):
def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
if self.mp.learn_tau:
self.env.env.release_step = action[0]/self.env.dt # Tau value
- # self.env.env.release_step = np.clip(action[0]/self.env.dt, 20, 260) # Tau value
return action, None
else:
return action, None
From 2ea7f6a2eda76160b7f283eb539e2dbbd04cddd4 Mon Sep 17 00:00:00 2001
From: Onur
Date: Sun, 29 May 2022 13:57:33 +0200
Subject: [PATCH 034/104] bp_frameskip_dev
---
alr_envs/alr/__init__.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 9b0fb2f..dd45698 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -400,11 +400,11 @@ register(id='TableTennis4DCtxt-v0',
register(
id='ALRBeerPong-v0',
entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
- max_episode_steps=600,
+ max_episode_steps=300,
kwargs={
"rndm_goal": False,
"cup_goal_pos": [0.1, -2.0],
- "frame_skip": 1
+ "frame_skip": 2
}
)
@@ -413,11 +413,11 @@ register(
register(
id='ALRBeerPong-v1',
entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
- max_episode_steps=600,
+ max_episode_steps=300,
kwargs={
"rndm_goal": True,
"cup_goal_pos": [-0.3, -1.2],
- "frame_skip": 1
+ "frame_skip": 2
}
)
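After these registrations, the frame-skipped variants are created through the usual Gym id lookup. A usage sketch, assuming the package is importable as alr_envs so that the register() calls above have already run:

    import gym
    import alr_envs  # noqa: F401  (importing the package triggers the register() calls)

    env = gym.make("ALRBeerPong-v1")   # random goal cup position, frame_skip=2
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())
    print(reward, done)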
From 59b15e82eaac6113586755808a20af965ecd0a68 Mon Sep 17 00:00:00 2001
From: Onur
Date: Tue, 31 May 2022 19:41:08 +0200
Subject: [PATCH 035/104] bp 2fs seems to work
---
alr_envs/alr/__init__.py | 54 ++++++++++++++++++-
alr_envs/alr/mujoco/__init__.py | 2 +-
alr_envs/alr/mujoco/beerpong/beerpong.py | 2 +-
.../alr/mujoco/hopper_jump/hopper_jump.py | 52 ++++++++++++++----
alr_envs/mp/episodic_wrapper.py | 3 ++
5 files changed, 98 insertions(+), 15 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index dd45698..8315a09 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -285,6 +285,20 @@ register(
"healthy_reward": 1.0
}
)
+
+##### Hopper Jump step-based reward
+register(
+ id='ALRHopperJump-v4',
+ entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnvStepBased',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
+ "context": True,
+ "healthy_reward": 1.0
+ }
+)
+
+
# CtxtFree are v0, Contextual are v1
register(
id='ALRHopperJumpOnBox-v0',
@@ -925,7 +939,7 @@ register(
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
'num_basis': 5,
- 'num_basis_zero_start': 2
+ 'num_basis_zero_start': 1
}
}
)
@@ -961,12 +975,48 @@ register(
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
'num_basis': 5,
- 'num_basis_zero_start': 2
+ 'num_basis_zero_start': 1
}
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v3")
+
+## HopperJump
+register(
+ id= "ALRHopperJumpProMP-v4",
+ entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+ kwargs={
+ "name": f"alr_envs:ALRHopperJump-v4",
+ "wrappers": [mujoco.hopper_jump.NewMPWrapper],
+ "ep_wrapper_kwargs": {
+ "weight_scale": 1
+ },
+ "movement_primitives_kwargs": {
+ 'movement_primitives_type': 'promp',
+ 'action_dim': 3
+ },
+ "phase_generator_kwargs": {
+ 'phase_generator_type': 'linear',
+ 'delay': 0,
+ 'tau': 2, # initial value
+ 'learn_tau': False,
+ 'learn_delay': False
+ },
+ "controller_kwargs": {
+ 'controller_type': 'motor',
+ "p_gains": np.ones(3),
+ "d_gains": 0.1*np.ones(3),
+ },
+ "basis_generator_kwargs": {
+ 'basis_generator_type': 'zero_rbf',
+ 'num_basis': 5,
+ 'num_basis_zero_start': 1
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v4")
+
## HopperJumpOnBox
_versions = ["v0", "v1"]
for _v in _versions:
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index 5c04b03..c02a70f 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -5,7 +5,7 @@ from .table_tennis.tt_gym import TTEnvGym
from .beerpong.beerpong import ALRBeerBongEnv
from .ant_jump.ant_jump import ALRAntJumpEnv
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
-from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv
+from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased
from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import ALRHopperThrowEnv
from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 508cfba..8846643 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -189,7 +189,7 @@ if __name__ == "__main__":
ac = np.zeros(7)
obs, rew, d, info = env.step(ac)
env.render("human")
-
+ print(env.dt)
print(rew)
if d:
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
index 979e224..6bbbb5b 100644
--- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
@@ -120,7 +120,6 @@ class ALRHopperJumpEnv(HopperEnv):
class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
- # The goal here is the desired x-Position of the Torso
def step(self, action):
@@ -154,20 +153,15 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
ctrl_cost = self.control_cost(action)
costs = ctrl_cost
done = False
- goal_dist = 0
+ goal_dist = np.atleast_1d(np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object)))[0]
rewards = 0
if self.current_step >= self.max_episode_steps:
# healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
healthy_reward = self.healthy_reward * 2 #* self.current_step
- goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object))[0]
contact_dist = self.contact_dist if self.contact_dist is not None else 5
dist_reward = self._forward_reward_weight * (-3*goal_dist + 10*self.max_height - 2*contact_dist)
rewards = dist_reward + healthy_reward
- # else:
- ## penalty for wrong start direction of first two joints; not needed, could be removed
- # rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0
-
observation = self._get_obs()
reward = rewards - costs
info = {
@@ -175,11 +169,10 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
'x_pos': site_pos_after,
'max_height': self.max_height,
'goal': self.goal,
- 'dist_rew': goal_dist,
+ 'goal_dist': goal_dist,
'height_rew': self.max_height,
'healthy_reward': self.healthy_reward * 2
}
-
return observation, reward, done, info
def reset_model(self):
@@ -213,12 +206,48 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
def reset(self):
super().reset()
# self.goal = np.random.uniform(-1.5, 1.5, 1)
- self.goal = np.random.uniform(0, 1.5, 1)
+ # self.goal = np.random.uniform(0, 1.5, 1)
+ self.goal = self.np_random.uniform(0, 1.5, 1)
# self.goal = np.array([1.5])
self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = np.array([self.goal, 0, 0], dtype=object)
return self.reset_model()
+class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
+ def step(self, action):
+
+ self._floor_geom_id = self.model.geom_name2id('floor')
+ self._foot_geom_id = self.model.geom_name2id('foot_geom')
+
+ self.current_step += 1
+ self.do_simulation(action, self.frame_skip)
+ height_after = self.get_body_com("torso")[2]
+ site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
+ self.max_height = max(height_after, self.max_height)
+ ctrl_cost = self.control_cost(action)
+
+ healthy_reward = self.healthy_reward * 2
+ height_reward = 10*height_after
+ goal_dist = np.atleast_1d(np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object)))[0]
+ goal_dist_reward = -3*goal_dist
+ dist_reward = self._forward_reward_weight * (goal_dist_reward + height_reward)
+ reward = -ctrl_cost + healthy_reward + dist_reward
+ done = False
+ observation = self._get_obs()
+ info = {
+ 'height': height_after,
+ 'x_pos': site_pos_after,
+ 'max_height': self.max_height,
+ 'goal': self.goal,
+ 'goal_dist': goal_dist,
+ 'dist_rew': dist_reward,
+ 'height_rew': height_reward,
+ 'healthy_reward': healthy_reward,
+ 'ctrl_reward': -ctrl_cost
+ }
+
+ return observation, reward, done, info
+
class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
def __init__(self, max_episode_steps=250):
super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False,
@@ -293,7 +322,8 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
if __name__ == '__main__':
render_mode = "human" # "human" or "partial" or "final"
# env = ALRHopperJumpEnv()
- env = ALRHopperXYJumpEnv()
+ # env = ALRHopperXYJumpEnv()
+ env = ALRHopperXYJumpEnvStepBased()
# env = ALRHopperJumpRndmPosEnv()
obs = env.reset()
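The ALRHopperXYJumpEnv terminal reward combines the distance to the goal x-position, the maximum torso height, and the landing (contact) distance. A sketch of that combination with the weights used above; the state values are illustrative:

    def xy_jump_terminal_reward(goal_dist, max_height, contact_dist,
                                healthy_reward=1.0, forward_reward_weight=1.0):
        dist_reward = forward_reward_weight * (-3 * goal_dist + 10 * max_height - 2 * contact_dist)
        return dist_reward + healthy_reward * 2

    print(xy_jump_terminal_reward(goal_dist=0.3, max_height=1.8, contact_dist=0.4))  # ≈ 18.3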
diff --git a/alr_envs/mp/episodic_wrapper.py b/alr_envs/mp/episodic_wrapper.py
index 21fb035..456c0c9 100644
--- a/alr_envs/mp/episodic_wrapper.py
+++ b/alr_envs/mp/episodic_wrapper.py
@@ -223,6 +223,9 @@ class EpisodicWrapper(gym.Env, ABC):
def get_observation_from_step(self, observation: np.ndarray) -> np.ndarray:
return observation[self.active_obs]
+ def seed(self, seed=None):
+ self.env.seed(seed)
+
def plot_trajs(self, des_trajs, des_vels):
import matplotlib.pyplot as plt
import matplotlib
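The new seed() pass-through matters because goals are now drawn from self.np_random instead of the global numpy RNG, so seeded runs reproduce the same goal sequence. A sketch with Gym's seeding helper rather than the wrapper itself:

    from gym.utils import seeding

    np_random, seed = seeding.np_random(42)
    goal_a = np_random.uniform(0, 1.5, 1)

    np_random, _ = seeding.np_random(42)
    goal_b = np_random.uniform(0, 1.5, 1)
    print(goal_a == goal_b)  # [ True]  -- same seed, same goal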
From 24604e60be365e38304961c0626b8ad8651f85e7 Mon Sep 17 00:00:00 2001
From: Onur
Date: Thu, 2 Jun 2022 09:05:38 +0200
Subject: [PATCH 036/104] bp step based -> release time for PPO
---
alr_envs/alr/__init__.py | 11 ++++++
alr_envs/alr/mujoco/__init__.py | 2 +-
alr_envs/alr/mujoco/beerpong/beerpong.py | 37 +++++++++++++++++--
alr_envs/alr/mujoco/beerpong/mp_wrapper.py | 3 ++
.../alr/mujoco/beerpong/new_mp_wrapper.py | 3 ++
5 files changed, 51 insertions(+), 5 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 8315a09..978e85c 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -435,6 +435,17 @@ register(
}
)
+# random goal cup position
+register(
+ id='ALRBeerPong-v2',
+ entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBased',
+ max_episode_steps=300,
+ kwargs={
+ "rndm_goal": True,
+ "cup_goal_pos": [-0.3, -1.2],
+ "frame_skip": 2
+ }
+ )
# Motion Primitive Environments
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index c02a70f..1cde867 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -2,7 +2,7 @@ from .reacher.balancing import BalancingEnv
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
from .table_tennis.tt_gym import TTEnvGym
-from .beerpong.beerpong import ALRBeerBongEnv
+from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased
from .ant_jump.ant_jump import ALRAntJumpEnv
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 8846643..0678da6 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -2,7 +2,7 @@ import mujoco_py.builder
import os
import numpy as np
-from gym import utils
+from gym import utils, spaces
from gym.envs.mujoco import MujocoEnv
from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
@@ -160,7 +160,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
is_collided=is_collided, sim_crash=crash,
table_contact_first=int(not self.reward_function.ball_ground_contact_first))
infos.update(reward_infos)
-
return ob, reward, done, infos
def check_traj_in_joint_limits(self):
@@ -168,9 +167,16 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def _get_obs(self):
theta = self.sim.data.qpos.flat[:7]
+ theta_dot = self.sim.data.qvel.flat[:7]
+ ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy()
+ cup_goal_diff_final = ball_pos - self.sim.data.site_xpos[self.sim.model._site_name2id["cup_goal_final_table"]].copy()
+ cup_goal_diff_top = ball_pos - self.sim.data.site_xpos[self.sim.model._site_name2id["cup_goal_table"]].copy()
return np.concatenate([
np.cos(theta),
np.sin(theta),
+ theta_dot,
+ cup_goal_diff_final,
+ cup_goal_diff_top,
self.sim.model.body_pos[self.cup_table_id][:2].copy(),
[self._steps],
])
@@ -179,14 +185,37 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def dt(self):
return super(ALRBeerBongEnv, self).dt*self.repeat_action
+
+class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
+
+ def _set_action_space(self):
+ bounds = super(ALRBeerBongEnvStepBased, self)._set_action_space()
+ min_bound = np.concatenate(([-1], bounds.low), dtype=bounds.dtype)
+ max_bound = np.concatenate(([1], bounds.high), dtype=bounds.dtype)
+ self.action_space = spaces.Box(low=min_bound, high=max_bound, dtype=bounds.dtype)
+ return self.action_space
+
+ def step(self, a):
+ self.release_step = self._steps if a[0]>=0 and self.release_step >= self._steps else self.release_step
+ return super(ALRBeerBongEnvStepBased, self).step(a[1:])
+
+ def reset(self):
+ ob = super(ALRBeerBongEnvStepBased, self).reset()
+ self.release_step = self.ep_length + 1
+ return ob
+
if __name__ == "__main__":
- env = ALRBeerBongEnv(rndm_goal=True)
+ # env = ALRBeerBongEnv(rndm_goal=True)
+ env = ALRBeerBongEnvStepBased(rndm_goal=True)
import time
env.reset()
env.render("human")
for i in range(1500):
# ac = 10 * env.action_space.sample()[0:7]
- ac = np.zeros(7)
+ ac = np.zeros(8)
+ ac[0] = -1
+ if env._steps > 150:
+ ac[0] = 1
obs, rew, d, info = env.step(ac)
env.render("human")
print(env.dt)
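ALRBeerBongEnvStepBased prepends a release flag in [-1, 1] to the seven joint torques: once the flag is non-negative, the current step becomes the release step. A sketch of the extended action space using gym.spaces; the seven-dimensional bounds are placeholders, not the real actuator limits:

    import numpy as np
    from gym import spaces

    arm_space = spaces.Box(low=-np.ones(7), high=np.ones(7), dtype=np.float64)  # placeholder torque bounds
    low = np.concatenate(([-1.0], arm_space.low))
    high = np.concatenate(([1.0], arm_space.high))
    step_based_space = spaces.Box(low=low, high=high, dtype=arm_space.dtype)
    print(step_based_space.shape)  # (8,)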
diff --git a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
index 11af9a5..022490c 100644
--- a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
@@ -12,6 +12,9 @@ class MPWrapper(MPEnvWrapper):
return np.hstack([
[False] * 7, # cos
[False] * 7, # sin
+ [False] * 7, # joint velocities
+ [False] * 3, # cup_goal_diff_final
+ [False] * 3, # cup_goal_diff_top
[True] * 2, # xy position of cup
[False] # env steps
])
diff --git a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
index 2bdc11a..2a2d4f9 100644
--- a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
@@ -17,6 +17,9 @@ class NewMPWrapper(EpisodicWrapper):
return np.hstack([
[False] * 7, # cos
[False] * 7, # sin
+ [False] * 7, # joint velocities
+ [False] * 3, # cup_goal_diff_final
+ [False] * 3, # cup_goal_diff_top
[True] * 2, # xy position of cup
[False] # env steps
])
From 8b8be4b5826c3c59434b76f55ce63c8b79cc07eb Mon Sep 17 00:00:00 2001
From: Onur
Date: Sat, 4 Jun 2022 15:27:20 +0200
Subject: [PATCH 037/104] smaller ctxt range hopper jump + step-based rew for
bp
---
alr_envs/alr/__init__.py | 6 +-
alr_envs/alr/mujoco/beerpong/beerpong.py | 66 ++++++++++++++-----
.../mujoco/beerpong/beerpong_reward_staged.py | 8 ++-
.../alr/mujoco/hopper_jump/hopper_jump.py | 49 ++++++++++----
4 files changed, 93 insertions(+), 36 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 978e85c..bf2ae74 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -253,7 +253,7 @@ register(
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
"context": False,
- "healthy_rewasrd": 1.0
+ "healthy_reward": 1.0
}
)
register(
@@ -405,7 +405,7 @@ register(id='TableTennis2DCtxt-v1',
kwargs={'ctxt_dim': 2, 'fixed_goal': True})
register(id='TableTennis4DCtxt-v0',
- entry_point='alr_envs.alr.mujoco:TTEnvGym',
+         entry_point='alr_envs.alr.mujoco:TTEnvGym',
max_episode_steps=MAX_EPISODE_STEPS,
kwargs={'ctxt_dim': 4})
@@ -915,7 +915,7 @@ for _v in _versions:
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
'num_basis': 5,
- 'num_basis_zero_start': 2
+ 'num_basis_zero_start': 1
}
}
)
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 0678da6..c72220b 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -187,35 +187,65 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
+ def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
+ super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
+ self.release_step = 62 # empirically evaluated for frame_skip=2!
- def _set_action_space(self):
- bounds = super(ALRBeerBongEnvStepBased, self)._set_action_space()
- min_bound = np.concatenate(([-1], bounds.low), dtype=bounds.dtype)
- max_bound = np.concatenate(([1], bounds.high), dtype=bounds.dtype)
- self.action_space = spaces.Box(low=min_bound, high=max_bound, dtype=bounds.dtype)
- return self.action_space
+ # def _set_action_space(self):
+ # bounds = super(ALRBeerBongEnvStepBased, self)._set_action_space()
+ # min_bound = np.concatenate(([-1], bounds.low), dtype=bounds.dtype)
+ # max_bound = np.concatenate(([1], bounds.high), dtype=bounds.dtype)
+ # self.action_space = spaces.Box(low=min_bound, high=max_bound, dtype=bounds.dtype)
+ # return self.action_space
+
+ # def step(self, a):
+ # self.release_step = self._steps if a[0]>=0 and self.release_step >= self._steps else self.release_step
+ # return super(ALRBeerBongEnvStepBased, self).step(a[1:])
+ #
+ # def reset(self):
+ # ob = super(ALRBeerBongEnvStepBased, self).reset()
+ # self.release_step = self.ep_length + 1
+ # return ob
def step(self, a):
- self.release_step = self._steps if a[0]>=0 and self.release_step >= self._steps else self.release_step
- return super(ALRBeerBongEnvStepBased, self).step(a[1:])
+ if self._steps < self.release_step:
+ return super(ALRBeerBongEnvStepBased, self).step(a)
+ else:
+ reward = 0
+ done = False
+ while not done:
+ sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBased, self).step(np.zeros(a.shape))
+ if not done or sub_infos['sim_crash']:
+ reward += sub_reward
+ else:
+ ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy()
+ cup_goal_dist_final = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
+ self.sim.model._site_name2id["cup_goal_final_table"]].copy())
+ cup_goal_dist_top = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
+ self.sim.model._site_name2id["cup_goal_table"]].copy())
+ if sub_infos['success']:
+ dist_rew = -cup_goal_dist_final**2
+ else:
+ dist_rew = -0.5*cup_goal_dist_final**2 - cup_goal_dist_top**2
+ reward = reward - sub_infos['action_cost'] + dist_rew
+ infos = sub_infos
+ ob = sub_ob
+ return ob, reward, done, infos
+
- def reset(self):
- ob = super(ALRBeerBongEnvStepBased, self).reset()
- self.release_step = self.ep_length + 1
- return ob
if __name__ == "__main__":
# env = ALRBeerBongEnv(rndm_goal=True)
- env = ALRBeerBongEnvStepBased(rndm_goal=True)
+ env = ALRBeerBongEnvStepBased(frame_skip=2, rndm_goal=True)
import time
env.reset()
env.render("human")
for i in range(1500):
- # ac = 10 * env.action_space.sample()[0:7]
- ac = np.zeros(8)
- ac[0] = -1
- if env._steps > 150:
- ac[0] = 1
+ ac = 10 * env.action_space.sample()
+ # ac = np.zeros(7)
+ # ac[0] = -1
+ # if env._steps > 150:
+ # ac[0] = 1
obs, rew, d, info = env.step(ac)
env.render("human")
print(env.dt)
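After the fixed release step, the step-based BeerPong variant rolls the remaining sub-steps out with zero torques and adds a terminal distance term that is stricter when the throw misses. A sketch of that terminal term; the distances are illustrative numbers:

    def post_release_dist_reward(cup_goal_dist_final, cup_goal_dist_top, success):
        if success:
            return -cup_goal_dist_final ** 2
        return -0.5 * cup_goal_dist_final ** 2 - cup_goal_dist_top ** 2

    print(post_release_dist_reward(0.15, 0.30, success=False))  # ≈ -0.101
    print(post_release_dist_reward(0.05, 0.00, success=True))   # -0.0025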
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index 3ef8b7b..4629461 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -90,7 +90,7 @@ class BeerPongReward:
self.dist_ground_cup = np.linalg.norm(ball_pos-goal_pos) \
if self.ball_ground_contact_first and self.dist_ground_cup == -1 else self.dist_ground_cup
action_cost = np.sum(np.square(action))
- self.action_costs.append(action_cost)
+ self.action_costs.append(np.copy(action_cost))
# # ##################### Reward function which does not force to bounce once on the table (quad dist) ############
self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
@@ -110,8 +110,9 @@ class BeerPongReward:
else:
min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 0, 1, 0, 0
# dist_ground_cup = 1 * self.dist_ground_cup
+ action_cost = 1e-4 * np.mean(action_cost)
reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
- 1e-4 * np.mean(action_cost) - ground_contact_dist_coeff*self.dist_ground_cup ** 2
+ action_cost - ground_contact_dist_coeff*self.dist_ground_cup ** 2
# 1e-7*np.mean(action_cost)
# release step punishment
min_time_bound = 0.1
@@ -124,7 +125,8 @@ class BeerPongReward:
# print('release time :', release_time)
# print('dist_ground_cup :', dist_ground_cup)
else:
- reward = - 1e-2 * action_cost
+ action_cost = 1e-2 * action_cost
+ reward = - action_cost
# reward = - 1e-4 * action_cost
# reward = 0
success = False
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
index 6bbbb5b..afedf4e 100644
--- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
@@ -80,7 +80,8 @@ class ALRHopperJumpEnv(HopperEnv):
'x_pos': site_pos_after,
'max_height': self.max_height,
'height_rew': self.max_height,
- 'healthy_reward': self.healthy_reward * 2
+ 'healthy_reward': self.healthy_reward * 2,
+ 'healthy': self.is_healthy
}
return observation, reward, done, info
@@ -171,7 +172,8 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
'goal': self.goal,
'goal_dist': goal_dist,
'height_rew': self.max_height,
- 'healthy_reward': self.healthy_reward * 2
+ 'healthy_reward': self.healthy_reward * 2,
+ 'healthy': self.is_healthy
}
return observation, reward, done, info
@@ -207,13 +209,38 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
super().reset()
# self.goal = np.random.uniform(-1.5, 1.5, 1)
# self.goal = np.random.uniform(0, 1.5, 1)
- self.goal = self.np_random.uniform(0, 1.5, 1)
- # self.goal = np.array([1.5])
+ # self.goal = self.np_random.uniform(0, 1.5, 1)
+ self.goal = self.np_random.uniform(0.3, 1.35, 1)
self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = np.array([self.goal, 0, 0], dtype=object)
return self.reset_model()
class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
+
+ def __init__(self,
+ xml_file='hopper_jump.xml',
+ forward_reward_weight=1.0,
+ ctrl_cost_weight=1e-3,
+ healthy_reward=0.0,
+ penalty=0.0,
+ context=True,
+ terminate_when_unhealthy=False,
+ healthy_state_range=(-100.0, 100.0),
+ healthy_z_range=(0.5, float('inf')),
+ healthy_angle_range=(-float('inf'), float('inf')),
+ reset_noise_scale=5e-3,
+ exclude_current_positions_from_observation=True,
+ max_episode_steps=250,
+ height_scale = 10,
+ dist_scale = 3,
+ healthy_scale = 2):
+ self.height_scale = height_scale
+ self.dist_scale = dist_scale
+ self.healthy_scale = healthy_scale
+ super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, penalty, context,
+ terminate_when_unhealthy, healthy_state_range, healthy_z_range, healthy_angle_range,
+ reset_noise_scale, exclude_current_positions_from_observation, max_episode_steps)
+
def step(self, action):
self._floor_geom_id = self.model.geom_name2id('floor')
@@ -226,10 +253,10 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
self.max_height = max(height_after, self.max_height)
ctrl_cost = self.control_cost(action)
- healthy_reward = self.healthy_reward * 2
- height_reward = 10*height_after
+ healthy_reward = self.healthy_reward * self.healthy_scale
+ height_reward = self.height_scale*height_after
goal_dist = np.atleast_1d(np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object)))[0]
- goal_dist_reward = -3*goal_dist
+ goal_dist_reward = -self.dist_scale*goal_dist
dist_reward = self._forward_reward_weight * (goal_dist_reward + height_reward)
reward = -ctrl_cost + healthy_reward + dist_reward
done = False
@@ -240,12 +267,10 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
'max_height': self.max_height,
'goal': self.goal,
'goal_dist': goal_dist,
- 'dist_rew': dist_reward,
- 'height_rew': height_reward,
- 'healthy_reward': healthy_reward,
- 'ctrl_reward': -ctrl_cost
+ 'height_rew': self.max_height,
+ 'healthy_reward': self.healthy_reward * 2,
+ 'healthy': self.is_healthy
}
-
return observation, reward, done, info
class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
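The step-based hopper variant above turns the previously hard-coded reward factors (10 for height, 3 for goal distance, 2 for the healthy bonus) into constructor arguments. A minimal sketch of the resulting per-step reward, with plain numbers standing in for the simulator state (not the library API):

```python
import numpy as np

def hopper_step_reward(height_after, foot_xyz, goal_x, ctrl_cost, base_healthy_reward,
                       forward_reward_weight=1.0, height_scale=10.0, dist_scale=3.0,
                       healthy_scale=2.0):
    """Sketch of the scaled step reward used in ALRHopperXYJumpEnvStepBased above."""
    healthy_reward = base_healthy_reward * healthy_scale
    height_reward = height_scale * height_after
    goal_dist = np.linalg.norm(foot_xyz - np.array([goal_x, 0.0, 0.0]))
    dist_reward = forward_reward_weight * (height_reward - dist_scale * goal_dist)
    return -ctrl_cost + healthy_reward + dist_reward

# Example: hopper at 1.2 m height, foot 0.4 m short of a goal at x = 0.9.
print(hopper_step_reward(1.2, np.array([0.5, 0.0, 0.0]), 0.9,
                         ctrl_cost=0.01, base_healthy_reward=0.0))
```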
From 719b40c4e48d92dbdd965e7ea4e17b299c12f9f4 Mon Sep 17 00:00:00 2001
From: Onur
Date: Sat, 4 Jun 2022 17:43:35 +0200
Subject: [PATCH 038/104] update bp step based env
---
alr_envs/alr/__init__.py | 11 +++++++++++
alr_envs/alr/mujoco/__init__.py | 2 +-
alr_envs/alr/mujoco/beerpong/beerpong.py | 25 +++++++++++++++++++++++-
3 files changed, 36 insertions(+), 2 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index bf2ae74..cc9768a 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -446,6 +446,17 @@ register(
"frame_skip": 2
}
)
+# Beerpong with episodic reward, but fixed release time step
+register(
+ id='ALRBeerPong-v3',
+ entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBasedEpisodicReward',
+ max_episode_steps=300,
+ kwargs={
+ "rndm_goal": True,
+ "cup_goal_pos": [-0.3, -1.2],
+ "frame_skip": 2
+ }
+ )
# Motion Primitive Environments
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index 1cde867..2b19331 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -2,7 +2,7 @@ from .reacher.balancing import BalancingEnv
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
from .table_tennis.tt_gym import TTEnvGym
-from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased
+from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased, ALRBeerBongEnvStepBasedEpisodicReward
from .ant_jump.ant_jump import ALRAntJumpEnv
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index c72220b..9028bd0 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -186,6 +186,26 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
return super(ALRBeerBongEnv, self).dt*self.repeat_action
+class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
+ def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
+ super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
+ self.release_step = 62 # empirically evaluated for frame_skip=2!
+
+ def step(self, a):
+ if self._steps < self.release_step:
+ return super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(a)
+ else:
+ reward = 0
+ done = False
+ while not done:
+ sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(np.zeros(a.shape))
+ reward += sub_reward
+ infos = sub_infos
+ ob = sub_ob
+ ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the
+ # internal steps and thus, the observation also needs to be set correctly
+ return ob, reward, done, infos
+
class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
@@ -230,13 +250,16 @@ class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
reward = reward - sub_infos['action_cost'] + dist_rew
infos = sub_infos
ob = sub_ob
+ ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the
+ # internal steps and thus, the observation also needs to be set correctly
return ob, reward, done, infos
if __name__ == "__main__":
# env = ALRBeerBongEnv(rndm_goal=True)
- env = ALRBeerBongEnvStepBased(frame_skip=2, rndm_goal=True)
+ # env = ALRBeerBongEnvStepBased(frame_skip=2, rndm_goal=True)
+ env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2, rndm_goal=True)
import time
env.reset()
env.render("human")
From c47845c0dd0bbf2592a4b54156adae61a193a08c Mon Sep 17 00:00:00 2001
From: Onur
Date: Sun, 5 Jun 2022 15:11:07 +0200
Subject: [PATCH 039/104] prepare HJ for PPO
---
alr_envs/alr/__init__.py | 4 +--
.../alr/mujoco/hopper_jump/hopper_jump.py | 27 ++++++++++++-------
.../alr/mujoco/hopper_jump/new_mp_wrapper.py | 16 ++++++-----
3 files changed, 29 insertions(+), 18 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index cc9768a..d3e976c 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -974,7 +974,7 @@ register(
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
kwargs={
"name": f"alr_envs:ALRHopperJump-v3",
- "wrappers": [mujoco.hopper_jump.NewMPWrapper],
+ "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper],
"ep_wrapper_kwargs": {
"weight_scale": 1
},
@@ -1010,7 +1010,7 @@ register(
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
kwargs={
"name": f"alr_envs:ALRHopperJump-v4",
- "wrappers": [mujoco.hopper_jump.NewMPWrapper],
+ "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper],
"ep_wrapper_kwargs": {
"weight_scale": 1
},
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
index afedf4e..2759fd4 100644
--- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
@@ -11,7 +11,7 @@ class ALRHopperJumpEnv(HopperEnv):
- healthy_reward: 1.0 -> 0.1 -> 0
- healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
- healthy_z_range: (0.7, float('inf')) -> (0.5, float('inf'))
-
+ - exclude current positions from observations is set to False

"""
def __init__(self,
@@ -26,7 +26,7 @@ class ALRHopperJumpEnv(HopperEnv):
healthy_z_range=(0.5, float('inf')),
healthy_angle_range=(-float('inf'), float('inf')),
reset_noise_scale=5e-3,
- exclude_current_positions_from_observation=True,
+ exclude_current_positions_from_observation=False,
max_episode_steps=250):
self.current_step = 0
self.max_height = 0
@@ -90,7 +90,7 @@ class ALRHopperJumpEnv(HopperEnv):
return np.append(super()._get_obs(), self.goal)
def reset(self):
- self.goal = np.random.uniform(1.4, 2.16, 1) # 1.3 2.3
+ self.goal = self.np_random.uniform(1.4, 2.16, 1)[0] # 1.3 2.3
self.max_height = 0
self.current_step = 0
return super().reset()
@@ -149,12 +149,12 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
if self.contact_dist is None and self.contact_with_floor:
self.contact_dist = np.linalg.norm(self.sim.data.site_xpos[self.model.site_name2id('foot_site')]
- - np.array([self.goal, 0, 0], dtype=object))[0]
+ - np.array([self.goal, 0, 0]))
ctrl_cost = self.control_cost(action)
costs = ctrl_cost
done = False
- goal_dist = np.atleast_1d(np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object)))[0]
+ goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0]))
rewards = 0
if self.current_step >= self.max_episode_steps:
# healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
@@ -210,10 +210,14 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
# self.goal = np.random.uniform(-1.5, 1.5, 1)
# self.goal = np.random.uniform(0, 1.5, 1)
# self.goal = self.np_random.uniform(0, 1.5, 1)
- self.goal = self.np_random.uniform(0.3, 1.35, 1)
- self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = np.array([self.goal, 0, 0], dtype=object)
+ self.goal = self.np_random.uniform(0.3, 1.35, 1)[0]
+ self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = np.array([self.goal, 0, 0])
return self.reset_model()
+ def _get_obs(self):
+ goal_diff = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy() \
+ - np.array([self.goal, 0, 0])
+ return np.concatenate((super(ALRHopperXYJumpEnv, self)._get_obs(), goal_diff))
class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
@@ -229,7 +233,7 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
healthy_z_range=(0.5, float('inf')),
healthy_angle_range=(-float('inf'), float('inf')),
reset_noise_scale=5e-3,
- exclude_current_positions_from_observation=True,
+ exclude_current_positions_from_observation=False,
max_episode_steps=250,
height_scale = 10,
dist_scale = 3,
@@ -242,7 +246,10 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
reset_noise_scale, exclude_current_positions_from_observation, max_episode_steps)
def step(self, action):
-
+ print("")
+ print('height_scale: ', self.height_scale)
+ print('healthy_scale: ', self.healthy_scale)
+ print('dist_scale: ', self.dist_scale)
self._floor_geom_id = self.model.geom_name2id('floor')
self._foot_geom_id = self.model.geom_name2id('foot_geom')
@@ -348,7 +355,9 @@ if __name__ == '__main__':
render_mode = "human" # "human" or "partial" or "final"
# env = ALRHopperJumpEnv()
# env = ALRHopperXYJumpEnv()
+ np.random.seed(0)
env = ALRHopperXYJumpEnvStepBased()
+ env.seed(0)
# env = ALRHopperJumpRndmPosEnv()
obs = env.reset()
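Several of the changes above replace module-level `np.random` calls with the environment's own `self.np_random` generator and take the scalar with `[0]`, so that `env.seed(0)` makes the sampled goals reproducible per environment instance. A small sketch of the difference, assuming the classic `gym.utils.seeding` helper that the MuJoCo envs use internally:

```python
import numpy as np
from gym.utils import seeding

np_random, _ = seeding.np_random(0)   # roughly what env.seed(0) sets up internally

goal_unseeded = np.random.uniform(0.3, 1.35, 1)    # global RNG, shape (1,)
goal_seeded = np_random.uniform(0.3, 1.35, 1)[0]   # per-env RNG, plain scalar

print(goal_unseeded, goal_seeded)
```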
diff --git a/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
index 31be6be..7a26d62 100644
--- a/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
@@ -23,17 +23,19 @@ class NewMPWrapper(EpisodicWrapper):
# Random x goal + random init pos
def set_active_obs(self):
return np.hstack([
- [False] * (2 + int(not self.env.exclude_current_positions_from_observation)), # position
- [True] * 3, # set to true if randomize initial pos
- [False] * 6, # velocity
- [True]
- ])
+ [False] * (2 + int(not self.env.exclude_current_positions_from_observation)), # position
+ [True] * 3, # set to true if randomize initial pos
+ [False] * 6, # velocity
+ [True]
+ ])
class NewHighCtxtMPWrapper(NewMPWrapper):
def set_active_obs(self):
return np.hstack([
- [True] * (5 + int(not self.env.exclude_current_positions_from_observation)), # position
+ [False] * (2 + int(not self.env.exclude_current_positions_from_observation)), # position
+ [True] * 3, # set to true if randomize initial pos
[False] * 6, # velocity
- [False]
+ [True], # goal
+ [False] * 3 # goal diff
])
\ No newline at end of file
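The `set_active_obs` masks above mark which entries of the flat observation are exposed to the episodic (MP) policy as context. A short sketch of how such a boolean mask picks out the context slice; the layout follows the comments in the wrapper and should be read as an assumption, not a spec:

```python
import numpy as np

exclude_xy = False  # exclude_current_positions_from_observation
active_obs = np.hstack([
    [False] * (2 + int(not exclude_xy)),  # root position entries
    [True] * 3,                           # randomized initial joint positions
    [False] * 6,                          # velocities
    [True],                               # goal
    [False] * 3,                          # goal difference vector
])

obs = np.arange(active_obs.size, dtype=float)  # stand-in for a real observation
context = obs[active_obs]                      # entries the MP policy conditions on
print(active_obs.size, context)
```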
From 7bd9848c31f1e377aab54a7ba8cf72bede900d0b Mon Sep 17 00:00:00 2001
From: Onur
Date: Tue, 21 Jun 2022 17:15:01 +0200
Subject: [PATCH 040/104] after deadline
---
alr_envs/alr/__init__.py | 126 ++++++++++++++++++
alr_envs/alr/mujoco/__init__.py | 2 +-
alr_envs/alr/mujoco/beerpong/beerpong.py | 26 +++-
.../alr/mujoco/hopper_jump/hopper_jump.py | 46 +++++--
.../alr/mujoco/hopper_jump/new_mp_wrapper.py | 6 +-
alr_envs/alr/mujoco/reacher/alr_reacher.py | 63 +++++----
alr_envs/alr/mujoco/reacher/mp_wrapper.py | 2 +-
alr_envs/utils/make_env_helpers.py | 1 -
8 files changed, 227 insertions(+), 45 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index d3e976c..09f533a 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -203,6 +203,34 @@ register(
}
)
+_vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1]
+for i in _vs:
+ _env_id = f'ALRReacher{i}-v0'
+ register(
+ id=_env_id,
+ entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
+ max_episode_steps=200,
+ kwargs={
+ "steps_before_reward": 0,
+ "n_links": 5,
+ "balance": False,
+ 'ctrl_cost_weight': i
+ }
+ )
+
+ _env_id = f'ALRReacherSparse{i}-v0'
+ register(
+ id=_env_id,
+ entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
+ max_episode_steps=200,
+ kwargs={
+ "steps_before_reward": 200,
+ "n_links": 5,
+ "balance": False,
+ 'ctrl_cost_weight': i
+ }
+ )
+
# CtxtFree are v0, Contextual are v1
register(
id='ALRAntJump-v0',
@@ -458,6 +486,18 @@ register(
}
)
+# Beerpong with fixed release time step
+register(
+ id='ALRBeerPong-v4',
+ entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvFixedReleaseStep',
+ max_episode_steps=300,
+ kwargs={
+ "rndm_goal": True,
+ "cup_goal_pos": [-0.3, -1.2],
+ "frame_skip": 2
+ }
+ )
+
# Motion Primitive Environments
## Simple Reacher
@@ -648,6 +688,56 @@ for _v in _versions:
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+_vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1]
+for i in _vs:
+ _env_id = f'ALRReacher{i}ProMP-v0'
+ register(
+ id=_env_id,
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": f"alr_envs:{_env_id.replace('ProMP', '')}",
+ "wrappers": [mujoco.reacher.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 5,
+ "num_basis": 5,
+ "duration": 4,
+ "policy_type": "motor",
+ # "weights_scale": 5,
+ "n_zero_basis": 1,
+ "zero_start": True,
+ "policy_kwargs": {
+ "p_gains": 1,
+ "d_gains": 0.1
+ }
+ }
+ }
+ )
+
+ _env_id = f'ALRReacherSparse{i}ProMP-v0'
+ register(
+ id=_env_id,
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": f"alr_envs:{_env_id.replace('ProMP', '')}",
+ "wrappers": [mujoco.reacher.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 5,
+ "num_basis": 5,
+ "duration": 4,
+ "policy_type": "motor",
+ # "weights_scale": 5,
+ "n_zero_basis": 1,
+ "zero_start": True,
+ "policy_kwargs": {
+ "p_gains": 1,
+ "d_gains": 0.1
+ }
+ }
+ }
+ )
+
+
# ## Beerpong
# _versions = ["v0", "v1"]
# for _v in _versions:
@@ -717,6 +807,42 @@ for _v in _versions:
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+## Beerpong ProMP fixed release
+_env_id = 'BeerpongProMP-v2'
+register(
+ id=_env_id,
+ entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRBeerPong-v4",
+ "wrappers": [mujoco.beerpong.NewMPWrapper],
+ "ep_wrapper_kwargs": {
+ "weight_scale": 1
+ },
+ "movement_primitives_kwargs": {
+ 'movement_primitives_type': 'promp',
+ 'action_dim': 7
+ },
+ "phase_generator_kwargs": {
+ 'phase_generator_type': 'linear',
+ 'delay': 0,
+ 'tau': 0.62, # initial value
+ 'learn_tau': False,
+ 'learn_delay': False
+ },
+ "controller_kwargs": {
+ 'controller_type': 'motor',
+ "p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
+ "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]),
+ },
+ "basis_generator_kwargs": {
+ 'basis_generator_type': 'zero_rbf',
+ 'num_basis': 2,
+ 'num_basis_zero_start': 2
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
## Table Tennis
ctxt_dim = [2, 4]
for _v, cd in enumerate(ctxt_dim):
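Patch 040 registers a whole grid of reacher variants, one per control-cost weight, plus matching ProMP versions. A compact sketch of that registration pattern with a hypothetical id pattern and entry point (the real ids and kwargs are in the hunks above):

```python
import numpy as np
from gym.envs.registration import register

ctrl_cost_weights = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3,
                                               5e-3, 1e-2, 5e-2, 1e-1, 5e-1]

for w in ctrl_cost_weights:
    register(
        id=f'MyReacher{w}-v0',                   # hypothetical id pattern
        entry_point='my_pkg.envs:MyReacherEnv',  # hypothetical entry point
        max_episode_steps=200,
        kwargs={'steps_before_reward': 0, 'n_links': 5, 'balance': False,
                'ctrl_cost_weight': w},
    )
```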
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index 2b19331..2885321 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -2,7 +2,7 @@ from .reacher.balancing import BalancingEnv
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
from .table_tennis.tt_gym import TTEnvGym
-from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased, ALRBeerBongEnvStepBasedEpisodicReward
+from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased, ALRBeerBongEnvStepBasedEpisodicReward, ALRBeerBongEnvFixedReleaseStep
from .ant_jump.ant_jump import ALRAntJumpEnv
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 9028bd0..5040ec7 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -185,6 +185,10 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def dt(self):
return super(ALRBeerBongEnv, self).dt*self.repeat_action
+class ALRBeerBongEnvFixedReleaseStep(ALRBeerBongEnv):
+ def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
+ super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
+ self.release_step = 62 # empirically evaluated for frame_skip=2!
class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
@@ -206,6 +210,25 @@ class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
# internal steps and thus, the observation also needs to be set correctly
return ob, reward, done, infos
+
+# class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
+# def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
+# super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
+# self.release_step = 62 # empirically evaluated for frame_skip=2!
+#
+# def step(self, a):
+# if self._steps < self.release_step:
+# return super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(a)
+# else:
+# sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(np.zeros(a.shape))
+# reward = sub_reward
+# infos = sub_infos
+# ob = sub_ob
+# ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the
+# # internal steps and thus, the observation also needs to be set correctly
+# return ob, reward, done, infos
+
+
class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
@@ -259,7 +282,8 @@ class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
if __name__ == "__main__":
# env = ALRBeerBongEnv(rndm_goal=True)
# env = ALRBeerBongEnvStepBased(frame_skip=2, rndm_goal=True)
- env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2, rndm_goal=True)
+ # env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2, rndm_goal=True)
+ env = ALRBeerBongEnvFixedReleaseStep(frame_skip=2, rndm_goal=True)
import time
env.reset()
env.render("human")
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
index 2759fd4..20ede40 100644
--- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
@@ -123,7 +123,6 @@ class ALRHopperJumpEnv(HopperEnv):
class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
def step(self, action):
-
self._floor_geom_id = self.model.geom_name2id('floor')
self._foot_geom_id = self.model.geom_name2id('foot_geom')
@@ -173,7 +172,8 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
'goal_dist': goal_dist,
'height_rew': self.max_height,
'healthy_reward': self.healthy_reward * 2,
- 'healthy': self.is_healthy
+ 'healthy': self.is_healthy,
+ 'contact_dist': self.contact_dist if self.contact_dist is not None else 0
}
return observation, reward, done, info
@@ -194,7 +194,6 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
qpos = self.init_qpos + rnd_vec
qvel = self.init_qvel
-
self.set_state(qpos, qvel)
observation = self._get_obs()
@@ -207,9 +206,6 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
def reset(self):
super().reset()
- # self.goal = np.random.uniform(-1.5, 1.5, 1)
- # self.goal = np.random.uniform(0, 1.5, 1)
- # self.goal = self.np_random.uniform(0, 1.5, 1)
self.goal = self.np_random.uniform(0.3, 1.35, 1)[0]
self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = np.array([self.goal, 0, 0])
return self.reset_model()
@@ -219,6 +215,16 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
- np.array([self.goal, 0, 0])
return np.concatenate((super(ALRHopperXYJumpEnv, self)._get_obs(), goal_diff))
+ def set_context(self, context):
+ # context is 4 dimensional
+ qpos = self.init_qpos
+ qvel = self.init_qvel
+ qpos[-3:] = context[:3]
+ self.goal = context[-1]
+ self.set_state(qpos, qvel)
+ self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = np.array([self.goal, 0, 0])
+ return self._get_obs()
+
class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
def __init__(self,
@@ -246,10 +252,6 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
reset_noise_scale, exclude_current_positions_from_observation, max_episode_steps)
def step(self, action):
- print("")
- print('height_scale: ', self.height_scale)
- print('healthy_scale: ', self.healthy_scale)
- print('dist_scale: ', self.dist_scale)
self._floor_geom_id = self.model.geom_name2id('floor')
self._foot_geom_id = self.model.geom_name2id('foot_geom')
@@ -268,6 +270,23 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
reward = -ctrl_cost + healthy_reward + dist_reward
done = False
observation = self._get_obs()
+
+
+ ###########################################################
+ # This is only for logging the distance to goal when first having the contact
+ ##########################################################
+ floor_contact = self._contact_checker(self._floor_geom_id,
+ self._foot_geom_id) if not self.contact_with_floor else False
+ if not self.init_floor_contact:
+ self.init_floor_contact = floor_contact
+ if self.init_floor_contact and not self.has_left_floor:
+ self.has_left_floor = not floor_contact
+ if not self.contact_with_floor and self.has_left_floor:
+ self.contact_with_floor = floor_contact
+
+ if self.contact_dist is None and self.contact_with_floor:
+ self.contact_dist = np.linalg.norm(self.sim.data.site_xpos[self.model.site_name2id('foot_site')]
+ - np.array([self.goal, 0, 0]))
info = {
'height': height_after,
'x_pos': site_pos_after,
@@ -275,8 +294,9 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
'goal': self.goal,
'goal_dist': goal_dist,
'height_rew': self.max_height,
- 'healthy_reward': self.healthy_reward * 2,
- 'healthy': self.is_healthy
+ 'healthy_reward': self.healthy_reward * self.healthy_scale,
+ 'healthy': self.is_healthy,
+ 'contact_dist': self.contact_dist if self.contact_dist is not None else 0
}
return observation, reward, done, info
@@ -361,7 +381,7 @@ if __name__ == '__main__':
# env = ALRHopperJumpRndmPosEnv()
obs = env.reset()
- for k in range(10):
+ for k in range(1000):
obs = env.reset()
print('observation :', obs[:])
for i in range(200):
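The logging block added above is a small contact state machine: remember the initial floor contact, detect when the foot leaves the floor, and record the foot-to-goal distance at the first landing afterwards. A self-contained sketch of the same bookkeeping over a boolean contact signal (the contact checker itself is assumed to exist elsewhere):

```python
import numpy as np

class ContactDistanceLogger:
    """Sketch of the first-landing bookkeeping behind the 'contact_dist' info entry."""

    def __init__(self):
        self.init_floor_contact = False
        self.has_left_floor = False
        self.contact_with_floor = False
        self.contact_dist = None

    def update(self, raw_contact, foot_pos, goal_x):
        floor_contact = raw_contact if not self.contact_with_floor else False
        if not self.init_floor_contact:
            self.init_floor_contact = floor_contact
        if self.init_floor_contact and not self.has_left_floor:
            self.has_left_floor = not floor_contact
        if not self.contact_with_floor and self.has_left_floor:
            self.contact_with_floor = floor_contact
        if self.contact_dist is None and self.contact_with_floor:
            self.contact_dist = np.linalg.norm(foot_pos - np.array([goal_x, 0.0, 0.0]))

# Contact trace: on the floor, airborne twice, then landing 0.2 m short of the goal.
log = ContactDistanceLogger()
for contact, x in [(True, 0.0), (False, 0.4), (False, 0.8), (True, 1.0)]:
    log.update(contact, np.array([x, 0.0, 0.0]), goal_x=1.2)
print(log.contact_dist)  # distance recorded at the first landing after take-off
```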
diff --git a/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
index 7a26d62..77a1bf6 100644
--- a/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
@@ -38,4 +38,8 @@ class NewHighCtxtMPWrapper(NewMPWrapper):
[False] * 6, # velocity
[True], # goal
[False] * 3 # goal diff
- ])
\ No newline at end of file
+ ])
+
+ def set_context(self, context):
+ return self.get_observation_from_step(self.env.env.set_context(context))
+
diff --git a/alr_envs/alr/mujoco/reacher/alr_reacher.py b/alr_envs/alr/mujoco/reacher/alr_reacher.py
index 6b36407..c12352a 100644
--- a/alr_envs/alr/mujoco/reacher/alr_reacher.py
+++ b/alr_envs/alr/mujoco/reacher/alr_reacher.py
@@ -8,7 +8,8 @@ import alr_envs.utils.utils as alr_utils
class ALRReacherEnv(MujocoEnv, utils.EzPickle):
- def __init__(self, steps_before_reward=200, n_links=5, balance=False):
+ def __init__(self, steps_before_reward: int = 200, n_links: int = 5, ctrl_cost_weight: int = 1,
+ balance: bool = False):
utils.EzPickle.__init__(**locals())
self._steps = 0
@@ -17,6 +18,7 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
self.balance = balance
self.balance_weight = 1.0
+ self.ctrl_cost_weight = ctrl_cost_weight
self.reward_weight = 1
if steps_before_reward == 200:
@@ -40,7 +42,7 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
angular_vel = 0.0
reward_balance = 0.0
is_delayed = self.steps_before_reward > 0
- reward_ctrl = - np.square(a).sum()
+ reward_ctrl = - np.square(a).sum() * self.ctrl_cost_weight
if self._steps >= self.steps_before_reward:
vec = self.get_body_com("fingertip") - self.get_body_com("target")
reward_dist -= self.reward_weight * np.linalg.norm(vec)
@@ -48,9 +50,9 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
# avoid giving this penalty for normal step based case
# angular_vel -= 10 * np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
angular_vel -= 10 * np.square(self.sim.data.qvel.flat[:self.n_links]).sum()
- if is_delayed:
- # Higher control penalty for sparse reward per timestep
- reward_ctrl *= 10
+ # if is_delayed:
+ # # Higher control penalty for sparse reward per timestep
+ # reward_ctrl *= 10
if self.balance:
reward_balance -= self.balance_weight * np.abs(
@@ -68,35 +70,42 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
def viewer_setup(self):
self.viewer.cam.trackbodyid = 0
- def reset_model(self):
- qpos = self.init_qpos
- if not hasattr(self, "goal"):
- self.goal = np.array([-0.25, 0.25])
- # self.goal = self.init_qpos.copy()[:2] + 0.05
- qpos[-2:] = self.goal
- qvel = self.init_qvel
- qvel[-2:] = 0
- self.set_state(qpos, qvel)
- self._steps = 0
-
- return self._get_obs()
-
# def reset_model(self):
- # qpos = self.init_qpos.copy()
- # while True:
- # self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
- # # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
- # # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
- # if np.linalg.norm(self.goal) < self.n_links / 10:
- # break
+ # qpos = self.init_qpos
+ # if not hasattr(self, "goal"):
+ # self.goal = np.array([-0.25, 0.25])
+ # # self.goal = self.init_qpos.copy()[:2] + 0.05
# qpos[-2:] = self.goal
- # qvel = self.init_qvel.copy()
+ # qvel = self.init_qvel
# qvel[-2:] = 0
# self.set_state(qpos, qvel)
# self._steps = 0
#
# return self._get_obs()
+ def reset_model(self):
+ qpos = self.init_qpos.copy()
+ while True:
+ # full space
+ # self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
+ # I Quadrant
+ # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
+ # II Quadrant
+ # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
+ # II + III Quadrant
+ # self.goal = np.random.uniform(low=-self.n_links / 10, high=[0, self.n_links / 10], size=2)
+ # I + II Quadrant
+ self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=self.n_links, size=2)
+ if np.linalg.norm(self.goal) < self.n_links / 10:
+ break
+ qpos[-2:] = self.goal
+ qvel = self.init_qvel.copy()
+ qvel[-2:] = 0
+ self.set_state(qpos, qvel)
+ self._steps = 0
+
+ return self._get_obs()
+
# def reset_model(self):
# qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
# while True:
@@ -140,4 +149,4 @@ if __name__ == '__main__':
if d:
env.reset()
- env.close()
+ env.close()
\ No newline at end of file
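The re-enabled `reset_model` above rejection-samples a 2-D goal and only accepts it once it lies inside the arm's reach (`n_links / 10`). A standalone sketch of that sampling loop, assuming the upper sampling bound is also meant to be `n_links / 10` for the I + II quadrant case:

```python
import numpy as np

def sample_reachable_goal(n_links, rng=np.random):
    """Rejection-sample a 2-D goal inside the reacher's workspace radius."""
    reach = n_links / 10
    while True:
        # I + II quadrant: x in [-reach, reach], y in [0, reach]
        goal = rng.uniform(low=[-reach, 0.0], high=[reach, reach], size=2)
        if np.linalg.norm(goal) < reach:
            return goal

print(sample_reachable_goal(n_links=5))
```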
diff --git a/alr_envs/alr/mujoco/reacher/mp_wrapper.py b/alr_envs/alr/mujoco/reacher/mp_wrapper.py
index abcdc50..3b655d4 100644
--- a/alr_envs/alr/mujoco/reacher/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/reacher/mp_wrapper.py
@@ -40,4 +40,4 @@ class MPWrapper(MPEnvWrapper):
@property
def dt(self) -> Union[float, int]:
- return self.env.dt
+ return self.env.dt
\ No newline at end of file
diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py
index f22e0d7..301a5aa 100644
--- a/alr_envs/utils/make_env_helpers.py
+++ b/alr_envs/utils/make_env_helpers.py
@@ -98,7 +98,6 @@ def make(env_id: str, seed, **kwargs):
return env
-
def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], mp: MPInterface, controller: BaseController,
ep_wrapper_kwargs: Mapping, seed=1, **kwargs):
"""
From 9ad6fbe712e66e4815adc22357132aeb3d4c28e5 Mon Sep 17 00:00:00 2001
From: Fabian Otto
Date: Tue, 28 Jun 2022 16:05:09 +0200
Subject: [PATCH 041/104] first clean-up and some non-working ideas sketched
---
README.md | 38 +++---
.../alr/mujoco/ball_in_a_cup/ball_in_a_cup.py | 15 +--
.../ball_in_a_cup_reward_simple.py | 2 +-
alr_envs/alr/mujoco/beerpong/beerpong.py | 114 ++++++----------
.../mujoco/beerpong/beerpong_reward_staged.py | 48 +++----
.../alr/mujoco/beerpong/new_mp_wrapper.py | 42 ++----
.../alr/mujoco/hopper_jump/hopper_jump.py | 123 +++++++++---------
alr_envs/mp/episodic_wrapper.py | 67 ++++++----
alr_envs/utils/__init__.py | 9 +-
alr_envs/utils/make_env_helpers.py | 27 ++--
10 files changed, 225 insertions(+), 260 deletions(-)
diff --git a/README.md b/README.md
index edd1aac..c08a1d4 100644
--- a/README.md
+++ b/README.md
@@ -1,26 +1,32 @@
## ALR Robotics Control Environments
This project offers a large variety of reinforcement learning environments under the unifying interface of [OpenAI gym](https://gym.openai.com/).
-Besides, we also provide support (under the OpenAI interface) for the benchmark suites
+We provide support (under the OpenAI interface) for the benchmark suites
[DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control)
-(DMC) and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environments can be created according
-to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md). Unlike existing libraries, we
-additionally support to control agents with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP,
-we only consider the mean usually).
+(DMC) and [Metaworld](https://meta-world.github.io/).
+Custom (Mujoco) gym environments can be created according
+to [this guide](https://www.gymlibrary.ml/content/environment_creation/).
+Unlike existing libraries, we additionally support controlling agents with movement primitives, such as
+Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP, we usually only consider the mean).
-## Motion Primitive Environments (Episodic environments)
+## Movement Primitive Environments (Episode-Based/Black-Box Environments)
-Unlike step-based environments, motion primitive (MP) environments are closer related to stochastic search, black box
-optimization, and methods that are often used in robotics. MP environments are trajectory-based and always execute a full
-trajectory, which is generated by a Dynamic Motion Primitive (DMP) or a Probabilistic Motion Primitive (ProMP). The
-generated trajectory is translated into individual step-wise actions by a controller. The exact choice of controller is,
-however, dependent on the type of environment. We currently support position, velocity, and PD-Controllers for position,
-velocity, and torque control, respectively. The goal of all MP environments is still to learn a policy. Yet, an action
+Unlike step-based environments, movement primitive (MP) environments are more closely related to stochastic search, black-box
+optimization, and methods that are often used in traditional robotics and control.
+MP environments are episode-based and always execute a full trajectory, which is generated by a trajectory generator,
+such as a Dynamic Movement Primitive (DMP) or a Probabilistic Movement Primitive (ProMP).
+The generated trajectory is translated into individual step-wise actions by a trajectory tracking controller.
+The exact choice of controller is, however, dependent on the type of environment.
+We currently support position, velocity, and PD-Controllers for position, velocity, and torque control, respectively,
+as well as a special controller for the MetaWorld control suite.
+The goal of all MP environments is still to learn an optimal policy. Yet, an action
represents the parametrization of the motion primitives to generate a suitable trajectory. Additionally, in this
-framework we support all of this also for the contextual setting, for which we expose all changing substates of the
-task as a single observation in the beginning. This requires to predict a new action/MP parametrization for each
-trajectory. All environments provide next to the cumulative episode reward all collected information from each
-step as part of the info dictionary. This information should, however, mainly be used for debugging and logging.
+framework we also support all of this for the contextual setting, i.e. we expose a subset of the observation space
+as a single context at the beginning of the episode. This requires predicting a new action/MP parametrization for each
+context.
+All environments provide, next to the cumulative episode reward, all collected information from each
+step as part of the info dictionary. This information is, however, mainly meant for debugging as well as logging
+and not for training.
|Key| Description|
|---|---|
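As a purely illustrative sketch of the episode-based loop the README describes (sample MP parameters, generate a trajectory, track it with a controller, return the episode reward together with the context), with placeholder functions that are not part of the library:

```python
import numpy as np

def rollout_mp_episode(env, mp_params, generate_trajectory, pd_controller):
    """One black-box evaluation: context in, MP parameters in, episode return out."""
    context = env.reset()                                       # exposed context
    desired_pos, desired_vel = generate_trajectory(mp_params)   # e.g. ProMP mean
    episode_return, infos = 0.0, []
    for q_des, v_des in zip(desired_pos, desired_vel):
        q, v = env.current_pos, env.current_vel        # assumed env properties
        action = pd_controller(q_des - q, v_des - v)   # step-wise tracking action
        obs, reward, done, info = env.step(action)
        episode_return += reward
        infos.append(info)
        if done:
            break
    return context, episode_return, infos
```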
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup.py
index 7cff670..7b286bc 100644
--- a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup.py
+++ b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup.py
@@ -4,10 +4,11 @@ import numpy as np
from gym.envs.mujoco import MujocoEnv
-
class ALRBallInACupEnv(MujocoEnv, utils.EzPickle):
- def __init__(self, n_substeps=4, apply_gravity_comp=True, simplified: bool = False,
- reward_type: str = None, context: np.ndarray = None):
+ def __init__(
+ self, n_substeps=4, apply_gravity_comp=True, simplified: bool = False,
+ reward_type: str = None, context: np.ndarray = None
+ ):
utils.EzPickle.__init__(**locals())
self._steps = 0
@@ -23,9 +24,7 @@ class ALRBallInACupEnv(MujocoEnv, utils.EzPickle):
self.context = context
- alr_mujoco_env.AlrMujocoEnv.__init__(self,
- self.xml_path,
- apply_gravity_comp=apply_gravity_comp,
+ alr_mujoco_env.AlrMujocoEnv.__init__(self, self.xml_path, apply_gravity_comp=apply_gravity_comp,
n_substeps=n_substeps)
self._start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57])
self._start_vel = np.zeros(7)
@@ -129,7 +128,7 @@ class ALRBallInACupEnv(MujocoEnv, utils.EzPickle):
np.sin(theta),
# self.get_body_com("target"), # only return target to make problem harder
[self._steps],
- ])
+ ])
# TODO
@property
@@ -139,7 +138,7 @@ class ALRBallInACupEnv(MujocoEnv, utils.EzPickle):
[False] * 7, # sin
# [True] * 2, # x-y coordinates of target distance
[False] # env steps
- ])
+ ])
# These functions are for the task with 3 joint actuations
def extend_des_pos(self, des_pos):
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py
index a147d89..0762e22 100644
--- a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py
+++ b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py
@@ -67,7 +67,7 @@ class BallInACupReward(alr_reward_fct.AlrReward):
action_cost = np.sum(np.square(action))
self.action_costs.append(action_cost)
- self._is_collided = self.check_collision(self.env.sim) or self.env.check_traj_in_joint_limits()
+ self._is_collided = self.check_collision(self.env.sim) or self.env._check_traj_in_joint_limits()
if self.env._steps == self.env.ep_length - 1 or self._is_collided:
t_min_dist = np.argmin(self.dists)
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 5040ec7..b7d376e 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -1,11 +1,11 @@
-import mujoco_py.builder
import os
+import mujoco_py.builder
import numpy as np
-from gym import utils, spaces
+from gym import utils
from gym.envs.mujoco import MujocoEnv
-from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
+from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
CUP_POS_MIN = np.array([-1.42, -4.05])
CUP_POS_MAX = np.array([1.42, -1.25])
@@ -18,12 +18,14 @@ CUP_POS_MAX = np.array([1.42, -1.25])
# CUP_POS_MIN = np.array([-0.16, -2.2])
# CUP_POS_MAX = np.array([0.16, -1.7])
-
class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
- def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
- rndm_goal=False, cup_goal_pos=None):
+ def __init__(
+ self, frame_skip=1, apply_gravity_comp=True, noisy=False,
+ rndm_goal=False, cup_goal_pos=None
+ ):
+
cup_goal_pos = np.array(cup_goal_pos if cup_goal_pos is not None else [-0.3, -1.2, 0.840])
- if cup_goal_pos.shape[0]==2:
+ if cup_goal_pos.shape[0] == 2:
cup_goal_pos = np.insert(cup_goal_pos, 2, 0.840)
self.cup_goal_pos = np.array(cup_goal_pos)
@@ -50,7 +52,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
# self._release_step = 130 # time step of ball release
self.release_step = 100 # time step of ball release
- self.ep_length = 600//frame_skip
+ self.ep_length = 600 // frame_skip
self.cup_table_id = 10
if noisy:
@@ -71,14 +73,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def start_vel(self):
return self._start_vel
- @property
- def current_pos(self):
- return self.sim.data.qpos[0:7].copy()
-
- @property
- def current_vel(self):
- return self.sim.data.qvel[0:7].copy()
-
def reset(self):
self.reward_function.reset(self.add_noise)
return super().reset()
@@ -122,7 +116,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
elif self._steps == self.release_step and self.add_noise:
- self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
+ self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
crash = False
except mujoco_py.builder.MujocoException:
crash = True
@@ -147,29 +141,32 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
ball_pos = np.zeros(3)
ball_vel = np.zeros(3)
- infos = dict(reward_dist=reward_dist,
- reward=reward,
- velocity=angular_vel,
- # traj=self._q_pos,
- action=a,
- q_pos=self.sim.data.qpos[0:7].ravel().copy(),
- q_vel=self.sim.data.qvel[0:7].ravel().copy(),
- ball_pos=ball_pos,
- ball_vel=ball_vel,
- success=success,
- is_collided=is_collided, sim_crash=crash,
- table_contact_first=int(not self.reward_function.ball_ground_contact_first))
+ infos = dict(
+ reward_dist=reward_dist,
+ reward=reward,
+ velocity=angular_vel,
+ # traj=self._q_pos,
+ action=a,
+ q_pos=self.sim.data.qpos[0:7].ravel().copy(),
+ q_vel=self.sim.data.qvel[0:7].ravel().copy(),
+ ball_pos=ball_pos,
+ ball_vel=ball_vel,
+ success=success,
+ is_collided=is_collided, sim_crash=crash,
+ table_contact_first=int(not self.reward_function.ball_ground_contact_first)
+ )
infos.update(reward_infos)
return ob, reward, done, infos
- def check_traj_in_joint_limits(self):
+ def _check_traj_in_joint_limits(self):
return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
def _get_obs(self):
theta = self.sim.data.qpos.flat[:7]
theta_dot = self.sim.data.qvel.flat[:7]
ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy()
- cup_goal_diff_final = ball_pos - self.sim.data.site_xpos[self.sim.model._site_name2id["cup_goal_final_table"]].copy()
+ cup_goal_diff_final = ball_pos - self.sim.data.site_xpos[
+ self.sim.model._site_name2id["cup_goal_final_table"]].copy()
cup_goal_diff_top = ball_pos - self.sim.data.site_xpos[self.sim.model._site_name2id["cup_goal_table"]].copy()
return np.concatenate([
np.cos(theta),
@@ -179,17 +176,21 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
cup_goal_diff_top,
self.sim.model.body_pos[self.cup_table_id][:2].copy(),
[self._steps],
- ])
+ ])
+
+ def compute_reward(self):
@property
def dt(self):
- return super(ALRBeerBongEnv, self).dt*self.repeat_action
+ return super(ALRBeerBongEnv, self).dt * self.repeat_action
+
class ALRBeerBongEnvFixedReleaseStep(ALRBeerBongEnv):
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
self.release_step = 62 # empirically evaluated for frame_skip=2!
+
class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
@@ -202,54 +203,21 @@ class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
reward = 0
done = False
while not done:
- sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(np.zeros(a.shape))
+ sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(
+ np.zeros(a.shape))
reward += sub_reward
infos = sub_infos
ob = sub_ob
- ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the
- # internal steps and thus, the observation also needs to be set correctly
+ ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the
+ # internal steps and thus, the observation also needs to be set correctly
return ob, reward, done, infos
-# class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
-# def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
-# super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
-# self.release_step = 62 # empirically evaluated for frame_skip=2!
-#
-# def step(self, a):
-# if self._steps < self.release_step:
-# return super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(a)
-# else:
-# sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(np.zeros(a.shape))
-# reward = sub_reward
-# infos = sub_infos
-# ob = sub_ob
-# ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the
-# # internal steps and thus, the observation also needs to be set correctly
-# return ob, reward, done, infos
-
-
class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
self.release_step = 62 # empirically evaluated for frame_skip=2!
- # def _set_action_space(self):
- # bounds = super(ALRBeerBongEnvStepBased, self)._set_action_space()
- # min_bound = np.concatenate(([-1], bounds.low), dtype=bounds.dtype)
- # max_bound = np.concatenate(([1], bounds.high), dtype=bounds.dtype)
- # self.action_space = spaces.Box(low=min_bound, high=max_bound, dtype=bounds.dtype)
- # return self.action_space
-
- # def step(self, a):
- # self.release_step = self._steps if a[0]>=0 and self.release_step >= self._steps else self.release_step
- # return super(ALRBeerBongEnvStepBased, self).step(a[1:])
- #
- # def reset(self):
- # ob = super(ALRBeerBongEnvStepBased, self).reset()
- # self.release_step = self.ep_length + 1
- # return ob
-
def step(self, a):
if self._steps < self.release_step:
return super(ALRBeerBongEnvStepBased, self).step(a)
@@ -267,9 +235,9 @@ class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
cup_goal_dist_top = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
self.sim.model._site_name2id["cup_goal_table"]].copy())
if sub_infos['success']:
- dist_rew = -cup_goal_dist_final**2
+ dist_rew = -cup_goal_dist_final ** 2
else:
- dist_rew = -0.5*cup_goal_dist_final**2 - cup_goal_dist_top**2
+ dist_rew = -0.5 * cup_goal_dist_final ** 2 - cup_goal_dist_top ** 2
reward = reward - sub_infos['action_cost'] + dist_rew
infos = sub_infos
ob = sub_ob
@@ -278,13 +246,13 @@ class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
return ob, reward, done, infos
-
if __name__ == "__main__":
# env = ALRBeerBongEnv(rndm_goal=True)
# env = ALRBeerBongEnvStepBased(frame_skip=2, rndm_goal=True)
# env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2, rndm_goal=True)
env = ALRBeerBongEnvFixedReleaseStep(frame_skip=2, rndm_goal=True)
import time
+
env.reset()
env.render("human")
for i in range(1500):
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index 4629461..7e5fda6 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -29,52 +29,38 @@ class BeerPongReward:
# "cup_base_table"
]
-
- self.ball_traj = None
self.dists = None
self.dists_final = None
- self.costs = None
self.action_costs = None
- self.angle_rewards = None
- self.cup_angles = None
- self.cup_z_axes = None
- self.collision_penalty = 500
- self.reset(None)
+ self.reset()
self.is_initialized = False
- def reset(self, noisy):
- self.ball_traj = []
+ def reset(self):
self.dists = []
self.dists_final = []
- self.costs = []
self.action_costs = []
- self.angle_rewards = []
- self.cup_angles = []
- self.cup_z_axes = []
self.ball_ground_contact_first = False
self.ball_table_contact = False
self.ball_wall_contact = False
self.ball_cup_contact = False
self.ball_in_cup = False
- self.dist_ground_cup = -1 # distance floor to cup if first floor contact
- self.noisy_bp = noisy
- self._t_min_final_dist = -1
+ self.dist_ground_cup = -1 # distance floor to cup if first floor contact
def initialize(self, env):
if not self.is_initialized:
self.is_initialized = True
self.ball_id = env.sim.model._body_name2id["ball"]
- self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
- self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
self.cup_table_id = env.sim.model._body_name2id["cup_table"]
+
+ self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
self.wall_collision_id = env.sim.model._geom_name2id["wall"]
self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
- self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
self.ground_collision_id = env.sim.model._geom_name2id["ground"]
+ self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
def compute_reward(self, env, action):
@@ -87,7 +73,7 @@ class BeerPongReward:
self.check_contacts(env.sim)
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
- self.dist_ground_cup = np.linalg.norm(ball_pos-goal_pos) \
+ self.dist_ground_cup = np.linalg.norm(ball_pos - goal_pos) \
if self.ball_ground_contact_first and self.dist_ground_cup == -1 else self.dist_ground_cup
action_cost = np.sum(np.square(action))
self.action_costs.append(np.copy(action_cost))
@@ -99,7 +85,7 @@ class BeerPongReward:
min_dist = np.min(self.dists)
final_dist = self.dists_final[-1]
if self.ball_ground_contact_first:
- min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 2, -4 # relative rew offset when first bounding on ground
+ min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 2, -4 # relative rew offset when first bouncing on ground
# min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 0, -6 # absolute rew offset when first bouncing on ground
else:
if not self.ball_in_cup:
@@ -108,18 +94,18 @@ class BeerPongReward:
else:
min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 0, -2
else:
- min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 0, 1, 0 ,0
+ min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 0, 1, 0, 0
# dist_ground_cup = 1 * self.dist_ground_cup
action_cost = 1e-4 * np.mean(action_cost)
reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
- action_cost - ground_contact_dist_coeff*self.dist_ground_cup ** 2
+ action_cost - ground_contact_dist_coeff * self.dist_ground_cup ** 2
# 1e-7*np.mean(action_cost)
# release step punishment
min_time_bound = 0.1
max_time_bound = 1.0
- release_time = env.release_step*env.dt
- release_time_rew = int(release_time<min_time_bound)*(-30-10*(release_time-min_time_bound)**2) + int(release_time>max_time_bound)*(-30-10*(release_time-max_time_bound)**2)
+ release_time = env.release_step * env.dt
+ release_time_rew = int(release_time < min_time_bound) * (-30 - 10 * (release_time - min_time_bound) ** 2) \
+ + int(release_time > max_time_bound) * (-30 - 10 * (release_time - max_time_bound) ** 2)
reward += release_time_rew
success = self.ball_in_cup
# print('release time :', release_time)
@@ -147,17 +133,17 @@ class BeerPongReward:
self.table_collision_id)
if not self.ball_cup_contact:
self.ball_cup_contact = self._check_collision_with_set_of_objects(sim, self.ball_collision_id,
- self.cup_collision_ids)
+ self.cup_collision_ids)
if not self.ball_wall_contact:
self.ball_wall_contact = self._check_collision_single_objects(sim, self.ball_collision_id,
- self.wall_collision_id)
+ self.wall_collision_id)
if not self.ball_in_cup:
self.ball_in_cup = self._check_collision_single_objects(sim, self.ball_collision_id,
self.cup_table_collision_id)
if not self.ball_ground_contact_first:
if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact and not self.ball_in_cup:
self.ball_ground_contact_first = self._check_collision_single_objects(sim, self.ball_collision_id,
- self.ground_collision_id)
+ self.ground_collision_id)
def _check_collision_single_objects(self, sim, id_1, id_2):
for coni in range(0, sim.data.ncon):
@@ -190,4 +176,4 @@ class BeerPongReward:
if collision or collision_trans:
return True
- return False
\ No newline at end of file
+ return False
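The release-time term (reconstructed above) is zero inside a [0.1 s, 1.0 s] window and otherwise adds a constant offset plus a quadratic penalty in the overshoot. The same penalty written out as a small function:

```python
def release_time_penalty(release_time, min_time_bound=0.1, max_time_bound=1.0):
    """0 inside the window, -30 - 10 * overshoot**2 outside, as in the diff."""
    if release_time < min_time_bound:
        return -30 - 10 * (release_time - min_time_bound) ** 2
    if release_time > max_time_bound:
        return -30 - 10 * (release_time - max_time_bound) ** 2
    return 0.0

for t in (0.05, 0.5, 1.3):
    print(t, release_time_penalty(t))
```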
diff --git a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
index 2a2d4f9..53d7a1a 100644
--- a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
@@ -1,10 +1,15 @@
-from alr_envs.mp.episodic_wrapper import EpisodicWrapper
-from typing import Union, Tuple
+from typing import Tuple, Union
+
import numpy as np
-import gym
+
+from alr_envs.mp.episodic_wrapper import EpisodicWrapper
class NewMPWrapper(EpisodicWrapper):
+
+ # def __init__(self, replanning_model):
+ # self.replanning_model = replanning_model
+
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qpos[0:7].copy()
@@ -22,26 +27,16 @@ class NewMPWrapper(EpisodicWrapper):
[False] * 3, # cup_goal_diff_top
[True] * 2, # xy position of cup
[False] # env steps
- ])
+ ])
- # def set_mp_action_space(self):
- # min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
- # if self.mp.learn_tau:
- # min_action_bounds[0] = 20*self.env.dt
- # max_action_bounds[0] = 260*self.env.dt
- # mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
- # dtype=np.float32)
- # return mp_action_space
-
- # def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
- # if self.mp.learn_tau:
- # return np.concatenate((step_action, np.atleast_1d(env_spec_params)))
- # else:
- # return step_action
+ def do_replanning(self, pos, vel, s, a, t, last_replan_step):
+ return False
+ # const = np.arange(0, 1000, 10)
+ # return bool(self.replanning_model(s))
def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
if self.mp.learn_tau:
- self.env.env.release_step = action[0]/self.env.dt # Tau value
+ self.env.env.release_step = action[0] / self.env.dt # Tau value
return action, None
else:
return action, None
@@ -52,12 +47,3 @@ class NewMPWrapper(EpisodicWrapper):
xyz[-1] = 0.840
self.env.env.model.body_pos[self.env.env.cup_table_id] = xyz
return self.get_observation_from_step(self.env.env._get_obs())
- # def set_action_space(self):
- # if self.mp.learn_tau:
- # min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
- # min_action_bounds = np.concatenate((min_action_bounds.numpy(), [self.env.action_space.low[-1]]))
- # max_action_bounds = np.concatenate((max_action_bounds.numpy(), [self.env.action_space.high[-1]]))
- # self.action_space = gym.spaces.Box(low=min_action_bounds, high=max_action_bounds, dtype=np.float32)
- # return self.action_space
- # else:
- # return super(NewMPWrapper, self).set_action_space()
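In `_episode_callback` above, when the phase generator learns `tau`, the first entry of the parameter vector is a duration in seconds and is converted into the ball-release step via the environment's `dt`. A tiny sketch of that conversion; the 0.02 s control step is purely illustrative:

```python
def tau_to_release_step(action, dt=0.02, learn_tau=True):
    """Map a learned tau (seconds) to a release step index; dt is an assumption."""
    if not learn_tau:
        return None, action
    release_step = action[0] / dt   # the wrapper stores this on the wrapped env
    return release_step, action     # remaining entries parametrize the ProMP

print(tau_to_release_step([0.62, 0.1, -0.3]))  # tau = 0.62 s -> release step ~31
```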
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
index 20ede40..146b039 100644
--- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
@@ -14,20 +14,23 @@ class ALRHopperJumpEnv(HopperEnv):
- exclude current positions from observations is set to False
"""
- def __init__(self,
- xml_file='hopper_jump.xml',
- forward_reward_weight=1.0,
- ctrl_cost_weight=1e-3,
- healthy_reward=0.0,
- penalty=0.0,
- context=True,
- terminate_when_unhealthy=False,
- healthy_state_range=(-100.0, 100.0),
- healthy_z_range=(0.5, float('inf')),
- healthy_angle_range=(-float('inf'), float('inf')),
- reset_noise_scale=5e-3,
- exclude_current_positions_from_observation=False,
- max_episode_steps=250):
+ def __init__(
+ self,
+ xml_file='hopper_jump.xml',
+ forward_reward_weight=1.0,
+ ctrl_cost_weight=1e-3,
+ healthy_reward=0.0,
+ penalty=0.0,
+ context=True,
+ terminate_when_unhealthy=False,
+ healthy_state_range=(-100.0, 100.0),
+ healthy_z_range=(0.5, float('inf')),
+ healthy_angle_range=(-float('inf'), float('inf')),
+ reset_noise_scale=5e-3,
+ exclude_current_positions_from_observation=False,
+ max_episode_steps=250
+ ):
+
self.current_step = 0
self.max_height = 0
self.max_episode_steps = max_episode_steps
@@ -47,7 +50,7 @@ class ALRHopperJumpEnv(HopperEnv):
exclude_current_positions_from_observation)
def step(self, action):
-
+
self.current_step += 1
self.do_simulation(action, self.frame_skip)
height_after = self.get_body_com("torso")[2]
@@ -59,22 +62,14 @@ class ALRHopperJumpEnv(HopperEnv):
done = False
rewards = 0
if self.current_step >= self.max_episode_steps:
- hight_goal_distance = -10*np.linalg.norm(self.max_height - self.goal) if self.context else self.max_height
- healthy_reward = 0 if self.context else self.healthy_reward * 2 # self.current_step
- height_reward = self._forward_reward_weight * hight_goal_distance # maybe move reward calculation into if structure and define two different _forward_reward_weight variables for context and episodic seperatley
+ hight_goal_distance = -10 * np.linalg.norm(self.max_height - self.goal) if self.context else self.max_height
+ healthy_reward = 0 if self.context else self.healthy_reward * 2 # self.current_step
+ height_reward = self._forward_reward_weight * hight_goal_distance # maybe move reward calculation into if structure and define two different _forward_reward_weight variables for context and episodic separately
rewards = height_reward + healthy_reward
- # else:
- # # penalty for wrong start direction of first two joints; not needed, could be removed
- # rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0
-
observation = self._get_obs()
reward = rewards - costs
- # info = {
- # 'height' : height_after,
- # 'max_height': self.max_height,
- # 'goal' : self.goal
- # }
+
info = {
'height': height_after,
'x_pos': site_pos_after,
@@ -82,7 +77,7 @@ class ALRHopperJumpEnv(HopperEnv):
'height_rew': self.max_height,
'healthy_reward': self.healthy_reward * 2,
'healthy': self.is_healthy
- }
+ }
return observation, reward, done, info
@@ -90,7 +85,7 @@ class ALRHopperJumpEnv(HopperEnv):
return np.append(super()._get_obs(), self.goal)
def reset(self):
- self.goal = self.np_random.uniform(1.4, 2.16, 1)[0] # 1.3 2.3
+ self.goal = self.np_random.uniform(1.4, 2.16, 1)[0] # 1.3 2.3
self.max_height = 0
self.current_step = 0
return super().reset()
@@ -98,8 +93,8 @@ class ALRHopperJumpEnv(HopperEnv):
# overwrite reset_model to make it deterministic
def reset_model(self):
- qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
- qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+ qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+ qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
self.set_state(qpos, qvel)
@@ -147,7 +142,7 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
self.contact_with_floor = floor_contact
if self.contact_dist is None and self.contact_with_floor:
- self.contact_dist = np.linalg.norm(self.sim.data.site_xpos[self.model.site_name2id('foot_site')]
+ self.contact_dist = np.linalg.norm(self.sim.data.site_xpos[self.model.site_name2id('foot_site')]
- np.array([self.goal, 0, 0]))
ctrl_cost = self.control_cost(action)
@@ -157,9 +152,9 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
rewards = 0
if self.current_step >= self.max_episode_steps:
# healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
- healthy_reward = self.healthy_reward * 2 #* self.current_step
+ healthy_reward = self.healthy_reward * 2 # * self.current_step
contact_dist = self.contact_dist if self.contact_dist is not None else 5
- dist_reward = self._forward_reward_weight * (-3*goal_dist + 10*self.max_height - 2*contact_dist)
+ dist_reward = self._forward_reward_weight * (-3 * goal_dist + 10 * self.max_height - 2 * contact_dist)
rewards = dist_reward + healthy_reward
observation = self._get_obs()
@@ -174,7 +169,7 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
'healthy_reward': self.healthy_reward * 2,
'healthy': self.is_healthy,
'contact_dist': self.contact_dist if self.contact_dist is not None else 0
- }
+ }
return observation, reward, done, info
def reset_model(self):
@@ -225,25 +220,28 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = np.array([self.goal, 0, 0])
return self._get_obs()
+
class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
- def __init__(self,
- xml_file='hopper_jump.xml',
- forward_reward_weight=1.0,
- ctrl_cost_weight=1e-3,
- healthy_reward=0.0,
- penalty=0.0,
- context=True,
- terminate_when_unhealthy=False,
- healthy_state_range=(-100.0, 100.0),
- healthy_z_range=(0.5, float('inf')),
- healthy_angle_range=(-float('inf'), float('inf')),
- reset_noise_scale=5e-3,
- exclude_current_positions_from_observation=False,
- max_episode_steps=250,
- height_scale = 10,
- dist_scale = 3,
- healthy_scale = 2):
+ def __init__(
+ self,
+ xml_file='hopper_jump.xml',
+ forward_reward_weight=1.0,
+ ctrl_cost_weight=1e-3,
+ healthy_reward=0.0,
+ penalty=0.0,
+ context=True,
+ terminate_when_unhealthy=False,
+ healthy_state_range=(-100.0, 100.0),
+ healthy_z_range=(0.5, float('inf')),
+ healthy_angle_range=(-float('inf'), float('inf')),
+ reset_noise_scale=5e-3,
+ exclude_current_positions_from_observation=False,
+ max_episode_steps=250,
+ height_scale=10,
+ dist_scale=3,
+ healthy_scale=2
+ ):
self.height_scale = height_scale
self.dist_scale = dist_scale
self.healthy_scale = healthy_scale
@@ -263,15 +261,14 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
ctrl_cost = self.control_cost(action)
healthy_reward = self.healthy_reward * self.healthy_scale
- height_reward = self.height_scale*height_after
+ height_reward = self.height_scale * height_after
goal_dist = np.atleast_1d(np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object)))[0]
- goal_dist_reward = -self.dist_scale*goal_dist
+ goal_dist_reward = -self.dist_scale * goal_dist
dist_reward = self._forward_reward_weight * (goal_dist_reward + height_reward)
reward = -ctrl_cost + healthy_reward + dist_reward
done = False
observation = self._get_obs()
-
###########################################################
# This is only for logging the distance to goal when first having the contact
##########################################################
@@ -297,9 +294,10 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
'healthy_reward': self.healthy_reward * self.healthy_reward,
'healthy': self.is_healthy,
'contact_dist': self.contact_dist if self.contact_dist is not None else 0
- }
+ }
return observation, reward, done, info
+
class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
def __init__(self, max_episode_steps=250):
super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False,
@@ -309,14 +307,14 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
def reset_model(self):
self._floor_geom_id = self.model.geom_name2id('floor')
self._foot_geom_id = self.model.geom_name2id('foot_geom')
- noise_low = -np.ones(self.model.nq)*self._reset_noise_scale
+ noise_low = -np.ones(self.model.nq) * self._reset_noise_scale
noise_low[1] = 0
noise_low[2] = 0
noise_low[3] = -0.2
noise_low[4] = -0.2
noise_low[5] = -0.1
- noise_high = np.ones(self.model.nq)*self._reset_noise_scale
+ noise_high = np.ones(self.model.nq) * self._reset_noise_scale
noise_high[1] = 0
noise_high[2] = 0
noise_high[3] = 0
@@ -325,7 +323,7 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
# rnd_vec[2] *= 0.05 # the angle around the y axis shouldn't be too high as the agent then falls down quickly and
- # can not recover
+ # can not recover
# rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3)
qpos = self.init_qpos + rnd_vec
qvel = self.init_qvel
@@ -341,7 +339,7 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
self.do_simulation(action, self.frame_skip)
self.contact_with_floor = self._contact_checker(self._floor_geom_id, self._foot_geom_id) if not \
- self.contact_with_floor else True
+ self.contact_with_floor else True
height_after = self.get_body_com("torso")[2]
self.max_height = max(height_after, self.max_height) if self.contact_with_floor else 0
@@ -365,12 +363,11 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
'height': height_after,
'max_height': self.max_height,
'goal': self.goal
- }
+ }
return observation, reward, done, info
-
if __name__ == '__main__':
render_mode = "human" # "human" or "partial" or "final"
# env = ALRHopperJumpEnv()
@@ -396,4 +393,4 @@ if __name__ == '__main__':
print('After ', i, ' steps, done: ', d)
env.reset()
- env.close()
\ No newline at end of file
+ env.close()
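To make the sparse reward above concrete: in the contextual case nothing is rewarded until the final step, where the return is essentially the negative distance between the maximum reached height and the sampled goal height, scaled by 10 and the forward reward weight. A worked example with made-up numbers:

import numpy as np

forward_reward_weight = 1.0
context, healthy_reward = True, 0.0     # contextual case: no healthy bonus at the end
goal, max_height = 1.8, 1.65            # sampled goal height vs. best height reached in the episode

height_goal_distance = -10 * np.linalg.norm(max_height - goal) if context else max_height
height_reward = forward_reward_weight * height_goal_distance
print(height_reward + healthy_reward)   # approximately -1.5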
diff --git a/alr_envs/mp/episodic_wrapper.py b/alr_envs/mp/episodic_wrapper.py
index 456c0c9..b2eb391 100644
--- a/alr_envs/mp/episodic_wrapper.py
+++ b/alr_envs/mp/episodic_wrapper.py
@@ -9,7 +9,7 @@ from mp_pytorch.mp.mp_interfaces import MPInterface
from alr_envs.mp.controllers.base_controller import BaseController
-class EpisodicWrapper(gym.Env, ABC):
+class EpisodicWrapper(gym.Env, gym.wrappers.TransformReward, ABC):
"""
Base class for movement primitive based gym.Wrapper implementations.
@@ -22,14 +22,19 @@ class EpisodicWrapper(gym.Env, ABC):
weight_scale: Scaling parameter for the actions given to this wrapper
render_mode: Equivalent to gym render mode
"""
- def __init__(self,
- env: gym.Env,
- mp: MPInterface,
- controller: BaseController,
- duration: float,
- render_mode: str = None,
- verbose: int = 1,
- weight_scale: float = 1):
+
+ def __init__(
+ self,
+ env: gym.Env,
+ mp: MPInterface,
+ controller: BaseController,
+ duration: float,
+ render_mode: str = None,
+ verbose: int = 1,
+ weight_scale: float = 1,
+ sequencing=True,
+ reward_aggregation=np.mean,
+ ):
super().__init__()
self.env = env
@@ -40,11 +45,11 @@ class EpisodicWrapper(gym.Env, ABC):
self.duration = duration
self.traj_steps = int(duration / self.dt)
self.post_traj_steps = self.env.spec.max_episode_steps - self.traj_steps
+ # duration = self.env.max_episode_steps * self.dt
- self.controller = controller
self.mp = mp
self.env = env
- self.verbose = verbose
+ self.controller = controller
self.weight_scale = weight_scale
# rendering
@@ -52,15 +57,18 @@ class EpisodicWrapper(gym.Env, ABC):
self.render_kwargs = {}
self.time_steps = np.linspace(0, self.duration, self.traj_steps)
self.mp.set_mp_times(self.time_steps)
+ # self.mp.set_mp_duration(self.time_steps, dt)
# action_bounds = np.inf * np.ones((np.prod(self.mp.num_params)))
- self.mp_action_space = self.set_mp_action_space()
+ self.mp_action_space = self.get_mp_action_space()
- self.action_space = self.set_action_space()
+ self.action_space = self.get_action_space()
self.active_obs = self.set_active_obs()
self.observation_space = spaces.Box(low=self.env.observation_space.low[self.active_obs],
high=self.env.observation_space.high[self.active_obs],
dtype=self.env.observation_space.dtype)
+ self.verbose = verbose
+
def get_trajectory(self, action: np.ndarray) -> Tuple:
# TODO: this follows the implementation of the mp_pytorch library which includes the parameters tau and delay at
# the beginning of the array.
@@ -69,33 +77,37 @@ class EpisodicWrapper(gym.Env, ABC):
scaled_mp_params[ignore_indices:] *= self.weight_scale
self.mp.set_params(np.clip(scaled_mp_params, self.mp_action_space.low, self.mp_action_space.high))
self.mp.set_boundary_conditions(bc_time=self.time_steps[:1], bc_pos=self.current_pos, bc_vel=self.current_vel)
- traj_dict = self.mp.get_mp_trajs(get_pos = True, get_vel = True)
+ traj_dict = self.mp.get_mp_trajs(get_pos=True, get_vel=True)
trajectory_tensor, velocity_tensor = traj_dict['pos'], traj_dict['vel']
trajectory = trajectory_tensor.numpy()
velocity = velocity_tensor.numpy()
+ # TODO: Do we need this or does mp_pytorch have this?
if self.post_traj_steps > 0:
trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.num_dof))])
return trajectory, velocity
- def set_mp_action_space(self):
+ def get_mp_action_space(self):
"""This function can be used to set up an individual space for the parameters of the mp."""
min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
- dtype=np.float32)
+ dtype=np.float32)
return mp_action_space
- def set_action_space(self):
+ def get_action_space(self):
"""
This function can be used to modify the action space for considering actions which are not learned via motion
primitives. E.g. ball releasing time for the beer pong task. By default, it is the parameter space of the
motion primitive.
Only needs to be overwritten if the action space needs to be modified.
"""
- return self.mp_action_space
+ try:
+ return self.mp_action_space
+ except AttributeError:
+ return self.get_mp_action_space()
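As the docstring notes, a subclass can widen the returned space with extra, non-MP action dimensions such as the ball-release time in the BeerPong task. A hedged, standalone sketch of that pattern (bounds and dimensions are made up; the real wrapper would take them from the MP and the env action space):

import numpy as np
import gym


def extend_action_space(mp_space: gym.spaces.Box, extra_low: float, extra_high: float) -> gym.spaces.Box:
    """Append one extra action dimension (e.g. a release time) to an MP parameter space."""
    low = np.concatenate((mp_space.low, [extra_low])).astype(np.float32)
    high = np.concatenate((mp_space.high, [extra_high])).astype(np.float32)
    return gym.spaces.Box(low=low, high=high, dtype=np.float32)


# Example: 7 DoFs x 2 basis weights plus a release time in [0, 1].
mp_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(14,), dtype=np.float32)
print(extend_action_space(mp_space, 0.0, 1.0).shape)   # (15,)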
def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
"""
@@ -111,7 +123,8 @@ class EpisodicWrapper(gym.Env, ABC):
"""
return action, None
- def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
+ def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[
+ np.ndarray]:
"""
This function can be used to modify the step_action with additional parameters e.g. releasing the ball in the
Beerpong env. The parameters used should not be part of the motion primitive parameters.
@@ -169,11 +182,15 @@ class EpisodicWrapper(gym.Env, ABC):
mp_params, env_spec_params = self._episode_callback(action)
trajectory, velocity = self.get_trajectory(mp_params)
+ # TODO
+ # self.time_steps = np.linspace(0, learned_duration, self.traj_steps)
+ # self.mp.set_mp_times(self.time_steps)
+
trajectory_length = len(trajectory)
- if self.verbose >=2 :
+ if self.verbose >= 2:
actions = np.zeros(shape=(trajectory_length,) + self.env.action_space.shape)
observations = np.zeros(shape=(trajectory_length,) + self.env.observation_space.shape,
- dtype=self.env.observation_space.dtype)
+ dtype=self.env.observation_space.dtype)
rewards = np.zeros(shape=(trajectory_length,))
trajectory_return = 0
@@ -181,7 +198,7 @@ class EpisodicWrapper(gym.Env, ABC):
for t, pos_vel in enumerate(zip(trajectory, velocity)):
step_action = self.controller.get_action(pos_vel[0], pos_vel[1], self.current_pos, self.current_vel)
- step_action = self._step_callback(t, env_spec_params, step_action) # include possible callback info
+ step_action = self._step_callback(t, env_spec_params, step_action) # include possible callback info
c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
# print('step/clipped action ratio: ', step_action/c_action)
obs, c_reward, done, info = self.env.step(c_action)
@@ -197,7 +214,7 @@ class EpisodicWrapper(gym.Env, ABC):
# infos['step_infos'].append(info)
if self.render_mode:
self.render(mode=self.render_mode, **self.render_kwargs)
- if done:
+ if done or do_replanning(kwargs):  # note: do_replanning / kwargs are not defined in this scope yet (replanning support is still work in progress)
break
infos.update({k: v[:t + 1] for k, v in infos.items()})
if self.verbose >= 2:
@@ -235,10 +252,10 @@ class EpisodicWrapper(gym.Env, ABC):
for i in range(des_trajs.shape[1]):
plt.figure(pos_fig.number)
plt.subplot(des_trajs.shape[1], 1, i + 1)
- plt.plot(np.ones(des_trajs.shape[0])*self.current_pos[i])
+ plt.plot(np.ones(des_trajs.shape[0]) * self.current_pos[i])
plt.plot(des_trajs[:, i])
plt.figure(vel_fig.number)
plt.subplot(des_vels.shape[1], 1, i + 1)
- plt.plot(np.ones(des_trajs.shape[0])*self.current_vel[i])
+ plt.plot(np.ones(des_trajs.shape[0]) * self.current_vel[i])
plt.plot(des_vels[:, i])
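Taken together, the rollout loop above means that a single step of an EpisodicWrapper consumes one set of MP parameters, unrolls the resulting trajectory with the tracking controller against the wrapped env, and (judging by the new reward_aggregation argument) condenses the per-step rewards into one return value. A simplified, self-contained sketch of that control flow with toy stand-ins for the env, the MP and the controller:

import numpy as np


def episodic_step(mp_params, duration=1.0, dt=0.02, reward_aggregation=np.mean):
    """Toy version of EpisodicWrapper.step: one 'action' parameterises a whole trajectory."""
    t = np.linspace(0, duration, int(duration / dt))
    trajectory = np.polyval(mp_params, t)        # stand-in for mp.get_mp_trajs()['pos']
    pos, rewards = 0.0, []
    for target in trajectory:
        step_action = 10.0 * (target - pos)      # toy P-controller (cf. the motor controller)
        pos = pos + dt * step_action             # toy integrator instead of env.step(...)
        rewards.append(-abs(target - pos))       # toy per-step reward
    return reward_aggregation(rewards)


print(episodic_step(np.array([0.5, 0.0, 0.1])))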
diff --git a/alr_envs/utils/__init__.py b/alr_envs/utils/__init__.py
index bb4b0bc..a96d882 100644
--- a/alr_envs/utils/__init__.py
+++ b/alr_envs/utils/__init__.py
@@ -20,12 +20,13 @@ def make_dmc(
environment_kwargs: dict = {},
time_limit: Union[None, float] = None,
channels_first: bool = True
-):
+ ):
# Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/__init__.py
# License: MIT
# Copyright (c) 2020 Denis Yarats
- assert re.match(r"\w+-\w+", id), "env_id does not have the following structure: 'domain_name-task_name'"
+ if not re.match(r"\w+-\w+", id):
+ raise ValueError("env_id does not have the following structure: 'domain_name-task_name'")
domain_name, task_name = id.split("-")
env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1'
@@ -60,7 +61,7 @@ def make_dmc(
camera_id=camera_id,
frame_skip=frame_skip,
channels_first=channels_first,
- ),
+ ),
max_episode_steps=max_episode_steps,
- )
+ )
return gym.make(env_id)
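The new ValueError above only enforces the 'domain_name-task_name' naming convention that make_dmc relies on before splitting the id; for example (task names taken from the dm_control suite):

import re

for env_id in ["fish-swim", "cartpole-balance", "fishswim"]:
    if not re.match(r"\w+-\w+", env_id):
        print(f"rejected: {env_id!r}")           # make_dmc raises ValueError here
    else:
        domain_name, task_name = env_id.split("-")
        print(domain_name, task_name)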
diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py
index 301a5aa..dbefeee 100644
--- a/alr_envs/utils/make_env_helpers.py
+++ b/alr_envs/utils/make_env_helpers.py
@@ -55,7 +55,7 @@ def make(env_id: str, seed, **kwargs):
Returns: Gym environment
"""
- if any([det_pmp in env_id for det_pmp in ["DetPMP", "detpmp"]]):
+ if any(deprec in env_id for deprec in ["DetPMP", "detpmp"]):
warnings.warn("DetPMP is deprecated and converted to ProMP")
env_id = env_id.replace("DetPMP", "ProMP")
env_id = env_id.replace("detpmp", "promp")
@@ -92,14 +92,17 @@ def make(env_id: str, seed, **kwargs):
from alr_envs import make_dmc
env = make_dmc(env_id, seed=seed, **kwargs)
- assert env.base_step_limit == env.spec.max_episode_steps, \
- f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym is different from " \
- f"the DMC environment specification of {env.base_step_limit} steps."
+ if not env.base_step_limit == env.spec.max_episode_steps:
+ raise ValueError(f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym "
+ f"is different from the DMC environment specification of {env.base_step_limit} steps.")
return env
-def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], mp: MPInterface, controller: BaseController,
- ep_wrapper_kwargs: Mapping, seed=1, **kwargs):
+
+def _make_wrapped_env(
+ env_id: str, wrappers: Iterable[Type[gym.Wrapper]], mp: MPInterface, controller: BaseController,
+ ep_wrapper_kwargs: Mapping, seed=1, **kwargs
+ ):
"""
Helper function for creating a wrapped gym environment using MPs.
It adds all provided wrappers to the specified environment and verifies at least one MPEnvWrapper is
@@ -120,13 +123,14 @@ def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], mp: MP
# only wrap the environment if not EpisodicWrapper, e.g. for vision
if not issubclass(w, EpisodicWrapper):
_env = w(_env)
- else: # if EpisodicWrapper, use specific constructor
+ else: # if EpisodicWrapper, use specific constructor
has_episodic_wrapper = True
_env = w(env=_env, mp=mp, controller=controller, **ep_wrapper_kwargs)
- assert has_episodic_wrapper, \
- "At least one MPEnvWrapper is required in order to leverage motion primitive environments."
+ if not has_episodic_wrapper:
+ raise ValueError("An EpisodicWrapper is required in order to leverage movement primitive environments.")
return _env
+
def make_mp_from_kwargs(
env_id: str, wrappers: Iterable, ep_wrapper_kwargs: MutableMapping, mp_kwargs: MutableMapping,
controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed=1,
@@ -152,7 +156,7 @@ def make_mp_from_kwargs(
_verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None))
dummy_env = make(env_id, seed)
if ep_wrapper_kwargs.get('duration', None) is None:
- ep_wrapper_kwargs['duration'] = dummy_env.spec.max_episode_steps*dummy_env.dt
+ ep_wrapper_kwargs['duration'] = dummy_env.spec.max_episode_steps * dummy_env.dt
if phase_kwargs.get('tau', None) is None:
phase_kwargs['tau'] = ep_wrapper_kwargs['duration']
mp_kwargs['action_dim'] = mp_kwargs.get('action_dim', np.prod(dummy_env.action_space.shape).item())
@@ -207,10 +211,11 @@ def make_mp_env_helper(**kwargs):
phase_kwargs = kwargs.pop("phase_generator_kwargs")
basis_kwargs = kwargs.pop("basis_generator_kwargs")
- return make_mp_from_kwargs(env_id=kwargs.pop("name"), wrappers=wrappers, ep_wrapper_kwargs=ep_wrapper_kwargs,
+ return make_mp_from_kwargs(env_id=kwargs.pop("name"), wrappers=wrappers, ep_wrapper_kwargs=ep_wrapper_kwargs,
mp_kwargs=mp_kwargs, controller_kwargs=contr_kwargs, phase_kwargs=phase_kwargs,
basis_kwargs=basis_kwargs, **kwargs, seed=seed)
+
def make_dmp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwargs):
"""
This can also be used standalone for manually building a custom DMP environment.
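One detail of make_mp_from_kwargs above worth spelling out: if no duration is passed, the trajectory duration (and, in turn, the phase generator's tau) is derived from the step limit and control timestep of the wrapped env. A small sketch with assumed numbers (250 steps and dt = 0.008 s, roughly the hopper setting):

# Assumed values for illustration; both depend on the concrete environment.
max_episode_steps, dt = 250, 0.008

ep_wrapper_kwargs, phase_kwargs = {}, {}
if ep_wrapper_kwargs.get('duration', None) is None:
    ep_wrapper_kwargs['duration'] = max_episode_steps * dt   # 2.0 seconds
if phase_kwargs.get('tau', None) is None:
    phase_kwargs['tau'] = ep_wrapper_kwargs['duration']
print(ep_wrapper_kwargs, phase_kwargs)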
From 8fe6a83271e015c41710cd55861f5344695da982 Mon Sep 17 00:00:00 2001
From: Onur
Date: Tue, 28 Jun 2022 20:33:19 +0200
Subject: [PATCH 042/104] started cleaning up init. DMP envs are still not
transferred. Wrappers for various environments still missing
---
alr_envs/alr/__init__.py | 1204 ++++++-----------
alr_envs/alr/mujoco/beerpong/beerpong.py | 64 +-
.../alr/mujoco/hopper_jump/hopper_jump.py | 70 -
3 files changed, 418 insertions(+), 920 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 09f533a..ec539db 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -21,6 +21,35 @@ from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
+DEFAULT_MP_ENV_DICT = {
+ "name": 'EnvName',
+ "wrappers": [],
+ "ep_wrapper_kwargs": {
+ "weight_scale": 1
+ },
+ "movement_primitives_kwargs": {
+ 'movement_primitives_type': 'promp',
+ 'action_dim': 7
+ },
+ "phase_generator_kwargs": {
+ 'phase_generator_type': 'linear',
+ 'delay': 0,
+ 'tau': 1.5, # initial value
+ 'learn_tau': False,
+ 'learn_delay': False
+ },
+ "controller_kwargs": {
+ 'controller_type': 'motor',
+ "p_gains": np.ones(7),
+ "d_gains": np.ones(7) * 0.1,
+ },
+ "basis_generator_kwargs": {
+ 'basis_generator_type': 'zero_rbf',
+ 'num_basis': 5,
+ 'num_basis_zero_start': 2
+ }
+}
+
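The per-environment configs further down are all derived from DEFAULT_MP_ENV_DICT via dict(...), which is only a shallow copy: appending to the nested 'wrappers' list or editing nested kwargs therefore also affects the shared default, whereas copy.deepcopy keeps the derived configs independent. A standalone sketch of the difference (with a stripped-down default dict):

from copy import deepcopy

DEFAULT = {"name": "EnvName", "wrappers": [], "phase_generator_kwargs": {"tau": 1.5}}

shallow = dict(DEFAULT)
shallow["wrappers"].append("SomeWrapper")       # also lands in DEFAULT["wrappers"]
print(DEFAULT["wrappers"])                      # ['SomeWrapper']

DEFAULT["wrappers"].clear()                     # reset before the deep-copy comparison
deep = deepcopy(DEFAULT)
deep["wrappers"].append("SomeWrapper")          # DEFAULT stays untouched
deep["phase_generator_kwargs"]["tau"] = 2.0
print(DEFAULT["wrappers"], DEFAULT["phase_generator_kwargs"])   # [] {'tau': 1.5}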
# Classic Control
## Simple Reacher
register(
@@ -32,16 +61,6 @@ register(
}
)
-register(
- id='SimpleReacher-v1',
- entry_point='alr_envs.alr.classic_control:SimpleReacherEnv',
- max_episode_steps=200,
- kwargs={
- "n_links": 2,
- "random_start": False
- }
-)
-
register(
id='LongSimpleReacher-v0',
entry_point='alr_envs.alr.classic_control:SimpleReacherEnv',
@@ -51,16 +70,6 @@ register(
}
)
-register(
- id='LongSimpleReacher-v1',
- entry_point='alr_envs.alr.classic_control:SimpleReacherEnv',
- max_episode_steps=200,
- kwargs={
- "n_links": 5,
- "random_start": False
- }
-)
-
## Viapoint Reacher
register(
@@ -91,38 +100,6 @@ register(
}
)
-register(
- id='HoleReacher-v1',
- entry_point='alr_envs.alr.classic_control:HoleReacherEnv',
- max_episode_steps=200,
- kwargs={
- "n_links": 5,
- "random_start": False,
- "allow_self_collision": False,
- "allow_wall_collision": False,
- "hole_width": 0.25,
- "hole_depth": 1,
- "hole_x": None,
- "collision_penalty": 100,
- }
-)
-
-register(
- id='HoleReacher-v2',
- entry_point='alr_envs.alr.classic_control:HoleReacherEnv',
- max_episode_steps=200,
- kwargs={
- "n_links": 5,
- "random_start": False,
- "allow_self_collision": False,
- "allow_wall_collision": False,
- "hole_width": 0.25,
- "hole_depth": 1,
- "hole_x": 2,
- "collision_penalty": 1,
- }
-)
-
# Mujoco
## Reacher
@@ -203,108 +180,39 @@ register(
}
)
-_vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1]
-for i in _vs:
- _env_id = f'ALRReacher{i}-v0'
- register(
- id=_env_id,
- entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
- max_episode_steps=200,
- kwargs={
- "steps_before_reward": 0,
- "n_links": 5,
- "balance": False,
- 'ctrl_cost_weight': i
- }
- )
-
- _env_id = f'ALRReacherSparse{i}-v0'
- register(
- id=_env_id,
- entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
- max_episode_steps=200,
- kwargs={
- "steps_before_reward": 200,
- "n_links": 5,
- "balance": False,
- 'ctrl_cost_weight': i
- }
- )
-
-# CtxtFree are v0, Contextual are v1
register(
id='ALRAntJump-v0',
entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
- kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP,
- "context": False
- }
-)
-
-# CtxtFree are v0, Contextual are v1
-register(
- id='ALRAntJump-v1',
- entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv',
- max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP,
"context": True
}
)
-# CtxtFree are v0, Contextual are v1
register(
id='ALRHalfCheetahJump-v0',
entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
- kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
- "context": False
- }
-)
-# CtxtFree are v0, Contextual are v1
-register(
- id='ALRHalfCheetahJump-v1',
- entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv',
- max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
"context": True
}
)
-# CtxtFree are v0, Contextual are v1
+
register(
id='ALRHopperJump-v0',
entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
- kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
- "context": False,
- "healthy_reward": 1.0
- }
-)
-register(
- id='ALRHopperJump-v1',
- entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv',
- max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
"context": True
}
)
+#### Hopper Jump with random joints and desired position
register(
- id='ALRHopperJump-v2',
- entry_point='alr_envs.alr.mujoco:ALRHopperJumpRndmPosEnv',
- max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
- kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP
- }
-)
-
-register(
- id='ALRHopperJump-v3',
+ id='ALRHopperJumpRndmJointsDesPos-v0',
entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
kwargs={
@@ -314,9 +222,9 @@ register(
}
)
-##### Hopper Jump step based reward
+##### Hopper Jump with random joints and desired position, step-based reward
register(
- id='ALRHopperJump-v4',
+ id='ALRHopperJumpRndmJointsDesPosStepBased-v0',
entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnvStepBased',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
kwargs={
@@ -326,84 +234,40 @@ register(
}
)
-
-# CtxtFree are v0, Contextual are v1
register(
id='ALRHopperJumpOnBox-v0',
entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
- kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
- "context": False
- }
-)
-# CtxtFree are v0, Contextual are v1
-register(
- id='ALRHopperJumpOnBox-v1',
- entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv',
- max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
"context": True
}
)
-# CtxtFree are v0, Contextual are v1
register(
id='ALRHopperThrow-v0',
entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
- kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW,
- "context": False
- }
-)
-# CtxtFree are v0, Contextual are v1
-register(
- id='ALRHopperThrow-v1',
- entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv',
- max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW,
"context": True
}
)
-# CtxtFree are v0, Contextual are v1
register(
id='ALRHopperThrowInBasket-v0',
entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
- kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
- "context": False
- }
-)
-# CtxtFree are v0, Contextual are v1
-register(
- id='ALRHopperThrowInBasket-v1',
- entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv',
- max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
"context": True
}
)
-# CtxtFree are v0, Contextual are v1
+
register(
id='ALRWalker2DJump-v0',
entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
- kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP,
- "context": False
- }
-)
-# CtxtFree are v0, Contextual are v1
-register(
- id='ALRWalker2DJump-v1',
- entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv',
- max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP,
"context": True
@@ -427,76 +291,46 @@ register(id='TableTennis2DCtxt-v0',
max_episode_steps=MAX_EPISODE_STEPS,
kwargs={'ctxt_dim': 2})
-register(id='TableTennis2DCtxt-v1',
- entry_point='alr_envs.alr.mujoco:TTEnvGym',
- max_episode_steps=MAX_EPISODE_STEPS,
- kwargs={'ctxt_dim': 2, 'fixed_goal': True})
-
register(id='TableTennis4DCtxt-v0',
entry_point='alr_envs.alr.mujoco:TTEnvGym',
max_episode_steps=MAX_EPISODE_STEPS,
kwargs={'ctxt_dim': 4})
-## BeerPong
-# fixed goal cup position
register(
- id='ALRBeerPong-v0',
- entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
- max_episode_steps=300,
- kwargs={
- "rndm_goal": False,
- "cup_goal_pos": [0.1, -2.0],
- "frame_skip": 2
- }
- )
+ id='ALRBeerPong-v0',
+ entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+ max_episode_steps=300,
+ kwargs={
+ "rndm_goal": True,
+ "cup_goal_pos": [-0.3, -1.2],
+ "frame_skip": 2
+ }
+)
-
-# random goal cup position
+# Here we use the same reward as in ALRBeerPong-v0, but after the release only one (logical) time step is taken,
+# i.e. a single step call simulates until the end of the episode
register(
- id='ALRBeerPong-v1',
- entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
- max_episode_steps=300,
- kwargs={
- "rndm_goal": True,
- "cup_goal_pos": [-0.3, -1.2],
- "frame_skip": 2
- }
- )
-
-# random goal cup position
-register(
- id='ALRBeerPong-v2',
- entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBased',
- max_episode_steps=300,
- kwargs={
- "rndm_goal": True,
- "cup_goal_pos": [-0.3, -1.2],
- "frame_skip": 2
- }
- )
-# Beerpong with episodic reward, but fixed release time step
-register(
- id='ALRBeerPong-v3',
- entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBasedEpisodicReward',
- max_episode_steps=300,
- kwargs={
- "rndm_goal": True,
- "cup_goal_pos": [-0.3, -1.2],
- "frame_skip": 2
- }
- )
+ id='ALRBeerPongStepBased-v0',
+ entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBased',
+ max_episode_steps=300,
+ kwargs={
+ "rndm_goal": True,
+ "cup_goal_pos": [-0.3, -1.2],
+ "frame_skip": 2
+ }
+)
# Beerpong with episodic reward, but fixed release time step
register(
- id='ALRBeerPong-v4',
- entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvFixedReleaseStep',
- max_episode_steps=300,
- kwargs={
- "rndm_goal": True,
- "cup_goal_pos": [-0.3, -1.2],
- "frame_skip": 2
- }
- )
+ id='ALRBeerPongFixedRelease-v0',
+ entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvFixedReleaseStep',
+ max_episode_steps=300,
+ kwargs={
+ "rndm_goal": True,
+ "cup_goal_pos": [-0.3, -1.2],
+ "frame_skip": 2
+ }
+)
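For orientation, the renamed BeerPong ids registered above can be instantiated like any other gym environment once the package is imported; this is a hedged usage sketch (it assumes alr_envs and its MuJoCo dependencies are installed):

import gym
import alr_envs  # noqa: F401  (importing the package runs the register() calls above)

env = gym.make('ALRBeerPong-v0')                 # random goal cup position
# env = gym.make('ALRBeerPongStepBased-v0')      # step-based reward after the release
# env = gym.make('ALRBeerPongFixedRelease-v0')   # episodic reward, fixed release step
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())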
# Motion Primitive Environments
@@ -530,25 +364,17 @@ for _v in _versions:
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}'
+ kwargs_dict_simple_reacher_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_simple_reacher_promp['wrappers'].append('TODO') # TODO
+ kwargs_dict_simple_reacher_promp['movement_primitives_kwargs']['action_dim'] = 2 if "long" not in _v.lower() else 5
+ kwargs_dict_simple_reacher_promp['phase_generator_kwargs']['tau'] = 2
+ kwargs_dict_simple_reacher_promp['controller_kwargs']['p_gains'] = 0.6
+ kwargs_dict_simple_reacher_promp['controller_kwargs']['d_gains'] = 0.075
+ kwargs_dict_simple_reacher_promp['name'] = _env_id
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": f"alr_envs:{_v}",
- "wrappers": [classic_control.simple_reacher.MPWrapper],
- "mp_kwargs": {
- "num_dof": 2 if "long" not in _v.lower() else 5,
- "num_basis": 5,
- "duration": 2,
- "policy_type": "motor",
- "weights_scale": 1,
- "zero_start": True,
- "policy_kwargs": {
- "p_gains": .6,
- "d_gains": .075
- }
- }
- }
+ kwargs=kwargs_dict_simple_reacher_promp
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@@ -573,28 +399,24 @@ register(
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
+kwargs_dict_via_point_reacher_promp = dict(DEFAULT_MP_ENV_DICT)
+kwargs_dict_via_point_reacher_promp['wrappers'].append('TODO') # TODO
+kwargs_dict_via_point_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5
+kwargs_dict_via_point_reacher_promp['phase_generator_kwargs']['tau'] = 2
+kwargs_dict_via_point_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
+kwargs_dict_via_point_reacher_promp['name'] = "ViaPointReacherProMP-v0"
register(
id="ViaPointReacherProMP-v0",
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": f"alr_envs:ViaPointReacher-v0",
- "wrappers": [classic_control.viapoint_reacher.MPWrapper],
- "mp_kwargs": {
- "num_dof": 5,
- "num_basis": 5,
- "duration": 2,
- "policy_type": "velocity",
- "weights_scale": 1,
- "zero_start": True
- }
- }
+ kwargs=kwargs_dict_via_point_reacher_promp
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0")
## Hole Reacher
-_versions = ["v0", "v1", "v2"]
+_versions = ["HoleReacher-v0"]
for _v in _versions:
- _env_id = f'HoleReacherDMP-{_v}'
+ _name = _v.split("-")
+ _env_id = f'{_name[0]}DMP-{_name[1]}'
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
@@ -617,22 +439,19 @@ for _v in _versions:
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
- _env_id = f'HoleReacherProMP-{_v}'
+ _env_id = f'{_name[0]}ProMP-{_name[1]}'
+ kwargs_dict_hole_reacher_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_hole_reacher_promp['wrappers'].append('TODO') # TODO
+ kwargs_dict_hole_reacher_promp['ep_wrapper_kwargs']['weight_scale'] = 2
+ kwargs_dict_hole_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5
+ kwargs_dict_hole_reacher_promp['phase_generator_kwargs']['tau'] = 2
+ kwargs_dict_hole_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
+ kwargs_dict_hole_reacher_promp['basis_generator_kwargs']['num_basis'] = 5
+ kwargs_dict_hole_reacher_promp['name'] = f"alr_envs:{_v}"
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": f"alr_envs:HoleReacher-{_v}",
- "wrappers": [classic_control.hole_reacher.MPWrapper],
- "mp_kwargs": {
- "num_dof": 5,
- "num_basis": 3,
- "duration": 2,
- "policy_type": "velocity",
- "weights_scale": 5,
- "zero_start": True
- }
- }
+ kwargs=kwargs_dict_hole_reacher_promp
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@@ -666,30 +485,268 @@ for _v in _versions:
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}'
+ kwargs_dict_alr_reacher_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_alr_reacher_promp['wrappers'].append('TODO') # TODO
+ kwargs_dict_alr_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5 if "long" not in _v.lower() else 7
+ kwargs_dict_alr_reacher_promp['phase_generator_kwargs']['tau'] = 4
+ kwargs_dict_alr_reacher_promp['controller_kwargs']['p_gains'] = 1
+ kwargs_dict_alr_reacher_promp['controller_kwargs']['d_gains'] = 0.1
+ kwargs_dict_alr_reacher_promp['name'] = f"alr_envs:{_v}"
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": f"alr_envs:{_v}",
- "wrappers": [mujoco.reacher.MPWrapper],
- "mp_kwargs": {
- "num_dof": 5 if "long" not in _v.lower() else 7,
- "num_basis": 2,
- "duration": 4,
- "policy_type": "motor",
- "weights_scale": 5,
- "zero_start": True,
- "policy_kwargs": {
- "p_gains": 1,
- "d_gains": 0.1
- }
- }
- }
+ kwargs=kwargs_dict_alr_reacher_promp
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+########################################################################################################################
+## Beerpong ProMP
+_versions = ['ALRBeerPong-v0']
+for _v in _versions:
+ _name = _v.split("-")
+ _env_id = f'{_name[0]}ProMP-{_name[1]}'
+ kwargs_dict_bp_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.NewMPWrapper)
+ kwargs_dict_bp_promp['movement_primitives_kwargs']['action_dim'] = 7
+ kwargs_dict_bp_promp['phase_generator_kwargs']['tau'] = 0.8
+ kwargs_dict_bp_promp['phase_generator_kwargs']['learn_tau'] = True
+ kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25])
+ kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+ kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2
+ kwargs_dict_bp_promp['name'] = f"alr_envs:{_v}"
+ register(
+ id=_env_id,
+ entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+ kwargs=kwargs_dict_bp_promp
+ )
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+### BP with Fixed release
+_versions = ["ALRBeerPongStepBased-v0", "ALRBeerPongFixedRelease-v0"]
+for _v in _versions:
+ _name = _v.split("-")
+ _env_id = f'{_name[0]}ProMP-{_name[1]}'
+ kwargs_dict_bp_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.NewMPWrapper)
+ kwargs_dict_bp_promp['movement_primitives_kwargs']['action_dim'] = 7
+ kwargs_dict_bp_promp['phase_generator_kwargs']['tau'] = 0.62
+ kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25])
+ kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+ kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2
+ kwargs_dict_bp_promp['name'] = f"alr_envs:{_v}"
+ register(
+ id=_env_id,
+ entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+ kwargs=kwargs_dict_bp_promp
+ )
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
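The main difference between the two BeerPong ProMP registrations above is whether tau (the movement duration up to the release) is learned. Following the mp_pytorch convention already referenced in episodic_wrapper.py, learned tau/delay parameters sit at the front of the flat parameter vector, followed by the basis weights. A hedged sketch of how such an action vector splits (7 DoFs x 2 basis functions assumed):

import numpy as np

num_dof, num_basis = 7, 2
learn_tau, learn_delay = True, False

n_extra = int(learn_tau) + int(learn_delay)
action = np.random.randn(n_extra + num_dof * num_basis)   # what the ProMP agent outputs

tau_and_delay, weights = action[:n_extra], action[n_extra:]
print(tau_and_delay.shape, weights.reshape(num_dof, num_basis).shape)   # (1,) (7, 2)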
+########################################################################################################################
+
+## Table Tennis needs to be fixed according to Zhou's implementation
+
+########################################################################################################################
+
+## AntJump
+_versions = ['ALRAntJump-v0']
+for _v in _versions:
+ _name = _v.split("-")
+ _env_id = f'{_name[0]}ProMP-{_name[1]}'
+ kwargs_dict_ant_jump_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_ant_jump_promp['wrappers'].append(mujoco.ant_jump.NewMPWrapper)
+ kwargs_dict_ant_jump_promp['movement_primitives_kwargs']['action_dim'] = 8
+ kwargs_dict_ant_jump_promp['phase_generator_kwargs']['tau'] = 10
+ kwargs_dict_ant_jump_promp['controller_kwargs']['p_gains'] = np.ones(8)
+ kwargs_dict_ant_jump_promp['controller_kwargs']['d_gains'] = 0.1 * np.ones(8)
+ kwargs_dict_ant_jump_promp['name'] = f"alr_envs:{_v}"
+ register(
+ id=_env_id,
+ entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+ kwargs=kwargs_dict_ant_jump_promp
+ )
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+########################################################################################################################
+
+## HalfCheetahJump
+_versions = ['ALRHalfCheetahJump-v0']
+for _v in _versions:
+ _name = _v.split("-")
+ _env_id = f'{_name[0]}ProMP-{_name[1]}'
+ kwargs_dict_halfcheetah_jump_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_halfcheetah_jump_promp['wrappers'].append(mujoco.ant_jump.NewMPWrapper)
+ kwargs_dict_halfcheetah_jump_promp['movement_primitives_kwargs']['action_dim'] = 6
+ kwargs_dict_halfcheetah_jump_promp['phase_generator_kwargs']['tau'] = 5
+ kwargs_dict_halfcheetah_jump_promp['controller_kwargs']['p_gains'] = np.ones(6)
+ kwargs_dict_halfcheetah_jump_promp['controller_kwargs']['d_gains'] = 0.1 * np.ones(6)
+ kwargs_dict_halfcheetah_jump_promp['name'] = f"alr_envs:{_v}"
+ register(
+ id=_env_id,
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs=kwargs_dict_halfcheetah_jump_promp
+ )
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+########################################################################################################################
+
+
+## HopperJump
+_versions = ['ALRHopperJump-v0', 'ALRHopperJumpRndmJointsDesPos-v0', 'ALRHopperJumpRndmJointsDesPosStepBased-v0',
+ 'ALRHopperJumpOnBox-v0', 'ALRHopperThrow-v0', 'ALRHopperThrowInBasket-v0']
+
+for _v in _versions:
+ _name = _v.split("-")
+ _env_id = f'{_name[0]}ProMP-{_name[1]}'
+ kwargs_dict_hopper_jump_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_hopper_jump_promp['wrappers'].append('TODO') # TODO
+ kwargs_dict_hopper_jump_promp['movement_primitives_kwargs']['action_dim'] = 3
+ kwargs_dict_hopper_jump_promp['phase_generator_kwargs']['tau'] = 2
+ kwargs_dict_hopper_jump_promp['controller_kwargs']['p_gains'] = np.ones(3)
+ kwargs_dict_hopper_jump_promp['controller_kwargs']['d_gains'] = 0.1 * np.ones(3)
+ kwargs_dict_hopper_jump_promp['name'] = f"alr_envs:{_v}"
+ register(
+ id=_env_id,
+ entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+ kwargs=kwargs_dict_hopper_jump_promp
+ )
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+########################################################################################################################
+
+
+## Walker2DJump
+_versions = ['ALRWalker2DJump-v0']
+for _v in _versions:
+ _name = _v.split("-")
+ _env_id = f'{_name[0]}ProMP-{_name[1]}'
+ kwargs_dict_walker2d_jump_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_walker2d_jump_promp['wrappers'].append('TODO') # TODO
+ kwargs_dict_walker2d_jump_promp['movement_primitives_kwargs']['action_dim'] = 6
+ kwargs_dict_walker2d_jump_promp['phase_generator_kwargs']['tau'] = 2.4
+ kwargs_dict_walker2d_jump_promp['controller_kwargs']['p_gains'] = np.ones(6)
+ kwargs_dict_walker2d_jump_promp['controller_kwargs']['d_gains'] = 0.1 * np.ones(6)
+ kwargs_dict_walker2d_jump_promp['name'] = f"alr_envs:{_v}"
+ register(
+ id=_env_id,
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs=kwargs_dict_walker2d_jump_promp
+ )
+ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+### Deprecated, we will not provide non-random starts anymore
+"""
+register(
+ id='SimpleReacher-v1',
+ entry_point='alr_envs.alr.classic_control:SimpleReacherEnv',
+ max_episode_steps=200,
+ kwargs={
+ "n_links": 2,
+ "random_start": False
+ }
+)
+
+register(
+ id='LongSimpleReacher-v1',
+ entry_point='alr_envs.alr.classic_control:SimpleReacherEnv',
+ max_episode_steps=200,
+ kwargs={
+ "n_links": 5,
+ "random_start": False
+ }
+)
+register(
+ id='HoleReacher-v1',
+ entry_point='alr_envs.alr.classic_control:HoleReacherEnv',
+ max_episode_steps=200,
+ kwargs={
+ "n_links": 5,
+ "random_start": False,
+ "allow_self_collision": False,
+ "allow_wall_collision": False,
+ "hole_width": 0.25,
+ "hole_depth": 1,
+ "hole_x": None,
+ "collision_penalty": 100,
+ }
+)
+register(
+ id='HoleReacher-v2',
+ entry_point='alr_envs.alr.classic_control:HoleReacherEnv',
+ max_episode_steps=200,
+ kwargs={
+ "n_links": 5,
+ "random_start": False,
+ "allow_self_collision": False,
+ "allow_wall_collision": False,
+ "hole_width": 0.25,
+ "hole_depth": 1,
+ "hole_x": 2,
+ "collision_penalty": 1,
+ }
+)
+
+# CtxtFree are v0, Contextual are v1
+register(
+ id='ALRAntJump-v0',
+ entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP,
+ "context": False
+ }
+)
+# CtxtFree are v0, Contextual are v1
+register(
+ id='ALRHalfCheetahJump-v0',
+ entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
+ "context": False
+ }
+)
+register(
+ id='ALRHopperJump-v0',
+ entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
+ kwargs={
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
+ "context": False,
+ "healthy_reward": 1.0
+ }
+)
+
+"""
+
+### Deprecated, used for the CoRL paper
+"""
_vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1]
+for i in _vs:
+ _env_id = f'ALRReacher{i}-v0'
+ register(
+ id=_env_id,
+ entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
+ max_episode_steps=200,
+ kwargs={
+ "steps_before_reward": 0,
+ "n_links": 5,
+ "balance": False,
+ 'ctrl_cost_weight': i
+ }
+ )
+
+ _env_id = f'ALRReacherSparse{i}-v0'
+ register(
+ id=_env_id,
+ entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
+ max_episode_steps=200,
+ kwargs={
+ "steps_before_reward": 200,
+ "n_links": 5,
+ "balance": False,
+ 'ctrl_cost_weight': i
+ }
+ )
+ _vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1]
for i in _vs:
_env_id = f'ALRReacher{i}ProMP-v0'
register(
@@ -736,543 +793,56 @@ for i in _vs:
}
}
)
-
-
-# ## Beerpong
-# _versions = ["v0", "v1"]
-# for _v in _versions:
-# _env_id = f'BeerpongProMP-{_v}'
-# register(
-# id=_env_id,
-# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-# kwargs={
-# "name": f"alr_envs:ALRBeerPong-{_v}",
-# "wrappers": [mujoco.beerpong.MPWrapper],
-# "mp_kwargs": {
-# "num_dof": 7,
-# "num_basis": 2,
-# # "duration": 1,
-# "duration": 0.5,
-# # "post_traj_time": 2,
-# "post_traj_time": 2.5,
-# "policy_type": "motor",
-# "weights_scale": 0.14,
-# # "weights_scale": 1,
-# "zero_start": True,
-# "zero_goal": False,
-# "policy_kwargs": {
-# "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
-# "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
-# }
-# }
-# }
-# )
-# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-## Beerpong
-_versions = ["v0", "v1"]
-for _v in _versions:
- _env_id = f'BeerpongProMP-{_v}'
+
register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+ id='ALRHopperJumpOnBox-v0',
+ entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
kwargs={
- "name": f"alr_envs:ALRBeerPong-{_v}",
- "wrappers": [mujoco.beerpong.NewMPWrapper],
- "ep_wrapper_kwargs": {
- "weight_scale": 1
- },
- "movement_primitives_kwargs": {
- 'movement_primitives_type': 'promp',
- 'action_dim': 7
- },
- "phase_generator_kwargs": {
- 'phase_generator_type': 'linear',
- 'delay': 0,
- 'tau': 0.8, # initial value
- 'learn_tau': True,
- 'learn_delay': False
- },
- "controller_kwargs": {
- 'controller_type': 'motor',
- "p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
- "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]),
- },
- "basis_generator_kwargs": {
- 'basis_generator_type': 'zero_rbf',
- 'num_basis': 2,
- 'num_basis_zero_start': 2
- }
- }
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
+ "context": False
+ }
)
- ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-## Beerpong ProMP fixed release
-_env_id = 'BeerpongProMP-v2'
-register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+ register(
+ id='ALRHopperThrow-v0',
+ entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
kwargs={
- "name": "alr_envs:ALRBeerPong-v4",
- "wrappers": [mujoco.beerpong.NewMPWrapper],
- "ep_wrapper_kwargs": {
- "weight_scale": 1
- },
- "movement_primitives_kwargs": {
- 'movement_primitives_type': 'promp',
- 'action_dim': 7
- },
- "phase_generator_kwargs": {
- 'phase_generator_type': 'linear',
- 'delay': 0,
- 'tau': 0.62, # initial value
- 'learn_tau': False,
- 'learn_delay': False
- },
- "controller_kwargs": {
- 'controller_type': 'motor',
- "p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
- "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]),
- },
- "basis_generator_kwargs": {
- 'basis_generator_type': 'zero_rbf',
- 'num_basis': 2,
- 'num_basis_zero_start': 2
- }
- }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-## Table Tennis
-ctxt_dim = [2, 4]
-for _v, cd in enumerate(ctxt_dim):
- _env_id = f'TableTennisProMP-v{_v}'
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW,
+ "context": False
+ }
+ )
register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": "alr_envs:TableTennis{}DCtxt-v0".format(cd),
- "wrappers": [mujoco.table_tennis.MPWrapper],
- "mp_kwargs": {
- "num_dof": 7,
- "num_basis": 2,
- "duration": 1.25,
- "post_traj_time": 1.5,
- "policy_type": "motor",
- "weights_scale": 1.0,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
- "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
- }
- }
- }
- )
- ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-## AntJump
-_versions = ["v0", "v1"]
-for _v in _versions:
- _env_id = f'ALRAntJumpProMP-{_v}'
- register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": f"alr_envs:ALRAntJump-{_v}",
- "wrappers": [mujoco.ant_jump.MPWrapper],
- "mp_kwargs": {
- "num_dof": 8,
- "num_basis": 5,
- "duration": 10,
- "post_traj_time": 0,
- "policy_type": "motor",
- "weights_scale": 1.0,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": np.ones(8),
- "d_gains": 0.1*np.ones(8)
- }
- }
- }
- )
- ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-## AntJump
-_versions = ["v0", "v1"]
-for _v in _versions:
- _env_id = f'ALRAntJumpProMP-{_v}'
- register(
- id= _env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
- kwargs={
- "name": f"alr_envs:ALRAntJump-{_v}",
- "wrappers": [mujoco.ant_jump.NewMPWrapper],
- "ep_wrapper_kwargs": {
- "weight_scale": 1
- },
- "movement_primitives_kwargs": {
- 'movement_primitives_type': 'promp',
- 'action_dim': 8
- },
- "phase_generator_kwargs": {
- 'phase_generator_type': 'linear',
- 'delay': 0,
- 'tau': 10, # initial value
- 'learn_tau': False,
- 'learn_delay': False
- },
- "controller_kwargs": {
- 'controller_type': 'motor',
- "p_gains": np.ones(8),
- "d_gains": 0.1*np.ones(8),
- },
- "basis_generator_kwargs": {
- 'basis_generator_type': 'zero_rbf',
- 'num_basis': 5,
- 'num_basis_zero_start': 2
- }
- }
- )
- ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-
-
-## HalfCheetahJump
-_versions = ["v0", "v1"]
-for _v in _versions:
- _env_id = f'ALRHalfCheetahJumpProMP-{_v}'
- register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": f"alr_envs:ALRHalfCheetahJump-{_v}",
- "wrappers": [mujoco.half_cheetah_jump.MPWrapper],
- "mp_kwargs": {
- "num_dof": 6,
- "num_basis": 5,
- "duration": 5,
- "post_traj_time": 0,
- "policy_type": "motor",
- "weights_scale": 1.0,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": np.ones(6),
- "d_gains": 0.1*np.ones(6)
- }
- }
- }
- )
- ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-# ## HopperJump
-# _versions = ["v0", "v1"]
-# for _v in _versions:
-# _env_id = f'ALRHopperJumpProMP-{_v}'
-# register(
-# id= _env_id,
-# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-# kwargs={
-# "name": f"alr_envs:ALRHopperJump-{_v}",
-# "wrappers": [mujoco.hopper_jump.MPWrapper],
-# "mp_kwargs": {
-# "num_dof": 3,
-# "num_basis": 5,
-# "duration": 2,
-# "post_traj_time": 0,
-# "policy_type": "motor",
-# "weights_scale": 1.0,
-# "zero_start": True,
-# "zero_goal": False,
-# "policy_kwargs": {
-# "p_gains": np.ones(3),
-# "d_gains": 0.1*np.ones(3)
-# }
-# }
-# }
-# )
-# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-# ## HopperJump
-# register(
-# id= "ALRHopperJumpProMP-v2",
-# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-# kwargs={
-# "name": f"alr_envs:ALRHopperJump-v2",
-# "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper],
-# "mp_kwargs": {
-# "num_dof": 3,
-# "num_basis": 5,
-# "duration": 2,
-# "post_traj_time": 0,
-# "policy_type": "motor",
-# "weights_scale": 1.0,
-# "zero_start": True,
-# "zero_goal": False,
-# "policy_kwargs": {
-# "p_gains": np.ones(3),
-# "d_gains": 0.1*np.ones(3)
-# }
-# }
-# }
-# )
-# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")
-
-## HopperJump
-_versions = ["v0", "v1"]
-for _v in _versions:
- _env_id = f'ALRHopperJumpProMP-{_v}'
- register(
- id= _env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
- kwargs={
- "name": f"alr_envs:ALRHopperJump-{_v}",
- "wrappers": [mujoco.hopper_jump.NewMPWrapper],
- "ep_wrapper_kwargs": {
- "weight_scale": 1
- },
- "movement_primitives_kwargs": {
- 'movement_primitives_type': 'promp',
- 'action_dim': 3
- },
- "phase_generator_kwargs": {
- 'phase_generator_type': 'linear',
- 'delay': 0,
- 'tau': 2, # initial value
- 'learn_tau': False,
- 'learn_delay': False
- },
- "controller_kwargs": {
- 'controller_type': 'motor',
- "p_gains": np.ones(3),
- "d_gains": 0.1*np.ones(3),
- },
- "basis_generator_kwargs": {
- 'basis_generator_type': 'zero_rbf',
- 'num_basis': 5,
- 'num_basis_zero_start': 1
- }
- }
- )
- ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-## HopperJump
-register(
- id= "ALRHopperJumpProMP-v2",
- entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+ id='ALRHopperThrowInBasket-v0',
+ entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
kwargs={
- "name": f"alr_envs:ALRHopperJump-v2",
- "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper],
- "ep_wrapper_kwargs": {
- "weight_scale": 1
- },
- "movement_primitives_kwargs": {
- 'movement_primitives_type': 'promp',
- 'action_dim': 3
- },
- "phase_generator_kwargs": {
- 'phase_generator_type': 'linear',
- 'delay': 0,
- 'tau': 2, # initial value
- 'learn_tau': False,
- 'learn_delay': False
- },
- "controller_kwargs": {
- 'controller_type': 'motor',
- "p_gains": np.ones(3),
- "d_gains": 0.1*np.ones(3),
- },
- "basis_generator_kwargs": {
- 'basis_generator_type': 'zero_rbf',
- 'num_basis': 5,
- 'num_basis_zero_start': 1
- }
- }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")
-
-
-## HopperJump
-register(
- id= "ALRHopperJumpProMP-v3",
- entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
+ "context": False
+ }
+ )
+ register(
+ id='ALRWalker2DJump-v0',
+ entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
kwargs={
- "name": f"alr_envs:ALRHopperJump-v3",
- "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper],
- "ep_wrapper_kwargs": {
- "weight_scale": 1
- },
- "movement_primitives_kwargs": {
- 'movement_primitives_type': 'promp',
- 'action_dim': 3
- },
- "phase_generator_kwargs": {
- 'phase_generator_type': 'linear',
- 'delay': 0,
- 'tau': 2, # initial value
- 'learn_tau': False,
- 'learn_delay': False
- },
- "controller_kwargs": {
- 'controller_type': 'motor',
- "p_gains": np.ones(3),
- "d_gains": 0.1*np.ones(3),
- },
- "basis_generator_kwargs": {
- 'basis_generator_type': 'zero_rbf',
- 'num_basis': 5,
- 'num_basis_zero_start': 1
- }
- }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v3")
-
-
-## HopperJump
-register(
- id= "ALRHopperJumpProMP-v4",
- entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
- kwargs={
- "name": f"alr_envs:ALRHopperJump-v4",
- "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper],
- "ep_wrapper_kwargs": {
- "weight_scale": 1
- },
- "movement_primitives_kwargs": {
- 'movement_primitives_type': 'promp',
- 'action_dim': 3
- },
- "phase_generator_kwargs": {
- 'phase_generator_type': 'linear',
- 'delay': 0,
- 'tau': 2, # initial value
- 'learn_tau': False,
- 'learn_delay': False
- },
- "controller_kwargs": {
- 'controller_type': 'motor',
- "p_gains": np.ones(3),
- "d_gains": 0.1*np.ones(3),
- },
- "basis_generator_kwargs": {
- 'basis_generator_type': 'zero_rbf',
- 'num_basis': 5,
- 'num_basis_zero_start': 1
- }
- }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v4")
-
-## HopperJumpOnBox
-_versions = ["v0", "v1"]
-for _v in _versions:
- _env_id = f'ALRHopperJumpOnBoxProMP-{_v}'
- register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": f"alr_envs:ALRHopperJumpOnBox-{_v}",
- "wrappers": [mujoco.hopper_jump.MPWrapper],
- "mp_kwargs": {
- "num_dof": 3,
- "num_basis": 5,
- "duration": 2,
- "post_traj_time": 0,
- "policy_type": "motor",
- "weights_scale": 1.0,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": np.ones(3),
- "d_gains": 0.1*np.ones(3)
- }
- }
- }
+ "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP,
+ "context": False
+ }
)
- ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+ register(id='TableTennis2DCtxt-v1',
+ entry_point='alr_envs.alr.mujoco:TTEnvGym',
+ max_episode_steps=MAX_EPISODE_STEPS,
+ kwargs={'ctxt_dim': 2, 'fixed_goal': True})
-#HopperThrow
-_versions = ["v0", "v1"]
-for _v in _versions:
- _env_id = f'ALRHopperThrowProMP-{_v}'
register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ id='ALRBeerPong-v0',
+ entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+ max_episode_steps=300,
kwargs={
- "name": f"alr_envs:ALRHopperThrow-{_v}",
- "wrappers": [mujoco.hopper_throw.MPWrapper],
- "mp_kwargs": {
- "num_dof": 3,
- "num_basis": 5,
- "duration": 2,
- "post_traj_time": 0,
- "policy_type": "motor",
- "weights_scale": 1.0,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": np.ones(3),
- "d_gains": 0.1*np.ones(3)
- }
- }
+ "rndm_goal": False,
+ "cup_goal_pos": [0.1, -2.0],
+ "frame_skip": 2
}
- )
- ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-## HopperThrowInBasket
-_versions = ["v0", "v1"]
-for _v in _versions:
- _env_id = f'ALRHopperThrowInBasketProMP-{_v}'
- register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": f"alr_envs:ALRHopperThrowInBasket-{_v}",
- "wrappers": [mujoco.hopper_throw.MPWrapper],
- "mp_kwargs": {
- "num_dof": 3,
- "num_basis": 5,
- "duration": 2,
- "post_traj_time": 0,
- "policy_type": "motor",
- "weights_scale": 1.0,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": np.ones(3),
- "d_gains": 0.1*np.ones(3)
- }
- }
- }
- )
- ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-## Walker2DJump
-_versions = ["v0", "v1"]
-for _v in _versions:
- _env_id = f'ALRWalker2DJumpProMP-{_v}'
- register(
- id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
- kwargs={
- "name": f"alr_envs:ALRWalker2DJump-{_v}",
- "wrappers": [mujoco.walker_2d_jump.MPWrapper],
- "mp_kwargs": {
- "num_dof": 6,
- "num_basis": 5,
- "duration": 2.4,
- "post_traj_time": 0,
- "policy_type": "motor",
- "weights_scale": 1.0,
- "zero_start": True,
- "zero_goal": False,
- "policy_kwargs": {
- "p_gains": np.ones(6),
- "d_gains": 0.1*np.ones(6)
- }
- }
- }
- )
- ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
\ No newline at end of file
+ )
+"""
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index b7d376e..64d9e78 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -178,8 +178,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
[self._steps],
])
- def compute_reward(self):
-
@property
def dt(self):
return super(ALRBeerBongEnv, self).dt * self.repeat_action
@@ -213,37 +211,37 @@ class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
return ob, reward, done, infos
-class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
- def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
- super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
- self.release_step = 62 # empirically evaluated for frame_skip=2!
-
- def step(self, a):
- if self._steps < self.release_step:
- return super(ALRBeerBongEnvStepBased, self).step(a)
- else:
- reward = 0
- done = False
- while not done:
- sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBased, self).step(np.zeros(a.shape))
- if not done or sub_infos['sim_crash']:
- reward += sub_reward
- else:
- ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy()
- cup_goal_dist_final = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
- self.sim.model._site_name2id["cup_goal_final_table"]].copy())
- cup_goal_dist_top = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
- self.sim.model._site_name2id["cup_goal_table"]].copy())
- if sub_infos['success']:
- dist_rew = -cup_goal_dist_final ** 2
- else:
- dist_rew = -0.5 * cup_goal_dist_final ** 2 - cup_goal_dist_top ** 2
- reward = reward - sub_infos['action_cost'] + dist_rew
- infos = sub_infos
- ob = sub_ob
- ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the
- # internal steps and thus, the observation also needs to be set correctly
- return ob, reward, done, infos
+# class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
+# def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
+# super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
+# self.release_step = 62 # empirically evaluated for frame_skip=2!
+#
+# def step(self, a):
+# if self._steps < self.release_step:
+# return super(ALRBeerBongEnvStepBased, self).step(a)
+# else:
+# reward = 0
+# done = False
+# while not done:
+# sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBased, self).step(np.zeros(a.shape))
+# if not done or sub_infos['sim_crash']:
+# reward += sub_reward
+# else:
+# ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy()
+# cup_goal_dist_final = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
+# self.sim.model._site_name2id["cup_goal_final_table"]].copy())
+# cup_goal_dist_top = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
+# self.sim.model._site_name2id["cup_goal_table"]].copy())
+# if sub_infos['success']:
+# dist_rew = -cup_goal_dist_final ** 2
+# else:
+# dist_rew = -0.5 * cup_goal_dist_final ** 2 - cup_goal_dist_top ** 2
+# reward = reward - sub_infos['action_cost'] + dist_rew
+# infos = sub_infos
+# ob = sub_ob
+# ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the
+# # internal steps and thus, the observation also needs to be set correctly
+# return ob, reward, done, infos
if __name__ == "__main__":
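The commented-out ALRBeerBongEnvStepBased above keeps stepping the simulation with zero actions after the ball release and folds the remaining reward into a single return value. A minimal sketch of that sub-stepping idea, assuming only a gym-style env with an (obs, reward, done, info) step signature; it is not the repository code:

```python
import numpy as np


def finish_episode_with_zero_actions(env, action_shape):
    """Step a gym-style env with zero actions until done, accumulating the remaining reward."""
    total_reward, done, obs, info = 0.0, False, None, {}
    while not done:
        obs, reward, done, info = env.step(np.zeros(action_shape))
        total_reward += reward
    return obs, total_reward, done, info
```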
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
index 146b039..5cd234c 100644
--- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
@@ -298,76 +298,6 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
return observation, reward, done, info
-class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
- def __init__(self, max_episode_steps=250):
- super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False,
- reset_noise_scale=5e-1,
- max_episode_steps=max_episode_steps)
-
- def reset_model(self):
- self._floor_geom_id = self.model.geom_name2id('floor')
- self._foot_geom_id = self.model.geom_name2id('foot_geom')
- noise_low = -np.ones(self.model.nq) * self._reset_noise_scale
- noise_low[1] = 0
- noise_low[2] = 0
- noise_low[3] = -0.2
- noise_low[4] = -0.2
- noise_low[5] = -0.1
-
- noise_high = np.ones(self.model.nq) * self._reset_noise_scale
- noise_high[1] = 0
- noise_high[2] = 0
- noise_high[3] = 0
- noise_high[4] = 0
- noise_high[5] = 0.1
-
- rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
- # rnd_vec[2] *= 0.05 # the angle around the y axis shouldn't be too high as the agent then falls down quickly and
- # can not recover
- # rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3)
- qpos = self.init_qpos + rnd_vec
- qvel = self.init_qvel
-
- self.set_state(qpos, qvel)
-
- observation = self._get_obs()
- return observation
-
- def step(self, action):
-
- self.current_step += 1
- self.do_simulation(action, self.frame_skip)
-
- self.contact_with_floor = self._contact_checker(self._floor_geom_id, self._foot_geom_id) if not \
- self.contact_with_floor else True
-
- height_after = self.get_body_com("torso")[2]
- self.max_height = max(height_after, self.max_height) if self.contact_with_floor else 0
-
- ctrl_cost = self.control_cost(action)
- costs = ctrl_cost
- done = False
-
- if self.current_step >= self.max_episode_steps:
- healthy_reward = 0
- height_reward = self._forward_reward_weight * self.max_height # maybe move reward calculation into if structure and define two different _forward_reward_weight variables for context and episodic seperatley
- rewards = height_reward + healthy_reward
-
- else:
- # penalty for wrong start direction of first two joints; not needed, could be removed
- rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0
-
- observation = self._get_obs()
- reward = rewards - costs
- info = {
- 'height': height_after,
- 'max_height': self.max_height,
- 'goal': self.goal
- }
-
- return observation, reward, done, info
-
-
if __name__ == '__main__':
render_mode = "human" # "human" or "partial" or "final"
# env = ALRHopperJumpEnv()
From 02b8a65bab918e18d72a0ead7770e39a8b982cb7 Mon Sep 17 00:00:00 2001
From: Fabian
Date: Wed, 29 Jun 2022 09:37:18 +0200
Subject: [PATCH 043/104] restructuring
---
README.md | 4 +-
alr_envs/alr/__init__.py | 8 +-
.../alr/mujoco/ant_jump/new_mp_wrapper.py | 7 +-
.../alr/mujoco/beerpong/new_mp_wrapper.py | 19 +-
.../alr/mujoco/hopper_jump/new_mp_wrapper.py | 8 +-
alr_envs/alr/mujoco/reacher/alr_reacher.py | 2 +-
alr_envs/alr/mujoco/reacher/new_mp_wrapper.py | 6 +-
alr_envs/dmc/__init__.py | 28 +--
alr_envs/examples/examples_dmc.py | 6 +-
alr_envs/examples/examples_metaworld.py | 6 +-
.../examples/examples_motion_primitives.py | 4 +-
alr_envs/examples/examples_open_ai.py | 2 +-
alr_envs/examples/pd_control_gain_tuning.py | 2 +-
alr_envs/meta/__init__.py | 8 +-
...isodic_wrapper.py => black_box_wrapper.py} | 199 ++++++------------
.../mp/controllers/meta_world_controller.py | 4 +-
alr_envs/mp/controllers/pd_controller.py | 2 +-
alr_envs/mp/controllers/pos_controller.py | 2 +-
alr_envs/mp/controllers/vel_controller.py | 2 +-
alr_envs/mp/mp_factory.py | 14 +-
alr_envs/mp/raw_interface_wrapper.py | 88 ++++++++
alr_envs/open_ai/__init__.py | 14 +-
alr_envs/utils/make_env_helpers.py | 184 +++++-----------
23 files changed, 280 insertions(+), 339 deletions(-)
rename alr_envs/mp/{episodic_wrapper.py => black_box_wrapper.py} (51%)
create mode 100644 alr_envs/mp/raw_interface_wrapper.py
diff --git a/README.md b/README.md
index c08a1d4..607af63 100644
--- a/README.md
+++ b/README.md
@@ -198,8 +198,8 @@ wrappers = [alr_envs.dmc.suite.ball_in_cup.MPWrapper]
mp_kwargs = {...}
kwargs = {...}
env = alr_envs.make_dmp_env(base_env_id, wrappers=wrappers, seed=1, mp_kwargs=mp_kwargs, **kwargs)
-# OR for a deterministic ProMP (other mp_kwargs are required):
-# env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_args)
+# OR for a deterministic ProMP (other traj_gen_kwargs are required):
+# env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=mp_args)
rewards = 0
obs = env.reset()
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index ec539db..d8169a8 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -346,7 +346,7 @@ for _v in _versions:
kwargs={
"name": f"alr_envs:{_v}",
"wrappers": [classic_control.simple_reacher.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 2 if "long" not in _v.lower() else 5,
"num_basis": 5,
"duration": 2,
@@ -386,7 +386,7 @@ register(
kwargs={
"name": "alr_envs:ViaPointReacher-v0",
"wrappers": [classic_control.viapoint_reacher.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 5,
"num_basis": 5,
"duration": 2,
@@ -424,7 +424,7 @@ for _v in _versions:
kwargs={
"name": f"alr_envs:HoleReacher-{_v}",
"wrappers": [classic_control.hole_reacher.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 5,
"num_basis": 5,
"duration": 2,
@@ -467,7 +467,7 @@ for _v in _versions:
kwargs={
"name": f"alr_envs:{_v}",
"wrappers": [mujoco.reacher.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 5 if "long" not in _v.lower() else 7,
"num_basis": 2,
"duration": 4,
diff --git a/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py
index c33048f..c12aa56 100644
--- a/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py
@@ -1,12 +1,13 @@
-from alr_envs.mp.episodic_wrapper import EpisodicWrapper
+from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
from typing import Union, Tuple
import numpy as np
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class NewMPWrapper(EpisodicWrapper):
+class NewMPWrapper(RawInterfaceWrapper):
- def set_active_obs(self):
+ def get_context_mask(self):
return np.hstack([
[False] * 111, # ant has 111 dimensional observation space !!
[True] # goal height
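The boolean mask returned by get_context_mask is what the black-box wrapper later uses to slice the context out of the full observation. A toy illustration with a dummy 112-dimensional observation (the sizes follow the comments above; the values are made up):

```python
import numpy as np

context_mask = np.hstack([[False] * 111, [True]])  # 111 proprioceptive dims + goal height
observation = np.zeros(112)
observation[-1] = 1.25                             # dummy goal height
context = observation[context_mask]                # boolean indexing keeps only the goal height
assert context.shape == (1,) and context[0] == 1.25
```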
diff --git a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
index 53d7a1a..0df1a7c 100644
--- a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
@@ -1,15 +1,11 @@
-from typing import Tuple, Union
+from typing import Union, Tuple
import numpy as np
-from alr_envs.mp.episodic_wrapper import EpisodicWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class NewMPWrapper(EpisodicWrapper):
-
- # def __init__(self, replanning_model):
- # self.replanning_model = replanning_model
-
+class NewMPWrapper(RawInterfaceWrapper):
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qpos[0:7].copy()
@@ -18,7 +14,7 @@ class NewMPWrapper(EpisodicWrapper):
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qvel[0:7].copy()
- def set_active_obs(self):
+ def get_context_mask(self):
return np.hstack([
[False] * 7, # cos
[False] * 7, # sin
@@ -27,12 +23,7 @@ class NewMPWrapper(EpisodicWrapper):
[False] * 3, # cup_goal_diff_top
[True] * 2, # xy position of cup
[False] # env steps
- ])
-
- def do_replanning(self, pos, vel, s, a, t, last_replan_step):
- return False
- # const = np.arange(0, 1000, 10)
- # return bool(self.replanning_model(s))
+ ])
def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
if self.mp.learn_tau:
diff --git a/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
index 77a1bf6..ccd8f76 100644
--- a/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
@@ -1,9 +1,9 @@
-from alr_envs.mp.episodic_wrapper import EpisodicWrapper
+from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
from typing import Union, Tuple
import numpy as np
-class NewMPWrapper(EpisodicWrapper):
+class NewMPWrapper(BlackBoxWrapper):
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qpos[3:6].copy()
@@ -21,7 +21,7 @@ class NewMPWrapper(EpisodicWrapper):
# ])
# Random x goal + random init pos
- def set_active_obs(self):
+ def get_context_mask(self):
return np.hstack([
[False] * (2 + int(not self.env.exclude_current_positions_from_observation)), # position
[True] * 3, # set to true if randomize initial pos
@@ -31,7 +31,7 @@ class NewMPWrapper(EpisodicWrapper):
class NewHighCtxtMPWrapper(NewMPWrapper):
- def set_active_obs(self):
+ def get_context_mask(self):
return np.hstack([
[False] * (2 + int(not self.env.exclude_current_positions_from_observation)), # position
[True] * 3, # set to true if randomize initial pos
diff --git a/alr_envs/alr/mujoco/reacher/alr_reacher.py b/alr_envs/alr/mujoco/reacher/alr_reacher.py
index c12352a..0699c44 100644
--- a/alr_envs/alr/mujoco/reacher/alr_reacher.py
+++ b/alr_envs/alr/mujoco/reacher/alr_reacher.py
@@ -149,4 +149,4 @@ if __name__ == '__main__':
if d:
env.reset()
- env.close()
\ No newline at end of file
+ env.close()
diff --git a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
index 02dc1d8..8df365a 100644
--- a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
@@ -1,9 +1,9 @@
-from alr_envs.mp.episodic_wrapper import EpisodicWrapper
+from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
from typing import Union, Tuple
import numpy as np
-class MPWrapper(EpisodicWrapper):
+class MPWrapper(BlackBoxWrapper):
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
@@ -12,7 +12,7 @@ class MPWrapper(EpisodicWrapper):
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qvel.flat[:self.env.n_links]
- def set_active_obs(self):
+ def get_context_mask(self):
return np.concatenate([
[False] * self.env.n_links, # cos
[False] * self.env.n_links, # sin
diff --git a/alr_envs/dmc/__init__.py b/alr_envs/dmc/__init__.py
index ac34415..dc3adf0 100644
--- a/alr_envs/dmc/__init__.py
+++ b/alr_envs/dmc/__init__.py
@@ -15,7 +15,7 @@ register(
"time_limit": 20,
"episode_length": 1000,
"wrappers": [suite.ball_in_cup.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 2,
"num_basis": 5,
"duration": 20,
@@ -41,7 +41,7 @@ register(
"time_limit": 20,
"episode_length": 1000,
"wrappers": [suite.ball_in_cup.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 2,
"num_basis": 5,
"duration": 20,
@@ -65,7 +65,7 @@ register(
"time_limit": 20,
"episode_length": 1000,
"wrappers": [suite.reacher.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 2,
"num_basis": 5,
"duration": 20,
@@ -92,7 +92,7 @@ register(
"time_limit": 20,
"episode_length": 1000,
"wrappers": [suite.reacher.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 2,
"num_basis": 5,
"duration": 20,
@@ -117,7 +117,7 @@ register(
"time_limit": 20,
"episode_length": 1000,
"wrappers": [suite.reacher.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 2,
"num_basis": 5,
"duration": 20,
@@ -144,7 +144,7 @@ register(
"time_limit": 20,
"episode_length": 1000,
"wrappers": [suite.reacher.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 2,
"num_basis": 5,
"duration": 20,
@@ -174,7 +174,7 @@ for _task in _dmc_cartpole_tasks:
"camera_id": 0,
"episode_length": 1000,
"wrappers": [suite.cartpole.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 1,
"num_basis": 5,
"duration": 10,
@@ -203,7 +203,7 @@ for _task in _dmc_cartpole_tasks:
"camera_id": 0,
"episode_length": 1000,
"wrappers": [suite.cartpole.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 1,
"num_basis": 5,
"duration": 10,
@@ -230,7 +230,7 @@ register(
"camera_id": 0,
"episode_length": 1000,
"wrappers": [suite.cartpole.TwoPolesMPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 1,
"num_basis": 5,
"duration": 10,
@@ -259,7 +259,7 @@ register(
"camera_id": 0,
"episode_length": 1000,
"wrappers": [suite.cartpole.TwoPolesMPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 1,
"num_basis": 5,
"duration": 10,
@@ -286,7 +286,7 @@ register(
"camera_id": 0,
"episode_length": 1000,
"wrappers": [suite.cartpole.ThreePolesMPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 1,
"num_basis": 5,
"duration": 10,
@@ -315,7 +315,7 @@ register(
"camera_id": 0,
"episode_length": 1000,
"wrappers": [suite.cartpole.ThreePolesMPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 1,
"num_basis": 5,
"duration": 10,
@@ -342,7 +342,7 @@ register(
# "time_limit": 1,
"episode_length": 250,
"wrappers": [manipulation.reach_site.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 9,
"num_basis": 5,
"duration": 10,
@@ -365,7 +365,7 @@ register(
# "time_limit": 1,
"episode_length": 250,
"wrappers": [manipulation.reach_site.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 9,
"num_basis": 5,
"duration": 10,
diff --git a/alr_envs/examples/examples_dmc.py b/alr_envs/examples/examples_dmc.py
index d223d3c..5658b1f 100644
--- a/alr_envs/examples/examples_dmc.py
+++ b/alr_envs/examples/examples_dmc.py
@@ -69,7 +69,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
"learn_goal": True, # learn the goal position (recommended)
"alpha_phase": 2,
"bandwidth_factor": 2,
- "policy_type": "motor", # controller type, 'velocity', 'position', and 'motor' (torque control)
+ "policy_type": "motor", # tracking_controller type, 'velocity', 'position', and 'motor' (torque control)
"weights_scale": 1, # scaling of MP weights
"goal_scale": 1, # scaling of learned goal position
"policy_kwargs": { # only required for torque control/PD-Controller
@@ -83,8 +83,8 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
# "frame_skip": 1
}
env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs, **kwargs)
- # OR for a deterministic ProMP (other mp_kwargs are required, see metaworld_examples):
- # env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_args)
+ # OR for a deterministic ProMP (other traj_gen_kwargs are required, see metaworld_examples):
+ # env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=mp_args)
# This renders the full MP trajectory
# It is only required to call render() once in the beginning, which renders every consecutive trajectory.
diff --git a/alr_envs/examples/examples_metaworld.py b/alr_envs/examples/examples_metaworld.py
index 9ead50c..3e040cc 100644
--- a/alr_envs/examples/examples_metaworld.py
+++ b/alr_envs/examples/examples_metaworld.py
@@ -73,12 +73,12 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
"width": 0.025, # width of the basis functions
"zero_start": True, # start from current environment position if True
"weights_scale": 1, # scaling of MP weights
- "policy_type": "metaworld", # custom controller type for metaworld environments
+ "policy_type": "metaworld", # custom tracking_controller type for metaworld environments
}
env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
- # OR for a DMP (other mp_kwargs are required, see dmc_examples):
- # env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs, **kwargs)
+ # OR for a DMP (other traj_gen_kwargs are required, see dmc_examples):
+ # env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=traj_gen_kwargs, **kwargs)
# This renders the full MP trajectory
# It is only required to call render() once in the beginning, which renders every consecutive trajectory.
diff --git a/alr_envs/examples/examples_motion_primitives.py b/alr_envs/examples/examples_motion_primitives.py
index 1a679df..b9d355a 100644
--- a/alr_envs/examples/examples_motion_primitives.py
+++ b/alr_envs/examples/examples_motion_primitives.py
@@ -57,7 +57,7 @@ def example_custom_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=
Returns:
"""
- # Changing the mp_kwargs is possible by providing them to gym.
+ # Changing the traj_gen_kwargs is possible by providing them to gym.
# E.g. here by providing way to many basis functions
mp_kwargs = {
"num_dof": 5,
@@ -126,7 +126,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
}
env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
# OR for a deterministic ProMP:
- # env = make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
+ # env = make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=traj_gen_kwargs)
if render:
env.render(mode="human")
diff --git a/alr_envs/examples/examples_open_ai.py b/alr_envs/examples/examples_open_ai.py
index dc0c558..631a3a1 100644
--- a/alr_envs/examples/examples_open_ai.py
+++ b/alr_envs/examples/examples_open_ai.py
@@ -4,7 +4,7 @@ import alr_envs
def example_mp(env_name, seed=1):
"""
Example for running a motion primitive based version of a OpenAI-gym environment, which is already registered.
- For more information on motion primitive specific stuff, look at the mp examples.
+ For more information on motion primitive specific stuff, look at the trajectory_generator examples.
Args:
env_name: ProMP env_id
seed: seed
diff --git a/alr_envs/examples/pd_control_gain_tuning.py b/alr_envs/examples/pd_control_gain_tuning.py
index 1683c6f..e05bad1 100644
--- a/alr_envs/examples/pd_control_gain_tuning.py
+++ b/alr_envs/examples/pd_control_gain_tuning.py
@@ -8,7 +8,7 @@ from alr_envs.utils.make_env_helpers import make_promp_env
def visualize(env):
t = env.t
- pos_features = env.mp.basis_generator.basis(t)
+ pos_features = env.trajectory_generator.basis_generator.basis(t)
plt.plot(t, pos_features)
plt.show()
diff --git a/alr_envs/meta/__init__.py b/alr_envs/meta/__init__.py
index 5651224..e0d0ea0 100644
--- a/alr_envs/meta/__init__.py
+++ b/alr_envs/meta/__init__.py
@@ -19,7 +19,7 @@ for _task in _goal_change_envs:
kwargs={
"name": _task,
"wrappers": [goal_change_mp_wrapper.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 4,
"num_basis": 5,
"duration": 6.25,
@@ -42,7 +42,7 @@ for _task in _object_change_envs:
kwargs={
"name": _task,
"wrappers": [object_change_mp_wrapper.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 4,
"num_basis": 5,
"duration": 6.25,
@@ -75,7 +75,7 @@ for _task in _goal_and_object_change_envs:
kwargs={
"name": _task,
"wrappers": [goal_object_change_mp_wrapper.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 4,
"num_basis": 5,
"duration": 6.25,
@@ -98,7 +98,7 @@ for _task in _goal_and_endeffector_change_envs:
kwargs={
"name": _task,
"wrappers": [goal_endeffector_change_mp_wrapper.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 4,
"num_basis": 5,
"duration": 6.25,
diff --git a/alr_envs/mp/episodic_wrapper.py b/alr_envs/mp/black_box_wrapper.py
similarity index 51%
rename from alr_envs/mp/episodic_wrapper.py
rename to alr_envs/mp/black_box_wrapper.py
index b2eb391..5ae0ff9 100644
--- a/alr_envs/mp/episodic_wrapper.py
+++ b/alr_envs/mp/black_box_wrapper.py
@@ -1,5 +1,5 @@
-from abc import ABC, abstractmethod
-from typing import Union, Tuple
+from abc import ABC
+from typing import Tuple
import gym
import numpy as np
@@ -7,77 +7,77 @@ from gym import spaces
from mp_pytorch.mp.mp_interfaces import MPInterface
from alr_envs.mp.controllers.base_controller import BaseController
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class EpisodicWrapper(gym.Env, gym.wrappers.TransformReward, ABC):
- """
- Base class for movement primitive based gym.Wrapper implementations.
+class BlackBoxWrapper(gym.ObservationWrapper, ABC):
- Args:
- env: The (wrapped) environment this wrapper is applied on
- num_dof: Dimension of the action space of the wrapped env
- num_basis: Number of basis functions per dof
- duration: Length of the trajectory of the movement primitive in seconds
- controller: Type or object defining the policy that is used to generate action based on the trajectory
- weight_scale: Scaling parameter for the actions given to this wrapper
- render_mode: Equivalent to gym render mode
- """
+ def __init__(self,
+ env: RawInterfaceWrapper,
+ trajectory_generator: MPInterface, tracking_controller: BaseController,
+ duration: float, verbose: int = 1, sequencing=True, reward_aggregation: callable = np.sum):
+ """
+ gym.Wrapper for leveraging a black box approach with a trajectory generator.
- def __init__(
- self,
- env: gym.Env,
- mp: MPInterface,
- controller: BaseController,
- duration: float,
- render_mode: str = None,
- verbose: int = 1,
- weight_scale: float = 1,
- sequencing=True,
- reward_aggregation=np.mean,
- ):
+ Args:
+ env: The (wrapped) environment this wrapper is applied on
+ trajectory_generator: Generates the full or partial trajectory
+ tracking_controller: Translates the desired trajectory to raw action sequences
+ duration: Length of the trajectory of the movement primitive in seconds
+ verbose: level of detail for returned values in info dict.
+ reward_aggregation: function that takes the np.ndarray of step rewards as input and returns the trajectory
+ reward, default summation over all values.
+ """
super().__init__()
self.env = env
- try:
- self.dt = env.dt
- except AttributeError:
- raise AttributeError("step based environment needs to have a function 'dt' ")
self.duration = duration
self.traj_steps = int(duration / self.dt)
self.post_traj_steps = self.env.spec.max_episode_steps - self.traj_steps
# duration = self.env.max_episode_steps * self.dt
- self.mp = mp
- self.env = env
- self.controller = controller
- self.weight_scale = weight_scale
-
- # rendering
- self.render_mode = render_mode
- self.render_kwargs = {}
+ # trajectory generation
+ self.trajectory_generator = trajectory_generator
+ self.tracking_controller = tracking_controller
+ # self.weight_scale = weight_scale
self.time_steps = np.linspace(0, self.duration, self.traj_steps)
- self.mp.set_mp_times(self.time_steps)
- # self.mp.set_mp_duration(self.time_steps, dt)
- # action_bounds = np.inf * np.ones((np.prod(self.mp.num_params)))
- self.mp_action_space = self.get_mp_action_space()
+ self.trajectory_generator.set_mp_times(self.time_steps)
+ # self.trajectory_generator.set_mp_duration(self.time_steps, dt)
+ # action_bounds = np.inf * np.ones((np.prod(self.trajectory_generator.num_params)))
+ self.reward_aggregation = reward_aggregation
+ # spaces
+ self.mp_action_space = self.get_mp_action_space()
self.action_space = self.get_action_space()
- self.active_obs = self.set_active_obs()
- self.observation_space = spaces.Box(low=self.env.observation_space.low[self.active_obs],
- high=self.env.observation_space.high[self.active_obs],
+ self.observation_space = spaces.Box(low=self.env.observation_space.low[self.env.context_mask],
+ high=self.env.observation_space.high[self.env.context_mask],
dtype=self.env.observation_space.dtype)
+ # rendering
+ self.render_mode = None
+ self.render_kwargs = {}
+
self.verbose = verbose
+ @property
+ def dt(self):
+ return self.env.dt
+
+ def observation(self, observation):
+ return observation[self.env.context_mask]
+
def get_trajectory(self, action: np.ndarray) -> Tuple:
# TODO: this follows the implementation of the mp_pytorch library which includes the parameters tau and delay at
# the beginning of the array.
- ignore_indices = int(self.mp.learn_tau) + int(self.mp.learn_delay)
- scaled_mp_params = action.copy()
- scaled_mp_params[ignore_indices:] *= self.weight_scale
- self.mp.set_params(np.clip(scaled_mp_params, self.mp_action_space.low, self.mp_action_space.high))
- self.mp.set_boundary_conditions(bc_time=self.time_steps[:1], bc_pos=self.current_pos, bc_vel=self.current_vel)
- traj_dict = self.mp.get_mp_trajs(get_pos=True, get_vel=True)
+ # ignore_indices = int(self.trajectory_generator.learn_tau) + int(self.trajectory_generator.learn_delay)
+ # scaled_mp_params = action.copy()
+ # scaled_mp_params[ignore_indices:] *= self.weight_scale
+
+ clipped_params = np.clip(action, self.mp_action_space.low, self.mp_action_space.high)
+ self.trajectory_generator.set_params(clipped_params)
+ self.trajectory_generator.set_boundary_conditions(bc_time=self.time_steps[:1], bc_pos=self.current_pos,
+ bc_vel=self.current_vel)
+ traj_dict = self.trajectory_generator.get_mp_trajs(get_pos=True, get_vel=True)
trajectory_tensor, velocity_tensor = traj_dict['pos'], traj_dict['vel']
trajectory = trajectory_tensor.numpy()
@@ -86,13 +86,13 @@ class EpisodicWrapper(gym.Env, gym.wrappers.TransformReward, ABC):
# TODO: Do we need this or does mp_pytorch have this?
if self.post_traj_steps > 0:
trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
- velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.num_dof))])
+ velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.trajectory_generator.num_dof))])
return trajectory, velocity
def get_mp_action_space(self):
- """This function can be used to set up an individual space for the parameters of the mp."""
- min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
+ """This function can be used to set up an individual space for the parameters of the trajectory_generator."""
+ min_action_bounds, max_action_bounds = self.trajectory_generator.get_param_bounds()
mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
dtype=np.float32)
return mp_action_space
@@ -109,71 +109,6 @@ class EpisodicWrapper(gym.Env, gym.wrappers.TransformReward, ABC):
except AttributeError:
return self.get_mp_action_space()
- def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
- """
- Used to extract the parameters for the motion primitive and other parameters from an action array which might
- include other actions like ball releasing time for the beer pong environment.
- This only needs to be overwritten if the action space is modified.
- Args:
- action: a vector instance of the whole action space, includes mp parameters and additional parameters if
- specified, else only mp parameters
-
- Returns:
- Tuple: mp_arguments and other arguments
- """
- return action, None
-
- def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[
- np.ndarray]:
- """
- This function can be used to modify the step_action with additional parameters e.g. releasing the ball in the
- Beerpong env. The parameters used should not be part of the motion primitive parameters.
- Returns step_action by default, can be overwritten in individual mp_wrappers.
- Args:
- t: the current time step of the episode
- env_spec_params: the environment specific parameter, as defined in fucntion _episode_callback
- (e.g. ball release time in Beer Pong)
- step_action: the current step-based action
-
- Returns:
- modified step action
- """
- return step_action
-
- @abstractmethod
- def set_active_obs(self) -> np.ndarray:
- """
- This function defines the contexts. The contexts are defined as specific observations.
- Returns:
- boolearn array representing the indices of the observations
-
- """
- return np.ones(self.env.observation_space.shape[0], dtype=bool)
-
- @property
- @abstractmethod
- def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- """
- Returns the current position of the action/control dimension.
- The dimensionality has to match the action/control dimension.
- This is not required when exclusively using velocity control,
- it should, however, be implemented regardless.
- E.g. The joint positions that are directly or indirectly controlled by the action.
- """
- raise NotImplementedError()
-
- @property
- @abstractmethod
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- """
- Returns the current velocity of the action/control dimension.
- The dimensionality has to match the action/control dimension.
- This is not required when exclusively using position control,
- it should, however, be implemented regardless.
- E.g. The joint velocities that are directly or indirectly controlled by the action.
- """
- raise NotImplementedError()
-
def step(self, action: np.ndarray):
""" This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
# TODO: Think about sequencing
@@ -184,46 +119,52 @@ class EpisodicWrapper(gym.Env, gym.wrappers.TransformReward, ABC):
# TODO
# self.time_steps = np.linspace(0, learned_duration, self.traj_steps)
- # self.mp.set_mp_times(self.time_steps)
+ # self.trajectory_generator.set_mp_times(self.time_steps)
trajectory_length = len(trajectory)
+ rewards = np.zeros(shape=(trajectory_length,))
if self.verbose >= 2:
actions = np.zeros(shape=(trajectory_length,) + self.env.action_space.shape)
observations = np.zeros(shape=(trajectory_length,) + self.env.observation_space.shape,
dtype=self.env.observation_space.dtype)
- rewards = np.zeros(shape=(trajectory_length,))
- trajectory_return = 0
infos = dict()
+ done = False
for t, pos_vel in enumerate(zip(trajectory, velocity)):
- step_action = self.controller.get_action(pos_vel[0], pos_vel[1], self.current_pos, self.current_vel)
+ step_action = self.tracking_controller.get_action(pos_vel[0], pos_vel[1], self.current_pos,
+ self.current_vel)
step_action = self._step_callback(t, env_spec_params, step_action) # include possible callback info
c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
# print('step/clipped action ratio: ', step_action/c_action)
obs, c_reward, done, info = self.env.step(c_action)
+ rewards[t] = c_reward
+
if self.verbose >= 2:
actions[t, :] = c_action
- rewards[t] = c_reward
observations[t, :] = obs
- trajectory_return += c_reward
+
for k, v in info.items():
elems = infos.get(k, [None] * trajectory_length)
elems[t] = v
infos[k] = elems
- # infos['step_infos'].append(info)
- if self.render_mode:
+
+ if self.render_mode is not None:
self.render(mode=self.render_mode, **self.render_kwargs)
- if done or do_replanning(kwargs):
+
+ if done or self.env.do_replanning(self.env.current_pos, self.env.current_vel, obs, c_action, t):
break
+
infos.update({k: v[:t + 1] for k, v in infos.items()})
+
if self.verbose >= 2:
infos['trajectory'] = trajectory
infos['step_actions'] = actions[:t + 1]
infos['step_observations'] = observations[:t + 1]
infos['step_rewards'] = rewards[:t + 1]
+
infos['trajectory_length'] = t + 1
- done = True
+ trajectory_return = self.reward_aggregation(rewards[:t + 1])
return self.get_observation_from_step(obs), trajectory_return, done, infos
def reset(self):
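One behavioural change visible in the hunk above: per-step rewards are now collected into an array and reduced with the configurable reward_aggregation callable (np.sum by default) instead of being accumulated on the fly. A minimal, wrapper-independent illustration of what that reduction does:

```python
import numpy as np

step_rewards = np.array([0.1, 0.3, -0.2, 0.5])  # per-step rewards of one MP rollout
trajectory_return = np.sum(step_rewards)         # default aggregation per the new docstring
mean_return = np.mean(step_rewards)              # an alternative aggregation, e.g. the old default
assert np.isclose(trajectory_return, 0.7)
assert np.isclose(mean_return, 0.175)
```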
diff --git a/alr_envs/mp/controllers/meta_world_controller.py b/alr_envs/mp/controllers/meta_world_controller.py
index 07988e5..5747f9e 100644
--- a/alr_envs/mp/controllers/meta_world_controller.py
+++ b/alr_envs/mp/controllers/meta_world_controller.py
@@ -6,8 +6,8 @@ from alr_envs.mp.controllers.base_controller import BaseController
class MetaWorldController(BaseController):
"""
A Metaworld Controller. Using position and velocity information from a provided environment,
- the controller calculates a response based on the desired position and velocity.
- Unlike the other Controllers, this is a special controller for MetaWorld environments.
+ the tracking_controller calculates a response based on the desired position and velocity.
+ Unlike the other Controllers, this is a special tracking_controller for MetaWorld environments.
They use a position delta for the xyz coordinates and a raw position for the gripper opening.
:param env: A position environment
diff --git a/alr_envs/mp/controllers/pd_controller.py b/alr_envs/mp/controllers/pd_controller.py
index d22f6b4..140aeee 100644
--- a/alr_envs/mp/controllers/pd_controller.py
+++ b/alr_envs/mp/controllers/pd_controller.py
@@ -6,7 +6,7 @@ from alr_envs.mp.controllers.base_controller import BaseController
class PDController(BaseController):
"""
A PD-Controller. Using position and velocity information from a provided environment,
- the controller calculates a response based on the desired position and velocity
+ the tracking_controller calculates a response based on the desired position and velocity
:param env: A position environment
:param p_gains: Factors for the proportional gains
diff --git a/alr_envs/mp/controllers/pos_controller.py b/alr_envs/mp/controllers/pos_controller.py
index bec3c68..5570307 100644
--- a/alr_envs/mp/controllers/pos_controller.py
+++ b/alr_envs/mp/controllers/pos_controller.py
@@ -3,7 +3,7 @@ from alr_envs.mp.controllers.base_controller import BaseController
class PosController(BaseController):
"""
- A Position Controller. The controller calculates a response only based on the desired position.
+ A Position Controller. The tracking_controller calculates a response only based on the desired position.
"""
def get_action(self, des_pos, des_vel, c_pos, c_vel):
return des_pos
diff --git a/alr_envs/mp/controllers/vel_controller.py b/alr_envs/mp/controllers/vel_controller.py
index 38128be..67bab2a 100644
--- a/alr_envs/mp/controllers/vel_controller.py
+++ b/alr_envs/mp/controllers/vel_controller.py
@@ -3,7 +3,7 @@ from alr_envs.mp.controllers.base_controller import BaseController
class VelController(BaseController):
"""
- A Velocity Controller. The controller calculates a response only based on the desired velocity.
+ A Velocity Controller. The tracking_controller calculates a response only based on the desired velocity.
"""
def get_action(self, des_pos, des_vel, c_pos, c_vel):
return des_vel
diff --git a/alr_envs/mp/mp_factory.py b/alr_envs/mp/mp_factory.py
index 5cf7231..d2c5460 100644
--- a/alr_envs/mp/mp_factory.py
+++ b/alr_envs/mp/mp_factory.py
@@ -7,16 +7,16 @@ from mp_pytorch.basis_gn.basis_generator import BasisGenerator
ALL_TYPES = ["promp", "dmp", "idmp"]
-def get_movement_primitive(
- movement_primitives_type: str, action_dim: int, basis_generator: BasisGenerator, **kwargs
+def get_trajectory_generator(
+ trajectory_generator_type: str, action_dim: int, basis_generator: BasisGenerator, **kwargs
):
- movement_primitives_type = movement_primitives_type.lower()
- if movement_primitives_type == "promp":
+ trajectory_generator_type = trajectory_generator_type.lower()
+ if trajectory_generator_type == "promp":
return ProMP(basis_generator, action_dim, **kwargs)
- elif movement_primitives_type == "dmp":
+ elif trajectory_generator_type == "dmp":
return DMP(basis_generator, action_dim, **kwargs)
- elif movement_primitives_type == 'idmp':
+ elif trajectory_generator_type == 'idmp':
return IDMP(basis_generator, action_dim, **kwargs)
else:
- raise ValueError(f"Specified movement primitive type {movement_primitives_type} not supported, "
+ raise ValueError(f"Specified movement primitive type {trajectory_generator_type} not supported, "
f"please choose one of {ALL_TYPES}.")
\ No newline at end of file
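A hedged usage sketch of the renamed factory; the basis generator is assumed to be built elsewhere (e.g. via get_basis_generator, as in make_bb_env below) and is simply passed through here:

```python
from alr_envs.mp.mp_factory import get_trajectory_generator


def build_promp(basis_gen, action_dim=7, **promp_kwargs):
    # "promp" is one of ALL_TYPES; any extra kwargs are forwarded to the ProMP constructor.
    return get_trajectory_generator(
        trajectory_generator_type="promp",
        action_dim=action_dim,
        basis_generator=basis_gen,
        **promp_kwargs,
    )
```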
diff --git a/alr_envs/mp/raw_interface_wrapper.py b/alr_envs/mp/raw_interface_wrapper.py
new file mode 100644
index 0000000..45d5daf
--- /dev/null
+++ b/alr_envs/mp/raw_interface_wrapper.py
@@ -0,0 +1,88 @@
+from typing import Union, Tuple
+
+import gym
+import numpy as np
+from abc import abstractmethod
+
+
+class RawInterfaceWrapper(gym.Wrapper):
+
+ @property
+ @abstractmethod
+ def context_mask(self) -> np.ndarray:
+ """
+ This function defines the contexts. The contexts are defined as specific observations.
+ Returns:
+ bool array representing the indices of the observations
+
+ """
+ return np.ones(self.env.observation_space.shape[0], dtype=bool)
+
+ @property
+ @abstractmethod
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ """
+ Returns the current position of the action/control dimension.
+ The dimensionality has to match the action/control dimension.
+ This is not required when exclusively using velocity control,
+ it should, however, be implemented regardless.
+ E.g. The joint positions that are directly or indirectly controlled by the action.
+ """
+ raise NotImplementedError()
+
+ @property
+ @abstractmethod
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ """
+ Returns the current velocity of the action/control dimension.
+ The dimensionality has to match the action/control dimension.
+ This is not required when exclusively using position control,
+ it should, however, be implemented regardless.
+ E.g. The joint velocities that are directly or indirectly controlled by the action.
+ """
+ raise NotImplementedError()
+
+ @property
+ @abstractmethod
+ def dt(self) -> float:
+ """
+        Control timestep of the environment, i.e. the time in seconds between two simulation steps.
+ Returns: float
+
+ """
+
+ def do_replanning(self, pos, vel, s, a, t):
+ # return t % 100 == 0
+ # return bool(self.replanning_model(s))
+ return False
+
+ def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
+ """
+ Used to extract the parameters for the motion primitive and other parameters from an action array which might
+ include other actions like ball releasing time for the beer pong environment.
+ This only needs to be overwritten if the action space is modified.
+ Args:
+ action: a vector instance of the whole action space, includes trajectory_generator parameters and additional parameters if
+ specified, else only trajectory_generator parameters
+
+ Returns:
+ Tuple: mp_arguments and other arguments
+ """
+ return action, None
+
+ def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[
+ np.ndarray]:
+ """
+ This function can be used to modify the step_action with additional parameters e.g. releasing the ball in the
+ Beerpong env. The parameters used should not be part of the motion primitive parameters.
+ Returns step_action by default, can be overwritten in individual mp_wrappers.
+ Args:
+ t: the current time step of the episode
+ env_spec_params: the environment specific parameter, as defined in function _episode_callback
+ (e.g. ball release time in Beer Pong)
+ step_action: the current step-based action
+
+ Returns:
+ modified step action
+ """
+ return step_action
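A hypothetical minimal subclass of the new RawInterfaceWrapper; the observation layout (three controlled joints plus one goal dimension) and the MuJoCo qpos/qvel slicing are illustrative assumptions modelled on the mujoco wrappers touched by this patch:

```python
from typing import Tuple, Union

import numpy as np

from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


class ToyMPWrapper(RawInterfaceWrapper):

    @property
    def context_mask(self) -> np.ndarray:
        # hide the three joint dimensions, expose only the (assumed) goal dimension as context
        return np.hstack([[False] * 3, [True]])

    @property
    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
        return self.env.sim.data.qpos[:3].copy()

    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        return self.env.sim.data.qvel[:3].copy()

    @property
    def dt(self) -> float:
        return self.env.dt
```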
diff --git a/alr_envs/open_ai/__init__.py b/alr_envs/open_ai/__init__.py
index 41b770f..04610fa 100644
--- a/alr_envs/open_ai/__init__.py
+++ b/alr_envs/open_ai/__init__.py
@@ -21,7 +21,7 @@ register(
kwargs={
"name": "alr_envs:MountainCarContinuous-v1",
"wrappers": [classic_control.continuous_mountain_car.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 1,
"num_basis": 4,
"duration": 2,
@@ -43,7 +43,7 @@ register(
kwargs={
"name": "gym.envs.classic_control:MountainCarContinuous-v0",
"wrappers": [classic_control.continuous_mountain_car.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 1,
"num_basis": 4,
"duration": 19.98,
@@ -65,7 +65,7 @@ register(
kwargs={
"name": "gym.envs.mujoco:Reacher-v2",
"wrappers": [mujoco.reacher_v2.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 2,
"num_basis": 6,
"duration": 1,
@@ -87,7 +87,7 @@ register(
kwargs={
"name": "gym.envs.robotics:FetchSlideDense-v1",
"wrappers": [FlattenObservation, robotics.fetch.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 4,
"num_basis": 5,
"duration": 2,
@@ -105,7 +105,7 @@ register(
kwargs={
"name": "gym.envs.robotics:FetchSlide-v1",
"wrappers": [FlattenObservation, robotics.fetch.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 4,
"num_basis": 5,
"duration": 2,
@@ -123,7 +123,7 @@ register(
kwargs={
"name": "gym.envs.robotics:FetchReachDense-v1",
"wrappers": [FlattenObservation, robotics.fetch.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 4,
"num_basis": 5,
"duration": 2,
@@ -141,7 +141,7 @@ register(
kwargs={
"name": "gym.envs.robotics:FetchReach-v1",
"wrappers": [FlattenObservation, robotics.fetch.MPWrapper],
- "mp_kwargs": {
+ "traj_gen_kwargs": {
"num_dof": 4,
"num_basis": 5,
"duration": 2,
diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py
index dbefeee..9af0a2d 100644
--- a/alr_envs/utils/make_env_helpers.py
+++ b/alr_envs/utils/make_env_helpers.py
@@ -4,17 +4,15 @@ from typing import Iterable, Type, Union, Mapping, MutableMapping
import gym
import numpy as np
from gym.envs.registration import EnvSpec
-
-from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper
-from mp_env_api.mp_wrappers.promp_wrapper import ProMPWrapper
from mp_pytorch import MPInterface
from alr_envs.mp.basis_generator_factory import get_basis_generator
+from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
from alr_envs.mp.controllers.base_controller import BaseController
from alr_envs.mp.controllers.controller_factory import get_controller
-from alr_envs.mp.mp_factory import get_movement_primitive
-from alr_envs.mp.episodic_wrapper import EpisodicWrapper
+from alr_envs.mp.mp_factory import get_trajectory_generator
from alr_envs.mp.phase_generator_factory import get_phase_generator
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
@@ -100,9 +98,8 @@ def make(env_id: str, seed, **kwargs):
def _make_wrapped_env(
- env_id: str, wrappers: Iterable[Type[gym.Wrapper]], mp: MPInterface, controller: BaseController,
- ep_wrapper_kwargs: Mapping, seed=1, **kwargs
- ):
+ env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs
+):
"""
Helper function for creating a wrapped gym environment using MPs.
It adds all provided wrappers to the specified environment and verifies at least one MPEnvWrapper is
@@ -118,73 +115,74 @@ def _make_wrapped_env(
"""
# _env = gym.make(env_id)
_env = make(env_id, seed, **kwargs)
- has_episodic_wrapper = False
+ has_black_box_wrapper = False
for w in wrappers:
- # only wrap the environment if not EpisodicWrapper, e.g. for vision
- if not issubclass(w, EpisodicWrapper):
- _env = w(_env)
- else: # if EpisodicWrapper, use specific constructor
- has_episodic_wrapper = True
- _env = w(env=_env, mp=mp, controller=controller, **ep_wrapper_kwargs)
- if not has_episodic_wrapper:
- raise ValueError("An EpisodicWrapper is required in order to leverage movement primitive environments.")
+        # all provided wrappers are applied; at least one must be a RawInterfaceWrapper (others, e.g. for vision, are allowed)
+ if issubclass(w, RawInterfaceWrapper):
+ has_black_box_wrapper = True
+ _env = w(_env)
+ if not has_black_box_wrapper:
+        raise ValueError("A RawInterfaceWrapper is required in order to leverage movement primitive environments.")
return _env
-def make_mp_from_kwargs(
- env_id: str, wrappers: Iterable, ep_wrapper_kwargs: MutableMapping, mp_kwargs: MutableMapping,
+def make_bb_env(
+ env_id: str, wrappers: Iterable, black_box_wrapper_kwargs: MutableMapping, traj_gen_kwargs: MutableMapping,
controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed=1,
- sequenced=False, **kwargs
- ):
+ sequenced=False, **kwargs):
"""
This can also be used standalone for manually building a custom DMP environment.
Args:
- ep_wrapper_kwargs:
- basis_kwargs:
- phase_kwargs:
- controller_kwargs:
+ black_box_wrapper_kwargs: kwargs for the black-box wrapper
+ basis_kwargs: kwargs for the basis generator
+ phase_kwargs: kwargs for the phase generator
+ controller_kwargs: kwargs for the tracking controller
env_id: base_env_name,
- wrappers: list of wrappers (at least an EpisodicWrapper),
+        wrappers: list of wrappers (at least a RawInterfaceWrapper),
seed: seed of environment
sequenced: When true, this allows to sequence multiple ProMPs by specifying the duration of each sub-trajectory,
this behavior is much closer to step based learning.
- mp_kwargs: dict of at least {num_dof: int, num_basis: int} for DMP
+ traj_gen_kwargs: dict of at least {num_dof: int, num_basis: int} for DMP
Returns: DMP wrapped gym env
"""
- _verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None))
- dummy_env = make(env_id, seed)
- if ep_wrapper_kwargs.get('duration', None) is None:
- ep_wrapper_kwargs['duration'] = dummy_env.spec.max_episode_steps * dummy_env.dt
+ _verify_time_limit(traj_gen_kwargs.get("duration", None), kwargs.get("time_limit", None))
+ _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs)
+
+ if black_box_wrapper_kwargs.get('duration', None) is None:
+ black_box_wrapper_kwargs['duration'] = _env.spec.max_episode_steps * _env.dt
if phase_kwargs.get('tau', None) is None:
- phase_kwargs['tau'] = ep_wrapper_kwargs['duration']
- mp_kwargs['action_dim'] = mp_kwargs.get('action_dim', np.prod(dummy_env.action_space.shape).item())
+ phase_kwargs['tau'] = black_box_wrapper_kwargs['duration']
+ traj_gen_kwargs['action_dim'] = traj_gen_kwargs.get('action_dim', np.prod(_env.action_space.shape).item())
+
phase_gen = get_phase_generator(**phase_kwargs)
basis_gen = get_basis_generator(phase_generator=phase_gen, **basis_kwargs)
controller = get_controller(**controller_kwargs)
- mp = get_movement_primitive(basis_generator=basis_gen, **mp_kwargs)
- _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, mp=mp, controller=controller,
- ep_wrapper_kwargs=ep_wrapper_kwargs, seed=seed, **kwargs)
- return _env
+ traj_gen = get_trajectory_generator(basis_generator=basis_gen, **traj_gen_kwargs)
+
+ bb_env = BlackBoxWrapper(_env, trajectory_generator=traj_gen, tracking_controller=controller,
+ **black_box_wrapper_kwargs)
+
+ return bb_env
-def make_mp_env_helper(**kwargs):
+def make_bb_env_helper(**kwargs):
"""
- Helper function for registering a DMP gym environments.
+ Helper function for registering a black box gym environment.
Args:
**kwargs: expects at least the following:
{
"name": base environment name.
- "wrappers": list of wrappers (at least an EpisodicWrapper is required),
- "movement_primitives_kwargs": {
- "movement_primitives_type": type_of_your_movement_primitive,
+        "wrappers": list of wrappers (at least a RawInterfaceWrapper is required),
+ "traj_gen_kwargs": {
+ "trajectory_generator_type": type_of_your_movement_primitive,
non default arguments for the movement primitive instance
...
}
"controller_kwargs": {
"controller_type": type_of_your_controller,
- non default arguments for the controller instance
+ non default arguments for the tracking_controller instance
...
},
"basis_generator_kwargs": {
@@ -205,95 +203,17 @@ def make_mp_env_helper(**kwargs):
seed = kwargs.pop("seed", None)
wrappers = kwargs.pop("wrappers")
- mp_kwargs = kwargs.pop("movement_primitives_kwargs")
- ep_wrapper_kwargs = kwargs.pop('ep_wrapper_kwargs')
- contr_kwargs = kwargs.pop("controller_kwargs")
- phase_kwargs = kwargs.pop("phase_generator_kwargs")
- basis_kwargs = kwargs.pop("basis_generator_kwargs")
+ traj_gen_kwargs = kwargs.pop("traj_gen_kwargs", {})
+ black_box_kwargs = kwargs.pop('black_box_wrapper_kwargs', {})
+ contr_kwargs = kwargs.pop("controller_kwargs", {})
+ phase_kwargs = kwargs.pop("phase_generator_kwargs", {})
+ basis_kwargs = kwargs.pop("basis_generator_kwargs", {})
- return make_mp_from_kwargs(env_id=kwargs.pop("name"), wrappers=wrappers, ep_wrapper_kwargs=ep_wrapper_kwargs,
- mp_kwargs=mp_kwargs, controller_kwargs=contr_kwargs, phase_kwargs=phase_kwargs,
- basis_kwargs=basis_kwargs, **kwargs, seed=seed)
-
-
-def make_dmp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwargs):
- """
- This can also be used standalone for manually building a custom DMP environment.
- Args:
- env_id: base_env_name,
- wrappers: list of wrappers (at least an MPEnvWrapper),
- seed: seed of environment
- mp_kwargs: dict of at least {num_dof: int, num_basis: int} for DMP
-
- Returns: DMP wrapped gym env
-
- """
- _verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None))
-
- _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs)
-
- _verify_dof(_env, mp_kwargs.get("num_dof"))
-
- return DmpWrapper(_env, **mp_kwargs)
-
-
-def make_promp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwargs):
- """
- This can also be used standalone for manually building a custom ProMP environment.
- Args:
- env_id: base_env_name,
- wrappers: list of wrappers (at least an MPEnvWrapper),
- mp_kwargs: dict of at least {num_dof: int, num_basis: int, width: int}
-
- Returns: ProMP wrapped gym env
-
- """
- _verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None))
-
- _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs)
-
- _verify_dof(_env, mp_kwargs.get("num_dof"))
-
- return ProMPWrapper(_env, **mp_kwargs)
-
-
-def make_dmp_env_helper(**kwargs):
- """
- Helper function for registering a DMP gym environments.
- Args:
- **kwargs: expects at least the following:
- {
- "name": base_env_name,
- "wrappers": list of wrappers (at least an MPEnvWrapper),
- "mp_kwargs": dict of at least {num_dof: int, num_basis: int} for DMP
- }
-
- Returns: DMP wrapped gym env
-
- """
- seed = kwargs.pop("seed", None)
- return make_dmp_env(env_id=kwargs.pop("name"), wrappers=kwargs.pop("wrappers"), seed=seed,
- mp_kwargs=kwargs.pop("mp_kwargs"), **kwargs)
-
-
-def make_promp_env_helper(**kwargs):
- """
- Helper function for registering ProMP gym environments.
- This can also be used standalone for manually building a custom ProMP environment.
- Args:
- **kwargs: expects at least the following:
- {
- "name": base_env_name,
- "wrappers": list of wrappers (at least an MPEnvWrapper),
- "mp_kwargs": dict of at least {num_dof: int, num_basis: int, width: int}
- }
-
- Returns: ProMP wrapped gym env
-
- """
- seed = kwargs.pop("seed", None)
- return make_promp_env(env_id=kwargs.pop("name"), wrappers=kwargs.pop("wrappers"), seed=seed,
- mp_kwargs=kwargs.pop("mp_kwargs"), **kwargs)
+ return make_bb_env(env_id=kwargs.pop("name"), wrappers=wrappers,
+ black_box_wrapper_kwargs=black_box_kwargs,
+ traj_gen_kwargs=traj_gen_kwargs, controller_kwargs=contr_kwargs,
+ phase_kwargs=phase_kwargs,
+ basis_kwargs=basis_kwargs, **kwargs, seed=seed)
def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[None, float]):
@@ -304,7 +224,7 @@ def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[
It can be found in the BaseMP class.
Args:
- mp_time_limit: max trajectory length of mp in seconds
+ mp_time_limit: max trajectory length of trajectory_generator in seconds
env_time_limit: max trajectory length of DMC environment in seconds
Returns:
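Putting the refactor together, a registration against the new make_bb_env_helper could look like the sketch below; the env id, wrapper, gains and basis count are illustrative assumptions and not part of this patch:

```python
import numpy as np
from gym.envs.registration import register

from alr_envs.alr import mujoco  # assumption: exposes a RawInterfaceWrapper subclass for the reacher

register(
    id="ALRToyProMP-v0",  # hypothetical id
    entry_point="alr_envs.utils.make_env_helpers:make_bb_env_helper",
    kwargs={
        "name": "alr_envs:ALRReacher-v0",            # assumed base env id
        "wrappers": [mujoco.reacher.MPWrapper],      # must contain a RawInterfaceWrapper subclass
        "traj_gen_kwargs": {"trajectory_generator_type": "promp"},
        "controller_kwargs": {"controller_type": "motor",
                              "p_gains": np.ones(5),
                              "d_gains": 0.1 * np.ones(5)},
        "phase_generator_kwargs": {"phase_generator_type": "linear"},
        "basis_generator_kwargs": {"basis_generator_type": "zero_rbf",
                                   "num_basis": 5},
    },
)
```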
From 6e06e11cfa90487f7cb2de1cda20ed7c029d3210 Mon Sep 17 00:00:00 2001
From: Onur
Date: Wed, 29 Jun 2022 10:39:28 +0200
Subject: [PATCH 044/104] added new mp wrappers to all environments
---
alr_envs/alr/__init__.py | 2 +-
.../hole_reacher/new_mp_wrapper.py | 31 ++++++++++++++++++
.../simple_reacher/new_mp_wrapper.py | 31 ++++++++++++++++++
.../viapoint_reacher/new_mp_wrapper.py | 32 +++++++++++++++++++
.../half_cheetah_jump/new_mp_wrapper.py | 24 ++++++++++++++
.../alr/mujoco/hopper_throw/mp_wrapper.py | 2 +-
.../alr/mujoco/hopper_throw/new_mp_wrapper.py | 27 ++++++++++++++++
alr_envs/alr/mujoco/reacher/new_mp_wrapper.py | 8 +++--
.../mujoco/walker_2d_jump/new_mp_wrapper.py | 28 ++++++++++++++++
9 files changed, 180 insertions(+), 5 deletions(-)
create mode 100644 alr_envs/alr/classic_control/hole_reacher/new_mp_wrapper.py
create mode 100644 alr_envs/alr/classic_control/simple_reacher/new_mp_wrapper.py
create mode 100644 alr_envs/alr/classic_control/viapoint_reacher/new_mp_wrapper.py
create mode 100644 alr_envs/alr/mujoco/half_cheetah_jump/new_mp_wrapper.py
create mode 100644 alr_envs/alr/mujoco/hopper_throw/new_mp_wrapper.py
create mode 100644 alr_envs/alr/mujoco/walker_2d_jump/new_mp_wrapper.py
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index d8169a8..4c90512 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -537,7 +537,7 @@ for _v in _versions:
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
- kwargs=kwargs_dict_bp_promp_fixed_release
+ kwargs=kwargs_dict_bp_promp
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
########################################################################################################################
diff --git a/alr_envs/alr/classic_control/hole_reacher/new_mp_wrapper.py b/alr_envs/alr/classic_control/hole_reacher/new_mp_wrapper.py
new file mode 100644
index 0000000..1f1d198
--- /dev/null
+++ b/alr_envs/alr/classic_control/hole_reacher/new_mp_wrapper.py
@@ -0,0 +1,31 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+
+
+class NewMPWrapper(RawInterfaceWrapper):
+
+ def get_context_mask(self):
+ return np.hstack([
+ [self.env.random_start] * self.env.n_links, # cos
+ [self.env.random_start] * self.env.n_links, # sin
+ [self.env.random_start] * self.env.n_links, # velocity
+ [self.env.initial_width is None], # hole width
+ # [self.env.hole_depth is None], # hole depth
+ [True] * 2, # x-y coordinates of target distance
+ [False] # env steps
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.current_pos
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.current_vel
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/classic_control/simple_reacher/new_mp_wrapper.py b/alr_envs/alr/classic_control/simple_reacher/new_mp_wrapper.py
new file mode 100644
index 0000000..c1497e6
--- /dev/null
+++ b/alr_envs/alr/classic_control/simple_reacher/new_mp_wrapper.py
@@ -0,0 +1,31 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+
+
+class MPWrapper(RawInterfaceWrapper):
+
+ def context_mask(self):
+ return np.hstack([
+ [self.env.random_start] * self.env.n_links, # cos
+ [self.env.random_start] * self.env.n_links, # sin
+ [self.env.random_start] * self.env.n_links, # velocity
+ [True] * 2, # x-y coordinates of target distance
+ [False] # env steps
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.current_pos
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.current_vel
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/new_mp_wrapper.py b/alr_envs/alr/classic_control/viapoint_reacher/new_mp_wrapper.py
new file mode 100644
index 0000000..f02dfe1
--- /dev/null
+++ b/alr_envs/alr/classic_control/viapoint_reacher/new_mp_wrapper.py
@@ -0,0 +1,32 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+
+
+class MPWrapper(RawInterfaceWrapper):
+
+ def context_mask(self):
+ return np.hstack([
+ [self.env.random_start] * self.env.n_links, # cos
+ [self.env.random_start] * self.env.n_links, # sin
+ [self.env.random_start] * self.env.n_links, # velocity
+ [self.env.initial_via_target is None] * 2, # x-y coordinates of via point distance
+ [True] * 2, # x-y coordinates of target distance
+ [False] # env steps
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.current_pos
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.current_vel
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/half_cheetah_jump/new_mp_wrapper.py
new file mode 100644
index 0000000..f098c2d
--- /dev/null
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/new_mp_wrapper.py
@@ -0,0 +1,24 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+
+
+class MPWrapper(RawInterfaceWrapper):
+ def context_mask(self):
+ return np.hstack([
+ [False] * 17,
+ [True] # goal height
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray]:
+ return self.env.sim.data.qpos[3:9].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel[3:9].copy()
+
diff --git a/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py b/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
index e5e9486..909e00a 100644
--- a/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
@@ -10,7 +10,7 @@ class MPWrapper(MPEnvWrapper):
def active_obs(self):
return np.hstack([
[False] * 17,
- [True] # goal pos
+ [True] # goal pos
])
@property
diff --git a/alr_envs/alr/mujoco/hopper_throw/new_mp_wrapper.py b/alr_envs/alr/mujoco/hopper_throw/new_mp_wrapper.py
new file mode 100644
index 0000000..049c2f0
--- /dev/null
+++ b/alr_envs/alr/mujoco/hopper_throw/new_mp_wrapper.py
@@ -0,0 +1,27 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+
+
+class MPWrapper(RawInterfaceWrapper):
+ def context_mask(self):
+ return np.hstack([
+ [False] * 17,
+ [True] # goal pos
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray]:
+ return self.env.sim.data.qpos[3:6].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel[3:6].copy()
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
index 8df365a..54910e5 100644
--- a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
@@ -2,8 +2,10 @@ from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
from typing import Union, Tuple
import numpy as np
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(BlackBoxWrapper):
+
+class MPWrapper(RawInterfaceWrapper):
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
@@ -12,7 +14,7 @@ class MPWrapper(BlackBoxWrapper):
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qvel.flat[:self.env.n_links]
- def get_context_mask(self):
+ def context_mask(self):
return np.concatenate([
[False] * self.env.n_links, # cos
[False] * self.env.n_links, # sin
@@ -21,4 +23,4 @@ class MPWrapper(BlackBoxWrapper):
[False] * 3, # goal distance
# self.get_body_com("target"), # only return target to make problem harder
[False], # step
- ])
\ No newline at end of file
+ ])
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/walker_2d_jump/new_mp_wrapper.py
new file mode 100644
index 0000000..dde928f
--- /dev/null
+++ b/alr_envs/alr/mujoco/walker_2d_jump/new_mp_wrapper.py
@@ -0,0 +1,28 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+
+
+class MPWrapper(RawInterfaceWrapper):
+ def context_mask(self):
+ return np.hstack([
+ [False] * 17,
+ [True] # goal pos
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray]:
+ return self.env.sim.data.qpos[3:9].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel[3:9].copy()
+
+ @property
+ def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
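The context masks defined by these wrappers are boolean vectors over the flat step observation; the black-box wrapper uses them to expose only the contextual entries (e.g. the goal) to the policy. A minimal numpy sketch of that masking, reusing the 17+1 layout of the hopper/walker masks above with made-up observation values:

    import numpy as np

    obs = np.arange(18, dtype=float)                  # placeholder flat observation
    context_mask = np.hstack([[False] * 17, [True]])  # same pattern as the hopper_throw mask
    context_obs = obs[context_mask]                   # array([17.]): only the goal entry remains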
From a042f9f67143b4c5c7a379dd0cb24c7dcf440f81 Mon Sep 17 00:00:00 2001
From: Onur
Date: Wed, 29 Jun 2022 11:55:40 +0200
Subject: [PATCH 045/104] fix dict copy issue
---
alr_envs/alr/__init__.py | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 4c90512..2badc2b 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -1,5 +1,6 @@
import numpy as np
from gym import register
+from copy import deepcopy
from . import classic_control, mujoco
from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv
@@ -364,7 +365,7 @@ for _v in _versions:
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_simple_reacher_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
kwargs_dict_simple_reacher_promp['wrappers'].append('TODO') # TODO
kwargs_dict_simple_reacher_promp['movement_primitives_kwargs']['action_dim'] = 2 if "long" not in _v.lower() else 5
kwargs_dict_simple_reacher_promp['phase_generator_kwargs']['tau'] = 2
@@ -399,7 +400,7 @@ register(
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
-kwargs_dict_via_point_reacher_promp = dict(DEFAULT_MP_ENV_DICT)
+kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
kwargs_dict_via_point_reacher_promp['wrappers'].append('TODO') # TODO
kwargs_dict_via_point_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5
kwargs_dict_via_point_reacher_promp['phase_generator_kwargs']['tau'] = 2
@@ -440,7 +441,7 @@ for _v in _versions:
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_hole_reacher_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
kwargs_dict_hole_reacher_promp['wrappers'].append('TODO') # TODO
kwargs_dict_hole_reacher_promp['ep_wrapper_kwargs']['weight_scale'] = 2
kwargs_dict_hole_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5
@@ -485,7 +486,7 @@ for _v in _versions:
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_alr_reacher_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_alr_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
kwargs_dict_alr_reacher_promp['wrappers'].append('TODO') # TODO
kwargs_dict_alr_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5 if "long" not in _v.lower() else 7
kwargs_dict_alr_reacher_promp['phase_generator_kwargs']['tau'] = 4
@@ -505,7 +506,7 @@ _versions = ['ALRBeerPong-v0']
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_bp_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_bp_promp = deepcopy(DEFAULT_MP_ENV_DICT)
kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.NewMPWrapper)
kwargs_dict_bp_promp['movement_primitives_kwargs']['action_dim'] = 7
kwargs_dict_bp_promp['phase_generator_kwargs']['tau'] = 0.8
@@ -526,7 +527,7 @@ _versions = ["ALRBeerPongStepBased-v0", "ALRBeerPongFixedRelease-v0"]
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_bp_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_bp_promp = deepcopy(DEFAULT_MP_ENV_DICT)
kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.NewMPWrapper)
kwargs_dict_bp_promp['movement_primitives_kwargs']['action_dim'] = 7
kwargs_dict_bp_promp['phase_generator_kwargs']['tau'] = 0.62
@@ -551,7 +552,7 @@ _versions = ['ALRAntJump-v0']
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_ant_jump_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_ant_jump_promp = deepcopy(DEFAULT_MP_ENV_DICT)
kwargs_dict_ant_jump_promp['wrappers'].append(mujoco.ant_jump.NewMPWrapper)
kwargs_dict_ant_jump_promp['movement_primitives_kwargs']['action_dim'] = 8
kwargs_dict_ant_jump_promp['phase_generator_kwargs']['tau'] = 10
@@ -572,7 +573,7 @@ _versions = ['ALRHalfCheetahJump-v0']
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_halfcheetah_jump_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_halfcheetah_jump_promp = deepcopy(DEFAULT_MP_ENV_DICT)
kwargs_dict_halfcheetah_jump_promp['wrappers'].append(mujoco.ant_jump.NewMPWrapper)
kwargs_dict_halfcheetah_jump_promp['movement_primitives_kwargs']['action_dim'] = 6
kwargs_dict_halfcheetah_jump_promp['phase_generator_kwargs']['tau'] = 5
@@ -596,7 +597,7 @@ _versions = ['ALRHopperJump-v0', 'ALRHopperJumpRndmJointsDesPos-v0', 'ALRHopperJ
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_hopper_jump_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_hopper_jump_promp = deepcopy(DEFAULT_MP_ENV_DICT)
kwargs_dict_hopper_jump_promp['wrappers'].append('TODO') # TODO
kwargs_dict_hopper_jump_promp['movement_primitives_kwargs']['action_dim'] = 3
kwargs_dict_hopper_jump_promp['phase_generator_kwargs']['tau'] = 2
@@ -618,7 +619,7 @@ _versions = ['ALRWalker2DJump-v0']
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_walker2d_jump_promp = dict(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_walker2d_jump_promp = deepcopy(DEFAULT_MP_ENV_DICT)
kwargs_dict_walker2d_jump_promp['wrappers'].append('TODO') # TODO
kwargs_dict_walker2d_jump_promp['movement_primitives_kwargs']['action_dim'] = 6
kwargs_dict_walker2d_jump_promp['phase_generator_kwargs']['tau'] = 2.4
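The dict() -> deepcopy() switch matters because DEFAULT_MP_ENV_DICT nests a list ('wrappers') and several dicts below the top level; dict() copies only the outer mapping, so every registration kept appending to the same shared inner objects. A minimal sketch of the difference, using a shortened stand-in rather than the real default dict:

    from copy import deepcopy

    default = {"wrappers": [], "phase_generator_kwargs": {"tau": 2}}

    shallow = dict(default)
    shallow["wrappers"].append("WrapperA")        # also lands in default["wrappers"]
    assert default["wrappers"] == ["WrapperA"]    # the shared inner list leaked into the template

    deep = deepcopy(default)
    deep["wrappers"].append("WrapperB")
    assert default["wrappers"] == ["WrapperA"]    # deepcopy leaves the template untouched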
From 9b48fc9d489d14bc14d6bdaa3cac54ce94ad2bb1 Mon Sep 17 00:00:00 2001
From: Fabian
Date: Wed, 29 Jun 2022 12:25:40 +0200
Subject: [PATCH 046/104] todos
---
alr_envs/alr/__init__.py | 10 ++++--
.../half_cheetah_jump/new_mp_wrapper.py | 2 --
alr_envs/mp/black_box_wrapper.py | 31 ++++++++++---------
alr_envs/mp/raw_interface_wrapper.py | 2 +-
4 files changed, 25 insertions(+), 20 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 4c90512..86491b5 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -24,13 +24,17 @@ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
DEFAULT_MP_ENV_DICT = {
"name": 'EnvName',
"wrappers": [],
+ # TODO move scale to traj_gen
"ep_wrapper_kwargs": {
"weight_scale": 1
},
+ # TODO traj_gen_kwargs
+ # TODO remove action_dim
"movement_primitives_kwargs": {
'movement_primitives_type': 'promp',
'action_dim': 7
},
+ # TODO remove tau
"phase_generator_kwargs": {
'phase_generator_type': 'linear',
'delay': 0,
@@ -40,13 +44,13 @@ DEFAULT_MP_ENV_DICT = {
},
"controller_kwargs": {
'controller_type': 'motor',
- "p_gains": np.ones(7),
- "d_gains": np.ones(7) * 0.1,
+ "p_gains": 1.0,
+ "d_gains": 0.1,
},
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
'num_basis': 5,
- 'num_basis_zero_start': 2
+ 'num_basis_zero_start': 2 # TODO: Change to 1
}
}
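Dropping the length-7 gain arrays in favour of scalars presumably relies on numpy broadcasting inside the 'motor' tracking controller, so one default fits arms and hoppers with different joint counts. A sketch of that idea under the assumption that the controller applies a standard PD law; the actual controller implementation is not part of this patch:

    import numpy as np

    def pd_step(des_pos, des_vel, cur_pos, cur_vel, p_gains=1.0, d_gains=0.1):
        # scalar gains broadcast over however many joints the target has (assumed behaviour)
        return p_gains * (des_pos - cur_pos) + d_gains * (des_vel - cur_vel)

    torques = pd_step(np.zeros(7), np.zeros(7), np.full(7, 0.1), np.zeros(7))  # 7-DoF example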
diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/half_cheetah_jump/new_mp_wrapper.py
index f098c2d..d14c9a9 100644
--- a/alr_envs/alr/mujoco/half_cheetah_jump/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/new_mp_wrapper.py
@@ -2,8 +2,6 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
-
from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
diff --git a/alr_envs/mp/black_box_wrapper.py b/alr_envs/mp/black_box_wrapper.py
index 5ae0ff9..d87c332 100644
--- a/alr_envs/mp/black_box_wrapper.py
+++ b/alr_envs/mp/black_box_wrapper.py
@@ -32,18 +32,24 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
self.env = env
self.duration = duration
- self.traj_steps = int(duration / self.dt)
- self.post_traj_steps = self.env.spec.max_episode_steps - self.traj_steps
+ self.sequencing = sequencing
+ # self.traj_steps = int(duration / self.dt)
+ # self.post_traj_steps = self.env.spec.max_episode_steps - self.traj_steps
# duration = self.env.max_episode_steps * self.dt
# trajectory generation
self.trajectory_generator = trajectory_generator
self.tracking_controller = tracking_controller
# self.weight_scale = weight_scale
- self.time_steps = np.linspace(0, self.duration, self.traj_steps)
- self.trajectory_generator.set_mp_times(self.time_steps)
- # self.trajectory_generator.set_mp_duration(self.time_steps, dt)
- # action_bounds = np.inf * np.ones((np.prod(self.trajectory_generator.num_params)))
+ # self.time_steps = np.linspace(0, self.duration, self.traj_steps)
+ # self.trajectory_generator.set_mp_times(self.time_steps)
+ if not sequencing:
+ self.trajectory_generator.set_mp_duration(np.array([self.duration]), np.array([self.dt]))
+ else:
+ # sequencing stuff
+ pass
+
+ # reward computation
self.reward_aggregation = reward_aggregation
# spaces
@@ -67,15 +73,12 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
return observation[self.env.context_mask]
def get_trajectory(self, action: np.ndarray) -> Tuple:
- # TODO: this follows the implementation of the mp_pytorch library which includes the parameters tau and delay at
- # the beginning of the array.
- # ignore_indices = int(self.trajectory_generator.learn_tau) + int(self.trajectory_generator.learn_delay)
- # scaled_mp_params = action.copy()
- # scaled_mp_params[ignore_indices:] *= self.weight_scale
-
clipped_params = np.clip(action, self.mp_action_space.low, self.mp_action_space.high)
self.trajectory_generator.set_params(clipped_params)
- self.trajectory_generator.set_boundary_conditions(bc_time=self.time_steps[:1], bc_pos=self.current_pos,
+ # if self.trajectory_generator.learn_tau:
+ # self.trajectory_generator.set_mp_duration(self.trajectory_generator.tau, np.array([self.dt]))
+ self.trajectory_generator.set_mp_duration(None if self.sequencing else self.duration, np.array([self.dt]))
+ self.trajectory_generator.set_boundary_conditions(bc_time=, bc_pos=self.current_pos,
bc_vel=self.current_vel)
traj_dict = self.trajectory_generator.get_mp_trajs(get_pos=True, get_vel=True)
trajectory_tensor, velocity_tensor = traj_dict['pos'], traj_dict['vel']
@@ -152,7 +155,7 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
if self.render_mode is not None:
self.render(mode=self.render_mode, **self.render_kwargs)
- if done or self.env.do_replanning(self.env.current_pos, self.env.current_vel, obs, c_action, t):
+ if done or self.env.do_replanning(self.current_pos, self.current_vel, obs, c_action, t + past_steps):
break
infos.update({k: v[:t + 1] for k, v in infos.items()})
diff --git a/alr_envs/mp/raw_interface_wrapper.py b/alr_envs/mp/raw_interface_wrapper.py
index 45d5daf..d57ff9a 100644
--- a/alr_envs/mp/raw_interface_wrapper.py
+++ b/alr_envs/mp/raw_interface_wrapper.py
@@ -43,13 +43,13 @@ class RawInterfaceWrapper(gym.Wrapper):
raise NotImplementedError()
@property
- @abstractmethod
def dt(self) -> float:
"""
Control frequency of the environment
Returns: float
"""
+ return self.env.dt
def do_replanning(self, pos, vel, s, a, t):
# return t % 100 == 0
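With dt now concrete and do_replanning overridable, an environment only supplies a replanning condition if it needs one; the commented line above hints at a fixed-interval scheme. A hedged sketch of such an override (the 100-step interval is just the value from that comment, not a recommendation):

    from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper

    class ReplanningWrapper(RawInterfaceWrapper):
        # context_mask / current_pos / current_vel are omitted for brevity;
        # a concrete wrapper still has to provide them as in the files above.

        def do_replanning(self, pos, vel, s, a, t):
            return t % 100 == 0  # replan every 100 environment steps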
From b200cf4b69883494bfbfb77080e90926ca24ec1e Mon Sep 17 00:00:00 2001
From: Fabian
Date: Wed, 29 Jun 2022 16:30:36 +0200
Subject: [PATCH 047/104] sequencing and replanning
---
alr_envs/mp/black_box_wrapper.py | 84 +++++++++++---------------------
alr_envs/utils/utils.py | 5 ++
2 files changed, 34 insertions(+), 55 deletions(-)
diff --git a/alr_envs/mp/black_box_wrapper.py b/alr_envs/mp/black_box_wrapper.py
index d87c332..9e6a9e5 100644
--- a/alr_envs/mp/black_box_wrapper.py
+++ b/alr_envs/mp/black_box_wrapper.py
@@ -8,6 +8,7 @@ from mp_pytorch.mp.mp_interfaces import MPInterface
from alr_envs.mp.controllers.base_controller import BaseController
from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.utils.utils import get_numpy
class BlackBoxWrapper(gym.ObservationWrapper, ABC):
@@ -15,7 +16,7 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
def __init__(self,
env: RawInterfaceWrapper,
trajectory_generator: MPInterface, tracking_controller: BaseController,
- duration: float, verbose: int = 1, sequencing=True, reward_aggregation: callable = np.sum):
+ duration: float, verbose: int = 1, sequencing: bool = True, reward_aggregation: callable = np.sum):
"""
gym.Wrapper for leveraging a black box approach with a trajectory generator.
@@ -33,67 +34,50 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
self.env = env
self.duration = duration
self.sequencing = sequencing
- # self.traj_steps = int(duration / self.dt)
- # self.post_traj_steps = self.env.spec.max_episode_steps - self.traj_steps
- # duration = self.env.max_episode_steps * self.dt
+ self.current_traj_steps = 0
# trajectory generation
self.trajectory_generator = trajectory_generator
self.tracking_controller = tracking_controller
- # self.weight_scale = weight_scale
# self.time_steps = np.linspace(0, self.duration, self.traj_steps)
# self.trajectory_generator.set_mp_times(self.time_steps)
- if not sequencing:
- self.trajectory_generator.set_mp_duration(np.array([self.duration]), np.array([self.dt]))
- else:
- # sequencing stuff
- pass
+ self.trajectory_generator.set_duration(np.array([self.duration]), np.array([self.dt]))
# reward computation
self.reward_aggregation = reward_aggregation
# spaces
- self.mp_action_space = self.get_mp_action_space()
+ self.return_context_observation = not (self.sequencing) # TODO or we_do_replanning?)
+ self.traj_gen_action_space = self.get_traj_gen_action_space()
self.action_space = self.get_action_space()
self.observation_space = spaces.Box(low=self.env.observation_space.low[self.env.context_mask],
high=self.env.observation_space.high[self.env.context_mask],
dtype=self.env.observation_space.dtype)
# rendering
- self.render_mode = None
self.render_kwargs = {}
-
self.verbose = verbose
- @property
- def dt(self):
- return self.env.dt
-
def observation(self, observation):
- return observation[self.env.context_mask]
+        # return context space if we are not sequencing
+ return observation[self.context_mask] if self.return_context_observation else observation
def get_trajectory(self, action: np.ndarray) -> Tuple:
- clipped_params = np.clip(action, self.mp_action_space.low, self.mp_action_space.high)
+ clipped_params = np.clip(action, self.traj_gen_action_space.low, self.traj_gen_action_space.high)
self.trajectory_generator.set_params(clipped_params)
# if self.trajectory_generator.learn_tau:
# self.trajectory_generator.set_mp_duration(self.trajectory_generator.tau, np.array([self.dt]))
- self.trajectory_generator.set_mp_duration(None if self.sequencing else self.duration, np.array([self.dt]))
- self.trajectory_generator.set_boundary_conditions(bc_time=, bc_pos=self.current_pos,
+ # TODO: Bruce said DMP, ProMP, ProDMP can have 0 bc_time
+ self.trajectory_generator.set_boundary_conditions(bc_time=np.zeros((1,)), bc_pos=self.current_pos,
bc_vel=self.current_vel)
- traj_dict = self.trajectory_generator.get_mp_trajs(get_pos=True, get_vel=True)
+ # TODO: is this correct for replanning? Do we need to adjust anything here?
+ self.trajectory_generator.set_duration(None if self.sequencing else self.duration, np.array([self.dt]))
+ traj_dict = self.trajectory_generator.get_trajs(get_pos=True, get_vel=True)
trajectory_tensor, velocity_tensor = traj_dict['pos'], traj_dict['vel']
- trajectory = trajectory_tensor.numpy()
- velocity = velocity_tensor.numpy()
+ return get_numpy(trajectory_tensor), get_numpy(velocity_tensor)
- # TODO: Do we need this or does mp_pytorch have this?
- if self.post_traj_steps > 0:
- trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
- velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.trajectory_generator.num_dof))])
-
- return trajectory, velocity
-
- def get_mp_action_space(self):
+ def get_traj_gen_action_space(self):
"""This function can be used to set up an individual space for the parameters of the trajectory_generator."""
min_action_bounds, max_action_bounds = self.trajectory_generator.get_param_bounds()
mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
@@ -108,22 +92,17 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
Only needs to be overwritten if the action space needs to be modified.
"""
try:
- return self.mp_action_space
+ return self.traj_gen_action_space
except AttributeError:
- return self.get_mp_action_space()
+ return self.get_traj_gen_action_space()
def step(self, action: np.ndarray):
""" This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
- # TODO: Think about sequencing
- # TODO: Reward Function rather here?
+
# agent to learn when to release the ball
mp_params, env_spec_params = self._episode_callback(action)
trajectory, velocity = self.get_trajectory(mp_params)
- # TODO
- # self.time_steps = np.linspace(0, learned_duration, self.traj_steps)
- # self.trajectory_generator.set_mp_times(self.time_steps)
-
trajectory_length = len(trajectory)
rewards = np.zeros(shape=(trajectory_length,))
if self.verbose >= 2:
@@ -152,13 +131,15 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
elems[t] = v
infos[k] = elems
- if self.render_mode is not None:
- self.render(mode=self.render_mode, **self.render_kwargs)
+ if self.render_kwargs:
+ self.render(**self.render_kwargs)
- if done or self.env.do_replanning(self.current_pos, self.current_vel, obs, c_action, t + past_steps):
+ if done or self.env.do_replanning(self.current_pos, self.current_vel, obs, c_action,
+ t + 1 + self.current_traj_steps):
break
infos.update({k: v[:t + 1] for k, v in infos.items()})
+ self.current_traj_steps += t + 1
if self.verbose >= 2:
infos['trajectory'] = trajectory
@@ -168,24 +149,17 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
infos['trajectory_length'] = t + 1
trajectory_return = self.reward_aggregation(rewards[:t + 1])
- return self.get_observation_from_step(obs), trajectory_return, done, infos
+ return obs, trajectory_return, done, infos
- def reset(self):
- return self.get_observation_from_step(self.env.reset())
-
- def render(self, mode='human', **kwargs):
+ def render(self, **kwargs):
"""Only set render options here, such that they can be used during the rollout.
This only needs to be called once"""
- self.render_mode = mode
self.render_kwargs = kwargs
# self.env.render(mode=self.render_mode, **self.render_kwargs)
- self.env.render(mode=self.render_mode)
+ self.env.render(**kwargs)
- def get_observation_from_step(self, observation: np.ndarray) -> np.ndarray:
- return observation[self.active_obs]
-
- def seed(self, seed=None):
- self.env.seed(seed)
+ def reset(self, **kwargs):
+ self.current_traj_steps = 0
def plot_trajs(self, des_trajs, des_vels):
import matplotlib.pyplot as plt
diff --git a/alr_envs/utils/utils.py b/alr_envs/utils/utils.py
index 3354db3..b90cf60 100644
--- a/alr_envs/utils/utils.py
+++ b/alr_envs/utils/utils.py
@@ -1,4 +1,5 @@
import numpy as np
+import torch as ch
def angle_normalize(x, type="deg"):
@@ -19,3 +20,7 @@ def angle_normalize(x, type="deg"):
two_pi = 2 * np.pi
return x - two_pi * np.floor((x + np.pi) / two_pi)
+
+
+def get_numpy(x: ch.Tensor):
+ return x.detach().cpu().numpy()
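get_numpy is what get_trajectory now uses to turn the position/velocity tensors from get_trajs into plain arrays; detach() and cpu() make that safe for tensors that require grad or live on the GPU. A small usage sketch with a stand-in tensor rather than an actual trajectory:

    import torch as ch

    from alr_envs.utils.utils import get_numpy

    traj = ch.linspace(0.0, 1.0, steps=50, requires_grad=True)  # placeholder "trajectory"
    traj_np = get_numpy(traj)  # detach -> cpu -> numpy, shape (50,)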
From 3273f455c538ea1c5a3cea66679ff55c9f475f90 Mon Sep 17 00:00:00 2001
From: Fabian
Date: Thu, 30 Jun 2022 14:08:54 +0200
Subject: [PATCH 048/104] wrappers updated
---
alr_envs/__init__.py | 2 +-
alr_envs/alr/__init__.py | 8 +-
.../hole_reacher/mp_wrapper.py | 24 +--
.../hole_reacher/new_mp_wrapper.py | 31 ----
.../simple_reacher/mp_wrapper.py | 12 +-
.../simple_reacher/new_mp_wrapper.py | 31 ----
.../viapoint_reacher/mp_wrapper.py | 6 +-
.../viapoint_reacher/new_mp_wrapper.py | 2 -
alr_envs/alr/mujoco/__init__.py | 11 +-
alr_envs/alr/mujoco/ant_jump/mp_wrapper.py | 6 +-
.../ball_in_a_cup/ball_in_a_cup_mp_wrapper.py | 6 +-
alr_envs/alr/mujoco/beerpong/beerpong.py | 6 +-
.../alr/mujoco/beerpong/beerpong_reward.py | 171 ------------------
.../mujoco/beerpong/beerpong_reward_simple.py | 141 ---------------
.../alr/mujoco/beerpong/beerpong_simple.py | 166 -----------------
alr_envs/alr/mujoco/beerpong/mp_wrapper.py | 6 +-
.../mujoco/half_cheetah_jump/mp_wrapper.py | 6 +-
.../alr/mujoco/hopper_jump/hopper_jump.py | 3 +-
alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py | 6 +-
.../alr/mujoco/hopper_throw/mp_wrapper.py | 6 +-
.../alr/mujoco/hopper_throw/new_mp_wrapper.py | 2 -
alr_envs/alr/mujoco/reacher/__init__.py | 3 +-
alr_envs/alr/mujoco/reacher/mp_wrapper.py | 7 +-
alr_envs/alr/mujoco/reacher/new_mp_wrapper.py | 14 +-
.../alr/mujoco/table_tennis/mp_wrapper.py | 6 +-
.../alr/mujoco/walker_2d_jump/mp_wrapper.py | 6 +-
.../mujoco/walker_2d_jump/new_mp_wrapper.py | 2 -
.../dmc/manipulation/reach_site/mp_wrapper.py | 6 +-
alr_envs/dmc/suite/ball_in_cup/mp_wrapper.py | 6 +-
alr_envs/dmc/suite/cartpole/mp_wrapper.py | 7 +-
alr_envs/dmc/suite/reacher/mp_wrapper.py | 6 +-
alr_envs/examples/examples_dmc.py | 2 +-
alr_envs/examples/examples_metaworld.py | 2 +-
...ves.py => examples_movement_primitives.py} | 29 ++-
alr_envs/examples/examples_open_ai.py | 2 +-
alr_envs/examples/pd_control_gain_tuning.py | 2 +-
alr_envs/meta/goal_change_mp_wrapper.py | 6 +-
.../goal_endeffector_change_mp_wrapper.py | 6 +-
.../meta/goal_object_change_mp_wrapper.py | 6 +-
alr_envs/meta/object_change_mp_wrapper.py | 6 +-
alr_envs/mp/black_box_wrapper.py | 50 +++--
alr_envs/mp/raw_interface_wrapper.py | 4 +-
.../continuous_mountain_car/mp_wrapper.py | 5 +-
.../open_ai/mujoco/reacher_v2/mp_wrapper.py | 9 +-
alr_envs/open_ai/robotics/fetch/mp_wrapper.py | 4 +-
alr_envs/utils/make_env_helpers.py | 71 +++++---
alr_envs/utils/utils.py | 22 +++
47 files changed, 219 insertions(+), 722 deletions(-)
delete mode 100644 alr_envs/alr/classic_control/hole_reacher/new_mp_wrapper.py
delete mode 100644 alr_envs/alr/classic_control/simple_reacher/new_mp_wrapper.py
delete mode 100644 alr_envs/alr/mujoco/beerpong/beerpong_reward.py
delete mode 100644 alr_envs/alr/mujoco/beerpong/beerpong_reward_simple.py
delete mode 100644 alr_envs/alr/mujoco/beerpong/beerpong_simple.py
rename alr_envs/examples/{examples_motion_primitives.py => examples_movement_primitives.py} (82%)
diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py
index 858a66c..cf910b9 100644
--- a/alr_envs/__init__.py
+++ b/alr_envs/__init__.py
@@ -1,6 +1,6 @@
from alr_envs import dmc, meta, open_ai
-from alr_envs.utils.make_env_helpers import make, make_dmp_env, make_promp_env, make_rank
from alr_envs.utils import make_dmc
+from alr_envs.utils.make_env_helpers import make, make_bb, make_rank
# Convenience function for all MP environments
from .alr import ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 7a7db6d..607ef18 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -406,8 +406,6 @@ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
kwargs_dict_via_point_reacher_promp['wrappers'].append('TODO') # TODO
-kwargs_dict_via_point_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5
-kwargs_dict_via_point_reacher_promp['phase_generator_kwargs']['tau'] = 2
kwargs_dict_via_point_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
kwargs_dict_via_point_reacher_promp['name'] = "ViaPointReacherProMP-v0"
register(
@@ -448,10 +446,10 @@ for _v in _versions:
kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
kwargs_dict_hole_reacher_promp['wrappers'].append('TODO') # TODO
kwargs_dict_hole_reacher_promp['ep_wrapper_kwargs']['weight_scale'] = 2
- kwargs_dict_hole_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5
- kwargs_dict_hole_reacher_promp['phase_generator_kwargs']['tau'] = 2
+ # kwargs_dict_hole_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5
+ # kwargs_dict_hole_reacher_promp['phase_generator_kwargs']['tau'] = 2
kwargs_dict_hole_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
- kwargs_dict_hole_reacher_promp['basis_generator_kwargs']['num_basis'] = 5
+ # kwargs_dict_hole_reacher_promp['basis_generator_kwargs']['num_basis'] = 5
kwargs_dict_hole_reacher_promp['name'] = f"alr_envs:{_v}"
register(
id=_env_id,
diff --git a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
index feb545f..e249a71 100644
--- a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
+++ b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
@@ -2,12 +2,12 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
- @property
- def active_obs(self):
+class MPWrapper(RawInterfaceWrapper):
+
+ def get_context_mask(self):
return np.hstack([
[self.env.random_start] * self.env.n_links, # cos
[self.env.random_start] * self.env.n_links, # sin
@@ -18,14 +18,6 @@ class MPWrapper(MPEnvWrapper):
[False] # env steps
])
- # @property
- # def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- # return self._joint_angles.copy()
- #
- # @property
- # def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- # return self._angle_velocity.copy()
-
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.current_pos
@@ -33,11 +25,3 @@ class MPWrapper(MPEnvWrapper):
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.current_vel
-
- @property
- def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/alr/classic_control/hole_reacher/new_mp_wrapper.py b/alr_envs/alr/classic_control/hole_reacher/new_mp_wrapper.py
deleted file mode 100644
index 1f1d198..0000000
--- a/alr_envs/alr/classic_control/hole_reacher/new_mp_wrapper.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-
-
-class NewMPWrapper(RawInterfaceWrapper):
-
- def get_context_mask(self):
- return np.hstack([
- [self.env.random_start] * self.env.n_links, # cos
- [self.env.random_start] * self.env.n_links, # sin
- [self.env.random_start] * self.env.n_links, # velocity
- [self.env.initial_width is None], # hole width
- # [self.env.hole_depth is None], # hole depth
- [True] * 2, # x-y coordinates of target distance
- [False] # env steps
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.current_pos
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.current_vel
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py b/alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py
index 4b71e3a..30b0985 100644
--- a/alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py
+++ b/alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py
@@ -2,12 +2,12 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
- @property
- def active_obs(self):
+class MPWrapper(RawInterfaceWrapper):
+
+ def context_mask(self):
return np.hstack([
[self.env.random_start] * self.env.n_links, # cos
[self.env.random_start] * self.env.n_links, # sin
@@ -24,10 +24,6 @@ class MPWrapper(MPEnvWrapper):
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.current_vel
- @property
- def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
@property
def dt(self) -> Union[float, int]:
return self.env.dt
diff --git a/alr_envs/alr/classic_control/simple_reacher/new_mp_wrapper.py b/alr_envs/alr/classic_control/simple_reacher/new_mp_wrapper.py
deleted file mode 100644
index c1497e6..0000000
--- a/alr_envs/alr/classic_control/simple_reacher/new_mp_wrapper.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from mp_env_api import MPEnvWrapper
-
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-
-
-class MPWrapper(RawInterfaceWrapper):
-
- def context_mask(self):
- return np.hstack([
- [self.env.random_start] * self.env.n_links, # cos
- [self.env.random_start] * self.env.n_links, # sin
- [self.env.random_start] * self.env.n_links, # velocity
- [True] * 2, # x-y coordinates of target distance
- [False] # env steps
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.current_pos
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.current_vel
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py b/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py
index 6b3e85d..68d203f 100644
--- a/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py
+++ b/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py
@@ -2,12 +2,12 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
return np.hstack([
[self.env.random_start] * self.env.n_links, # cos
[self.env.random_start] * self.env.n_links, # sin
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/new_mp_wrapper.py b/alr_envs/alr/classic_control/viapoint_reacher/new_mp_wrapper.py
index f02dfe1..9f40292 100644
--- a/alr_envs/alr/classic_control/viapoint_reacher/new_mp_wrapper.py
+++ b/alr_envs/alr/classic_control/viapoint_reacher/new_mp_wrapper.py
@@ -2,8 +2,6 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
-
from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index 2885321..f2f4536 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -1,13 +1,12 @@
-from .reacher.balancing import BalancingEnv
+from .ant_jump.ant_jump import ALRAntJumpEnv
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
-from .table_tennis.tt_gym import TTEnvGym
-from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased, ALRBeerBongEnvStepBasedEpisodicReward, ALRBeerBongEnvFixedReleaseStep
-from .ant_jump.ant_jump import ALRAntJumpEnv
+from .beerpong.beerpong import ALRBeerBongEnv
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
-from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased
from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import ALRHopperThrowEnv
from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
+from .reacher.alr_reacher import ALRReacherEnv
+from .reacher.balancing import BalancingEnv
+from .table_tennis.tt_gym import TTEnvGym
from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv
-from .reacher.alr_reacher import ALRReacherEnv
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py b/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
index 4967b64..4d5c0d6 100644
--- a/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
@@ -2,13 +2,13 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
return np.hstack([
[False] * 111, # ant has 111 dimensional observation space !!
[True] # goal height
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py
index 945fa8d..609858b 100644
--- a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py
@@ -2,13 +2,13 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class BallInACupMPWrapper(MPEnvWrapper):
+class BallInACupMPWrapper(RawInterfaceWrapper):
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
# TODO: @Max Filter observations correctly
return np.hstack([
[False] * 7, # cos
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 64d9e78..dfd6ea4 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -22,7 +22,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def __init__(
self, frame_skip=1, apply_gravity_comp=True, noisy=False,
rndm_goal=False, cup_goal_pos=None
- ):
+ ):
cup_goal_pos = np.array(cup_goal_pos if cup_goal_pos is not None else [-0.3, -1.2, 0.840])
if cup_goal_pos.shape[0] == 2:
@@ -154,7 +154,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
success=success,
is_collided=is_collided, sim_crash=crash,
table_contact_first=int(not self.reward_function.ball_ground_contact_first)
- )
+ )
infos.update(reward_infos)
return ob, reward, done, infos
@@ -176,7 +176,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
cup_goal_diff_top,
self.sim.model.body_pos[self.cup_table_id][:2].copy(),
[self._steps],
- ])
+ ])
@property
def dt(self):
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
deleted file mode 100644
index dc39ca8..0000000
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
+++ /dev/null
@@ -1,171 +0,0 @@
-import numpy as np
-
-
-class BeerPongReward:
- def __init__(self):
-
- self.robot_collision_objects = ["wrist_palm_link_convex_geom",
- "wrist_pitch_link_convex_decomposition_p1_geom",
- "wrist_pitch_link_convex_decomposition_p2_geom",
- "wrist_pitch_link_convex_decomposition_p3_geom",
- "wrist_yaw_link_convex_decomposition_p1_geom",
- "wrist_yaw_link_convex_decomposition_p2_geom",
- "forearm_link_convex_decomposition_p1_geom",
- "forearm_link_convex_decomposition_p2_geom",
- "upper_arm_link_convex_decomposition_p1_geom",
- "upper_arm_link_convex_decomposition_p2_geom",
- "shoulder_link_convex_decomposition_p1_geom",
- "shoulder_link_convex_decomposition_p2_geom",
- "shoulder_link_convex_decomposition_p3_geom",
- "base_link_convex_geom", "table_contact_geom"]
-
- self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
- "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
- # "cup_base_table", "cup_base_table_contact",
- "cup_geom_table15",
- "cup_geom_table16",
- "cup_geom_table17", "cup_geom1_table8",
- # "cup_base_table_contact",
- # "cup_base_table"
- ]
-
-
- self.ball_traj = None
- self.dists = None
- self.dists_final = None
- self.costs = None
- self.action_costs = None
- self.angle_rewards = None
- self.cup_angles = None
- self.cup_z_axes = None
- self.collision_penalty = 500
- self.reset(None)
-
- def reset(self, context):
- self.ball_traj = []
- self.dists = []
- self.dists_final = []
- self.costs = []
- self.action_costs = []
- self.angle_rewards = []
- self.cup_angles = []
- self.cup_z_axes = []
- self.ball_ground_contact = False
- self.ball_table_contact = False
- self.ball_wall_contact = False
- self.ball_cup_contact = False
-
- def compute_reward(self, env, action):
- self.ball_id = env.sim.model._body_name2id["ball"]
- self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
- self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
- self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
- self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
- self.cup_table_id = env.sim.model._body_name2id["cup_table"]
- self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
- self.wall_collision_id = env.sim.model._geom_name2id["wall"]
- self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
- self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
- self.ground_collision_id = env.sim.model._geom_name2id["ground"]
- self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
-
- goal_pos = env.sim.data.site_xpos[self.goal_id]
- ball_pos = env.sim.data.body_xpos[self.ball_id]
- ball_vel = env.sim.data.body_xvelp[self.ball_id]
- goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
- self.dists.append(np.linalg.norm(goal_pos - ball_pos))
- self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
-
- action_cost = np.sum(np.square(action))
- self.action_costs.append(action_cost)
-
- ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
- self.table_collision_id)
-
- if ball_table_bounce: # or ball_cup_table_cont or ball_wall_con
- self.ball_table_contact = True
-
- ball_cup_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
- self.cup_collision_ids)
- if ball_cup_cont:
- self.ball_cup_contact = True
-
- ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.wall_collision_id)
- if ball_wall_cont and not self.ball_table_contact:
- self.ball_wall_contact = True
-
- ball_ground_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
- self.ground_collision_id)
- if ball_ground_contact and not self.ball_table_contact:
- self.ball_ground_contact = True
-
- self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
- if env._steps == env.ep_length - 1 or self._is_collided:
-
- min_dist = np.min(self.dists)
-
- ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.cup_table_collision_id)
-
- cost_offset = 0
-
- if self.ball_ground_contact: # or self.ball_wall_contact:
- cost_offset += 2
-
- if not self.ball_table_contact:
- cost_offset += 2
-
- if not ball_in_cup:
- cost_offset += 2
- cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-4 * action_cost # + min_dist ** 2
- else:
- if self.ball_cup_contact:
- cost_offset += 1
- cost = cost_offset + self.dists_final[-1] ** 2 + 1e-4 * action_cost
-
- reward = - 1*cost - self.collision_penalty * int(self._is_collided)
- success = ball_in_cup and not self.ball_ground_contact and not self.ball_wall_contact and not self.ball_cup_contact
- else:
- reward = - 1e-4 * action_cost
- success = False
-
- infos = {}
- infos["success"] = success
- infos["is_collided"] = self._is_collided
- infos["ball_pos"] = ball_pos.copy()
- infos["ball_vel"] = ball_vel.copy()
- infos["action_cost"] = 5e-4 * action_cost
-
- return reward, infos
-
- def _check_collision_single_objects(self, sim, id_1, id_2):
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 == id_1 and con.geom2 == id_2
- collision_trans = con.geom1 == id_2 and con.geom2 == id_1
-
- if collision or collision_trans:
- return True
- return False
-
- def _check_collision_with_itself(self, sim, collision_ids):
- col_1, col_2 = False, False
- for j, id in enumerate(collision_ids):
- col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
- if j != len(collision_ids) - 1:
- col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
- else:
- col_2 = False
- collision = True if col_1 or col_2 else False
- return collision
-
- def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 in id_list and con.geom2 == id_1
- collision_trans = con.geom1 == id_1 and con.geom2 in id_list
-
- if collision or collision_trans:
- return True
- return False
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_simple.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_simple.py
deleted file mode 100644
index fbe2163..0000000
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_simple.py
+++ /dev/null
@@ -1,141 +0,0 @@
-import numpy as np
-from alr_envs.alr.mujoco import alr_reward_fct
-
-
-class BeerpongReward(alr_reward_fct.AlrReward):
- def __init__(self, sim, sim_time):
-
- self.sim = sim
- self.sim_time = sim_time
-
- self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
- "wrist_pitch_link_convex_decomposition_p1_geom",
- "wrist_pitch_link_convex_decomposition_p2_geom",
- "wrist_pitch_link_convex_decomposition_p3_geom",
- "wrist_yaw_link_convex_decomposition_p1_geom",
- "wrist_yaw_link_convex_decomposition_p2_geom",
- "forearm_link_convex_decomposition_p1_geom",
- "forearm_link_convex_decomposition_p2_geom"]
-
- self.ball_id = None
- self.ball_collision_id = None
- self.goal_id = None
- self.goal_final_id = None
- self.collision_ids = None
-
- self.ball_traj = None
- self.dists = None
- self.dists_ctxt = None
- self.dists_final = None
- self.costs = None
-
- self.reset(None)
-
- def reset(self, context):
- self.ball_traj = np.zeros(shape=(self.sim_time, 3))
- self.dists = []
- self.dists_ctxt = []
- self.dists_final = []
- self.costs = []
- self.action_costs = []
- self.context = context
- self.ball_in_cup = False
- self.dist_ctxt = 5
- self.bounce_dist = 2
- self.min_dist = 2
- self.dist_final = 2
- self.table_contact = False
-
- self.ball_id = self.sim.model._body_name2id["ball"]
- self.ball_collision_id = self.sim.model._geom_name2id["ball_geom"]
- self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
- self.goal_id = self.sim.model._site_name2id["cup_goal_table"]
- self.goal_final_id = self.sim.model._site_name2id["cup_goal_final_table"]
- self.collision_ids = [self.sim.model._geom_name2id[name] for name in self.collision_objects]
- self.cup_table_id = self.sim.model._body_name2id["cup_table"]
- self.bounce_table_id = self.sim.model._site_name2id["bounce_table"]
-
- def compute_reward(self, action, sim, step):
- action_cost = np.sum(np.square(action))
- self.action_costs.append(action_cost)
-
- stop_sim = False
- success = False
-
- if self.check_collision(sim):
- reward = - 1e-2 * action_cost - 10
- stop_sim = True
- return reward, success, stop_sim
-
- # Compute the current distance from the ball to the inner part of the cup
- goal_pos = sim.data.site_xpos[self.goal_id]
- ball_pos = sim.data.body_xpos[self.ball_id]
- bounce_pos = sim.data.site_xpos[self.bounce_table_id]
- goal_final_pos = sim.data.site_xpos[self.goal_final_id]
- self.dists.append(np.linalg.norm(goal_pos - ball_pos))
- self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
- self.ball_traj[step, :] = ball_pos
-
- ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
- table_contact = self.check_ball_table_contact(sim, self.ball_collision_id)
-
- if table_contact and not self.table_contact:
- self.bounce_dist = np.minimum((np.linalg.norm(bounce_pos - ball_pos)), 2)
- self.table_contact = True
-
- if step == self.sim_time - 1:
- min_dist = np.min(self.dists)
- self.min_dist = min_dist
- dist_final = self.dists_final[-1]
- self.dist_final = dist_final
-
- cost = 0.33 * min_dist + 0.33 * dist_final + 0.33 * self.bounce_dist
- reward = np.exp(-2 * cost) - 1e-2 * action_cost
- success = self.bounce_dist < 0.05 and dist_final < 0.05 and ball_in_cup
- else:
- reward = - 1e-2 * action_cost
- success = False
-
- return reward, success, stop_sim
-
- def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt):
- if not ball_in_cup:
- cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2)
- else:
- cost = 2 * dist_to_ctxt ** 2
- print('Context Distance:', dist_to_ctxt)
- return cost
-
- def check_ball_table_contact(self, sim, ball_collision_id):
- table_collision_id = sim.model._geom_name2id["table_contact_geom"]
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
- collision = con.geom1 == table_collision_id and con.geom2 == ball_collision_id
- collision_trans = con.geom1 == ball_collision_id and con.geom2 == table_collision_id
-
- if collision or collision_trans:
- return True
- return False
-
- def check_ball_in_cup(self, sim, ball_collision_id):
- cup_base_collision_id = sim.model._geom_name2id["cup_base_table_contact"]
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id
- collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id
-
- if collision or collision_trans:
- return True
- return False
-
- def check_collision(self, sim):
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
- collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
-
- if collision or collision_trans:
- return True
- return False
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_simple.py b/alr_envs/alr/mujoco/beerpong/beerpong_simple.py
deleted file mode 100644
index 1708d38..0000000
--- a/alr_envs/alr/mujoco/beerpong/beerpong_simple.py
+++ /dev/null
@@ -1,166 +0,0 @@
-from gym import utils
-import os
-import numpy as np
-from gym.envs.mujoco import MujocoEnv
-
-
-class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
- def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None):
- self._steps = 0
-
- self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
- "beerpong" + ".xml")
-
- self.start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
- self.start_vel = np.zeros(7)
-
- self._q_pos = []
- self._q_vel = []
- # self.weight_matrix_scale = 50
- self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
- self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
- self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
-
- self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
- self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
-
- self.context = None
-
- # alr_mujoco_env.AlrMujocoEnv.__init__(self,
- # self.xml_path,
- # apply_gravity_comp=apply_gravity_comp,
- # n_substeps=n_substeps)
-
- self.sim_time = 8 # seconds
- # self.sim_steps = int(self.sim_time / self.dt)
- if reward_function is None:
- from alr_envs.alr.mujoco.beerpong.beerpong_reward_simple import BeerpongReward
- reward_function = BeerpongReward
- self.reward_function = reward_function(self.sim, self.sim_steps)
- self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
- self.ball_id = self.sim.model._body_name2id["ball"]
- self.cup_table_id = self.sim.model._body_name2id["cup_table"]
- # self.bounce_table_id = self.sim.model._body_name2id["bounce_table"]
-
- MujocoEnv.__init__(self, model_path=self.xml_path, frame_skip=n_substeps)
- utils.EzPickle.__init__(self)
-
- @property
- def current_pos(self):
- return self.sim.data.qpos[0:7].copy()
-
- @property
- def current_vel(self):
- return self.sim.data.qvel[0:7].copy()
-
- def configure(self, context):
- if context is None:
- context = np.array([0, -2, 0.840])
- self.context = context
- self.reward_function.reset(context)
-
- def reset_model(self):
- init_pos_all = self.init_qpos.copy()
- init_pos_robot = self.start_pos
- init_vel = np.zeros_like(init_pos_all)
-
- self._steps = 0
- self._q_pos = []
- self._q_vel = []
-
- start_pos = init_pos_all
- start_pos[0:7] = init_pos_robot
- # start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
-
- self.set_state(start_pos, init_vel)
-
- ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
- self.sim.model.body_pos[self.ball_id] = ball_pos.copy()
- self.sim.model.body_pos[self.cup_table_id] = self.context.copy()
- # self.sim.model.body_pos[self.bounce_table_id] = self.context.copy()
-
- self.sim.forward()
-
- return self._get_obs()
-
- def step(self, a):
- reward_dist = 0.0
- angular_vel = 0.0
- reward_ctrl = - np.square(a).sum()
- action_cost = np.sum(np.square(a))
-
- crash = self.do_simulation(a, self.frame_skip)
- joint_cons_viol = self.check_traj_in_joint_limits()
-
- self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
- self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
-
- ob = self._get_obs()
-
- if not crash and not joint_cons_viol:
- reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps)
- done = success or self._steps == self.sim_steps - 1 or stop_sim
- self._steps += 1
- else:
- reward = -10 - 1e-2 * action_cost
- success = False
- done = True
- return ob, reward, done, dict(reward_dist=reward_dist,
- reward_ctrl=reward_ctrl,
- velocity=angular_vel,
- traj=self._q_pos, is_success=success,
- is_collided=crash or joint_cons_viol)
-
- def check_traj_in_joint_limits(self):
- return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
-
- def extend_des_pos(self, des_pos):
- des_pos_full = self.start_pos.copy()
- des_pos_full[1] = des_pos[0]
- des_pos_full[3] = des_pos[1]
- des_pos_full[5] = des_pos[2]
- return des_pos_full
-
- def extend_des_vel(self, des_vel):
- des_vel_full = self.start_vel.copy()
- des_vel_full[1] = des_vel[0]
- des_vel_full[3] = des_vel[1]
- des_vel_full[5] = des_vel[2]
- return des_vel_full
-
- def _get_obs(self):
- theta = self.sim.data.qpos.flat[:7]
- return np.concatenate([
- np.cos(theta),
- np.sin(theta),
- # self.get_body_com("target"), # only return target to make problem harder
- [self._steps],
- ])
-
-
-
-if __name__ == "__main__":
- env = ALRBeerpongEnv()
- ctxt = np.array([0, -2, 0.840]) # initial
-
- env.configure(ctxt)
- env.reset()
- env.render()
- for i in range(16000):
- # test with random actions
- ac = 0.0 * env.action_space.sample()[0:7]
- ac[1] = -0.01
- ac[3] = - 0.01
- ac[5] = -0.01
- # ac = env.start_pos
- # ac[0] += np.pi/2
- obs, rew, d, info = env.step(ac)
- env.render()
-
- print(rew)
-
- if d:
- break
-
- env.close()
-
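Editor's note: the max_ctrl / p_gains / d_gains arrays defined in the deleted constructor above look like normalized PD tracking gains. A minimal sketch of how such gains are typically consumed to turn desired joint positions and velocities into torques; the function name and the clipping against max_ctrl are illustrative assumptions, not code from the deleted file:

import numpy as np

# gains as defined in the deleted constructor above
max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
p_gains = 1 / max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
d_gains = 1 / max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])

def pd_torques(des_pos, des_vel, cur_pos, cur_vel):
    # proportional term on the position error, derivative term on the velocity error
    trq = p_gains * (des_pos - cur_pos) + d_gains * (des_vel - cur_vel)
    # clipping against the actuator limits is an assumption, not taken from the deleted file
    return np.clip(trq, -max_ctrl, max_ctrl)
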
diff --git a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
index 022490c..40c371b 100644
--- a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
@@ -2,13 +2,13 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
return np.hstack([
[False] * 7, # cos
[False] * 7, # sin
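Editor's note: the active_obs -> context_mask rename above is more than cosmetic. The mask is a boolean filter over the flat observation, and the black_box_wrapper.py change further down in this patch indexes the observation with env.context_mask so that only the contextual entries reach the MP agent. A tiny illustration with made-up values:

import numpy as np

obs = np.arange(6.0)                                 # stand-in for a full step-based observation
context_mask = np.array([False, False, True, True, False, False])

context_obs = obs[context_mask]                      # contextual part exposed at episode start
print(context_obs)                                   # [2. 3.]
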
diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py b/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py
index 6179b07..f9a298a 100644
--- a/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py
@@ -2,12 +2,12 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
return np.hstack([
[False] * 17,
[True] # goal height
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
index 5cd234c..025bb8d 100644
--- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
@@ -54,7 +54,8 @@ class ALRHopperJumpEnv(HopperEnv):
self.current_step += 1
self.do_simulation(action, self.frame_skip)
height_after = self.get_body_com("torso")[2]
- site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
+ # site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
+ site_pos_after = self.get_body_com('foot_site')
self.max_height = max(height_after, self.max_height)
ctrl_cost = self.control_cost(action)
diff --git a/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py b/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
index 36b7158..e3279aa 100644
--- a/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
@@ -2,12 +2,12 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
return np.hstack([
[False] * (5 + int(not self.exclude_current_positions_from_observation)), # position
[False] * 6, # velocity
diff --git a/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py b/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
index 909e00a..f5bf08d 100644
--- a/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
@@ -2,12 +2,12 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
return np.hstack([
[False] * 17,
[True] # goal pos
diff --git a/alr_envs/alr/mujoco/hopper_throw/new_mp_wrapper.py b/alr_envs/alr/mujoco/hopper_throw/new_mp_wrapper.py
index 049c2f0..a8cd696 100644
--- a/alr_envs/alr/mujoco/hopper_throw/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/hopper_throw/new_mp_wrapper.py
@@ -2,8 +2,6 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
-
from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
diff --git a/alr_envs/alr/mujoco/reacher/__init__.py b/alr_envs/alr/mujoco/reacher/__init__.py
index 989b5a9..c1a25d3 100644
--- a/alr_envs/alr/mujoco/reacher/__init__.py
+++ b/alr_envs/alr/mujoco/reacher/__init__.py
@@ -1 +1,2 @@
-from .mp_wrapper import MPWrapper
\ No newline at end of file
+from .mp_wrapper import MPWrapper
+from .new_mp_wrapper import MPWrapper as NewMPWrapper
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/reacher/mp_wrapper.py b/alr_envs/alr/mujoco/reacher/mp_wrapper.py
index 3b655d4..e51843c 100644
--- a/alr_envs/alr/mujoco/reacher/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/reacher/mp_wrapper.py
@@ -1,13 +1,14 @@
from typing import Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
return np.concatenate([
[False] * self.n_links, # cos
[False] * self.n_links, # sin
diff --git a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
index 54910e5..6b50d80 100644
--- a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
@@ -8,12 +8,6 @@ from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
@property
- def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.sim.data.qpos.flat[:self.env.n_links]
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.sim.data.qvel.flat[:self.env.n_links]
-
def context_mask(self):
return np.concatenate([
[False] * self.env.n_links, # cos
@@ -24,3 +18,11 @@ class MPWrapper(RawInterfaceWrapper):
# self.get_body_com("target"), # only return target to make problem harder
[False], # step
])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qpos.flat[:self.env.n_links]
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel.flat[:self.env.n_links]
diff --git a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
index 473583f..408124a 100644
--- a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
@@ -2,13 +2,13 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
# TODO: @Max Filter observations correctly
return np.hstack([
[False] * 7, # Joint Pos
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py b/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
index 445fa40..0c2dba5 100644
--- a/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
@@ -2,12 +2,12 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
return np.hstack([
[False] * 17,
[True] # goal pos
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/walker_2d_jump/new_mp_wrapper.py
index dde928f..96b0739 100644
--- a/alr_envs/alr/mujoco/walker_2d_jump/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/walker_2d_jump/new_mp_wrapper.py
@@ -2,8 +2,6 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
-
from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
diff --git a/alr_envs/dmc/manipulation/reach_site/mp_wrapper.py b/alr_envs/dmc/manipulation/reach_site/mp_wrapper.py
index 2d03f7b..6d5029e 100644
--- a/alr_envs/dmc/manipulation/reach_site/mp_wrapper.py
+++ b/alr_envs/dmc/manipulation/reach_site/mp_wrapper.py
@@ -2,13 +2,13 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
# Joint and target positions are randomized, velocities are always set to 0.
return np.hstack([
[True] * 3, # target position
diff --git a/alr_envs/dmc/suite/ball_in_cup/mp_wrapper.py b/alr_envs/dmc/suite/ball_in_cup/mp_wrapper.py
index fb068b3..9687bed 100644
--- a/alr_envs/dmc/suite/ball_in_cup/mp_wrapper.py
+++ b/alr_envs/dmc/suite/ball_in_cup/mp_wrapper.py
@@ -2,13 +2,13 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
# Besides the ball position, the environment is always set to 0.
return np.hstack([
[False] * 2, # cup position
diff --git a/alr_envs/dmc/suite/cartpole/mp_wrapper.py b/alr_envs/dmc/suite/cartpole/mp_wrapper.py
index 1ca99f5..3f16d24 100644
--- a/alr_envs/dmc/suite/cartpole/mp_wrapper.py
+++ b/alr_envs/dmc/suite/cartpole/mp_wrapper.py
@@ -2,18 +2,17 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
def __init__(self, env, n_poles: int = 1):
self.n_poles = n_poles
super().__init__(env)
-
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
# Besides the ball position, the environment is always set to 0.
return np.hstack([
[True], # slider position
diff --git a/alr_envs/dmc/suite/reacher/mp_wrapper.py b/alr_envs/dmc/suite/reacher/mp_wrapper.py
index 86bc992..ac857c1 100644
--- a/alr_envs/dmc/suite/reacher/mp_wrapper.py
+++ b/alr_envs/dmc/suite/reacher/mp_wrapper.py
@@ -2,13 +2,13 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
# Joint and target positions are randomized, velocities are always set to 0.
return np.hstack([
[True] * 2, # joint position
diff --git a/alr_envs/examples/examples_dmc.py b/alr_envs/examples/examples_dmc.py
index 5658b1f..41d2231 100644
--- a/alr_envs/examples/examples_dmc.py
+++ b/alr_envs/examples/examples_dmc.py
@@ -59,7 +59,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
# Base DMC name, according to structure of above example
base_env = "ball_in_cup-catch"
- # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
+ # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
# You can also add other gym.Wrappers in case they are needed.
wrappers = [alr_envs.dmc.suite.ball_in_cup.MPWrapper]
mp_kwargs = {
diff --git a/alr_envs/examples/examples_metaworld.py b/alr_envs/examples/examples_metaworld.py
index 3e040cc..f179149 100644
--- a/alr_envs/examples/examples_metaworld.py
+++ b/alr_envs/examples/examples_metaworld.py
@@ -62,7 +62,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
# Base MetaWorld name, according to structure of above example
base_env = "button-press-v2"
- # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
+ # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
# You can also add other gym.Wrappers in case they are needed.
wrappers = [alr_envs.meta.goal_and_object_change.MPWrapper]
mp_kwargs = {
diff --git a/alr_envs/examples/examples_motion_primitives.py b/alr_envs/examples/examples_movement_primitives.py
similarity index 82%
rename from alr_envs/examples/examples_motion_primitives.py
rename to alr_envs/examples/examples_movement_primitives.py
index b9d355a..bf1f950 100644
--- a/alr_envs/examples/examples_motion_primitives.py
+++ b/alr_envs/examples/examples_movement_primitives.py
@@ -59,6 +59,17 @@ def example_custom_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=
"""
# Changing the traj_gen_kwargs is possible by providing them to gym.
# E.g. here by providing way to many basis functions
+ # mp_dict = alr_envs.from_default_config('ALRReacher-v0', {'basis_generator_kwargs': {'num_basis': 10}})
+ # mp_dict.update({'basis_generator_kwargs': {'num_basis': 10}})
+ # mp_dict.update({'black_box_kwargs': {'learn_sub_trajectories': True}})
+ # mp_dict.update({'black_box_kwargs': {'do_replanning': lambda pos, vel, t: lambda t: t % 100}})
+
+ # default env with promp and no learn_sub_trajectories and replanning
+ # env = alr_envs.make('ALRReacherProMP-v0', 1, n_links=7)
+ env = alr_envs.make('ALRReacherProMP-v0', 1, basis_generator_kwargs={'num_basis': 10}, n_links=7)
+ # env = alr_envs.make('ALRReacher-v0', seed=1, bb_kwargs=mp_dict, n_links=1)
+ # env = alr_envs.make_bb('ALRReacher-v0', **mp_dict)
+
mp_kwargs = {
"num_dof": 5,
"num_basis": 1000,
@@ -110,7 +121,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
base_env = "alr_envs:HoleReacher-v1"
- # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
+ # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
# You can also add other gym.Wrappers in case they are needed.
wrappers = [alr_envs.alr.classic_control.hole_reacher.MPWrapper]
mp_kwargs = {
@@ -148,14 +159,14 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
if __name__ == '__main__':
render = False
- # DMP
- example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
-
- # ProMP
- example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=1, render=render)
-
- # DetProMP
- example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render)
+ # # DMP
+ # example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
+ #
+ # # ProMP
+ # example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=1, render=render)
+ #
+ # # DetProMP
+ # example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render)
# Altered basis functions
example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
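Editor's note: the commented-out mp_dict lines near the top of this file hint at how sub-trajectory learning and replanning are meant to be toggled. Spelled out as a hedged usage sketch, assuming these overrides are routed through the registered black-box helper; env id and numbers are illustrative:

import alr_envs

# enable sub-trajectory learning for a registered ProMP env (illustrative)
env = alr_envs.make('ALRReacherProMP-v0', 1,
                    black_box_kwargs={'learn_sub_trajectories': True})

# or replan every 100 steps instead; the two options cannot be combined
env = alr_envs.make('ALRReacherProMP-v0', 1,
                    black_box_kwargs={'replanning_schedule':
                                          lambda pos, vel, obs, action, t: t % 100 == 0})
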
diff --git a/alr_envs/examples/examples_open_ai.py b/alr_envs/examples/examples_open_ai.py
index 631a3a1..46dcf60 100644
--- a/alr_envs/examples/examples_open_ai.py
+++ b/alr_envs/examples/examples_open_ai.py
@@ -4,7 +4,7 @@ import alr_envs
def example_mp(env_name, seed=1):
"""
Example for running a motion primitive based version of a OpenAI-gym environment, which is already registered.
- For more information on motion primitive specific stuff, look at the trajectory_generator examples.
+ For more information on motion primitive specific stuff, look at the traj_gen examples.
Args:
env_name: ProMP env_id
seed: seed
diff --git a/alr_envs/examples/pd_control_gain_tuning.py b/alr_envs/examples/pd_control_gain_tuning.py
index e05bad1..27cf8f8 100644
--- a/alr_envs/examples/pd_control_gain_tuning.py
+++ b/alr_envs/examples/pd_control_gain_tuning.py
@@ -8,7 +8,7 @@ from alr_envs.utils.make_env_helpers import make_promp_env
def visualize(env):
t = env.t
- pos_features = env.trajectory_generator.basis_generator.basis(t)
+ pos_features = env.traj_gen.basis_generator.basis(t)
plt.plot(t, pos_features)
plt.show()
diff --git a/alr_envs/meta/goal_change_mp_wrapper.py b/alr_envs/meta/goal_change_mp_wrapper.py
index a558365..17495da 100644
--- a/alr_envs/meta/goal_change_mp_wrapper.py
+++ b/alr_envs/meta/goal_change_mp_wrapper.py
@@ -2,10 +2,10 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
"""
This Wrapper is for environments where merely the goal changes in the beginning
and no secondary objects or end effectors are altered at the start of an episode.
@@ -27,7 +27,7 @@ class MPWrapper(MPEnvWrapper):
"""
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
# This structure is the same for all metaworld environments.
# Only the observations which change could differ
return np.hstack([
diff --git a/alr_envs/meta/goal_endeffector_change_mp_wrapper.py b/alr_envs/meta/goal_endeffector_change_mp_wrapper.py
index 8912a72..3a6ad1c 100644
--- a/alr_envs/meta/goal_endeffector_change_mp_wrapper.py
+++ b/alr_envs/meta/goal_endeffector_change_mp_wrapper.py
@@ -2,10 +2,10 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
"""
This Wrapper is for environments where merely the goal changes in the beginning
and no secondary objects or end effectors are altered at the start of an episode.
@@ -27,7 +27,7 @@ class MPWrapper(MPEnvWrapper):
"""
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
# This structure is the same for all metaworld environments.
# Only the observations which change could differ
return np.hstack([
diff --git a/alr_envs/meta/goal_object_change_mp_wrapper.py b/alr_envs/meta/goal_object_change_mp_wrapper.py
index 63e16b7..97c64b8 100644
--- a/alr_envs/meta/goal_object_change_mp_wrapper.py
+++ b/alr_envs/meta/goal_object_change_mp_wrapper.py
@@ -2,10 +2,10 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
"""
This Wrapper is for environments where merely the goal changes in the beginning
and no secondary objects or end effectors are altered at the start of an episode.
@@ -27,7 +27,7 @@ class MPWrapper(MPEnvWrapper):
"""
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
# This structure is the same for all metaworld environments.
# Only the observations which change could differ
return np.hstack([
diff --git a/alr_envs/meta/object_change_mp_wrapper.py b/alr_envs/meta/object_change_mp_wrapper.py
index 4293148..f832c9f 100644
--- a/alr_envs/meta/object_change_mp_wrapper.py
+++ b/alr_envs/meta/object_change_mp_wrapper.py
@@ -2,10 +2,10 @@ from typing import Tuple, Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
"""
This Wrapper is for environments where merely the goal changes in the beginning
and no secondary objects or end effectors are altered at the start of an episode.
@@ -27,7 +27,7 @@ class MPWrapper(MPEnvWrapper):
"""
@property
- def active_obs(self):
+ def context_mask(self) -> np.ndarray:
# This structure is the same for all metaworld environments.
# Only the observations which change could differ
return np.hstack([
diff --git a/alr_envs/mp/black_box_wrapper.py b/alr_envs/mp/black_box_wrapper.py
index 9e6a9e5..0c2a7c8 100644
--- a/alr_envs/mp/black_box_wrapper.py
+++ b/alr_envs/mp/black_box_wrapper.py
@@ -1,5 +1,5 @@
from abc import ABC
-from typing import Tuple
+from typing import Tuple, Union
import gym
import numpy as np
@@ -16,7 +16,9 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
def __init__(self,
env: RawInterfaceWrapper,
trajectory_generator: MPInterface, tracking_controller: BaseController,
- duration: float, verbose: int = 1, sequencing: bool = True, reward_aggregation: callable = np.sum):
+ duration: float, verbose: int = 1, learn_sub_trajectories: bool = False,
+ replanning_schedule: Union[None, callable] = None,
+ reward_aggregation: callable = np.sum):
"""
gym.Wrapper for leveraging a black box approach with a trajectory generator.
@@ -26,6 +28,9 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
tracking_controller: Translates the desired trajectory to raw action sequences
duration: Length of the trajectory of the movement primitive in seconds
verbose: level of detail for returned values in info dict.
+ learn_sub_trajectories: Transforms full episode learning into learning sub-trajectories, similar to
+ step-based learning
+ replanning_schedule: callable that receives the current position, velocity, observation, action and step count and returns True when a new (sub-)trajectory should be generated
reward_aggregation: function that takes the np.ndarray of step rewards as input and returns the trajectory
reward, default summation over all values.
"""
@@ -33,21 +38,22 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
self.env = env
self.duration = duration
- self.sequencing = sequencing
+ self.learn_sub_trajectories = learn_sub_trajectories
+ self.replanning_schedule = replanning_schedule
self.current_traj_steps = 0
# trajectory generation
- self.trajectory_generator = trajectory_generator
+ self.traj_gen = trajectory_generator
self.tracking_controller = tracking_controller
# self.time_steps = np.linspace(0, self.duration, self.traj_steps)
- # self.trajectory_generator.set_mp_times(self.time_steps)
- self.trajectory_generator.set_duration(np.array([self.duration]), np.array([self.dt]))
+ # self.traj_gen.set_mp_times(self.time_steps)
+ self.traj_gen.set_duration(np.array([self.duration]), np.array([self.dt]))
# reward computation
self.reward_aggregation = reward_aggregation
# spaces
- self.return_context_observation = not (self.sequencing) # TODO or we_do_replanning?)
+ self.return_context_observation = not (self.learn_sub_trajectories or replanning_schedule)
self.traj_gen_action_space = self.get_traj_gen_action_space()
self.action_space = self.get_action_space()
self.observation_space = spaces.Box(low=self.env.observation_space.low[self.env.context_mask],
@@ -60,26 +66,26 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
def observation(self, observation):
# return context space if we are
- return observation[self.context_mask] if self.return_context_observation else observation
+ return observation[self.env.context_mask] if self.return_context_observation else observation
def get_trajectory(self, action: np.ndarray) -> Tuple:
clipped_params = np.clip(action, self.traj_gen_action_space.low, self.traj_gen_action_space.high)
- self.trajectory_generator.set_params(clipped_params)
- # if self.trajectory_generator.learn_tau:
- # self.trajectory_generator.set_mp_duration(self.trajectory_generator.tau, np.array([self.dt]))
- # TODO: Bruce said DMP, ProMP, ProDMP can have 0 bc_time
- self.trajectory_generator.set_boundary_conditions(bc_time=np.zeros((1,)), bc_pos=self.current_pos,
- bc_vel=self.current_vel)
+ self.traj_gen.set_params(clipped_params)
+ # TODO: Bruce said DMP, ProMP, ProDMP can have 0 bc_time for sequencing
+ # TODO Check with Bruce for replanning
+ self.traj_gen.set_boundary_conditions(
+ bc_time=np.zeros((1,)) if not self.replanning_schedule else self.current_traj_steps * self.dt,
+ bc_pos=self.current_pos, bc_vel=self.current_vel)
# TODO: is this correct for replanning? Do we need to adjust anything here?
- self.trajectory_generator.set_duration(None if self.sequencing else self.duration, np.array([self.dt]))
- traj_dict = self.trajectory_generator.get_trajs(get_pos=True, get_vel=True)
+ self.traj_gen.set_duration(None if self.learn_sub_trajectories else self.duration, np.array([self.dt]))
+ traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True)
trajectory_tensor, velocity_tensor = traj_dict['pos'], traj_dict['vel']
return get_numpy(trajectory_tensor), get_numpy(velocity_tensor)
def get_traj_gen_action_space(self):
- """This function can be used to set up an individual space for the parameters of the trajectory_generator."""
- min_action_bounds, max_action_bounds = self.trajectory_generator.get_param_bounds()
+ """This function can be used to set up an individual space for the parameters of the traj_gen."""
+ min_action_bounds, max_action_bounds = self.traj_gen.get_param_bounds()
mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
dtype=np.float32)
return mp_action_space
@@ -134,8 +140,11 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
if self.render_kwargs:
self.render(**self.render_kwargs)
- if done or self.env.do_replanning(self.current_pos, self.current_vel, obs, c_action,
- t + 1 + self.current_traj_steps):
+ if done:
+ break
+
+ if self.replanning_schedule and self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
+ t + 1 + self.current_traj_steps):
break
infos.update({k: v[:t + 1] for k, v in infos.items()})
@@ -160,6 +169,7 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
def reset(self, **kwargs):
self.current_traj_steps = 0
+ return super(BlackBoxWrapper, self).reset(**kwargs)
def plot_trajs(self, des_trajs, des_vels):
import matplotlib.pyplot as plt
diff --git a/alr_envs/mp/raw_interface_wrapper.py b/alr_envs/mp/raw_interface_wrapper.py
index d57ff9a..fdbc2f7 100644
--- a/alr_envs/mp/raw_interface_wrapper.py
+++ b/alr_envs/mp/raw_interface_wrapper.py
@@ -62,8 +62,8 @@ class RawInterfaceWrapper(gym.Wrapper):
include other actions like ball releasing time for the beer pong environment.
This only needs to be overwritten if the action space is modified.
Args:
- action: a vector instance of the whole action space, includes trajectory_generator parameters and additional parameters if
- specified, else only trajectory_generator parameters
+ action: a vector instance of the whole action space, includes traj_gen parameters and additional parameters if
+ specified, else only traj_gen parameters
Returns:
Tuple: mp_arguments and other arguments
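Editor's note: all wrappers converted in this patch end up implementing the same small RawInterfaceWrapper surface: a boolean context_mask plus current_pos/current_vel properties. A hedged sketch of such a wrapper for a generic 7-DoF MuJoCo task; the observation layout and slice indices are assumptions:

from typing import Tuple, Union

import numpy as np

from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper


class MyMPWrapper(RawInterfaceWrapper):

    @property
    def context_mask(self) -> np.ndarray:
        return np.hstack([
            [False] * 7,  # joint positions
            [False] * 7,  # joint velocities
            [True] * 3,   # goal position, the only contextual part
        ])

    @property
    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
        return self.env.sim.data.qpos[0:7].copy()

    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        return self.env.sim.data.qvel[0:7].copy()
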
diff --git a/alr_envs/open_ai/classic_control/continuous_mountain_car/mp_wrapper.py b/alr_envs/open_ai/classic_control/continuous_mountain_car/mp_wrapper.py
index 2a2357a..189563c 100644
--- a/alr_envs/open_ai/classic_control/continuous_mountain_car/mp_wrapper.py
+++ b/alr_envs/open_ai/classic_control/continuous_mountain_car/mp_wrapper.py
@@ -1,10 +1,11 @@
from typing import Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
@property
def current_vel(self) -> Union[float, int, np.ndarray]:
return np.array([self.state[1]])
diff --git a/alr_envs/open_ai/mujoco/reacher_v2/mp_wrapper.py b/alr_envs/open_ai/mujoco/reacher_v2/mp_wrapper.py
index 16202e5..9d627b6 100644
--- a/alr_envs/open_ai/mujoco/reacher_v2/mp_wrapper.py
+++ b/alr_envs/open_ai/mujoco/reacher_v2/mp_wrapper.py
@@ -1,10 +1,11 @@
from typing import Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
@property
def current_vel(self) -> Union[float, int, np.ndarray]:
@@ -13,7 +14,3 @@ class MPWrapper(MPEnvWrapper):
@property
def current_pos(self) -> Union[float, int, np.ndarray]:
return self.sim.data.qpos[:2]
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
\ No newline at end of file
diff --git a/alr_envs/open_ai/robotics/fetch/mp_wrapper.py b/alr_envs/open_ai/robotics/fetch/mp_wrapper.py
index 218e175..7a7bed6 100644
--- a/alr_envs/open_ai/robotics/fetch/mp_wrapper.py
+++ b/alr_envs/open_ai/robotics/fetch/mp_wrapper.py
@@ -2,10 +2,10 @@ from typing import Union
import numpy as np
-from mp_env_api import MPEnvWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class MPWrapper(MPEnvWrapper):
+class MPWrapper(RawInterfaceWrapper):
@property
def active_obs(self):
diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py
index 9af0a2d..b5587a7 100644
--- a/alr_envs/utils/make_env_helpers.py
+++ b/alr_envs/utils/make_env_helpers.py
@@ -1,18 +1,19 @@
import warnings
-from typing import Iterable, Type, Union, Mapping, MutableMapping
+from copy import deepcopy
+from typing import Iterable, Type, Union, MutableMapping
import gym
import numpy as np
-from gym.envs.registration import EnvSpec
-from mp_pytorch import MPInterface
+from gym.envs.registration import EnvSpec, registry
+from gym.wrappers import TimeAwareObservation
from alr_envs.mp.basis_generator_factory import get_basis_generator
from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
-from alr_envs.mp.controllers.base_controller import BaseController
from alr_envs.mp.controllers.controller_factory import get_controller
from alr_envs.mp.mp_factory import get_trajectory_generator
from alr_envs.mp.phase_generator_factory import get_phase_generator
from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.utils.utils import nested_update
def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
@@ -41,7 +42,15 @@ def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwa
return f if return_callable else f()
-def make(env_id: str, seed, **kwargs):
+def make(env_id, seed, **kwargs):
+ spec = registry.get(env_id)
+ # This access is required to allow for nested dict updates
+ all_kwargs = deepcopy(spec._kwargs)
+ nested_update(all_kwargs, kwargs)
+ return _make(env_id, seed, **all_kwargs)
+
+
+def _make(env_id: str, seed, **kwargs):
"""
Converts an env_id to an environment with the gym API.
This also works for DeepMind Control Suite interface_wrappers
@@ -102,12 +111,12 @@ def _make_wrapped_env(
):
"""
Helper function for creating a wrapped gym environment using MPs.
- It adds all provided wrappers to the specified environment and verifies at least one MPEnvWrapper is
+ It adds all provided wrappers to the specified environment and verifies at least one RawInterfaceWrapper is
provided to expose the interface for MPs.
Args:
env_id: name of the environment
- wrappers: list of wrappers (at least an MPEnvWrapper),
+ wrappers: list of wrappers (at least a RawInterfaceWrapper),
seed: seed of environment
Returns: gym environment with all specified wrappers applied
@@ -126,22 +135,20 @@ def _make_wrapped_env(
return _env
-def make_bb_env(
- env_id: str, wrappers: Iterable, black_box_wrapper_kwargs: MutableMapping, traj_gen_kwargs: MutableMapping,
+def make_bb(
+ env_id: str, wrappers: Iterable, black_box_kwargs: MutableMapping, traj_gen_kwargs: MutableMapping,
controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed=1,
- sequenced=False, **kwargs):
+ **kwargs):
"""
This can also be used standalone for manually building a custom DMP environment.
Args:
- black_box_wrapper_kwargs: kwargs for the black-box wrapper
+ black_box_kwargs: kwargs for the black-box wrapper
basis_kwargs: kwargs for the basis generator
phase_kwargs: kwargs for the phase generator
controller_kwargs: kwargs for the tracking controller
env_id: base_env_name,
wrappers: list of wrappers (at least an BlackBoxWrapper),
seed: seed of environment
- sequenced: When true, this allows to sequence multiple ProMPs by specifying the duration of each sub-trajectory,
- this behavior is much closer to step based learning.
traj_gen_kwargs: dict of at least {num_dof: int, num_basis: int} for DMP
Returns: DMP wrapped gym env
@@ -150,19 +157,33 @@ def make_bb_env(
_verify_time_limit(traj_gen_kwargs.get("duration", None), kwargs.get("time_limit", None))
_env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs)
- if black_box_wrapper_kwargs.get('duration', None) is None:
- black_box_wrapper_kwargs['duration'] = _env.spec.max_episode_steps * _env.dt
- if phase_kwargs.get('tau', None) is None:
- phase_kwargs['tau'] = black_box_wrapper_kwargs['duration']
+ learn_sub_trajs = black_box_kwargs.get('learn_sub_trajectories')
+ do_replanning = black_box_kwargs.get('replanning_schedule')
+ if learn_sub_trajs and do_replanning:
+ raise ValueError('Cannot use sub-trajectory learning and replanning together.')
+
+ if learn_sub_trajs or do_replanning:
+ # add time_step observation when replanning
+ wrappers.append(TimeAwareObservation)
+
traj_gen_kwargs['action_dim'] = traj_gen_kwargs.get('action_dim', np.prod(_env.action_space.shape).item())
+ if black_box_kwargs.get('duration') is None:
+ black_box_kwargs['duration'] = _env.spec.max_episode_steps * _env.dt
+ if phase_kwargs.get('tau') is None:
+ phase_kwargs['tau'] = black_box_kwargs['duration']
+
+ if learn_sub_trajs is not None:
+ # We have to learn the trajectory length when learning sub-trajectories
+ phase_kwargs['learn_tau'] = True
+
phase_gen = get_phase_generator(**phase_kwargs)
basis_gen = get_basis_generator(phase_generator=phase_gen, **basis_kwargs)
controller = get_controller(**controller_kwargs)
traj_gen = get_trajectory_generator(basis_generator=basis_gen, **traj_gen_kwargs)
bb_env = BlackBoxWrapper(_env, trajectory_generator=traj_gen, tracking_controller=controller,
- **black_box_wrapper_kwargs)
+ **black_box_kwargs)
return bb_env
@@ -204,16 +225,16 @@ def make_bb_env_helper(**kwargs):
wrappers = kwargs.pop("wrappers")
traj_gen_kwargs = kwargs.pop("traj_gen_kwargs", {})
- black_box_kwargs = kwargs.pop('black_box_wrapper_kwargs', {})
+ black_box_kwargs = kwargs.pop('black_box_kwargs', {})
contr_kwargs = kwargs.pop("controller_kwargs", {})
phase_kwargs = kwargs.pop("phase_generator_kwargs", {})
basis_kwargs = kwargs.pop("basis_generator_kwargs", {})
- return make_bb_env(env_id=kwargs.pop("name"), wrappers=wrappers,
- black_box_wrapper_kwargs=black_box_kwargs,
- traj_gen_kwargs=traj_gen_kwargs, controller_kwargs=contr_kwargs,
- phase_kwargs=phase_kwargs,
- basis_kwargs=basis_kwargs, **kwargs, seed=seed)
+ return make_bb(env_id=kwargs.pop("name"), wrappers=wrappers,
+ black_box_kwargs=black_box_kwargs,
+ traj_gen_kwargs=traj_gen_kwargs, controller_kwargs=contr_kwargs,
+ phase_kwargs=phase_kwargs,
+ basis_kwargs=basis_kwargs, **kwargs, seed=seed)
def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[None, float]):
@@ -224,7 +245,7 @@ def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[
It can be found in the BaseMP class.
Args:
- mp_time_limit: max trajectory length of trajectory_generator in seconds
+ mp_time_limit: max trajectory length of traj_gen in seconds
env_time_limit: max trajectory length of DMC environment in seconds
Returns:
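Editor's note: a usage sketch of the renamed make_bb, assembled from the kwargs keys that appear elsewhere in this patch series. The env id, wrapper and kwarg names are assumptions, and duration/tau are left out on purpose to show the new fallbacks (max_episode_steps * dt and the duration, respectively):

import alr_envs
from alr_envs.utils.make_env_helpers import make_bb

env = make_bb(
    env_id="alr_envs:HoleReacher-v1",
    wrappers=[alr_envs.alr.classic_control.hole_reacher.MPWrapper],
    black_box_kwargs={},                               # duration falls back to max_episode_steps * dt
    traj_gen_kwargs={'movement_primitives_type': 'promp'},
    controller_kwargs={'controller_type': 'velocity'},
    phase_kwargs={'phase_generator_type': 'linear'},   # tau falls back to the duration
    basis_kwargs={'basis_generator_type': 'zero_rbf', 'num_basis': 5},
    seed=1,
)
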
diff --git a/alr_envs/utils/utils.py b/alr_envs/utils/utils.py
index b90cf60..b212aac 100644
--- a/alr_envs/utils/utils.py
+++ b/alr_envs/utils/utils.py
@@ -1,3 +1,5 @@
+from collections.abc import Mapping, MutableMapping
+
import numpy as np
import torch as ch
@@ -23,4 +25,24 @@ def angle_normalize(x, type="deg"):
def get_numpy(x: ch.Tensor):
+ """
+ Returns numpy array from torch tensor
+ Args:
+ x:
+
+ Returns:
+
+ """
return x.detach().cpu().numpy()
+
+
+def nested_update(base: MutableMapping, update: Mapping) -> MutableMapping:
+ """
+ Recursively updates the nested Mapping base in place with the values from update and returns it.
+ Args:
+ base: main Mapping to be updated
+ update: updated values for base Mapping
+
+ """
+ for k, v in update.items():
+ base[k] = nested_update(base.get(k, {}), v) if isinstance(v, Mapping) else v
+ return base
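Editor's note: nested_update merges the update dict into the base dict key by key instead of replacing whole sub-dicts, which is what lets make() above override a single nested kwarg. A quick illustrative example with made-up values:

base = {'basis_generator_kwargs': {'basis_generator_type': 'zero_rbf', 'num_basis': 5}}
update = {'basis_generator_kwargs': {'num_basis': 10}}

nested_update(base, update)
# only num_basis is overwritten, the generator type is kept:
# {'basis_generator_kwargs': {'basis_generator_type': 'zero_rbf', 'num_basis': 10}}
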
From c3a8352c63f68a010b5d3b0288367574264f0fbd Mon Sep 17 00:00:00 2001
From: Fabian
Date: Thu, 30 Jun 2022 14:20:52 +0200
Subject: [PATCH 049/104] call on superclass for obs wrapper
---
.../base_reacher/base_reacher.py | 5 +--
.../hole_reacher/mp_wrapper.py | 27 --------------
.../viapoint_reacher/mp_wrapper.py | 8 ++---
.../viapoint_reacher/new_mp_wrapper.py | 30 ----------------
alr_envs/alr/mujoco/reacher/__init__.py | 2 +-
alr_envs/alr/mujoco/reacher/mp_wrapper.py | 36 ++++++-------------
alr_envs/alr/mujoco/reacher/new_mp_wrapper.py | 28 ---------------
alr_envs/mp/black_box_wrapper.py | 5 ++-
8 files changed, 18 insertions(+), 123 deletions(-)
delete mode 100644 alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
delete mode 100644 alr_envs/alr/classic_control/viapoint_reacher/new_mp_wrapper.py
delete mode 100644 alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher.py b/alr_envs/alr/classic_control/base_reacher/base_reacher.py
index 1b1ad19..e76ce85 100644
--- a/alr_envs/alr/classic_control/base_reacher/base_reacher.py
+++ b/alr_envs/alr/classic_control/base_reacher/base_reacher.py
@@ -1,10 +1,11 @@
-from typing import Iterable, Union
from abc import ABC, abstractmethod
+from typing import Union
+
import gym
-import matplotlib.pyplot as plt
import numpy as np
from gym import spaces
from gym.utils import seeding
+
from alr_envs.alr.classic_control.utils import intersect
diff --git a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
deleted file mode 100644
index e249a71..0000000
--- a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-
-
-class MPWrapper(RawInterfaceWrapper):
-
- def get_context_mask(self):
- return np.hstack([
- [self.env.random_start] * self.env.n_links, # cos
- [self.env.random_start] * self.env.n_links, # sin
- [self.env.random_start] * self.env.n_links, # velocity
- [self.env.initial_width is None], # hole width
- # [self.env.hole_depth is None], # hole depth
- [True] * 2, # x-y coordinates of target distance
- [False] # env steps
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.current_pos
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.current_vel
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py b/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py
index 68d203f..9f40292 100644
--- a/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py
+++ b/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py
@@ -6,8 +6,8 @@ from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
- @property
- def context_mask(self) -> np.ndarray:
+
+ def context_mask(self):
return np.hstack([
[self.env.random_start] * self.env.n_links, # cos
[self.env.random_start] * self.env.n_links, # sin
@@ -25,10 +25,6 @@ class MPWrapper(RawInterfaceWrapper):
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.current_vel
- @property
- def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
@property
def dt(self) -> Union[float, int]:
return self.env.dt
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/new_mp_wrapper.py b/alr_envs/alr/classic_control/viapoint_reacher/new_mp_wrapper.py
deleted file mode 100644
index 9f40292..0000000
--- a/alr_envs/alr/classic_control/viapoint_reacher/new_mp_wrapper.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-
-
-class MPWrapper(RawInterfaceWrapper):
-
- def context_mask(self):
- return np.hstack([
- [self.env.random_start] * self.env.n_links, # cos
- [self.env.random_start] * self.env.n_links, # sin
- [self.env.random_start] * self.env.n_links, # velocity
- [self.env.initial_via_target is None] * 2, # x-y coordinates of via point distance
- [True] * 2, # x-y coordinates of target distance
- [False] # env steps
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.current_pos
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.current_vel
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/alr/mujoco/reacher/__init__.py b/alr_envs/alr/mujoco/reacher/__init__.py
index c1a25d3..6a15beb 100644
--- a/alr_envs/alr/mujoco/reacher/__init__.py
+++ b/alr_envs/alr/mujoco/reacher/__init__.py
@@ -1,2 +1,2 @@
from .mp_wrapper import MPWrapper
-from .new_mp_wrapper import MPWrapper as NewMPWrapper
\ No newline at end of file
+from .mp_wrapper import MPWrapper as NewMPWrapper
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/reacher/mp_wrapper.py b/alr_envs/alr/mujoco/reacher/mp_wrapper.py
index e51843c..966be23 100644
--- a/alr_envs/alr/mujoco/reacher/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/reacher/mp_wrapper.py
@@ -1,4 +1,4 @@
-from typing import Union
+from typing import Union, Tuple
import numpy as np
@@ -8,37 +8,21 @@ from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
@property
- def context_mask(self) -> np.ndarray:
+ def context_mask(self):
return np.concatenate([
- [False] * self.n_links, # cos
- [False] * self.n_links, # sin
+ [False] * self.env.n_links, # cos
+ [False] * self.env.n_links, # sin
[True] * 2, # goal position
- [False] * self.n_links, # angular velocity
+ [False] * self.env.n_links, # angular velocity
[False] * 3, # goal distance
# self.get_body_com("target"), # only return target to make problem harder
[False], # step
])
- # @property
- # def active_obs(self):
- # return np.concatenate([
- # [True] * self.n_links, # cos, True
- # [True] * self.n_links, # sin, True
- # [True] * 2, # goal position
- # [True] * self.n_links, # angular velocity, True
- # [True] * 3, # goal distance
- # # self.get_body_com("target"), # only return target to make problem harder
- # [False], # step
- # ])
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qpos.flat[:self.env.n_links]
@property
- def current_vel(self) -> Union[float, int, np.ndarray]:
- return self.sim.data.qvel.flat[:self.n_links]
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray]:
- return self.sim.data.qpos.flat[:self.n_links]
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
\ No newline at end of file
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.sim.data.qvel.flat[:self.env.n_links]
diff --git a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
deleted file mode 100644
index 6b50d80..0000000
--- a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
-from typing import Union, Tuple
-import numpy as np
-
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-
-
-class MPWrapper(RawInterfaceWrapper):
-
- @property
- def context_mask(self):
- return np.concatenate([
- [False] * self.env.n_links, # cos
- [False] * self.env.n_links, # sin
- [True] * 2, # goal position
- [False] * self.env.n_links, # angular velocity
- [False] * 3, # goal distance
- # self.get_body_com("target"), # only return target to make problem harder
- [False], # step
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.sim.data.qpos.flat[:self.env.n_links]
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.sim.data.qvel.flat[:self.env.n_links]
diff --git a/alr_envs/mp/black_box_wrapper.py b/alr_envs/mp/black_box_wrapper.py
index 0c2a7c8..f1ba41f 100644
--- a/alr_envs/mp/black_box_wrapper.py
+++ b/alr_envs/mp/black_box_wrapper.py
@@ -11,7 +11,7 @@ from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
from alr_envs.utils.utils import get_numpy
-class BlackBoxWrapper(gym.ObservationWrapper, ABC):
+class BlackBoxWrapper(gym.ObservationWrapper):
def __init__(self,
env: RawInterfaceWrapper,
@@ -34,9 +34,8 @@ class BlackBoxWrapper(gym.ObservationWrapper, ABC):
reward_aggregation: function that takes the np.ndarray of step rewards as input and returns the trajectory
reward, default summation over all values.
"""
- super().__init__()
+ super().__init__(env)
- self.env = env
self.duration = duration
self.learn_sub_trajectories = learn_sub_trajectories
self.replanning_schedule = replanning_schedule
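Editor's note: calling super().__init__(env) lets gym.Wrapper store the wrapped env itself, which is why the manual self.env assignment above can be dropped. A minimal sketch of the pattern (not the actual wrapper):

import gym


class MinimalObsWrapper(gym.ObservationWrapper):

    def __init__(self, env):
        # gym.Wrapper keeps a reference to env as self.env,
        # so no manual assignment is necessary
        super().__init__(env)

    def observation(self, observation):
        # identity transform, just to make the class complete
        return observation
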
From f31d85451f7b3c153dff9fd61096cb3c4ef81649 Mon Sep 17 00:00:00 2001
From: Onur
Date: Thu, 30 Jun 2022 14:55:34 +0200
Subject: [PATCH 050/104] adjust env registries in __init__
---
alr_envs/alr/__init__.py | 68 +++++--------------
.../viapoint_reacher/__init__.py | 2 +-
alr_envs/alr/mujoco/ant_jump/__init__.py | 3 +-
.../alr/mujoco/ant_jump/new_mp_wrapper.py | 2 +-
alr_envs/alr/mujoco/beerpong/__init__.py | 3 +-
.../alr/mujoco/beerpong/new_mp_wrapper.py | 2 +-
.../alr/mujoco/half_cheetah_jump/__init__.py | 2 +-
alr_envs/alr/mujoco/hopper_jump/__init__.py | 3 +-
.../alr/mujoco/hopper_jump/new_mp_wrapper.py | 4 +-
alr_envs/alr/mujoco/reacher/__init__.py | 3 +-
.../alr/mujoco/walker_2d_jump/__init__.py | 2 +-
11 files changed, 29 insertions(+), 65 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 607ef18..435cfdb 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -25,21 +25,13 @@ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
DEFAULT_MP_ENV_DICT = {
"name": 'EnvName',
"wrappers": [],
- # TODO move scale to traj_gen
- "ep_wrapper_kwargs": {
- "weight_scale": 1
+ "traj_gen_kwargs": {
+ "weight_scale": 1,
+ 'movement_primitives_type': 'promp'
},
- # TODO traj_gen_kwargs
- # TODO remove action_dim
- "movement_primitives_kwargs": {
- 'movement_primitives_type': 'promp',
- 'action_dim': 7
- },
- # TODO remove tau
"phase_generator_kwargs": {
'phase_generator_type': 'linear',
'delay': 0,
- 'tau': 1.5, # initial value
'learn_tau': False,
'learn_delay': False
},
@@ -51,7 +43,7 @@ DEFAULT_MP_ENV_DICT = {
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
'num_basis': 5,
- 'num_basis_zero_start': 2 # TODO: Change to 1
+ 'num_basis_zero_start': 1
}
}
@@ -370,9 +362,7 @@ for _v in _versions:
_env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
- kwargs_dict_simple_reacher_promp['wrappers'].append('TODO') # TODO
- kwargs_dict_simple_reacher_promp['movement_primitives_kwargs']['action_dim'] = 2 if "long" not in _v.lower() else 5
- kwargs_dict_simple_reacher_promp['phase_generator_kwargs']['tau'] = 2
+ kwargs_dict_simple_reacher_promp['wrappers'].append(classic_control.simple_reacher.MPWrapper)
kwargs_dict_simple_reacher_promp['controller_kwargs']['p_gains'] = 0.6
kwargs_dict_simple_reacher_promp['controller_kwargs']['d_gains'] = 0.075
kwargs_dict_simple_reacher_promp['name'] = _env_id
@@ -405,7 +395,7 @@ register(
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
-kwargs_dict_via_point_reacher_promp['wrappers'].append('TODO') # TODO
+kwargs_dict_via_point_reacher_promp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper)
kwargs_dict_via_point_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
kwargs_dict_via_point_reacher_promp['name'] = "ViaPointReacherProMP-v0"
register(
@@ -444,12 +434,9 @@ for _v in _versions:
_env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
- kwargs_dict_hole_reacher_promp['wrappers'].append('TODO') # TODO
- kwargs_dict_hole_reacher_promp['ep_wrapper_kwargs']['weight_scale'] = 2
- # kwargs_dict_hole_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5
- # kwargs_dict_hole_reacher_promp['phase_generator_kwargs']['tau'] = 2
+ kwargs_dict_hole_reacher_promp['wrappers'].append(classic_control.hole_reacher.MPWrapper)
+ kwargs_dict_hole_reacher_promp['traj_gen_kwargs']['weight_scale'] = 2
kwargs_dict_hole_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
- # kwargs_dict_hole_reacher_promp['basis_generator_kwargs']['num_basis'] = 5
kwargs_dict_hole_reacher_promp['name'] = f"alr_envs:{_v}"
register(
id=_env_id,
@@ -489,9 +476,7 @@ for _v in _versions:
_env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_alr_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
- kwargs_dict_alr_reacher_promp['wrappers'].append('TODO') # TODO
- kwargs_dict_alr_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5 if "long" not in _v.lower() else 7
- kwargs_dict_alr_reacher_promp['phase_generator_kwargs']['tau'] = 4
+ kwargs_dict_alr_reacher_promp['wrappers'].append(mujoco.reacher.MPWrapper)
kwargs_dict_alr_reacher_promp['controller_kwargs']['p_gains'] = 1
kwargs_dict_alr_reacher_promp['controller_kwargs']['d_gains'] = 0.1
kwargs_dict_alr_reacher_promp['name'] = f"alr_envs:{_v}"
@@ -509,13 +494,12 @@ for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_bp_promp = deepcopy(DEFAULT_MP_ENV_DICT)
- kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.NewMPWrapper)
- kwargs_dict_bp_promp['movement_primitives_kwargs']['action_dim'] = 7
- kwargs_dict_bp_promp['phase_generator_kwargs']['tau'] = 0.8
+ kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.MPWrapper)
kwargs_dict_bp_promp['phase_generator_kwargs']['learn_tau'] = True
kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25])
kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2
+ kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2
kwargs_dict_bp_promp['name'] = f"alr_envs:{_v}"
register(
id=_env_id,
@@ -530,12 +514,12 @@ for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_bp_promp = deepcopy(DEFAULT_MP_ENV_DICT)
- kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.NewMPWrapper)
- kwargs_dict_bp_promp['movement_primitives_kwargs']['action_dim'] = 7
+ kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.MPWrapper)
kwargs_dict_bp_promp['phase_generator_kwargs']['tau'] = 0.62
kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25])
kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2
+ kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2
kwargs_dict_bp_promp['name'] = f"alr_envs:{_v}"
register(
id=_env_id,
@@ -555,11 +539,7 @@ for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_ant_jump_promp = deepcopy(DEFAULT_MP_ENV_DICT)
- kwargs_dict_ant_jump_promp['wrappers'].append(mujoco.ant_jump.NewMPWrapper)
- kwargs_dict_ant_jump_promp['movement_primitives_kwargs']['action_dim'] = 8
- kwargs_dict_ant_jump_promp['phase_generator_kwargs']['tau'] = 10
- kwargs_dict_ant_jump_promp['controller_kwargs']['p_gains'] = np.ones(8)
- kwargs_dict_ant_jump_promp['controller_kwargs']['d_gains'] = 0.1 * np.ones(8)
+ kwargs_dict_ant_jump_promp['wrappers'].append(mujoco.ant_jump.MPWrapper)
kwargs_dict_ant_jump_promp['name'] = f"alr_envs:{_v}"
register(
id=_env_id,
@@ -576,11 +556,7 @@ for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_halfcheetah_jump_promp = deepcopy(DEFAULT_MP_ENV_DICT)
- kwargs_dict_halfcheetah_jump_promp['wrappers'].append(mujoco.ant_jump.NewMPWrapper)
- kwargs_dict_halfcheetah_jump_promp['movement_primitives_kwargs']['action_dim'] = 6
- kwargs_dict_halfcheetah_jump_promp['phase_generator_kwargs']['tau'] = 5
- kwargs_dict_halfcheetah_jump_promp['controller_kwargs']['p_gains'] = np.ones(6)
- kwargs_dict_halfcheetah_jump_promp['controller_kwargs']['d_gains'] = 0.1 * np.ones(6)
+ kwargs_dict_halfcheetah_jump_promp['wrappers'].append(mujoco.half_cheetah_jump.MPWrapper)
kwargs_dict_halfcheetah_jump_promp['name'] = f"alr_envs:{_v}"
register(
id=_env_id,
@@ -595,16 +571,12 @@ for _v in _versions:
## HopperJump
_versions = ['ALRHopperJump-v0', 'ALRHopperJumpRndmJointsDesPos-v0', 'ALRHopperJumpRndmJointsDesPosStepBased-v0',
'ALRHopperJumpOnBox-v0', 'ALRHopperThrow-v0', 'ALRHopperThrowInBasket-v0']
-
+# TODO: Check if all environments work with the same MPWrapper
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_hopper_jump_promp = deepcopy(DEFAULT_MP_ENV_DICT)
- kwargs_dict_hopper_jump_promp['wrappers'].append('TODO') # TODO
- kwargs_dict_hopper_jump_promp['movement_primitives_kwargs']['action_dim'] = 3
- kwargs_dict_hopper_jump_promp['phase_generator_kwargs']['tau'] = 2
- kwargs_dict_hopper_jump_promp['controller_kwargs']['p_gains'] = np.ones(3)
- kwargs_dict_hopper_jump_promp['controller_kwargs']['d_gains'] = 0.1 * np.ones(3)
+ kwargs_dict_hopper_jump_promp['wrappers'].append(mujoco.hopper_jump.MPWrapper)
kwargs_dict_hopper_jump_promp['name'] = f"alr_envs:{_v}"
register(
id=_env_id,
@@ -622,11 +594,7 @@ for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_walker2d_jump_promp = deepcopy(DEFAULT_MP_ENV_DICT)
- kwargs_dict_walker2d_jump_promp['wrappers'].append('TODO') # TODO
- kwargs_dict_walker2d_jump_promp['movement_primitives_kwargs']['action_dim'] = 6
- kwargs_dict_walker2d_jump_promp['phase_generator_kwargs']['tau'] = 2.4
- kwargs_dict_walker2d_jump_promp['controller_kwargs']['p_gains'] = np.ones(6)
- kwargs_dict_walker2d_jump_promp['controller_kwargs']['d_gains'] = 0.1 * np.ones(6)
+ kwargs_dict_walker2d_jump_promp['wrappers'].append(mujoco.walker_2d_jump.MPWrapper)
kwargs_dict_walker2d_jump_promp['name'] = f"alr_envs:{_v}"
register(
id=_env_id,
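Editor's note: the registrations rewritten above all follow the same recipe: deepcopy the defaults, append the env-specific MPWrapper, adjust a few controller/basis kwargs, and register under a ProMP id. A hedged sketch of that recipe for a hypothetical env; the entry point string is an assumption, since the register bodies are truncated in this diff:

kwargs_dict_my_env_promp = deepcopy(DEFAULT_MP_ENV_DICT)
kwargs_dict_my_env_promp['wrappers'].append(mujoco.reacher.MPWrapper)
kwargs_dict_my_env_promp['controller_kwargs']['p_gains'] = 1
kwargs_dict_my_env_promp['controller_kwargs']['d_gains'] = 0.1
kwargs_dict_my_env_promp['name'] = "alr_envs:ALRReacher-v0"

register(
    id='MyReacherProMP-v0',                                            # hypothetical id
    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',  # assumed entry point
    kwargs=kwargs_dict_my_env_promp
)
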
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/__init__.py b/alr_envs/alr/classic_control/viapoint_reacher/__init__.py
index 989b5a9..a919c3a 100644
--- a/alr_envs/alr/classic_control/viapoint_reacher/__init__.py
+++ b/alr_envs/alr/classic_control/viapoint_reacher/__init__.py
@@ -1 +1 @@
-from .mp_wrapper import MPWrapper
\ No newline at end of file
+from .new_mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/ant_jump/__init__.py b/alr_envs/alr/mujoco/ant_jump/__init__.py
index 5d15867..8a04a02 100644
--- a/alr_envs/alr/mujoco/ant_jump/__init__.py
+++ b/alr_envs/alr/mujoco/ant_jump/__init__.py
@@ -1,2 +1 @@
-from .mp_wrapper import MPWrapper
-from .new_mp_wrapper import NewMPWrapper
+from .new_mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py
index c12aa56..0886065 100644
--- a/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py
@@ -5,7 +5,7 @@ import numpy as np
from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class NewMPWrapper(RawInterfaceWrapper):
+class MPWrapper(RawInterfaceWrapper):
def get_context_mask(self):
return np.hstack([
diff --git a/alr_envs/alr/mujoco/beerpong/__init__.py b/alr_envs/alr/mujoco/beerpong/__init__.py
index 18e33ce..8a04a02 100644
--- a/alr_envs/alr/mujoco/beerpong/__init__.py
+++ b/alr_envs/alr/mujoco/beerpong/__init__.py
@@ -1,2 +1 @@
-from .mp_wrapper import MPWrapper
-from .new_mp_wrapper import NewMPWrapper
\ No newline at end of file
+from .new_mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
index 0df1a7c..2969b82 100644
--- a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
@@ -5,7 +5,7 @@ import numpy as np
from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
-class NewMPWrapper(RawInterfaceWrapper):
+class MPWrapper(RawInterfaceWrapper):
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qpos[0:7].copy()
diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/__init__.py b/alr_envs/alr/mujoco/half_cheetah_jump/__init__.py
index c5e6d2f..8a04a02 100644
--- a/alr_envs/alr/mujoco/half_cheetah_jump/__init__.py
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/__init__.py
@@ -1 +1 @@
-from .mp_wrapper import MPWrapper
+from .new_mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/hopper_jump/__init__.py b/alr_envs/alr/mujoco/hopper_jump/__init__.py
index fbffe48..8a04a02 100644
--- a/alr_envs/alr/mujoco/hopper_jump/__init__.py
+++ b/alr_envs/alr/mujoco/hopper_jump/__init__.py
@@ -1,2 +1 @@
-from .mp_wrapper import MPWrapper, HighCtxtMPWrapper
-from .new_mp_wrapper import NewMPWrapper, NewHighCtxtMPWrapper
+from .new_mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
index ccd8f76..b919b22 100644
--- a/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
@@ -3,7 +3,7 @@ from typing import Union, Tuple
import numpy as np
-class NewMPWrapper(BlackBoxWrapper):
+class MPWrapper(BlackBoxWrapper):
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qpos[3:6].copy()
@@ -30,7 +30,7 @@ class NewMPWrapper(BlackBoxWrapper):
])
-class NewHighCtxtMPWrapper(NewMPWrapper):
+class NewHighCtxtMPWrapper(MPWrapper):
def get_context_mask(self):
return np.hstack([
[False] * (2 + int(not self.env.exclude_current_positions_from_observation)), # position
diff --git a/alr_envs/alr/mujoco/reacher/__init__.py b/alr_envs/alr/mujoco/reacher/__init__.py
index c1a25d3..8a04a02 100644
--- a/alr_envs/alr/mujoco/reacher/__init__.py
+++ b/alr_envs/alr/mujoco/reacher/__init__.py
@@ -1,2 +1 @@
-from .mp_wrapper import MPWrapper
-from .new_mp_wrapper import MPWrapper as NewMPWrapper
\ No newline at end of file
+from .new_mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/__init__.py b/alr_envs/alr/mujoco/walker_2d_jump/__init__.py
index 989b5a9..8a04a02 100644
--- a/alr_envs/alr/mujoco/walker_2d_jump/__init__.py
+++ b/alr_envs/alr/mujoco/walker_2d_jump/__init__.py
@@ -1 +1 @@
-from .mp_wrapper import MPWrapper
\ No newline at end of file
+from .new_mp_wrapper import MPWrapper
From fea2ae7d11548654e505f8e28a8d3b0748adcc83 Mon Sep 17 00:00:00 2001
From: Fabian
Date: Thu, 30 Jun 2022 17:33:05 +0200
Subject: [PATCH 051/104] current state
---
alr_envs/alr/__init__.py | 190 +++++-------------
.../hole_reacher/mp_wrapper.py | 27 +++
.../simple_reacher/mp_wrapper.py | 2 +-
.../viapoint_reacher/mp_wrapper.py | 2 +-
alr_envs/alr/mujoco/__init__.py | 2 +-
alr_envs/alr/mujoco/ant_jump/mp_wrapper.py | 2 +-
.../alr/mujoco/ant_jump/new_mp_wrapper.py | 4 +-
.../ball_in_a_cup/ball_in_a_cup_mp_wrapper.py | 2 +-
alr_envs/alr/mujoco/beerpong/mp_wrapper.py | 2 +-
.../alr/mujoco/beerpong/new_mp_wrapper.py | 2 +-
.../mujoco/half_cheetah_jump/mp_wrapper.py | 2 +-
.../half_cheetah_jump/new_mp_wrapper.py | 2 +-
alr_envs/alr/mujoco/hopper_jump/__init__.py | 2 +-
.../alr/mujoco/hopper_jump/hopper_jump.py | 147 +++++++-------
alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py | 52 +----
.../alr/mujoco/hopper_jump/new_mp_wrapper.py | 45 -----
.../alr/mujoco/hopper_throw/hopper_throw.py | 2 +-
.../alr/mujoco/hopper_throw/mp_wrapper.py | 2 +-
.../alr/mujoco/hopper_throw/new_mp_wrapper.py | 2 +-
alr_envs/alr/mujoco/reacher/alr_reacher.py | 152 --------------
alr_envs/alr/mujoco/reacher/balancing.py | 53 -----
alr_envs/alr/mujoco/reacher/mp_wrapper.py | 2 +-
alr_envs/alr/mujoco/reacher/reacher.py | 105 ++++++++++
.../alr/mujoco/table_tennis/mp_wrapper.py | 2 +-
.../alr/mujoco/walker_2d_jump/mp_wrapper.py | 2 +-
.../mujoco/walker_2d_jump/new_mp_wrapper.py | 2 +-
alr_envs/{mp => black_box}/__init__.py | 0
.../{mp => black_box}/black_box_wrapper.py | 14 +-
.../controller}/__init__.py | 0
.../controller}/base_controller.py | 0
.../controller}/controller_factory.py | 8 +-
.../controller}/meta_world_controller.py | 2 +-
.../controller}/pd_controller.py | 2 +-
.../controller}/pos_controller.py | 2 +-
.../controller}/vel_controller.py | 2 +-
alr_envs/black_box/factory/__init__.py | 0
.../factory}/basis_generator_factory.py | 0
.../factory}/phase_generator_factory.py | 2 +-
.../factory/trajectory_generator_factory.py} | 4 +-
.../raw_interface_wrapper.py | 0
.../dmc/manipulation/reach_site/mp_wrapper.py | 2 +-
alr_envs/dmc/suite/ball_in_cup/mp_wrapper.py | 2 +-
alr_envs/dmc/suite/cartpole/mp_wrapper.py | 2 +-
alr_envs/dmc/suite/reacher/mp_wrapper.py | 2 +-
alr_envs/meta/goal_change_mp_wrapper.py | 2 +-
.../goal_endeffector_change_mp_wrapper.py | 2 +-
.../meta/goal_object_change_mp_wrapper.py | 2 +-
alr_envs/meta/object_change_mp_wrapper.py | 2 +-
.../continuous_mountain_car/mp_wrapper.py | 2 +-
.../open_ai/mujoco/reacher_v2/mp_wrapper.py | 2 +-
alr_envs/open_ai/robotics/fetch/mp_wrapper.py | 2 +-
alr_envs/utils/make_env_helpers.py | 15 +-
52 files changed, 325 insertions(+), 557 deletions(-)
create mode 100644 alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
delete mode 100644 alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
delete mode 100644 alr_envs/alr/mujoco/reacher/alr_reacher.py
delete mode 100644 alr_envs/alr/mujoco/reacher/balancing.py
create mode 100644 alr_envs/alr/mujoco/reacher/reacher.py
rename alr_envs/{mp => black_box}/__init__.py (100%)
rename alr_envs/{mp => black_box}/black_box_wrapper.py (94%)
rename alr_envs/{mp/controllers => black_box/controller}/__init__.py (100%)
rename alr_envs/{mp/controllers => black_box/controller}/base_controller.py (100%)
rename alr_envs/{mp/controllers => black_box/controller}/controller_factory.py (60%)
rename alr_envs/{mp/controllers => black_box/controller}/meta_world_controller.py (92%)
rename alr_envs/{mp/controllers => black_box/controller}/pd_controller.py (93%)
rename alr_envs/{mp/controllers => black_box/controller}/pos_controller.py (77%)
rename alr_envs/{mp/controllers => black_box/controller}/vel_controller.py (77%)
create mode 100644 alr_envs/black_box/factory/__init__.py
rename alr_envs/{mp => black_box/factory}/basis_generator_factory.py (100%)
rename alr_envs/{mp => black_box/factory}/phase_generator_factory.py (93%)
rename alr_envs/{mp/mp_factory.py => black_box/factory/trajectory_generator_factory.py} (91%)
rename alr_envs/{mp => black_box}/raw_interface_wrapper.py (100%)
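The bulk of patch 051 is the rename of the alr_envs.mp package to alr_envs.black_box; downstream code only needs its import paths updated. A minimal sketch of the migration, with the new paths taken verbatim from the renames above (availability assumes the package is installed at this revision):

    # Before this patch:
    #   from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
    #   from alr_envs.mp.controllers.pd_controller import PDController
    # After this patch:
    from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
    from alr_envs.black_box.controller.pd_controller import PDController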
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 435cfdb..cf009a4 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -1,33 +1,31 @@
-import numpy as np
-from gym import register
from copy import deepcopy
+import numpy as np
+from gym import register
+
+from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
from . import classic_control, mujoco
from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv
from .classic_control.simple_reacher.simple_reacher import SimpleReacherEnv
from .classic_control.viapoint_reacher.viapoint_reacher import ViaPointReacherEnv
+from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP
from .mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
-from .mujoco.reacher.alr_reacher import ALRReacherEnv
-from .mujoco.reacher.balancing import BalancingEnv
-
-from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
-from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP
from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP
from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP
from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX
from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
+from .mujoco.reacher.reacher import ReacherEnv
from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
-DEFAULT_MP_ENV_DICT = {
+DEFAULT_BB_DICT = {
"name": 'EnvName',
"wrappers": [],
"traj_gen_kwargs": {
- "weight_scale": 1,
- 'movement_primitives_type': 'promp'
+ 'trajectory_generator_type': 'promp'
},
"phase_generator_kwargs": {
'phase_generator_type': 'linear',
@@ -100,80 +98,47 @@ register(
# Mujoco
## Reacher
+for _dims in [5, 7]:
+ register(
+ id=f'Reacher{_dims}d-v0',
+ entry_point='alr_envs.alr.mujoco:ReacherEnv',
+ max_episode_steps=200,
+ kwargs={
+ "n_links": _dims,
+ }
+ )
+
+ register(
+ id=f'Reacher{_dims}dSparse-v0',
+ entry_point='alr_envs.alr.mujoco:ReacherEnv',
+ max_episode_steps=200,
+ kwargs={
+ "sparse": True,
+ "n_links": _dims,
+ }
+ )
+
+## Hopper Jump random joints and desired position
register(
- id='ALRReacher-v0',
- entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
- max_episode_steps=200,
+ id='HopperJumpSparse-v0',
+ entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnv',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
kwargs={
- "steps_before_reward": 0,
- "n_links": 5,
- "balance": False,
+ # "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
+ "sparse": True,
+ # "healthy_reward": 1.0
}
)
+## Hopper Jump random joints and desired position, step-based reward
register(
- id='ALRReacherSparse-v0',
- entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
- max_episode_steps=200,
+ id='HopperJump-v0',
+ entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnvStepBased',
+ max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
kwargs={
- "steps_before_reward": 200,
- "n_links": 5,
- "balance": False,
- }
-)
-
-register(
- id='ALRReacherSparseOptCtrl-v0',
- entry_point='alr_envs.alr.mujoco:ALRReacherOptCtrlEnv',
- max_episode_steps=200,
- kwargs={
- "steps_before_reward": 200,
- "n_links": 5,
- "balance": False,
- }
-)
-
-register(
- id='ALRReacherSparseBalanced-v0',
- entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
- max_episode_steps=200,
- kwargs={
- "steps_before_reward": 200,
- "n_links": 5,
- "balance": True,
- }
-)
-
-register(
- id='ALRLongReacher-v0',
- entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
- max_episode_steps=200,
- kwargs={
- "steps_before_reward": 0,
- "n_links": 7,
- "balance": False,
- }
-)
-
-register(
- id='ALRLongReacherSparse-v0',
- entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
- max_episode_steps=200,
- kwargs={
- "steps_before_reward": 200,
- "n_links": 7,
- "balance": False,
- }
-)
-
-register(
- id='ALRLongReacherSparseBalanced-v0',
- entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
- max_episode_steps=200,
- kwargs={
- "steps_before_reward": 200,
- "n_links": 7,
- "balance": True,
+ # "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
+ "sparse": False,
+ # "healthy_reward": 1.0
}
)
@@ -198,41 +163,7 @@ register(
)
register(
- id='ALRHopperJump-v0',
- entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv',
- max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
- kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
- "context": True
- }
-)
-
-#### Hopper Jump random joints and des position
-register(
- id='ALRHopperJumpRndmJointsDesPos-v0',
- entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnv',
- max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
- kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
- "context": True,
- "healthy_reward": 1.0
- }
-)
-
-##### Hopper Jump random joints and des position step based reward
-register(
- id='ALRHopperJumpRndmJointsDesPosStepBased-v0',
- entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnvStepBased',
- max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
- kwargs={
- "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
- "context": True,
- "healthy_reward": 1.0
- }
-)
-
-register(
- id='ALRHopperJumpOnBox-v0',
+ id='HopperJumpOnBox-v0',
entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
kwargs={
@@ -271,17 +202,6 @@ register(
}
)
-## Balancing Reacher
-
-register(
- id='Balancing-v0',
- entry_point='alr_envs.alr.mujoco:BalancingEnv',
- max_episode_steps=200,
- kwargs={
- "n_links": 5,
- }
-)
-
## Table Tennis
register(id='TableTennis2DCtxt-v0',
entry_point='alr_envs.alr.mujoco:TTEnvGym',
@@ -361,7 +281,7 @@ for _v in _versions:
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_BB_DICT)
kwargs_dict_simple_reacher_promp['wrappers'].append(classic_control.simple_reacher.MPWrapper)
kwargs_dict_simple_reacher_promp['controller_kwargs']['p_gains'] = 0.6
kwargs_dict_simple_reacher_promp['controller_kwargs']['d_gains'] = 0.075
@@ -394,7 +314,7 @@ register(
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
-kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_BB_DICT)
kwargs_dict_via_point_reacher_promp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper)
kwargs_dict_via_point_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
kwargs_dict_via_point_reacher_promp['name'] = "ViaPointReacherProMP-v0"
@@ -433,7 +353,7 @@ for _v in _versions:
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT)
kwargs_dict_hole_reacher_promp['wrappers'].append(classic_control.hole_reacher.MPWrapper)
kwargs_dict_hole_reacher_promp['traj_gen_kwargs']['weight_scale'] = 2
kwargs_dict_hole_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
@@ -475,7 +395,7 @@ for _v in _versions:
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_alr_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_alr_reacher_promp = deepcopy(DEFAULT_BB_DICT)
kwargs_dict_alr_reacher_promp['wrappers'].append(mujoco.reacher.MPWrapper)
kwargs_dict_alr_reacher_promp['controller_kwargs']['p_gains'] = 1
kwargs_dict_alr_reacher_promp['controller_kwargs']['d_gains'] = 0.1
@@ -493,7 +413,7 @@ _versions = ['ALRBeerPong-v0']
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_bp_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_bp_promp = deepcopy(DEFAULT_BB_DICT)
kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.MPWrapper)
kwargs_dict_bp_promp['phase_generator_kwargs']['learn_tau'] = True
kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25])
@@ -513,7 +433,7 @@ _versions = ["ALRBeerPongStepBased-v0", "ALRBeerPongFixedRelease-v0"]
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_bp_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_bp_promp = deepcopy(DEFAULT_BB_DICT)
kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.MPWrapper)
kwargs_dict_bp_promp['phase_generator_kwargs']['tau'] = 0.62
kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25])
@@ -538,7 +458,7 @@ _versions = ['ALRAntJump-v0']
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_ant_jump_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_ant_jump_promp = deepcopy(DEFAULT_BB_DICT)
kwargs_dict_ant_jump_promp['wrappers'].append(mujoco.ant_jump.MPWrapper)
kwargs_dict_ant_jump_promp['name'] = f"alr_envs:{_v}"
register(
@@ -555,7 +475,7 @@ _versions = ['ALRHalfCheetahJump-v0']
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_halfcheetah_jump_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_halfcheetah_jump_promp = deepcopy(DEFAULT_BB_DICT)
kwargs_dict_halfcheetah_jump_promp['wrappers'].append(mujoco.half_cheetah_jump.MPWrapper)
kwargs_dict_halfcheetah_jump_promp['name'] = f"alr_envs:{_v}"
register(
@@ -575,7 +495,7 @@ _versions = ['ALRHopperJump-v0', 'ALRHopperJumpRndmJointsDesPos-v0', 'ALRHopperJ
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_hopper_jump_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_hopper_jump_promp = deepcopy(DEFAULT_BB_DICT)
kwargs_dict_hopper_jump_promp['wrappers'].append(mujoco.hopper_jump.MPWrapper)
kwargs_dict_hopper_jump_promp['name'] = f"alr_envs:{_v}"
register(
@@ -593,7 +513,7 @@ _versions = ['ALRWalker2DJump-v0']
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
- kwargs_dict_walker2d_jump_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+ kwargs_dict_walker2d_jump_promp = deepcopy(DEFAULT_BB_DICT)
kwargs_dict_walker2d_jump_promp['wrappers'].append(mujoco.walker_2d_jump.MPWrapper)
kwargs_dict_walker2d_jump_promp['name'] = f"alr_envs:{_v}"
register(
@@ -695,7 +615,7 @@ for i in _vs:
_env_id = f'ALRReacher{i}-v0'
register(
id=_env_id,
- entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
+ entry_point='alr_envs.alr.mujoco:ReacherEnv',
max_episode_steps=200,
kwargs={
"steps_before_reward": 0,
@@ -708,7 +628,7 @@ for i in _vs:
_env_id = f'ALRReacherSparse{i}-v0'
register(
id=_env_id,
- entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
+ entry_point='alr_envs.alr.mujoco:ReacherEnv',
max_episode_steps=200,
kwargs={
"steps_before_reward": 200,
diff --git a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
new file mode 100644
index 0000000..19bb0c5
--- /dev/null
+++ b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
@@ -0,0 +1,27 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
+
+
+class MPWrapper(RawInterfaceWrapper):
+
+ def get_context_mask(self):
+ return np.hstack([
+ [self.env.random_start] * self.env.n_links, # cos
+ [self.env.random_start] * self.env.n_links, # sin
+ [self.env.random_start] * self.env.n_links, # velocity
+ [self.env.initial_width is None], # hole width
+ # [self.env.hole_depth is None], # hole depth
+ [True] * 2, # x-y coordinates of target distance
+ [False] # env steps
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.current_pos
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.env.current_vel
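get_context_mask() above returns a boolean vector over the flat observation, which the black-box wrapper uses to select the context dimensions of a rollout. A self-contained numpy sketch of how such a mask filters an observation, using the layout from the comments above for n_links = 5 (in the env itself the first entries depend on random_start and initial_width, so the concrete True/False pattern here is only illustrative):

    import numpy as np

    n_links = 5
    obs = np.arange(3 * n_links + 4, dtype=float)  # cos, sin, vel, hole width, target x-y, step
    mask = np.hstack([
        [False] * n_links,  # cos
        [False] * n_links,  # sin
        [False] * n_links,  # velocity
        [True],             # hole width
        [True] * 2,         # x-y coordinates of target distance
        [False],            # env steps
    ])
    context = obs[mask]   # only the context-relevant entries remain
    print(context.shape)  # (3,)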
diff --git a/alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py b/alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py
index 30b0985..c5ef66f 100644
--- a/alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py
+++ b/alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py b/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py
index 9f40292..2b210de 100644
--- a/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py
+++ b/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index f2f4536..df52cfc 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -6,7 +6,7 @@ from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import ALRHopperThrowEnv
from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
-from .reacher.alr_reacher import ALRReacherEnv
+from .reacher.reacher import ReacherEnv
from .reacher.balancing import BalancingEnv
from .table_tennis.tt_gym import TTEnvGym
from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv
diff --git a/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py b/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
index 4d5c0d6..f6e99a3 100644
--- a/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py
index 0886065..f6f026b 100644
--- a/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py
@@ -1,8 +1,8 @@
-from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
+from alr_envs.black_box.black_box_wrapper import BlackBoxWrapper
from typing import Union, Tuple
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py
index 609858b..81a08f5 100644
--- a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class BallInACupMPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
index 40c371b..20e7532 100644
--- a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
index 2969b82..bd22442 100644
--- a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Union, Tuple
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py b/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py
index f9a298a..930da6d 100644
--- a/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/half_cheetah_jump/new_mp_wrapper.py
index d14c9a9..9a65952 100644
--- a/alr_envs/alr/mujoco/half_cheetah_jump/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/new_mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/alr/mujoco/hopper_jump/__init__.py b/alr_envs/alr/mujoco/hopper_jump/__init__.py
index 8a04a02..c5e6d2f 100644
--- a/alr_envs/alr/mujoco/hopper_jump/__init__.py
+++ b/alr_envs/alr/mujoco/hopper_jump/__init__.py
@@ -1 +1 @@
-from .new_mp_wrapper import MPWrapper
+from .mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
index 025bb8d..78b06d3 100644
--- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
from gym.envs.mujoco.hopper_v3 import HopperEnv
import numpy as np
import os
@@ -8,10 +10,10 @@ MAX_EPISODE_STEPS_HOPPERJUMP = 250
class ALRHopperJumpEnv(HopperEnv):
"""
Initialization changes to normal Hopper:
- - healthy_reward: 1.0 -> 0.1 -> 0
- - healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
+ - terminate_when_unhealthy: True -> False
- healthy_z_range: (0.7, float('inf')) -> (0.5, float('inf'))
- - exclude current positions from observatiosn is set to False
+ - healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
+ - exclude_current_positions_from_observation: True -> False
"""
def __init__(
@@ -19,76 +21,93 @@ class ALRHopperJumpEnv(HopperEnv):
xml_file='hopper_jump.xml',
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
- healthy_reward=0.0,
+ healthy_reward=1.0,
penalty=0.0,
- context=True,
terminate_when_unhealthy=False,
healthy_state_range=(-100.0, 100.0),
healthy_z_range=(0.5, float('inf')),
healthy_angle_range=(-float('inf'), float('inf')),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=False,
- max_episode_steps=250
- ):
+ ):
- self.current_step = 0
+ self._steps = 0
self.max_height = 0
- self.max_episode_steps = max_episode_steps
- self.penalty = penalty
+ # self.penalty = penalty
self.goal = 0
- self.context = context
- self.exclude_current_positions_from_observation = exclude_current_positions_from_observation
+
self._floor_geom_id = None
self._foot_geom_id = None
+
self.contact_with_floor = False
self.init_floor_contact = False
self.has_left_floor = False
self.contact_dist = None
+
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
exclude_current_positions_from_observation)
def step(self, action):
+ self._steps += 1
+
+ self._floor_geom_id = self.model.geom_name2id('floor')
+ self._foot_geom_id = self.model.geom_name2id('foot_geom')
- self.current_step += 1
self.do_simulation(action, self.frame_skip)
+
height_after = self.get_body_com("torso")[2]
- # site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
- site_pos_after = self.get_body_com('foot_site')
+ site_pos_after = self.data.get_site_xpos('foot_site')
self.max_height = max(height_after, self.max_height)
+ has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
+
+ if not self.init_floor_contact:
+ self.init_floor_contact = has_floor_contact
+ if self.init_floor_contact and not self.has_left_floor:
+ self.has_left_floor = not has_floor_contact
+ if not self.contact_with_floor and self.has_left_floor:
+ self.contact_with_floor = has_floor_contact
+
ctrl_cost = self.control_cost(action)
costs = ctrl_cost
done = False
+ goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0]))
+
+ if self.contact_dist is None and self.contact_with_floor:
+ self.contact_dist = goal_dist
+
rewards = 0
- if self.current_step >= self.max_episode_steps:
- hight_goal_distance = -10 * np.linalg.norm(self.max_height - self.goal) if self.context else self.max_height
- healthy_reward = 0 if self.context else self.healthy_reward * 2 # self.current_step
- height_reward = self._forward_reward_weight * hight_goal_distance # maybe move reward calculation into if structure and define two different _forward_reward_weight variables for context and episodic seperatley
- rewards = height_reward + healthy_reward
+ if self._steps >= MAX_EPISODE_STEPS_HOPPERJUMP:
+ # healthy_reward = 0 if self.context else self.healthy_reward * self._steps
+ healthy_reward = self.healthy_reward * 2 # * self._steps
+ contact_dist = self.contact_dist if self.contact_dist is not None else 5
+ dist_reward = self._forward_reward_weight * (-3 * goal_dist + 10 * self.max_height - 2 * contact_dist)
+ rewards = dist_reward + healthy_reward
observation = self._get_obs()
reward = rewards - costs
-
- info = {
- 'height': height_after,
- 'x_pos': site_pos_after,
- 'max_height': self.max_height,
- 'height_rew': self.max_height,
- 'healthy_reward': self.healthy_reward * 2,
- 'healthy': self.is_healthy
- }
-
+ info = dict(
+ height=height_after,
+ x_pos=site_pos_after,
+ max_height=self.max_height,
+ goal=self.goal,
+ goal_dist=goal_dist,
+ height_rew=self.max_height,
+ healthy_reward=self.healthy_reward * 2,
+ healthy=self.is_healthy,
+ contact_dist=self.contact_dist if self.contact_dist is not None else 0
+ )
return observation, reward, done, info
def _get_obs(self):
return np.append(super()._get_obs(), self.goal)
- def reset(self):
+    def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None):
self.goal = self.np_random.uniform(1.4, 2.16, 1)[0] # 1.3 2.3
self.max_height = 0
- self.current_step = 0
+ self._steps = 0
return super().reset()
# overwrite reset_model to make it deterministic
@@ -106,11 +125,13 @@ class ALRHopperJumpEnv(HopperEnv):
self.contact_dist = None
return observation
- def _contact_checker(self, id_1, id_2):
- for coni in range(0, self.sim.data.ncon):
- con = self.sim.data.contact[coni]
- collision = con.geom1 == id_1 and con.geom2 == id_2
- collision_trans = con.geom1 == id_2 and con.geom2 == id_1
+ def _is_floor_foot_contact(self):
+ floor_geom_id = self.model.geom_name2id('floor')
+ foot_geom_id = self.model.geom_name2id('foot_geom')
+ for i in range(self.data.ncon):
+ contact = self.data.contact[i]
+ collision = contact.geom1 == floor_geom_id and contact.geom2 == foot_geom_id
+ collision_trans = contact.geom1 == foot_geom_id and contact.geom2 == floor_geom_id
if collision or collision_trans:
return True
return False
@@ -122,7 +143,7 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
self._floor_geom_id = self.model.geom_name2id('floor')
self._foot_geom_id = self.model.geom_name2id('foot_geom')
- self.current_step += 1
+ self._steps += 1
self.do_simulation(action, self.frame_skip)
height_after = self.get_body_com("torso")[2]
site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
@@ -133,8 +154,8 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
# self.has_left_floor = not floor_contact if self.init_floor_contact and not self.has_left_floor else self.has_left_floor
# self.contact_with_floor = floor_contact if not self.contact_with_floor and self.has_left_floor else self.contact_with_floor
- floor_contact = self._contact_checker(self._floor_geom_id,
- self._foot_geom_id) if not self.contact_with_floor else False
+        floor_contact = self._is_floor_foot_contact() \
+            if not self.contact_with_floor else False
if not self.init_floor_contact:
self.init_floor_contact = floor_contact
if self.init_floor_contact and not self.has_left_floor:
@@ -151,9 +172,9 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
done = False
goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0]))
rewards = 0
- if self.current_step >= self.max_episode_steps:
- # healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
- healthy_reward = self.healthy_reward * 2 # * self.current_step
+        if self._steps >= MAX_EPISODE_STEPS_HOPPERJUMP:
+ # healthy_reward = 0 if self.context else self.healthy_reward * self._steps
+ healthy_reward = self.healthy_reward * 2 # * self._steps
contact_dist = self.contact_dist if self.contact_dist is not None else 5
dist_reward = self._forward_reward_weight * (-3 * goal_dist + 10 * self.max_height - 2 * contact_dist)
rewards = dist_reward + healthy_reward
@@ -170,7 +191,7 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
'healthy_reward': self.healthy_reward * 2,
'healthy': self.is_healthy,
'contact_dist': self.contact_dist if self.contact_dist is not None else 0
- }
+ }
return observation, reward, done, info
def reset_model(self):
@@ -242,7 +263,7 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
height_scale=10,
dist_scale=3,
healthy_scale=2
- ):
+ ):
self.height_scale = height_scale
self.dist_scale = dist_scale
self.healthy_scale = healthy_scale
@@ -254,7 +275,7 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
self._floor_geom_id = self.model.geom_name2id('floor')
self._foot_geom_id = self.model.geom_name2id('foot_geom')
- self.current_step += 1
+ self._steps += 1
self.do_simulation(action, self.frame_skip)
height_after = self.get_body_com("torso")[2]
site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
@@ -273,8 +294,8 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
###########################################################
# This is only for logging the distance to goal when first having the contact
##########################################################
- floor_contact = self._contact_checker(self._floor_geom_id,
- self._foot_geom_id) if not self.contact_with_floor else False
+        floor_contact = self._is_floor_foot_contact() \
+            if not self.contact_with_floor else False
if not self.init_floor_contact:
self.init_floor_contact = floor_contact
if self.init_floor_contact and not self.has_left_floor:
@@ -295,33 +316,5 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
'healthy_reward': self.healthy_reward * self.healthy_reward,
'healthy': self.is_healthy,
'contact_dist': self.contact_dist if self.contact_dist is not None else 0
- }
+ }
return observation, reward, done, info
-
-
-if __name__ == '__main__':
- render_mode = "human" # "human" or "partial" or "final"
- # env = ALRHopperJumpEnv()
- # env = ALRHopperXYJumpEnv()
- np.random.seed(0)
- env = ALRHopperXYJumpEnvStepBased()
- env.seed(0)
- # env = ALRHopperJumpRndmPosEnv()
- obs = env.reset()
-
- for k in range(1000):
- obs = env.reset()
- print('observation :', obs[:])
- for i in range(200):
- # objective.load_result("/tmp/cma")
- # test with random actions
- ac = env.action_space.sample()
- obs, rew, d, info = env.step(ac)
- # if i % 10 == 0:
- # env.render(mode=render_mode)
- env.render(mode=render_mode)
- if d:
- print('After ', i, ' steps, done: ', d)
- env.reset()
-
- env.close()
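The __main__ demo removed above can still be reproduced by hand. A rough smoke-test sketch for the refactored class, using the same module path as before (rendering left out, MuJoCo assets assumed to be in place):

    import numpy as np
    from alr_envs.alr.mujoco.hopper_jump.hopper_jump import ALRHopperJumpEnv

    np.random.seed(0)
    env = ALRHopperJumpEnv()
    obs = env.reset()
    for _ in range(250):
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            obs = env.reset()
    env.close()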
diff --git a/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py b/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
index e3279aa..c7b16db 100644
--- a/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py
@@ -1,57 +1,25 @@
-from typing import Tuple, Union
+from typing import Union, Tuple
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
- @property
- def context_mask(self) -> np.ndarray:
+
+ # Random x goal + random init pos
+ def context_mask(self):
return np.hstack([
- [False] * (5 + int(not self.exclude_current_positions_from_observation)), # position
+ [False] * (2 + int(not self.exclude_current_positions_from_observation)), # position
+ [True] * 3, # set to true if randomize initial pos
[False] * 6, # velocity
[True]
])
@property
- def current_pos(self) -> Union[float, int, np.ndarray]:
- return self.env.sim.data.qpos[3:6].copy()
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.sim.data.qpos[3:6].copy()
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.sim.data.qvel[3:6].copy()
-
- @property
- def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
-
-
-class HighCtxtMPWrapper(MPWrapper):
- @property
- def active_obs(self):
- return np.hstack([
- [True] * (5 + int(not self.exclude_current_positions_from_observation)), # position
- [False] * 6, # velocity
- [False]
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray]:
- return self.env.sim.data.qpos[3:6].copy()
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.sim.data.qvel[3:6].copy()
-
- @property
- def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
+ return self.sim.data.qvel[3:6].copy()
diff --git a/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
deleted file mode 100644
index b919b22..0000000
--- a/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
-from typing import Union, Tuple
-import numpy as np
-
-
-class MPWrapper(BlackBoxWrapper):
- @property
- def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.sim.data.qpos[3:6].copy()
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.sim.data.qvel[3:6].copy()
-
- # # random goal
- # def set_active_obs(self):
- # return np.hstack([
- # [False] * (5 + int(not self.env.exclude_current_positions_from_observation)), # position
- # [False] * 6, # velocity
- # [True]
- # ])
-
- # Random x goal + random init pos
- def get_context_mask(self):
- return np.hstack([
- [False] * (2 + int(not self.env.exclude_current_positions_from_observation)), # position
- [True] * 3, # set to true if randomize initial pos
- [False] * 6, # velocity
- [True]
- ])
-
-
-class NewHighCtxtMPWrapper(MPWrapper):
- def get_context_mask(self):
- return np.hstack([
- [False] * (2 + int(not self.env.exclude_current_positions_from_observation)), # position
- [True] * 3, # set to true if randomize initial pos
- [False] * 6, # velocity
- [True], # goal
- [False] * 3 # goal diff
- ])
-
- def set_context(self, context):
- return self.get_observation_from_step(self.env.env.set_context(context))
-
diff --git a/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py b/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py
index 03553f2..7ae33d1 100644
--- a/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py
+++ b/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py
@@ -67,7 +67,7 @@ class ALRHopperThrowEnv(HopperEnv):
info = {
'ball_pos': ball_pos_after,
'ball_pos_y': ball_pos_after_y,
- 'current_step' : self.current_step,
+ '_steps' : self.current_step,
'goal' : self.goal,
}
diff --git a/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py b/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
index f5bf08d..7778e8c 100644
--- a/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/alr/mujoco/hopper_throw/new_mp_wrapper.py b/alr_envs/alr/mujoco/hopper_throw/new_mp_wrapper.py
index a8cd696..01d87a4 100644
--- a/alr_envs/alr/mujoco/hopper_throw/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/hopper_throw/new_mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/alr/mujoco/reacher/alr_reacher.py b/alr_envs/alr/mujoco/reacher/alr_reacher.py
deleted file mode 100644
index 0699c44..0000000
--- a/alr_envs/alr/mujoco/reacher/alr_reacher.py
+++ /dev/null
@@ -1,152 +0,0 @@
-import os
-
-import numpy as np
-from gym import utils
-from gym.envs.mujoco import MujocoEnv
-
-import alr_envs.utils.utils as alr_utils
-
-
-class ALRReacherEnv(MujocoEnv, utils.EzPickle):
- def __init__(self, steps_before_reward: int = 200, n_links: int = 5, ctrl_cost_weight: int = 1,
- balance: bool = False):
- utils.EzPickle.__init__(**locals())
-
- self._steps = 0
- self.steps_before_reward = steps_before_reward
- self.n_links = n_links
-
- self.balance = balance
- self.balance_weight = 1.0
- self.ctrl_cost_weight = ctrl_cost_weight
-
- self.reward_weight = 1
- if steps_before_reward == 200:
- self.reward_weight = 200
- elif steps_before_reward == 50:
- self.reward_weight = 50
-
- if n_links == 5:
- file_name = 'reacher_5links.xml'
- elif n_links == 7:
- file_name = 'reacher_7links.xml'
- else:
- raise ValueError(f"Invalid number of links {n_links}, only 5 or 7 allowed.")
-
- MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", file_name), 2)
-
- def step(self, a):
- self._steps += 1
-
- reward_dist = 0.0
- angular_vel = 0.0
- reward_balance = 0.0
- is_delayed = self.steps_before_reward > 0
- reward_ctrl = - np.square(a).sum() * self.ctrl_cost_weight
- if self._steps >= self.steps_before_reward:
- vec = self.get_body_com("fingertip") - self.get_body_com("target")
- reward_dist -= self.reward_weight * np.linalg.norm(vec)
- if is_delayed:
- # avoid giving this penalty for normal step based case
- # angular_vel -= 10 * np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
- angular_vel -= 10 * np.square(self.sim.data.qvel.flat[:self.n_links]).sum()
- # if is_delayed:
- # # Higher control penalty for sparse reward per timestep
- # reward_ctrl *= 10
-
- if self.balance:
- reward_balance -= self.balance_weight * np.abs(
- alr_utils.angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad"))
-
- reward = reward_dist + reward_ctrl + angular_vel + reward_balance
- self.do_simulation(a, self.frame_skip)
- ob = self._get_obs()
- done = False
- return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl,
- velocity=angular_vel, reward_balance=reward_balance,
- end_effector=self.get_body_com("fingertip").copy(),
- goal=self.goal if hasattr(self, "goal") else None)
-
- def viewer_setup(self):
- self.viewer.cam.trackbodyid = 0
-
- # def reset_model(self):
- # qpos = self.init_qpos
- # if not hasattr(self, "goal"):
- # self.goal = np.array([-0.25, 0.25])
- # # self.goal = self.init_qpos.copy()[:2] + 0.05
- # qpos[-2:] = self.goal
- # qvel = self.init_qvel
- # qvel[-2:] = 0
- # self.set_state(qpos, qvel)
- # self._steps = 0
- #
- # return self._get_obs()
-
- def reset_model(self):
- qpos = self.init_qpos.copy()
- while True:
- # full space
- # self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
- # I Quadrant
- # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
- # II Quadrant
- # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
- # II + III Quadrant
- # self.goal = np.random.uniform(low=-self.n_links / 10, high=[0, self.n_links / 10], size=2)
- # I + II Quadrant
- self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=self.n_links, size=2)
- if np.linalg.norm(self.goal) < self.n_links / 10:
- break
- qpos[-2:] = self.goal
- qvel = self.init_qvel.copy()
- qvel[-2:] = 0
- self.set_state(qpos, qvel)
- self._steps = 0
-
- return self._get_obs()
-
- # def reset_model(self):
- # qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
- # while True:
- # self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
- # if np.linalg.norm(self.goal) < self.n_links / 10:
- # break
- # qpos[-2:] = self.goal
- # qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
- # qvel[-2:] = 0
- # self.set_state(qpos, qvel)
- # self._steps = 0
- #
- # return self._get_obs()
-
- def _get_obs(self):
- theta = self.sim.data.qpos.flat[:self.n_links]
- target = self.get_body_com("target")
- return np.concatenate([
- np.cos(theta),
- np.sin(theta),
- target[:2], # x-y of goal position
- self.sim.data.qvel.flat[:self.n_links], # angular velocity
- self.get_body_com("fingertip") - target, # goal distance
- [self._steps],
- ])
-
-
-if __name__ == '__main__':
- nl = 5
- render_mode = "human" # "human" or "partial" or "final"
- env = ALRReacherEnv(n_links=nl)
- obs = env.reset()
-
- for i in range(2000):
- # objective.load_result("/tmp/cma")
- # test with random actions
- ac = env.action_space.sample()
- obs, rew, d, info = env.step(ac)
- if i % 10 == 0:
- env.render(mode=render_mode)
- if d:
- env.reset()
-
- env.close()
diff --git a/alr_envs/alr/mujoco/reacher/balancing.py b/alr_envs/alr/mujoco/reacher/balancing.py
deleted file mode 100644
index 3e34298..0000000
--- a/alr_envs/alr/mujoco/reacher/balancing.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import os
-
-import numpy as np
-from gym import utils
-from gym.envs.mujoco import mujoco_env
-
-import alr_envs.utils.utils as alr_utils
-
-
-class BalancingEnv(mujoco_env.MujocoEnv, utils.EzPickle):
- def __init__(self, n_links=5):
- utils.EzPickle.__init__(**locals())
-
- self.n_links = n_links
-
- if n_links == 5:
- file_name = 'reacher_5links.xml'
- elif n_links == 7:
- file_name = 'reacher_7links.xml'
- else:
- raise ValueError(f"Invalid number of links {n_links}, only 5 or 7 allowed.")
-
- mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", file_name), 2)
-
- def step(self, a):
- angle = alr_utils.angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad")
- reward = - np.abs(angle)
-
- self.do_simulation(a, self.frame_skip)
- ob = self._get_obs()
- done = False
- return ob, reward, done, dict(angle=angle, end_effector=self.get_body_com("fingertip").copy())
-
- def viewer_setup(self):
- self.viewer.cam.trackbodyid = 1
-
- def reset_model(self):
- # This also generates a goal, we however do not need/use it
- qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
- qpos[-2:] = 0
- qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
- qvel[-2:] = 0
- self.set_state(qpos, qvel)
-
- return self._get_obs()
-
- def _get_obs(self):
- theta = self.sim.data.qpos.flat[:self.n_links]
- return np.concatenate([
- np.cos(theta),
- np.sin(theta),
- self.sim.data.qvel.flat[:self.n_links], # this is angular velocity
- ])
diff --git a/alr_envs/alr/mujoco/reacher/mp_wrapper.py b/alr_envs/alr/mujoco/reacher/mp_wrapper.py
index 966be23..de33ae0 100644
--- a/alr_envs/alr/mujoco/reacher/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/reacher/mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Union, Tuple
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/alr/mujoco/reacher/reacher.py b/alr_envs/alr/mujoco/reacher/reacher.py
new file mode 100644
index 0000000..a5d34ee
--- /dev/null
+++ b/alr_envs/alr/mujoco/reacher/reacher.py
@@ -0,0 +1,105 @@
+import os
+
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import MujocoEnv
+
+
+class ReacherEnv(MujocoEnv, utils.EzPickle):
+ """
+ More general version of the gym mujoco Reacher environment
+ """
+
+ def __init__(self, sparse: bool = False, n_links: int = 5, ctrl_cost_weight: int = 1):
+ utils.EzPickle.__init__(**locals())
+
+ self._steps = 0
+ self.n_links = n_links
+
+ self.ctrl_cost_weight = ctrl_cost_weight
+
+ self.sparse = sparse
+ self.reward_weight = 1 if not sparse else 200
+
+ file_name = f'reacher_{n_links}links.xml'
+
+ MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", file_name), 2)
+
+ def step(self, action):
+ self._steps += 1
+
+ is_reward = not self.sparse or (self.sparse and self._steps == 200)
+
+ reward_dist = 0.0
+ angular_vel = 0.0
+ if is_reward:
+ reward_dist = self.distance_reward()
+ angular_vel = self.velocity_reward()
+
+ reward_ctrl = -self.ctrl_cost_weight * np.square(action).sum()
+
+ reward = reward_dist + reward_ctrl + angular_vel
+ self.do_simulation(action, self.frame_skip)
+ ob = self._get_obs()
+ done = False
+
+ infos = dict(
+ reward_dist=reward_dist,
+ reward_ctrl=reward_ctrl,
+ velocity=angular_vel,
+ end_effector=self.get_body_com("fingertip").copy(),
+ goal=self.goal if hasattr(self, "goal") else None
+ )
+
+ return ob, reward, done, infos
+
+ def distance_reward(self):
+ vec = self.get_body_com("fingertip") - self.get_body_com("target")
+ return -self.reward_weight * np.linalg.norm(vec)
+
+ def velocity_reward(self):
+ return -10 * np.square(self.sim.data.qvel.flat[:self.n_links]).sum() if self.sparse else 0.0
+
+ def viewer_setup(self):
+ self.viewer.cam.trackbodyid = 0
+
+ def reset_model(self):
+ qpos = (
+ # self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) +
+ self.init_qpos.copy()
+ )
+ while True:
+ # full space
+ self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
+ # I Quadrant
+ # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
+ # II Quadrant
+ # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
+ # II + III Quadrant
+ # self.goal = np.random.uniform(low=-self.n_links / 10, high=[0, self.n_links / 10], size=2)
+ # I + II Quadrant
+ # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=self.n_links, size=2)
+ if np.linalg.norm(self.goal) < self.n_links / 10:
+ break
+
+ qpos[-2:] = self.goal
+ qvel = (
+ # self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nv) +
+ self.init_qvel.copy()
+ )
+ qvel[-2:] = 0
+ self.set_state(qpos, qvel)
+ self._steps = 0
+
+ return self._get_obs()
+
+ def _get_obs(self):
+ theta = self.sim.data.qpos.flat[:self.n_links]
+ target = self.get_body_com("target")
+ return np.concatenate([
+ np.cos(theta),
+ np.sin(theta),
+ target[:2], # x-y of goal position
+ self.sim.data.qvel.flat[:self.n_links], # angular velocity
+ self.get_body_com("fingertip") - target, # goal distance
+ ])
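A quick sanity check for the sparse/dense split in ReacherEnv above: with sparse=False the distance term is paid every step, with sparse=True only at step 200 (the registered episode length encoded in the is_reward condition). A sketch, assuming MuJoCo and the 5-link asset are available:

    from alr_envs.alr.mujoco.reacher.reacher import ReacherEnv

    for sparse in (False, True):
        env = ReacherEnv(sparse=sparse, n_links=5)
        env.reset()
        rewarded_steps = 0
        for _ in range(200):
            _, _, _, info = env.step(env.action_space.sample())
            rewarded_steps += int(info['reward_dist'] != 0.0)
        # dense: distance reward every step; sparse: only at step 200
        print(f"sparse={sparse}: {rewarded_steps} steps with distance reward")
        env.close()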
diff --git a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
index 408124a..40e4252 100644
--- a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py b/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
index 0c2dba5..5e9d0eb 100644
--- a/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/walker_2d_jump/new_mp_wrapper.py
index 96b0739..b2bfde9 100644
--- a/alr_envs/alr/mujoco/walker_2d_jump/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/walker_2d_jump/new_mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/mp/__init__.py b/alr_envs/black_box/__init__.py
similarity index 100%
rename from alr_envs/mp/__init__.py
rename to alr_envs/black_box/__init__.py
diff --git a/alr_envs/mp/black_box_wrapper.py b/alr_envs/black_box/black_box_wrapper.py
similarity index 94%
rename from alr_envs/mp/black_box_wrapper.py
rename to alr_envs/black_box/black_box_wrapper.py
index f1ba41f..0c10ef8 100644
--- a/alr_envs/mp/black_box_wrapper.py
+++ b/alr_envs/black_box/black_box_wrapper.py
@@ -6,8 +6,8 @@ import numpy as np
from gym import spaces
from mp_pytorch.mp.mp_interfaces import MPInterface
-from alr_envs.mp.controllers.base_controller import BaseController
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.controller.base_controller import BaseController
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
from alr_envs.utils.utils import get_numpy
@@ -15,10 +15,14 @@ class BlackBoxWrapper(gym.ObservationWrapper):
def __init__(self,
env: RawInterfaceWrapper,
- trajectory_generator: MPInterface, tracking_controller: BaseController,
- duration: float, verbose: int = 1, learn_sub_trajectories: bool = False,
+ trajectory_generator: MPInterface,
+ tracking_controller: BaseController,
+ duration: float,
+ verbose: int = 1,
+ learn_sub_trajectories: bool = False,
replanning_schedule: Union[None, callable] = None,
- reward_aggregation: callable = np.sum):
+ reward_aggregation: callable = np.sum
+ ):
"""
gym.Wrapper for leveraging a black box approach with a trajectory generator.
diff --git a/alr_envs/mp/controllers/__init__.py b/alr_envs/black_box/controller/__init__.py
similarity index 100%
rename from alr_envs/mp/controllers/__init__.py
rename to alr_envs/black_box/controller/__init__.py
diff --git a/alr_envs/mp/controllers/base_controller.py b/alr_envs/black_box/controller/base_controller.py
similarity index 100%
rename from alr_envs/mp/controllers/base_controller.py
rename to alr_envs/black_box/controller/base_controller.py
diff --git a/alr_envs/mp/controllers/controller_factory.py b/alr_envs/black_box/controller/controller_factory.py
similarity index 60%
rename from alr_envs/mp/controllers/controller_factory.py
rename to alr_envs/black_box/controller/controller_factory.py
index 1c1cfec..6ef7960 100644
--- a/alr_envs/mp/controllers/controller_factory.py
+++ b/alr_envs/black_box/controller/controller_factory.py
@@ -1,7 +1,7 @@
-from alr_envs.mp.controllers.meta_world_controller import MetaWorldController
-from alr_envs.mp.controllers.pd_controller import PDController
-from alr_envs.mp.controllers.vel_controller import VelController
-from alr_envs.mp.controllers.pos_controller import PosController
+from alr_envs.black_box.controller.meta_world_controller import MetaWorldController
+from alr_envs.black_box.controller.pd_controller import PDController
+from alr_envs.black_box.controller.vel_controller import VelController
+from alr_envs.black_box.controller.pos_controller import PosController
ALL_TYPES = ["motor", "velocity", "position", "metaworld"]
diff --git a/alr_envs/mp/controllers/meta_world_controller.py b/alr_envs/black_box/controller/meta_world_controller.py
similarity index 92%
rename from alr_envs/mp/controllers/meta_world_controller.py
rename to alr_envs/black_box/controller/meta_world_controller.py
index 5747f9e..296ea3a 100644
--- a/alr_envs/mp/controllers/meta_world_controller.py
+++ b/alr_envs/black_box/controller/meta_world_controller.py
@@ -1,6 +1,6 @@
import numpy as np
-from alr_envs.mp.controllers.base_controller import BaseController
+from alr_envs.black_box.controller.base_controller import BaseController
class MetaWorldController(BaseController):
diff --git a/alr_envs/mp/controllers/pd_controller.py b/alr_envs/black_box/controller/pd_controller.py
similarity index 93%
rename from alr_envs/mp/controllers/pd_controller.py
rename to alr_envs/black_box/controller/pd_controller.py
index 140aeee..ab21444 100644
--- a/alr_envs/mp/controllers/pd_controller.py
+++ b/alr_envs/black_box/controller/pd_controller.py
@@ -1,6 +1,6 @@
from typing import Union, Tuple
-from alr_envs.mp.controllers.base_controller import BaseController
+from alr_envs.black_box.controller.base_controller import BaseController
class PDController(BaseController):
diff --git a/alr_envs/mp/controllers/pos_controller.py b/alr_envs/black_box/controller/pos_controller.py
similarity index 77%
rename from alr_envs/mp/controllers/pos_controller.py
rename to alr_envs/black_box/controller/pos_controller.py
index 5570307..3f3526a 100644
--- a/alr_envs/mp/controllers/pos_controller.py
+++ b/alr_envs/black_box/controller/pos_controller.py
@@ -1,4 +1,4 @@
-from alr_envs.mp.controllers.base_controller import BaseController
+from alr_envs.black_box.controller.base_controller import BaseController
class PosController(BaseController):
diff --git a/alr_envs/mp/controllers/vel_controller.py b/alr_envs/black_box/controller/vel_controller.py
similarity index 77%
rename from alr_envs/mp/controllers/vel_controller.py
rename to alr_envs/black_box/controller/vel_controller.py
index 67bab2a..2134207 100644
--- a/alr_envs/mp/controllers/vel_controller.py
+++ b/alr_envs/black_box/controller/vel_controller.py
@@ -1,4 +1,4 @@
-from alr_envs.mp.controllers.base_controller import BaseController
+from alr_envs.black_box.controller.base_controller import BaseController
class VelController(BaseController):
diff --git a/alr_envs/black_box/factory/__init__.py b/alr_envs/black_box/factory/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/alr_envs/mp/basis_generator_factory.py b/alr_envs/black_box/factory/basis_generator_factory.py
similarity index 100%
rename from alr_envs/mp/basis_generator_factory.py
rename to alr_envs/black_box/factory/basis_generator_factory.py
diff --git a/alr_envs/mp/phase_generator_factory.py b/alr_envs/black_box/factory/phase_generator_factory.py
similarity index 93%
rename from alr_envs/mp/phase_generator_factory.py
rename to alr_envs/black_box/factory/phase_generator_factory.py
index 45cfdc1..ca0dd84 100644
--- a/alr_envs/mp/phase_generator_factory.py
+++ b/alr_envs/black_box/factory/phase_generator_factory.py
@@ -17,4 +17,4 @@ def get_phase_generator(phase_generator_type, **kwargs):
return SmoothPhaseGenerator(**kwargs)
else:
raise ValueError(f"Specified phase generator type {phase_generator_type} not supported, "
- f"please choose one of {ALL_TYPES}.")
\ No newline at end of file
+ f"please choose one of {ALL_TYPES}.")
diff --git a/alr_envs/mp/mp_factory.py b/alr_envs/black_box/factory/trajectory_generator_factory.py
similarity index 91%
rename from alr_envs/mp/mp_factory.py
rename to alr_envs/black_box/factory/trajectory_generator_factory.py
index d2c5460..784513e 100644
--- a/alr_envs/mp/mp_factory.py
+++ b/alr_envs/black_box/factory/trajectory_generator_factory.py
@@ -9,7 +9,7 @@ ALL_TYPES = ["promp", "dmp", "idmp"]
def get_trajectory_generator(
trajectory_generator_type: str, action_dim: int, basis_generator: BasisGenerator, **kwargs
- ):
+):
trajectory_generator_type = trajectory_generator_type.lower()
if trajectory_generator_type == "promp":
return ProMP(basis_generator, action_dim, **kwargs)
@@ -19,4 +19,4 @@ def get_trajectory_generator(
return IDMP(basis_generator, action_dim, **kwargs)
else:
raise ValueError(f"Specified movement primitive type {trajectory_generator_type} not supported, "
- f"please choose one of {ALL_TYPES}.")
\ No newline at end of file
+ f"please choose one of {ALL_TYPES}.")
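For reference, the renamed factories keep the same dispatch pattern as before: lower-case the requested type string, map it to a generator class, and raise a ValueError listing ALL_TYPES otherwise. A minimal standalone sketch of that pattern follows; the Toy* classes are placeholders, not the mp_pytorch types the repository actually constructs.

    # Sketch of the string-dispatch factory pattern used by the renamed factories above.
    # ToyProMP / ToyDMP are illustrative stand-ins, not the real trajectory generators.
    ALL_TYPES = ["promp", "dmp"]

    class ToyProMP:
        def __init__(self, basis_generator, action_dim, **kwargs):
            self.basis_generator = basis_generator
            self.action_dim = action_dim

    class ToyDMP:
        def __init__(self, basis_generator, action_dim, **kwargs):
            self.basis_generator = basis_generator
            self.action_dim = action_dim

    def get_trajectory_generator(trajectory_generator_type: str, action_dim: int, basis_generator, **kwargs):
        trajectory_generator_type = trajectory_generator_type.lower()
        if trajectory_generator_type == "promp":
            return ToyProMP(basis_generator, action_dim, **kwargs)
        elif trajectory_generator_type == "dmp":
            return ToyDMP(basis_generator, action_dim, **kwargs)
        raise ValueError(f"Specified movement primitive type {trajectory_generator_type} not supported, "
                         f"please choose one of {ALL_TYPES}.")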
diff --git a/alr_envs/mp/raw_interface_wrapper.py b/alr_envs/black_box/raw_interface_wrapper.py
similarity index 100%
rename from alr_envs/mp/raw_interface_wrapper.py
rename to alr_envs/black_box/raw_interface_wrapper.py
diff --git a/alr_envs/dmc/manipulation/reach_site/mp_wrapper.py b/alr_envs/dmc/manipulation/reach_site/mp_wrapper.py
index 6d5029e..d918edc 100644
--- a/alr_envs/dmc/manipulation/reach_site/mp_wrapper.py
+++ b/alr_envs/dmc/manipulation/reach_site/mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/dmc/suite/ball_in_cup/mp_wrapper.py b/alr_envs/dmc/suite/ball_in_cup/mp_wrapper.py
index 9687bed..b3f882e 100644
--- a/alr_envs/dmc/suite/ball_in_cup/mp_wrapper.py
+++ b/alr_envs/dmc/suite/ball_in_cup/mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/dmc/suite/cartpole/mp_wrapper.py b/alr_envs/dmc/suite/cartpole/mp_wrapper.py
index 3f16d24..6cc0687 100644
--- a/alr_envs/dmc/suite/cartpole/mp_wrapper.py
+++ b/alr_envs/dmc/suite/cartpole/mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/dmc/suite/reacher/mp_wrapper.py b/alr_envs/dmc/suite/reacher/mp_wrapper.py
index ac857c1..82e4da8 100644
--- a/alr_envs/dmc/suite/reacher/mp_wrapper.py
+++ b/alr_envs/dmc/suite/reacher/mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/meta/goal_change_mp_wrapper.py b/alr_envs/meta/goal_change_mp_wrapper.py
index 17495da..e628a0c 100644
--- a/alr_envs/meta/goal_change_mp_wrapper.py
+++ b/alr_envs/meta/goal_change_mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/meta/goal_endeffector_change_mp_wrapper.py b/alr_envs/meta/goal_endeffector_change_mp_wrapper.py
index 3a6ad1c..1a128e7 100644
--- a/alr_envs/meta/goal_endeffector_change_mp_wrapper.py
+++ b/alr_envs/meta/goal_endeffector_change_mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/meta/goal_object_change_mp_wrapper.py b/alr_envs/meta/goal_object_change_mp_wrapper.py
index 97c64b8..1a6f57e 100644
--- a/alr_envs/meta/goal_object_change_mp_wrapper.py
+++ b/alr_envs/meta/goal_object_change_mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/meta/object_change_mp_wrapper.py b/alr_envs/meta/object_change_mp_wrapper.py
index f832c9f..07e88dc 100644
--- a/alr_envs/meta/object_change_mp_wrapper.py
+++ b/alr_envs/meta/object_change_mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/open_ai/classic_control/continuous_mountain_car/mp_wrapper.py b/alr_envs/open_ai/classic_control/continuous_mountain_car/mp_wrapper.py
index 189563c..db9f1f2 100644
--- a/alr_envs/open_ai/classic_control/continuous_mountain_car/mp_wrapper.py
+++ b/alr_envs/open_ai/classic_control/continuous_mountain_car/mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/open_ai/mujoco/reacher_v2/mp_wrapper.py b/alr_envs/open_ai/mujoco/reacher_v2/mp_wrapper.py
index 9d627b6..1e02f10 100644
--- a/alr_envs/open_ai/mujoco/reacher_v2/mp_wrapper.py
+++ b/alr_envs/open_ai/mujoco/reacher_v2/mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/open_ai/robotics/fetch/mp_wrapper.py b/alr_envs/open_ai/robotics/fetch/mp_wrapper.py
index 7a7bed6..331aa40 100644
--- a/alr_envs/open_ai/robotics/fetch/mp_wrapper.py
+++ b/alr_envs/open_ai/robotics/fetch/mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py
index b5587a7..3a8eec5 100644
--- a/alr_envs/utils/make_env_helpers.py
+++ b/alr_envs/utils/make_env_helpers.py
@@ -7,12 +7,12 @@ import numpy as np
from gym.envs.registration import EnvSpec, registry
from gym.wrappers import TimeAwareObservation
-from alr_envs.mp.basis_generator_factory import get_basis_generator
-from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
-from alr_envs.mp.controllers.controller_factory import get_controller
-from alr_envs.mp.mp_factory import get_trajectory_generator
-from alr_envs.mp.phase_generator_factory import get_phase_generator
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.factory.basis_generator_factory import get_basis_generator
+from alr_envs.black_box.black_box_wrapper import BlackBoxWrapper
+from alr_envs.black_box.controller.controller_factory import get_controller
+from alr_envs.black_box.factory.trajectory_generator_factory import get_trajectory_generator
+from alr_envs.black_box.factory.phase_generator_factory import get_phase_generator
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
from alr_envs.utils.utils import nested_update
@@ -46,6 +46,7 @@ def make(env_id, seed, **kwargs):
spec = registry.get(env_id)
# This access is required to allow for nested dict updates
all_kwargs = deepcopy(spec._kwargs)
+ # TODO append wrapper here
nested_update(all_kwargs, **kwargs)
return _make(env_id, seed, **all_kwargs)
@@ -224,8 +225,8 @@ def make_bb_env_helper(**kwargs):
seed = kwargs.pop("seed", None)
wrappers = kwargs.pop("wrappers")
- traj_gen_kwargs = kwargs.pop("traj_gen_kwargs", {})
black_box_kwargs = kwargs.pop('black_box_kwargs', {})
+ traj_gen_kwargs = kwargs.pop("traj_gen_kwargs", {})
contr_kwargs = kwargs.pop("controller_kwargs", {})
phase_kwargs = kwargs.pop("phase_generator_kwargs", {})
basis_kwargs = kwargs.pop("basis_generator_kwargs", {})
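As of this patch, make_bb_env_helper consumes a single flat kwargs dict and pops the sub-configurations by name. A plausible layout of that dict is sketched below; only the key names come from the code above, 'promp' and 'velocity' appear elsewhere in this series, and the remaining type strings and the env id are placeholders.

    # Plausible kwargs layout for make_bb_env_helper as of this patch.
    # Key names are taken from the pops above; phase/basis type strings and
    # the env id are illustrative placeholders only.
    example_bb_kwargs = {
        "name": "alr_envs:SomeEnv-v0",          # underlying step-based env id (placeholder)
        "wrappers": [],                          # RawInterfaceWrapper subclasses to apply
        "seed": 1,
        "traj_gen_kwargs": {"trajectory_generator_type": "promp"},
        "black_box_kwargs": {},
        "controller_kwargs": {"controller_type": "velocity"},
        "phase_generator_kwargs": {"phase_generator_type": "linear"},   # placeholder type
        "basis_generator_kwargs": {"basis_generator_type": "rbf"},      # placeholder type
    }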
From a7051cd8b70721fc640571fbc04416b211e18b79 Mon Sep 17 00:00:00 2001
From: Onur
Date: Thu, 30 Jun 2022 17:55:00 +0200
Subject: [PATCH 052/104] mainly tidied up beerpong.py and
beerpong_reward_staged.py
---
alr_envs/alr/__init__.py | 6 +-
.../hole_reacher/mp_wrapper.py | 8 +-
alr_envs/alr/mujoco/beerpong/beerpong.py | 57 +++----
.../mujoco/beerpong/beerpong_reward_staged.py | 156 +++++++++---------
alr_envs/alr/mujoco/reacher/__init__.py | 2 +-
alr_envs/utils/make_env_helpers.py | 15 +-
6 files changed, 120 insertions(+), 124 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index cf009a4..3d2415c 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -24,7 +24,7 @@ ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
DEFAULT_BB_DICT = {
"name": 'EnvName',
"wrappers": [],
- "traj_gen_kwargs": {
+ "trajectory_generator_kwargs": {
'trajectory_generator_type': 'promp'
},
"phase_generator_kwargs": {
@@ -231,7 +231,6 @@ register(
entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBased',
max_episode_steps=300,
kwargs={
- "rndm_goal": True,
"cup_goal_pos": [-0.3, -1.2],
"frame_skip": 2
}
@@ -243,7 +242,6 @@ register(
entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvFixedReleaseStep',
max_episode_steps=300,
kwargs={
- "rndm_goal": True,
"cup_goal_pos": [-0.3, -1.2],
"frame_skip": 2
}
@@ -355,7 +353,7 @@ for _v in _versions:
_env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT)
kwargs_dict_hole_reacher_promp['wrappers'].append(classic_control.hole_reacher.MPWrapper)
- kwargs_dict_hole_reacher_promp['traj_gen_kwargs']['weight_scale'] = 2
+ kwargs_dict_hole_reacher_promp['trajectory_generator_kwargs']['weight_scale'] = 2
kwargs_dict_hole_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
kwargs_dict_hole_reacher_promp['name'] = f"alr_envs:{_v}"
register(
diff --git a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
index 19bb0c5..85bc784 100644
--- a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
+++ b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
@@ -2,12 +2,12 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
- def get_context_mask(self):
+ def context_mask(self):
return np.hstack([
[self.env.random_start] * self.env.n_links, # cos
[self.env.random_start] * self.env.n_links, # sin
@@ -25,3 +25,7 @@ class MPWrapper(RawInterfaceWrapper):
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.current_vel
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
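The wrapper above shows the raw interface each task wrapper is expected to expose after the refactor: a context_mask over the observation plus the current position, current velocity and control dt of the underlying env. A toy, self-contained sketch of that interface, with a stand-in environment rather than one of the registered tasks:

    # Toy sketch of the raw interface surfaced above (context_mask plus
    # current_pos/current_vel/dt pass-throughs). ToyEnv is a stand-in.
    from typing import Tuple, Union
    import numpy as np

    class ToyEnv:
        current_pos = np.zeros(2)
        current_vel = np.zeros(2)
        dt = 0.01

    class ToyMPWrapper:  # in the repo this would subclass RawInterfaceWrapper
        def __init__(self, env):
            self.env = env

        def context_mask(self) -> np.ndarray:
            # expose only the context part of the observation to the black-box layer
            return np.hstack([[False] * 4, [True] * 2])

        @property
        def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
            return self.env.current_pos

        @property
        def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
            return self.env.current_vel

        @property
        def dt(self) -> Union[float, int]:
            return self.env.dt

    wrapper = ToyMPWrapper(ToyEnv())
    print(wrapper.context_mask(), wrapper.dt)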
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index dfd6ea4..117003f 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -19,12 +19,9 @@ CUP_POS_MAX = np.array([1.42, -1.25])
# CUP_POS_MAX = np.array([0.16, -1.7])
class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
- def __init__(
- self, frame_skip=1, apply_gravity_comp=True, noisy=False,
- rndm_goal=False, cup_goal_pos=None
- ):
+ def __init__(self, frame_skip=2, apply_gravity_comp=True):
- cup_goal_pos = np.array(cup_goal_pos if cup_goal_pos is not None else [-0.3, -1.2, 0.840])
+ cup_goal_pos = np.array([-0.3, -1.2, 0.840])
if cup_goal_pos.shape[0] == 2:
cup_goal_pos = np.insert(cup_goal_pos, 2, 0.840)
self.cup_goal_pos = np.array(cup_goal_pos)
@@ -38,9 +35,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
- self.rndm_goal = rndm_goal
self.apply_gravity_comp = apply_gravity_comp
- self.add_noise = noisy
self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
self._start_vel = np.zeros(7)
@@ -48,17 +43,13 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.ball_site_id = 0
self.ball_id = 11
- # self._release_step = 175 # time step of ball release
- # self._release_step = 130 # time step of ball release
self.release_step = 100 # time step of ball release
self.ep_length = 600 // frame_skip
self.cup_table_id = 10
- if noisy:
- self.noise_std = 0.01
- else:
- self.noise_std = 0
+ self.add_noise = False
+
reward_function = BeerPongReward
self.reward_function = reward_function()
self.repeat_action = frame_skip
@@ -74,7 +65,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
return self._start_vel
def reset(self):
- self.reward_function.reset(self.add_noise)
+ self.reward_function.reset()
return super().reset()
def reset_model(self):
@@ -88,15 +79,13 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
start_pos[0:7] = init_pos_robot
self.set_state(start_pos, init_vel)
- self.sim.model.body_pos[self.cup_table_id] = self.cup_goal_pos
start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
self.set_state(start_pos, init_vel)
- if self.rndm_goal:
- xy = self.np_random.uniform(CUP_POS_MIN, CUP_POS_MAX)
- xyz = np.zeros(3)
- xyz[:2] = xy
- xyz[-1] = 0.840
- self.sim.model.body_pos[self.cup_table_id] = xyz
+ xy = self.np_random.uniform(CUP_POS_MIN, CUP_POS_MAX)
+ xyz = np.zeros(3)
+ xyz[:2] = xy
+ xyz[-1] = 0.840
+ self.sim.model.body_pos[self.cup_table_id] = xyz
return self._get_obs()
def step(self, a):
@@ -115,12 +104,9 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
if self._steps < self.release_step:
self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
- elif self._steps == self.release_step and self.add_noise:
- self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
crash = False
except mujoco_py.builder.MujocoException:
crash = True
- # joint_cons_viol = self.check_traj_in_joint_limits()
ob = self._get_obs()
@@ -184,14 +170,14 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
class ALRBeerBongEnvFixedReleaseStep(ALRBeerBongEnv):
- def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
- super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
+ def __init__(self, frame_skip=2, apply_gravity_comp=True):
+ super().__init__(frame_skip, apply_gravity_comp)
self.release_step = 62 # empirically evaluated for frame_skip=2!
class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
- def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
- super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
+ def __init__(self, frame_skip=2, apply_gravity_comp=True):
+ super().__init__(frame_skip, apply_gravity_comp)
self.release_step = 62 # empirically evaluated for frame_skip=2!
def step(self, a):
@@ -245,18 +231,21 @@ class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
if __name__ == "__main__":
- # env = ALRBeerBongEnv(rndm_goal=True)
- # env = ALRBeerBongEnvStepBased(frame_skip=2, rndm_goal=True)
- # env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2, rndm_goal=True)
- env = ALRBeerBongEnvFixedReleaseStep(frame_skip=2, rndm_goal=True)
+ env = ALRBeerBongEnv(frame_skip=2)
+ # env = ALRBeerBongEnvStepBased(frame_skip=2)
+ # env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2)
+ # env = ALRBeerBongEnvFixedReleaseStep(frame_skip=2)
import time
env.reset()
env.render("human")
for i in range(1500):
- ac = 10 * env.action_space.sample()
+ # ac = 10 * env.action_space.sample()
+ ac = np.ones(7)
# ac = np.zeros(7)
- # ac[0] = -1
+ # ac[0] = 0
+ # ac[1] = -0.01
+ # ac[3] = -0.01
# if env._steps > 150:
# ac[0] = 1
obs, rew, d, info = env.step(ac)
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index 7e5fda6..cd2ab75 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -32,6 +32,21 @@ class BeerPongReward:
self.dists = None
self.dists_final = None
self.action_costs = None
+ self.ball_ground_contact_first = False
+ self.ball_table_contact = False
+ self.ball_wall_contact = False
+ self.ball_cup_contact = False
+ self.ball_in_cup = False
+ self.dist_ground_cup = -1 # distance floor to cup if first floor contact
+
+ ### IDs
+ self.ball_collision_id = None
+ self.table_collision_id = None
+ self.wall_collision_id = None
+ self.cup_table_collision_id = None
+ self.ground_collision_id = None
+ self.cup_collision_ids = None
+ self.robot_collision_ids = None
self.reset()
self.is_initialized = False
@@ -50,25 +65,27 @@ class BeerPongReward:
if not self.is_initialized:
self.is_initialized = True
- self.ball_id = env.sim.model._body_name2id["ball"]
- self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
- self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
- self.cup_table_id = env.sim.model._body_name2id["cup_table"]
-
- self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
- self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
- self.wall_collision_id = env.sim.model._geom_name2id["wall"]
- self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
- self.ground_collision_id = env.sim.model._geom_name2id["ground"]
- self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
- self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
+            # TODO: Find a more elegant way to access the geom ids in each step -> less code
+ self.ball_collision_id = {env.model.geom_name2id("ball_geom")}
+ # self.ball_collision_id = env.model.geom_name2id("ball_geom")
+ self.table_collision_id = {env.model.geom_name2id("table_contact_geom")}
+ # self.table_collision_id = env.model.geom_name2id("table_contact_geom")
+ self.wall_collision_id = {env.model.geom_name2id("wall")}
+ # self.wall_collision_id = env.model.geom_name2id("wall")
+ self.cup_table_collision_id = {env.model.geom_name2id("cup_base_table_contact")}
+ # self.cup_table_collision_id = env.model.geom_name2id("cup_base_table_contact")
+ self.ground_collision_id = {env.model.geom_name2id("ground")}
+ # self.ground_collision_id = env.model.geom_name2id("ground")
+ self.cup_collision_ids = set(env.model.geom_name2id(name) for name in self.cup_collision_objects)
+ # self.cup_collision_ids = [env.model.geom_name2id(name) for name in self.cup_collision_objects]
+ self.robot_collision_ids = [env.model.geom_name2id(name) for name in self.robot_collision_objects]
def compute_reward(self, env, action):
- goal_pos = env.sim.data.site_xpos[self.goal_id]
- ball_pos = env.sim.data.body_xpos[self.ball_id]
- ball_vel = env.sim.data.body_xvelp[self.ball_id]
- goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
+ goal_pos = env.data.get_site_xpos("cup_goal_table")
+ ball_pos = env.data.get_body_xpos("ball")
+ ball_vel = env.data.get_body_xvelp("ball")
+ goal_final_pos = env.data.get_site_xpos("cup_goal_final_table")
self.check_contacts(env.sim)
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
@@ -77,16 +94,16 @@ class BeerPongReward:
if self.ball_ground_contact_first and self.dist_ground_cup == -1 else self.dist_ground_cup
action_cost = np.sum(np.square(action))
self.action_costs.append(np.copy(action_cost))
- # # ##################### Reward function which does not force to bounce once on the table (quad dist) ############
+ # # ##################### Reward function which does not force to bounce once on the table (quad dist) #########
- self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+ # Comment Onur: Is this needed?
+ # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
- if env._steps == env.ep_length - 1 or self._is_collided:
+ if env._steps == env.ep_length - 1:# or self._is_collided:
min_dist = np.min(self.dists)
final_dist = self.dists_final[-1]
if self.ball_ground_contact_first:
- min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 2, -4 # relative rew offset when first bounding on ground
- # min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 0, -6 # absolute rew offset when first bouncing on ground
+ min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 2, -4
else:
if not self.ball_in_cup:
if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact:
@@ -95,85 +112,74 @@ class BeerPongReward:
min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 0, -2
else:
min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 0, 1, 0, 0
- # dist_ground_cup = 1 * self.dist_ground_cup
action_cost = 1e-4 * np.mean(action_cost)
reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
action_cost - ground_contact_dist_coeff * self.dist_ground_cup ** 2
- # 1e-7*np.mean(action_cost)
# release step punishment
min_time_bound = 0.1
max_time_bound = 1.0
release_time = env.release_step * env.dt
- release_time_rew = int(release_time < min_time_bound) * (-30 - 10 * (release_time - min_time_bound) ** 2) \
- + int(release_time > max_time_bound) * (-30 - 10 * (release_time - max_time_bound) ** 2)
+ release_time_rew = int(release_time < min_time_bound) * (-30 - 10 * (release_time - min_time_bound) ** 2) + \
+ int(release_time > max_time_bound) * (-30 - 10 * (release_time - max_time_bound) ** 2)
reward += release_time_rew
success = self.ball_in_cup
- # print('release time :', release_time)
- # print('dist_ground_cup :', dist_ground_cup)
else:
action_cost = 1e-2 * action_cost
reward = - action_cost
- # reward = - 1e-4 * action_cost
- # reward = 0
success = False
- # ################################################################################################################
- infos = {}
- infos["success"] = success
- infos["is_collided"] = self._is_collided
- infos["ball_pos"] = ball_pos.copy()
- infos["ball_vel"] = ball_vel.copy()
- infos["action_cost"] = action_cost
- infos["task_reward"] = reward
+ # ##############################################################################################################
+ infos = {"success": success, "ball_pos": ball_pos.copy(),
+ "ball_vel": ball_vel.copy(), "action_cost": action_cost, "task_reward": reward, "is_collided": False} # TODO: Check if is collided is needed
return reward, infos
def check_contacts(self, sim):
if not self.ball_table_contact:
- self.ball_table_contact = self._check_collision_single_objects(sim, self.ball_collision_id,
- self.table_collision_id)
+ self.ball_table_contact = self._check_collision(sim, self.ball_collision_id, self.table_collision_id)
if not self.ball_cup_contact:
- self.ball_cup_contact = self._check_collision_with_set_of_objects(sim, self.ball_collision_id,
- self.cup_collision_ids)
+ self.ball_cup_contact = self._check_collision(sim, self.ball_collision_id, self.cup_collision_ids)
if not self.ball_wall_contact:
- self.ball_wall_contact = self._check_collision_single_objects(sim, self.ball_collision_id,
- self.wall_collision_id)
+ self.ball_wall_contact = self._check_collision(sim, self.ball_collision_id, self.wall_collision_id)
if not self.ball_in_cup:
- self.ball_in_cup = self._check_collision_single_objects(sim, self.ball_collision_id,
- self.cup_table_collision_id)
+ self.ball_in_cup = self._check_collision(sim, self.ball_collision_id, self.cup_table_collision_id)
if not self.ball_ground_contact_first:
- if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact and not self.ball_in_cup:
- self.ball_ground_contact_first = self._check_collision_single_objects(sim, self.ball_collision_id,
- self.ground_collision_id)
+ if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact \
+ and not self.ball_in_cup:
+ self.ball_ground_contact_first = self._check_collision(sim, self.ball_collision_id,
+ self.ground_collision_id)
- def _check_collision_single_objects(self, sim, id_1, id_2):
- for coni in range(0, sim.data.ncon):
+    # Checks whether any geom in id_set_1 collides with any geom in id_set_2
+ def _check_collision(self, sim, id_set_1, id_set_2):
+ """
+ If id_set_2 is set to None, it will check for a collision with itself (id_set_1).
+ """
+ collision_id_set = id_set_2 - id_set_1 if id_set_2 is not None else id_set_1
+ for coni in range(sim.data.ncon):
con = sim.data.contact[coni]
-
- collision = con.geom1 == id_1 and con.geom2 == id_2
- collision_trans = con.geom1 == id_2 and con.geom2 == id_1
-
- if collision or collision_trans:
+ if ((con.geom1 in id_set_1 and con.geom2 in collision_id_set) or
+ (con.geom2 in id_set_1 and con.geom1 in collision_id_set)):
return True
return False
- def _check_collision_with_itself(self, sim, collision_ids):
- col_1, col_2 = False, False
- for j, id in enumerate(collision_ids):
- col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
- if j != len(collision_ids) - 1:
- col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
- else:
- col_2 = False
- collision = True if col_1 or col_2 else False
- return collision
+ # def _check_collision_with_itself(self, sim, collision_ids):
+ # col_1, col_2 = False, False
+ # for j, id in enumerate(collision_ids):
+ # col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
+ # if j != len(collision_ids) - 1:
+ # col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
+ # else:
+ # col_2 = False
+ # collision = True if col_1 or col_2 else False
+ # return collision
- def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 in id_list and con.geom2 == id_1
- collision_trans = con.geom1 == id_1 and con.geom2 in id_list
-
- if collision or collision_trans:
- return True
- return False
+ ### This function will not be needed if we really do not need to check for collision with itself
+ # def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
+ # for coni in range(0, sim.data.ncon):
+ # con = sim.data.contact[coni]
+ #
+ # collision = con.geom1 in id_list and con.geom2 == id_1
+ # collision_trans = con.geom1 == id_1 and con.geom2 in id_list
+ #
+ # if collision or collision_trans:
+ # return True
+ # return False
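The reward refactor above collapses three near-identical contact checks into one set-based helper. The same logic, restated standalone with a fake contact record so it runs without MuJoCo:

    # Standalone restatement of the set-based _check_collision logic above,
    # using a tiny fake contact structure instead of a real mujoco_py sim.
    from collections import namedtuple

    Contact = namedtuple("Contact", ["geom1", "geom2"])

    def check_collision(contacts, id_set_1, id_set_2=None):
        # If id_set_2 is None, check id_set_1 against itself (self collision).
        collision_id_set = id_set_2 - id_set_1 if id_set_2 is not None else id_set_1
        for con in contacts:
            if ((con.geom1 in id_set_1 and con.geom2 in collision_id_set) or
                    (con.geom2 in id_set_1 and con.geom1 in collision_id_set)):
                return True
        return False

    # ball geom 3 touching table geom 7 -> True; ball vs. wall geom 9 -> False
    contacts = [Contact(geom1=3, geom2=7)]
    assert check_collision(contacts, {3}, {7}) is True
    assert check_collision(contacts, {3}, {9}) is False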
diff --git a/alr_envs/alr/mujoco/reacher/__init__.py b/alr_envs/alr/mujoco/reacher/__init__.py
index 8a04a02..c5e6d2f 100644
--- a/alr_envs/alr/mujoco/reacher/__init__.py
+++ b/alr_envs/alr/mujoco/reacher/__init__.py
@@ -1 +1 @@
-from .new_mp_wrapper import MPWrapper
+from .mp_wrapper import MPWrapper
diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py
index 3a8eec5..b5587a7 100644
--- a/alr_envs/utils/make_env_helpers.py
+++ b/alr_envs/utils/make_env_helpers.py
@@ -7,12 +7,12 @@ import numpy as np
from gym.envs.registration import EnvSpec, registry
from gym.wrappers import TimeAwareObservation
-from alr_envs.black_box.factory.basis_generator_factory import get_basis_generator
-from alr_envs.black_box.black_box_wrapper import BlackBoxWrapper
-from alr_envs.black_box.controller.controller_factory import get_controller
-from alr_envs.black_box.factory.trajectory_generator_factory import get_trajectory_generator
-from alr_envs.black_box.factory.phase_generator_factory import get_phase_generator
-from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.mp.basis_generator_factory import get_basis_generator
+from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
+from alr_envs.mp.controllers.controller_factory import get_controller
+from alr_envs.mp.mp_factory import get_trajectory_generator
+from alr_envs.mp.phase_generator_factory import get_phase_generator
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
from alr_envs.utils.utils import nested_update
@@ -46,7 +46,6 @@ def make(env_id, seed, **kwargs):
spec = registry.get(env_id)
# This access is required to allow for nested dict updates
all_kwargs = deepcopy(spec._kwargs)
- # TODO append wrapper here
nested_update(all_kwargs, **kwargs)
return _make(env_id, seed, **all_kwargs)
@@ -225,8 +224,8 @@ def make_bb_env_helper(**kwargs):
seed = kwargs.pop("seed", None)
wrappers = kwargs.pop("wrappers")
- black_box_kwargs = kwargs.pop('black_box_kwargs', {})
traj_gen_kwargs = kwargs.pop("traj_gen_kwargs", {})
+ black_box_kwargs = kwargs.pop('black_box_kwargs', {})
contr_kwargs = kwargs.pop("controller_kwargs", {})
phase_kwargs = kwargs.pop("phase_generator_kwargs", {})
basis_kwargs = kwargs.pop("basis_generator_kwargs", {})
From 4437ab9577dc07b7b7b8c2123cb59ff6df41a689 Mon Sep 17 00:00:00 2001
From: Onur
Date: Thu, 30 Jun 2022 18:17:02 +0200
Subject: [PATCH 053/104] change accessing body position in beerpong.py
---
alr_envs/alr/mujoco/beerpong/beerpong.py | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 117003f..11c09e4 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -144,16 +144,15 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
infos.update(reward_infos)
return ob, reward, done, infos
- def _check_traj_in_joint_limits(self):
- return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
+ # def _check_traj_in_joint_limits(self):
+ # return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
def _get_obs(self):
theta = self.sim.data.qpos.flat[:7]
theta_dot = self.sim.data.qvel.flat[:7]
- ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy()
- cup_goal_diff_final = ball_pos - self.sim.data.site_xpos[
- self.sim.model._site_name2id["cup_goal_final_table"]].copy()
- cup_goal_diff_top = ball_pos - self.sim.data.site_xpos[self.sim.model._site_name2id["cup_goal_table"]].copy()
+ ball_pos = self.data.get_body_xpos("ball").copy()
+ cup_goal_diff_final = ball_pos - self.data.get_site_xpos("cup_goal_final_table").copy()
+ cup_goal_diff_top = ball_pos - self.data.get_site_xpos("cup_goal_table").copy()
return np.concatenate([
np.cos(theta),
np.sin(theta),
From d4e3b957a9693ee64cd6e1fc0b363a13dc291692 Mon Sep 17 00:00:00 2001
From: Onur
Date: Fri, 1 Jul 2022 09:54:42 +0200
Subject: [PATCH 054/104] finish up beerpong, walker2d and ant need more
extensions, fix import bugs.
---
alr_envs/alr/__init__.py | 11 +++-
.../hole_reacher/mp_wrapper.py | 2 +-
alr_envs/alr/mujoco/__init__.py | 1 -
alr_envs/alr/mujoco/ant_jump/ant_jump.py | 25 ++++----
alr_envs/alr/mujoco/beerpong/beerpong.py | 57 +++++--------------
.../mujoco/beerpong/beerpong_reward_staged.py | 16 +++---
.../mujoco/walker_2d_jump/assets/walker2d.xml | 4 +-
.../mujoco/walker_2d_jump/walker_2d_jump.py | 28 ++++-----
alr_envs/utils/make_env_helpers.py | 12 ++--
9 files changed, 62 insertions(+), 94 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 3d2415c..459a0e6 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -218,8 +218,15 @@ register(
entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
max_episode_steps=300,
kwargs={
- "rndm_goal": True,
- "cup_goal_pos": [-0.3, -1.2],
+ "frame_skip": 2
+ }
+)
+
+register(
+ id='ALRBeerPong-v1',
+ entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+ max_episode_steps=300,
+ kwargs={
"frame_skip": 2
}
)
diff --git a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
index 85bc784..0a5eaa6 100644
--- a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
+++ b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index df52cfc..359f97b 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -7,6 +7,5 @@ from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import ALRHopperThrowEnv
from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
from .reacher.reacher import ReacherEnv
-from .reacher.balancing import BalancingEnv
from .table_tennis.tt_gym import TTEnvGym
from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv
diff --git a/alr_envs/alr/mujoco/ant_jump/ant_jump.py b/alr_envs/alr/mujoco/ant_jump/ant_jump.py
index 09c623d..27098ac 100644
--- a/alr_envs/alr/mujoco/ant_jump/ant_jump.py
+++ b/alr_envs/alr/mujoco/ant_jump/ant_jump.py
@@ -2,6 +2,10 @@ import numpy as np
from gym.envs.mujoco.ant_v3 import AntEnv
MAX_EPISODE_STEPS_ANTJUMP = 200
+# TODO: This environment was not tested yet. Do the following TODOs and test it.
+# TODO: Right now this environment only considers jumping to a specific height, which is not nice. It should be extended
+# to the same structure as the Hopper, where the angles are randomized (->contexts) and the agent should jump as high
+# as possible, while landing at a specific target position
class ALRAntJumpEnv(AntEnv):
@@ -22,14 +26,12 @@ class ALRAntJumpEnv(AntEnv):
healthy_z_range=(0.3, float('inf')),
contact_force_range=(-1.0, 1.0),
reset_noise_scale=0.1,
- context=True, # variable to decide if context is used or not
exclude_current_positions_from_observation=True,
max_episode_steps=200):
self.current_step = 0
self.max_height = 0
- self.context = context
self.max_episode_steps = max_episode_steps
- self.goal = 0 # goal when training with context
+ self.goal = 0
super().__init__(xml_file, ctrl_cost_weight, contact_cost_weight, healthy_reward, terminate_when_unhealthy,
healthy_z_range, contact_force_range, reset_noise_scale,
exclude_current_positions_from_observation)
@@ -53,15 +55,11 @@ class ALRAntJumpEnv(AntEnv):
done = height < 0.3 # fall over -> is the 0.3 value from healthy_z_range? TODO change 0.3 to the value of healthy z angle
if self.current_step == self.max_episode_steps or done:
- if self.context:
- # -10 for scaling the value of the distance between the max_height and the goal height; only used when context is enabled
- # height_reward = -10 * (np.linalg.norm(self.max_height - self.goal))
- height_reward = -10*np.linalg.norm(self.max_height - self.goal)
- # no healthy reward when using context, because we optimize a negative value
- healthy_reward = 0
- else:
- height_reward = self.max_height - 0.7
- healthy_reward = self.healthy_reward * self.current_step
+ # -10 for scaling the value of the distance between the max_height and the goal height; only used when context is enabled
+ # height_reward = -10 * (np.linalg.norm(self.max_height - self.goal))
+ height_reward = -10*np.linalg.norm(self.max_height - self.goal)
+ # no healthy reward when using context, because we optimize a negative value
+ healthy_reward = 0
rewards = height_reward + healthy_reward
@@ -105,7 +103,6 @@ if __name__ == '__main__':
obs = env.reset()
for i in range(2000):
- # objective.load_result("/tmp/cma")
# test with random actions
ac = env.action_space.sample()
obs, rew, d, info = env.step(ac)
@@ -114,4 +111,4 @@ if __name__ == '__main__':
if d:
env.reset()
- env.close()
\ No newline at end of file
+ env.close()
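With the context flag removed, the terminal reward always compares the maximal reached height against the sampled goal height and drops the healthy reward. A small worked example of that term, with illustrative numbers:

    # Worked example of the terminal height term above: -10 * ||max_height - goal||.
    # The numbers are illustrative, not taken from a rollout.
    import numpy as np

    max_height = 1.25
    goal = np.array([1.5])                 # goal height sampled per episode
    height_reward = -10 * np.linalg.norm(max_height - goal)
    healthy_reward = 0                     # no healthy reward in the contextual setting
    print(height_reward + healthy_reward)  # -2.5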
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 11c09e4..218df6d 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -7,16 +7,6 @@ from gym.envs.mujoco import MujocoEnv
from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
-CUP_POS_MIN = np.array([-1.42, -4.05])
-CUP_POS_MAX = np.array([1.42, -1.25])
-
-
-# CUP_POS_MIN = np.array([-0.32, -2.2])
-# CUP_POS_MAX = np.array([0.32, -1.2])
-
-# smaller context space -> Easier task
-# CUP_POS_MIN = np.array([-0.16, -2.2])
-# CUP_POS_MAX = np.array([0.16, -1.7])
class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def __init__(self, frame_skip=2, apply_gravity_comp=True):
@@ -27,10 +17,16 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.cup_goal_pos = np.array(cup_goal_pos)
self._steps = 0
+ # Small Context -> Easier. Todo: Should we do different versions?
# self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
# "beerpong_wo_cup" + ".xml")
+ # self.cup_pos_min = np.array([-0.32, -2.2])
+ # self.cup_pos_max = np.array([0.32, -1.2])
+
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
"beerpong_wo_cup_big_table" + ".xml")
+ self.cup_pos_min = np.array([-1.42, -4.05])
+ self.cup_pos_max = np.array([1.42, -1.25])
self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
@@ -49,9 +45,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.cup_table_id = 10
self.add_noise = False
-
- reward_function = BeerPongReward
- self.reward_function = reward_function()
+ self.reward_function = BeerPongReward()
self.repeat_action = frame_skip
MujocoEnv.__init__(self, self.xml_path, frame_skip=1)
utils.EzPickle.__init__(self)
@@ -78,10 +72,11 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
start_pos = init_pos_all
start_pos[0:7] = init_pos_robot
+ # TODO: Ask Max why we need to set the state twice.
self.set_state(start_pos, init_vel)
start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
self.set_state(start_pos, init_vel)
- xy = self.np_random.uniform(CUP_POS_MIN, CUP_POS_MAX)
+ xy = self.np_random.uniform(self.cup_pos_min, self.cup_pos_max)
xyz = np.zeros(3)
xyz[:2] = xy
xyz[-1] = 0.840
@@ -89,9 +84,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
return self._get_obs()
def step(self, a):
- reward_dist = 0.0
- angular_vel = 0.0
-
+ crash = False
for _ in range(self.repeat_action):
if self.apply_gravity_comp:
applied_action = a + self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0]
@@ -100,7 +93,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
try:
self.do_simulation(applied_action, self.frame_skip)
self.reward_function.initialize(self)
- self.reward_function.check_contacts(self.sim)
+ # self.reward_function.check_contacts(self.sim) # I assume this is not important?
if self._steps < self.release_step:
self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
@@ -112,34 +105,19 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
if not crash:
reward, reward_infos = self.reward_function.compute_reward(self, applied_action)
- success = reward_infos['success']
is_collided = reward_infos['is_collided']
- ball_pos = reward_infos['ball_pos']
- ball_vel = reward_infos['ball_vel']
done = is_collided or self._steps == self.ep_length - 1
self._steps += 1
else:
reward = -30
- reward_infos = dict()
- success = False
- is_collided = False
done = True
- ball_pos = np.zeros(3)
- ball_vel = np.zeros(3)
+ reward_infos = {"success": False, "ball_pos": np.zeros(3), "ball_vel": np.zeros(3), "is_collided": False}
infos = dict(
- reward_dist=reward_dist,
reward=reward,
- velocity=angular_vel,
- # traj=self._q_pos,
action=a,
q_pos=self.sim.data.qpos[0:7].ravel().copy(),
- q_vel=self.sim.data.qvel[0:7].ravel().copy(),
- ball_pos=ball_pos,
- ball_vel=ball_vel,
- success=success,
- is_collided=is_collided, sim_crash=crash,
- table_contact_first=int(not self.reward_function.ball_ground_contact_first)
+ q_vel=self.sim.data.qvel[0:7].ravel().copy(), sim_crash=crash,
)
infos.update(reward_infos)
return ob, reward, done, infos
@@ -239,14 +217,7 @@ if __name__ == "__main__":
env.reset()
env.render("human")
for i in range(1500):
- # ac = 10 * env.action_space.sample()
- ac = np.ones(7)
- # ac = np.zeros(7)
- # ac[0] = 0
- # ac[1] = -0.01
- # ac[3] = -0.01
- # if env._steps > 150:
- # ac[0] = 1
+ ac = 10 * env.action_space.sample()
obs, rew, d, info = env.step(ac)
env.render("human")
print(env.dt)
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index cd2ab75..807fb19 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -21,12 +21,9 @@ class BeerPongReward:
self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
"cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
- # "cup_base_table", "cup_base_table_contact",
"cup_geom_table15",
"cup_geom_table16",
"cup_geom_table17", "cup_geom1_table8",
- # "cup_base_table_contact",
- # "cup_base_table"
]
self.dists = None
@@ -39,7 +36,7 @@ class BeerPongReward:
self.ball_in_cup = False
self.dist_ground_cup = -1 # distance floor to cup if first floor contact
- ### IDs
+ # IDs
self.ball_collision_id = None
self.table_collision_id = None
self.wall_collision_id = None
@@ -96,10 +93,10 @@ class BeerPongReward:
self.action_costs.append(np.copy(action_cost))
# # ##################### Reward function which does not force to bounce once on the table (quad dist) #########
- # Comment Onur: Is this needed?
+ # Is this needed?
# self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
- if env._steps == env.ep_length - 1:# or self._is_collided:
+ if env._steps == env.ep_length - 1: # or self._is_collided:
min_dist = np.min(self.dists)
final_dist = self.dists_final[-1]
if self.ball_ground_contact_first:
@@ -128,9 +125,10 @@ class BeerPongReward:
reward = - action_cost
success = False
# ##############################################################################################################
- infos = {"success": success, "ball_pos": ball_pos.copy(),
- "ball_vel": ball_vel.copy(), "action_cost": action_cost, "task_reward": reward, "is_collided": False} # TODO: Check if is collided is needed
-
+ infos = {"success": success, "ball_pos": ball_pos.copy(),
+ "ball_vel": ball_vel.copy(), "action_cost": action_cost, "task_reward": reward,
+ "table_contact_first": int(not self.ball_ground_contact_first),
+ "is_collided": False} # TODO: Check if is collided is needed
return reward, infos
def check_contacts(self, sim):
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml b/alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml
index 1b48e36..f3bcbd1 100644
--- a/alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml
+++ b/alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml
@@ -21,6 +21,7 @@
+
@@ -34,6 +35,7 @@
+
@@ -59,4 +61,4 @@
-
\ No newline at end of file
+
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py b/alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py
index 009dd9d..1ab0d29 100644
--- a/alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py
+++ b/alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py
@@ -4,6 +4,10 @@ import numpy as np
MAX_EPISODE_STEPS_WALKERJUMP = 300
+# TODO: Right now this environment only considers jumping to a specific height, which is not nice. It should be extended
+# to the same structure as the Hopper, where the angles are randomized (->contexts) and the agent should jump as high
+# as possible, while landing at a specific target position
+
class ALRWalker2dJumpEnv(Walker2dEnv):
"""
@@ -21,14 +25,12 @@ class ALRWalker2dJumpEnv(Walker2dEnv):
healthy_angle_range=(-1.0, 1.0),
reset_noise_scale=5e-3,
penalty=0,
- context=True,
exclude_current_positions_from_observation=True,
max_episode_steps=300):
self.current_step = 0
self.max_episode_steps = max_episode_steps
self.max_height = 0
self._penalty = penalty
- self.context = context
self.goal = 0
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
@@ -43,31 +45,24 @@ class ALRWalker2dJumpEnv(Walker2dEnv):
self.max_height = max(height, self.max_height)
- fell_over = height < 0.2
- done = fell_over
+ done = height < 0.2
ctrl_cost = self.control_cost(action)
costs = ctrl_cost
-
+ rewards = 0
if self.current_step >= self.max_episode_steps or done:
done = True
- height_goal_distance = -10 * (np.linalg.norm(self.max_height - self.goal)) if self.context else self.max_height
+ height_goal_distance = -10 * (np.linalg.norm(self.max_height - self.goal))
healthy_reward = self.healthy_reward * self.current_step
rewards = height_goal_distance + healthy_reward
- else:
- # penalty not needed
- rewards = 0
- rewards += ((action[:2] > 0) * self._penalty).sum() if self.current_step < 4 else 0
- rewards += ((action[3:5] > 0) * self._penalty).sum() if self.current_step < 4 else 0
-
observation = self._get_obs()
reward = rewards - costs
info = {
'height': height,
'max_height': self.max_height,
- 'goal' : self.goal,
+ 'goal': self.goal,
}
return observation, reward, done, info
@@ -78,7 +73,7 @@ class ALRWalker2dJumpEnv(Walker2dEnv):
def reset(self):
self.current_step = 0
self.max_height = 0
- self.goal = np.random.uniform(1.5, 2.5, 1) # 1.5 3.0
+ self.goal = np.random.uniform(1.5, 2.5, 1) # 1.5 3.0
return super().reset()
# overwrite reset_model to make it deterministic
@@ -99,8 +94,7 @@ if __name__ == '__main__':
env = ALRWalker2dJumpEnv()
obs = env.reset()
- for i in range(2000):
- # objective.load_result("/tmp/cma")
+ for i in range(6000):
# test with random actions
ac = env.action_space.sample()
obs, rew, d, info = env.step(ac)
@@ -110,4 +104,4 @@ if __name__ == '__main__':
print('After ', i, ' steps, done: ', d)
env.reset()
- env.close()
\ No newline at end of file
+ env.close()
diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py
index b5587a7..57b4c69 100644
--- a/alr_envs/utils/make_env_helpers.py
+++ b/alr_envs/utils/make_env_helpers.py
@@ -7,12 +7,12 @@ import numpy as np
from gym.envs.registration import EnvSpec, registry
from gym.wrappers import TimeAwareObservation
-from alr_envs.mp.basis_generator_factory import get_basis_generator
-from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
-from alr_envs.mp.controllers.controller_factory import get_controller
-from alr_envs.mp.mp_factory import get_trajectory_generator
-from alr_envs.mp.phase_generator_factory import get_phase_generator
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.black_box_wrapper import BlackBoxWrapper
+from alr_envs.black_box.controller.controller_factory import get_controller
+from alr_envs.black_box.factory.basis_generator_factory import get_basis_generator
+from alr_envs.black_box.factory.phase_generator_factory import get_phase_generator
+from alr_envs.black_box.factory.trajectory_generator_factory import get_trajectory_generator
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
from alr_envs.utils.utils import nested_update
From 2161cfd3a6e0c0d52cc4f7cf789653a4abdfd95a Mon Sep 17 00:00:00 2001
From: Onur
Date: Fri, 1 Jul 2022 11:42:42 +0200
Subject: [PATCH 055/104] Fix bugs to create mp environments. Still conflicts
with mp_pytorch_lib
---
alr_envs/alr/__init__.py | 2 +-
alr_envs/alr/classic_control/viapoint_reacher/__init__.py | 2 +-
alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py | 5 +++--
alr_envs/black_box/black_box_wrapper.py | 5 ++---
alr_envs/black_box/raw_interface_wrapper.py | 5 +++--
alr_envs/utils/make_env_helpers.py | 7 ++++---
6 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 459a0e6..53828ed 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -428,7 +428,7 @@ for _v in _versions:
kwargs_dict_bp_promp['name'] = f"alr_envs:{_v}"
register(
id=_env_id,
- entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+ entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_bp_promp
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/__init__.py b/alr_envs/alr/classic_control/viapoint_reacher/__init__.py
index a919c3a..c5e6d2f 100644
--- a/alr_envs/alr/classic_control/viapoint_reacher/__init__.py
+++ b/alr_envs/alr/classic_control/viapoint_reacher/__init__.py
@@ -1 +1 @@
-from new_mp_wrapper import MPWrapper
+from .mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
index bd22442..e447a49 100644
--- a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
@@ -25,8 +25,9 @@ class MPWrapper(RawInterfaceWrapper):
[False] # env steps
])
- def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
- if self.mp.learn_tau:
+ # TODO: Fix this
+ def _episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
+ if mp.learn_tau:
self.env.env.release_step = action[0] / self.env.dt # Tau value
return action, None
else:
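The signature change above passes the trajectory generator into the wrapper so it can check learn_tau itself; the beer pong wrapper then interprets the first action entry as the release time in seconds and converts it to a simulation step. A schematic, self-contained version (the Toy* classes are stand-ins, and the non-learn_tau branch of the real callback is cut off in the hunk above):

    # Schematic of the tau-handling callback above. ToyMP / ToyEnv are stand-ins.
    import numpy as np

    class ToyMP:
        learn_tau = True

    class ToyEnv:
        dt = 0.02          # illustrative control dt
        release_step = 0

    def episode_callback(env, mp, action):
        if mp.learn_tau:
            # first action entry is tau in seconds -> convert to a simulation step
            env.release_step = action[0] / env.dt
            return action, None
        # the non-learn_tau branch of the real wrapper is not shown in the hunk above
        return action, None

    env, mp = ToyEnv(), ToyMP()
    episode_callback(env, mp, np.array([1.24, 0.3, -0.1]))
    print(env.release_step)  # roughly 62, cf. the empirically chosen release step above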
diff --git a/alr_envs/black_box/black_box_wrapper.py b/alr_envs/black_box/black_box_wrapper.py
index 0c10ef8..43cc4f0 100644
--- a/alr_envs/black_box/black_box_wrapper.py
+++ b/alr_envs/black_box/black_box_wrapper.py
@@ -1,4 +1,3 @@
-from abc import ABC
from typing import Tuple, Union
import gym
@@ -80,7 +79,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
bc_time=np.zeros((1,)) if not self.replanning_schedule else self.current_traj_steps * self.dt,
bc_pos=self.current_pos, bc_vel=self.current_vel)
# TODO: is this correct for replanning? Do we need to adjust anything here?
- self.traj_gen.set_duration(None if self.learn_sub_trajectories else self.duration, np.array([self.dt]))
+ self.traj_gen.set_duration(None if self.learn_sub_trajectories else np.array([self.duration]), np.array([self.dt]))
traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True)
trajectory_tensor, velocity_tensor = traj_dict['pos'], traj_dict['vel']
@@ -109,7 +108,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
""" This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
# agent to learn when to release the ball
- mp_params, env_spec_params = self._episode_callback(action)
+ mp_params, env_spec_params = self.env._episode_callback(action, self.traj_gen)
trajectory, velocity = self.get_trajectory(mp_params)
trajectory_length = len(trajectory)
diff --git a/alr_envs/black_box/raw_interface_wrapper.py b/alr_envs/black_box/raw_interface_wrapper.py
index fdbc2f7..becbfe3 100644
--- a/alr_envs/black_box/raw_interface_wrapper.py
+++ b/alr_envs/black_box/raw_interface_wrapper.py
@@ -1,8 +1,9 @@
from typing import Union, Tuple
+from mp_pytorch.mp.mp_interfaces import MPInterface
+from abc import abstractmethod
import gym
import numpy as np
-from abc import abstractmethod
class RawInterfaceWrapper(gym.Wrapper):
@@ -56,7 +57,7 @@ class RawInterfaceWrapper(gym.Wrapper):
# return bool(self.replanning_model(s))
return False
- def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
+ def _episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
"""
Used to extract the parameters for the motion primitive and other parameters from an action array which might
include other actions like ball releasing time for the beer pong environment.
diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py
index 57b4c69..0051546 100644
--- a/alr_envs/utils/make_env_helpers.py
+++ b/alr_envs/utils/make_env_helpers.py
@@ -43,10 +43,11 @@ def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwa
def make(env_id, seed, **kwargs):
- spec = registry.get(env_id)
+ # spec = registry.get(env_id) # TODO: This doesn't work with gym ==0.21.0
+ spec = registry.spec(env_id)
# This access is required to allow for nested dict updates
all_kwargs = deepcopy(spec._kwargs)
- nested_update(all_kwargs, **kwargs)
+ nested_update(all_kwargs, kwargs)
return _make(env_id, seed, **all_kwargs)
@@ -224,7 +225,7 @@ def make_bb_env_helper(**kwargs):
seed = kwargs.pop("seed", None)
wrappers = kwargs.pop("wrappers")
- traj_gen_kwargs = kwargs.pop("traj_gen_kwargs", {})
+ traj_gen_kwargs = kwargs.pop("trajectory_generator_kwargs", {})
black_box_kwargs = kwargs.pop('black_box_kwargs', {})
contr_kwargs = kwargs.pop("controller_kwargs", {})
phase_kwargs = kwargs.pop("phase_generator_kwargs", {})
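make() above merges the registered kwargs with call-time overrides via nested_update. Its implementation is not part of this series; judging from the call site it is presumably a recursive in-place dict merge along these lines, which is an assumption rather than the repository's code:

    # Assumed behaviour of nested_update(base, update): recursively merge `update`
    # into `base` in place, descending into nested dicts. Inferred from the call
    # site above, not copied from the repository.
    from collections.abc import Mapping

    def nested_update(base: dict, update: Mapping) -> dict:
        for key, value in update.items():
            if isinstance(value, Mapping) and isinstance(base.get(key), dict):
                nested_update(base[key], value)
            else:
                base[key] = value
        return base

    cfg = {"phase_generator_kwargs": {"phase_generator_type": "linear"}}
    nested_update(cfg, {"phase_generator_kwargs": {"tau": 1.0}})
    print(cfg)  # {'phase_generator_kwargs': {'phase_generator_type': 'linear', 'tau': 1.0}}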
From d80df03145a30a3727fb41ce45826ffb546a4001 Mon Sep 17 00:00:00 2001
From: Onur
Date: Mon, 4 Jul 2022 11:29:51 +0200
Subject: [PATCH 056/104] minor changes to beerpong during call with Fabian
---
alr_envs/alr/mujoco/__init__.py | 2 +-
alr_envs/alr/mujoco/beerpong/beerpong.py | 42 +++++++------------
.../mujoco/beerpong/beerpong_reward_staged.py | 2 +-
3 files changed, 17 insertions(+), 29 deletions(-)
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index 359f97b..ac10cda 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -1,7 +1,7 @@
from .ant_jump.ant_jump import ALRAntJumpEnv
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
-from .beerpong.beerpong import ALRBeerBongEnv
+from .beerpong.beerpong import BeerPongEnv
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import ALRHopperThrowEnv
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 218df6d..5415073 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -8,14 +8,9 @@ from gym.envs.mujoco import MujocoEnv
from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
-class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
+class BeerPongEnv(MujocoEnv, utils.EzPickle):
+ # TODO: always use gravity compensation
def __init__(self, frame_skip=2, apply_gravity_comp=True):
-
- cup_goal_pos = np.array([-0.3, -1.2, 0.840])
- if cup_goal_pos.shape[0] == 2:
- cup_goal_pos = np.insert(cup_goal_pos, 2, 0.840)
- self.cup_goal_pos = np.array(cup_goal_pos)
-
self._steps = 0
# Small Context -> Easier. Todo: Should we do different versions?
# self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
@@ -25,26 +20,22 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
"beerpong_wo_cup_big_table" + ".xml")
- self.cup_pos_min = np.array([-1.42, -4.05])
- self.cup_pos_max = np.array([1.42, -1.25])
-
- self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
- self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
+ self._cup_pos_min = np.array([-1.42, -4.05])
+ self._cup_pos_max = np.array([1.42, -1.25])
self.apply_gravity_comp = apply_gravity_comp
self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
self._start_vel = np.zeros(7)
+ # TODO: check if we need to define that in the constructor?
self.ball_site_id = 0
self.ball_id = 11
-
- self.release_step = 100 # time step of ball release
-
- self.ep_length = 600 // frame_skip
self.cup_table_id = 10
- self.add_noise = False
+ self.release_step = 100 # time step of ball release
+ self.ep_length = 600 // frame_skip
+
self.reward_function = BeerPongReward()
self.repeat_action = frame_skip
MujocoEnv.__init__(self, self.xml_path, frame_skip=1)
@@ -76,7 +67,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.set_state(start_pos, init_vel)
start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
self.set_state(start_pos, init_vel)
- xy = self.np_random.uniform(self.cup_pos_min, self.cup_pos_max)
+ xy = self.np_random.uniform(self._cup_pos_min, self._cup_pos_max)
xyz = np.zeros(3)
xyz[:2] = xy
xyz[-1] = 0.840
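A minimal sketch of the goal-sampling step shown in this hunk, assuming a NumPy random generator in place of the environment's np_random and reusing the bounds defined in the constructor:

import numpy as np

cup_pos_min = np.array([-1.42, -4.05])
cup_pos_max = np.array([1.42, -1.25])
rng = np.random.default_rng(0)

# Sample a random x/y cup position on the table; the cup height stays fixed.
xy = rng.uniform(cup_pos_min, cup_pos_max)
xyz = np.zeros(3)
xyz[:2] = xy
xyz[-1] = 0.840  # cup base height used by the environment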
@@ -122,9 +113,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
infos.update(reward_infos)
return ob, reward, done, infos
- # def _check_traj_in_joint_limits(self):
- # return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
-
def _get_obs(self):
theta = self.sim.data.qpos.flat[:7]
theta_dot = self.sim.data.qvel.flat[:7]
@@ -143,28 +131,28 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
@property
def dt(self):
- return super(ALRBeerBongEnv, self).dt * self.repeat_action
+ return super(BeerPongEnv, self).dt * self.repeat_action
-class ALRBeerBongEnvFixedReleaseStep(ALRBeerBongEnv):
+class BeerPongEnvFixedReleaseStep(BeerPongEnv):
def __init__(self, frame_skip=2, apply_gravity_comp=True):
super().__init__(frame_skip, apply_gravity_comp)
self.release_step = 62 # empirically evaluated for frame_skip=2!
-class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
+class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv):
def __init__(self, frame_skip=2, apply_gravity_comp=True):
super().__init__(frame_skip, apply_gravity_comp)
self.release_step = 62 # empirically evaluated for frame_skip=2!
def step(self, a):
if self._steps < self.release_step:
- return super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(a)
+ return super(BeerPongEnvStepBasedEpisodicReward, self).step(a)
else:
reward = 0
done = False
while not done:
- sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(
+ sub_ob, sub_reward, done, sub_infos = super(BeerPongEnvStepBasedEpisodicReward, self).step(
np.zeros(a.shape))
reward += sub_reward
infos = sub_infos
@@ -208,7 +196,7 @@ class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
if __name__ == "__main__":
- env = ALRBeerBongEnv(frame_skip=2)
+ env = BeerPongEnv(frame_skip=2)
# env = ALRBeerBongEnvStepBased(frame_skip=2)
# env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2)
# env = ALRBeerBongEnvFixedReleaseStep(frame_skip=2)
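For reference, a short rollout against the renamed class could look like this sketch; it assumes a local checkout with mujoco-py installed, and uses small random torques purely to exercise the environment loop:

import numpy as np
from alr_envs.alr.mujoco.beerpong.beerpong import BeerPongEnv

env = BeerPongEnv(frame_skip=2)
obs = env.reset()
done = False
while not done:
    action = 0.1 * np.random.randn(7)  # 7-DoF arm, random exploration torques
    obs, reward, done, info = env.step(action)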
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index 807fb19..4308921 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -73,7 +73,7 @@ class BeerPongReward:
# self.cup_table_collision_id = env.model.geom_name2id("cup_base_table_contact")
self.ground_collision_id = {env.model.geom_name2id("ground")}
# self.ground_collision_id = env.model.geom_name2id("ground")
- self.cup_collision_ids = set(env.model.geom_name2id(name) for name in self.cup_collision_objects)
+ self.cup_collision_ids = {env.model.geom_name2id(name) for name in self.cup_collision_objects}
# self.cup_collision_ids = [env.model.geom_name2id(name) for name in self.cup_collision_objects]
self.robot_collision_ids = [env.model.geom_name2id(name) for name in self.robot_collision_objects]
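The switch to a set comprehension above makes membership tests during contact checking O(1). A minimal illustrative sketch of how such a check might look in mujoco-py follows; the helper name _check_collision and its exact use inside the reward class are assumptions:

def _check_collision(sim, ids_a: set, ids_b: set) -> bool:
    # Return True if any active contact pairs a geom from ids_a with one from ids_b.
    # Illustrative sketch of a typical mujoco-py contact loop; the actual
    # reward implementation may differ in details.
    for i in range(sim.data.ncon):
        contact = sim.data.contact[i]
        g1, g2 = contact.geom1, contact.geom2
        if (g1 in ids_a and g2 in ids_b) or (g2 in ids_a and g1 in ids_b):
            return True
    return False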
From 4dc33b0e976505f3adfb25c11a9e04d5cc7eb8a5 Mon Sep 17 00:00:00 2001
From: Onur
Date: Mon, 4 Jul 2022 19:14:31 +0200
Subject: [PATCH 057/104] slim down beerpong constructor further. Not sure if
 we should merge the reward into the environment class.
---
.../beerpong/assets/beerpong_wo_cup.xml | 2 +-
.../assets/beerpong_wo_cup_big_table.xml | 2 +-
alr_envs/alr/mujoco/beerpong/beerpong.py | 43 ++++++++-----------
alr_envs/utils/utils.py | 1 +
4 files changed, 21 insertions(+), 27 deletions(-)
diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
index e96d2bc..6786ecf 100644
--- a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
+++ b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
@@ -123,7 +123,7 @@
-
+