beerpong.py done flag fixed
This commit is contained in:
parent
8d1c1b44bf
commit
c96802564e
@ -1,4 +1,4 @@
|
||||
## Fancy Gym
|
||||
# Fancy Gym
|
||||
|
||||
Fancy gym offers a large variety of reinforcement learning environments under the unifying interface
|
||||
of [OpenAI gym](https://gym.openai.com/). We provide support (under the OpenAI interface) for the benchmark suites
|
||||
@ -65,7 +65,7 @@ We prepared [multiple examples](fancy_gym/examples/), please have a look there f
|
||||
```python
|
||||
import fancy_gym
|
||||
|
||||
env = fancy_gym.make('HoleReacher-v0', seed=1)
|
||||
env = fancy_gym.make('Reacher5d-v0', seed=1)
|
||||
state = env.reset()
|
||||
|
||||
for i in range(1000):
|
||||
@ -106,7 +106,7 @@ keys `DMP` and `ProMP` that store a list of available environment names.
|
||||
import fancy_gym
|
||||
|
||||
print("Custom MP tasks:")
|
||||
print(fancy_gym.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
|
||||
print(fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
|
||||
|
||||
print("OpenAI Gym MP tasks:")
|
||||
print(fancy_gym.ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
|
||||
|
@ -2,7 +2,7 @@ from fancy_gym import dmc, meta, open_ai
|
||||
from fancy_gym.utils.make_env_helpers import make, make_bb, make_rank
|
||||
from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
|
||||
# Convenience function for all MP environments
|
||||
from .envs import ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS
|
||||
from .envs import ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS
|
||||
from .meta import ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS
|
||||
from .open_ai import ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS
|
||||
|
||||
@ -10,4 +10,4 @@ ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {
|
||||
key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
|
||||
ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
|
||||
ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key]
|
||||
for key, value in ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items()}
|
||||
for key, value in ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items()}
|
||||
|
@ -17,7 +17,7 @@ from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPER
|
||||
from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER
|
||||
from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
|
||||
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
|
||||
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
|
||||
|
||||
DEFAULT_BB_DICT_ProMP = {
|
||||
"name": 'EnvName',
|
||||
@ -231,7 +231,7 @@ for _v in _versions:
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_simple_reacher_dmp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||
|
||||
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
||||
kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
||||
@ -244,7 +244,7 @@ for _v in _versions:
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_simple_reacher_promp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
|
||||
# Viapoint reacher
|
||||
kwargs_dict_via_point_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||
@ -259,7 +259,7 @@ register(
|
||||
# max_episode_steps=1,
|
||||
kwargs=kwargs_dict_via_point_reacher_dmp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
|
||||
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
|
||||
|
||||
kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
||||
kwargs_dict_via_point_reacher_promp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper)
|
||||
@ -270,7 +270,7 @@ register(
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_via_point_reacher_promp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0")
|
||||
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0")
|
||||
|
||||
## Hole Reacher
|
||||
_versions = ["HoleReacher-v0"]
|
||||
@ -290,7 +290,7 @@ for _v in _versions:
|
||||
# max_episode_steps=1,
|
||||
kwargs=kwargs_dict_hole_reacher_dmp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||
|
||||
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
||||
kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
||||
@ -303,7 +303,7 @@ for _v in _versions:
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_hole_reacher_promp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
|
||||
## ReacherNd
|
||||
_versions = ["Reacher5d-v0", "Reacher7d-v0", "Reacher5dSparse-v0", "Reacher7dSparse-v0"]
|
||||
@ -320,7 +320,7 @@ for _v in _versions:
|
||||
# max_episode_steps=1,
|
||||
kwargs=kwargs_dict_reacher_dmp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||
|
||||
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
||||
kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
||||
@ -331,7 +331,7 @@ for _v in _versions:
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_reacher_promp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
|
||||
########################################################################################################################
|
||||
## Beerpong ProMP
|
||||
@ -352,7 +352,7 @@ for _v in _versions:
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_bp_promp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
|
||||
### BP with Fixed release
|
||||
_versions = ["BeerPongStepBased-v0", "BeerPongFixedRelease-v0"]
|
||||
@ -372,7 +372,7 @@ for _v in _versions:
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_bp_promp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
########################################################################################################################
|
||||
|
||||
## Table Tennis needs to be fixed according to Zhou's implementation
|
||||
@ -393,7 +393,7 @@ for _v in _versions:
|
||||
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
# kwargs=kwargs_dict_ant_jump_promp
|
||||
# )
|
||||
# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
# ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
#
|
||||
# ########################################################################################################################
|
||||
#
|
||||
@ -410,7 +410,7 @@ for _v in _versions:
|
||||
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
# kwargs=kwargs_dict_halfcheetah_jump_promp
|
||||
# )
|
||||
# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
# ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
#
|
||||
# ########################################################################################################################
|
||||
|
||||
@ -431,7 +431,7 @@ for _v in _versions:
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_hopper_jump_promp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
|
||||
# ########################################################################################################################
|
||||
#
|
||||
@ -449,7 +449,7 @@ for _v in _versions:
|
||||
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
# kwargs=kwargs_dict_walker2d_jump_promp
|
||||
# )
|
||||
# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
# ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
|
||||
### Deprecated, we will not provide non-random starts anymore
|
||||
"""
|
||||
|
@ -6,6 +6,7 @@ from gym import utils
|
||||
from gym.envs.mujoco import MujocoEnv
|
||||
|
||||
MAX_EPISODE_STEPS_BEERPONG = 300
|
||||
FIXED_RELEASE_STEP = 62 # empirically evaluated for frame_skip=2!
|
||||
|
||||
# XML Variables
|
||||
ROBOT_COLLISION_OBJ = ["wrist_palm_link_convex_geom",
|
||||
@ -44,7 +45,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
|
||||
self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
|
||||
self._start_vel = np.zeros(7)
|
||||
|
||||
self.release_step = 100 # time step of ball release
|
||||
self.release_step = FIXED_RELEASE_STEP
|
||||
|
||||
self.repeat_action = 2
|
||||
# TODO: If accessing IDs is easier in the (new) official mujoco bindings, remove this
|
||||
@ -250,86 +251,16 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
|
||||
return False
|
||||
|
||||
|
||||
class BeerPongEnvFixedReleaseStep(BeerPongEnv):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.release_step = 62 # empirically evaluated for frame_skip=2!
|
||||
|
||||
|
||||
class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.release_step = 62 # empirically evaluated for frame_skip=2!
|
||||
|
||||
def step(self, a):
|
||||
if self._steps < self.release_step:
|
||||
if self._steps < FIXED_RELEASE_STEP:
|
||||
return super(BeerPongEnvStepBasedEpisodicReward, self).step(a)
|
||||
else:
|
||||
reward = 0
|
||||
done = False
|
||||
while not done:
|
||||
sub_ob, sub_reward, done, sub_infos = super(BeerPongEnvStepBasedEpisodicReward, self).step(
|
||||
done = True
|
||||
while self._steps < MAX_EPISODE_STEPS_BEERPONG:
|
||||
obs, sub_reward, done, infos = super(BeerPongEnvStepBasedEpisodicReward, self).step(
|
||||
np.zeros(a.shape))
|
||||
reward += sub_reward
|
||||
infos = sub_infos
|
||||
ob = sub_ob
|
||||
ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the
|
||||
# internal steps and thus, the observation also needs to be set correctly
|
||||
return ob, reward, done, infos
|
||||
|
||||
|
||||
# class BeerBongEnvStepBased(BeerBongEnv):
|
||||
# def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
|
||||
# super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
|
||||
# self.release_step = 62 # empirically evaluated for frame_skip=2!
|
||||
#
|
||||
# def step(self, a):
|
||||
# if self._steps < self.release_step:
|
||||
# return super(BeerBongEnvStepBased, self).step(a)
|
||||
# else:
|
||||
# reward = 0
|
||||
# done = False
|
||||
# while not done:
|
||||
# sub_ob, sub_reward, done, sub_infos = super(BeerBongEnvStepBased, self).step(np.zeros(a.shape))
|
||||
# if not done or sub_infos['sim_crash']:
|
||||
# reward += sub_reward
|
||||
# else:
|
||||
# ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy()
|
||||
# cup_goal_dist_final = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
|
||||
# self.sim.model._site_name2id["cup_goal_final_table"]].copy())
|
||||
# cup_goal_dist_top = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
|
||||
# self.sim.model._site_name2id["cup_goal_table"]].copy())
|
||||
# if sub_infos['success']:
|
||||
# dist_rew = -cup_goal_dist_final ** 2
|
||||
# else:
|
||||
# dist_rew = -0.5 * cup_goal_dist_final ** 2 - cup_goal_dist_top ** 2
|
||||
# reward = reward - sub_infos['action_cost'] + dist_rew
|
||||
# infos = sub_infos
|
||||
# ob = sub_ob
|
||||
# ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the
|
||||
# # internal steps and thus, the observation also needs to be set correctly
|
||||
# return ob, reward, done, infos
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
env = BeerPongEnv()
|
||||
env.seed(0)
|
||||
# env = BeerBongEnvStepBased(frame_skip=2)
|
||||
# env = BeerBongEnvStepBasedEpisodicReward(frame_skip=2)
|
||||
# env = BeerBongEnvFixedReleaseStep(frame_skip=2)
|
||||
import time
|
||||
|
||||
env.reset()
|
||||
env.render("human")
|
||||
for i in range(600):
|
||||
# ac = 10 * env.action_space.sample()
|
||||
ac = 0.05 * np.ones(7)
|
||||
obs, rew, d, info = env.step(ac)
|
||||
env.render("human")
|
||||
|
||||
if d:
|
||||
print('reward:', rew)
|
||||
print('RESETTING')
|
||||
env.reset()
|
||||
time.sleep(1)
|
||||
env.close()
|
||||
return obs, reward, done, infos
|
||||
|
@ -31,9 +31,7 @@ class MPWrapper(RawInterfaceWrapper):
|
||||
def episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
|
||||
if mp.learn_tau:
|
||||
self.release_step = action[0] / self.dt # Tau value
|
||||
return action, None
|
||||
else:
|
||||
return action, None
|
||||
return action, None
|
||||
|
||||
def set_context(self, context):
|
||||
xyz = np.zeros(3)
|
||||
|
@ -156,8 +156,6 @@ class HopperJumpEnv(HopperEnv):
|
||||
self.init_floor_contact = False
|
||||
self.contact_dist = None
|
||||
|
||||
self.data.geom()
|
||||
|
||||
return observation
|
||||
|
||||
def _is_floor_foot_contact(self):
|
||||
|
@ -101,7 +101,7 @@ class TestCustomEnvironments(unittest.TestCase):
|
||||
|
||||
def test_bb_functionality(self):
|
||||
"""Tests that black box environments run without errors using random actions."""
|
||||
for traj_gen, env_ids in fancy_gym.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
|
||||
for traj_gen, env_ids in fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
|
||||
with self.subTest(msg=traj_gen):
|
||||
for id in env_ids:
|
||||
with self.subTest(msg=id):
|
||||
@ -109,7 +109,7 @@ class TestCustomEnvironments(unittest.TestCase):
|
||||
|
||||
def test_bb_determinism(self):
|
||||
"""Tests that for black box environment identical seeds produce identical trajectories."""
|
||||
for traj_gen, env_ids in fancy_gym.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
|
||||
for traj_gen, env_ids in fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
|
||||
with self.subTest(msg=traj_gen):
|
||||
self._run_env_determinism(env_ids)
|
||||
|
Loading…
Reference in New Issue
Block a user