beerpong.py done flag fixed

This commit is contained in:
Fabian 2022-07-13 16:01:48 +02:00
parent 8d1c1b44bf
commit c96802564e
7 changed files with 30 additions and 103 deletions

View File

@ -1,4 +1,4 @@
## Fancy Gym # Fancy Gym
Fancy gym offers a large variety of reinforcement learning environments under the unifying interface Fancy gym offers a large variety of reinforcement learning environments under the unifying interface
of [OpenAI gym](https://gym.openai.com/). We provide support (under the OpenAI interface) for the benchmark suites of [OpenAI gym](https://gym.openai.com/). We provide support (under the OpenAI interface) for the benchmark suites
@ -65,7 +65,7 @@ We prepared [multiple examples](fancy_gym/examples/), please have a look there f
```python ```python
import fancy_gym import fancy_gym
env = fancy_gym.make('HoleReacher-v0', seed=1) env = fancy_gym.make('Reacher5d-v0', seed=1)
state = env.reset() state = env.reset()
for i in range(1000): for i in range(1000):
@ -106,7 +106,7 @@ keys `DMP` and `ProMP` that store a list of available environment names.
import fancy_gym import fancy_gym
print("Custom MP tasks:") print("Custom MP tasks:")
print(fancy_gym.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS) print(fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
print("OpenAI Gym MP tasks:") print("OpenAI Gym MP tasks:")
print(fancy_gym.ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS) print(fancy_gym.ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS)

View File

@ -2,7 +2,7 @@ from fancy_gym import dmc, meta, open_ai
from fancy_gym.utils.make_env_helpers import make, make_bb, make_rank from fancy_gym.utils.make_env_helpers import make, make_bb, make_rank
from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
# Convenience function for all MP environments # Convenience function for all MP environments
from .envs import ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS from .envs import ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS
from .meta import ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS from .meta import ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS
from .open_ai import ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS from .open_ai import ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS
@ -10,4 +10,4 @@ ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {
key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] + key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] + ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key]
for key, value in ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items()} for key, value in ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items()}

View File

@ -17,7 +17,7 @@ from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPER
from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER
from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
DEFAULT_BB_DICT_ProMP = { DEFAULT_BB_DICT_ProMP = {
"name": 'EnvName', "name": 'EnvName',
@ -231,7 +231,7 @@ for _v in _versions:
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_simple_reacher_dmp kwargs=kwargs_dict_simple_reacher_dmp
) )
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}' _env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
@ -244,7 +244,7 @@ for _v in _versions:
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_simple_reacher_promp kwargs=kwargs_dict_simple_reacher_promp
) )
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
# Viapoint reacher # Viapoint reacher
kwargs_dict_via_point_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP) kwargs_dict_via_point_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
@ -259,7 +259,7 @@ register(
# max_episode_steps=1, # max_episode_steps=1,
kwargs=kwargs_dict_via_point_reacher_dmp kwargs=kwargs_dict_via_point_reacher_dmp
) )
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0") ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_via_point_reacher_promp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper) kwargs_dict_via_point_reacher_promp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper)
@ -270,7 +270,7 @@ register(
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_via_point_reacher_promp kwargs=kwargs_dict_via_point_reacher_promp
) )
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0") ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0")
## Hole Reacher ## Hole Reacher
_versions = ["HoleReacher-v0"] _versions = ["HoleReacher-v0"]
@ -290,7 +290,7 @@ for _v in _versions:
# max_episode_steps=1, # max_episode_steps=1,
kwargs=kwargs_dict_hole_reacher_dmp kwargs=kwargs_dict_hole_reacher_dmp
) )
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}' _env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
@ -303,7 +303,7 @@ for _v in _versions:
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_hole_reacher_promp kwargs=kwargs_dict_hole_reacher_promp
) )
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
## ReacherNd ## ReacherNd
_versions = ["Reacher5d-v0", "Reacher7d-v0", "Reacher5dSparse-v0", "Reacher7dSparse-v0"] _versions = ["Reacher5d-v0", "Reacher7d-v0", "Reacher5dSparse-v0", "Reacher7dSparse-v0"]
@ -320,7 +320,7 @@ for _v in _versions:
# max_episode_steps=1, # max_episode_steps=1,
kwargs=kwargs_dict_reacher_dmp kwargs=kwargs_dict_reacher_dmp
) )
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}' _env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
@ -331,7 +331,7 @@ for _v in _versions:
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_reacher_promp kwargs=kwargs_dict_reacher_promp
) )
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
######################################################################################################################## ########################################################################################################################
## Beerpong ProMP ## Beerpong ProMP
@ -352,7 +352,7 @@ for _v in _versions:
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_bp_promp kwargs=kwargs_dict_bp_promp
) )
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
### BP with Fixed release ### BP with Fixed release
_versions = ["BeerPongStepBased-v0", "BeerPongFixedRelease-v0"] _versions = ["BeerPongStepBased-v0", "BeerPongFixedRelease-v0"]
@ -372,7 +372,7 @@ for _v in _versions:
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_bp_promp kwargs=kwargs_dict_bp_promp
) )
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
######################################################################################################################## ########################################################################################################################
## Table Tennis needs to be fixed according to Zhou's implementation ## Table Tennis needs to be fixed according to Zhou's implementation
@ -393,7 +393,7 @@ for _v in _versions:
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', # entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
# kwargs=kwargs_dict_ant_jump_promp # kwargs=kwargs_dict_ant_jump_promp
# ) # )
# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) # ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
# #
# ######################################################################################################################## # ########################################################################################################################
# #
@ -410,7 +410,7 @@ for _v in _versions:
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', # entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
# kwargs=kwargs_dict_halfcheetah_jump_promp # kwargs=kwargs_dict_halfcheetah_jump_promp
# ) # )
# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) # ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
# #
# ######################################################################################################################## # ########################################################################################################################
@ -431,7 +431,7 @@ for _v in _versions:
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_hopper_jump_promp kwargs=kwargs_dict_hopper_jump_promp
) )
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
# ######################################################################################################################## # ########################################################################################################################
# #
@ -449,7 +449,7 @@ for _v in _versions:
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', # entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
# kwargs=kwargs_dict_walker2d_jump_promp # kwargs=kwargs_dict_walker2d_jump_promp
# ) # )
# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) # ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
### Deprecated, we will not provide non-random starts anymore ### Deprecated, we will not provide non-random starts anymore
""" """

View File

@ -6,6 +6,7 @@ from gym import utils
from gym.envs.mujoco import MujocoEnv from gym.envs.mujoco import MujocoEnv
MAX_EPISODE_STEPS_BEERPONG = 300 MAX_EPISODE_STEPS_BEERPONG = 300
FIXED_RELEASE_STEP = 62 # empirically evaluated for frame_skip=2!
# XML Variables # XML Variables
ROBOT_COLLISION_OBJ = ["wrist_palm_link_convex_geom", ROBOT_COLLISION_OBJ = ["wrist_palm_link_convex_geom",
@ -44,7 +45,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59]) self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
self._start_vel = np.zeros(7) self._start_vel = np.zeros(7)
self.release_step = 100 # time step of ball release self.release_step = FIXED_RELEASE_STEP
self.repeat_action = 2 self.repeat_action = 2
# TODO: If accessing IDs is easier in the (new) official mujoco bindings, remove this # TODO: If accessing IDs is easier in the (new) official mujoco bindings, remove this
@ -250,86 +251,16 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
return False return False
class BeerPongEnvFixedReleaseStep(BeerPongEnv):
def __init__(self):
super().__init__()
self.release_step = 62 # empirically evaluated for frame_skip=2!
class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv): class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv):
def __init__(self):
super().__init__()
self.release_step = 62 # empirically evaluated for frame_skip=2!
def step(self, a): def step(self, a):
if self._steps < self.release_step: if self._steps < FIXED_RELEASE_STEP:
return super(BeerPongEnvStepBasedEpisodicReward, self).step(a) return super(BeerPongEnvStepBasedEpisodicReward, self).step(a)
else: else:
reward = 0 reward = 0
done = False done = True
while not done: while self._steps < MAX_EPISODE_STEPS_BEERPONG:
sub_ob, sub_reward, done, sub_infos = super(BeerPongEnvStepBasedEpisodicReward, self).step( obs, sub_reward, done, infos = super(BeerPongEnvStepBasedEpisodicReward, self).step(
np.zeros(a.shape)) np.zeros(a.shape))
reward += sub_reward reward += sub_reward
infos = sub_infos return obs, reward, done, infos
ob = sub_ob
ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the
# internal steps and thus, the observation also needs to be set correctly
return ob, reward, done, infos
# class BeerBongEnvStepBased(BeerBongEnv):
# def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
# super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
# self.release_step = 62 # empirically evaluated for frame_skip=2!
#
# def step(self, a):
# if self._steps < self.release_step:
# return super(BeerBongEnvStepBased, self).step(a)
# else:
# reward = 0
# done = False
# while not done:
# sub_ob, sub_reward, done, sub_infos = super(BeerBongEnvStepBased, self).step(np.zeros(a.shape))
# if not done or sub_infos['sim_crash']:
# reward += sub_reward
# else:
# ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy()
# cup_goal_dist_final = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
# self.sim.model._site_name2id["cup_goal_final_table"]].copy())
# cup_goal_dist_top = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
# self.sim.model._site_name2id["cup_goal_table"]].copy())
# if sub_infos['success']:
# dist_rew = -cup_goal_dist_final ** 2
# else:
# dist_rew = -0.5 * cup_goal_dist_final ** 2 - cup_goal_dist_top ** 2
# reward = reward - sub_infos['action_cost'] + dist_rew
# infos = sub_infos
# ob = sub_ob
# ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the
# # internal steps and thus, the observation also needs to be set correctly
# return ob, reward, done, infos
if __name__ == "__main__":
env = BeerPongEnv()
env.seed(0)
# env = BeerBongEnvStepBased(frame_skip=2)
# env = BeerBongEnvStepBasedEpisodicReward(frame_skip=2)
# env = BeerBongEnvFixedReleaseStep(frame_skip=2)
import time
env.reset()
env.render("human")
for i in range(600):
# ac = 10 * env.action_space.sample()
ac = 0.05 * np.ones(7)
obs, rew, d, info = env.step(ac)
env.render("human")
if d:
print('reward:', rew)
print('RESETTING')
env.reset()
time.sleep(1)
env.close()

View File

@ -32,8 +32,6 @@ class MPWrapper(RawInterfaceWrapper):
if mp.learn_tau: if mp.learn_tau:
self.release_step = action[0] / self.dt # Tau value self.release_step = action[0] / self.dt # Tau value
return action, None return action, None
else:
return action, None
def set_context(self, context): def set_context(self, context):
xyz = np.zeros(3) xyz = np.zeros(3)

View File

@ -156,8 +156,6 @@ class HopperJumpEnv(HopperEnv):
self.init_floor_contact = False self.init_floor_contact = False
self.contact_dist = None self.contact_dist = None
self.data.geom()
return observation return observation
def _is_floor_foot_contact(self): def _is_floor_foot_contact(self):

View File

@ -101,7 +101,7 @@ class TestCustomEnvironments(unittest.TestCase):
def test_bb_functionality(self): def test_bb_functionality(self):
"""Tests that black box environments run without errors using random actions.""" """Tests that black box environments run without errors using random actions."""
for traj_gen, env_ids in fancy_gym.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items(): for traj_gen, env_ids in fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
with self.subTest(msg=traj_gen): with self.subTest(msg=traj_gen):
for id in env_ids: for id in env_ids:
with self.subTest(msg=id): with self.subTest(msg=id):
@ -109,7 +109,7 @@ class TestCustomEnvironments(unittest.TestCase):
def test_bb_determinism(self): def test_bb_determinism(self):
"""Tests that for black box environment identical seeds produce identical trajectories.""" """Tests that for black box environment identical seeds produce identical trajectories."""
for traj_gen, env_ids in fancy_gym.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items(): for traj_gen, env_ids in fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
with self.subTest(msg=traj_gen): with self.subTest(msg=traj_gen):
self._run_env_determinism(env_ids) self._run_env_determinism(env_ids)