beerpong.py done flag fixed
This commit is contained in:
parent
8d1c1b44bf
commit
c96802564e
@ -1,4 +1,4 @@
|
|||||||
## Fancy Gym
|
# Fancy Gym
|
||||||
|
|
||||||
Fancy gym offers a large variety of reinforcement learning environments under the unifying interface
|
Fancy gym offers a large variety of reinforcement learning environments under the unifying interface
|
||||||
of [OpenAI gym](https://gym.openai.com/). We provide support (under the OpenAI interface) for the benchmark suites
|
of [OpenAI gym](https://gym.openai.com/). We provide support (under the OpenAI interface) for the benchmark suites
|
||||||
@ -65,7 +65,7 @@ We prepared [multiple examples](fancy_gym/examples/), please have a look there f
|
|||||||
```python
|
```python
|
||||||
import fancy_gym
|
import fancy_gym
|
||||||
|
|
||||||
env = fancy_gym.make('HoleReacher-v0', seed=1)
|
env = fancy_gym.make('Reacher5d-v0', seed=1)
|
||||||
state = env.reset()
|
state = env.reset()
|
||||||
|
|
||||||
for i in range(1000):
|
for i in range(1000):
|
||||||
@ -106,7 +106,7 @@ keys `DMP` and `ProMP` that store a list of available environment names.
|
|||||||
import fancy_gym
|
import fancy_gym
|
||||||
|
|
||||||
print("Custom MP tasks:")
|
print("Custom MP tasks:")
|
||||||
print(fancy_gym.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
|
print(fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
|
||||||
|
|
||||||
print("OpenAI Gym MP tasks:")
|
print("OpenAI Gym MP tasks:")
|
||||||
print(fancy_gym.ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
|
print(fancy_gym.ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
|
||||||
|
@ -2,7 +2,7 @@ from fancy_gym import dmc, meta, open_ai
|
|||||||
from fancy_gym.utils.make_env_helpers import make, make_bb, make_rank
|
from fancy_gym.utils.make_env_helpers import make, make_bb, make_rank
|
||||||
from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
|
from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
|
||||||
# Convenience function for all MP environments
|
# Convenience function for all MP environments
|
||||||
from .envs import ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS
|
from .envs import ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS
|
||||||
from .meta import ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS
|
from .meta import ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS
|
||||||
from .open_ai import ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS
|
from .open_ai import ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS
|
||||||
|
|
||||||
@ -10,4 +10,4 @@ ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {
|
|||||||
key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
|
key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
|
||||||
ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
|
ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
|
||||||
ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key]
|
ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key]
|
||||||
for key, value in ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items()}
|
for key, value in ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items()}
|
||||||
|
@ -17,7 +17,7 @@ from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPER
|
|||||||
from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER
|
from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER
|
||||||
from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
|
from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
|
||||||
|
|
||||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
|
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
|
||||||
|
|
||||||
DEFAULT_BB_DICT_ProMP = {
|
DEFAULT_BB_DICT_ProMP = {
|
||||||
"name": 'EnvName',
|
"name": 'EnvName',
|
||||||
@ -231,7 +231,7 @@ for _v in _versions:
|
|||||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||||
kwargs=kwargs_dict_simple_reacher_dmp
|
kwargs=kwargs_dict_simple_reacher_dmp
|
||||||
)
|
)
|
||||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||||
|
|
||||||
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
||||||
kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
||||||
@ -244,7 +244,7 @@ for _v in _versions:
|
|||||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||||
kwargs=kwargs_dict_simple_reacher_promp
|
kwargs=kwargs_dict_simple_reacher_promp
|
||||||
)
|
)
|
||||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||||
|
|
||||||
# Viapoint reacher
|
# Viapoint reacher
|
||||||
kwargs_dict_via_point_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
|
kwargs_dict_via_point_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||||
@ -259,7 +259,7 @@ register(
|
|||||||
# max_episode_steps=1,
|
# max_episode_steps=1,
|
||||||
kwargs=kwargs_dict_via_point_reacher_dmp
|
kwargs=kwargs_dict_via_point_reacher_dmp
|
||||||
)
|
)
|
||||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
|
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
|
||||||
|
|
||||||
kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
||||||
kwargs_dict_via_point_reacher_promp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper)
|
kwargs_dict_via_point_reacher_promp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper)
|
||||||
@ -270,7 +270,7 @@ register(
|
|||||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||||
kwargs=kwargs_dict_via_point_reacher_promp
|
kwargs=kwargs_dict_via_point_reacher_promp
|
||||||
)
|
)
|
||||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0")
|
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0")
|
||||||
|
|
||||||
## Hole Reacher
|
## Hole Reacher
|
||||||
_versions = ["HoleReacher-v0"]
|
_versions = ["HoleReacher-v0"]
|
||||||
@ -290,7 +290,7 @@ for _v in _versions:
|
|||||||
# max_episode_steps=1,
|
# max_episode_steps=1,
|
||||||
kwargs=kwargs_dict_hole_reacher_dmp
|
kwargs=kwargs_dict_hole_reacher_dmp
|
||||||
)
|
)
|
||||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||||
|
|
||||||
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
||||||
kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
||||||
@ -303,7 +303,7 @@ for _v in _versions:
|
|||||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||||
kwargs=kwargs_dict_hole_reacher_promp
|
kwargs=kwargs_dict_hole_reacher_promp
|
||||||
)
|
)
|
||||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||||
|
|
||||||
## ReacherNd
|
## ReacherNd
|
||||||
_versions = ["Reacher5d-v0", "Reacher7d-v0", "Reacher5dSparse-v0", "Reacher7dSparse-v0"]
|
_versions = ["Reacher5d-v0", "Reacher7d-v0", "Reacher5dSparse-v0", "Reacher7dSparse-v0"]
|
||||||
@ -320,7 +320,7 @@ for _v in _versions:
|
|||||||
# max_episode_steps=1,
|
# max_episode_steps=1,
|
||||||
kwargs=kwargs_dict_reacher_dmp
|
kwargs=kwargs_dict_reacher_dmp
|
||||||
)
|
)
|
||||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||||
|
|
||||||
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
||||||
kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
||||||
@ -331,7 +331,7 @@ for _v in _versions:
|
|||||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||||
kwargs=kwargs_dict_reacher_promp
|
kwargs=kwargs_dict_reacher_promp
|
||||||
)
|
)
|
||||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||||
|
|
||||||
########################################################################################################################
|
########################################################################################################################
|
||||||
## Beerpong ProMP
|
## Beerpong ProMP
|
||||||
@ -352,7 +352,7 @@ for _v in _versions:
|
|||||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||||
kwargs=kwargs_dict_bp_promp
|
kwargs=kwargs_dict_bp_promp
|
||||||
)
|
)
|
||||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||||
|
|
||||||
### BP with Fixed release
|
### BP with Fixed release
|
||||||
_versions = ["BeerPongStepBased-v0", "BeerPongFixedRelease-v0"]
|
_versions = ["BeerPongStepBased-v0", "BeerPongFixedRelease-v0"]
|
||||||
@ -372,7 +372,7 @@ for _v in _versions:
|
|||||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||||
kwargs=kwargs_dict_bp_promp
|
kwargs=kwargs_dict_bp_promp
|
||||||
)
|
)
|
||||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||||
########################################################################################################################
|
########################################################################################################################
|
||||||
|
|
||||||
## Table Tennis needs to be fixed according to Zhou's implementation
|
## Table Tennis needs to be fixed according to Zhou's implementation
|
||||||
@ -393,7 +393,7 @@ for _v in _versions:
|
|||||||
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||||
# kwargs=kwargs_dict_ant_jump_promp
|
# kwargs=kwargs_dict_ant_jump_promp
|
||||||
# )
|
# )
|
||||||
# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
# ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||||
#
|
#
|
||||||
# ########################################################################################################################
|
# ########################################################################################################################
|
||||||
#
|
#
|
||||||
@ -410,7 +410,7 @@ for _v in _versions:
|
|||||||
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||||
# kwargs=kwargs_dict_halfcheetah_jump_promp
|
# kwargs=kwargs_dict_halfcheetah_jump_promp
|
||||||
# )
|
# )
|
||||||
# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
# ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||||
#
|
#
|
||||||
# ########################################################################################################################
|
# ########################################################################################################################
|
||||||
|
|
||||||
@ -431,7 +431,7 @@ for _v in _versions:
|
|||||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||||
kwargs=kwargs_dict_hopper_jump_promp
|
kwargs=kwargs_dict_hopper_jump_promp
|
||||||
)
|
)
|
||||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||||
|
|
||||||
# ########################################################################################################################
|
# ########################################################################################################################
|
||||||
#
|
#
|
||||||
@ -449,7 +449,7 @@ for _v in _versions:
|
|||||||
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||||
# kwargs=kwargs_dict_walker2d_jump_promp
|
# kwargs=kwargs_dict_walker2d_jump_promp
|
||||||
# )
|
# )
|
||||||
# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
# ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||||
|
|
||||||
### Deprecated, we will not provide non-random starts anymore
|
### Deprecated, we will not provide non-random starts anymore
|
||||||
"""
|
"""
|
||||||
|
@ -6,6 +6,7 @@ from gym import utils
|
|||||||
from gym.envs.mujoco import MujocoEnv
|
from gym.envs.mujoco import MujocoEnv
|
||||||
|
|
||||||
MAX_EPISODE_STEPS_BEERPONG = 300
|
MAX_EPISODE_STEPS_BEERPONG = 300
|
||||||
|
FIXED_RELEASE_STEP = 62 # empirically evaluated for frame_skip=2!
|
||||||
|
|
||||||
# XML Variables
|
# XML Variables
|
||||||
ROBOT_COLLISION_OBJ = ["wrist_palm_link_convex_geom",
|
ROBOT_COLLISION_OBJ = ["wrist_palm_link_convex_geom",
|
||||||
@ -44,7 +45,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
|
|||||||
self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
|
self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
|
||||||
self._start_vel = np.zeros(7)
|
self._start_vel = np.zeros(7)
|
||||||
|
|
||||||
self.release_step = 100 # time step of ball release
|
self.release_step = FIXED_RELEASE_STEP
|
||||||
|
|
||||||
self.repeat_action = 2
|
self.repeat_action = 2
|
||||||
# TODO: If accessing IDs is easier in the (new) official mujoco bindings, remove this
|
# TODO: If accessing IDs is easier in the (new) official mujoco bindings, remove this
|
||||||
@ -250,86 +251,16 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
class BeerPongEnvFixedReleaseStep(BeerPongEnv):
|
|
||||||
def __init__(self):
|
|
||||||
super().__init__()
|
|
||||||
self.release_step = 62 # empirically evaluated for frame_skip=2!
|
|
||||||
|
|
||||||
|
|
||||||
class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv):
|
class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv):
|
||||||
def __init__(self):
|
|
||||||
super().__init__()
|
|
||||||
self.release_step = 62 # empirically evaluated for frame_skip=2!
|
|
||||||
|
|
||||||
def step(self, a):
|
def step(self, a):
|
||||||
if self._steps < self.release_step:
|
if self._steps < FIXED_RELEASE_STEP:
|
||||||
return super(BeerPongEnvStepBasedEpisodicReward, self).step(a)
|
return super(BeerPongEnvStepBasedEpisodicReward, self).step(a)
|
||||||
else:
|
else:
|
||||||
reward = 0
|
reward = 0
|
||||||
done = False
|
done = True
|
||||||
while not done:
|
while self._steps < MAX_EPISODE_STEPS_BEERPONG:
|
||||||
sub_ob, sub_reward, done, sub_infos = super(BeerPongEnvStepBasedEpisodicReward, self).step(
|
obs, sub_reward, done, infos = super(BeerPongEnvStepBasedEpisodicReward, self).step(
|
||||||
np.zeros(a.shape))
|
np.zeros(a.shape))
|
||||||
reward += sub_reward
|
reward += sub_reward
|
||||||
infos = sub_infos
|
return obs, reward, done, infos
|
||||||
ob = sub_ob
|
|
||||||
ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the
|
|
||||||
# internal steps and thus, the observation also needs to be set correctly
|
|
||||||
return ob, reward, done, infos
|
|
||||||
|
|
||||||
|
|
||||||
# class BeerBongEnvStepBased(BeerBongEnv):
|
|
||||||
# def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
|
|
||||||
# super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
|
|
||||||
# self.release_step = 62 # empirically evaluated for frame_skip=2!
|
|
||||||
#
|
|
||||||
# def step(self, a):
|
|
||||||
# if self._steps < self.release_step:
|
|
||||||
# return super(BeerBongEnvStepBased, self).step(a)
|
|
||||||
# else:
|
|
||||||
# reward = 0
|
|
||||||
# done = False
|
|
||||||
# while not done:
|
|
||||||
# sub_ob, sub_reward, done, sub_infos = super(BeerBongEnvStepBased, self).step(np.zeros(a.shape))
|
|
||||||
# if not done or sub_infos['sim_crash']:
|
|
||||||
# reward += sub_reward
|
|
||||||
# else:
|
|
||||||
# ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy()
|
|
||||||
# cup_goal_dist_final = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
|
|
||||||
# self.sim.model._site_name2id["cup_goal_final_table"]].copy())
|
|
||||||
# cup_goal_dist_top = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
|
|
||||||
# self.sim.model._site_name2id["cup_goal_table"]].copy())
|
|
||||||
# if sub_infos['success']:
|
|
||||||
# dist_rew = -cup_goal_dist_final ** 2
|
|
||||||
# else:
|
|
||||||
# dist_rew = -0.5 * cup_goal_dist_final ** 2 - cup_goal_dist_top ** 2
|
|
||||||
# reward = reward - sub_infos['action_cost'] + dist_rew
|
|
||||||
# infos = sub_infos
|
|
||||||
# ob = sub_ob
|
|
||||||
# ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the
|
|
||||||
# # internal steps and thus, the observation also needs to be set correctly
|
|
||||||
# return ob, reward, done, infos
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
env = BeerPongEnv()
|
|
||||||
env.seed(0)
|
|
||||||
# env = BeerBongEnvStepBased(frame_skip=2)
|
|
||||||
# env = BeerBongEnvStepBasedEpisodicReward(frame_skip=2)
|
|
||||||
# env = BeerBongEnvFixedReleaseStep(frame_skip=2)
|
|
||||||
import time
|
|
||||||
|
|
||||||
env.reset()
|
|
||||||
env.render("human")
|
|
||||||
for i in range(600):
|
|
||||||
# ac = 10 * env.action_space.sample()
|
|
||||||
ac = 0.05 * np.ones(7)
|
|
||||||
obs, rew, d, info = env.step(ac)
|
|
||||||
env.render("human")
|
|
||||||
|
|
||||||
if d:
|
|
||||||
print('reward:', rew)
|
|
||||||
print('RESETTING')
|
|
||||||
env.reset()
|
|
||||||
time.sleep(1)
|
|
||||||
env.close()
|
|
||||||
|
@ -32,8 +32,6 @@ class MPWrapper(RawInterfaceWrapper):
|
|||||||
if mp.learn_tau:
|
if mp.learn_tau:
|
||||||
self.release_step = action[0] / self.dt # Tau value
|
self.release_step = action[0] / self.dt # Tau value
|
||||||
return action, None
|
return action, None
|
||||||
else:
|
|
||||||
return action, None
|
|
||||||
|
|
||||||
def set_context(self, context):
|
def set_context(self, context):
|
||||||
xyz = np.zeros(3)
|
xyz = np.zeros(3)
|
||||||
|
@ -156,8 +156,6 @@ class HopperJumpEnv(HopperEnv):
|
|||||||
self.init_floor_contact = False
|
self.init_floor_contact = False
|
||||||
self.contact_dist = None
|
self.contact_dist = None
|
||||||
|
|
||||||
self.data.geom()
|
|
||||||
|
|
||||||
return observation
|
return observation
|
||||||
|
|
||||||
def _is_floor_foot_contact(self):
|
def _is_floor_foot_contact(self):
|
||||||
|
@ -101,7 +101,7 @@ class TestCustomEnvironments(unittest.TestCase):
|
|||||||
|
|
||||||
def test_bb_functionality(self):
|
def test_bb_functionality(self):
|
||||||
"""Tests that black box environments run without errors using random actions."""
|
"""Tests that black box environments run without errors using random actions."""
|
||||||
for traj_gen, env_ids in fancy_gym.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
|
for traj_gen, env_ids in fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
|
||||||
with self.subTest(msg=traj_gen):
|
with self.subTest(msg=traj_gen):
|
||||||
for id in env_ids:
|
for id in env_ids:
|
||||||
with self.subTest(msg=id):
|
with self.subTest(msg=id):
|
||||||
@ -109,7 +109,7 @@ class TestCustomEnvironments(unittest.TestCase):
|
|||||||
|
|
||||||
def test_bb_determinism(self):
|
def test_bb_determinism(self):
|
||||||
"""Tests that for black box environment identical seeds produce identical trajectories."""
|
"""Tests that for black box environment identical seeds produce identical trajectories."""
|
||||||
for traj_gen, env_ids in fancy_gym.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
|
for traj_gen, env_ids in fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
|
||||||
with self.subTest(msg=traj_gen):
|
with self.subTest(msg=traj_gen):
|
||||||
self._run_env_determinism(env_ids)
|
self._run_env_determinism(env_ids)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user