Add tau and delay bounds; update context range

This commit is contained in:
Hongyi Zhou 2022-11-05 15:56:59 +01:00
parent 6193f87fe7
commit 7d16b420c1
3 changed files with 9 additions and 7 deletions

View File

@ -70,7 +70,10 @@ class BlackBoxWrapper(gym.ObservationWrapper):
# tricky_action_upperbound = [np.inf] * (self.traj_gen_action_space.shape[0] - 7) # tricky_action_upperbound = [np.inf] * (self.traj_gen_action_space.shape[0] - 7)
# tricky_action_lowerbound = [-np.inf] * (self.traj_gen_action_space.shape[0] - 7) # tricky_action_lowerbound = [-np.inf] * (self.traj_gen_action_space.shape[0] - 7)
# self.action_space = spaces.Box(np.array(tricky_action_lowerbound), np.array(tricky_action_upperbound), dtype=np.float32) # self.action_space = spaces.Box(np.array(tricky_action_lowerbound), np.array(tricky_action_upperbound), dtype=np.float32)
self.action_space.low[0] = 0.5
self.action_space.high[0] = 1.5
self.action_space.low[1] = 0.05
self.action_space.high[1] = 0.2
self.observation_space = self._get_observation_space() self.observation_space = self._get_observation_space()
# rendering # rendering

View File

@ -10,9 +10,9 @@ import mujoco
MAX_EPISODE_STEPS_TABLE_TENNIS = 250 MAX_EPISODE_STEPS_TABLE_TENNIS = 250
CONTEXT_BOUNDS_2DIMS = np.array([[-1.2, -0.6], [-0.2, 0.0]]) CONTEXT_BOUNDS_2DIMS = np.array([[-1.0, -0.65], [-0.2, 0.65]])
CONTEXT_BOUNDS_4DIMS = np.array([[-1.2, -0.6, -1.0, -0.65], CONTEXT_BOUNDS_4DIMS = np.array([[-1.0, -0.65, -1.0, -0.65],
[-0.2, 0.6, -0.2, 0.65]]) [-0.2, 0.65, -0.2, 0.65]])
class TableTennisEnv(MujocoEnv, utils.EzPickle): class TableTennisEnv(MujocoEnv, utils.EzPickle):

View File

@ -22,7 +22,6 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True
returns = 0 returns = 0
# env.render(mode=None) # env.render(mode=None)
obs = env.reset() obs = env.reset()
print(obs)
# number of samples/full trajectories (multiple environment steps) # number of samples/full trajectories (multiple environment steps)
for i in range(iterations): for i in range(iterations):
@ -51,7 +50,7 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True
if done: if done:
# print(reward) # print(reward)
obs = env.reset() obs = env.reset()
print(obs) print("steps: {}".format(info["num_steps"][-1]))
def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render=True): def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render=True):
@ -166,7 +165,7 @@ if __name__ == '__main__':
# ProMP # ProMP
# example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render) # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
# example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render) # example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render)
example_mp("TableTennis4DProMP-v0", seed=10, iterations=5, render=render) example_mp("TableTennis4DProMP-v0", seed=10, iterations=10, render=True)
# ProDMP # ProDMP
# example_mp("BoxPushingDenseProDMP-v0", seed=10, iterations=16, render=render) # example_mp("BoxPushingDenseProDMP-v0", seed=10, iterations=16, render=render)