From 4f9b1fad25b51f53e296eda4b85af07f9afb72fa Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Mon, 31 Oct 2022 22:37:13 +0100 Subject: [PATCH] learn goal with auto scale basis --- fancy_gym/black_box/black_box_wrapper.py | 16 +++++++++------- fancy_gym/envs/__init__.py | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index ad314f9..5269d29 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -64,11 +64,13 @@ class BlackBoxWrapper(gym.ObservationWrapper): # self.return_context_observation = not (learn_sub_trajectories or self.do_replanning) self.return_context_observation = True self.traj_gen_action_space = self._get_traj_gen_action_space() - # self.action_space = self._get_action_space() + self.action_space = self._get_action_space() + + # no goal learning + # tricky_action_upperbound = [np.inf] * (self.traj_gen_action_space.shape[0] - 7) + # tricky_action_lowerbound = [-np.inf] * (self.traj_gen_action_space.shape[0] - 7) + # self.action_space = spaces.Box(np.array(tricky_action_lowerbound), np.array(tricky_action_upperbound), dtype=np.float32) - tricky_action_upperbound = [np.inf] * (self.traj_gen_action_space.shape[0] - 7) - tricky_action_lowerbound = [-np.inf] * (self.traj_gen_action_space.shape[0] - 7) - self.action_space = spaces.Box(np.array(tricky_action_lowerbound), np.array(tricky_action_upperbound), dtype=np.float32) self.observation_space = self._get_observation_space() # rendering @@ -156,9 +158,9 @@ class BlackBoxWrapper(gym.ObservationWrapper): """ This function generates a trajectory based on a MP and then does the usual loop over reset and step""" ## tricky part, only use weights basis - basis_weights = action.reshape(7, -1) - goal_weights = np.zeros((7, 1)) - action = np.concatenate((basis_weights, goal_weights), axis=1).flatten() + # basis_weights = action.reshape(7, -1) + # goal_weights = np.zeros((7, 1)) + # action = np.concatenate((basis_weights, goal_weights), axis=1).flatten() # TODO remove this part, right now only needed for beer pong mp_params, env_spec_params = self.env.episode_callback(action, self.traj_gen) diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index 3083e81..91e41db 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -502,7 +502,7 @@ for _v in _versions: kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3 kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0 - kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 5 + kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4 kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['alpha'] = 10. kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 # 3.5, 4 to try kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3