learn goal with auto scale basis

parent 83eb8f7f64
commit 4f9b1fad25
@@ -64,11 +64,13 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         # self.return_context_observation = not (learn_sub_trajectories or self.do_replanning)
         self.return_context_observation = True
         self.traj_gen_action_space = self._get_traj_gen_action_space()
-        # self.action_space = self._get_action_space()
+        self.action_space = self._get_action_space()

-        tricky_action_upperbound = [np.inf] * (self.traj_gen_action_space.shape[0] - 7)
-        tricky_action_lowerbound = [-np.inf] * (self.traj_gen_action_space.shape[0] - 7)
-        self.action_space = spaces.Box(np.array(tricky_action_lowerbound), np.array(tricky_action_upperbound), dtype=np.float32)
+        # no goal learning
+        # tricky_action_upperbound = [np.inf] * (self.traj_gen_action_space.shape[0] - 7)
+        # tricky_action_lowerbound = [-np.inf] * (self.traj_gen_action_space.shape[0] - 7)
+        # self.action_space = spaces.Box(np.array(tricky_action_lowerbound), np.array(tricky_action_upperbound), dtype=np.float32)
+
         self.observation_space = self._get_observation_space()

         # rendering
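The constructor change above retires the hand-rolled "tricky" action space, which cut the last 7 dimensions (one goal weight per DoF) out of the trajectory-generator parameters, and returns to self._get_action_space() so the goal is learned as well. Below is a minimal sketch of the before/after dimensionality, assuming a 7-DoF arm and the num_basis = 4 set in the registration further down; the names here are illustrative, not the repo's exact code:

import numpy as np
from gym import spaces

num_dof = 7                            # assumption: 7-DoF robot arm
num_basis = 4                          # matches the updated basis_generator_kwargs
full_dim = num_dof * (num_basis + 1)   # basis weights plus one goal weight per DoF

# Before: the goal dimensions were stripped, so the policy could not adapt the goal.
no_goal_space = spaces.Box(low=-np.inf, high=np.inf,
                           shape=(full_dim - num_dof,), dtype=np.float32)

# After: the full parameter space is exposed and the goal is learned jointly.
full_space = spaces.Box(low=-np.inf, high=np.inf,
                        shape=(full_dim,), dtype=np.float32)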
@@ -156,9 +158,9 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         """ This function generates a trajectory based on a MP and then does the usual loop over reset and step"""

         ## tricky part, only use weights basis
-        basis_weights = action.reshape(7, -1)
-        goal_weights = np.zeros((7, 1))
-        action = np.concatenate((basis_weights, goal_weights), axis=1).flatten()
+        # basis_weights = action.reshape(7, -1)
+        # goal_weights = np.zeros((7, 1))
+        # action = np.concatenate((basis_weights, goal_weights), axis=1).flatten()

         # TODO remove this part, right now only needed for beer pong
         mp_params, env_spec_params = self.env.episode_callback(action, self.traj_gen)
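In the step path, the same trick is removed: the old code reshaped the incoming action into per-DoF basis weights, appended a column of zero goal weights, and flattened it back before handing it to the trajectory generator. A hedged illustration of what the removed padding did (shapes taken from the diff; the random action is just a stand-in for a policy output):

import numpy as np

num_dof, num_basis = 7, 4
action = np.random.randn(num_dof * num_basis)   # hypothetical policy output

# Old behaviour: force the goal weights to zero before passing the action on.
basis_weights = action.reshape(num_dof, -1)                # (7, num_basis)
goal_weights = np.zeros((num_dof, 1))                      # goal pinned to 0
padded = np.concatenate((basis_weights, goal_weights), axis=1).flatten()

# New behaviour: the action already carries the learned goal weights
# and reaches episode_callback unchanged.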
@@ -502,7 +502,7 @@ for _v in _versions:
     kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3
     kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True
     kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0
-    kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 5
+    kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4
     kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['alpha'] = 10.
     kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3  # 3.5, 4 to try
     kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3
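In the box-pushing ProDMP registration, num_basis drops from 5 to 4. Assuming ProDMP learns one goal weight per DoF on top of the basis weights (consistent with the (7, 1) goal column in the diff above), this keeps the total parameter count unchanged; a quick back-of-the-envelope check:

num_dof, num_basis = 7, 4
learn_goal = True
params_per_dof = num_basis + (1 if learn_goal else 0)
print(num_dof * params_per_dof)  # 35 parameters, vs. 7 * 5 = 35 before, without a goal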