From a0692b10896cbefa6708a2f07490881e00522ef8 Mon Sep 17 00:00:00 2001 From: Maximilian Huettenrauch Date: Fri, 19 Mar 2021 16:31:46 +0100 Subject: [PATCH] updates --- alr_envs/classic_control/hole_reacher.py | 12 +- alr_envs/classic_control/utils.py | 48 ++++- alr_envs/mujoco/beerpong/assets/beerpong.xml | 88 ++++++---- alr_envs/mujoco/beerpong/beerpong.py | 20 +-- .../mujoco/beerpong/beerpong_reward_simple.py | 141 +++++++++++++++ alr_envs/mujoco/beerpong/beerpong_simple.py | 164 ++++++++++++++++++ alr_envs/mujoco/beerpong/utils.py | 105 +++++++++++ alr_envs/utils/dmp_env_wrapper.py | 10 +- dmp_env_wrapper_example.py | 18 +- 9 files changed, 537 insertions(+), 69 deletions(-) create mode 100644 alr_envs/mujoco/beerpong/beerpong_reward_simple.py create mode 100644 alr_envs/mujoco/beerpong/beerpong_simple.py create mode 100644 alr_envs/mujoco/beerpong/utils.py diff --git a/alr_envs/classic_control/hole_reacher.py b/alr_envs/classic_control/hole_reacher.py index be1cd1f..d3faf13 100644 --- a/alr_envs/classic_control/hole_reacher.py +++ b/alr_envs/classic_control/hole_reacher.py @@ -83,7 +83,7 @@ class HoleReacher(gym.Env): """ a single step with an action in joint velocity space """ - vel = action + vel = action # + 0.01 * np.random.randn(self.num_links) acc = (vel - self._angle_velocity) / self.dt self._angle_velocity = vel self._joint_angles = self._joint_angles + self.dt * self._angle_velocity @@ -96,20 +96,20 @@ class HoleReacher(gym.Env): dist_reward = 0 if not self._is_collided: - if self._steps == 180: + if self._steps == 199: dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole) else: dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole) reward = - dist_reward ** 2 - reward -= 1e-6 * np.sum(acc**2) + reward -= 5e-8 * np.sum(acc**2) - if self._steps == 180: - reward -= 0.1 * np.sum(vel**2) ** 2 + # if self._steps == 180: + # reward -= 0.1 * np.sum(vel**2) ** 2 if self._is_collided: - reward -= self.collision_penalty + reward = -self.collision_penalty info = {"is_collided": self._is_collided} diff --git a/alr_envs/classic_control/utils.py b/alr_envs/classic_control/utils.py index 3540ad2..b26246f 100644 --- a/alr_envs/classic_control/utils.py +++ b/alr_envs/classic_control/utils.py @@ -2,6 +2,7 @@ from alr_envs.classic_control.hole_reacher import HoleReacher from alr_envs.classic_control.viapoint_reacher import ViaPointReacher from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper from alr_envs.utils.detpmp_env_wrapper import DetPMPEnvWrapper +import numpy as np def make_viapointreacher_env(rank, seed=0): @@ -54,7 +55,7 @@ def make_holereacher_env(rank, seed=0): hole_width=0.15, hole_depth=1, hole_x=1, - collision_penalty=1000) + collision_penalty=100) _env = DmpEnvWrapper(_env, num_dof=5, @@ -62,10 +63,51 @@ def make_holereacher_env(rank, seed=0): duration=2, dt=_env.dt, learn_goal=True, - alpha_phase=2, + alpha_phase=3.5, start_pos=_env.start_pos, policy_type="velocity", weights_scale=100, + goal_scale=0.1 + ) + + _env.seed(seed + rank) + return _env + + return _init + + +def make_holereacher_fix_goal_env(rank, seed=0): + """ + Utility function for multiprocessed env. 
+ + :param env_id: (str) the environment ID + :param num_env: (int) the number of environments you wish to have in subprocesses + :param seed: (int) the initial seed for RNG + :param rank: (int) index of the subprocess + :returns a function that generates an environment + """ + + def _init(): + _env = HoleReacher(num_links=5, + allow_self_collision=False, + allow_wall_collision=False, + hole_width=0.15, + hole_depth=1, + hole_x=1, + collision_penalty=100) + + _env = DmpEnvWrapper(_env, + num_dof=5, + num_basis=5, + duration=2, + dt=_env.dt, + learn_goal=False, + final_pos=np.array([2.02669572, -1.25966385, -1.51618198, -0.80946476, 0.02012344]), + alpha_phase=3.5, + start_pos=_env.start_pos, + policy_type="velocity", + weights_scale=50, + goal_scale=1 ) _env.seed(seed + rank) @@ -103,7 +145,7 @@ def make_holereacher_env_pmp(rank, seed=0): duration=2, post_traj_time=0, dt=_env.dt, - weights_scale=0.15, + weights_scale=0.25, zero_start=True, zero_goal=False ) diff --git a/alr_envs/mujoco/beerpong/assets/beerpong.xml b/alr_envs/mujoco/beerpong/assets/beerpong.xml index d0d434f..017bbad 100644 --- a/alr_envs/mujoco/beerpong/assets/beerpong.xml +++ b/alr_envs/mujoco/beerpong/assets/beerpong.xml @@ -1,6 +1,6 @@ - diff --git a/alr_envs/mujoco/beerpong/beerpong.py b/alr_envs/mujoco/beerpong/beerpong.py index f57a6fc..dc6278d 100644 --- a/alr_envs/mujoco/beerpong/beerpong.py +++ b/alr_envs/mujoco/beerpong/beerpong.py @@ -39,6 +39,8 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): reward_function = BeerpongReward self.reward_function = reward_function(self.sim, self.sim_steps) self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"] + self.ball_id = self.sim.model._body_name2id["ball"] + self.cup_table_id = self.sim.model._body_name2id["cup_table"] @property def current_pos(self): @@ -67,14 +69,9 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): self.set_state(start_pos, init_vel) - start_pos = init_pos_all - start_pos[0:7] = init_pos_robot - start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05]) - - self.set_state(start_pos, init_vel) - - # ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05]) - # self.sim.model.body_pos[self.ball_id] = ball_pos.copy() + ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05]) + self.sim.model.body_pos[self.ball_id] = ball_pos.copy() + self.sim.model.body_pos[self.cup_table_id] = self.context.copy() return self._get_obs() @@ -121,14 +118,17 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): if __name__ == "__main__": env = ALRBeerpongEnv() - ctxt = np.array([-0.20869846, -0.66376693, 1.18088501]) + ctxt = np.array([0, -2, 0.840]) # initial env.configure(ctxt) env.reset() env.render() for i in range(16000): # test with random actions - ac = 0.01 * env.action_space.sample()[0:7] + ac = 0.0 * env.action_space.sample()[0:7] + ac[1] = -0.8 + ac[3] = - 0.3 + ac[5] = -0.2 # ac = env.start_pos # ac[0] += np.pi/2 obs, rew, d, info = env.step(ac) diff --git a/alr_envs/mujoco/beerpong/beerpong_reward_simple.py b/alr_envs/mujoco/beerpong/beerpong_reward_simple.py new file mode 100644 index 0000000..b79e963 --- /dev/null +++ b/alr_envs/mujoco/beerpong/beerpong_reward_simple.py @@ -0,0 +1,141 @@ +import numpy as np +from alr_envs.mujoco import alr_reward_fct + + +class BeerpongReward(alr_reward_fct.AlrReward): + def __init__(self, sim, sim_time): + + self.sim = sim + self.sim_time = 
sim_time + + self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom", + "wrist_pitch_link_convex_decomposition_p1_geom", + "wrist_pitch_link_convex_decomposition_p2_geom", + "wrist_pitch_link_convex_decomposition_p3_geom", + "wrist_yaw_link_convex_decomposition_p1_geom", + "wrist_yaw_link_convex_decomposition_p2_geom", + "forearm_link_convex_decomposition_p1_geom", + "forearm_link_convex_decomposition_p2_geom"] + + self.ball_id = None + self.ball_collision_id = None + self.goal_id = None + self.goal_final_id = None + self.collision_ids = None + + self.ball_traj = None + self.dists = None + self.dists_ctxt = None + self.dists_final = None + self.costs = None + + self.reset(None) + + def reset(self, context): + self.ball_traj = np.zeros(shape=(self.sim_time, 3)) + self.dists = [] + self.dists_ctxt = [] + self.dists_final = [] + self.costs = [] + self.action_costs = [] + self.context = context + self.ball_in_cup = False + self.dist_ctxt = 5 + self.bounce_dist = 2 + self.min_dist = 2 + self.dist_final = 2 + self.table_contact = False + + self.ball_id = self.sim.model._body_name2id["ball"] + self.ball_collision_id = self.sim.model._geom_name2id["ball_geom"] + self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"] + self.goal_id = self.sim.model._site_name2id["cup_goal_table"] + self.goal_final_id = self.sim.model._site_name2id["cup_goal_final_table"] + self.collision_ids = [self.sim.model._geom_name2id[name] for name in self.collision_objects] + self.cup_table_id = self.sim.model._body_name2id["cup_table"] + self.bounce_table_id = self.sim.model._site_name2id["bounce_table"] + + def compute_reward(self, action, sim, step): + action_cost = np.sum(np.square(action)) + self.action_costs.append(action_cost) + + stop_sim = False + success = False + + if self.check_collision(sim): + reward = - 1e-2 * action_cost - 10 + stop_sim = True + return reward, success, stop_sim + + # Compute the current distance from the ball to the inner part of the cup + goal_pos = sim.data.site_xpos[self.goal_id] + ball_pos = sim.data.body_xpos[self.ball_id] + bounce_pos = sim.data.site_xpos[self.bounce_table_id] + goal_final_pos = sim.data.site_xpos[self.goal_final_id] + self.dists.append(np.linalg.norm(goal_pos - ball_pos)) + self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos)) + self.ball_traj[step, :] = ball_pos + + ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id) + table_contact = self.check_ball_table_contact(sim, self.ball_collision_id) + + if table_contact and not self.table_contact: + self.bounce_dist = np.minimum((np.linalg.norm(bounce_pos - ball_pos)), 2) + self.table_contact = True + + if step == self.sim_time - 1: + min_dist = np.min(self.dists) + self.min_dist = min_dist + dist_final = self.dists_final[-1] + self.dist_final = dist_final + + cost = 0.33 * min_dist + 0.33 * dist_final + 0.33 * self.bounce_dist + reward = np.exp(-2 * cost) - 1e-2 * action_cost + success = self.bounce_dist < 0.05 and dist_final < 0.05 and ball_in_cup + else: + reward = - 1e-2 * action_cost + success = False + + return reward, success, stop_sim + + def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt): + if not ball_in_cup: + cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2) + else: + cost = 2 * dist_to_ctxt ** 2 + print('Context Distance:', dist_to_ctxt) + return cost + + def check_ball_table_contact(self, sim, ball_collision_id): + table_collision_id = sim.model._geom_name2id["table_contact_geom"] + for coni in range(0, 
sim.data.ncon): + con = sim.data.contact[coni] + collision = con.geom1 == table_collision_id and con.geom2 == ball_collision_id + collision_trans = con.geom1 == ball_collision_id and con.geom2 == table_collision_id + + if collision or collision_trans: + return True + return False + + def check_ball_in_cup(self, sim, ball_collision_id): + cup_base_collision_id = sim.model._geom_name2id["cup_base_table_contact"] + for coni in range(0, sim.data.ncon): + con = sim.data.contact[coni] + + collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id + collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id + + if collision or collision_trans: + return True + return False + + def check_collision(self, sim): + for coni in range(0, sim.data.ncon): + con = sim.data.contact[coni] + + collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id + collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids + + if collision or collision_trans: + return True + return False diff --git a/alr_envs/mujoco/beerpong/beerpong_simple.py b/alr_envs/mujoco/beerpong/beerpong_simple.py new file mode 100644 index 0000000..7a0908d --- /dev/null +++ b/alr_envs/mujoco/beerpong/beerpong_simple.py @@ -0,0 +1,164 @@ +from gym import utils +import os +import numpy as np +from alr_envs.mujoco import alr_mujoco_env + + +class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): + def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None): + self._steps = 0 + + self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", + "beerpong" + ".xml") + + self.start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59]) + self.start_vel = np.zeros(7) + + self._q_pos = [] + self._q_vel = [] + # self.weight_matrix_scale = 50 + self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.]) + self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5]) + self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05]) + + self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7]) + self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7]) + + self.context = None + + utils.EzPickle.__init__(self) + alr_mujoco_env.AlrMujocoEnv.__init__(self, + self.xml_path, + apply_gravity_comp=apply_gravity_comp, + n_substeps=n_substeps) + + self.sim_time = 8 # seconds + self.sim_steps = int(self.sim_time / self.dt) + if reward_function is None: + from alr_envs.mujoco.beerpong.beerpong_reward_simple import BeerpongReward + reward_function = BeerpongReward + self.reward_function = reward_function(self.sim, self.sim_steps) + self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"] + self.ball_id = self.sim.model._body_name2id["ball"] + self.cup_table_id = self.sim.model._body_name2id["cup_table"] + # self.bounce_table_id = self.sim.model._body_name2id["bounce_table"] + + @property + def current_pos(self): + return self.sim.data.qpos[0:7].copy() + + @property + def current_vel(self): + return self.sim.data.qvel[0:7].copy() + + def configure(self, context): + if context is None: + context = np.array([0, -2, 0.840]) + self.context = context + self.reward_function.reset(context) + + def reset_model(self): + init_pos_all = self.init_qpos.copy() + init_pos_robot = self.start_pos + init_vel = np.zeros_like(init_pos_all) + + self._steps = 0 + self._q_pos = [] + self._q_vel = [] + + start_pos = init_pos_all + start_pos[0:7] = init_pos_robot + # 
start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05]) + + self.set_state(start_pos, init_vel) + + ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05]) + self.sim.model.body_pos[self.ball_id] = ball_pos.copy() + self.sim.model.body_pos[self.cup_table_id] = self.context.copy() + # self.sim.model.body_pos[self.bounce_table_id] = self.context.copy() + + self.sim.forward() + + return self._get_obs() + + def step(self, a): + reward_dist = 0.0 + angular_vel = 0.0 + reward_ctrl = - np.square(a).sum() + action_cost = np.sum(np.square(a)) + + crash = self.do_simulation(a) + joint_cons_viol = self.check_traj_in_joint_limits() + + self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy()) + self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy()) + + ob = self._get_obs() + + if not crash and not joint_cons_viol: + reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps) + done = success or self._steps == self.sim_steps - 1 or stop_sim + self._steps += 1 + else: + reward = -10 - 1e-2 * action_cost + success = False + done = True + return ob, reward, done, dict(reward_dist=reward_dist, + reward_ctrl=reward_ctrl, + velocity=angular_vel, + traj=self._q_pos, is_success=success, + is_collided=crash or joint_cons_viol) + + def check_traj_in_joint_limits(self): + return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min) + + def extend_des_pos(self, des_pos): + des_pos_full = self.start_pos.copy() + des_pos_full[1] = des_pos[0] + des_pos_full[3] = des_pos[1] + des_pos_full[5] = des_pos[2] + return des_pos_full + + def extend_des_vel(self, des_vel): + des_vel_full = self.start_vel.copy() + des_vel_full[1] = des_vel[0] + des_vel_full[3] = des_vel[1] + des_vel_full[5] = des_vel[2] + return des_vel_full + + def _get_obs(self): + theta = self.sim.data.qpos.flat[:7] + return np.concatenate([ + np.cos(theta), + np.sin(theta), + # self.get_body_com("target"), # only return target to make problem harder + [self._steps], + ]) + + + +if __name__ == "__main__": + env = ALRBeerpongEnv() + ctxt = np.array([0, -2, 0.840]) # initial + + env.configure(ctxt) + env.reset() + env.render() + for i in range(16000): + # test with random actions + ac = 0.0 * env.action_space.sample()[0:7] + ac[1] = -0.01 + ac[3] = - 0.01 + ac[5] = -0.01 + # ac = env.start_pos + # ac[0] += np.pi/2 + obs, rew, d, info = env.step(ac) + env.render() + + print(rew) + + if d: + break + + env.close() + diff --git a/alr_envs/mujoco/beerpong/utils.py b/alr_envs/mujoco/beerpong/utils.py new file mode 100644 index 0000000..446dba1 --- /dev/null +++ b/alr_envs/mujoco/beerpong/utils.py @@ -0,0 +1,105 @@ +from alr_envs.utils.detpmp_env_wrapper import DetPMPEnvWrapper +from alr_envs.mujoco.beerpong.beerpong import ALRBeerpongEnv +from alr_envs.mujoco.beerpong.beerpong_simple import ALRBeerpongEnv as ALRBeerpongEnvSimple + + +def make_contextual_env(rank, seed=0): + """ + Utility function for multiprocessed env. 
+ + :param env_id: (str) the environment ID + :param num_env: (int) the number of environments you wish to have in subprocesses + :param seed: (int) the initial seed for RNG + :param rank: (int) index of the subprocess + :returns a function that generates an environment + """ + + def _init(): + env = ALRBeerpongEnv() + + env = DetPMPEnvWrapper(env, + num_dof=7, + num_basis=5, + width=0.005, + policy_type="motor", + start_pos=env.start_pos, + duration=3.5, + post_traj_time=4.5, + dt=env.dt, + weights_scale=0.5, + zero_start=True, + zero_goal=True + ) + + env.seed(seed + rank) + return env + + return _init + + +def make_env(rank, seed=0): + """ + Utility function for multiprocessed env. + + :param env_id: (str) the environment ID + :param num_env: (int) the number of environments you wish to have in subprocesses + :param seed: (int) the initial seed for RNG + :param rank: (int) index of the subprocess + :returns a function that generates an environment + """ + + def _init(): + env = ALRBeerpongEnvSimple() + + env = DetPMPEnvWrapper(env, + num_dof=7, + num_basis=5, + width=0.005, + policy_type="motor", + start_pos=env.start_pos, + duration=3.5, + post_traj_time=4.5, + dt=env.dt, + weights_scale=0.25, + zero_start=True, + zero_goal=True + ) + + env.seed(seed + rank) + return env + + return _init + + +def make_simple_env(rank, seed=0): + """ + Utility function for multiprocessed env. + + :param env_id: (str) the environment ID + :param num_env: (int) the number of environments you wish to have in subprocesses + :param seed: (int) the initial seed for RNG + :param rank: (int) index of the subprocess + :returns a function that generates an environment + """ + + def _init(): + env = ALRBeerpongEnvSimple() + + env = DetPMPEnvWrapper(env, + num_dof=3, + num_basis=5, + width=0.005, + policy_type="motor", + start_pos=env.start_pos[1::2], + duration=3.5, + post_traj_time=4.5, + dt=env.dt, + weights_scale=0.5, + zero_start=True, + zero_goal=True + ) + + env.seed(seed + rank) + return env + + return _init diff --git a/alr_envs/utils/dmp_env_wrapper.py b/alr_envs/utils/dmp_env_wrapper.py index 849ac1b..5a2f27b 100644 --- a/alr_envs/utils/dmp_env_wrapper.py +++ b/alr_envs/utils/dmp_env_wrapper.py @@ -19,7 +19,9 @@ class DmpEnvWrapper(gym.Wrapper): learn_goal=False, post_traj_time=0., policy_type=None, - weights_scale=1.): + weights_scale=1., + goal_scale=1., + ): super(DmpEnvWrapper, self).__init__(env) self.num_dof = num_dof self.num_basis = num_basis @@ -52,6 +54,7 @@ class DmpEnvWrapper(gym.Wrapper): self.dmp.set_weights(dmp_weights, dmp_goal_pos) self.weights_scale = weights_scale + self.goal_scale = goal_scale policy_class = get_policy_class(policy_type) self.policy = policy_class(env) @@ -78,10 +81,11 @@ class DmpEnvWrapper(gym.Wrapper): goal_pos = params[0, -self.num_dof:] weight_matrix = np.reshape(params[:, :-self.num_dof], [self.num_basis, self.num_dof]) else: - goal_pos = None + goal_pos = self.dmp.dmp_goal_pos.flatten() + assert goal_pos is not None weight_matrix = np.reshape(params, [self.num_basis, self.num_dof]) - return goal_pos, weight_matrix * self.weights_scale + return goal_pos * self.goal_scale, weight_matrix * self.weights_scale def rollout(self, params, context=None, render=False): """ This function generates a trajectory based on a DMP and then does the usual loop over reset and step""" diff --git a/dmp_env_wrapper_example.py b/dmp_env_wrapper_example.py index 6fd0a57..9e817cd 100644 --- a/dmp_env_wrapper_example.py +++ b/dmp_env_wrapper_example.py @@ -1,4 +1,5 @@ from 
alr_envs.classic_control.utils import make_viapointreacher_env +from alr_envs.classic_control.utils import make_holereacher_env, make_holereacher_fix_goal_env from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv import numpy as np @@ -7,21 +8,20 @@ if __name__ == "__main__": n_samples = 1 n_cpus = 4 - dim = 25 + dim = 30 # env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)], # n_samples=n_samples) - test_env = make_viapointreacher_env(0)() + test_env = make_holereacher_env(0)() # params = np.random.randn(n_samples, dim) - params = np.array([ 217.54494933, -1.85169983, 24.08414447, 42.23816868, - 23.32071702, 7.60780651, -31.74777741, 265.50634253, - 463.43822562, 245.93948374, -272.64003621, -45.24999553, - 503.21185823, 809.17742517, 393.12387021, -196.54196471, - 6.79327307, 374.82429078, 552.4119579 , 197.3963343 , - 243.87357056, -39.56041541, -616.93957463, -710.0772516 , - -414.21769789]) + params = np.array([[ 1.386102 , -3.29980525, 4.70402733, 1.3966668 , 0.73774902, + 3.14676681, -4.98644416, 6.20303193, 1.30502127, -0.09330522, + 7.62656797, -5.76893033, 3.4706711 , -0.6944142 , -3.33442788, + 12.31421548, -0.72760271, -6.9090723 , 7.02903814, -8.7236836 , + 1.4805914 , 0.53185824, -5.46626893, 0.69692163, 13.58472666, + 0.77199316, 2.02906724, -3.0203244 , -1.00533159, -0.57417351]]) # params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
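A minimal usage sketch, not part of this patch: the example script above builds a DmpEnvWrapper-wrapped HoleReacher via make_holereacher_env(0)() and defines a (1, 30) parameter array (5 basis weights per joint for 5 joints, plus 5 learned goal positions, scaled by weights_scale and goal_scale inside the wrapper). Assuming it then calls the wrapper's rollout method, whose signature rollout(self, params, context=None, render=False) appears in the dmp_env_wrapper.py hunk above, a self-contained run could look like the following; the random parameters, the render=True flag, and the ignored return value are illustrative assumptions only.

# Illustrative sketch (not part of this patch); assumes the alr_envs package from this repo is installed.
from alr_envs.classic_control.utils import make_holereacher_env
import numpy as np

# Wrapped HoleReacher with a learned DMP goal, as configured in make_holereacher_env above.
test_env = make_holereacher_env(0)()

# 30 parameters: 5 basis x 5 DoF weights plus 5 goal positions (dim = 30 in the example script).
params = np.random.randn(1, 30)

# rollout() generates the DMP trajectory and runs the internal reset/step loop;
# its return value is not shown in this patch, so it is not unpacked here.
test_env.rollout(params, render=True)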