From 80756553010d2493218c11bfb7ebd2172429377c Mon Sep 17 00:00:00 2001
From: Maximilian Huettenrauch
Date: Tue, 22 Jun 2021 10:27:25 +0200
Subject: [PATCH] update

---
 alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py           | 6 +++++-
 .../mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py  | 4 ++--
 alr_envs/utils/mps/mp_wrapper.py                         | 9 +++++----
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py
index fdd299e..e7da171 100644
--- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py
+++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py
@@ -113,7 +113,11 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
         return ob, reward, done, dict(reward_dist=reward_dist,
                                       reward_ctrl=reward_ctrl,
                                       velocity=angular_vel,
-                                      traj=self._q_pos, is_success=success,
+                                      # traj=self._q_pos,
+                                      action=a,
+                                      q_pos=self.sim.data.qpos[0:7].ravel().copy(),
+                                      q_vel=self.sim.data.qvel[0:7].ravel().copy(),
+                                      is_success=success,
                                       is_collided=is_collided, sim_crash=crash)
 
     def check_traj_in_joint_limits(self):
diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py
index 8f0c588..daee289 100644
--- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py
+++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py
@@ -81,11 +81,11 @@ class BallInACupReward(alr_reward_fct.AlrReward):
             # cost = 0.5 * dist_final + 0.05 * cost_angle  # TODO: Increase cost_angle weight  # 0.5 * min_dist +
             # reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided)
             # reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
-            reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 5e-4 * action_cost - self.collision_penalty * int(self._is_collided)
+            reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 1e-3 * action_cost - self.collision_penalty * int(self._is_collided)
             success = dist_final < 0.05 and ball_in_cup and not self._is_collided
             crash = self._is_collided
         else:
-            reward = - 5e-4 * action_cost - 1e-4 * cost_angle  # TODO: increase action_cost weight
+            reward = - 1e-3 * action_cost - 1e-4 * cost_angle  # TODO: increase action_cost weight
             success = False
             crash = False
 
diff --git a/alr_envs/utils/mps/mp_wrapper.py b/alr_envs/utils/mps/mp_wrapper.py
index c31072f..9ec1cbc 100644
--- a/alr_envs/utils/mps/mp_wrapper.py
+++ b/alr_envs/utils/mps/mp_wrapper.py
@@ -1,4 +1,5 @@
 from abc import ABC, abstractmethod
+from collections import defaultdict
 
 import gym
 import numpy as np
@@ -64,11 +65,11 @@ class MPWrapper(gym.Wrapper, ABC):
             trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
             velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.n_dof))])
 
-        # self._trajectory = trajectory
+        self._trajectory = trajectory
         # self._velocity = velocity
 
         rewards = 0
-        info = {}
+        infos = defaultdict(list)
         # create random obs as the reset function is called externally
         obs = self.env.observation_space.sample()
 
@@ -77,14 +78,14 @@
             obs, rew, done, info = self.env.step(ac)
             rewards += rew
             # TODO return all dicts?
-            # [infos[k].append(v) for k, v in info.items()]
+            [infos[k].append(v) for k, v in info.items()]
             if self.render_mode:
                 self.env.render(mode=self.render_mode, **self.render_kwargs)
             if done:
                 break
 
         done = True
-        return obs[self.env.active_obs], rewards, done, info
+        return obs[self.env.active_obs], rewards, done, infos
 
     def render(self, mode='human', **kwargs):
         """Only set render options here, such that they can be used during the rollout.
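
Editorial note, not part of the patch: the mp_wrapper.py change replaces the single returned info dict with a defaultdict(list) that accumulates every per-step info entry over the rollout. A minimal standalone sketch of that aggregation pattern follows; the sample per-step dicts and their values are made up for illustration (the keys "is_success" and "reward_ctrl" mirror entries the ball-in-a-cup env reports).

    from collections import defaultdict

    # Hypothetical per-step info dicts, as the wrapped env.step() would return them.
    step_infos = [
        {"is_success": False, "reward_ctrl": -0.10},
        {"is_success": True, "reward_ctrl": -0.05},
    ]

    # Accumulate each key's values across steps into one dict of lists.
    infos = defaultdict(list)
    for info in step_infos:
        for k, v in info.items():
            infos[k].append(v)

    print(dict(infos))
    # {'is_success': [False, True], 'reward_ctrl': [-0.1, -0.05]}

The wrapper does the same appending inline (via a list comprehension over info.items()) for every environment step of the generated trajectory, so the caller receives the full per-step history instead of only the last step's info.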