Maximilian Huettenrauch 2021-06-22 10:27:25 +02:00
parent 3876478b96
commit 8075655301
3 changed files with 12 additions and 7 deletions


@@ -113,7 +113,11 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
         return ob, reward, done, dict(reward_dist=reward_dist,
                                       reward_ctrl=reward_ctrl,
                                       velocity=angular_vel,
-                                      traj=self._q_pos, is_success=success,
+                                      # traj=self._q_pos,
+                                      action=a,
+                                      q_pos=self.sim.data.qpos[0:7].ravel().copy(),
+                                      q_vel=self.sim.data.qvel[0:7].ravel().copy(),
+                                      is_success=success,
                                       is_collided=is_collided, sim_crash=crash)

     def check_traj_in_joint_limits(self):
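With this change the per-step info dict exposes the applied action and copies of the first seven joint positions and velocities alongside the success and collision flags. A minimal consumer sketch, assuming the package is importable as alr_envs and registers this environment under some gym id ("ALRBallInACup-v0" below is hypothetical, substitute the id your installation registers):

# Sketch only: "ALRBallInACup-v0" is a hypothetical id; the info keys used here
# (q_pos, q_vel, is_success, is_collided) are the ones added in this commit.
import gym
import alr_envs  # noqa: F401  (assumed to register the ALR environments on import)

env = gym.make("ALRBallInACup-v0")
env.reset()
q_pos_hist, q_vel_hist = [], []
done = False
while not done:
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    q_pos_hist.append(info["q_pos"])  # copy of sim.data.qpos[0:7] at this step
    q_vel_hist.append(info["q_vel"])  # copy of sim.data.qvel[0:7] at this step
print("success:", info["is_success"], "collided:", info["is_collided"])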


@@ -81,11 +81,11 @@ class BallInACupReward(alr_reward_fct.AlrReward):
             # cost = 0.5 * dist_final + 0.05 * cost_angle  # TODO: Increase cost_angle weight  # 0.5 * min_dist +
             # reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided)
             # reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
-            reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 5e-4 * action_cost - self.collision_penalty * int(self._is_collided)
+            reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 1e-3 * action_cost - self.collision_penalty * int(self._is_collided)
             success = dist_final < 0.05 and ball_in_cup and not self._is_collided
             crash = self._is_collided
         else:
-            reward = - 5e-4 * action_cost - 1e-4 * cost_angle  # TODO: increase action_cost weight
+            reward = - 1e-3 * action_cost - 1e-4 * cost_angle  # TODO: increase action_cost weight
             success = False
             crash = False
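Both branches double the action-cost weight from 5e-4 to 1e-3; every other term is unchanged. A small sketch of the final-step reward with the new weights, using the variable names from the diff above (the function name and the example values are made up for illustration):

# Final-step reward with the new weights; the only change in this commit is the
# action-cost coefficient (5e-4 -> 1e-3). Variable names follow the diff above.
def final_step_reward(dist_final, min_dist_final, cost_angle, action_cost,
                      collision_penalty, is_collided):
    return (- dist_final ** 2
            - min_dist_final ** 2
            - 1e-4 * cost_angle
            - 1e-3 * action_cost              # was 5e-4 before this commit
            - collision_penalty * int(is_collided))

# e.g. with made-up values dist_final=0.1, min_dist_final=0.05, cost_angle=0.2,
# action_cost=10, no collision: -0.01 - 0.0025 - 2e-5 - 0.01 - 0 = -0.02252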


@@ -1,4 +1,5 @@
 from abc import ABC, abstractmethod
+from collections import defaultdict

 import gym
 import numpy as np
@@ -64,11 +65,11 @@ class MPWrapper(gym.Wrapper, ABC):
             trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
             velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.n_dof))])

-        # self._trajectory = trajectory
+        self._trajectory = trajectory
         # self._velocity = velocity

         rewards = 0
-        info = {}
+        infos = defaultdict(list)

         # create random obs as the reset function is called externally
         obs = self.env.observation_space.sample()
@@ -77,14 +78,14 @@ class MPWrapper(gym.Wrapper, ABC):
             obs, rew, done, info = self.env.step(ac)
             rewards += rew
             # TODO return all dicts?
-            # [infos[k].append(v) for k, v in info.items()]
+            [infos[k].append(v) for k, v in info.items()]
             if self.render_mode:
                 self.env.render(mode=self.render_mode, **self.render_kwargs)
             if done:
                 break

         done = True
-        return obs[self.env.active_obs], rewards, done, info
+        return obs[self.env.active_obs], rewards, done, infos

     def render(self, mode='human', **kwargs):
         """Only set render options here, such that they can be used during the rollout.