update

commit 8075655301
parent 3876478b96
@@ -113,7 +113,11 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
         return ob, reward, done, dict(reward_dist=reward_dist,
                                       reward_ctrl=reward_ctrl,
                                       velocity=angular_vel,
-                                      traj=self._q_pos, is_success=success,
+                                      # traj=self._q_pos,
+                                      action=a,
+                                      q_pos=self.sim.data.qpos[0:7].ravel().copy(),
+                                      q_vel=self.sim.data.qvel[0:7].ravel().copy(),
+                                      is_success=success,
                                       is_collided=is_collided, sim_crash=crash)

     def check_traj_in_joint_limits(self):

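With this change, step() exposes the per-step joint state directly in the info dict, so a caller can log the 7-DoF trajectory without reaching into the simulator. A minimal usage sketch, assuming an already constructed ALRBallInACupEnv instance and a list of actions; the rollout helper below is hypothetical and not part of the commit:

import numpy as np

def rollout(env, actions):
    # Hypothetical helper: collect the q_pos/q_vel copies that step() now
    # places into the info dict (copies of sim.data.qpos[0:7] / qvel[0:7]).
    q_pos_traj, q_vel_traj = [], []
    env.reset()
    for a in actions:
        obs, reward, done, info = env.step(a)
        q_pos_traj.append(info["q_pos"])
        q_vel_traj.append(info["q_vel"])
        if done:
            break
    return np.asarray(q_pos_traj), np.asarray(q_vel_traj)
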
@@ -81,11 +81,11 @@ class BallInACupReward(alr_reward_fct.AlrReward):
             # cost = 0.5 * dist_final + 0.05 * cost_angle  # TODO: Increase cost_angle weight  # 0.5 * min_dist +
             # reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided)
             # reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
-            reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 5e-4 * action_cost - self.collision_penalty * int(self._is_collided)
+            reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 1e-3 * action_cost - self.collision_penalty * int(self._is_collided)
             success = dist_final < 0.05 and ball_in_cup and not self._is_collided
             crash = self._is_collided
         else:
-            reward = - 5e-4 * action_cost - 1e-4 * cost_angle  # TODO: increase action_cost weight
+            reward = - 1e-3 * action_cost - 1e-4 * cost_angle  # TODO: increase action_cost weight
             success = False
             crash = False

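Isolated from the class, the updated shaping term reads as below; the only change in this hunk is the action-cost weight moving from 5e-4 to 1e-3, in line with the TODO. The standalone function is a sketch, assuming dist_final, min_dist_final, cost_angle and action_cost have been precomputed as in BallInACupReward:

def shaped_reward(dist_final, min_dist_final, cost_angle, action_cost,
                  is_collided, collision_penalty):
    # Sketch of the final-phase reward with the increased action-cost weight.
    return (- dist_final ** 2
            - min_dist_final ** 2
            - 1e-4 * cost_angle
            - 1e-3 * action_cost
            - collision_penalty * int(is_collided))

# Outside the final phase only the regularisers apply:
# reward = -1e-3 * action_cost - 1e-4 * cost_angle
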
@@ -1,4 +1,5 @@
 from abc import ABC, abstractmethod
+from collections import defaultdict

 import gym
 import numpy as np

@@ -64,11 +65,11 @@ class MPWrapper(gym.Wrapper, ABC):
             trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
             velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.n_dof))])

-        # self._trajectory = trajectory
+        self._trajectory = trajectory
         # self._velocity = velocity

         rewards = 0
-        info = {}
+        infos = defaultdict(list)
         # create random obs as the reset function is called externally
         obs = self.env.observation_space.sample()

@@ -77,14 +78,14 @@ class MPWrapper(gym.Wrapper, ABC):
             obs, rew, done, info = self.env.step(ac)
             rewards += rew
             # TODO return all dicts?
-            # [infos[k].append(v) for k, v in info.items()]
+            [infos[k].append(v) for k, v in info.items()]
             if self.render_mode:
                 self.env.render(mode=self.render_mode, **self.render_kwargs)
             if done:
                 break

         done = True
-        return obs[self.env.active_obs], rewards, done, info
+        return obs[self.env.active_obs], rewards, done, infos

     def render(self, mode='human', **kwargs):
         """Only set render options here, such that they can be used during the rollout.

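Taken together, the MPWrapper changes replace the single last-step info dict with a key-wise history over all steps. A self-contained sketch of that aggregation pattern (aggregate_infos is illustrative, not a function in the repo):

from collections import defaultdict

def aggregate_infos(step_infos):
    # Append every per-step info value to a list under its key, as the
    # rollout loop now does with `infos[k].append(v)`.
    infos = defaultdict(list)
    for info in step_infos:
        for k, v in info.items():
            infos[k].append(v)
    return infos

# Example: two steps of env infos
# -> {'reward_dist': [0.3, 0.1], 'is_success': [False, True]}
print(dict(aggregate_infos([{"reward_dist": 0.3, "is_success": False},
                            {"reward_dist": 0.1, "is_success": True}])))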