commit 8075655301
parent 3876478b96

    update
@@ -113,7 +113,11 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
         return ob, reward, done, dict(reward_dist=reward_dist,
                                       reward_ctrl=reward_ctrl,
                                       velocity=angular_vel,
-                                      traj=self._q_pos, is_success=success,
+                                      # traj=self._q_pos,
+                                      action=a,
+                                      q_pos=self.sim.data.qpos[0:7].ravel().copy(),
+                                      q_vel=self.sim.data.qvel[0:7].ravel().copy(),
+                                      is_success=success,
                                       is_collided=is_collided, sim_crash=crash)

     def check_traj_in_joint_limits(self):

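With this change the info dict returned from step() also carries the applied action and the first seven joint positions and velocities at every step. A minimal sketch of a rollout loop collecting these new keys (the env and policy objects are placeholders, not code from this repo):

    import numpy as np

    def collect_rollout(env, policy, horizon=500):
        # env is any gym-style environment whose step() info dict exposes the
        # keys added in this diff; policy maps an observation to an action.
        obs = env.reset()
        actions, q_pos_hist, q_vel_hist = [], [], []
        info = {}
        for _ in range(horizon):
            a = policy(obs)
            obs, reward, done, info = env.step(a)
            actions.append(info["action"])      # applied action (new key)
            q_pos_hist.append(info["q_pos"])    # first 7 joint positions (new key)
            q_vel_hist.append(info["q_vel"])    # first 7 joint velocities (new key)
            if done:
                break
        return (np.stack(actions), np.stack(q_pos_hist), np.stack(q_vel_hist),
                info.get("is_success", False))
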
@@ -81,11 +81,11 @@ class BallInACupReward(alr_reward_fct.AlrReward):
             # cost = 0.5 * dist_final + 0.05 * cost_angle # TODO: Increase cost_angle weight # 0.5 * min_dist +
             # reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided)
             # reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
-            reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 5e-4 * action_cost - self.collision_penalty * int(self._is_collided)
+            reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 1e-3 * action_cost - self.collision_penalty * int(self._is_collided)
             success = dist_final < 0.05 and ball_in_cup and not self._is_collided
             crash = self._is_collided
         else:
-            reward = - 5e-4 * action_cost - 1e-4 * cost_angle # TODO: increase action_cost weight
+            reward = - 1e-3 * action_cost - 1e-4 * cost_angle # TODO: increase action_cost weight
             success = False
             crash = False

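For reference, the final-phase reward as it reads after this commit, written out as a standalone sketch; names follow the diff and the inputs are assumed to be precomputed distances and costs:

    def final_phase_reward(dist_final, min_dist_final, cost_angle, action_cost,
                           is_collided, ball_in_cup, collision_penalty):
        # Quadratic distance terms plus small angle and action penalties;
        # the action_cost weight was raised from 5e-4 to 1e-3 in this commit.
        reward = (- dist_final ** 2 - min_dist_final ** 2
                  - 1e-4 * cost_angle - 1e-3 * action_cost
                  - collision_penalty * int(is_collided))
        success = dist_final < 0.05 and ball_in_cup and not is_collided
        crash = is_collided
        return reward, success, crash
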
@@ -1,4 +1,5 @@
 from abc import ABC, abstractmethod
+from collections import defaultdict

 import gym
 import numpy as np
@@ -64,11 +65,11 @@ class MPWrapper(gym.Wrapper, ABC):
             trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
             velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.n_dof))])

-        # self._trajectory = trajectory
+        self._trajectory = trajectory
         # self._velocity = velocity

         rewards = 0
-        info = {}
+        infos = defaultdict(list)
         # create random obs as the reset function is called externally
         obs = self.env.observation_space.sample()

@@ -77,14 +78,14 @@ class MPWrapper(gym.Wrapper, ABC):
             obs, rew, done, info = self.env.step(ac)
             rewards += rew
             # TODO return all dicts?
-            # [infos[k].append(v) for k, v in info.items()]
+            [infos[k].append(v) for k, v in info.items()]
             if self.render_mode:
                 self.env.render(mode=self.render_mode, **self.render_kwargs)
             if done:
                 break

         done = True
-        return obs[self.env.active_obs], rewards, done, info
+        return obs[self.env.active_obs], rewards, done, infos

     def render(self, mode='human', **kwargs):
         """Only set render options here, such that they can be used during the rollout.
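The wrapper previously returned only the last step's info dict; with defaultdict(list) it now accumulates every step's entries into per-key lists. The aggregation pattern in isolation (the two example step dicts below are made up for illustration):

    from collections import defaultdict

    def aggregate_infos(per_step_infos):
        # Mirrors the [infos[k].append(v) for k, v in info.items()] line:
        # each key of every per-step info dict is collected into a list.
        infos = defaultdict(list)
        for info in per_step_infos:
            for k, v in info.items():
                infos[k].append(v)
        return infos

    steps = [dict(reward_dist=0.3, is_success=False),
             dict(reward_dist=0.1, is_success=True)]
    print(dict(aggregate_infos(steps)))  # {'reward_dist': [0.3, 0.1], 'is_success': [False, True]}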