Corrected reward for HopperJumpRndmInit + ALRReacher for iLQR

Onur 2022-04-21 08:44:13 +02:00
parent 77927e9157
commit c0502cf1d4
4 changed files with 58 additions and 5 deletions

View File

@@ -148,6 +148,17 @@ register(
     }
 )

+register(
+    id='ALRReacherSparseOptCtrl-v0',
+    entry_point='alr_envs.alr.mujoco:ALRReacherOptCtrlEnv',
+    max_episode_steps=200,
+    kwargs={
+        "steps_before_reward": 200,
+        "n_links": 5,
+        "balance": False,
+    }
+)
+
 register(
     id='ALRReacherSparseBalanced-v0',
     entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
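Note (not part of the commit): once registered, the new id is created like any other Gym environment. A minimal usage sketch, assuming that importing alr_envs executes the register() calls above:

    import gym
    import alr_envs  # assumption: importing the package runs the register() calls

    env = gym.make('ALRReacherSparseOptCtrl-v0')
    obs = env.reset()  # with ALRReacherOptCtrlEnv._get_obs: [joint angles, joint velocities]
    obs, reward, done, info = env.step(env.action_space.sample())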

View File

@@ -1,4 +1,4 @@
-from .reacher.alr_reacher import ALRReacherEnv
+from .reacher.alr_reacher import ALRReacherEnv, ALRReacherOptCtrlEnv
 from .reacher.balancing import BalancingEnv
 from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
 from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv

View File

@@ -94,10 +94,16 @@ class ALRHopperJumpEnv(HopperEnv):
 class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
     def __init__(self, max_episode_steps=250):
+        self.contact_with_floor = False
+        self._floor_geom_id = None
+        self._foot_geom_id = None
         super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False,
                                                       reset_noise_scale=5e-1,
                                                       max_episode_steps=max_episode_steps)

     def reset_model(self):
+        self._floor_geom_id = self.model.geom_name2id('floor')
+        self._foot_geom_id = self.model.geom_name2id('foot_geom')
+
         noise_low = -self._reset_noise_scale
         noise_high = self._reset_noise_scale
         rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
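Note (not part of the commit): the geom ids are deliberately set to None in __init__, before the super().__init__ call, and only resolved in reset_model. Gym's old-API MujocoEnv constructor issues a probe step() to size the observation space, and during that probe the contact check in step() below runs with the ids still unset; comparing contact geoms against None simply yields False, so the probe is harmless. The real model.geom_name2id lookup then happens on every reset.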
@@ -116,8 +122,12 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
         self.current_step += 1
         self.do_simulation(action, self.frame_skip)
+
+        self.contact_with_floor = self._contact_checker(self._floor_geom_id, self._foot_geom_id) if not \
+            self.contact_with_floor else True
+
         height_after = self.get_body_com("torso")[2]
-        self.max_height = max(height_after, self.max_height)
+        self.max_height = max(height_after, self.max_height) if self.contact_with_floor else 0

         ctrl_cost = self.control_cost(action)
         costs = ctrl_cost
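Note (not part of the commit): these two changed lines are the actual reward correction. contact_with_floor acts as a latch: it stays False until the first foot-floor contact, detected each step by the _contact_checker method added below, and then sticks at True for the rest of the episode. max_height is tracked only after that first touchdown and forced to 0 before it, so an episode that starts airborne from the randomized init no longer gets credit for height it never jumped to. The conditional-expression latch is equivalent to the plainer form:

    # update the latch only while it is still False; once True it stays True
    if not self.contact_with_floor:
        self.contact_with_floor = self._contact_checker(self._floor_geom_id,
                                                        self._foot_geom_id)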
@@ -142,9 +152,19 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
         return observation, reward, done, info

+    def _contact_checker(self, id_1, id_2):
+        for coni in range(0, self.sim.data.ncon):
+            con = self.sim.data.contact[coni]
+            collision = con.geom1 == id_1 and con.geom2 == id_2
+            collision_trans = con.geom1 == id_2 and con.geom2 == id_1
+            if collision or collision_trans:
+                return True
+        return False
+

 if __name__ == '__main__':
     render_mode = "human"  # "human" or "partial" or "final"
-    env = ALRHopperJumpEnv()
+    # env = ALRHopperJumpEnv()
+    env = ALRHopperJumpRndmPosEnv()
     obs = env.reset()

     for i in range(2000):
@@ -152,8 +172,9 @@ if __name__ == '__main__':
         # test with random actions
         ac = env.action_space.sample()
         obs, rew, d, info = env.step(ac)
-        if i % 10 == 0:
-            env.render(mode=render_mode)
+        # if i % 10 == 0:
+        #     env.render(mode=render_mode)
+        env.render(mode=render_mode)
         if d:
             print('After ', i, ' steps, done: ', d)
             env.reset()

View File

@@ -87,6 +87,27 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
             [self._steps],
         ])


+class ALRReacherOptCtrlEnv(ALRReacherEnv):
+    def __init__(self, steps_before_reward=200, n_links=5, balance=False):
+        super(ALRReacherOptCtrlEnv, self).__init__(steps_before_reward, n_links, balance)
+        self.goal = np.array([0.1, 0.1])
+
+    def _get_obs(self):
+        theta = self.sim.data.qpos.flat[:self.n_links]
+        return np.concatenate([
+            theta,
+            self.sim.data.qvel.flat[:self.n_links],  # this is angular velocity
+        ])
+
+    def reset_model(self):
+        qpos = self.init_qpos
+        qpos[-2:] = self.goal
+        qvel = self.init_qvel
+        qvel[-2:] = 0
+        self.set_state(qpos, qvel)
+        self._steps = 0
+        return self._get_obs()
+
 if __name__ == '__main__':
     nl = 5
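Note (not part of the commit): the new class strips the observation down to exactly (theta, theta_dot), the generalized positions and velocities of the arm, which is the state vector an iLQR formulation works on, and it makes reset deterministic by adding no noise and pinning the two goal joints to the fixed goal (0.1, 0.1). One caveat: qpos = self.init_qpos aliases the stored array rather than copying it, so qpos[-2:] = self.goal writes into init_qpos itself; harmless here because the same goal is rewritten on every reset, but self.init_qpos.copy() would be cleaner. A sketch of how the env can then back an iLQR loop through finite-difference linearization (illustration only; assumes the mujoco-py-based Gym API used above):

    import numpy as np

    env = ALRReacherOptCtrlEnv()
    env.reset()
    nq = env.model.nq  # includes the 2 pinned goal joints

    def f(x, u):
        # one-step dynamics: write (qpos, qvel) into the sim, apply u, read back
        env.set_state(x[:nq], x[nq:])
        env.step(u)
        return np.concatenate([env.sim.data.qpos.flat, env.sim.data.qvel.flat])

    def linearize(x, u, eps=1e-6):
        # A = df/dx, B = df/du by forward differences, for the iLQR backward pass
        f0 = f(x, u)
        A = np.stack([(f(x + eps * dx, u) - f0) / eps for dx in np.eye(x.size)], axis=1)
        B = np.stack([(f(x, u + eps * du) - f0) / eps for du in np.eye(u.size)], axis=1)
        return A, B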