finish up beerpong; walker2d and ant need more extensions; fix import bugs.
This commit is contained in:
parent: 4437ab9577
commit: d4e3b957a9
@@ -218,8 +218,15 @@ register(
     entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
     max_episode_steps=300,
     kwargs={
-        "rndm_goal": True,
-        "cup_goal_pos": [-0.3, -1.2],
+        "frame_skip": 2
+    }
+)
+
+register(
+    id='ALRBeerPong-v1',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+    max_episode_steps=300,
+    kwargs={
         "frame_skip": 2
     }
 )
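The hunk above registers a second id, ALRBeerPong-v1, that maps to the same ALRBeerBongEnv entry point but only passes frame_skip. A minimal usage sketch, assuming the register() call goes through gym's standard registry when the package is imported:

import gym
import alr_envs  # noqa: F401  (importing the package runs the register() calls above)

# 'ALRBeerPong-v1' resolves to ALRBeerBongEnv with frame_skip=2 as its only kwarg.
env = gym.make('ALRBeerPong-v1')
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()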
@@ -2,7 +2,7 @@ from typing import Tuple, Union

 import numpy as np

-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper


 class MPWrapper(RawInterfaceWrapper):
@@ -7,6 +7,5 @@ from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
 from .hopper_throw.hopper_throw import ALRHopperThrowEnv
 from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
 from .reacher.reacher import ReacherEnv
-from .reacher.balancing import BalancingEnv
 from .table_tennis.tt_gym import TTEnvGym
 from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv
@@ -2,6 +2,10 @@ import numpy as np
 from gym.envs.mujoco.ant_v3 import AntEnv

 MAX_EPISODE_STEPS_ANTJUMP = 200
+# TODO: This environment was not tested yet. Do the following todos and test it.
+# TODO: Right now this environment only considers jumping to a specific height, which is not nice. It should be extended
+#  to the same structure as the Hopper, where the angles are randomized (-> contexts) and the agent should jump as high
+#  as possible, while landing at a specific target position


 class ALRAntJumpEnv(AntEnv):
@@ -22,14 +26,12 @@ class ALRAntJumpEnv(AntEnv):
                  healthy_z_range=(0.3, float('inf')),
                  contact_force_range=(-1.0, 1.0),
                  reset_noise_scale=0.1,
-                 context=True,  # variable to decide if context is used or not
                  exclude_current_positions_from_observation=True,
                  max_episode_steps=200):
         self.current_step = 0
         self.max_height = 0
-        self.context = context
         self.max_episode_steps = max_episode_steps
-        self.goal = 0  # goal when training with context
+        self.goal = 0
         super().__init__(xml_file, ctrl_cost_weight, contact_cost_weight, healthy_reward, terminate_when_unhealthy,
                          healthy_z_range, contact_force_range, reset_noise_scale,
                          exclude_current_positions_from_observation)
@@ -53,15 +55,11 @@ class ALRAntJumpEnv(AntEnv):
         done = height < 0.3 # fall over -> is the 0.3 value from healthy_z_range? TODO change 0.3 to the value of healthy z angle

         if self.current_step == self.max_episode_steps or done:
-            if self.context:
-                # -10 for scaling the value of the distance between the max_height and the goal height; only used when context is enabled
-                # height_reward = -10 * (np.linalg.norm(self.max_height - self.goal))
-                height_reward = -10*np.linalg.norm(self.max_height - self.goal)
-                # no healthy reward when using context, because we optimize a negative value
-                healthy_reward = 0
-            else:
-                height_reward = self.max_height - 0.7
-                healthy_reward = self.healthy_reward * self.current_step
+            # -10 for scaling the value of the distance between the max_height and the goal height; only used when context is enabled
+            # height_reward = -10 * (np.linalg.norm(self.max_height - self.goal))
+            height_reward = -10*np.linalg.norm(self.max_height - self.goal)
+            # no healthy reward when using context, because we optimize a negative value
+            healthy_reward = 0

             rewards = height_reward + healthy_reward

@@ -105,7 +103,6 @@ if __name__ == '__main__':
     obs = env.reset()

     for i in range(2000):
-        # objective.load_result("/tmp/cma")
         # test with random actions
         ac = env.action_space.sample()
         obs, rew, d, info = env.step(ac)
@@ -114,4 +111,4 @@ if __name__ == '__main__':
         if d:
             env.reset()

     env.close()
@@ -7,16 +7,6 @@ from gym.envs.mujoco import MujocoEnv

 from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward

-CUP_POS_MIN = np.array([-1.42, -4.05])
-CUP_POS_MAX = np.array([1.42, -1.25])
-
-
-# CUP_POS_MIN = np.array([-0.32, -2.2])
-# CUP_POS_MAX = np.array([0.32, -1.2])
-
-# smaller context space -> Easier task
-# CUP_POS_MIN = np.array([-0.16, -2.2])
-# CUP_POS_MAX = np.array([0.16, -1.7])

 class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
     def __init__(self, frame_skip=2, apply_gravity_comp=True):
@@ -27,10 +17,16 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         self.cup_goal_pos = np.array(cup_goal_pos)

         self._steps = 0
+        # Small Context -> Easier. Todo: Should we do different versions?
         # self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
         #                              "beerpong_wo_cup" + ".xml")
+        # self.cup_pos_min = np.array([-0.32, -2.2])
+        # self.cup_pos_max = np.array([0.32, -1.2])
+
         self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
                                      "beerpong_wo_cup_big_table" + ".xml")
+        self.cup_pos_min = np.array([-1.42, -4.05])
+        self.cup_pos_max = np.array([1.42, -1.25])

         self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
         self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
@@ -49,9 +45,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         self.cup_table_id = 10

         self.add_noise = False
-        reward_function = BeerPongReward
-        self.reward_function = reward_function()
+        self.reward_function = BeerPongReward()
         self.repeat_action = frame_skip
         MujocoEnv.__init__(self, self.xml_path, frame_skip=1)
         utils.EzPickle.__init__(self)
@@ -78,10 +72,11 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         start_pos = init_pos_all
         start_pos[0:7] = init_pos_robot

+        # TODO: Ask Max why we need to set the state twice.
         self.set_state(start_pos, init_vel)
         start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
         self.set_state(start_pos, init_vel)
-        xy = self.np_random.uniform(CUP_POS_MIN, CUP_POS_MAX)
+        xy = self.np_random.uniform(self.cup_pos_min, self.cup_pos_max)
         xyz = np.zeros(3)
         xyz[:2] = xy
         xyz[-1] = 0.840
@@ -89,9 +84,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         return self._get_obs()

     def step(self, a):
-        reward_dist = 0.0
-        angular_vel = 0.0
-
+        crash = False
         for _ in range(self.repeat_action):
             if self.apply_gravity_comp:
                 applied_action = a + self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0]
@@ -100,7 +93,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
             try:
                 self.do_simulation(applied_action, self.frame_skip)
                 self.reward_function.initialize(self)
-                self.reward_function.check_contacts(self.sim)
+                # self.reward_function.check_contacts(self.sim)   # I assume this is not important?
                 if self._steps < self.release_step:
                     self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
                     self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
@@ -112,34 +105,19 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):

         if not crash:
             reward, reward_infos = self.reward_function.compute_reward(self, applied_action)
-            success = reward_infos['success']
             is_collided = reward_infos['is_collided']
-            ball_pos = reward_infos['ball_pos']
-            ball_vel = reward_infos['ball_vel']
             done = is_collided or self._steps == self.ep_length - 1
             self._steps += 1
         else:
             reward = -30
-            reward_infos = dict()
-            success = False
-            is_collided = False
             done = True
-            ball_pos = np.zeros(3)
-            ball_vel = np.zeros(3)
+            reward_infos = {"success": False, "ball_pos": np.zeros(3), "ball_vel": np.zeros(3), "is_collided": False}

         infos = dict(
-            reward_dist=reward_dist,
             reward=reward,
-            velocity=angular_vel,
-            # traj=self._q_pos,
             action=a,
             q_pos=self.sim.data.qpos[0:7].ravel().copy(),
-            q_vel=self.sim.data.qvel[0:7].ravel().copy(),
-            ball_pos=ball_pos,
-            ball_vel=ball_vel,
-            success=success,
-            is_collided=is_collided, sim_crash=crash,
-            table_contact_first=int(not self.reward_function.ball_ground_contact_first)
+            q_vel=self.sim.data.qvel[0:7].ravel().copy(), sim_crash=crash,
         )
         infos.update(reward_infos)
         return ob, reward, done, infos
@@ -239,14 +217,7 @@ if __name__ == "__main__":
     env.reset()
     env.render("human")
     for i in range(1500):
-        # ac = 10 * env.action_space.sample()
-        ac = np.ones(7)
-        # ac = np.zeros(7)
-        # ac[0] = 0
-        # ac[1] = -0.01
-        # ac[3] = -0.01
-        # if env._steps > 150:
-        #     ac[0] = 1
+        ac = 10 * env.action_space.sample()
         obs, rew, d, info = env.step(ac)
         env.render("human")
         print(env.dt)
@@ -21,12 +21,9 @@ class BeerPongReward:

         self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
                                       "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
-                                      # "cup_base_table", "cup_base_table_contact",
                                       "cup_geom_table15",
                                       "cup_geom_table16",
                                       "cup_geom_table17", "cup_geom1_table8",
-                                      # "cup_base_table_contact",
-                                      # "cup_base_table"
                                       ]

         self.dists = None
@@ -39,7 +36,7 @@ class BeerPongReward:
         self.ball_in_cup = False
         self.dist_ground_cup = -1  # distance floor to cup if first floor contact

-        ### IDs
+        # IDs
         self.ball_collision_id = None
         self.table_collision_id = None
         self.wall_collision_id = None
@@ -96,10 +93,10 @@ class BeerPongReward:
         self.action_costs.append(np.copy(action_cost))
         # # ##################### Reward function which does not force to bounce once on the table (quad dist) #########

-        # Comment Onur: Is this needed?
+        # Is this needed?
         # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)

-        if env._steps == env.ep_length - 1:#  or self._is_collided:
+        if env._steps == env.ep_length - 1:  # or self._is_collided:
             min_dist = np.min(self.dists)
             final_dist = self.dists_final[-1]
             if self.ball_ground_contact_first:
@@ -128,9 +125,10 @@ class BeerPongReward:
             reward = - action_cost
             success = False
         # ##############################################################################################################
-        infos = {"success": success,  "ball_pos": ball_pos.copy(),
-                 "ball_vel": ball_vel.copy(), "action_cost": action_cost, "task_reward": reward, "is_collided": False} # TODO: Check if is collided is needed
+        infos = {"success": success, "ball_pos": ball_pos.copy(),
+                 "ball_vel": ball_vel.copy(), "action_cost": action_cost, "task_reward": reward,
+                 "table_contact_first": int(not self.ball_ground_contact_first),
+                 "is_collided": False}  # TODO: Check if is collided is needed
         return reward, infos

     def check_contacts(self, sim):
@@ -21,6 +21,7 @@
           <joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
           <geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
           <body name="foot" pos="0.2/2 0 0.1">
+            <site name="foot_right_site" pos="0 0 0.04" size="0.02 0.02 0.02" rgba="0 0 1 1" type="sphere"/>
             <joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
             <geom friction="0.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
           </body>
@@ -34,6 +35,7 @@
           <joint axis="0 -1 0" name="leg_left_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
           <geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_left_geom" rgba=".7 .3 .6 1" size="0.04" type="capsule"/>
           <body name="foot_left" pos="0.2/2 0 0.1">
+            <site name="foot_left_site" pos="0 0 0.04" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"/>
             <joint axis="0 -1 0" name="foot_left_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
             <geom friction="1.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_left_geom" rgba=".7 .3 .6 1" size="0.06" type="capsule"/>
           </body>
@@ -59,4 +61,4 @@
         <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
         <material name="geom" texture="texgeom" texuniform="true"/>
     </asset>
 </mujoco>
@@ -4,6 +4,10 @@ import numpy as np

 MAX_EPISODE_STEPS_WALKERJUMP = 300

+# TODO: Right now this environment only considers jumping to a specific height, which is not nice. It should be extended
+#  to the same structure as the Hopper, where the angles are randomized (-> contexts) and the agent should jump as high
+#  as possible, while landing at a specific target position
+

 class ALRWalker2dJumpEnv(Walker2dEnv):
     """
@@ -21,14 +25,12 @@ class ALRWalker2dJumpEnv(Walker2dEnv):
                  healthy_angle_range=(-1.0, 1.0),
                  reset_noise_scale=5e-3,
                  penalty=0,
-                 context=True,
                  exclude_current_positions_from_observation=True,
                  max_episode_steps=300):
         self.current_step = 0
         self.max_episode_steps = max_episode_steps
         self.max_height = 0
         self._penalty = penalty
-        self.context = context
         self.goal = 0
         xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
         super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
@@ -43,31 +45,24 @@ class ALRWalker2dJumpEnv(Walker2dEnv):

         self.max_height = max(height, self.max_height)

-        fell_over = height < 0.2
-        done = fell_over
+        done = height < 0.2

         ctrl_cost = self.control_cost(action)
         costs = ctrl_cost
+        rewards = 0
         if self.current_step >= self.max_episode_steps or done:
             done = True
-            height_goal_distance = -10 * (np.linalg.norm(self.max_height - self.goal)) if self.context else self.max_height
+            height_goal_distance = -10 * (np.linalg.norm(self.max_height - self.goal))
             healthy_reward = self.healthy_reward * self.current_step

             rewards = height_goal_distance + healthy_reward
-        else:
-            # penalty not needed
-            rewards = 0
-            rewards += ((action[:2] > 0) * self._penalty).sum() if self.current_step < 4 else 0
-            rewards += ((action[3:5] > 0) * self._penalty).sum() if self.current_step < 4 else 0
-

         observation = self._get_obs()
         reward = rewards - costs
         info = {
             'height': height,
             'max_height': self.max_height,
-            'goal' : self.goal,
+            'goal': self.goal,
         }

         return observation, reward, done, info
@@ -78,7 +73,7 @@ class ALRWalker2dJumpEnv(Walker2dEnv):
     def reset(self):
         self.current_step = 0
         self.max_height = 0
-        self.goal = np.random.uniform(1.5, 2.5, 1) # 1.5 3.0
+        self.goal = np.random.uniform(1.5, 2.5, 1)  # 1.5 3.0
         return super().reset()

     # overwrite reset_model to make it deterministic
@@ -99,8 +94,7 @@ if __name__ == '__main__':
     env = ALRWalker2dJumpEnv()
     obs = env.reset()

-    for i in range(2000):
-        # objective.load_result("/tmp/cma")
+    for i in range(6000):
         # test with random actions
         ac = env.action_space.sample()
         obs, rew, d, info = env.step(ac)
@@ -110,4 +104,4 @@ if __name__ == '__main__':
             print('After ', i, ' steps, done: ', d)
             env.reset()

     env.close()
@@ -7,12 +7,12 @@ import numpy as np
 from gym.envs.registration import EnvSpec, registry
 from gym.wrappers import TimeAwareObservation

-from alr_envs.mp.basis_generator_factory import get_basis_generator
-from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
-from alr_envs.mp.controllers.controller_factory import get_controller
-from alr_envs.mp.mp_factory import get_trajectory_generator
-from alr_envs.mp.phase_generator_factory import get_phase_generator
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.black_box_wrapper import BlackBoxWrapper
+from alr_envs.black_box.controller.controller_factory import get_controller
+from alr_envs.black_box.factory.basis_generator_factory import get_basis_generator
+from alr_envs.black_box.factory.phase_generator_factory import get_phase_generator
+from alr_envs.black_box.factory.trajectory_generator_factory import get_trajectory_generator
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 from alr_envs.utils.utils import nested_update

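The import fixes mentioned in the commit message come from moving the motion-primitive tooling from alr_envs.mp to the new alr_envs.black_box package. A hypothetical downstream wrapper module would be updated the same way; only the package path changes, the imported names stay identical:

# Before (old package layout):
# from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper

# After (new package layout, matching the MPWrapper diff above):
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper

class MyTaskMPWrapper(RawInterfaceWrapper):  # hypothetical user-defined wrapper
    pass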