randomized cup
This commit is contained in:
parent e1617c34a6
commit be5b287ae1
@@ -213,16 +213,26 @@ register(id='TableTennis4DCtxt-v0',
          kwargs={'ctxt_dim': 4})
 
 ## BeerPong
-difficulties = ["simple", "intermediate", "hard", "hardest"]
-
-for v, difficulty in enumerate(difficulties):
-    register(
-        id='ALRBeerPong-v{}'.format(v),
-        entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
-        max_episode_steps=600,
-        kwargs={
-            "difficulty": difficulty,
-            "reward_type": "staged",
+# fixed goal cup position
+register(
+        id='ALRBeerPong-v0',
+        entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+        max_episode_steps=600,
+        kwargs={
+            "rndm_goal": False,
+            "cup_goal_pos": [-0.3, -1.2]
+        }
+    )
+
+
+# random goal cup position
+register(
+        id='ALRBeerPong-v1',
+        entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+        max_episode_steps=600,
+        kwargs={
+            "rndm_goal": True,
+            "cup_goal_pos": [-0.3, -1.2]
         }
     )
 
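For reference, a minimal usage sketch (not part of this commit) for the two variants registered above; it assumes, as is common for gym-style registries, that importing alr_envs executes these register() calls:

    import gym
    import alr_envs  # noqa: F401 -- assumption: importing runs the register() calls above

    env_fixed = gym.make('ALRBeerPong-v0')   # cup stays at the kwargs position
    env_random = gym.make('ALRBeerPong-v1')  # rndm_goal=True: new cup position per reset

    obs = env_random.reset()
    obs, reward, done, info = env_random.step(env_random.action_space.sample())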
@@ -4,11 +4,16 @@ import os
 import numpy as np
 from gym import utils
 from gym.envs.mujoco import MujocoEnv
+from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
+
+
+CUP_POS_MIN = np.array([-0.32, -2.2])
+CUP_POS_MAX = np.array([0.32, -1.2])
 
 
 class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
-    def __init__(self, frame_skip=1, apply_gravity_comp=True, reward_type: str = "staged", noisy=False,
-                 context: np.ndarray = None, difficulty='simple'):
+    def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
+                 rndm_goal=False, cup_goal_pos=[-0.3, -1.2]):
         self._steps = 0
 
         self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
@@ -17,7 +22,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
         self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
 
-        self.context = context
+        self.rndm_goal = rndm_goal
         self.apply_gravity_comp = apply_gravity_comp
         self.add_noise = noisy
 
@@ -38,23 +43,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         else:
             self.noise_std = 0
 
-        if difficulty == 'simple':
-            self.cup_goal_pos = np.array([0, -1.7, 0.840])
-        elif difficulty == 'intermediate':
-            self.cup_goal_pos = np.array([0.3, -1.5, 0.840])
-        elif difficulty == 'hard':
-            self.cup_goal_pos = np.array([-0.3, -2.2, 0.840])
-        elif difficulty == 'hardest':
-            self.cup_goal_pos = np.array([-0.3, -1.2, 0.840])
-
-        if reward_type == "no_context":
-            from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerPongReward
-            reward_function = BeerPongReward
-        elif reward_type == "staged":
-            from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
-            reward_function = BeerPongReward
-        else:
-            raise ValueError("Unknown reward type: {}".format(reward_type))
+        self.cup_goal_pos = np.array(cup_goal_pos + [0.840])  # concatenate, not append: list.append returns None
+        reward_function = BeerPongReward
         self.reward_function = reward_function()
 
         MujocoEnv.__init__(self, self.xml_path, frame_skip)
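A quick sketch of what the goal-position line above computes for the default arguments (0.840 is the table height used throughout this commit); list concatenation is required here, since list.append mutates in place and returns None:

    import numpy as np

    cup_goal_pos = [-0.3, -1.2]               # xy goal passed via kwargs
    goal = np.array(cup_goal_pos + [0.840])   # -> array([-0.3 , -1.2 ,  0.84])
    # np.array(cup_goal_pos.append(0.840)) would instead yield np.array(None)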
@@ -94,6 +84,12 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         self.sim.model.body_pos[self.cup_table_id] = self.cup_goal_pos
         start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
         self.set_state(start_pos, init_vel)
+        if self.rndm_goal:
+            xy = np.random.uniform(CUP_POS_MIN, CUP_POS_MAX)
+            xyz = np.zeros(3)
+            xyz[:2] = xy
+            xyz[-1] = 0.840
+            self.sim.model.body_pos[self.cup_table_id] = xyz
         return self._get_obs()
 
     def step(self, a):
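The randomized goal is drawn uniformly from the axis-aligned box spanned by CUP_POS_MIN and CUP_POS_MAX; a standalone sketch of that sampling step:

    import numpy as np

    CUP_POS_MIN = np.array([-0.32, -2.2])
    CUP_POS_MAX = np.array([0.32, -1.2])

    # np.random.uniform broadcasts over array bounds, so x is drawn from
    # [-0.32, 0.32] and y from [-2.2, -1.2] independently.
    xy = np.random.uniform(CUP_POS_MIN, CUP_POS_MAX)
    xyz = np.array([*xy, 0.840])  # z fixed at table height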
@@ -153,13 +149,12 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
     def check_traj_in_joint_limits(self):
         return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
 
-    # TODO: extend observation space
     def _get_obs(self):
         theta = self.sim.data.qpos.flat[:7]
         return np.concatenate([
             np.cos(theta),
             np.sin(theta),
-            # self.get_body_com("target"),  # only return target to make problem harder
+            self.sim.model.body_pos[self.cup_table_id][:2].copy(),
             [self._steps],
         ])
 
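With the cup's xy position appended, the observation for the 7-DoF arm has 7 + 7 + 2 + 1 = 17 entries. A hypothetical helper mirroring _get_obs, just to make the layout explicit:

    import numpy as np

    def make_obs(theta, cup_xy, steps):
        # layout: 7 cos | 7 sin | cup xy | step counter
        return np.concatenate([np.cos(theta), np.sin(theta), cup_xy, [steps]])

    obs = make_obs(np.zeros(7), np.array([-0.3, -1.2]), 0)
    assert obs.shape == (17,)  # cup xy sits at indices 14:16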
@@ -169,25 +164,26 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         return np.hstack([
             [False] * 7,  # cos
             [False] * 7,  # sin
-            # [True] * 2,  # x-y coordinates of target distance
+            [True] * 2,  # xy position of cup
             [False]  # env steps
         ])
 
 
 if __name__ == "__main__":
-    env = ALRBeerBongEnv(reward_type="staged", difficulty='hardest')
-
-    # env.configure(ctxt)
+    env = ALRBeerBongEnv(rndm_goal=True)
+    import time
     env.reset()
     env.render("human")
-    for i in range(800):
-        ac = 10 * env.action_space.sample()[0:7]
+    for i in range(1500):
+        # ac = 10 * env.action_space.sample()[0:7]
+        ac = np.zeros(7)
         obs, rew, d, info = env.step(ac)
         env.render("human")
 
         print(rew)
 
         if d:
-            break
-
+            print('RESETTING')
+            env.reset()
+            time.sleep(1)
     env.close()
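Because the goal is sampled from NumPy's global RNG, seeding it before reset() should reproduce a cup placement; a hedged extension of the demo above, assuming the 17-dimensional observation layout with cup xy at indices 14:16:

    np.random.seed(42)
    obs_a = env.reset()   # cup sampled at some position p
    np.random.seed(42)
    obs_b = env.reset()   # same seed, same position
    assert np.allclose(obs_a[14:16], obs_b[14:16])  # cup xy entries match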
@@ -9,11 +9,10 @@ class MPWrapper(MPEnvWrapper):
 
     @property
     def active_obs(self):
-        # TODO: @Max Filter observations correctly
         return np.hstack([
             [False] * 7,  # cos
             [False] * 7,  # sin
-            # [True] * 2,  # x-y coordinates of target distance
+            [True] * 2,  # xy position of cup
             [False]  # env steps
         ])
 
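active_obs is a boolean mask over the observation vector; after this change only the cup's xy position counts as context for the MP wrapper. A sketch of how such a mask filters an observation:

    import numpy as np

    active_obs = np.hstack([[False] * 7, [False] * 7, [True] * 2, [False]])
    obs = np.arange(17.0)        # stand-in for a real observation
    context = obs[active_obs]    # keeps entries 14 and 15: the cup's xy
    assert context.tolist() == [14.0, 15.0]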
@@ -31,7 +30,6 @@ class MPWrapper(MPEnvWrapper):
 
     @property
     def goal_pos(self):
-        # TODO: @Max I think the default value of returning to the start is reasonable here
         raise ValueError("Goal position is not available and has to be learnt based on the environment.")
 
     @property