Implement 'plausability tests' of PCA vs REX / PINK in Columbus envs

2023-05-03 17:02:42 +02:00 · 2023-05-03 17:02:42 +02:00 · 44b34fe12b
commit 44b34fe12b
parent 21de8f418b
1 changed files with 32 additions and 29 deletions
--- a/test.py
+++ b/test.py
@ -9,7 +9,7 @@ from columbus.observables import Observable, CnnObservable

 import colorednoise as cn

-from pca import *
+from priorConditionedAnnealing import pca


 def main():
@ -72,7 +72,7 @@ def chooseEnv():
        if envs[i] in [env.ColumbusConfigDefined]:
            return loadConfigDefinedEnv(envs[i])
        Env = envs[i]
-        return Env(fps=30)
+        return Env(fps=30, agent_draw_path=True, path_decay=1/256)


 def value_func(obs):
@ -80,7 +80,7 @@ def value_func(obs):
    # return th.rand(obs.shape[0])-0.5


-def human_input(obs):
+def human_input(obs, env):
    pos = (0.5, 0.5)
    pos = pygame.mouse.get_pos()
    pos = (min(max((pos[0]-env.joystick_offset[0]-20)/60, 0), 1),
@ -89,39 +89,40 @@ def human_input(obs):
    return pos


-def colored_noise(beta=1, dim_a=2, samples=2**18):
-    index = [0]*dim_a
-    samples = []
-    for d in range(dim_a):
-        samples.append(cn.powerlaw_psd_gaussian(beta, samples))
+class Colored_Noise():
+    def __init__(self, beta=1, dim_a=2, samples=2**18):
+        self.index = 0
+        self.samples = cn.powerlaw_psd_gaussian(beta, (dim_a, samples))

-    def noise_generator(obs):
-        sample = []
-        for d in range(dim_a):
-            sample.append(samples[d][index])
-        index += 1
+    def __call__(self, obs, env):
+        sample = self.samples[:, self.index]
+        self.index += 1
        return sample

-    return noise_generator

+class PCA_Noise():
+    def __init__(self, dim_a=2, kernel_func='SE_1.41_1', window=8, ssf=-1):
+        self.dist = pca.PCA_Distribution(
+            action_dim=dim_a, par_strength='CONT_DIAG', kernel_func=kernel_func, window=window)
+        self.dist.proba_distribution(th.Tensor([[0]*2]), th.Tensor([[1]*2]))
+        self.traj = [[0]*dim_a]
+        self.ssf = 32
+        self.index = 0

-def pca_noise(lengthscale=1, dim_a=2, kernel_func='RBF', window=16):
-
-    dist = PCA_Distribution(
-        action_dim=dim_a, par_strength='SCALAR', kernel_func=kernel_func, window=window)
-
-    traj = []
-
-    def noise_generator(obs):
-        sample = dist.sample(th.Tensor(traj))
-        traj.append(sample)
+    def __call__(self, obs, env):
+        if self.index == self.ssf:
+            self.index = 0
+            self.traj = [[0]*len(self.traj[0])]
+        traj = th.Tensor(self.traj).unsqueeze(0)
+        sample = self.dist.sample(traj).squeeze(0)
+        self.traj.append(sample)
+        self.index += 1
        return sample

-    return noise_generator
-

 def choosePlayType():
-    options = {'human': human_input, 'REX': None, 'PCA': None, 'PINK': None}
+    options = {'human': human_input, 'PCA': PCA_Noise(),
+               'REX': Colored_Noise(beta=0), 'PINK': Colored_Noise(beta=1), 'BROWN': Colored_Noise(beta=2), 'BETA.5': Colored_Noise(beta=.5)}
    for i, name in enumerate(options):
        print('['+str(i)+'] '+name)
    while True:
@ -130,10 +131,12 @@ def choosePlayType():
            i = int(inp)
        except:
            print('[!] You have to enter the number...')
+            continue
        if i < 0 or i >= len(options):
            print(
                '[!] That is a number, but not one that makes sense in this context...')
-    return options[name]
+        else:
+            return options[list(options.keys())[i]]


 def playEnv(env, agent_func):
@ -143,7 +146,7 @@ def playEnv(env, agent_func):
        t1 = time()
        # env.render(value_func=value_func)
        env.render()
-        inp = agent_func(obs)
+        inp = agent_func(obs, env)
        obs, rew, done, info = env.step(np.array(inp, dtype=np.float32))
        print('Reward: '+str(rew))
        print('Score: '+str(info))