Pad and Cut Trajectory to correct length

This commit is contained in:
Dominik Moritz Roth 2023-05-03 15:10:25 +02:00
parent 35259b9e99
commit a3356c4654
2 changed files with 171 additions and 2 deletions

View File

@ -6,6 +6,7 @@ from torch import nn
from stable_baselines3.common.distributions import Distribution as SB3_Distribution
from stable_baselines3.common.distributions import sum_independent_dims
from torch.distributions import Normal
import torch.nn.functional as F
class Par_Strength(Enum):
@ -112,7 +113,7 @@ class PCA_Distribution(SB3_Distribution):
return sum_independent_dims(self.distribution.entropy())
def sample(self, traj: th.Tensor) -> th.Tensor:
pi_mean, pi_std = self.distribution.mean, self.distribution.scale,
pi_mean, pi_std = self.distribution.mean, self.distribution.scale
rho_mean, rho_std = self._conditioning_engine(traj, pi_mean, pi_std)
eta = self._get_rigged(pi_mean, pi_std,
rho_mean, rho_std)
@ -136,7 +137,15 @@ class PCA_Distribution(SB3_Distribution):
return eta.detach()
def _conditioning_engine(self, traj, pi_mean, pi_std):
def _pad_and_cut_trajectory(self, traj, value=0):
cut = traj[:self.window]
if traj.shape[-2] < self.window:
missing = self.window - traj.shape[-2]
return F.pad(input=cut, pad=(missing, 0), value=value)
return cut
def _conditioning_engine(self, trajectory, pi_mean, pi_std):
traj = self._pad_and_cut_trajectory(trajectory)
y_np = np.append(np.swapaxes(traj, -1, -2),
np.expand_dims(pi_mean, -1), -1)

160
test.py Normal file
View File

@ -0,0 +1,160 @@
import torch as th
from time import sleep, time
import numpy as np
import pygame
import yaml
from columbus import env
from columbus.observables import Observable, CnnObservable
import colorednoise as cn
from pca import *
def main():
    """Ask for an agent and an environment, then play episodes until aborted.

    The episode loop never ends on its own; it is left through an exception
    such as ``KeyboardInterrupt`` or ``EOFError`` raised at a prompt.
    """
    agent_func = choosePlayType()
    env = chooseEnv()
    try:
        while True:
            playEnv(env, agent_func)
            input('<again?>')
    finally:
        # The loop above is infinite, so a close() placed after it would be
        # unreachable; the finally-clause releases the environment on abort.
        env.close()
def getAvaibleEnvs():
    """Lazily yield every attribute of the ``env`` module that names a preset.

    An attribute qualifies when its name starts with ``'Columbus'`` and is
    not exactly ``'ColumbusEnv'``. Discovery relies purely on this naming
    convention (admittedly a bit hacky).
    """
    for attr_name in dir(env):
        if not attr_name.startswith('Columbus'):
            continue
        if attr_name == 'ColumbusEnv':
            continue
        yield getattr(env, attr_name)
def loadConfigDefinedEnv(EnvClass):
    """Interactively build ``EnvClass`` from a multi-document YAML config.

    Prompts for the config file, lists every document that has a ``name``
    other than ``'SLURM'``, lets the user pick one (default: index 0) and
    then walks the dotted key path ``params.task.env_args`` inside it. If a
    key cannot be resolved, the user is asked for a different dotted path
    and the walk restarts from the document root.

    :param EnvClass: callable accepting ``fps`` plus the keyword arguments
        found under the resolved path.
    :return: ``EnvClass(fps=30, **env_args)``.
    """
    config_path = input('[Path to config> ')
    with open(config_path, 'r') as f:
        # safe_load_all is lazy, so the documents must be consumed while the
        # file is still open.
        docs = [d for d in yaml.safe_load_all(f)
                if d and 'name' in d and d['name'] not in ['SLURM']]
    for i, doc in enumerate(docs):
        print('['+str(i)+'] '+doc['name'])
    ds = int(input('[0]> ') or '0')
    doc = docs[ds]
    path = 'params.task.env_args'
    while True:
        # Always restart at the document root so that a corrected path is
        # not applied on top of a partially walked one.
        cur = doc
        try:
            for key in path.split('.'):
                print(key)
                cur = cur[key]
        except (KeyError, IndexError, TypeError):
            print('Unable to find key "'+key+'"')
            path = input('[Path> ')
        else:
            break
    print(cur)
    return EnvClass(fps=30, **cur)
def chooseEnv():
    """Print the available environments and return the one the user picks.

    Re-prompts until a valid index is entered. Selecting
    ``env.ColumbusConfigDefined`` delegates to ``loadConfigDefinedEnv``;
    every other class is instantiated with ``fps=30``.
    """
    envs = list(getAvaibleEnvs())
    for i, Env in enumerate(envs):
        print('['+str(i)+'] '+Env.__name__)
    while True:
        inp = input('[#> ')
        try:
            i = int(inp)
        except ValueError:
            print('[!] You have to enter the number...')
            # Ask again instead of falling through with a stale index.
            continue
        if i < 0 or i >= len(envs):
            print(
                '[!] That is a number, but not one that makes sense in this context...')
            continue
        Env = envs[i]
        if Env in [env.ColumbusConfigDefined]:
            return loadConfigDefinedEnv(Env)
        return Env(fps=30)
def value_func(obs):
    """Return column 0 of ``obs`` (one value per row)."""
    # Alternative kept for experimentation: random values in [-0.5, 0.5)
    # via th.rand(obs.shape[0]) - 0.5
    first_column = obs[:, 0]
    return first_column
def human_input(obs):
    """Turn the current mouse position into a 2D action in [-1, 1].

    Per axis: subtract the joystick offset and a further 20, divide by 60,
    clamp to [0, 1] and rescale to [-1, 1]. ``obs`` is ignored.

    NOTE(review): ``env`` is resolved at module level here, i.e. it is the
    name imported via ``from columbus import env`` and not the environment
    instance created in ``main`` -- confirm that it provides
    ``joystick_offset``.
    """
    def to_axis(pixel, offset):
        unit = min(max((pixel - offset - 20)/60, 0), 1)
        return unit*2-1

    mouse = pygame.mouse.get_pos()
    x = to_axis(mouse[0], env.joystick_offset[0])
    y = to_axis(mouse[1], env.joystick_offset[1])
    return x, y
def colored_noise(beta=1, dim_a=2, samples=2**18):
    """Create an agent function that replays pre-generated colored noise.

    One sequence of ``samples`` values is drawn per action dimension with
    ``cn.powerlaw_psd_gaussian(beta, samples)``. Every call of the returned
    function emits the next value of each sequence and wraps around to the
    start once the sequences are exhausted.

    :param beta: exponent passed to ``powerlaw_psd_gaussian``.
    :param dim_a: number of action dimensions (independent sequences).
    :param samples: length of each pre-generated sequence.
    :return: ``noise_generator(obs)`` returning a list of ``dim_a`` values;
        ``obs`` is ignored.
    """
    # Keep the ``samples`` parameter intact; the sequences get their own name.
    sequences = [cn.powerlaw_psd_gaussian(beta, samples)
                 for _ in range(dim_a)]
    index = 0

    def noise_generator(obs):
        # The read position is shared state of the closure.
        nonlocal index
        sample = [sequence[index] for sequence in sequences]
        index = (index + 1) % samples
        return sample

    return noise_generator
def pca_noise(lengthscale=1, dim_a=2, kernel_func='RBF', window=16):
    """Create an agent function that samples actions from a PCA_Distribution.

    The returned function passes all previously drawn samples to
    ``dist.sample`` as the trajectory, records the new sample and returns
    it. ``obs`` is ignored.

    ``lengthscale`` is accepted but currently not used. The sample history
    is only ever appended to, so it grows with every call.

    NOTE(review): ``th.Tensor(history)`` receives a list of earlier samples
    (an empty list on the first call) -- confirm this yields the trajectory
    layout that ``PCA_Distribution.sample`` expects.
    """
    history = []
    distribution = PCA_Distribution(
        action_dim=dim_a,
        par_strength='SCALAR',
        kernel_func=kernel_func,
        window=window,
    )

    def noise_generator(obs):
        action = distribution.sample(th.Tensor(history))
        history.append(action)
        return action

    return noise_generator
def choosePlayType():
    """Print the available play types and return the selected agent function.

    Re-prompts until a valid index is entered. Entries mapped to ``None``
    ('REX', 'PCA', 'PINK') are placeholders without an agent function yet;
    selecting one of them returns ``None``.
    """
    options = {'human': human_input, 'REX': None, 'PCA': None, 'PINK': None}
    names = list(options)
    for i, name in enumerate(names):
        print('['+str(i)+'] '+name)
    while True:
        inp = input('[#> ')
        try:
            i = int(inp)
        except ValueError:
            print('[!] You have to enter the number...')
            continue
        if i < 0 or i >= len(names):
            print(
                '[!] That is a number, but not one that makes sense in this context...')
            continue
        # Look up by the chosen index, not by the leftover loop variable of
        # the listing above (which always names the last option).
        return options[names[i]]
def playEnv(env, agent_func):
    """Run one episode of ``env`` driven by ``agent_func`` at ``env.fps``.

    Each frame renders the environment, asks ``agent_func`` for an action
    given the latest observation, steps the environment with that action as
    a float32 array and prints reward and info. The remainder of the frame
    budget (``1/env.fps``) is slept away; an overrun is reported instead.
    """
    obs = env.reset()
    done = False
    while not done:
        frame_start = time()
        # env.render(value_func=value_func)
        env.render()
        action = np.array(agent_func(obs), dtype=np.float32)
        obs, rew, done, info = env.step(action)
        print('Reward: '+str(rew))
        print('Score: '+str(info))
        elapsed = time() - frame_start
        remaining = 1/env.fps - elapsed
        if remaining >= 0:
            sleep(remaining)
        else:
            print("[!] Can't keep framerate!")
# Start the interactive player only when this file is executed as a script.
if __name__ == '__main__':
    main()