Pad and Cut Trajectory to correct length
This commit is contained in:
parent
35259b9e99
commit
a3356c4654
@ -6,6 +6,7 @@ from torch import nn
|
|||||||
from stable_baselines3.common.distributions import Distribution as SB3_Distribution
|
from stable_baselines3.common.distributions import Distribution as SB3_Distribution
|
||||||
from stable_baselines3.common.distributions import sum_independent_dims
|
from stable_baselines3.common.distributions import sum_independent_dims
|
||||||
from torch.distributions import Normal
|
from torch.distributions import Normal
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
class Par_Strength(Enum):
|
class Par_Strength(Enum):
|
||||||
@ -112,7 +113,7 @@ class PCA_Distribution(SB3_Distribution):
|
|||||||
return sum_independent_dims(self.distribution.entropy())
|
return sum_independent_dims(self.distribution.entropy())
|
||||||
|
|
||||||
def sample(self, traj: th.Tensor) -> th.Tensor:
|
def sample(self, traj: th.Tensor) -> th.Tensor:
|
||||||
pi_mean, pi_std = self.distribution.mean, self.distribution.scale,
|
pi_mean, pi_std = self.distribution.mean, self.distribution.scale
|
||||||
rho_mean, rho_std = self._conditioning_engine(traj, pi_mean, pi_std)
|
rho_mean, rho_std = self._conditioning_engine(traj, pi_mean, pi_std)
|
||||||
eta = self._get_rigged(pi_mean, pi_std,
|
eta = self._get_rigged(pi_mean, pi_std,
|
||||||
rho_mean, rho_std)
|
rho_mean, rho_std)
|
||||||
@ -136,7 +137,15 @@ class PCA_Distribution(SB3_Distribution):
|
|||||||
|
|
||||||
return eta.detach()
|
return eta.detach()
|
||||||
|
|
||||||
def _conditioning_engine(self, traj, pi_mean, pi_std):
|
def _pad_and_cut_trajectory(self, traj, value=0):
|
||||||
|
cut = traj[:self.window]
|
||||||
|
if traj.shape[-2] < self.window:
|
||||||
|
missing = self.window - traj.shape[-2]
|
||||||
|
return F.pad(input=cut, pad=(missing, 0), value=value)
|
||||||
|
return cut
|
||||||
|
|
||||||
|
def _conditioning_engine(self, trajectory, pi_mean, pi_std):
|
||||||
|
traj = self._pad_and_cut_trajectory(trajectory)
|
||||||
y_np = np.append(np.swapaxes(traj, -1, -2),
|
y_np = np.append(np.swapaxes(traj, -1, -2),
|
||||||
np.expand_dims(pi_mean, -1), -1)
|
np.expand_dims(pi_mean, -1), -1)
|
||||||
|
|
||||||
|
160
test.py
Normal file
160
test.py
Normal file
@ -0,0 +1,160 @@
|
|||||||
|
import torch as th
|
||||||
|
from time import sleep, time
|
||||||
|
import numpy as np
|
||||||
|
import pygame
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from columbus import env
|
||||||
|
from columbus.observables import Observable, CnnObservable
|
||||||
|
|
||||||
|
import colorednoise as cn
|
||||||
|
|
||||||
|
from pca import *
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
agent_func = choosePlayType()
|
||||||
|
env = chooseEnv()
|
||||||
|
while True:
|
||||||
|
playEnv(env, agent_func)
|
||||||
|
input('<again?>')
|
||||||
|
env.close()
|
||||||
|
|
||||||
|
|
||||||
|
def getAvaibleEnvs():
|
||||||
|
# kinda hacky... idk
|
||||||
|
strs = dir(env)
|
||||||
|
for s in strs:
|
||||||
|
if s.startswith('Columbus') and s != 'ColumbusEnv':
|
||||||
|
yield getattr(env, s)
|
||||||
|
|
||||||
|
|
||||||
|
def loadConfigDefinedEnv(EnvClass):
|
||||||
|
p = input('[Path to config> ')
|
||||||
|
with open(p, 'r') as f:
|
||||||
|
docs = list([d for d in yaml.safe_load_all(
|
||||||
|
f) if d and 'name' in d and d['name'] not in ['SLURM']])
|
||||||
|
for i, doc in enumerate(docs):
|
||||||
|
name = doc['name']
|
||||||
|
print('['+str(i)+'] '+name)
|
||||||
|
ds = int(input('[0]> ') or '0')
|
||||||
|
doc = docs[ds]
|
||||||
|
cur = doc
|
||||||
|
path = 'params.task.env_args'
|
||||||
|
p = path.split('.')
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
if len(p) == 0:
|
||||||
|
break
|
||||||
|
key = p.pop(0)
|
||||||
|
print(key)
|
||||||
|
cur = cur[key]
|
||||||
|
except Exception as e:
|
||||||
|
print('Unable to find key "'+key+'"')
|
||||||
|
path = input('[Path> ')
|
||||||
|
print(cur)
|
||||||
|
return EnvClass(fps=30, **cur)
|
||||||
|
|
||||||
|
|
||||||
|
def chooseEnv():
|
||||||
|
envs = list(getAvaibleEnvs())
|
||||||
|
for i, Env in enumerate(envs):
|
||||||
|
print('['+str(i)+'] '+Env.__name__)
|
||||||
|
while True:
|
||||||
|
inp = input('[#> ')
|
||||||
|
try:
|
||||||
|
i = int(inp)
|
||||||
|
except:
|
||||||
|
print('[!] You have to enter the number...')
|
||||||
|
if i < 0 or i >= len(envs):
|
||||||
|
print(
|
||||||
|
'[!] That is a number, but not one that makes sense in this context...')
|
||||||
|
if envs[i] in [env.ColumbusConfigDefined]:
|
||||||
|
return loadConfigDefinedEnv(envs[i])
|
||||||
|
Env = envs[i]
|
||||||
|
return Env(fps=30)
|
||||||
|
|
||||||
|
|
||||||
|
def value_func(obs):
|
||||||
|
return obs[:, 0]
|
||||||
|
# return th.rand(obs.shape[0])-0.5
|
||||||
|
|
||||||
|
|
||||||
|
def human_input(obs):
|
||||||
|
pos = (0.5, 0.5)
|
||||||
|
pos = pygame.mouse.get_pos()
|
||||||
|
pos = (min(max((pos[0]-env.joystick_offset[0]-20)/60, 0), 1),
|
||||||
|
min(max((pos[1]-env.joystick_offset[1]-20)/60, 0), 1))
|
||||||
|
pos = pos[0]*2-1, pos[1]*2-1
|
||||||
|
return pos
|
||||||
|
|
||||||
|
|
||||||
|
def colored_noise(beta=1, dim_a=2, samples=2**18):
|
||||||
|
index = [0]*dim_a
|
||||||
|
samples = []
|
||||||
|
for d in range(dim_a):
|
||||||
|
samples.append(cn.powerlaw_psd_gaussian(beta, samples))
|
||||||
|
|
||||||
|
def noise_generator(obs):
|
||||||
|
sample = []
|
||||||
|
for d in range(dim_a):
|
||||||
|
sample.append(samples[d][index])
|
||||||
|
index += 1
|
||||||
|
return sample
|
||||||
|
|
||||||
|
return noise_generator
|
||||||
|
|
||||||
|
|
||||||
|
def pca_noise(lengthscale=1, dim_a=2, kernel_func='RBF', window=16):
|
||||||
|
|
||||||
|
dist = PCA_Distribution(
|
||||||
|
action_dim=dim_a, par_strength='SCALAR', kernel_func=kernel_func, window=window)
|
||||||
|
|
||||||
|
traj = []
|
||||||
|
|
||||||
|
def noise_generator(obs):
|
||||||
|
sample = dist.sample(th.Tensor(traj))
|
||||||
|
traj.append(sample)
|
||||||
|
return sample
|
||||||
|
|
||||||
|
return noise_generator
|
||||||
|
|
||||||
|
|
||||||
|
def choosePlayType():
|
||||||
|
options = {'human': human_input, 'REX': None, 'PCA': None, 'PINK': None}
|
||||||
|
for i, name in enumerate(options):
|
||||||
|
print('['+str(i)+'] '+name)
|
||||||
|
while True:
|
||||||
|
inp = input('[#> ')
|
||||||
|
try:
|
||||||
|
i = int(inp)
|
||||||
|
except:
|
||||||
|
print('[!] You have to enter the number...')
|
||||||
|
if i < 0 or i >= len(options):
|
||||||
|
print(
|
||||||
|
'[!] That is a number, but not one that makes sense in this context...')
|
||||||
|
return options[name]
|
||||||
|
|
||||||
|
|
||||||
|
def playEnv(env, agent_func):
|
||||||
|
done = False
|
||||||
|
obs = env.reset()
|
||||||
|
while not done:
|
||||||
|
t1 = time()
|
||||||
|
# env.render(value_func=value_func)
|
||||||
|
env.render()
|
||||||
|
inp = agent_func(obs)
|
||||||
|
obs, rew, done, info = env.step(np.array(inp, dtype=np.float32))
|
||||||
|
print('Reward: '+str(rew))
|
||||||
|
print('Score: '+str(info))
|
||||||
|
t2 = time()
|
||||||
|
dt = t2 - t1
|
||||||
|
delay = (1/env.fps - dt)
|
||||||
|
if delay < 0:
|
||||||
|
print("[!] Can't keep framerate!")
|
||||||
|
else:
|
||||||
|
sleep(delay)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
Loading…
Reference in New Issue
Block a user