# Spikey/config.yaml
#
# 596 lines
# 12 KiB
# YAML

# Annotated reference document: lists every section together with a short
# explanation of each option.
name: EXAMPLE

feature_extractor:
  # Input size for the Feature Extractor (length of snippets). (=0.1s)
  input_size: 1953
  transforms:
    # Pass the last n samples of the input data directly.
    - type: 'identity'
      # Number of last samples to pass directly.
      # Use full input size if set to null.
      length: 8
    # Apply Fourier transform to the input data.
    - type: 'fourier'
      # Use full input size if set to null. Fourier transform outputs both
      # real and imaginary parts, doubling the size.
      # (Computationally expensive)
      length: null
    # Apply selected wavelet transform to the input data.
    - type: 'wavelet'
      # Haar wavelet is simple and fast, but may not capture detailed
      # features well.
      wavelet_type: 'haar'
      # Use full input size if set to null.
      length: null
    - type: 'wavelet'
      # Complex Gaussian wavelets are used for complex-valued signal analysis
      # and capturing phase information.
      wavelet_type: 'cgau1'
      # Use full input size if set to null.
      length: null
    - type: 'wavelet'
      # Daubechies wavelets provide a balance between time and frequency
      # localization.
      wavelet_type: 'db1'
      # Use full input size if set to null. (Computationally expensive)
      length: null
    - type: 'wavelet'
      # Symlet wavelets are nearly symmetrical, offering improved phase
      # characteristics over Daubechies.
      wavelet_type: 'sym2'
      # Use full input size if set to null. (Computationally expensive)
      length: null
    - type: 'wavelet'
      # Coiflet wavelets have more vanishing moments, suitable for capturing
      # polynomial trends.
      wavelet_type: 'coif1'
      # Use full input size if set to null. (Computationally expensive)
      length: null
    - type: 'wavelet'
      # Biorthogonal wavelets provide perfect reconstruction and linear phase
      # characteristics.
      wavelet_type: 'bior1.3'
      # Use full input size if set to null. (Computationally expensive)
      length: null
    - type: 'wavelet'
      # Reverse Biorthogonal wavelets are similar to Biorthogonal but
      # optimized for different applications.
      wavelet_type: 'rbio1.3'
      # Use full input size if set to null. (Computationally expensive)
      length: null
    - type: 'wavelet'
      # Discrete Meyer wavelets offer good frequency localization, ideal for
      # signals with oscillatory components.
      wavelet_type: 'dmey'
      # Use full input size if set to null. (Computationally expensive)
      length: null
    - type: 'wavelet'
      # Morlet wavelets are useful for time-frequency analysis due to their
      # Gaussian-modulated sinusoid shape.
      wavelet_type: 'morl'
      # Use full input size if set to null. (Computationally expensive)
      length: null

latent_projector:
  # Type of latent projector: 'fc', 'rnn', 'fourier'
  type: 'fc'
  # Size of the latent representation before message passing.
  latent_size: 4
  # List of layer sizes for the latent projector if type is 'fc' or 'fourier'.
  layer_shapes: [32, 8]
  # Activation functions for the latent projector layers if type is 'fc' or
  # 'fourier'.
  activations: ['ReLU', 'ReLU']
  # Hidden size for the RNN projector if type is 'rnn'.
  rnn_hidden_size: 4
  # Number of layers for the RNN projector if type is 'rnn'.
  rnn_num_layers: 1
  # Number of last samples to pass raw to the net in addition to frequencies
  # (null = all) if type is 'fourier'.
  pass_raw_len: 50

middle_out:
  # Size of the latent representation after message passing.
  region_latent_size: 4
  # Whether to use a ResNet style setup.
  # Requires region_latent_size = latent_size
  residual: false
  # Number of closest peers to consider.
  num_peers: 3

predictor:
  # List of layer sizes for the predictor.
  layer_shapes: [3]
  # Activation functions for the predictor layers.
  activations: ['ReLU']

training:
  # Number of training epochs.
  epochs: 1024
  # Batch size for training.
  batch_size: 32
  # Number of batches per epoch.
  num_batches: 1
  # Learning rate for the optimizer.
  learning_rate: 0.01
  # Factor for gradients acting on predictor through peers.
  # 0.0 = detach gradients.
  peer_gradients_factor: 0.33
  # Normalize data by dividing values by this (and multiply outputs)
  value_scale: 1
  # Frequency of evaluation during training (in epochs).
  eval_freq: 8
  # Directory to save the best model and encoder.
  save_path: models

evaluation:
  # Perform full compression during evaluation.
  full_compression: false

bitstream_encoding:
  # Bitstream encoding type: 'arithmetic', 'identity', 'bzip2'.
  # (The DEFAULT document in this file additionally uses 'binomHuffman'.)
  type: identity

data:
  # URL to download the dataset.
  url: https://content.neuralink.com/compression-challenge/data.zip
  # Directory to extract and store the dataset.
  directory: data
  # Ratio to split the data into train and test sets.
  split_ratio: 0.8
  # Optional length to cut sequences to.
  cut_length: null

profiler:
  # Enable profiler.
  enable: false
---
# Shared settings: cluster submission, scheduling, experiment tracking, data
# source and baseline training values.
name: DEFAULT
project: Spikey_2

slurm:
  name: 'Spikey_{config[name]}'
  partitions:
    - single
  standard_output: ./reports/slurm/out_%A_%a.log
  standard_error: ./reports/slurm/err_%A_%a.log
  num_parallel_jobs: 50
  cpus_per_task: 8
  memory_per_cpu: 4000
  time_limit: 1440  # in minutes
  ntasks: 1
  venv: '.venv/bin/activate'
  # Shell lines listed for the job script: create per-job scratch directories
  # and point the Python bytecode cache at one of them.
  sh_lines:
    - 'mkdir -p {tmp}/wandb'
    - 'mkdir -p {tmp}/local_pycache'
    - 'export PYTHONPYCACHEPREFIX={tmp}/local_pycache'

runner: spikey

scheduler:
  reps_per_version: 1
  agents_per_job: 8
  reps_per_agent: 1

wandb:
  project: '{config[project]}'
  group: '{config[name]}'
  job_type: '{delta_desc}'
  name: '{job_id}_{task_id}:{run_id}:{rand}={config[name]}_{delta_desc}'
  # tags:
  #   - '{config[env][name]}'
  #   - '{config[algo][name]}'
  sync_tensorboard: false
  monitor_gym: false
  save_code: false

evaluation:
  full_compression: false

bitstream_encoding:
  type: binomHuffman

data:
  url: https://content.neuralink.com/compression-challenge/data.zip
  directory: data
  split_ratio: 0.8
  cut_length: null

profiler:
  enable: false

training:
  eval_freq: 8
  save_path: models
  peer_gradients_factor: 0.25
  value_scale: 1000
  # NOTE(review): `device` is nested under `training` based on key order in
  # the flattened source - confirm against the runner.
  device: cpu

middle_out:
  residual: false
---
# Fully connected ('fc') latent projector with residual message passing over
# 3 peers.
# `import: $` presumably pulls in the DEFAULT document of this file - confirm
# against the config loader.
name: FC
import: $

feature_extractor:
  # This key was previously spelled `input size` (with a space), which does
  # not match the `input_size` key used by the EXAMPLE document.
  # NOTE(review): confirm that 10 is the intended value; latent_projector
  # below still declares input_size 1953.
  input_size: 10
  transforms:
    - type: 'identity'

latent_projector:
  type: fc
  input_size: 1953
  latent_size: 4
  layer_shapes: [32, 8]
  activations: ['ReLU', 'ReLU']

middle_out:
  # residual is enabled, so region_latent_size is kept equal to latent_size.
  region_latent_size: 4
  num_peers: 3
  residual: true

predictor:
  layer_shapes: [3]
  activations: ['ReLU']

training:
  epochs: 1024
  batch_size: 32
  num_batches: 1
  learning_rate: 0.01
---
# Learning-rate ablation for the 'fc' projector: the grid lists four values
# for training.learning_rate.
name: FC_AblLR
import: $

latent_projector:
  type: fc
  input_size: 1953
  latent_size: 4
  layer_shapes: [32, 8]
  activations: ['ReLU', 'ReLU']

middle_out:
  region_latent_size: 4
  num_peers: 3
  residual: true

predictor:
  layer_shapes: [3]
  activations: ['ReLU']

training:
  epochs: 1024
  batch_size: 32
  num_batches: 1
  learning_rate: 0.01
  device: cpu

grid:
  training.learning_rate: [0.1, 0.01, 0.001, 0.0001]
---
# Recurrent ('rnn') latent projector: 2 layers with hidden size 3.
name: RNN
import: $

latent_projector:
  type: rnn
  input_size: 1953
  latent_size: 4
  rnn_hidden_size: 3
  rnn_num_layers: 2

middle_out:
  region_latent_size: 4
  num_peers: 3
  residual: true

predictor:
  layer_shapes: [3]
  activations: ['ReLU']

training:
  epochs: 1024
  batch_size: 32
  num_batches: 2
  learning_rate: 0.01
---
# 'fourier' latent projector with an input_size of 19531 and a latent size
# of 8.
name: FOURIER
import: $

latent_projector:
  type: fourier
  input_size: 19531  # 1s
  latent_size: 8
  layer_shapes: [32, 8]
  activations: ['ReLU', 'ReLU']
  pass_raw_len: 1953  # 0.1s

middle_out:
  region_latent_size: 8
  num_peers: 3
  residual: true

predictor:
  layer_shapes: [3]
  activations: ['ReLU']

training:
  epochs: 1024
  batch_size: 32
  num_batches: 16
  learning_rate: 0.01
---
# Ablation over training.peer_gradients_factor for the 'fc' projector
# (2 peers, batch size 16).
name: FC_AblPeerGrad  # Best: 0.33
import: $

latent_projector:
  type: fc
  input_size: 1953
  latent_size: 4
  layer_shapes: [32, 8]
  activations: ['ReLU', 'ReLU']

middle_out:
  region_latent_size: 4
  num_peers: 2
  residual: true

predictor:
  layer_shapes: [3]
  activations: ['ReLU']

training:
  epochs: 1024
  batch_size: 16
  num_batches: 1
  learning_rate: 0.01
  device: cpu

# NOTE(review): this grid is written as a nested mapping, whereas the other
# ablation documents in this file use dotted keys (e.g.
# `training.learning_rate`); confirm the runner accepts both spellings.
grid:
  training:
    peer_gradients_factor: [0.0, 0.1, 0.25, 0.33, 0.5, 1.0]
---
# 'fc' projector with num_peers set to 0.
name: FC_NoPeer  # Worse
import: $

latent_projector:
  type: fc
  input_size: 1953
  latent_size: 4
  layer_shapes: [32, 8]
  activations: ['ReLU', 'ReLU']

middle_out:
  region_latent_size: 4
  num_peers: 0
  residual: true

predictor:
  layer_shapes: [3]
  activations: ['ReLU']

training:
  epochs: 1024
  batch_size: 16
  num_batches: 1
  learning_rate: 0.01
  device: cpu
---
# Ablation over training.value_scale for the 'fc' projector.
name: FC_ScaleAbl  # Best: 1000
import: $

latent_projector:
  type: fc
  input_size: 1953
  latent_size: 4
  layer_shapes: [32, 8]
  activations: ['ReLU', 'ReLU']

middle_out:
  region_latent_size: 4
  num_peers: 3
  residual: true

predictor:
  layer_shapes: [3]
  activations: ['ReLU']

training:
  epochs: 1024
  batch_size: 32
  num_batches: 1
  learning_rate: 0.01
  device: cpu

grid:
  training.value_scale: [1, 100, 1000, 10000]
---
# Ablation over training.batch_size for the 'fc' projector.
name: FC_BSAbl3  # 64 is best, everything >=64 is ok
import: $

latent_projector:
  type: fc
  input_size: 1953
  latent_size: 4
  layer_shapes: [32, 8]
  activations: ['ReLU', 'ReLU']

middle_out:
  region_latent_size: 4
  num_peers: 3
  residual: true

predictor:
  layer_shapes: [3]
  activations: ['ReLU']

training:
  epochs: 1024
  batch_size: 32
  num_batches: 1
  learning_rate: 0.01
  device: cpu

grid:
  training.batch_size: [64, 128, 256]
---
# Small 'fc' projector (input_size 195, layers [20, 6]) trained for 10000
# epochs, with 8 repetitions per version.
name: FC_smol_master2
import: $

scheduler:
  reps_per_version: 8
  agents_per_job: 8

latent_projector:
  type: fc
  input_size: 195
  latent_size: 4
  layer_shapes: [20, 6]
  activations: ['ReLU', 'ReLU']

middle_out:
  region_latent_size: 4
  num_peers: 2
  residual: true

predictor:
  layer_shapes: [2]
  activations: ['ReLU']

training:
  epochs: 10000
  batch_size: 32
  num_batches: 1
  learning_rate: 0.01
  device: cpu
---
# Small 'fc' projector (input_size 195, layers [20, 6]) using Tanh
# activations in both the projector and the predictor.
name: FC_smolTanh
import: $

latent_projector:
  type: fc
  input_size: 195
  latent_size: 4
  layer_shapes: [20, 6]
  activations: ['Tanh', 'Tanh']

middle_out:
  region_latent_size: 4
  num_peers: 2
  residual: true

predictor:
  layer_shapes: [2]
  activations: ['Tanh']

training:
  epochs: 1024
  batch_size: 32
  num_batches: 1
  learning_rate: 0.01
  device: cpu
---
# 'fourier' projector with input_size 1953 and a latent size of 4.
name: FOURIER_thin
import: $

latent_projector:
  type: fourier
  input_size: 1953  # 0.1s
  latent_size: 4
  layer_shapes: [32, 8]
  activations: ['ReLU', 'ReLU']
  pass_raw_len: 195  # 0.01s

middle_out:
  region_latent_size: 4
  num_peers: 3
  residual: true

predictor:
  layer_shapes: [3]
  activations: ['ReLU']

training:
  epochs: 1024
  batch_size: 32
  num_batches: 1
  learning_rate: 0.01
---
# 'fourier' projector with input_size 1953, a latent size of 8 and a
# predictor layer of width 4.
name: FOURIER_thicc
import: $

latent_projector:
  type: fourier
  input_size: 1953  # 0.1s
  latent_size: 8
  layer_shapes: [32, 8]
  activations: ['ReLU', 'ReLU']
  pass_raw_len: 195  # 0.01s

middle_out:
  region_latent_size: 8
  num_peers: 3
  residual: true

predictor:
  layer_shapes: [4]
  activations: ['ReLU']

training:
  epochs: 1024
  batch_size: 32
  num_batches: 1
  learning_rate: 0.01
---
# 'fc' projector on input_size 1953, scheduled with 8 repetitions per
# version and 8 agents per job.
name: FC_master3
import: $

scheduler:
  reps_per_version: 8
  agents_per_job: 8

latent_projector:
  type: fc
  input_size: 1953
  latent_size: 4
  layer_shapes: [32, 8]
  activations: ['ReLU', 'ReLU']

middle_out:
  region_latent_size: 4
  num_peers: 3
  residual: true

predictor:
  layer_shapes: [3]
  activations: ['ReLU']

training:
  epochs: 1024
  batch_size: 32
  num_batches: 1
  learning_rate: 0.01
---
# 'fc' projector on input_size 1953, scheduled as a single repetition with a
# single agent per job.
name: FC_master_single
import: $

scheduler:
  reps_per_version: 1
  agents_per_job: 1

latent_projector:
  type: fc
  input_size: 1953
  latent_size: 4
  layer_shapes: [32, 8]
  activations: ['ReLU', 'ReLU']

middle_out:
  region_latent_size: 4
  num_peers: 3
  residual: true

predictor:
  layer_shapes: [3]
  activations: ['ReLU']

training:
  epochs: 1024
  batch_size: 32
  num_batches: 1
  learning_rate: 0.01
---
# Debug document: 'fc' projector with one repetition and one agent per job.
name: debug
import: $

scheduler:
  reps_per_version: 1
  agents_per_job: 1

latent_projector:
  type: fc
  input_size: 1953
  latent_size: 4
  layer_shapes: [32, 8]
  activations: ['ReLU', 'ReLU']

middle_out:
  region_latent_size: 4
  num_peers: 3
  residual: true

predictor:
  layer_shapes: [3]
  activations: ['ReLU']

training:
  epochs: 1024
  batch_size: 32
  num_batches: 1
  learning_rate: 0.01
---
# Small 'fourier' projector (input_size 195, layers [20, 6]) trained for
# 10000 epochs, with 8 repetitions per version.
name: FOURIER_smol_master
import: $

scheduler:
  reps_per_version: 8
  agents_per_job: 8

latent_projector:
  type: fourier
  input_size: 195
  latent_size: 4
  layer_shapes: [20, 6]
  activations: ['ReLU', 'ReLU']
  pass_raw_len: 20  # 0.001s

middle_out:
  region_latent_size: 4
  num_peers: 2
  residual: true

predictor:
  layer_shapes: [2]
  activations: ['ReLU']

training:
  epochs: 10000
  batch_size: 32
  num_batches: 1
  learning_rate: 0.01
  device: cpu