# Spikey/config.yaml

# Annotated example configuration: every key below carries a comment
# describing what it controls.
name: EXAMPLE

feature_extractor:
  input_size: 1953 # Input size for the Feature Extractor (length of snippets). (=0.1s)
  transforms:
    - type: 'identity'  # Pass the last n samples of the input data directly.
      length: 8  # Number of last samples to pass directly. Use full input size if set to null.
    - type: 'fourier'  # Apply Fourier transform to the input data.
      length: null  # Use full input size if set to null. Fourier transform outputs both real and imaginary parts, doubling the size. (Computationally expensive)
    - type: 'wavelet'  # Apply selected wavelet transform to the input data.
      wavelet_type: 'haar'  # Haar wavelet is simple and fast, but may not capture detailed features well.
      length: null  # Use full input size if set to null.
    # NOTE(review): if these names are PyWavelets wavelet names, 'cgau1' (here)
    # and 'morl' (last entry) belong to the continuous families — confirm the
    # wavelet transform implementation accepts them.
    - type: 'wavelet'
      wavelet_type: 'cgau1'  # Complex Gaussian wavelets are used for complex-valued signal analysis and capturing phase information.
      length: null  # Use full input size if set to null.
    - type: 'wavelet'
      wavelet_type: 'db1'  # Daubechies wavelets provide a balance between time and frequency localization.
      length: null  # Use full input size if set to null. (Computationally expensive)
    - type: 'wavelet'
      wavelet_type: 'sym2'  # Symlet wavelets are nearly symmetrical, offering improved phase characteristics over Daubechies.
      length: null  # Use full input size if set to null. (Computationally expensive)
    - type: 'wavelet'
      wavelet_type: 'coif1'  # Coiflet wavelets have more vanishing moments, suitable for capturing polynomial trends.
      length: null  # Use full input size if set to null. (Computationally expensive)
    - type: 'wavelet'
      wavelet_type: 'bior1.3'  # Biorthogonal wavelets provide perfect reconstruction and linear phase characteristics.
      length: null  # Use full input size if set to null. (Computationally expensive)
    - type: 'wavelet'
      wavelet_type: 'rbio1.3'  # Reverse Biorthogonal wavelets are similar to Biorthogonal but optimized for different applications.
      length: null  # Use full input size if set to null. (Computationally expensive)
    - type: 'wavelet'
      wavelet_type: 'dmey'  # Discrete Meyer wavelets offer good frequency localization, ideal for signals with oscillatory components.
      length: null  # Use full input size if set to null. (Computationally expensive)
    - type: 'wavelet'
      wavelet_type: 'morl'  # Morlet wavelets are useful for time-frequency analysis due to their Gaussian-modulated sinusoid shape.
      length: null  # Use full input size if set to null. (Computationally expensive)

latent_projector:
  type: 'fc'  # Type of latent projector: 'fc', 'rnn', 'fourier'
  latent_size: 4  # Size of the latent representation before message passing.
  layer_shapes: [32, 8]  # List of layer sizes for the latent projector if type is 'fc' or 'fourier'.
  activations: ['ReLU', 'ReLU']  # Activation functions for the latent projector layers if type is 'fc' or 'fourier'.
  rnn_hidden_size: 4  # Hidden size for the RNN projector if type is 'rnn'.
  rnn_num_layers: 1  # Number of layers for the RNN projector if type is 'rnn'.
  pass_raw_len: 50  # Number of last samples to pass raw to the net in addition to frequencies (null = all) if type is 'fourier'.

middle_out:
  region_latent_size: 4  # Size of the latent representation after message passing.
  residual: false # Whether to use a ResNet-style setup. Requires region_latent_size = latent_size.
  num_peers: 3  # Number of closest peers to consider.

predictor:
  layer_shapes: [3]  # List of layer sizes for the predictor.
  activations: ['ReLU']  # Activation functions for the predictor layers.

training:
  epochs: 1024  # Number of training epochs.
  batch_size: 32  # Batch size for training.
  num_batches: 1  # Number of batches per epoch.
  learning_rate: 0.01  # Learning rate for the optimizer.
  peer_gradients_factor: 0.33 # Factor for gradients acting on the predictor through peers. 0.0 = detach gradients.
  value_scale: 1 # Normalize data by dividing values by this (and multiplying the outputs by it).
  eval_freq: 8  # Frequency of evaluation during training (in epochs).
  save_path: models  # Directory to save the best model and encoder.

evaluation:
  full_compression: false  # Perform full compression during evaluation.

bitstream_encoding:
  # NOTE(review): the DEFAULT document in this file sets 'binomHuffman', which
  # is not in the list below — confirm the full set of supported types.
  type: identity  # Bitstream encoding type: 'arithmetic', 'identity', 'bzip2'.

data:
  url: https://content.neuralink.com/compression-challenge/data.zip  # URL to download the dataset.
  directory: data  # Directory to extract and store the dataset.
  split_ratio: 0.8  # Ratio to split the data into train and test sets.
  cut_length: null  # Optional length to cut sequences to.

profiler:
  enable: false  # Enable profiler.

---

# Shared settings: cluster (slurm), scheduling, logging (wandb) and baseline
# evaluation/data/training values.
# NOTE(review): presumably merged into every document that declares
# `import: $` — confirm against the runner.
name: DEFAULT
project: Spikey_2

# Slurm job submission settings.
slurm:
  name: 'Spikey_{config[name]}'
  partitions:
    - single
  standard_output: ./reports/slurm/out_%A_%a.log
  standard_error: ./reports/slurm/err_%A_%a.log
  num_parallel_jobs: 50
  cpus_per_task: 8
  # NOTE(review): unit is not stated in this file (presumably MB) — confirm.
  memory_per_cpu: 4000
  time_limit: 1440  # in minutes (= 24 hours)
  ntasks: 1
  venv: '.venv/bin/activate'
  # Shell lines: create wandb and pycache directories under {tmp} and point
  # PYTHONPYCACHEPREFIX at the pycache directory.
  sh_lines:
    - 'mkdir -p {tmp}/wandb'
    - 'mkdir -p {tmp}/local_pycache'
    - 'export PYTHONPYCACHEPREFIX={tmp}/local_pycache'

runner: spikey

scheduler:
  reps_per_version: 1
  agents_per_job: 8
  reps_per_agent: 1

# Weights & Biases run metadata; values are format templates.
wandb:
  project: '{config[project]}'
  group: '{config[name]}'
  job_type: '{delta_desc}'
  name: '{job_id}_{task_id}:{run_id}:{rand}={config[name]}_{delta_desc}'
  # Disabled: these templates reference config[env] and config[algo], which no
  # document in the visible part of this file defines.
  #tags:
  #  - '{config[env][name]}'
  #  - '{config[algo][name]}'
  sync_tensorboard: false
  monitor_gym: false
  save_code: false

evaluation:
  full_compression: false  # Perform full compression during evaluation.

bitstream_encoding:
  # NOTE(review): the EXAMPLE document's comment lists only 'arithmetic',
  # 'identity' and 'bzip2' — confirm 'binomHuffman' is a supported type.
  type: binomHuffman

data:
  url: https://content.neuralink.com/compression-challenge/data.zip  # URL to download the dataset.
  directory: data  # Directory to extract and store the dataset.
  split_ratio: 0.8  # Ratio to split the data into train and test sets.
  cut_length: null  # Optional length to cut sequences to.

profiler:
  enable: false

training:
  eval_freq: 8  # Frequency of evaluation during training (in epochs).
  save_path: models  # Directory to save the best model and encoder.
  # Factor for gradients acting on the predictor through peers (0.0 = detach).
  # NOTE(review): the FC_AblPeerGrad document is annotated "Best: 0.33" —
  # confirm 0.25 is the intended default.
  peer_gradients_factor: 0.25
  value_scale: 1000  # Data values are divided by this for normalization.
  device: cpu

middle_out:
  residual: false  # ResNet-style setup disabled here; experiment documents set it to true.
---
# Experiment FC: 'fc' latent projector with an identity feature extractor.
name: FC
import: $

feature_extractor:
  # Fixed: this key was spelled 'input size' (with a space), which does not
  # match the 'input_size' key documented in the EXAMPLE document.
  # NOTE(review): 10 differs from latent_projector.input_size (1953) below —
  # confirm this is the intended snippet length.
  input_size: 10
  transforms:
    - type: 'identity'  # Pass input samples through directly; no 'length' is set here.

latent_projector:
  type: fc
  input_size: 1953
  latent_size: 4  # Size of the latent representation before message passing.
  layer_shapes: [32, 8]  # Layer sizes of the latent projector.
  activations: ['ReLU', 'ReLU']  # Activation functions for those layers.

middle_out:
  region_latent_size: 4  # Equals latent_size, as residual: true requires.
  num_peers: 3  # Number of closest peers to consider.
  residual: true

predictor:
  layer_shapes: [3]  # Layer sizes of the predictor.
  activations: ['ReLU']

training:
  epochs: 1024
  batch_size: 32
  num_batches: 1  # Number of batches per epoch.
  learning_rate: 0.01
---
# Experiment FC_AblLR: 'fc' latent projector with a grid over
# training.learning_rate (see `grid` at the end).
name: FC_AblLR
import: $

latent_projector:
  type: fc
  input_size: 1953
  # Size of the latent representation before message passing.
  latent_size: 4
  # Layer sizes and the activation function of each layer.
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU

middle_out:
  # Size of the latent representation after message passing; equals
  # latent_size, as the residual setup requires.
  region_latent_size: 4
  # Number of closest peers to consider.
  num_peers: 3
  residual: true

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  epochs: 1024
  batch_size: 32
  # Batches per epoch.
  num_batches: 1
  # Base value; the grid below lists the candidate values.
  learning_rate: 0.01
  device: cpu

# Candidate values, addressed by dotted key path.
grid:
  training.learning_rate:
    - 0.1
    - 0.01
    - 0.001
    - 0.0001
---
# Experiment RNN: recurrent ('rnn') latent projector.
name: RNN
import: $

latent_projector:
  type: rnn
  input_size: 1953
  # Size of the latent representation before message passing.
  latent_size: 4
  # Hidden size and number of layers of the RNN projector.
  rnn_hidden_size: 3
  rnn_num_layers: 2

middle_out:
  # Size of the latent representation after message passing; equals
  # latent_size, as the residual setup requires.
  region_latent_size: 4
  # Number of closest peers to consider.
  num_peers: 3
  residual: true

predictor:
  # Predictor layer sizes and activation functions.
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  epochs: 1024
  batch_size: 32
  # Two batches per epoch.
  num_batches: 2
  learning_rate: 0.01
---
# Experiment FOURIER: 'fourier' latent projector on 1 s inputs with an
# 8-wide latent.
name: FOURIER
import: $

latent_projector:
  type: fourier
  input_size: 19531  # 1s
  # Size of the latent representation before message passing.
  latent_size: 8
  # Layer sizes and the activation function of each layer.
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU
  # Number of last samples passed raw in addition to the frequencies.
  pass_raw_len: 1953  # 0.1s

middle_out:
  # Size of the latent representation after message passing; equals
  # latent_size, as the residual setup requires.
  region_latent_size: 8
  # Number of closest peers to consider.
  num_peers: 3
  residual: true

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  epochs: 1024
  batch_size: 32
  # Sixteen batches per epoch.
  num_batches: 16
  learning_rate: 0.01
---
# Experiment FC_AblPeerGrad: grid over training.peer_gradients_factor.
# Recorded result: best value 0.33.
name: FC_AblPeerGrad
import: $

latent_projector:
  type: fc
  input_size: 1953
  # Size of the latent representation before message passing.
  latent_size: 4
  # Layer sizes and the activation function of each layer.
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU

middle_out:
  # Size of the latent representation after message passing; equals
  # latent_size, as the residual setup requires.
  region_latent_size: 4
  # Number of closest peers to consider.
  num_peers: 2
  residual: true

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  epochs: 1024
  batch_size: 16
  # Batches per epoch.
  num_batches: 1
  learning_rate: 0.01
  device: cpu

# NOTE(review): this grid is a nested mapping, whereas the other grids in this
# file use dotted keys (e.g. `training.learning_rate`) — confirm the runner
# accepts both forms.
grid:
  training:
    # Factor for gradients acting on the predictor through peers
    # (0.0 = detach gradients).
    peer_gradients_factor:
      - 0.0
      - 0.1
      - 0.25
      - 0.33
      - 0.5
      - 1.0
---
# Experiment FC_NoPeer: 'fc' latent projector with zero peers.
# Recorded result: worse.
name: FC_NoPeer
import: $

latent_projector:
  type: fc
  input_size: 1953
  # Size of the latent representation before message passing.
  latent_size: 4
  # Layer sizes and the activation function of each layer.
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU

middle_out:
  # Size of the latent representation after message passing; equals
  # latent_size, as the residual setup requires.
  region_latent_size: 4
  # Number of closest peers to consider: none in this experiment.
  num_peers: 0
  residual: true

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  epochs: 1024
  batch_size: 16
  # Batches per epoch.
  num_batches: 1
  learning_rate: 0.01
  device: cpu

---
# Experiment FC_ScaleAbl: grid over training.value_scale.
# Recorded result: best value 1000.
name: FC_ScaleAbl
import: $

latent_projector:
  type: fc
  input_size: 1953
  # Size of the latent representation before message passing.
  latent_size: 4
  # Layer sizes and the activation function of each layer.
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU

middle_out:
  # Size of the latent representation after message passing; equals
  # latent_size, as the residual setup requires.
  region_latent_size: 4
  # Number of closest peers to consider.
  num_peers: 3
  residual: true

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  epochs: 1024
  batch_size: 32
  # Batches per epoch.
  num_batches: 1
  learning_rate: 0.01
  device: cpu

# Candidate values for the divisor used to normalize the data, addressed by
# dotted key path.
grid:
  training.value_scale:
    - 1
    - 100
    - 1000
    - 10000
---
# Experiment FC_BSAbl3: grid over training.batch_size.
# Recorded result: 64 is best, everything >=64 is ok.
name: FC_BSAbl3
import: $

latent_projector:
  type: fc
  input_size: 1953
  # Size of the latent representation before message passing.
  latent_size: 4
  # Layer sizes and the activation function of each layer.
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU

middle_out:
  # Size of the latent representation after message passing; equals
  # latent_size, as the residual setup requires.
  region_latent_size: 4
  # Number of closest peers to consider.
  num_peers: 3
  residual: true

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  epochs: 1024
  # Base value; it is not one of the grid's candidate values below.
  batch_size: 32
  # Batches per epoch.
  num_batches: 1
  learning_rate: 0.01
  device: cpu

# Candidate batch sizes, addressed by dotted key path.
grid:
  training.batch_size:
    - 64
    - 128
    - 256
---
# Experiment FC_smol_master2: small 'fc' projector (195-sample input,
# layers 20 and 6) trained for 10000 epochs.
name: FC_smol_master2
import: $

scheduler:
  reps_per_version: 8
  agents_per_job: 8

latent_projector:
  type: fc
  input_size: 195
  # Size of the latent representation before message passing.
  latent_size: 4
  # Layer sizes and the activation function of each layer.
  layer_shapes:
    - 20
    - 6
  activations:
    - ReLU
    - ReLU

middle_out:
  # Size of the latent representation after message passing; equals
  # latent_size, as the residual setup requires.
  region_latent_size: 4
  # Number of closest peers to consider.
  num_peers: 2
  residual: true

predictor:
  layer_shapes:
    - 2
  activations:
    - ReLU

training:
  epochs: 10000
  batch_size: 32
  # Batches per epoch.
  num_batches: 1
  learning_rate: 0.01
  device: cpu
---
# Experiment FC_smolTanh: small 'fc' projector (195-sample input, layers 20
# and 6) with Tanh activations in both projector and predictor.
name: FC_smolTanh
import: $

latent_projector:
  type: fc
  input_size: 195
  # Size of the latent representation before message passing.
  latent_size: 4
  # Layer sizes and the activation function of each layer.
  layer_shapes:
    - 20
    - 6
  activations:
    - Tanh
    - Tanh

middle_out:
  # Size of the latent representation after message passing; equals
  # latent_size, as the residual setup requires.
  region_latent_size: 4
  # Number of closest peers to consider.
  num_peers: 2
  residual: true

predictor:
  layer_shapes:
    - 2
  activations:
    - Tanh

training:
  epochs: 1024
  batch_size: 32
  # Batches per epoch.
  num_batches: 1
  learning_rate: 0.01
  device: cpu
---
# Experiment FOURIER_thin: 'fourier' latent projector on 0.1 s inputs with a
# 4-wide latent.
name: FOURIER_thin
import: $

latent_projector:
  type: fourier
  input_size: 1953  # 0.1s
  # Size of the latent representation before message passing.
  latent_size: 4
  # Layer sizes and the activation function of each layer.
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU
  # Number of last samples passed raw in addition to the frequencies.
  pass_raw_len: 195  # 0.01s

middle_out:
  # Size of the latent representation after message passing; equals
  # latent_size, as the residual setup requires.
  region_latent_size: 4
  # Number of closest peers to consider.
  num_peers: 3
  residual: true

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  epochs: 1024
  batch_size: 32
  # Batches per epoch.
  num_batches: 1
  learning_rate: 0.01
---
# Experiment FOURIER_thicc: 'fourier' latent projector on 0.1 s inputs with
# an 8-wide latent and a 4-unit predictor layer.
name: FOURIER_thicc
import: $

latent_projector:
  type: fourier
  input_size: 1953  # 0.1s
  # Size of the latent representation before message passing.
  latent_size: 8
  # Layer sizes and the activation function of each layer.
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU
  # Number of last samples passed raw in addition to the frequencies.
  pass_raw_len: 195  # 0.01s

middle_out:
  # Size of the latent representation after message passing; equals
  # latent_size, as the residual setup requires.
  region_latent_size: 8
  # Number of closest peers to consider.
  num_peers: 3
  residual: true

predictor:
  layer_shapes:
    - 4
  activations:
    - ReLU

training:
  epochs: 1024
  batch_size: 32
  # Batches per epoch.
  num_batches: 1
  learning_rate: 0.01
---
# Experiment FC_master3: standard 'fc' setup scheduled with
# reps_per_version 8 and agents_per_job 8.
name: FC_master3
import: $

scheduler:
  reps_per_version: 8
  agents_per_job: 8

latent_projector:
  type: fc
  input_size: 1953
  # Size of the latent representation before message passing.
  latent_size: 4
  # Layer sizes and the activation function of each layer.
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU

middle_out:
  # Size of the latent representation after message passing; equals
  # latent_size, as the residual setup requires.
  region_latent_size: 4
  # Number of closest peers to consider.
  num_peers: 3
  residual: true

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  epochs: 1024
  batch_size: 32
  # Batches per epoch.
  num_batches: 1
  learning_rate: 0.01
---
# Experiment FC_master_single: standard 'fc' setup scheduled with
# reps_per_version 1 and agents_per_job 1.
name: FC_master_single
import: $

scheduler:
  reps_per_version: 1
  agents_per_job: 1

latent_projector:
  type: fc
  input_size: 1953
  # Size of the latent representation before message passing.
  latent_size: 4
  # Layer sizes and the activation function of each layer.
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU

middle_out:
  # Size of the latent representation after message passing; equals
  # latent_size, as the residual setup requires.
  region_latent_size: 4
  # Number of closest peers to consider.
  num_peers: 3
  residual: true

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  epochs: 1024
  batch_size: 32
  # Batches per epoch.
  num_batches: 1
  learning_rate: 0.01
---
# Experiment debug: standard 'fc' setup scheduled with reps_per_version 1
# and agents_per_job 1 (same values as FC_master_single).
name: debug
import: $

scheduler:
  reps_per_version: 1
  agents_per_job: 1

latent_projector:
  type: fc
  input_size: 1953
  # Size of the latent representation before message passing.
  latent_size: 4
  # Layer sizes and the activation function of each layer.
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU

middle_out:
  # Size of the latent representation after message passing; equals
  # latent_size, as the residual setup requires.
  region_latent_size: 4
  # Number of closest peers to consider.
  num_peers: 3
  residual: true

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  epochs: 1024
  batch_size: 32
  # Batches per epoch.
  num_batches: 1
  learning_rate: 0.01
---
# Experiment FOURIER_smol_master: small 'fourier' projector (195-sample
# input, layers 20 and 6) trained for 10000 epochs.
name: FOURIER_smol_master
import: $

scheduler:
  reps_per_version: 8
  agents_per_job: 8

latent_projector:
  type: fourier
  input_size: 195
  # Size of the latent representation before message passing.
  latent_size: 4
  # Layer sizes and the activation function of each layer.
  layer_shapes:
    - 20
    - 6
  activations:
    - ReLU
    - ReLU
  # Number of last samples passed raw in addition to the frequencies.
  pass_raw_len: 20  # 0.001s

middle_out:
  # Size of the latent representation after message passing; equals
  # latent_size, as the residual setup requires.
  region_latent_size: 4
  # Number of closest peers to consider.
  num_peers: 2
  residual: true

predictor:
  layer_shapes:
    - 2
  activations:
    - ReLU

training:
  epochs: 10000
  batch_size: 32
  # Batches per epoch.
  num_batches: 1
  learning_rate: 0.01
  device: cpu