Spikey/config.yaml

# Annotated example configuration: lists the available sections and keys with inline explanations.
name: EXAMPLE

feature_extractor:
  input_size: 1953 # Input size for the Feature Extractor (length of snippets). (=0.1s)
  transforms:
    - type: 'identity'  # Pass the last n samples of the input data directly.
      length: 8  # Number of last samples to pass directly. Use full input size if set to null.
    - type: 'fourier'  # Apply Fourier transform to the input data.
      length: null  # Use full input size if set to null. Fourier transform outputs both real and imaginary parts, doubling the size. (Computationally expensive)
    - type: 'wavelet'  # Apply selected wavelet transform to the input data.
      wavelet_type: 'haar'  # Haar wavelet is simple and fast, but may not capture detailed features well.
      length: null  # Use full input size if set to null.
    - type: 'wavelet'
      wavelet_type: 'cgau1'  # Complex Gaussian wavelets are used for complex-valued signal analysis and capturing phase information.
      length: null  # Use full input size if set to null.
    - type: 'wavelet'
      wavelet_type: 'db1'  # Daubechies wavelets provide a balance between time and frequency localization.
      length: null  # Use full input size if set to null. (Computationally expensive)
    - type: 'wavelet'
      wavelet_type: 'sym2'  # Symlet wavelets are nearly symmetrical, offering improved phase characteristics over Daubechies.
      length: null  # Use full input size if set to null. (Computationally expensive)
    - type: 'wavelet'
      wavelet_type: 'coif1'  # Coiflet wavelets have more vanishing moments, suitable for capturing polynomial trends.
      length: null  # Use full input size if set to null. (Computationally expensive)
    - type: 'wavelet'
      wavelet_type: 'bior1.3'  # Biorthogonal wavelets provide perfect reconstruction and linear phase characteristics.
      length: null  # Use full input size if set to null. (Computationally expensive)
    - type: 'wavelet'
      wavelet_type: 'rbio1.3'  # Reverse Biorthogonal wavelets are similar to Biorthogonal but optimized for different applications.
      length: null  # Use full input size if set to null. (Computationally expensive)
    - type: 'wavelet'
      wavelet_type: 'dmey'  # Discrete Meyer wavelets offer good frequency localization, ideal for signals with oscillatory components.
      length: null  # Use full input size if set to null. (Computationally expensive)
    - type: 'wavelet'
      wavelet_type: 'morl'  # Morlet wavelets are useful for time-frequency analysis due to their Gaussian-modulated sinusoid shape.
      length: null  # Use full input size if set to null. (Computationally expensive)

latent_projector:
  type: 'fc'  # Type of latent projector: 'fc', 'rnn', 'fourier'
  latent_size: 4  # Size of the latent representation before message passing.
  layer_shapes: [32, 8]  # List of layer sizes for the latent projector if type is 'fc' or 'fourier'.
  activations: ['ReLU', 'ReLU']  # Activation functions for the latent projector layers if type is 'fc' or 'fourier'.
  rnn_hidden_size: 4  # Hidden size for the RNN projector if type is 'rnn'.
  rnn_num_layers: 1  # Number of layers for the RNN projector if type is 'rnn'.
  pass_raw_len: 50  # Number of last samples to pass raw to the net in addition to frequencies (null = all) if type is 'fourier'.

middle_out:
  region_latent_size: 4  # Size of the latent representation after message passing.
  residual: false # Whether to use a ResNet-style setup. Requires region_latent_size = latent_size.
  num_peers: 3  # Number of closest peers to consider.

predictor:
  layer_shapes: [3]  # List of layer sizes for the predictor.
  activations: ['ReLU']  # Activation functions for the predictor layers.

training:
  epochs: 1024  # Number of training epochs.
  batch_size: 32  # Batch size for training.
  num_batches: 1  # Number of batches per epoch.
  learning_rate: 0.01  # Learning rate for the optimizer.
  peer_gradients_factor: 0.33 # Factor for gradients acting on the predictor through peers. 0.0 = detach gradients.
  value_scale: 1 # Normalize data by dividing values by this (and multiplying outputs by it).
  eval_freq: 8  # Frequency of evaluation during training (in epochs).
  save_path: models  # Directory to save the best model and encoder.

evaluation:
  full_compression: false  # Perform full compression during evaluation.

bitstream_encoding:
  type: identity  # Bitstream encoding type: 'arithmetic', 'identity', 'bzip2'. (The DEFAULT document in this file also uses 'binomHuffman'.)

data:
  url: https://content.neuralink.com/compression-challenge/data.zip  # URL to download the dataset.
  directory: data  # Directory to extract and store the dataset.
  split_ratio: 0.8  # Ratio to split the data into train and test sets.
  cut_length: null  # Optional length to cut sequences to.

profiler:
  enable: false  # Enable profiler.

---

# Shared baseline: cluster / runner / logging setup plus default values for
# the experiment sections.
# NOTE(review): the experiment documents in this file carry `import: $`,
# presumably to inherit from this document — confirm against the runner.
name: DEFAULT
project: Spikey_2

runner: spikey

# --- Cluster submission -----------------------------------------------------
slurm:
  name: "Spikey_{config[name]}"
  partitions: [single]
  standard_output: "./reports/slurm/out_%A_%a.log"
  standard_error: "./reports/slurm/err_%A_%a.log"
  num_parallel_jobs: 50
  ntasks: 1
  cpus_per_task: 8
  memory_per_cpu: 4000
  time_limit: 1440  # minutes
  venv: ".venv/bin/activate"
  # Shell lines listed for the job; they create scratch directories under
  # {tmp} and point the Python bytecode cache at one of them.
  sh_lines:
    - "mkdir -p {tmp}/wandb"
    - "mkdir -p {tmp}/local_pycache"
    - "export PYTHONPYCACHEPREFIX={tmp}/local_pycache"

scheduler:
  reps_per_version: 1
  reps_per_agent: 1
  agents_per_job: 8

# --- Experiment tracking ----------------------------------------------------
wandb:
  project: "{config[project]}"
  group: "{config[name]}"
  job_type: "{delta_desc}"
  name: "{job_id}_{task_id}:{run_id}:{rand}={config[name]}_{delta_desc}"
  # tags:
  #   - '{config[env][name]}'
  #   - '{config[algo][name]}'
  sync_tensorboard: false
  monitor_gym: false
  save_code: false

# --- Defaults for the experiment sections -----------------------------------
data:
  url: https://content.neuralink.com/compression-challenge/data.zip
  directory: data
  split_ratio: 0.8
  cut_length: null

bitstream_encoding:
  type: binomHuffman

evaluation:
  full_compression: false

profiler:
  enable: false

middle_out:
  residual: false

training:
  device: cpu
  eval_freq: 8  # in epochs
  save_path: models
  value_scale: 1000
  # NOTE(review): the FC_AblPeerGrad document records 0.33 as the best value;
  # confirm that 0.25 is the intended default.
  peer_gradients_factor: 0.25
---
# FC: run with a fully connected ('fc') latent projector and an identity
# feature-extractor transform.
name: FC
import: $

feature_extractor:
  # The key is spelled `input_size`, matching feature_extractor.input_size in
  # the EXAMPLE document (a key containing a space would not match that name).
  # NOTE(review): latent_projector.input_size below is 1953 — confirm that 10
  # is the intended feature-extractor input length for this run.
  input_size: 10
  transforms:
    - type: 'identity'

latent_projector:
  type: fc
  input_size: 1953
  latent_size: 4
  layer_shapes: [32, 8]
  activations: ['ReLU', 'ReLU']

middle_out:
  region_latent_size: 4
  num_peers: 3
  residual: true  # per EXAMPLE: requires region_latent_size = latent_size

predictor:
  layer_shapes: [3]
  activations: ['ReLU']

training:
  epochs: 1024
  batch_size: 32
  num_batches: 1
  learning_rate: 0.01
---
# Learning-rate ablation on the FC setup.
name: FC_AblLR
import: $

latent_projector:
  type: fc
  input_size: 1953  # 0.1 s snippet
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU
  latent_size: 4

middle_out:
  num_peers: 3
  residual: true
  region_latent_size: 4  # equals latent_size, as residual requires

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  device: cpu
  learning_rate: 0.01
  batch_size: 32
  num_batches: 1
  epochs: 1024

# Values listed for the learning-rate sweep.
# NOTE(review): presumably one run per listed value — confirm against the runner.
grid:
  training.learning_rate:
    - 0.1
    - 0.01
    - 0.001
    - 0.0001
---
# RNN latent projector variant: two RNN layers with hidden size 3, and two
# batches per epoch.
name: RNN
import: $

latent_projector:
  type: rnn
  input_size: 1953  # 0.1 s snippet
  rnn_num_layers: 2
  rnn_hidden_size: 3
  latent_size: 4

middle_out:
  num_peers: 3
  residual: true
  region_latent_size: 4  # equals latent_size, as residual requires

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  learning_rate: 0.01
  batch_size: 32
  num_batches: 2
  epochs: 1024
---
# Fourier latent projector on long snippets, with latent size 8 and
# 16 batches per epoch.
name: FOURIER
import: $

latent_projector:
  type: fourier
  input_size: 19531  # 1 s
  pass_raw_len: 1953  # 0.1 s of raw samples passed alongside the frequencies
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU
  latent_size: 8

middle_out:
  num_peers: 3
  residual: true
  region_latent_size: 8  # equals latent_size, as residual requires

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  learning_rate: 0.01
  batch_size: 32
  num_batches: 16
  epochs: 1024
---
# Ablation over training.peer_gradients_factor on an FC setup with two peers
# and batch size 16.
name: FC_AblPeerGrad  # Best: 0.33
import: $

latent_projector:
  type: fc
  input_size: 1953  # 0.1 s snippet
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU
  latent_size: 4

middle_out:
  num_peers: 2
  residual: true
  region_latent_size: 4  # equals latent_size, as residual requires

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  device: cpu
  learning_rate: 0.01
  batch_size: 16
  num_batches: 1
  epochs: 1024

# NOTE(review): this grid is written as a nested mapping, whereas the other
# ablations in this file use dotted keys (e.g. `training.learning_rate`);
# confirm the runner accepts both spellings.
grid:
  training:
    peer_gradients_factor:
      - 0.0
      - 0.1
      - 0.25
      - 0.33
      - 0.5
      - 1.0
---
# FC setup with the number of peers set to zero.
name: FC_NoPeer  # Worse
import: $

latent_projector:
  type: fc
  input_size: 1953  # 0.1 s snippet
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU
  latent_size: 4

middle_out:
  num_peers: 0
  residual: true
  region_latent_size: 4  # equals latent_size, as residual requires

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  device: cpu
  learning_rate: 0.01
  batch_size: 16
  num_batches: 1
  epochs: 1024

---
# Ablation over training.value_scale on the FC setup.
name: FC_ScaleAbl  # Best: 1000
import: $

latent_projector:
  type: fc
  input_size: 1953  # 0.1 s snippet
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU
  latent_size: 4

middle_out:
  num_peers: 3
  residual: true
  region_latent_size: 4  # equals latent_size, as residual requires

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  device: cpu
  learning_rate: 0.01
  batch_size: 32
  num_batches: 1
  epochs: 1024

# Values listed for the value-scale sweep.
# NOTE(review): presumably one run per listed value — confirm against the runner.
grid:
  training.value_scale:
    - 1
    - 100
    - 1000
    - 10000
---
# Batch-size ablation on the FC setup.
name: FC_BSAbl3  # 64 is best, everything >=64 is ok
import: $

latent_projector:
  type: fc
  input_size: 1953  # 0.1 s snippet
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU
  latent_size: 4

middle_out:
  num_peers: 3
  residual: true
  region_latent_size: 4  # equals latent_size, as residual requires

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  device: cpu
  learning_rate: 0.01
  batch_size: 32
  num_batches: 1
  epochs: 1024

# Values listed for the batch-size sweep.
# NOTE(review): presumably one run per listed value — confirm against the runner.
grid:
  training.batch_size:
    - 64
    - 128
    - 256
---
# Small FC model: 195-sample input, a 20/6 projector, two peers, a 2-unit
# predictor, trained for 10000 epochs with 8 repetitions per version.
name: FC_smol_master2
import: $

scheduler:
  agents_per_job: 8
  reps_per_version: 8

latent_projector:
  type: fc
  input_size: 195  # 0.01 s snippet
  layer_shapes:
    - 20
    - 6
  activations:
    - ReLU
    - ReLU
  latent_size: 4

middle_out:
  num_peers: 2
  residual: true
  region_latent_size: 4  # equals latent_size, as residual requires

predictor:
  layer_shapes:
    - 2
  activations:
    - ReLU

training:
  device: cpu
  learning_rate: 0.01
  batch_size: 32
  num_batches: 1
  epochs: 10000
---
# Small FC model (195-sample input, 20/6 projector) using Tanh activations in
# both the latent projector and the predictor.
name: FC_smolTanh
import: $

latent_projector:
  type: fc
  input_size: 195  # 0.01 s snippet
  layer_shapes:
    - 20
    - 6
  activations:
    - Tanh
    - Tanh
  latent_size: 4

middle_out:
  num_peers: 2
  residual: true
  region_latent_size: 4  # equals latent_size, as residual requires

predictor:
  layer_shapes:
    - 2
  activations:
    - Tanh

training:
  device: cpu
  learning_rate: 0.01
  batch_size: 32
  num_batches: 1
  epochs: 1024
---
# Fourier latent projector with latent size 4 on 0.1 s snippets.
name: FOURIER_thin
import: $

latent_projector:
  type: fourier
  input_size: 1953  # 0.1 s
  pass_raw_len: 195  # 0.01 s of raw samples passed alongside the frequencies
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU
  latent_size: 4

middle_out:
  num_peers: 3
  residual: true
  region_latent_size: 4  # equals latent_size, as residual requires

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  learning_rate: 0.01
  batch_size: 32
  num_batches: 1
  epochs: 1024
---
# Fourier latent projector with latent size 8 and a 4-unit predictor layer,
# on 0.1 s snippets.
name: FOURIER_thicc
import: $

latent_projector:
  type: fourier
  input_size: 1953  # 0.1 s
  pass_raw_len: 195  # 0.01 s of raw samples passed alongside the frequencies
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU
  latent_size: 8

middle_out:
  num_peers: 3
  residual: true
  region_latent_size: 8  # equals latent_size, as residual requires

predictor:
  layer_shapes:
    - 4
  activations:
    - ReLU

training:
  learning_rate: 0.01
  batch_size: 32
  num_batches: 1
  epochs: 1024
---
# FC setup scheduled with 8 repetitions per version and 8 agents per job.
name: FC_master3
import: $

scheduler:
  agents_per_job: 8
  reps_per_version: 8

latent_projector:
  type: fc
  input_size: 1953  # 0.1 s snippet
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU
  latent_size: 4

middle_out:
  num_peers: 3
  residual: true
  region_latent_size: 4  # equals latent_size, as residual requires

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  learning_rate: 0.01
  batch_size: 32
  num_batches: 1
  epochs: 1024
---
# FC setup scheduled as a single repetition with one agent per job.
name: FC_master_single
import: $

scheduler:
  agents_per_job: 1
  reps_per_version: 1

latent_projector:
  type: fc
  input_size: 1953  # 0.1 s snippet
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU
  latent_size: 4

middle_out:
  num_peers: 3
  residual: true
  region_latent_size: 4  # equals latent_size, as residual requires

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  learning_rate: 0.01
  batch_size: 32
  num_batches: 1
  epochs: 1024
---
# Debug run: a single repetition with one agent per job. The settings match
# the FC_master_single document; only the name differs.
name: debug
import: $

scheduler:
  agents_per_job: 1
  reps_per_version: 1

latent_projector:
  type: fc
  input_size: 1953  # 0.1 s snippet
  layer_shapes:
    - 32
    - 8
  activations:
    - ReLU
    - ReLU
  latent_size: 4

middle_out:
  num_peers: 3
  residual: true
  region_latent_size: 4  # equals latent_size, as residual requires

predictor:
  layer_shapes:
    - 3
  activations:
    - ReLU

training:
  learning_rate: 0.01
  batch_size: 32
  num_batches: 1
  epochs: 1024
---
# Small Fourier model: 195-sample input, a 20/6 projector, two peers, a 2-unit
# predictor, trained for 10000 epochs with 8 repetitions per version.
name: FOURIER_smol_master
import: $

scheduler:
  agents_per_job: 8
  reps_per_version: 8

latent_projector:
  type: fourier
  input_size: 195  # 0.01 s snippet
  pass_raw_len: 20  # 0.001 s of raw samples passed alongside the frequencies
  layer_shapes:
    - 20
    - 6
  activations:
    - ReLU
    - ReLU
  latent_size: 4

middle_out:
  num_peers: 2
  residual: true
  region_latent_size: 4  # equals latent_size, as residual requires

predictor:
  layer_shapes:
    - 2
  activations:
    - ReLU

training:
  device: cpu
  learning_rate: 0.01
  batch_size: 32
  num_batches: 1
  epochs: 10000
Cleaned up config 2024-05-26 15:59:05 +02:00			`name: EXAMPLE`

Feature Extraction 2024-05-27 17:00:02 +02:00			`feature_extractor:`
Better config for Feature Extractor 2024-05-27 17:07:00 +02:00			`input_size: 1953 # Input size for the Feature Extractor (length of snippets). (=0.1s)`
			`transforms:`
			`- type: 'identity' # Pass the last n samples of the input data directly.`
			`length: 8 # Number of last samples to pass directly. Use full input size if set to null.`
			`- type: 'fourier' # Apply Fourier transform to the input data.`
			`length: null # Use full input size if set to null. Fourier transform outputs both real and imaginary parts, doubling the size. (Computationally expensive)`
			`- type: 'wavelet' # Apply selected wavelet transform to the input data.`
			`wavelet_type: 'haar' # Haar wavelet is simple and fast, but may not capture detailed features well.`
			`length: null # Use full input size if set to null.`
			`- type: 'wavelet'`
			`wavelet_type: 'cgau1' # Complex Gaussian wavelets are used for complex-valued signal analysis and capturing phase information.`
			`length: null # Use full input size if set to null.`
			`- type: 'wavelet'`
			`wavelet_type: 'db1' # Daubechies wavelets provide a balance between time and frequency localization.`
			`length: null # Use full input size if set to null. (Computationally expensive)`
			`- type: 'wavelet'`
			`wavelet_type: 'sym2' # Symlet wavelets are nearly symmetrical, offering improved phase characteristics over Daubechies.`
			`length: null # Use full input size if set to null. (Computationally expensive)`
			`- type: 'wavelet'`
			`wavelet_type: 'coif1' # Coiflet wavelets have more vanishing moments, suitable for capturing polynomial trends.`
			`length: null # Use full input size if set to null. (Computationally expensive)`
			`- type: 'wavelet'`
			`wavelet_type: 'bior1.3' # Biorthogonal wavelets provide perfect reconstruction and linear phase characteristics.`
			`length: null # Use full input size if set to null. (Computationally expensive)`
			`- type: 'wavelet'`
			`wavelet_type: 'rbio1.3' # Reverse Biorthogonal wavelets are similar to Biorthogonal but optimized for different applications.`
			`length: null # Use full input size if set to null. (Computationally expensive)`
			`- type: 'wavelet'`
			`wavelet_type: 'dmey' # Discrete Meyer wavelets offer good frequency localization, ideal for signals with oscillatory components.`
			`length: null # Use full input size if set to null. (Computationally expensive)`
			`- type: 'wavelet'`
			`wavelet_type: 'morl' # Morlet wavelets are useful for time-frequency analysis due to their Gaussian-modulated sinusoid shape.`
			`length: null # Use full input size if set to null. (Computationally expensive)`
Feature Extraction 2024-05-27 17:00:02 +02:00
Cleaned up config 2024-05-26 15:59:05 +02:00			`latent_projector:`
New config 2024-05-26 15:59:42 +02:00			`type: 'fc' # Type of latent projector: 'fc', 'rnn', 'fourier'`
Cleaned up config 2024-05-26 15:59:05 +02:00			`latent_size: 4 # Size of the latent representation before message passing.`
			`layer_shapes: [32, 8] # List of layer sizes for the latent projector if type is 'fc' or 'fourier'.`
			`activations: ['ReLU', 'ReLU'] # Activation functions for the latent projector layers if type is 'fc' or 'fourier'.`
			`rnn_hidden_size: 4 # Hidden size for the RNN projector if type is 'rnn'.`
			`rnn_num_layers: 1 # Number of layers for the RNN projector if type is 'rnn'.`
			`pass_raw_len: 50 # Number of last samples to pass raw to the net in addition to frequencies (null = all) if type is 'fourier'.`

			`middle_out:`
			`region_latent_size: 4 # Size of the latent representation after message passing.`
			`residual: false # Wether to use a ResNet style setup. Requires region_latent_size = latent_size`
			`num_peers: 3 # Number of closest peers to consider.`

			`predictor:`
			`layer_shapes: [3] # List of layer sizes for the predictor.`
			`activations: ['ReLU'] # Activation functions for the predictor layers.`

			`training:`
			`epochs: 1024 # Number of training epochs.`
			`batch_size: 32 # Batch size for training.`
			`num_batches: 1 # Number of batches per epoch.`
			`learning_rate: 0.01 # Learning rate for the optimizer.`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`peer_gradients_factor: 0.33 # Factor for gradients acting on predictor throught peers. 0.0 = detach gradients.`
			`value_scale: 1 # Normalize data by dividing values by this (and multiple outputs)`
More tuning lel 2024-05-27 10:29:15 +02:00			`eval_freq: 8 # Frequency of evaluation during training (in epochs).`
Cleaned up config 2024-05-26 15:59:05 +02:00			`save_path: models # Directory to save the best model and encoder.`

			`evaluation:`
			`full_compression: false # Perform full compression during evaluation.`

			`bitstream_encoding:`
			`type: identity # Bitstream encoding type: 'arithmetic', 'identity', 'bzip2'.`

			`data:`
			`url: https://content.neuralink.com/compression-challenge/data.zip # URL to download the dataset.`
			`directory: data # Directory to extract and store the dataset.`
			`split_ratio: 0.8 # Ratio to split the data into train and test sets.`
			`cut_length: null # Optional length to cut sequences to.`

			`profiler:`
			`enable: false # Enable profiler.`

			`---`

initial commit 2024-05-24 22:01:59 +02:00			`name: DEFAULT`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`project: Spikey_2`
initial commit 2024-05-24 22:01:59 +02:00
			`slurm:`
			`name: 'Spikey_{config[name]}'`
			`partitions:`
			`- single`
			`standard_output: ./reports/slurm/out_%A_%a.log`
			`standard_error: ./reports/slurm/err_%A_%a.log`
			`num_parallel_jobs: 50`
More RAM 2024-05-25 01:20:24 +02:00			`cpus_per_task: 8`
			`memory_per_cpu: 4000`
initial commit 2024-05-24 22:01:59 +02:00			`time_limit: 1440 # in minutes`
			`ntasks: 1`
			`venv: '.venv/bin/activate'`
			`sh_lines:`
			`- 'mkdir -p {tmp}/wandb'`
			`- 'mkdir -p {tmp}/local_pycache'`
			`- 'export PYTHONPYCACHEPREFIX={tmp}/local_pycache'`

			`runner: spikey`

			`scheduler:`
			`reps_per_version: 1`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`agents_per_job: 8`
initial commit 2024-05-24 22:01:59 +02:00			`reps_per_agent: 1`

			`wandb:`
			`project: '{config[project]}'`
			`group: '{config[name]}'`
			`job_type: '{delta_desc}'`
			`name: '{job_id}_{task_id}:{run_id}:{rand}={config[name]}_{delta_desc}'`
2nd commit 2024-05-24 23:02:24 +02:00			`#tags:`
			`# - '{config[env][name]}'`
			`# - '{config[algo][name]}'`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`sync_tensorboard: false`
			`monitor_gym: false`
			`save_code: false`
initial commit 2024-05-24 22:01:59 +02:00
somewhat working 2024-05-26 00:28:33 +02:00			`evaluation:`
Cleaned up config 2024-05-26 15:59:05 +02:00			`full_compression: false`
somewhat working 2024-05-26 00:28:33 +02:00
			`bitstream_encoding:`
More tuning lel 2024-05-27 10:29:15 +02:00			`type: binomHuffman`
somewhat working 2024-05-26 00:28:33 +02:00
			`data:`
Cleaned up config 2024-05-26 15:59:05 +02:00			`url: https://content.neuralink.com/compression-challenge/data.zip`
			`directory: data`
			`split_ratio: 0.8`
			`cut_length: null`
somewhat working 2024-05-26 00:28:33 +02:00
			`profiler:`
			`enable: false`

			`training:`
More tuning lel 2024-05-27 10:29:15 +02:00			`eval_freq: 8`
Cleaned up config 2024-05-26 15:59:05 +02:00			`save_path: models`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`peer_gradients_factor: 0.25`
			`value_scale: 1000`
more tuning 2024-05-26 23:56:28 +02:00			`device: cpu`
Cleaned up config 2024-05-26 15:59:05 +02:00
			`middle_out:`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`residual: false`
initial commit 2024-05-24 22:01:59 +02:00			`---`
somewhat working 2024-05-26 00:28:33 +02:00			`name: FC`
2nd commit 2024-05-24 23:02:24 +02:00			`import: $`
initial commit 2024-05-24 22:01:59 +02:00
Better config for Feature Extractor 2024-05-27 17:07:00 +02:00			`feature_extractor:`
			`input size: 10`
			`transforms:`
			`- type: 'identity'`

Changed everything 2024-05-25 17:31:08 +02:00			`latent_projector:`
Cleaned up config 2024-05-26 15:59:05 +02:00			`type: fc`
			`input_size: 1953`
			`latent_size: 4`
			`layer_shapes: [32, 8]`
			`activations: ['ReLU', 'ReLU']`
Changed everything 2024-05-25 17:31:08 +02:00
			`middle_out:`
Cleaned up config 2024-05-26 15:59:05 +02:00			`region_latent_size: 4`
			`num_peers: 3`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`residual: true`
initial commit 2024-05-24 22:01:59 +02:00
			`predictor:`
Cleaned up config 2024-05-26 15:59:05 +02:00			`layer_shapes: [3]`
			`activations: ['ReLU']`
initial commit 2024-05-24 22:01:59 +02:00
			`training:`
Cleaned up config 2024-05-26 15:59:05 +02:00			`epochs: 1024`
			`batch_size: 32`
			`num_batches: 1`
			`learning_rate: 0.01`
somewhat working 2024-05-26 00:28:33 +02:00			`---`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`name: FC_AblLR`
somewhat working 2024-05-26 00:28:33 +02:00			`import: $`
initial commit 2024-05-24 22:01:59 +02:00
somewhat working 2024-05-26 00:28:33 +02:00			`latent_projector:`
Cleaned up config 2024-05-26 15:59:05 +02:00			`type: fc`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`input_size: 1953`
Cleaned up config 2024-05-26 15:59:05 +02:00			`latent_size: 4`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`layer_shapes: [32, 8]`
			`activations: ['ReLU', 'ReLU']`
Changed everything 2024-05-25 17:31:08 +02:00
somewhat working 2024-05-26 00:28:33 +02:00			`middle_out:`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`region_latent_size: 4`
Cleaned up config 2024-05-26 15:59:05 +02:00			`num_peers: 3`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`residual: true`
initial commit 2024-05-24 22:01:59 +02:00
somewhat working 2024-05-26 00:28:33 +02:00			`predictor:`
Cleaned up config 2024-05-26 15:59:05 +02:00			`layer_shapes: [3]`
			`activations: ['ReLU']`
2nd commit 2024-05-24 23:02:24 +02:00
somewhat working 2024-05-26 00:28:33 +02:00			`training:`
Cleaned up config 2024-05-26 15:59:05 +02:00			`epochs: 1024`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`batch_size: 32`
Cleaned up config 2024-05-26 15:59:05 +02:00			`num_batches: 1`
			`learning_rate: 0.01`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`device: cpu`

			`grid:`
			`training.learning_rate: [0.1, 0.01, 0.001, 0.0001]`
somewhat working 2024-05-26 00:28:33 +02:00			`---`
			`name: RNN`
			`import: $`

			`latent_projector:`
Cleaned up config 2024-05-26 15:59:05 +02:00			`type: rnn`
			`input_size: 1953`
			`latent_size: 4`
			`rnn_hidden_size: 3`
			`rnn_num_layers: 2`
somewhat working 2024-05-26 00:28:33 +02:00
			`middle_out:`
Cleaned up config 2024-05-26 15:59:05 +02:00			`region_latent_size: 4`
			`num_peers: 3`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`residual: true`
somewhat working 2024-05-26 00:28:33 +02:00
			`predictor:`
Cleaned up config 2024-05-26 15:59:05 +02:00			`layer_shapes: [3]`
			`activations: ['ReLU']`
somewhat working 2024-05-26 00:28:33 +02:00
			`training:`
Cleaned up config 2024-05-26 15:59:05 +02:00			`epochs: 1024`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`batch_size: 32`
Cleaned up config 2024-05-26 15:59:05 +02:00			`num_batches: 2`
			`learning_rate: 0.01`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`---`
			`name: FOURIER`
			`import: $`

			`latent_projector:`
			`type: fourier`
			`input_size: 19531 # 1s`
			`latent_size: 8`
			`layer_shapes: [32, 8]`
			`activations: ['ReLU', 'ReLU']`
			`pass_raw_len: 1953 # 0.1s`

			`middle_out:`
			`region_latent_size: 8`
			`num_peers: 3`
			`residual: true`

			`predictor:`
			`layer_shapes: [3]`
			`activations: ['ReLU']`

			`training:`
			`epochs: 1024`
			`batch_size: 32`
			`num_batches: 16`
			`learning_rate: 0.01`
			`---`
			`name: FC_AblPeerGrad # Best: 0.33`
			`import: $`

			`latent_projector:`
			`type: fc`
			`input_size: 1953`
			`latent_size: 4`
			`layer_shapes: [32, 8]`
			`activations: ['ReLU', 'ReLU']`

			`middle_out:`
			`region_latent_size: 4`
			`num_peers: 2`
			`residual: true`

			`predictor:`
			`layer_shapes: [3]`
			`activations: ['ReLU']`

			`training:`
			`epochs: 1024`
			`batch_size: 16`
			`num_batches: 1`
			`learning_rate: 0.01`
			`device: cpu`

			`grid:`
			`training:`
			`peer_gradients_factor: [0.0, 0.1, 0.25, 0.33, 0.5, 1.0]`
			`---`
			`name: FC_NoPeer # Worse`
			`import: $`

			`latent_projector:`
			`type: fc`
			`input_size: 1953`
			`latent_size: 4`
			`layer_shapes: [32, 8]`
			`activations: ['ReLU', 'ReLU']`

			`middle_out:`
			`region_latent_size: 4`
			`num_peers: 0`
			`residual: true`

			`predictor:`
			`layer_shapes: [3]`
			`activations: ['ReLU']`

			`training:`
			`epochs: 1024`
			`batch_size: 16`
			`num_batches: 1`
			`learning_rate: 0.01`
			`device: cpu`

			`---`
			`name: FC_ScaleAbl # Best: 1000`
			`import: $`

			`latent_projector:`
			`type: fc`
			`input_size: 1953`
			`latent_size: 4`
			`layer_shapes: [32, 8]`
			`activations: ['ReLU', 'ReLU']`

			`middle_out:`
			`region_latent_size: 4`
			`num_peers: 3`
			`residual: true`

			`predictor:`
			`layer_shapes: [3]`
			`activations: ['ReLU']`

			`training:`
			`epochs: 1024`
			`batch_size: 32`
			`num_batches: 1`
			`learning_rate: 0.01`
			`device: cpu`

			`grid:`
			`training.value_scale: [1, 100, 1000, 10000]`
			`---`
more tuning 2024-05-26 23:56:28 +02:00			`name: FC_BSAbl3 # 64 is best, everything >=64 is ok`
Some tuning and updated README 2024-05-26 17:42:03 +02:00			`import: $`

			`latent_projector:`
			`type: fc`
			`input_size: 1953`
			`latent_size: 4`
			`layer_shapes: [32, 8]`
			`activations: ['ReLU', 'ReLU']`

			`middle_out:`
			`region_latent_size: 4`
			`num_peers: 3`
			`residual: true`

			`predictor:`
			`layer_shapes: [3]`
			`activations: ['ReLU']`

			`training:`
			`epochs: 1024`
			`batch_size: 32`
			`num_batches: 1`
			`learning_rate: 0.01`
			`device: cpu`

			`grid:`
more tuning 2024-05-26 23:56:28 +02:00			`training.batch_size: [64, 128, 256]`
			`---`
More tuning lel 2024-05-27 10:29:15 +02:00			`name: FC_smol_master2`
more tuning 2024-05-26 23:56:28 +02:00			`import: $`

			`scheduler:`
			`reps_per_version: 8`
			`agents_per_job: 8`

			`latent_projector:`
			`type: fc`
			`input_size: 195`
			`latent_size: 4`
			`layer_shapes: [20, 6]`
			`activations: ['ReLU', 'ReLU']`

			`middle_out:`
			`region_latent_size: 4`
			`num_peers: 2`
			`residual: true`

			`predictor:`
			`layer_shapes: [2]`
			`activations: ['ReLU']`

			`training:`
			`epochs: 10000`
			`batch_size: 32`
			`num_batches: 1`
			`learning_rate: 0.01`
			`device: cpu`
			`---`
			`name: FC_smolTanh`
			`import: $`

			`latent_projector:`
			`type: fc`
			`input_size: 195`
			`latent_size: 4`
			`layer_shapes: [20, 6]`
			`activations: ['Tanh', 'Tanh']`

			`middle_out:`
			`region_latent_size: 4`
			`num_peers: 2`
			`residual: true`

			`predictor:`
			`layer_shapes: [2]`
			`activations: ['Tanh']`

			`training:`
			`epochs: 1024`
			`batch_size: 32`
			`num_batches: 1`
			`learning_rate: 0.01`
			`device: cpu`
			`---`
			`name: FOURIER_thin`
			`import: $`

			`latent_projector:`
			`type: fourier`
			`input_size: 1953 # 0.1s`
			`latent_size: 4`
			`layer_shapes: [32, 8]`
			`activations: ['ReLU', 'ReLU']`
			`pass_raw_len: 195 # 0.01s`

			`middle_out:`
			`region_latent_size: 4`
			`num_peers: 3`
			`residual: true`

			`predictor:`
			`layer_shapes: [3]`
			`activations: ['ReLU']`

			`training:`
			`epochs: 1024`
			`batch_size: 32`
			`num_batches: 1`
			`learning_rate: 0.01`
			`---`
			`name: FOURIER_thicc`
			`import: $`

			`latent_projector:`
			`type: fourier`
			`input_size: 1953 # 0.1s`
			`latent_size: 8`
			`layer_shapes: [32, 8]`
			`activations: ['ReLU', 'ReLU']`
			`pass_raw_len: 195 # 0.01s`

			`middle_out:`
			`region_latent_size: 8`
			`num_peers: 3`
			`residual: true`

			`predictor:`
			`layer_shapes: [4]`
			`activations: ['ReLU']`

			`training:`
			`epochs: 1024`
			`batch_size: 32`
			`num_batches: 1`
			`learning_rate: 0.01`
			`---`
More tuning lel 2024-05-27 10:29:15 +02:00			`name: FC_master3`
more tuning 2024-05-26 23:56:28 +02:00			`import: $`

			`scheduler:`
			`reps_per_version: 8`
			`agents_per_job: 8`

			`latent_projector:`
			`type: fc`
			`input_size: 1953`
			`latent_size: 4`
			`layer_shapes: [32, 8]`
			`activations: ['ReLU', 'ReLU']`

			`middle_out:`
			`region_latent_size: 4`
			`num_peers: 3`
			`residual: true`
More tuning lel 2024-05-27 10:29:15 +02:00
			`predictor:`
			`layer_shapes: [3]`
			`activations: ['ReLU']`

			`training:`
			`epochs: 1024`
			`batch_size: 32`
			`num_batches: 1`
			`learning_rate: 0.01`
			`---`
			`name: FC_master_single`
			`import: $`

			`scheduler:`
			`reps_per_version: 1`
			`agents_per_job: 1`

			`latent_projector:`
			`type: fc`
			`input_size: 1953`
			`latent_size: 4`
			`layer_shapes: [32, 8]`
			`activations: ['ReLU', 'ReLU']`

			`middle_out:`
			`region_latent_size: 4`
			`num_peers: 3`
			`residual: true`
more tuning 2024-05-26 23:56:28 +02:00
			`predictor:`
			`layer_shapes: [3]`
			`activations: ['ReLU']`

			`training:`
			`epochs: 1024`
			`batch_size: 32`
			`num_batches: 1`
			`learning_rate: 0.01`
			`---`
			`name: debug`
			`import: $`

			`scheduler:`
			`reps_per_version: 1`
			`agents_per_job: 1`

			`latent_projector:`
			`type: fc`
			`input_size: 1953`
			`latent_size: 4`
			`layer_shapes: [32, 8]`
			`activations: ['ReLU', 'ReLU']`

			`middle_out:`
			`region_latent_size: 4`
			`num_peers: 3`
			`residual: true`

			`predictor:`
			`layer_shapes: [3]`
			`activations: ['ReLU']`

			`training:`
			`epochs: 1024`
			`batch_size: 32`
			`num_batches: 1`
			`learning_rate: 0.01`
			`---`
			`name: FOURIER_smol_master`
			`import: $`

			`scheduler:`
			`reps_per_version: 8`
			`agents_per_job: 8`

			`latent_projector:`
			`type: fourier`
			`input_size: 195`
			`latent_size: 4`
			`layer_shapes: [20, 6]`
			`activations: ['ReLU', 'ReLU']`
			`pass_raw_len: 20 # 0.001s`

			`middle_out:`
			`region_latent_size: 4`
			`num_peers: 2`
			`residual: true`

			`predictor:`
			`layer_shapes: [2]`
			`activations: ['ReLU']`

			`training:`
			`epochs: 10000`
			`batch_size: 32`
			`num_batches: 1`
			`learning_rate: 0.01`
			`device: cpu`