Paper code basis
This commit is contained in:
commit
09a8df2cff
191
.gitignore
vendored
Normal file
191
.gitignore
vendored
Normal file
@ -0,0 +1,191 @@
|
|||||||
|
outputs/
|
||||||
|
wandb/
|
||||||
|
logs/
|
||||||
|
uv.lock
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
# For a library or package, you might want to ignore these files since the code is
|
||||||
|
# intended to run in multiple environments; otherwise, check them in:
|
||||||
|
# .python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
#Pipfile.lock
|
||||||
|
|
||||||
|
# UV
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
#uv.lock
|
||||||
|
|
||||||
|
# poetry
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||||
|
#poetry.lock
|
||||||
|
|
||||||
|
# pdm
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||||
|
#pdm.lock
|
||||||
|
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||||
|
# in version control.
|
||||||
|
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
||||||
|
.pdm.toml
|
||||||
|
.pdm-python
|
||||||
|
.pdm-build/
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
# env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||||
|
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||||
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
|
#.idea/
|
||||||
|
|
||||||
|
# PyPI configuration file
|
||||||
|
.pypirc
|
||||||
|
multirun
|
||||||
|
*.npz
|
||||||
|
pip_env
|
||||||
|
*.db
|
||||||
|
slurm-*
|
||||||
|
|
||||||
|
runs/
|
||||||
|
slurm_logs/
|
||||||
|
*.png
|
||||||
|
data/
|
||||||
|
.ruff_cache/
|
||||||
|
|
||||||
|
.vscode/
|
||||||
|
models/
|
||||||
|
plots/
|
||||||
|
data.zip
|
||||||
8
config/env/brax.yaml
vendored
Normal file
8
config/env/brax.yaml
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
type: brax
|
||||||
|
name:
|
||||||
|
max_episode_steps: 1000
|
||||||
|
reward_scaling: 0.1
|
||||||
|
terminate: true
|
||||||
|
|
||||||
|
vmin: 0
|
||||||
|
vmax: 150
|
||||||
8
config/env/humanoid_brax.yaml
vendored
Normal file
8
config/env/humanoid_brax.yaml
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
type: brax
|
||||||
|
name:
|
||||||
|
max_episode_steps: 1000
|
||||||
|
reward_scaling: 0.1
|
||||||
|
terminate: true
|
||||||
|
|
||||||
|
vmin: 0
|
||||||
|
vmax: 200
|
||||||
3
config/env/isaaclab.yaml
vendored
Normal file
3
config/env/isaaclab.yaml
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
type: isaaclab
|
||||||
|
name:
|
||||||
|
action_bounds: [-1, 1]
|
||||||
16
config/env/maniskill.yaml
vendored
Normal file
16
config/env/maniskill.yaml
vendored
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
type: maniskill
|
||||||
|
name:
|
||||||
|
reconfiguration_freq: 1
|
||||||
|
partial_reset: true
|
||||||
|
asymmetric_obs: false
|
||||||
|
max_episode_steps: 50
|
||||||
|
stochastic_eval: true
|
||||||
|
has_final_obs: true
|
||||||
|
|
||||||
|
env_kwargs:
|
||||||
|
obs_mode: state
|
||||||
|
render_mode: rgb_array
|
||||||
|
sim_backend: physx_cuda
|
||||||
|
|
||||||
|
vmin: -15
|
||||||
|
vmax: 15
|
||||||
9
config/env/mjx_dmc.yaml
vendored
Normal file
9
config/env/mjx_dmc.yaml
vendored
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
type: mjx
|
||||||
|
name:
|
||||||
|
max_episode_steps: 1000
|
||||||
|
reward_scaling: 1.0
|
||||||
|
terminate: false
|
||||||
|
asymmetric_observation: false
|
||||||
|
|
||||||
|
vmin: 0
|
||||||
|
vmax: 150
|
||||||
10
config/env/mjx_humanoid.yaml
vendored
Normal file
10
config/env/mjx_humanoid.yaml
vendored
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
type: mjx
|
||||||
|
name:
|
||||||
|
max_episode_steps: 1000
|
||||||
|
reward_scaling: 1.0
|
||||||
|
terminate: false
|
||||||
|
push_distractions: false
|
||||||
|
asymmetric_observation: true
|
||||||
|
|
||||||
|
vmin: -10
|
||||||
|
vmax: 10
|
||||||
0
config/experiment_overrides/default.yaml
Normal file
0
config/experiment_overrides/default.yaml
Normal file
5
config/experiment_overrides/maniskill.yaml
Normal file
5
config/experiment_overrides/maniskill.yaml
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
lmbda: 0.95
|
||||||
|
|
||||||
|
num_epochs: 4
|
||||||
|
|
||||||
|
aux_loss_mult: 1.0
|
||||||
5
config/experiment_overrides/mjx_dmc_large_data.yaml
Normal file
5
config/experiment_overrides/mjx_dmc_large_data.yaml
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
num_envs: 1024
|
||||||
|
num_steps: 128
|
||||||
|
num_mini_batches: 64
|
||||||
|
num_epochs: 8
|
||||||
|
kl_bound: 0.1
|
||||||
5
config/experiment_overrides/mjx_dmc_medium_data.yaml
Normal file
5
config/experiment_overrides/mjx_dmc_medium_data.yaml
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
num_envs: 1024
|
||||||
|
num_steps: 64
|
||||||
|
num_mini_batches: 32
|
||||||
|
num_epochs: 8
|
||||||
|
kl_bound: 0.1
|
||||||
5
config/experiment_overrides/mjx_dmc_small_data.yaml
Normal file
5
config/experiment_overrides/mjx_dmc_small_data.yaml
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
num_envs: 1024
|
||||||
|
num_steps: 32
|
||||||
|
num_mini_batches: 16
|
||||||
|
num_epochs: 8
|
||||||
|
kl_bound: 0.1
|
||||||
8
config/experiment_overrides/mjx_humanoid_large_data.yaml
Normal file
8
config/experiment_overrides/mjx_humanoid_large_data.yaml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
gamma: 0.97
|
||||||
|
critic_hidden_dim: 1024
|
||||||
|
|
||||||
|
num_envs: 1024
|
||||||
|
num_steps: 128
|
||||||
|
num_mini_batches: 16
|
||||||
|
num_epochs: 8
|
||||||
|
kl_bound: 0.1
|
||||||
8
config/experiment_overrides/mjx_humanoid_small_data.yaml
Normal file
8
config/experiment_overrides/mjx_humanoid_small_data.yaml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
gamma: 0.97
|
||||||
|
critic_hidden_dim: 1024
|
||||||
|
|
||||||
|
num_envs: 1024
|
||||||
|
num_steps: 32
|
||||||
|
num_mini_batches: 4
|
||||||
|
num_epochs: 8
|
||||||
|
kl_bound: 0.1
|
||||||
7
config/platform/torch.yaml
Normal file
7
config/platform/torch.yaml
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
amp_enabled: false
|
||||||
|
amp_device: "cuda"
|
||||||
|
cuda: true
|
||||||
|
amp_dtype: f32
|
||||||
|
torch_deterministic: false
|
||||||
|
device_rank: 0
|
||||||
|
compile: true
|
||||||
35
config/ppo.yaml
Normal file
35
config/ppo.yaml
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
defaults:
|
||||||
|
- env: brax
|
||||||
|
- experiment_overrides: default
|
||||||
|
- trial_spec: default
|
||||||
|
- _self_
|
||||||
|
|
||||||
|
hyperparameters:
|
||||||
|
lr: 3e-4
|
||||||
|
gamma: 0.99
|
||||||
|
lmbda: 0.95
|
||||||
|
clip_ratio: 0.2
|
||||||
|
value_coef: 0.5
|
||||||
|
entropy_coef: 0.0
|
||||||
|
total_time_steps: 50_000_000
|
||||||
|
num_steps: 64
|
||||||
|
num_mini_batches: 32
|
||||||
|
num_envs: 2048
|
||||||
|
num_epochs: 16
|
||||||
|
max_grad_norm: 0.5
|
||||||
|
normalize_advantages: True
|
||||||
|
normalize_env: True
|
||||||
|
anneal_lr: False
|
||||||
|
num_eval: 20
|
||||||
|
max_episode_steps: 1000
|
||||||
|
name: "ppo"
|
||||||
|
tags: ["ppo_baseline_retuned"]
|
||||||
|
seed: 0
|
||||||
|
num_seeds: 1
|
||||||
|
tune: false
|
||||||
|
checkpoint_dir: null
|
||||||
|
trials: 8
|
||||||
|
wandb:
|
||||||
|
mode: "online" # set to online to activate wandb
|
||||||
|
entity: "viper_svg"
|
||||||
|
project: "online_sac"
|
||||||
89
config/sac.yaml
Normal file
89
config/sac.yaml
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
defaults:
|
||||||
|
- env: brax
|
||||||
|
- experiment_overrides: default
|
||||||
|
- trial_spec: default
|
||||||
|
- platform: torch
|
||||||
|
- _self_
|
||||||
|
|
||||||
|
hyperparameters:
|
||||||
|
# env and run settings (mostly don't touch)
|
||||||
|
total_time_steps: 50_000_000
|
||||||
|
normalize_env: true
|
||||||
|
max_episode_steps: 1000
|
||||||
|
eval_interval: 2
|
||||||
|
num_eval: 20
|
||||||
|
|
||||||
|
# optimization settings (seem very stable)
|
||||||
|
lr: 3e-4
|
||||||
|
anneal_lr: false
|
||||||
|
max_grad_norm: 0.5
|
||||||
|
polyak: 1.0 # maybe ablate ?
|
||||||
|
|
||||||
|
# problem discount settings (need tuning)
|
||||||
|
gamma: 0.99
|
||||||
|
lmbda: 0.95
|
||||||
|
lmbda_min: 0.50 # irrelevant if no exploration noise is added
|
||||||
|
|
||||||
|
# batch settings (need tuning for MJX humanoid)
|
||||||
|
num_steps: 128
|
||||||
|
num_mini_batches: 128
|
||||||
|
num_envs: 1024
|
||||||
|
num_epochs: 4
|
||||||
|
|
||||||
|
# exploration settings (currently not touched)
|
||||||
|
exploration_noise_max: 1.0
|
||||||
|
exploration_noise_min: 1.0
|
||||||
|
exploration_base_envs: 0
|
||||||
|
|
||||||
|
# critic architecture settings (need to be increased for MJX humanoid)
|
||||||
|
critic_hidden_dim: 512
|
||||||
|
actor_hidden_dim: 512
|
||||||
|
vmin: ${env.vmin}
|
||||||
|
vmax: ${env.vmax}
|
||||||
|
num_bins: 151
|
||||||
|
hl_gauss: true
|
||||||
|
use_critic_norm: true
|
||||||
|
num_critic_encoder_layers: 2
|
||||||
|
num_critic_head_layers: 2
|
||||||
|
num_critic_pred_layers: 2
|
||||||
|
use_simplical_embedding: False
|
||||||
|
|
||||||
|
# actor architecture settings (seem stable)
|
||||||
|
use_actor_norm: true
|
||||||
|
num_actor_layers: 3
|
||||||
|
actor_min_std: 0.0
|
||||||
|
|
||||||
|
# actor & critic loss settings (seem remarkably stable)
|
||||||
|
## kl settings
|
||||||
|
kl_start: 0.01
|
||||||
|
kl_bound: 0.1 # switched to tighter bounds for MJX
|
||||||
|
reduce_kl: true
|
||||||
|
reverse_kl: false # previous default "false"
|
||||||
|
update_kl_lagrangian: true
|
||||||
|
actor_kl_clip_mode: "clipped" # "full", "clipped", "kl_relu_clipped", "kl_bound_clipped", "value"
|
||||||
|
## entropy settings
|
||||||
|
ent_start: 0.01
|
||||||
|
ent_target_mult: 0.5
|
||||||
|
update_entropy_lagrangian: true
|
||||||
|
## auxiliary loss settings
|
||||||
|
aux_loss_mult: 1.0
|
||||||
|
|
||||||
|
|
||||||
|
measure_burnin: 3
|
||||||
|
|
||||||
|
|
||||||
|
name: "sac"
|
||||||
|
seed: 0
|
||||||
|
num_seeds: 1
|
||||||
|
tune: false
|
||||||
|
checkpoint_dir: null
|
||||||
|
num_trials: 10
|
||||||
|
tags: ["experimental"]
|
||||||
|
wandb:
|
||||||
|
mode: "online" # set to online to activate wandb
|
||||||
|
entity: "viper_svg"
|
||||||
|
project: "online_sac"
|
||||||
|
|
||||||
|
hydra:
|
||||||
|
job:
|
||||||
|
chdir: True
|
||||||
129
pyproject.toml
Normal file
129
pyproject.toml
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
[project]
# Distribution names may only contain ASCII letters, digits, ".", "_" and "-",
# so the paper title is written with hyphens instead of spaces.
name = "relative-entropy-pathwise-policy-optimization"
version = "0.1.0"
description = "Code release for the REPPO paper"
readme = "README.md"
requires-python = ">=3.12"
# Each requirement is listed once (duplicate torchinfo / debugpy entries removed).
dependencies = [
    "black>=25.1.0",
    "brax>=0.12.1",
    "distrax>=0.1.5",
    "flax>=0.10.3",
    "gymnax>=0.0.8",
    "hydra-core>=1.3.2",
    "hydra-submitit-launcher>=1.2.0",
    "jax-tqdm>=0.3.1",
    "jax[cuda12]==0.5.3 ; sys_platform == 'linux'",
    "jax==0.5.3 ; sys_platform == 'darwin'",
    "mani-skill>=3.0.0b21",
    "matplotlib>=3.10.0",
    "optax>=0.2.4",
    "optuna-integration>=4.3.0",
    "optuna>=4.3.0",
    "playground",
    "plotly>=6.0.1",
    "rliable>=1.2.0",
    "tensordict>=0.8.3",
    "torch>=2.7.1",
    "tyro>=0.9.25",
    "sapien>=3.0.0b1",
    "wandb>=0.20.1",
    "torchinfo>=1.8.0",
    "debugpy>=1.8.14",
    "notebook>=7.4.4",
    "inflection>=0.5.1",
    "scalene>=1.5.51",
]
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["hatchling"]
|
||||||
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
|
[tool.hatch.build.targets.wheel]
|
||||||
|
packages = ["onpolicy_sac"]
|
||||||
|
|
||||||
|
[tool.ruff]
|
||||||
|
# Exclude a variety of commonly ignored directories.
|
||||||
|
exclude = [
|
||||||
|
".eggs",
|
||||||
|
".git",
|
||||||
|
".git-rewrite",
|
||||||
|
".hg",
|
||||||
|
".ipynb_checkpoints",
|
||||||
|
".mypy_cache",
|
||||||
|
".nox",
|
||||||
|
".pants.d",
|
||||||
|
".pyenv",
|
||||||
|
".pytest_cache",
|
||||||
|
".pytype",
|
||||||
|
".ruff_cache",
|
||||||
|
".svn",
|
||||||
|
".tox",
|
||||||
|
".venv",
|
||||||
|
".vscode",
|
||||||
|
"__pypackages__",
|
||||||
|
"_build",
|
||||||
|
"buck-out",
|
||||||
|
"build",
|
||||||
|
"dist",
|
||||||
|
"node_modules",
|
||||||
|
"site-packages",
|
||||||
|
"outputs",
|
||||||
|
"multirun",
|
||||||
|
"wandb"
|
||||||
|
]
|
||||||
|
|
||||||
|
# Same as Black.
|
||||||
|
line-length = 88
|
||||||
|
indent-width = 4
|
||||||
|
|
||||||
|
# Assume Python 3.13
|
||||||
|
target-version = "py313"
|
||||||
|
|
||||||
|
[tool.ruff.lint]
|
||||||
|
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
|
||||||
|
# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
|
||||||
|
# McCabe complexity (`C901`) by default.
|
||||||
|
select = ["E4", "E7", "E9", "F"]
|
||||||
|
# ignores not assigning lambda rule for jax compatibility
|
||||||
|
ignore = ["E731"]
|
||||||
|
|
||||||
|
|
||||||
|
# Allow fix for all enabled rules (when `--fix` is provided).
|
||||||
|
fixable = ["ALL"]
|
||||||
|
unfixable = []
|
||||||
|
|
||||||
|
# Allow unused variables when underscore-prefixed.
|
||||||
|
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
|
||||||
|
|
||||||
|
[tool.ruff.format]
|
||||||
|
# Like Black, use double quotes for strings.
|
||||||
|
quote-style = "double"
|
||||||
|
|
||||||
|
# Like Black, indent with spaces, rather than tabs.
|
||||||
|
indent-style = "space"
|
||||||
|
|
||||||
|
# Like Black, respect magic trailing commas.
|
||||||
|
skip-magic-trailing-comma = false
|
||||||
|
|
||||||
|
# Like Black, automatically detect the appropriate line ending.
|
||||||
|
line-ending = "auto"
|
||||||
|
|
||||||
|
# Enable auto-formatting of code examples in docstrings. Markdown,
|
||||||
|
# reStructuredText code/literal blocks and doctests are all supported.
|
||||||
|
#
|
||||||
|
# This is currently disabled by default, but it is planned for this
|
||||||
|
# to be opt-out in the future.
|
||||||
|
docstring-code-format = false
|
||||||
|
|
||||||
|
# Set the line length limit used when formatting code snippets in
|
||||||
|
# docstrings.
|
||||||
|
#
|
||||||
|
# This only has an effect when the `docstring-code-format` setting is
|
||||||
|
# enabled.
|
||||||
|
docstring-code-line-length = "dynamic"
|
||||||
|
|
||||||
|
[tool.uv.sources]
|
||||||
|
playground = { git = "https://github.com/younggyoseo/mujoco_playground" }
|
||||||
375
reppo/env_utils/jax_wrappers.py
Normal file
375
reppo/env_utils/jax_wrappers.py
Normal file
@ -0,0 +1,375 @@
|
|||||||
|
from functools import partial
|
||||||
|
from typing import Any, Tuple, Union
|
||||||
|
|
||||||
|
import chex
|
||||||
|
import gymnax
|
||||||
|
import jax
|
||||||
|
import jax.numpy as jnp
|
||||||
|
from brax import envs
|
||||||
|
from brax.envs.wrappers.training import AutoResetWrapper, EpisodeWrapper
|
||||||
|
from flax import struct
|
||||||
|
from gymnax.environments import environment, spaces
|
||||||
|
from gymnax.environments.environment import Environment
|
||||||
|
from gymnax.environments.spaces import Box
|
||||||
|
from ml_collections import ConfigDict
|
||||||
|
from mujoco_playground import MjxEnv, registry
|
||||||
|
from mujoco_playground._src.wrapper import wrap_for_brax_training, Wrapper
|
||||||
|
|
||||||
|
|
||||||
|
class MjxGymnaxWrapper(Environment):
    """Adapt a MuJoCo Playground (MJX) environment to the gymnax-style API used here.

    ``reset`` returns ``(obs, critic_obs, state)`` and ``step`` returns
    ``(obs, critic_obs, state, reward, done, info)``. When the wrapped
    environment reports a non-integer ``observation_size`` the observation is
    indexed like a mapping: the actor observation is read from ``"state"`` and
    the critic observation from ``"privileged_state"`` when
    ``asymmetric_observation`` is true, otherwise from ``"state"`` as well.
    """

    # Task names for which ``push_distractions`` is written into the default
    # config's ``push_config.enable`` field.
    _PUSH_CONFIG_TASKS = (
        "G1JoystickRoughTerrain",
        "G1JoystickFlatTerrain",
        "T1JoystickRoughTerrain",
        "T1JoystickFlatTerrain",
    )

    def __init__(
        self,
        env_or_name: str | MjxEnv,
        episode_length: int | None = 1000,
        action_repeat: int = 1,
        reward_scale: float = 1.0,
        push_distractions: bool = False,
        config: dict | None = None,
        asymmetric_observation: bool = False,
    ):
        """Load (or adopt) the environment and record the observation layout.

        Args:
            env_or_name: Registry name to load, or an already constructed
                environment which is used as-is.
            episode_length: Passed to ``wrap_for_brax_training``; ``None`` skips
                that wrapping. Only used when ``env_or_name`` is a name.
            action_repeat: Passed to ``wrap_for_brax_training``.
            reward_scale: Factor applied to the reward returned by ``step``.
            push_distractions: Written to ``config.push_config.enable`` for the
                tasks in ``_PUSH_CONFIG_TASKS``, but only when the registry
                default config is used (``config is None``).
            config: Optional config; converted with ``ConfigDict`` when given.
            asymmetric_observation: Selects ``"privileged_state"`` instead of
                ``"state"`` as the critic observation key.
        """
        if isinstance(env_or_name, str):
            if config is None:
                config = registry.get_default_config(env_or_name)
                if env_or_name in self._PUSH_CONFIG_TASKS:
                    config.push_config.enable = push_distractions
            else:
                config = ConfigDict(config)
            env = registry.load(env_or_name, config=config)
            if episode_length is not None:
                env = wrap_for_brax_training(
                    env, episode_length=episode_length, action_repeat=action_repeat
                )
            self.env = env
        else:
            self.env = env_or_name

        self.reward_scale = reward_scale
        # An integer size means a flat observation; anything else is indexed by key.
        self.dict_obs = not isinstance(self.env.observation_size, int)
        self.dict_obs_key = "privileged_state" if asymmetric_observation else "state"
        print(self.dict_obs_key)
        super().__init__()

    @staticmethod
    def _unbounded_box(shape):
        """Return a ``Box`` of the given shape spanning ``(-inf, inf)``."""
        return Box(low=-float("inf"), high=float("inf"), shape=shape)

    def _split_obs(self, state):
        """Return ``(actor_obs, critic_obs)`` taken from ``state.obs``."""
        if not self.dict_obs:
            return state.obs, state.obs
        return state.obs["state"], state.obs[self.dict_obs_key]

    def action_space(self, params=None):
        """Return the ``[-1, 1]`` action box; ``params`` is accepted but unused."""
        return Box(
            low=-1.0,
            high=1.0,
            shape=(self.env.action_size,),
        )

    def observation_space(self, params=None):
        """Return ``(actor_space, critic_space)``; ``params`` is accepted but unused."""
        sizes = self.env.observation_size
        if self.dict_obs:
            return (
                self._unbounded_box(sizes["state"]),
                self._unbounded_box(sizes[self.dict_obs_key]),
            )
        return self._unbounded_box((sizes,)), self._unbounded_box((sizes,))

    @property
    def default_params(self) -> gymnax.EnvParams:
        """Return a default-constructed ``gymnax.EnvParams``."""
        return gymnax.EnvParams()

    def reset(self, key):
        """Reset the wrapped environment and return ``(obs, critic_obs, state)``."""
        state = self.env.reset(key)
        obs, critic_obs = self._split_obs(state)
        return obs, critic_obs, state

    def step(self, key, state, action):
        """Advance one step.

        ``key`` is accepted for API compatibility and is not used. Returns
        ``(obs, critic_obs, state, reward * reward_scale, done > 0.5, {})``.
        """
        state = self.env.step(state, action)
        obs, critic_obs = self._split_obs(state)
        return (
            obs,
            critic_obs,
            state,
            state.reward * self.reward_scale,
            state.done > 0.5,
            {},
        )
|
||||||
|
|
||||||
|
|
||||||
|
@struct.dataclass
class LogEnvState:
    """State carried by ``LogWrapper``: the wrapped env state plus episode statistics."""

    # State of the wrapped environment.
    env_state: environment.EnvState
    # Return / length accumulated so far in the running episode.
    episode_returns: jnp.ndarray
    episode_lengths: jnp.ndarray
    # Return / length of the most recently finished episode.
    returned_episode_returns: jnp.ndarray
    returned_episode_lengths: jnp.ndarray
    # Step counter maintained by ``LogWrapper``.
    timestep: jnp.ndarray
    truncated: jnp.ndarray
    info: Any = None

    def unwrapped(self):
        """Return the wrapped environment's state."""
        inner_state = self.env_state
        return inner_state

    def set_env_state(self, env_state):
        """Return a copy of this state whose ``env_state`` is ``env_state``."""
        updated = self.replace(env_state=env_state)
        return updated
|
||||||
|
|
||||||
|
|
||||||
|
class LogWrapper(Wrapper):
    """Log the episode returns and lengths.

    Wraps an environment whose ``reset`` returns ``(obs, critic_obs, state)``
    and whose ``step`` returns ``(obs, critic_obs, state, reward, done, info)``,
    and tracks per-environment statistics in a ``LogEnvState``.
    """

    def __init__(self, env: environment.Environment, num_envs: int):
        """Store the wrapped ``env`` and the number of parallel environments."""
        super().__init__(env)
        self.num_envs = num_envs

    @partial(jax.jit, static_argnums=(0,))
    def reset(self, key) -> Tuple[chex.Array, chex.Array, LogEnvState]:
        """Reset the wrapped env and return ``(obs, critic_obs, state)``.

        All counters start at zero; ``truncated`` starts as an array of ones.
        """
        obs, critic_obs, env_state = self.env.reset(key)
        per_env = (self.num_envs,)
        state = LogEnvState(
            env_state=env_state,
            episode_returns=jnp.zeros(per_env),
            episode_lengths=jnp.zeros(per_env, dtype=jnp.int32),
            returned_episode_returns=jnp.zeros(per_env),
            returned_episode_lengths=jnp.zeros(per_env, dtype=jnp.int32),
            timestep=jnp.zeros(per_env, dtype=jnp.int32),
            truncated=jnp.ones(per_env, dtype=jnp.float32),
            info={
                "returned_episode": jnp.zeros(per_env, dtype=jnp.bool_),
                "returned_episode_returns": jnp.zeros(per_env),
                "timestep": jnp.zeros(per_env, dtype=jnp.int32),
                "returned_episode_lengths": jnp.zeros(per_env, dtype=jnp.int32),
            },
        )
        return obs, critic_obs, state

    @partial(jax.jit, static_argnums=(0,))
    def step(
        self,
        key: chex.PRNGKey,
        state: LogEnvState,
        action: Union[int, float],
    ) -> Tuple[chex.Array, chex.Array, LogEnvState, chex.Array, chex.Array, dict]:
        """Step the wrapped env and update the episode statistics.

        Returns ``(obs, critic_obs, state, reward, done, info)``. ``info`` gains
        ``returned_episode_returns``, ``returned_episode_lengths``,
        ``timestep`` (the value before this step) and ``returned_episode``
        (equal to ``done``).
        """
        obs, critic_obs, env_state, reward, done, info = self.env.step(
            key, state.env_state, action
        )
        not_done = 1 - done
        new_episode_return = state.episode_returns + reward
        new_episode_length = state.episode_lengths + 1

        # Where an episode just ended, publish its totals; elsewhere keep the
        # previously published values.
        returned_returns = (
            state.returned_episode_returns * not_done + new_episode_return * done
        )
        returned_lengths = (
            state.returned_episode_lengths * not_done + new_episode_length * done
        )

        info["returned_episode_returns"] = returned_returns
        info["returned_episode_lengths"] = returned_lengths
        info["timestep"] = state.timestep
        info["returned_episode"] = done

        state = LogEnvState(
            env_state=env_state,
            # Running totals restart from zero once an episode has finished.
            episode_returns=new_episode_return * not_done,
            episode_lengths=new_episode_length * not_done,
            returned_episode_returns=returned_returns,
            returned_episode_lengths=returned_lengths,
            timestep=state.timestep + 1,
            truncated=env_state.info["truncation"],
            info=info,
        )
        return obs, critic_obs, state, reward, done, info
|
||||||
|
|
||||||
|
|
||||||
|
class BraxGymnaxWrapper:
    """Expose a Brax environment through the gymnax-style API used in this file.

    The environment is created with ``envs.get_environment`` and wrapped in
    ``EpisodeWrapper`` (``action_repeat=1``) and ``AutoResetWrapper``.
    """

    def __init__(
        self,
        env_name,
        backend="generalized",
        episode_length=1000,
        reward_scaling=1.0,
        terminate=True,
    ):
        """Build the wrapped Brax environment.

        Args:
            env_name: Name passed to ``envs.get_environment``.
            backend: Brax backend passed to ``envs.get_environment``.
            episode_length: Episode length given to ``EpisodeWrapper``.
            reward_scaling: Factor applied to rewards returned by ``step``.
            terminate: Forwarded as ``terminate_when_unhealthy``.
        """
        base_env = envs.get_environment(
            env_name=env_name, backend=backend, terminate_when_unhealthy=terminate
        )
        limited_env = EpisodeWrapper(
            base_env, episode_length=episode_length, action_repeat=1
        )
        self.env = AutoResetWrapper(limited_env)
        self.action_size = self.env.action_size
        self.observation_size = (self.env.observation_size,)
        self.default_params = ()
        self.reward_scaling = reward_scaling

    def reset(self, key):
        """Reset the environment and return ``(obs, state)``.

        NOTE(review): ``LogWrapper`` and ``NormalizeVec`` in this file unpack
        ``(obs, critic_obs, state)`` from ``reset``; this method returns only
        two values. Left unchanged to keep the return shape stable - confirm
        against callers.
        """
        state = self.env.reset(key)
        return state.obs, state

    def step(self, key, state, action):
        """Advance one step; ``key`` is accepted but not used.

        Returns ``(obs, critic_obs, state, reward, done, info)`` where the
        critic observation is the same array as the actor observation, the
        reward is multiplied by ``reward_scaling`` and ``info`` is empty.
        """
        next_state = self.env.step(state, action)
        observation = next_state.obs
        return (
            observation,
            observation,
            next_state,
            next_state.reward * self.reward_scaling,
            next_state.done > 0.5,
            {},
        )

    def _unbounded_box(self):
        """Return an unbounded ``Box`` shaped like one flat observation."""
        return spaces.Box(
            low=-jnp.inf,
            high=jnp.inf,
            shape=(self.env.observation_size,),
        )

    def observation_space(self, params=None):
        """Return ``(actor_space, critic_space)``; ``params`` is accepted but unused.

        The optional ``params`` keeps the call signature compatible with
        ``MjxGymnaxWrapper.observation_space``.
        """
        return self._unbounded_box(), self._unbounded_box()

    def action_space(self, params=None):
        """Return the ``[-1, 1]`` action box; ``params`` is accepted but unused."""
        return spaces.Box(
            low=-1.0,
            high=1.0,
            shape=(self.env.action_size,),
        )
|
||||||
|
|
||||||
|
|
||||||
|
class ClipAction(Wrapper):
    """Clamp actions to ``[low, high]`` before passing them to the wrapped env."""

    def __init__(self, env, low=-0.999, high=0.999):
        """Wrap ``env`` and remember the clipping bounds."""
        super().__init__(env)
        self.low = low
        self.high = high

    def step(self, key, state, action):
        """Clip ``action`` to the stored bounds and step the wrapped env.

        TODO: in theory the bounds should come from the wrapped environment's
        action space rather than from fixed constants.
        """
        bounded_action = jnp.clip(action, self.low, self.high)
        return self.env.step(key, state, bounded_action)
|
||||||
|
|
||||||
|
|
||||||
|
@struct.dataclass
class NormalizeVecObsEnvState:
    """State carried by ``NormalizeVec``: running statistics plus the inner state."""

    # Running mean / variance of the actor observations.
    mean: jnp.ndarray
    var: jnp.ndarray
    # Running mean / variance of the critic observations.
    critic_mean: jnp.ndarray
    critic_var: jnp.ndarray
    # Number of observations folded into the statistics so far.
    count: float
    # State of the wrapped environment.
    env_state: environment.EnvState
    truncated: float
    info: Any = None

    def unwrapped(self):
        """Return whatever the inner state's own ``unwrapped()`` returns."""
        inner_state = self.env_state
        return inner_state.unwrapped()

    def set_env_state(self, env_state):
        """Return a copy with ``env_state`` pushed into the inner state."""
        new_inner = self.env_state.set_env_state(env_state)
        return self.replace(env_state=new_inner)
|
||||||
|
|
||||||
|
|
||||||
|
class NormalizeVec(Wrapper):
    """Normalise actor and critic observations with running batch statistics.

    Statistics are computed over axis 0 of the observations and stored in a
    ``NormalizeVecObsEnvState``. The wrapped environment's state must expose
    ``truncated`` and ``info`` attributes, which are copied into that state.
    """

    # Added to the variance before taking the square root.
    _VAR_EPS = 1e-2

    def __init__(self, env):
        """Wrap ``env``."""
        super().__init__(env)

    @staticmethod
    def _batch_stats(obs):
        """Return ``(mean, var)`` of ``obs`` over axis 0."""
        return jnp.mean(obs, axis=0), jnp.var(obs, axis=0)

    def _normalize(self, obs, mean, var):
        """Return ``(obs - mean) / sqrt(var + eps)``."""
        return (obs - mean) / jnp.sqrt(var + self._VAR_EPS)

    def _init_state(self, key):
        """Reset the wrapped env and build a state from the first batch.

        ``truncated`` is a required field of ``NormalizeVecObsEnvState``, so it
        is filled from the wrapped state (together with ``info``) exactly as
        ``reset`` does.
        """
        obs, critic_obs, env_state = self.env.reset(key)
        mean, var = self._batch_stats(obs)
        critic_mean, critic_var = self._batch_stats(critic_obs)
        return NormalizeVecObsEnvState(
            mean=mean,
            var=var,
            critic_mean=critic_mean,
            critic_var=critic_var,
            count=obs.shape[0],
            env_state=env_state,
            truncated=env_state.truncated,
            info=env_state.info,
        )

    def _compute_stats(self, mean, var, count, obs):
        """Merge the statistics of batch ``obs`` into running ``mean`` / ``var``.

        ``count`` is the number of samples behind the running statistics.
        Returns ``(new_mean, new_var)``.
        """
        batch_mean, batch_var = self._batch_stats(obs)
        batch_count = obs.shape[0]

        delta = batch_mean - mean
        tot_count = count + batch_count

        new_mean = mean + delta * batch_count / tot_count
        # Combine the two sums of squared deviations plus the correction term
        # for the difference between the two means.
        m_a = var * count
        m_b = batch_var * batch_count
        m2 = m_a + m_b + jnp.square(delta) * count * batch_count / tot_count
        new_var = m2 / tot_count

        return new_mean, new_var

    def reset(self, key, params=None):
        """Reset the wrapped env and return normalised ``(obs, critic_obs, state)``.

        When ``params`` is given, its ``mean``, ``var``, ``critic_mean``,
        ``critic_var`` and ``count`` are reused; otherwise the statistics are
        initialised from the reset batch.
        """
        obs, critic_obs, env_state = self.env.reset(key)
        if params is not None:
            mean = params.mean
            var = params.var
            critic_mean = params.critic_mean
            critic_var = params.critic_var
            count = params.count
        else:
            mean, var = self._batch_stats(obs)
            critic_mean, critic_var = self._batch_stats(critic_obs)
            count = obs.shape[0]
        state = NormalizeVecObsEnvState(
            mean=mean,
            var=var,
            critic_mean=critic_mean,
            critic_var=critic_var,
            count=count,
            env_state=env_state,
            truncated=env_state.truncated,
            info=env_state.info,
        )
        return (
            self._normalize(obs, state.mean, state.var),
            self._normalize(critic_obs, state.critic_mean, state.critic_var),
            state,
        )

    def step(self, key, state, action):
        """Step the wrapped env, update the statistics and normalise the outputs.

        The new observations are folded into the running statistics before
        they are normalised. Returns
        ``(obs, critic_obs, state, reward, done, info)``.
        """
        obs, critic_obs, env_state, reward, done, info = self.env.step(
            key, state.env_state, action
        )

        new_mean, new_var = self._compute_stats(state.mean, state.var, state.count, obs)
        new_critic_mean, new_critic_var = self._compute_stats(
            state.critic_mean, state.critic_var, state.count, critic_obs
        )
        new_count = state.count + obs.shape[0]

        state = NormalizeVecObsEnvState(
            mean=new_mean,
            var=new_var,
            critic_mean=new_critic_mean,
            critic_var=new_critic_var,
            count=new_count,
            env_state=env_state,
            truncated=env_state.truncated,
            info=env_state.info,
        )
        return (
            self._normalize(obs, state.mean, state.var),
            self._normalize(critic_obs, state.critic_mean, state.critic_var),
            state,
            reward,
            done,
            info,
        )
|
||||||
124
reppo/env_utils/torch_wrappers/humanoid_bench_env.py
Normal file
124
reppo/env_utils/torch_wrappers/humanoid_bench_env.py
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import gymnasium as gym
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
from gymnasium.wrappers import TimeLimit
|
||||||
|
from loguru import logger as log
|
||||||
|
from stable_baselines3.common.vec_env import SubprocVecEnv
|
||||||
|
|
||||||
|
# Disable all logging below CRITICAL level
|
||||||
|
log.remove()
|
||||||
|
log.add(lambda msg: False, level="CRITICAL")
|
||||||
|
|
||||||
|
|
||||||
|
def make_env(env_name, rank, render_mode=None, seed=0):
    """
    Utility function for multiprocessed env.

    Returns a zero-argument callable that builds the environment, wraps it in
    a ``TimeLimit`` and seeds it with ``seed + rank``. Construction is
    deferred so that the callable can be handed to a vectorized env.

    :param env_name: (str) id passed to ``gym.make``
    :param rank: (int) index of the subprocess
    :param render_mode: render mode forwarded to ``gym.make``
    :param seed: (int) the initial seed for RNG
    :return: callable creating the wrapped environment
    """
    # Tasks that get a 500-step time limit; everything else gets 1000.
    # "h1cube-v0" breaks the "h1-<task>" pattern of its siblings, so the
    # hyphenated id is listed too; the old spelling is kept for compatibility.
    short_horizon_tasks = {
        "h1hand-push-v0",
        "h1-push-v0",
        "h1hand-cube-v0",
        "h1-cube-v0",
        "h1cube-v0",
        "h1hand-basketball-v0",
        "h1-basketball-v0",
        "h1hand-kitchen-v0",
        "h1-kitchen-v0",
    }
    max_episode_steps = 500 if env_name in short_horizon_tasks else 1000

    def _init():
        env = gym.make(env_name, render_mode=render_mode)
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
        # Offset by rank so every worker gets a different seed.
        env.unwrapped.seed(seed + rank)

        return env

    return _init
|
||||||
|
|
||||||
|
|
||||||
|
class HumanoidBenchEnv:
    """Wraps HumanoidBench environment to support parallel environments."""

    # Tasks reported with a 500-step horizon; all others report 1000.
    # "h1cube-v0" breaks the "h1-<task>" pattern of its siblings, so the
    # hyphenated id is listed too; the old spelling is kept for compatibility.
    _SHORT_HORIZON_TASKS = frozenset(
        {
            "h1hand-push-v0",
            "h1-push-v0",
            "h1hand-cube-v0",
            "h1-cube-v0",
            "h1cube-v0",
            "h1hand-basketball-v0",
            "h1-basketball-v0",
            "h1hand-kitchen-v0",
            "h1-kitchen-v0",
        }
    )

    def __init__(self, env_name, num_envs=1, render_mode=None, device=None, seed=0):
        """Spawn ``num_envs`` subprocess environments.

        :param env_name: id of the environment to create
        :param num_envs: number of parallel environments
        :param render_mode: render mode forwarded to each environment
        :param device: torch device for returned tensors; defaults to CUDA
            when available, otherwise CPU
        :param seed: base seed handed to ``make_env``; the default of 0
            matches the value that was used implicitly before
        """
        # NOTE: HumanoidBench action space is already normalized to [-1, 1]
        device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.sim_device = device
        self.num_envs = num_envs

        # Create the base environment
        self.envs = SubprocVecEnv(
            [
                make_env(env_name, i, render_mode=render_mode, seed=seed)
                for i in range(num_envs)
            ]
        )

        self.max_episode_steps = (
            500 if env_name in self._SHORT_HORIZON_TASKS else 1000
        )

        # For compatibility with MuJoCo Playground
        self.asymmetric_obs = False
        self.num_obs = self.envs.observation_space.shape[-1]
        self.num_actions = self.envs.action_space.shape[-1]

    def reset(self):
        """Reset the environment."""
        observations = self.envs.reset()
        return torch.from_numpy(observations).to(
            device=self.sim_device, dtype=torch.float
        )

    def render(self):
        """Render the single wrapped environment."""
        assert self.num_envs == 1, (
            "Currently only supports single environment rendering"
        )
        return self.envs.render()

    def step(self, actions):
        """Step all environments with a batch of torch actions.

        Returns ``(observations, rewards, dones, infos)`` as tensors on
        ``self.sim_device``. ``infos["time_outs"]`` flags time-limit
        truncations, and ``infos["observations"]["raw"]["obs"]`` holds the
        observations with truncated entries replaced by the reported
        ``terminal_observation``.
        """
        assert isinstance(actions, torch.Tensor)
        # detach(): .numpy() raises on tensors that still require grad.
        actions = actions.detach().cpu().numpy()

        observations, rewards, dones, raw_infos = self.envs.step(actions)

        # This will be used for getting 'true' next observations
        true_next_obs = observations.copy()
        truncateds = np.zeros_like(dones)
        for i, env_info in enumerate(raw_infos):
            if env_info.get("TimeLimit.truncated", False):
                truncateds[i] = True
                true_next_obs[i] = env_info["terminal_observation"]

        observations = torch.from_numpy(observations).to(
            device=self.sim_device, dtype=torch.float
        )
        rewards = torch.from_numpy(rewards).to(
            device=self.sim_device, dtype=torch.float
        )
        dones = torch.from_numpy(dones).to(device=self.sim_device)
        truncateds = torch.from_numpy(truncateds).to(device=self.sim_device)

        infos = {
            "observations": {
                "raw": {
                    "obs": torch.from_numpy(true_next_obs).to(
                        device=self.sim_device, dtype=torch.float
                    )
                }
            },
            "time_outs": truncateds,
        }

        return observations, rewards, dones, infos
|
||||||
81
reppo/env_utils/torch_wrappers/isaaclab_env.py
Normal file
81
reppo/env_utils/torch_wrappers/isaaclab_env.py
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import gymnasium as gym
|
||||||
|
import torch
|
||||||
|
from isaaclab.app import AppLauncher
|
||||||
|
|
||||||
|
# Start the simulator app in headless mode as an import-time side effect.
# NOTE(review): presumably this has to run before the isaaclab_tasks import
# further down — confirm before reordering the imports.
app_launcher = AppLauncher(headless=True)
|
||||||
|
simulation_app = app_launcher.app
|
||||||
|
|
||||||
|
from isaaclab_tasks.utils.parse_cfg import parse_env_cfg
|
||||||
|
|
||||||
|
|
||||||
|
class IsaacLabEnv:
    """Wrapper for IsaacLab environments to be compatible with MuJoCo Playground"""

    def __init__(
        self,
        task_name: str,
        device: str,
        num_envs: int,
        seed: int,
        action_bounds: Optional[float] = None,
    ):
        """Create the task and cache its sizes.

        ``action_bounds``, when given, is the scale applied to actions after
        they are clamped to [-1, 1] in ``step``.
        """
        self.seed = seed
        self.action_bounds = action_bounds

        env_cfg = parse_env_cfg(task_name, device=device, num_envs=num_envs)
        env_cfg.seed = seed
        self.envs = gym.make(task_name, cfg=env_cfg, render_mode=None)

        base_env = self.envs.unwrapped
        obs_space = base_env.single_observation_space
        self.num_envs = base_env.num_envs
        self.max_episode_steps = base_env.max_episode_length
        self.num_obs = obs_space["policy"].shape[0]
        # A "critic" entry in the observation space means privileged obs exist.
        self.asymmetric_obs = "critic" in obs_space
        self.num_privileged_obs = (
            obs_space["critic"].shape[0] if self.asymmetric_obs else 0
        )
        self.num_actions = base_env.single_action_space.shape[0]

    def reset(self, random_start_init: bool = True) -> torch.Tensor:
        """Reset all environments and return the policy observations."""
        obs_dict, _ = self.envs.reset()
        if random_start_init:
            # NOTE: decorrelate episode horizons like RSL‑RL
            base_env = self.envs.unwrapped
            base_env.episode_length_buf = torch.randint_like(
                base_env.episode_length_buf, high=int(self.max_episode_steps)
            )
        return obs_dict["policy"]

    def reset_with_critic_obs(self) -> tuple[torch.Tensor, torch.Tensor]:
        """Reset all environments and return (policy obs, critic obs)."""
        obs_dict, _ = self.envs.reset()
        policy_obs = obs_dict["policy"]
        privileged_obs = obs_dict["critic"]
        return policy_obs, privileged_obs

    def step(
        self, actions: torch.Tensor
    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
        """Step the environments; returns (obs, reward, dones, info)."""
        bounds = self.action_bounds
        if bounds is not None:
            actions = actions.clamp(-1.0, 1.0) * bounds

        obs_dict, rew, terminations, truncations, infos = self.envs.step(actions)
        dones = (terminations | truncations).long()
        obs = obs_dict["policy"]
        critic_obs = None
        if self.asymmetric_obs:
            critic_obs = obs_dict["critic"]

        # NOTE: There's really no way to get the raw observations from IsaacLab
        # We just use the 'reset_obs' as next_obs, unfortunately.
        # See https://github.com/isaac-sim/IsaacLab/issues/1362
        info_ret = {
            "time_outs": truncations,
            "observations": {
                "critic": critic_obs,
                "raw": {"obs": obs, "critic_obs": critic_obs},
            },
        }
        return obs, rew, dones, info_ret

    def render(self):
        """Rendering is not available for this backend."""
        raise NotImplementedError(
            "We don't support rendering for IsaacLab environments"
        )
|
||||||
89
reppo/env_utils/torch_wrappers/maniskill_wrapper.py
Normal file
89
reppo/env_utils/torch_wrappers/maniskill_wrapper.py
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
from gymnasium import Wrapper
|
||||||
|
import torch
|
||||||
|
|
||||||
|
|
||||||
|
class ManiSkillWrapper(Wrapper):
    """
    A wrapper for ManiSkill environments to ensure compatibility with the expected API.
    This wrapper is used to handle the ManiSkill environments in a way that is consistent
    with the other environments in the codebase.
    """

    def __init__(self, env, max_episode_steps: int, partial_reset, device: str):
        """Wrap ``env`` and allocate the per-environment logging buffers.

        ``partial_reset`` selects how ``step`` reports episode ends (see
        ``step``); ``device`` is where the logging buffers are allocated.
        """
        super().__init__(env)
        self.action_space = env.action_space
        self.observation_space = env.observation_space
        self.metadata = env.metadata
        self.asymmetric_obs = False
        self.max_episode_steps = max_episode_steps

        self.partial_reset = partial_reset

        # Statistics of the most recently finished episode of each env.
        self.returns = torch.zeros(env.num_envs, dtype=torch.float32, device=device)
        self.episode_len = torch.zeros(env.num_envs, dtype=torch.float32, device=device)
        self.success = torch.zeros(env.num_envs, dtype=torch.float32, device=device)

    @property
    def unwrapped(self):
        """
        Returns the underlying environment.
        """
        return self.env

    @property
    def num_actions(self):
        """
        Returns the number of actions in the action space.
        """
        return self.action_space.shape[1]

    @property
    def num_obs(self):
        """
        Returns the number of observations in the observation space.
        """
        return self.observation_space.shape[1]

    def reset(self, seed=None, options=None):
        """
        Resets the environment and returns the initial observation.

        ``options`` defaults to a fresh empty dict on every call, so the
        wrapped env never receives a dict shared between calls.
        """
        if options is None:
            options = {}
        return self.env.reset(seed=seed, options=options)

    def step(self, action):
        """
        Takes a step in the environment with the given action.
        Returns the next observation, reward, done, truncated, and info.

        ``info["log_info"]`` carries the stored return, length and success
        of the last finished episode per environment.
        """
        obs, reward, terminated, truncated, info = self.env.step(action)
        if "final_info" in info:
            # Mask of envs that finished this step; others keep old values.
            finished = info["_final_info"].float()
            unfinished = 1.0 - finished
            episode = info["final_info"]["episode"]
            self.returns = episode["return"] * finished + unfinished * self.returns
            self.episode_len = (
                episode["episode_len"] * finished + unfinished * self.episode_len
            )
            self.success = (
                episode["success_once"] * finished + unfinished * self.success
            )
        info["log_info"] = {
            "return": self.returns,
            "episode_len": self.episode_len,
            "success": self.success,
        }
        if self.partial_reset:
            # maniskill continues bootstrap on terminated, which playground does on truncated.
            # This unifies the interfaces in a very hacky way
            done = torch.zeros_like(
                terminated, dtype=torch.bool, device=terminated.device
            )
            truncated = torch.logical_or(terminated, truncated)
        else:
            done = torch.logical_or(terminated, truncated)
            truncated = torch.zeros_like(done, dtype=torch.bool, device=done.device)
        return obs, reward, done, truncated, info
|
||||||
148
reppo/env_utils/torch_wrappers/mtbench_env.py
Normal file
148
reppo/env_utils/torch_wrappers/mtbench_env.py
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import isaacgymenvs
|
||||||
|
import torch
|
||||||
|
from omegaconf import OmegaConf
|
||||||
|
|
||||||
|
|
||||||
|
class MTBenchEnv:
    """Torch wrapper around the MTBench Meta-World-v2 multi-task environments."""

    def __init__(
        self,
        task_name: str,
        device_id: int,
        num_envs: int,
        seed: int,
    ):
        """Create the multi-task environment.

        :param task_name: ``"meta-world-v2-mt10"`` or ``"meta-world-v2-mt50"``
        :param device_id: CUDA device index used for simulation and RL tensors
        :param num_envs: number of parallel environments (4096 for MT-10,
            8192 for MT-50)
        :param seed: seed written to the task config and passed to the env
        :raises ValueError: if ``task_name`` is not one of the supported names
        """
        # Function-scope import so the file's import block stays untouched.
        import copy

        # NOTE: Currently, we only support Meta-World-v2 MT-10/MT-50 in MTBench
        # Deep copy: the nested "env" dict is edited below, and a shallow
        # copy would write those edits into the shared module-level template.
        task_config = copy.deepcopy(MTBENCH_MW2_CONFIG)
        if task_name == "meta-world-v2-mt10":
            # MT-10 Setup
            assert num_envs == 4096, "MT-10 only supports 4096 environments (for now)"
            self.num_tasks = 10
            task_config["env"]["tasks"] = [4, 16, 17, 18, 28, 31, 38, 40, 48, 49]
            task_config["env"]["taskEnvCount"] = [410] * 6 + [409] * 4
        elif task_name == "meta-world-v2-mt50":
            # MT-50 Setup
            self.num_tasks = 50
            assert num_envs == 8192, "MT-50 only supports 8192 environments (for now)"
            task_config["env"]["tasks"] = list(range(50))
            task_config["env"]["taskEnvCount"] = [164] * 42 + [163] * 8  # 6888 + 1304
        else:
            raise ValueError(f"Unsupported task name: {task_name}")
        task_config["env"]["numEnvs"] = num_envs
        task_config["env"]["numObservations"] = 39 + self.num_tasks
        task_config["env"]["seed"] = seed

        # Convert dictionary to OmegaConf object
        env_cfg = {"task": task_config}
        env_cfg = OmegaConf.create(env_cfg)

        self.env = isaacgymenvs.make(
            task=env_cfg.task.name,
            num_envs=num_envs,
            sim_device=f"cuda:{device_id}",
            rl_device=f"cuda:{device_id}",
            seed=seed,
            headless=True,
            cfg=env_cfg,
        )

        self.num_envs = num_envs
        self.asymmetric_obs = False
        self.num_obs = self.env.observation_space.shape[0]
        assert self.num_obs == 39 + self.num_tasks, (
            "MTBench observation space is 39 + num_tasks (one-hot vector)"
        )
        self.num_privileged_obs = 0
        self.num_actions = self.env.action_space.shape[0]
        self.max_episode_steps = self.env.max_episode_length

    def reset(self) -> torch.Tensor:
        """Reset the environment."""
        # TODO: Check if we need no_grad and detach here
        with torch.no_grad():  # do we need this?
            self.env.reset_idx(torch.arange(self.num_envs, device=self.env.device))
            # Clear the accumulated reward / success counters as well.
            self.env.cumulatives["rewards"][:] = 0
            self.env.cumulatives["success"][:] = 0
            obs_dict = self.env.reset()
            return obs_dict["obs"].detach()

    def step(
        self, actions: torch.Tensor
    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
        """Step the environment."""
        assert isinstance(actions, torch.Tensor)

        # TODO: Check if we need no_grad and detach here
        with torch.no_grad():
            obs_dict, rew, dones, infos = self.env.step(actions.detach())
        truncations = infos["time_outs"]
        info_ret = {"time_outs": truncations.detach()}
        if "episode" in infos:
            info_ret["episode"] = infos["episode"]
        # NOTE: There's really no way to get the raw observations from IsaacGym
        # We just use the 'reset_obs' as next_obs, unfortunately.
        info_ret["observations"] = {"raw": {"obs": obs_dict["obs"].detach()}}
        return obs_dict["obs"].detach(), rew.detach(), dones.detach(), info_ret

    def render(self):
        """Rendering is not available for this backend."""
        raise NotImplementedError(
            "We don't support rendering for MTBench (IsaacGym) environments"
        )
|
||||||
|
|
||||||
|
|
||||||
|
# Template task configuration for the Meta-World-v2 MTBench environments.
# MTBenchEnv copies it and overwrites the per-run entries of "env"
# (tasks, taskEnvCount, numEnvs, numObservations, seed).
MTBENCH_MW2_CONFIG = {
    "name": "meta-world-v2",
    "physics_engine": "physx",
    # --- environment settings -------------------------------------------
    "env": {
        "numEnvs": 1,
        "envSpacing": 1.5,
        "episodeLength": 150,
        "enableDebugVis": False,
        "clipObservations": 5.0,
        "clipActions": 1.0,
        "aggregateMode": 3,
        "actionScale": 0.01,
        "resetNoise": 0.15,
        # Placeholders, replaced per benchmark by MTBenchEnv.
        "tasks": [0],
        "taskEnvCount": [4096],
        "init_at_random_progress": True,
        "exemptedInitAtRandomProgressTasks": [],
        "taskEmbedding": True,
        "taskEmbeddingType": "one_hot",
        "seed": 42,
        "cameraRenderingInterval": 5000,
        "cameraWidth": 1024,
        "cameraHeight": 1024,
        "sparse_reward": False,
        "termination_on_success": False,
        "reward_scale": 1.0,
        "fixed": False,
        # Filled in by MTBenchEnv as 39 + number of tasks.
        "numObservations": None,
        "numActions": 4,
    },
    "enableCameraSensors": False,
    # --- simulation settings --------------------------------------------
    "sim": {
        "dt": 0.01667,
        "substeps": 2,
        "up_axis": "z",
        "use_gpu_pipeline": True,
        "gravity": [0.0, 0.0, -9.81],
        "physx": {
            "num_threads": 4,
            "solver_type": 1,
            "use_gpu": True,
            "num_position_iterations": 8,
            "num_velocity_iterations": 1,
            "contact_offset": 0.005,
            "rest_offset": 0.0,
            "bounce_threshold_velocity": 0.2,
            "max_depenetration_velocity": 1000.0,
            "default_buffer_size_multiplier": 10.0,
            "max_gpu_contact_pairs": 1048576,
            "num_subscenes": 4,
            "contact_collection": 0,
        },
    },
    "task": {"randomize": False},
}
|
||||||
139
reppo/env_utils/torch_wrappers/mujoco_playground_env.py
Normal file
139
reppo/env_utils/torch_wrappers/mujoco_playground_env.py
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
import jax
|
||||||
|
from mujoco_playground import registry, wrapper_torch
|
||||||
|
import torch
|
||||||
|
|
||||||
|
# Import-time side effect: point JAX's compilation cache at /tmp/jax_cache.
# NOTE(review): the two thresholds configured right after this line (-1 bytes,
# 0 seconds) presumably make every compiled program eligible for the cache —
# confirm against the JAX persistent compilation cache documentation.
jax.config.update("jax_compilation_cache_dir", "/tmp/jax_cache")
|
||||||
|
jax.config.update("jax_persistent_cache_min_entry_size_bytes", -1)
|
||||||
|
jax.config.update("jax_persistent_cache_min_compile_time_secs", 0)
|
||||||
|
|
||||||
|
|
||||||
|
class PlaygroundEvalEnvWrapper:
    def __init__(self, eval_env, max_episode_steps, env_name, num_eval_envs, seed):
        """
        Wrapper used for evaluation / rendering environments.
        Note that this is different from training environments that are
        wrapped with RSLRLBraxWrapper.
        """
        self.env = eval_env
        self.env_name = env_name
        self.num_envs = num_eval_envs
        self.max_episode_steps = max_episode_steps

        # Batched, jitted versions of the env's reset / step functions.
        self.jit_reset = jax.jit(jax.vmap(self.env.reset))
        self.jit_step = jax.jit(jax.vmap(self.env.step))

        # A dict-valued observation size marks an env with several obs groups.
        self.asymmetric_obs = isinstance(self.env.unwrapped.observation_size, dict)

        # One reset key per env, derived once from the seed and reused by reset().
        self.key = jax.random.PRNGKey(seed)
        self.key_reset = jax.random.split(self.key, num_eval_envs)

    def _policy_obs(self):
        """Convert the policy-facing observation of the current state to torch."""
        raw_obs = self.state.obs
        if self.asymmetric_obs:
            raw_obs = raw_obs["state"]
        return wrapper_torch._jax_to_torch(raw_obs)

    def reset(self):
        """Reset every env with the stored reset keys and return the observations."""
        self.state = self.jit_reset(self.key_reset)
        return self._policy_obs()

    def step(self, actions):
        """Step every env; the done flags are returned twice in the 5-tuple."""
        self.state = self.jit_step(self.state, wrapper_torch._torch_to_jax(actions))
        next_obs = self._policy_obs()
        rewards = wrapper_torch._jax_to_torch(self.state.reward)
        dones = wrapper_torch._jax_to_torch(self.state.done)
        return next_obs, rewards, dones, dones, None
|
||||||
|
|
||||||
|
|
||||||
|
class RandomizeInitialWrapper(wrapper_torch.RSLRLBraxWrapper):
    """
    Training wrapper that randomizes the step counter after every reset.

    After each reset, ``env_state.info["steps"]`` is overwritten with random
    values in [0, 1000). ``step`` additionally returns the done flag twice so
    that callers receive a 5-tuple.
    """

    def _randomize_step_counters(self):
        """Overwrite the per-env ``steps`` counters with random offsets."""
        # NOTE(review): self.key is not advanced here, so every reset draws
        # the same offsets — confirm this is intended.
        self.env_state.info["steps"] = jax.random.randint(
            self.key, self.env_state.info["steps"].shape, 0, 1000
        ).astype(jax.numpy.float32)

    def reset(self):
        """Reset the environments and randomize their step counters."""
        obs = super().reset()
        self._randomize_step_counters()
        return obs

    def reset_with_critic_obs(self):
        """Like ``reset`` but also returns the critic observations."""
        obs, critic_obs = super().reset_with_critic_obs()
        self._randomize_step_counters()
        return obs, critic_obs

    def step(self, action):
        """Step the environments; ``done`` fills both done and truncated slots."""
        obs, reward, done, info = super().step(action)
        return obs, reward, done, done, info
|
||||||
|
|
||||||
|
|
||||||
|
def make_env(
    env_name,
    seed,
    num_envs,
    num_eval_envs,
    device_rank,
    use_tuned_reward=False,
    use_domain_randomization=False,
    use_push_randomization=False,
):
    """Build the training and evaluation environments for ``env_name``.

    Returns ``(train_env, eval_env)``: the training env is wrapped in
    ``RandomizeInitialWrapper`` and the evaluation env in
    ``PlaygroundEvalEnvWrapper``. The tuned reward scales and the domain
    randomizer are applied to the training env only.
    """
    is_humanoid_task = env_name in (
        "G1JoystickRoughTerrain",
        "G1JoystickFlatTerrain",
        "T1JoystickRoughTerrain",
        "T1JoystickFlatTerrain",
    )
    # Pushes are switched off for humanoid tasks unless explicitly requested.
    disable_pushes = is_humanoid_task and not use_push_randomization

    def _turn_off_pushes(cfg):
        cfg.push_config.enable = False
        cfg.push_config.magnitude_range = [0.0, 0.0]

    # Make training environment
    train_env_cfg = registry.get_default_config(env_name)
    if use_tuned_reward and is_humanoid_task:
        # NOTE: Tuned reward for G1. Used for producing Figure 7 in the paper.
        # Somehow it works reasonably for T1 as well.
        # However, see `sim2real.md` for sim-to-real RL with Booster T1
        tuned_scales = {
            "energy": -5e-5,
            "action_rate": -1e-1,
            "torques": -1e-3,
            "pose": -1.0,
            "tracking_ang_vel": 1.25,
            "tracking_lin_vel": 1.25,
            "feet_phase": 1.0,
            "ang_vel_xy": -0.3,
            "orientation": -5.0,
        }
        for term, weight in tuned_scales.items():
            setattr(train_env_cfg.reward_config.scales, term, weight)

    if disable_pushes:
        _turn_off_pushes(train_env_cfg)

    randomizer = None
    if use_domain_randomization:
        randomizer = registry.get_domain_randomizer(env_name)

    train_env = RandomizeInitialWrapper(
        registry.load(env_name, config=train_env_cfg),
        num_envs,
        seed,
        train_env_cfg.episode_length,
        train_env_cfg.action_repeat,
        randomization_fn=randomizer,
        device_rank=device_rank,
    )

    # Make evaluation environment
    eval_env_cfg = registry.get_default_config(env_name)
    if disable_pushes:
        _turn_off_pushes(eval_env_cfg)
    eval_env = PlaygroundEvalEnvWrapper(
        registry.load(env_name, config=eval_env_cfg),
        eval_env_cfg.episode_length,
        env_name,
        num_eval_envs,
        seed,
    )

    return train_env, eval_env
|
||||||
3
reppo/jaxrl/__init__.py
Normal file
3
reppo/jaxrl/__init__.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
import jax
|
||||||
|
|
||||||
|
# Import-time side effect of this package: set JAX's default matmul
# precision to "highest".
jax.config.update("jax_default_matmul_precision", "highest")
|
||||||
45
reppo/jaxrl/normalization.py
Normal file
45
reppo/jaxrl/normalization.py
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
import functools
|
||||||
|
|
||||||
|
import flax.struct as struct
|
||||||
|
import jax
|
||||||
|
import jax.numpy as jnp
|
||||||
|
|
||||||
|
|
||||||
|
class NormalizationState(struct.PyTreeNode):
    """Running statistics carried between ``Normalizer`` calls."""

    # Running mean (zeros right after Normalizer.init).
    mean: struct.PyTreeNode
    # Running variance (ones right after Normalizer.init).
    var: struct.PyTreeNode
    # Number of samples folded into the statistics so far.
    count: int
|
||||||
|
|
||||||
|
|
||||||
|
class Normalizer:
    """Running mean / variance normalizer operating on pytrees of arrays.

    The leading axis of every leaf is treated as the batch axis. All methods
    are jitted with ``self`` as a static argument.
    """

    @functools.partial(jax.jit, static_argnums=0)
    def init(self, tree: "struct.PyTreeNode") -> "NormalizationState":
        """Create the initial state: zero mean, unit variance, count 0.

        The statistics take each leaf's shape without its leading axis.
        """
        return NormalizationState(
            mean=jax.tree.map(lambda x: jnp.zeros(x.shape[1:], dtype=x.dtype), tree),
            var=jax.tree.map(lambda x: jnp.ones(x.shape[1:], dtype=x.dtype), tree),
            count=0,
        )

    @functools.partial(jax.jit, static_argnums=0)
    def update(
        self, state: "NormalizationState", tree: "struct.PyTreeNode"
    ) -> "NormalizationState":
        """Fold one batch into the running statistics.

        The batch moments are merged with the stored ones by combining the
        means and the summed squared deviations of both groups. Every step
        is mapped over the leaves, so this works for a single array as well
        as for nested pytrees such as dicts.
        """
        batch_var = jax.tree.map(lambda x: jnp.var(x, axis=0), tree)
        batch_mean = jax.tree.map(lambda x: jnp.mean(x, axis=0), tree)
        # Batch size is read from the last leaf (0 for an empty tree).
        batch_size = jax.tree.reduce(lambda _, leaf: leaf.shape[0], tree, 0)
        count = state.count + batch_size

        delta = jax.tree.map(lambda b, m: b - m, batch_mean, state.mean)
        new_mean = jax.tree.map(
            lambda m, d: m + d * batch_size / count, state.mean, delta
        )

        def _merged_var(old_var, new_var, d):
            m_a = old_var * state.count
            m_b = new_var * batch_size
            m2 = m_a + m_b + jnp.square(d) * state.count * batch_size / count
            return m2 / count

        merged_var = jax.tree.map(_merged_var, state.var, batch_var, delta)

        return state.replace(mean=new_mean, var=merged_var, count=count)

    @functools.partial(jax.jit, static_argnums=0)
    def normalize(
        self, state: "NormalizationState", tree: "struct.PyTreeNode"
    ) -> "struct.PyTreeNode":
        """Standardize every leaf with the stored mean and variance."""
        return jax.tree.map(
            lambda x, m, v: (x - m) / jnp.sqrt(v + 1e-8), tree, state.mean, state.var
        )
|
||||||
758
reppo/jaxrl/ppo_mjx.py
Normal file
758
reppo/jaxrl/ppo_mjx.py
Normal file
@ -0,0 +1,758 @@
|
|||||||
|
import logging
|
||||||
|
import math
|
||||||
|
import time
|
||||||
|
import typing
|
||||||
|
from typing import Callable, Optional
|
||||||
|
|
||||||
|
import distrax
|
||||||
|
import hydra
|
||||||
|
import jax
|
||||||
|
import optax
|
||||||
|
import plotly.graph_objs as go
|
||||||
|
from flax import nnx, struct
|
||||||
|
from flax.struct import PyTreeNode
|
||||||
|
from gymnax.environments.environment import Environment, EnvParams, EnvState
|
||||||
|
from jax import numpy as jnp
|
||||||
|
from jax.experimental import checkify
|
||||||
|
from jax.random import PRNGKey
|
||||||
|
from omegaconf import DictConfig, OmegaConf
|
||||||
|
|
||||||
|
import wandb
|
||||||
|
from reppo.env_utils.jax_wrappers import (
|
||||||
|
BraxGymnaxWrapper,
|
||||||
|
ClipAction,
|
||||||
|
LogWrapper,
|
||||||
|
MjxGymnaxWrapper,
|
||||||
|
)
|
||||||
|
from reppo.jaxrl import utils
|
||||||
|
from reppo.jaxrl.normalization import NormalizationState, Normalizer
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
|
## INITIALIZE CLASS STRUCTURES (NETWORKS, STATES, ...)
|
||||||
|
class Policy(typing.Protocol):
    """Structural type of a policy callable: ``(key, obs, state) -> 2-tuple``."""

    def __call__(
        self,
        key: jax.random.PRNGKey,
        obs: PyTreeNode,
        state: Optional[PyTreeNode] = None,
    ) -> tuple[PyTreeNode, PyTreeNode]:
        """Map a PRNG key and an observation (plus optional state) to a pair."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class PPOConfig(struct.PyTreeNode):
    """Hyperparameters of the PPO trainer.

    NOTE(review): the per-field notes below follow the usual PPO naming —
    confirm against the training code.
    """

    # Optimizer and return estimation (presumably learning rate, discount
    # factor and GAE lambda).
    lr: float
    gamma: float
    lmbda: float
    # Loss terms (presumably clip range and the value / entropy weights).
    clip_ratio: float
    value_coef: float
    entropy_coef: float
    # Budget and batching: total_time_steps // (num_steps * num_envs) gives
    # the number of training steps.
    total_time_steps: int
    num_steps: int
    num_mini_batches: int
    num_envs: int
    num_epochs: int
    # None is an accepted value for the gradient-norm limit.
    max_grad_norm: float | None
    # Feature switches.
    normalize_advantages: bool
    normalize_env: bool
    anneal_lr: bool
    # Number of evaluations used to derive the evaluation interval.
    num_eval: int = 25
    max_episode_steps: int = 1000
|
||||||
|
|
||||||
|
|
||||||
|
class Transition(struct.PyTreeNode):
    """Container for one step of rollout data."""

    # Observations for the actor and for the critic.
    obs: jax.Array
    critic_obs: jax.Array
    # What was done and what came back from the environment.
    action: jax.Array
    reward: jax.Array
    # Policy outputs recorded alongside the action.
    log_prob: jax.Array
    value: jax.Array
    # Episode-boundary flags.
    done: jax.Array
    truncated: jax.Array
    # Extra per-step data keyed by name.
    info: dict[str, jax.Array]
|
||||||
|
|
||||||
|
|
||||||
|
class PPOTrainState(nnx.TrainState):
    """``nnx.TrainState`` extended with rollout and normalization bookkeeping."""

    # Progress counters.
    iteration: int
    time_steps: int
    # Where the last rollout stopped.
    last_env_state: EnvState
    last_obs: jax.Array
    last_critic_obs: jax.Array
    # Running observation statistics; None means no normalization state is kept.
    normalization_state: NormalizationState | None = None
    critic_normalization_state: NormalizationState | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class PPONetworks(nnx.Module):
    """Actor and critic MLPs (two tanh hidden layers each) plus a learned log-std."""

    def __init__(
        self,
        obs_dim: int,
        critic_obs_dim: int,
        action_dim: int,
        hidden_dim: int = 64,
        *,
        rngs: nnx.Rngs,
    ):
        """Build both networks.

        :param obs_dim: size of the actor input
        :param critic_obs_dim: size of the critic input
        :param action_dim: size of the action vector
        :param hidden_dim: width of both hidden layers
        :param rngs: random number streams used to initialize the layers
        """

        def linear_layer(in_features, out_features, scale=jnp.sqrt(2)):
            # Orthogonal kernel with the given gain, zero bias.
            return nnx.Linear(
                in_features=in_features,
                out_features=out_features,
                kernel_init=nnx.initializers.orthogonal(scale=scale),
                bias_init=nnx.initializers.zeros_init(),
                rngs=rngs,
            )

        self.actor_module = nnx.Sequential(
            linear_layer(obs_dim, hidden_dim),
            nnx.tanh,
            linear_layer(hidden_dim, hidden_dim),
            nnx.tanh,
            # Small gain on the output layer of the actor.
            linear_layer(hidden_dim, action_dim, scale=0.01),
        )
        # State-independent log standard deviation, one entry per action dim.
        self.log_std = nnx.Param(jnp.zeros(action_dim))
        self.critic_module = nnx.Sequential(
            linear_layer(critic_obs_dim, hidden_dim),
            nnx.tanh,
            linear_layer(hidden_dim, hidden_dim),
            nnx.tanh,
            linear_layer(hidden_dim, 1, scale=1.0),
        )

    def critic(self, obs: jax.Array) -> jax.Array:
        """Return the value estimate with the trailing size-1 axis removed."""
        # squeeze(-1) drops only the output axis; a bare squeeze() would
        # also collapse a batch axis of size 1.
        return self.critic_module(obs).squeeze(-1)

    def actor(self, obs: jax.Array) -> distrax.Distribution:
        """Return a diagonal Gaussian over actions for ``obs``."""
        loc = self.actor_module(obs)
        pi = distrax.MultivariateNormalDiag(
            loc=loc, scale_diag=jnp.exp(self.log_std.value)
        )
        return pi
|
||||||
|
|
||||||
|
|
||||||
|
def make_policy(train_state: PPOTrainState) -> Policy:
    """Create a sampling policy from the networks stored in ``train_state``.

    The returned callable normalizes the observation when the train state
    carries a normalization state, samples an action from the actor and
    returns it together with a dict holding ``log_prob`` and ``value``.
    """
    obs_normalizer = Normalizer()

    def policy(
        key: PRNGKey, obs: jax.Array, state: struct.PyTreeNode = None
    ) -> tuple[jax.Array, jax.Array]:
        norm_state = train_state.normalization_state
        if norm_state is not None:
            obs = obs_normalizer.normalize(norm_state, obs)

        networks = nnx.merge(train_state.graphdef, train_state.params)
        dist = networks.actor(obs)
        # The critic is evaluated on the same (normalized) observation.
        value = networks.critic(obs)
        sampled = dist.sample(seed=key)
        extras = dict(log_prob=dist.log_prob(sampled), value=value)
        return sampled, extras

    return policy
|
||||||
|
|
||||||
|
|
||||||
|
def make_eval_fn(
    env: Environment, max_episode_steps: int
) -> Callable[[jax.random.PRNGKey, Policy], dict[str, float]]:
    """Build a function that rolls a policy out for ``max_episode_steps`` steps.

    The returned function resets all environments, scans the policy over the
    environment and aggregates the logged episode statistics over the steps
    at which an episode was reported as returned.
    """

    def evaluation_fn(key: jax.random.PRNGKey, policy: Policy):
        def step_env(carry, _):
            rng, env_state, obs = carry
            rng, act_key, env_key = jax.random.split(rng, 3)
            action, _ = policy(act_key, obs)
            # One key per environment for the batched step.
            env_keys = jax.random.split(env_key, env.num_envs)
            # Actions are clipped slightly inside [-1, 1] before stepping.
            clipped = action.clip(-1.0 + 1e-4, 1.0 - 1e-4)
            obs, _, env_state, reward, done, info = env.step(
                env_keys, env_state, clipped
            )
            return (rng, env_state, obs), info

        key, init_key = jax.random.split(key)
        reset_keys = jax.random.split(init_key, env.num_envs)
        obs, _, env_state = env.reset(reset_keys)
        _, infos = jax.lax.scan(
            f=step_env,
            init=(key, env_state, obs),
            xs=None,
            length=max_episode_steps,
        )

        # Only entries flagged as a returned episode enter the statistics.
        finished = infos["returned_episode"]
        returns = infos["returned_episode_returns"]
        lengths = infos["returned_episode_lengths"]
        return {
            "episode_return": returns.mean(where=finished),
            "episode_return_std": returns.std(where=finished),
            "episode_length": lengths.mean(where=finished),
            "episode_length_std": lengths.std(where=finished),
            "num_episodes": finished.sum(),
        }

    return evaluation_fn
|
||||||
|
|
||||||
|
|
||||||
|
def make_init(
    cfg: PPOConfig,
    env: Environment,
    env_params: EnvParams = None,
) -> Callable[[jax.random.PRNGKey], PPOTrainState]:
    """Create the function that builds the initial PPO training state.

    Args:
        cfg: PPO hyperparameters.
        env: Vectorized environment exposing ``reset``, ``observation_space``
            and ``action_space``.
        env_params: Environment parameters forwarded to the space queries.

    Returns:
        ``init(key)``, which constructs the networks, optimizer, freshly reset
        environment state and (optional) normalizer states.
    """

    def init(key: jax.random.PRNGKey) -> PPOTrainState:
        key, model_key = jax.random.split(key)
        # Initialize the model
        networks = PPONetworks(
            obs_dim=env.observation_space(env_params)[0].shape[0],
            critic_obs_dim=env.observation_space(env_params)[1].shape[0],
            action_dim=env.action_space(env_params).shape[0],
            rngs=nnx.Rngs(model_key),
        )

        # Set initial learning rate
        if not cfg.anneal_lr:
            lr = cfg.lr
        else:
            # One optimizer update per mini-batch, per epoch, per train step.
            num_train_steps = cfg.total_time_steps // cfg.num_steps // cfg.num_envs
            num_updates = num_train_steps * cfg.num_epochs * cfg.num_mini_batches
            lr = optax.linear_schedule(cfg.lr, 1e-6, num_updates)

        # Initialize the optimizer
        if cfg.max_grad_norm is not None:
            optimizer = optax.chain(
                optax.clip_by_global_norm(cfg.max_grad_norm),
                optax.adam(lr),
            )
        else:
            optimizer = optax.adam(lr)

        # Reset and fully initialize the environment
        key, env_key = jax.random.split(key)
        env_key = jax.random.split(env_key, cfg.num_envs)
        obs, critic_obs, env_state = env.reset(env_key)
        # Randomize initial time step to prevent all envs stepping in tandem
        _env_state = env_state.unwrapped()
        key, randomize_steps_key = jax.random.split(key)
        _env_state.info["steps"] = jax.random.randint(
            randomize_steps_key,
            _env_state.info["steps"].shape,
            0,
            cfg.max_episode_steps,
        ).astype(jnp.float32)
        # NOTE(review): the return value is discarded; confirm that
        # ``set_env_state`` updates ``env_state`` in place.
        env_state.set_env_state(_env_state)

        if cfg.normalize_env:
            normalizer = Normalizer()
            norm_state = normalizer.init(obs)
            critic_normalizer = Normalizer()
            critic_norm_state = critic_normalizer.init(critic_obs)
            # NOTE(review): these observations are stored already normalized,
            # while the rollout code normalizes its carried observations
            # again; confirm the first step is not normalized twice.
            obs = normalizer.normalize(norm_state, obs)
            critic_obs = critic_normalizer.normalize(critic_norm_state, critic_obs)
        else:
            norm_state = None
            critic_norm_state = None

        # Assemble the initial training state
        return PPOTrainState.create(
            iteration=0,
            time_steps=0,
            graphdef=nnx.graphdef(networks),
            params=nnx.state(networks),
            tx=optimizer,
            last_env_state=env_state,
            last_obs=obs,
            last_critic_obs=critic_obs,
            normalization_state=norm_state,
            critic_normalization_state=critic_norm_state,
        )

    return init
|
||||||
|
|
||||||
|
|
||||||
|
def make_train_fn(
    cfg: PPOConfig,
    env: Environment,
    env_params: EnvParams = None,
    log_callback: Callable[[PPOTrainState, dict[str, jax.Array]], None] = None,
    num_seeds: int = 1,
):
    """Build the PPO training function for ``env``.

    The environment is wrapped with ``ClipAction`` and ``LogWrapper``. The
    returned function alternates ``eval_interval`` rollout/update steps with
    one evaluation pass, vmapped over ``num_seeds`` seeds.

    Args:
        cfg: PPO hyperparameters.
        env: Environment to train on.
        env_params: Environment parameters; falls back to ``env.default_params``.
        log_callback: Optional host callback invoked with
            ``(train_state, metrics)`` after every train+eval iteration. When
            ``None`` no callback is registered.
        num_seeds: Number of seeds trained in parallel through ``jax.vmap``.

    Returns:
        ``train_fn(key)``, returning the final train state and the metrics
        stacked over all train+eval iterations.
    """
    # Initialize the environment and wrap it to admit vectorized behavior.
    env_params = env_params or env.default_params
    env = ClipAction(env)
    env = LogWrapper(env, cfg.num_envs)
    eval_fn = make_eval_fn(env, cfg.max_episode_steps)
    normalizer = Normalizer()
    # Number of transitions gathered by one rollout.
    batch_size = cfg.num_steps * cfg.num_envs
    # Number of train steps between two evaluations.
    # NOTE(review): this is 0 when cfg.num_eval exceeds the number of train
    # steps, which makes the integer divisions in train_fn fail.
    eval_interval = int(
        (cfg.total_time_steps / (cfg.num_steps * cfg.num_envs)) // cfg.num_eval
    )

    def collect_rollout(
        key: PRNGKey, train_state: PPOTrainState
    ) -> tuple[Transition, PPOTrainState]:
        """Step the environments ``cfg.num_steps`` times with the current policy."""
        model = nnx.merge(train_state.graphdef, train_state.params)

        # Take a step in the environment
        def step_env(carry, _) -> tuple[tuple, Transition]:
            key, env_state, train_state, obs, critic_obs = carry

            if cfg.normalize_env:
                norm_state = normalizer.update(train_state.normalization_state, obs)
                obs = normalizer.normalize(norm_state, obs)
                train_state = train_state.replace(normalization_state=norm_state)
                # NOTE(review): the critic normalization state is only read
                # here, never updated; confirm this is intended.
                critic_obs = normalizer.normalize(
                    train_state.critic_normalization_state, critic_obs
                )
            # Select action
            key, act_key, step_key = jax.random.split(key, 3)
            pi = model.actor(obs)
            action = pi.sample(seed=act_key)
            # Take a step in the environment
            step_key = jax.random.split(step_key, cfg.num_envs)
            next_obs, next_critic_obs, next_env_state, reward, done, info = env.step(
                step_key, env_state, action.clip(-1.0 + 1e-4, 1.0 - 1e-4)
            )
            # Record the transition
            transition = Transition(
                obs=obs,
                critic_obs=critic_obs,
                action=action,
                reward=reward,
                log_prob=pi.log_prob(action),
                value=model.critic(critic_obs),
                done=done,
                truncated=next_env_state.truncated,
                info=info,
            )
            return (
                key,
                next_env_state,
                train_state,
                next_obs,
                next_critic_obs,
            ), transition

        # Collect rollout via lax.scan taking steps in the environment
        rollout_state, transitions = jax.lax.scan(
            f=step_env,
            init=(
                key,
                train_state.last_env_state,
                train_state,
                train_state.last_obs,
                train_state.last_critic_obs,
            ),
            length=cfg.num_steps,
        )
        # Store the final environment state and observations for the next rollout
        _, last_env_state, train_state, last_obs, last_critic_obs = rollout_state
        train_state = train_state.replace(
            last_env_state=last_env_state,
            last_obs=last_obs,
            last_critic_obs=last_critic_obs,
            time_steps=train_state.time_steps + cfg.num_steps * cfg.num_envs,
        )

        return transitions, train_state

    def learn_step(
        key: PRNGKey, train_state: PPOTrainState, batch: Transition
    ) -> tuple[PPOTrainState, dict[str, jax.Array]]:
        """Run ``cfg.num_epochs`` epochs of clipped-PPO updates on ``batch``."""
        # Compute advantages and target values
        model = nnx.merge(train_state.graphdef, train_state.params)
        if cfg.normalize_env:
            last_critic_obs = normalizer.normalize(
                train_state.critic_normalization_state, train_state.last_critic_obs
            )
        else:
            last_critic_obs = train_state.last_critic_obs
        last_value = model.critic(last_critic_obs)

        def compute_advantage(carry, transition):
            gae, next_value = carry
            done = transition.done
            truncated = transition.truncated
            reward = transition.reward
            value = transition.value
            delta = reward + cfg.gamma * next_value * (1 - done) - value
            gae = delta + cfg.gamma * cfg.lmbda * (1 - done) * gae
            # On truncated steps use the one-step bootstrapped estimate and
            # drop the accumulated trace.
            truncated_gae = reward + cfg.gamma * next_value - value
            gae = jnp.where(truncated, truncated_gae, gae)
            return (gae, value), gae

        # Compute the advantage using GAE (scanned backwards in time)
        _, advantages = jax.lax.scan(
            compute_advantage,
            (jnp.zeros_like(last_value), last_value),
            batch,
            reverse=True,
        )
        target_values = advantages + batch.value

        data = (batch, advantages, target_values)
        # Reshape data to (num_steps * num_envs, ...)
        data = jax.tree.map(
            lambda x: x.reshape((batch_size, *x.shape[2:])),
            data,
        )

        def update(carry, key) -> tuple[tuple, dict[str, jax.Array]]:
            # ``carry`` is ``(update_index, train_state)``; the index starts at
            # 1 and counts mini-batch updates across all epochs.
            def minibatch_update(carry, indices):
                idx, train_state = carry
                # Sample data at indices from the batch
                minibatch, advantages, target_values = jax.tree.map(
                    lambda x: jnp.take(x, indices, axis=0), data
                )
                if cfg.normalize_advantages:
                    advantages = (advantages - jnp.mean(advantages)) / (
                        jnp.std(advantages) + 1e-8
                    )

                # Define the loss function
                def loss_fn(params):
                    model = nnx.merge(train_state.graphdef, params)
                    pi = model.actor(minibatch.obs)
                    value = model.critic(minibatch.critic_obs)
                    log_prob = pi.log_prob(minibatch.action)
                    value_pred_clipped = minibatch.value + (
                        value - minibatch.value
                    ).clip(-cfg.clip_ratio, cfg.clip_ratio)
                    value_error = jnp.square(value - target_values)
                    value_error_clipped = jnp.square(value_pred_clipped - target_values)
                    # Truncated steps are masked out of both losses.
                    value_loss = 0.5 * jnp.mean(
                        (1.0 - minibatch.truncated)
                        * jnp.maximum(value_error, value_error_clipped)
                    )

                    ratio = jnp.exp(log_prob - minibatch.log_prob)
                    # Debug-only sanity check: before the first update the new
                    # and old policies are identical, so the ratio must be 1.
                    checkify.check(
                        jnp.allclose(ratio, 1.0) | (idx != 1),
                        debug=True,
                        msg="Ratio not equal to 1 on first iteration: {r}",
                        r=ratio,
                    )

                    actor_loss1 = ratio * advantages
                    actor_loss2 = (
                        jnp.clip(ratio, 1 - cfg.clip_ratio, 1 + cfg.clip_ratio)
                        * advantages
                    )
                    actor_loss = -jnp.mean(
                        (1.0 - minibatch.truncated)
                        * jnp.minimum(actor_loss1, actor_loss2)
                    )
                    entropy_loss = jnp.mean(pi.entropy())

                    loss = (
                        actor_loss
                        + cfg.value_coef * value_loss
                        - cfg.entropy_coef * entropy_loss
                    )

                    return loss, dict(
                        actor_loss=actor_loss,
                        value_loss=value_loss,
                        entropy_loss=entropy_loss,
                        loss=loss,
                        mean_value=value.mean(),
                        mean_log_prob=log_prob.mean(),
                        mean_advantages=advantages.mean(),
                        mean_action=minibatch.action.mean(),
                        mean_reward=minibatch.reward.mean(),
                    )

                grad_fn = jax.value_and_grad(loss_fn, has_aux=True)
                output, grads = grad_fn(train_state.params)

                # Global gradient norm (all parameters combined)
                flat_grads, _ = jax.flatten_util.ravel_pytree(grads)
                global_grad_norm = jnp.linalg.norm(flat_grads)

                metrics = output[1]
                metrics["advantages"] = advantages
                metrics["global_grad_norm"] = global_grad_norm
                train_state = train_state.apply_gradients(grads)
                return (idx + 1, train_state), metrics

            # Shuffle data and split into mini-batches
            key, shuffle_key = jax.random.split(key)

            mini_batch_size = batch_size // cfg.num_mini_batches
            indices = jax.random.permutation(shuffle_key, batch_size)
            minibatch_idxs = indices.reshape((cfg.num_mini_batches, mini_batch_size))

            # Run model update for each mini-batch
            carry, metrics = jax.lax.scan(minibatch_update, carry, minibatch_idxs)
            # Compute mean metrics across mini-batches
            metrics = jax.tree.map(lambda x: x.mean(0), metrics)
            return carry, metrics

        # Update the model for a number of epochs
        key, train_key = jax.random.split(key)
        (_, train_state), update_metrics = jax.lax.scan(
            f=update,
            init=(1, train_state),
            xs=jax.random.split(train_key, cfg.num_epochs),
        )
        # Get metrics from the last epoch
        update_metrics = jax.tree.map(lambda x: x[-1], update_metrics)

        return train_state, update_metrics

    # Define the training loop
    def train_fn(key: PRNGKey) -> tuple[PPOTrainState, dict]:
        def train_eval_step(key, train_state):
            def train_step(
                state: PPOTrainState, key: PRNGKey
            ) -> tuple[PPOTrainState, dict[str, jax.Array]]:
                key, rollout_key, learn_key = jax.random.split(key, 3)
                # Collect trajectories from `state`
                transitions, state = collect_rollout(key=rollout_key, train_state=state)
                # Execute an update to the policy with `transitions`
                state, update_metrics = learn_step(
                    key=learn_key, train_state=state, batch=transitions
                )
                metrics = dict(update_metrics)
                state = state.replace(iteration=state.iteration + 1)
                return state, metrics

            train_key, eval_key = jax.random.split(key)
            train_state, train_metrics = jax.lax.scan(
                f=train_step,
                init=train_state,
                xs=jax.random.split(train_key, eval_interval),
            )
            # Only the metrics of the most recent train step are reported.
            train_metrics = jax.tree.map(lambda x: x[-1], train_metrics)
            policy = make_policy(train_state)
            eval_metrics = eval_fn(eval_key, policy)
            metrics = {
                "time_step": train_state.time_steps,
                **utils.prefix_dict("train", train_metrics),
                **utils.prefix_dict("eval", eval_metrics),
            }

            return train_state, metrics

        def loop_body(
            train_state: PPOTrainState, key: PRNGKey
        ) -> tuple[PPOTrainState, dict]:
            # Map execution of the train+eval step across num_seeds (will be looped using jax.lax.scan)
            key, subkey = jax.random.split(key)
            train_state, metrics = jax.vmap(train_eval_step)(
                jax.random.split(subkey, num_seeds), train_state
            )
            # The callback is optional; skip the host round-trip when it is absent.
            if log_callback is not None:
                jax.debug.callback(log_callback, train_state, metrics)
            return train_state, metrics

        # Initialize the policy, environment and map that across the number of random seeds
        num_train_steps = cfg.total_time_steps // (cfg.num_steps * cfg.num_envs)
        # Round up so a trailing partial interval still gets an iteration.
        num_iterations = num_train_steps // eval_interval + int(
            num_train_steps % eval_interval != 0
        )
        key, init_key = jax.random.split(key)
        # TWK ??: We retain the same initial state for each of the seeds across all episodes?
        train_state = jax.vmap(make_init(cfg, env, env_params))(
            jax.random.split(init_key, num_seeds)
        )
        keys = jax.random.split(key, num_iterations)
        # Run the training and evaluation loop from the initialized training state
        state, metrics = jax.lax.scan(f=loop_body, init=train_state, xs=keys)
        return state, metrics

    return train_fn
|
||||||
|
|
||||||
|
|
||||||
|
def plot_history(history: list[dict[str, jax.Array]]):
    """Plot the mean evaluation return with a +/- one standard deviation band.

    Each entry of ``history`` contributes one point: its first ``time_step``
    value on the x axis, and the mean of ``eval/episode_return`` on the y axis.

    TODO -- TWK: Possibly remove this...
    """
    steps = jnp.array([entry["time_step"][0] for entry in history])
    per_entry_returns = [entry["eval/episode_return"] for entry in history]
    mean_return = jnp.array([r.mean() for r in per_entry_returns])
    std_return = jnp.array([r.std() for r in per_entry_returns])

    mean_trace = go.Scatter(
        x=steps,
        y=mean_return,
        name="Mean Episode Return",
        mode="lines",
        line=dict(color="blue"),
        showlegend=False,
    )
    upper_trace = go.Scatter(
        x=steps,
        y=mean_return + std_return,
        name="Upper Bound",
        mode="lines",
        line=dict(width=0),
        showlegend=False,
    )
    # "tonexty" fills the area between this trace and the previous one.
    lower_trace = go.Scatter(
        x=steps,
        y=mean_return - std_return,
        name="Lower Bound",
        mode="lines",
        line=dict(width=0),
        fill="tonexty",
        fillcolor="rgba(50, 127, 168, 0.3)",
        showlegend=False,
    )

    fig = go.Figure([mean_trace, upper_trace, lower_trace])
    fig.update_layout(
        xaxis=dict(title=dict(text="Environment Steps")),
    )
    return fig
|
||||||
|
|
||||||
|
|
||||||
|
def run(cfg: DictConfig):
    """Train PPO for ``cfg.trials`` trials, logging each trial to Weights & Biases.

    Args:
        cfg: Hydra configuration; reads ``env``, ``hyperparameters``, ``seed``,
            ``num_seeds``, ``trials``, ``wandb``, ``name`` and ``tags``.

    Raises:
        ValueError: If ``cfg.env.type`` is neither ``"brax"`` nor ``"mjx"``.
    """
    metric_history = []

    # Define callback to log metrics during training
    def log_callback(state, metrics):
        metrics["sys_time"] = time.perf_counter()
        if metric_history:
            # Throughput since the previous callback invocation.
            num_env_steps = state.time_steps[0] - metric_history[-1]["time_step"][0]
            seconds = metrics["sys_time"] - metric_history[-1]["sys_time"]
            sps = num_env_steps / seconds
        else:
            sps = 0

        metric_history.append(metrics)
        episode_return = metrics["eval/episode_return"].mean()
        # Use pop() with a default value of None in case 'advantages' key doesn't exist
        advantages = metrics.pop("train/advantages", None)
        logging.info(
            f"step={state.time_steps[0]} episode_return={episode_return:.3f}, sps={sps:.2f}"
        )
        log_data = {
            "eval/episode_return": episode_return,
            **jax.tree.map(jnp.mean, utils.filter_prefix("train", metrics)),
        }
        # Only build the histogram when advantages were actually reported.
        if advantages is not None:
            log_data["train/advantages"] = wandb.Histogram(advantages)
        # Push log data to WandB
        wandb.log(log_data, step=state.time_steps[0])

    logging.info(OmegaConf.to_yaml(cfg))

    # Set up the experimental environment
    if cfg.env.type == "brax":
        # NOTE: the episode length is not forwarded for Brax environments.
        env = BraxGymnaxWrapper(cfg.env.name)
    elif cfg.env.type == "mjx":
        env = MjxGymnaxWrapper(cfg.env.name, episode_length=cfg.env.max_episode_steps)
    else:
        raise ValueError(f"Unknown environment type: {cfg.env.type}")

    key = jax.random.PRNGKey(cfg.seed)
    train_fn = make_train_fn(
        cfg=PPOConfig(**cfg.hyperparameters),
        env=env,
        log_callback=log_callback,
        num_seeds=cfg.num_seeds,
    )
    # Wrap once so every trial reuses the same jitted function.
    jitted_train_fn = jax.jit(train_fn)
    for _ in range(cfg.trials):
        # Start each trial with an empty history so the first throughput
        # estimate is not computed against the previous trial's last entry.
        metric_history.clear()
        # Initialize WandB reporting
        key, train_key = jax.random.split(key)
        wandb.init(
            mode=cfg.wandb.mode,
            project=cfg.wandb.project,
            entity=cfg.wandb.entity,
            tags=[cfg.name, cfg.env.name, cfg.env.type, *cfg.tags],
            config=OmegaConf.to_container(cfg),
            name=f"ppo-{cfg.name}-{cfg.env.name.lower()}",
            save_code=True,
        )
        start = time.perf_counter()
        _, metrics = jitted_train_fn(train_key)
        jax.block_until_ready(metrics)
        duration = time.perf_counter() - start

        # Save metrics and finish the run
        logging.info(f"Training took {duration:.2f} seconds.")
        # TODO: save `metrics` to a unique output directory (e.g. metrics.npz)
        wandb.finish()
|
||||||
|
|
||||||
|
|
||||||
|
def tune(cfg: DictConfig):
    """Run a Weights & Biases Bayesian sweep over ``lr`` and ``normalize_env``.

    Each sweep run overrides ``experiment.hyperparameters`` with the values
    proposed by the sweep, trains, and reports ``episode_return``.

    TODO: Signature + also adjusting to run tuning for Brax environments as well
    """

    def log_callback(state, metrics):
        wandb.log(
            {"episode_return": metrics["eval/episode_return"].mean()},
            step=state.time_steps[0],
        )

    env = MjxGymnaxWrapper(cfg.env.name, episode_length=cfg.env.max_episode_steps)

    def train_agent():
        wandb.init(project=cfg.wandb.project)
        run_cfg = OmegaConf.to_container(cfg)
        # Overlay the sweep-selected values onto the configured hyperparameters.
        hyperparameters = run_cfg["experiment"]["hyperparameters"]
        hyperparameters.update(dict(wandb.config))
        jitted_train = jax.jit(
            make_train_fn(
                cfg=PPOConfig(**hyperparameters),
                env=env,
                log_callback=log_callback,
                num_seeds=cfg.num_seeds,
            )
        )
        logging.info(f"Running experiment with params: \n {run_cfg}")
        _, metrics = jitted_train(jax.random.PRNGKey(cfg.seed))
        jax.block_until_ready(metrics)

    search_space = {
        "lr": {"values": [1e-4, 3e-4, 1e-3]},
        "normalize_env": {"values": [True, False]},
    }
    sweep_spec = {
        "name": f"{cfg.name}-{cfg.env.name}",
        "method": "bayes",
        "metric": {"name": "episode_return", "goal": "maximize"},
        "parameters": search_space,
    }
    sweep_id = wandb.sweep(
        sweep=sweep_spec,
        project=cfg.wandb.project,
        entity=cfg.wandb.entity,
    )
    wandb.agent(sweep_id, function=train_agent, count=cfg.tune.num_runs)
|
||||||
|
|
||||||
|
|
||||||
|
@hydra.main(version_base=None, config_path="../../config", config_name="ppo")
def main(cfg: DictConfig):
    """Hydra entry point: run a sweep when ``cfg.tune`` is truthy, else train."""
    if not cfg.tune:
        run(cfg)
        return
    tune(cfg)


if __name__ == "__main__":
    main()
|
||||||
939
reppo/jaxrl/reppo.py
Normal file
939
reppo/jaxrl/reppo.py
Normal file
@ -0,0 +1,939 @@
|
|||||||
|
import logging
|
||||||
|
import time
|
||||||
|
import typing
|
||||||
|
from typing import Callable
|
||||||
|
|
||||||
|
import hydra
|
||||||
|
import jax
|
||||||
|
import numpy as np
|
||||||
|
import optax
|
||||||
|
import optuna
|
||||||
|
import plotly.graph_objs as go
|
||||||
|
from flax import nnx, struct
|
||||||
|
from flax.struct import PyTreeNode
|
||||||
|
from gymnax.environments.environment import Environment, EnvParams, EnvState
|
||||||
|
from jax import numpy as jnp
|
||||||
|
from jax.random import PRNGKey
|
||||||
|
from omegaconf import DictConfig, OmegaConf
|
||||||
|
|
||||||
|
import wandb
|
||||||
|
from reppo.env_utils.jax_wrappers import (
|
||||||
|
BraxGymnaxWrapper,
|
||||||
|
ClipAction,
|
||||||
|
LogWrapper,
|
||||||
|
MjxGymnaxWrapper,
|
||||||
|
NormalizeVec,
|
||||||
|
)
|
||||||
|
from reppo.jaxrl import utils
|
||||||
|
from reppo.network_utils.jax_models import (
|
||||||
|
CategoricalCriticNetwork,
|
||||||
|
CriticNetwork,
|
||||||
|
SACActorNetworks,
|
||||||
|
)
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
|
import mujoco
|
||||||
|
|
||||||
|
print(mujoco.__file__)
|
||||||
|
|
||||||
|
|
||||||
|
class Policy(typing.Protocol):
    """Structural type for callables that map a key and observation to an action."""

    def __call__(
        self,
        key: jax.random.PRNGKey,
        obs: PyTreeNode,
    ) -> tuple[PyTreeNode, PyTreeNode]:
        """Return a pair of pytrees for the given ``key`` and ``obs``."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class Transition(struct.PyTreeNode):
    """Pytree record holding the arrays stored for a rollout transition.

    NOTE(review): the code that fills these fields is not visible in this part
    of the file; the per-field notes are inferred from names only and should
    be confirmed against the rollout code.
    """

    obs: jax.Array  # presumably the actor observation
    critic_obs: jax.Array  # presumably the critic observation
    action: jax.Array
    reward: jax.Array
    next_emb: jax.Array  # presumably an embedding of the next state; confirm
    value: jax.Array
    done: jax.Array
    truncated: jax.Array
    importance_weight: jax.Array
    info: dict[str, jax.Array]  # auxiliary per-step entries keyed by name
|
||||||
|
|
||||||
|
|
||||||
|
class ReppoConfig(struct.PyTreeNode):
    """Hyperparameters for the REPPO agent.

    Fields without a default must be supplied by the caller. Comments only
    describe uses visible in ``make_init``; other fields are consumed by code
    outside this part of the file.
    """

    lr: float
    gamma: float
    total_time_steps: int
    num_steps: int
    lmbda: float
    lmbda_min: float
    num_mini_batches: int
    num_envs: int
    num_epochs: int
    max_grad_norm: float | None
    normalize_env: bool
    polyak: float
    exploration_noise_min: float
    exploration_noise_max: float
    exploration_base_envs: int
    ent_start: float  # forwarded to SACActorNetworks as ``ent_start``
    ent_target_mult: float
    kl_start: float  # forwarded to SACActorNetworks as ``kl_start``
    eval_interval: int = 10
    num_eval: int = 25
    max_episode_steps: int = 1000
    critic_hidden_dim: int = 512  # ``hidden_dim`` of the critic network
    actor_hidden_dim: int = 512  # ``hidden_dim`` of the actor networks
    vmin: int = -100  # passed to CategoricalCriticNetwork as ``vmin``
    vmax: int = 100  # passed to CategoricalCriticNetwork as ``vmax``
    num_bins: int = 250  # passed to CategoricalCriticNetwork as ``num_bins``
    hl_gauss: bool = False  # True selects CategoricalCriticNetwork, else CriticNetwork
    kl_bound: float = 1.0
    aux_loss_mult: float = 0.0
    update_kl_lagrangian: bool = True
    update_entropy_lagrangian: bool = True
    use_critic_norm: bool = True  # ``use_norm`` of the critic network
    num_critic_encoder_layers: int = 1  # ``encoder_layers`` of the critic network
    num_critic_head_layers: int = 1  # ``head_layers`` of the categorical critic
    num_critic_pred_layers: int = 1  # ``pred_layers`` of the categorical critic
    use_simplical_embedding: bool = False  # forwarded to the categorical critic
    use_actor_norm: bool = True  # ``use_norm`` of the actor networks
    num_actor_layers: int = 2  # ``layers`` of the actor networks
    actor_min_std: float = 0.05
    reduce_kl: bool = True
    reverse_kl: bool = False
    anneal_lr: bool = False
    actor_kl_clip_mode: str = "clipped"
|
||||||
|
|
||||||
|
|
||||||
|
class SACTrainState(struct.PyTreeNode):
    """Pytree bundling the network train states with rollout bookkeeping."""

    critic: nnx.TrainState
    actor: nnx.TrainState  # ``graphdef`` and ``params`` are read by make_policy
    actor_target: nnx.TrainState
    iteration: int
    time_steps: int
    last_env_state: EnvState
    last_obs: jax.Array
    last_critic_obs: jax.Array
|
||||||
|
|
||||||
|
|
||||||
|
def make_policy(
    train_state: SACTrainState,
) -> Callable[[jax.Array, jax.Array], tuple[jax.Array, dict]]:
    """Build a deterministic acting function from the actor in ``train_state``.

    The returned callable rebuilds the actor module from its graph definition
    and parameters, and returns ``det_action(obs)`` with an empty info dict.
    """

    def policy(key: PRNGKey, obs: jax.Array) -> tuple[jax.Array, dict]:
        # ``key`` is part of the policy signature but is not used here.
        actor_state = train_state.actor
        rebuilt_actor = nnx.merge(actor_state.graphdef, actor_state.params)
        chosen_action: jax.Array = rebuilt_actor.det_action(obs)
        return chosen_action, {}

    return policy
|
||||||
|
|
||||||
|
|
||||||
|
def make_eval_fn(
    env: Environment, max_episode_steps: int, reward_scale: float = 1.0
) -> Callable[[jax.random.PRNGKey, Policy, PyTreeNode | None], dict[str, float]]:
    """Create a function that rolls out a policy and summarizes episodes.

    The returned function resets all ``env.num_envs`` environments with the
    given normalization state, steps them ``max_episode_steps`` times and
    aggregates ``info`` over the steps flagged by ``returned_episode``. The
    mean episode return is multiplied by ``reward_scale``.
    """

    def evaluation_fn(
        key: jax.random.PRNGKey, policy: Policy, norm_state: PyTreeNode | None
    ):
        def rollout_step(loop_state, _unused):
            rng, sim_state, observation = loop_state
            rng, policy_rng, sim_rng = jax.random.split(rng, 3)
            action, _ = policy(policy_rng, observation)
            per_env_rngs = jax.random.split(sim_rng, env.num_envs)
            observation, _, sim_state, _reward, _done, step_info = env.step(
                per_env_rngs, sim_state, action
            )
            return (rng, sim_state, observation), step_info

        key, reset_rng = jax.random.split(key)
        reset_rngs = jax.random.split(reset_rng, env.num_envs)
        first_obs, _, first_state = env.reset(reset_rngs, norm_state)
        # The second half of this split is unused, but the split still
        # advances ``key`` and so determines the rollout's random stream.
        key, _spare_rng = jax.random.split(key)
        _, infos = jax.lax.scan(
            rollout_step,
            (key, first_state, first_obs),
            None,
            length=max_episode_steps,
        )

        finished = infos["returned_episode"]
        returns = infos["returned_episode_returns"]
        lengths = infos["returned_episode_lengths"]
        # NOTE(review): only the mean return is rescaled by ``reward_scale``;
        # the standard deviation is reported unscaled. Confirm this is intended.
        return {
            "episode_return": returns.mean(where=finished) * reward_scale,
            "episode_return_std": returns.std(where=finished),
            "episode_length": lengths.mean(where=finished),
            "episode_length_std": lengths.std(where=finished),
            "num_episodes": finished.sum(),
        }

    return evaluation_fn
|
||||||
|
|
||||||
|
|
||||||
|
def make_init(
    cfg: ReppoConfig,
    env: Environment,
    env_params: EnvParams = None,
) -> Callable[[jax.Array], SACTrainState]:
    """Create the function that builds a fresh ``SACTrainState`` from a PRNG key.

    Args:
        cfg: Hyperparameters (network sizes, learning rate, gradient clipping,
            number of environments, ...).
        env: Environment providing ``observation_space``, ``action_space`` and
            ``reset``.
        env_params: Forwarded to the environment's space and reset calls.

    Returns:
        ``init(key)`` which constructs the actor, actor-target and critic train
        states, resets the environments and returns the initial train state.
    """

    def _make_optimizer(learning_rate):
        # Adam, preceded by global-norm clipping when a bound is configured.
        adam = optax.adam(learning_rate)
        if cfg.max_grad_norm is None:
            return adam
        return optax.chain(optax.clip_by_global_norm(cfg.max_grad_norm), adam)

    def _to_train_state(module, tx):
        # Split an nnx module into graphdef/params and attach an optimizer.
        return nnx.TrainState.create(
            graphdef=nnx.graphdef(module),
            params=nnx.state(module),
            tx=tx,
        )

    def init(key: jax.random.PRNGKey) -> SACTrainState:
        key, model_key = jax.random.split(key)

        observation_space = env.observation_space(env_params)
        action_dim = env.action_space(env_params).shape[0]
        # Index 0 feeds the actor, index 1 feeds the critic.
        actor_obs_dim = observation_space[0].shape[0]
        critic_obs_dim = observation_space[1].shape[0]

        def _new_actor():
            # Every network gets its own Rngs object built from the same key,
            # so the actor and its target start from identical parameters.
            return SACActorNetworks(
                obs_dim=actor_obs_dim,
                action_dim=action_dim,
                hidden_dim=cfg.actor_hidden_dim,
                ent_start=cfg.ent_start,
                kl_start=cfg.kl_start,
                use_norm=cfg.use_actor_norm,
                layers=cfg.num_actor_layers,
                rngs=nnx.Rngs(model_key),
            )

        actor_networks = _new_actor()
        actor_target_networks = _new_actor()

        shared_critic_kwargs = dict(
            obs_dim=critic_obs_dim,
            action_dim=action_dim,
            hidden_dim=cfg.critic_hidden_dim,
            use_norm=cfg.use_critic_norm,
            encoder_layers=cfg.num_critic_encoder_layers,
            use_simplical_embedding=cfg.use_simplical_embedding,
            head_layers=cfg.num_critic_head_layers,
            pred_layers=cfg.num_critic_pred_layers,
            rngs=nnx.Rngs(model_key),
        )
        critic_networks: nnx.Module
        if cfg.hl_gauss:
            critic_networks = CategoricalCriticNetwork(
                num_bins=cfg.num_bins,
                vmin=cfg.vmin,
                vmax=cfg.vmax,
                **shared_critic_kwargs,
            )
        else:
            critic_networks = CriticNetwork(**shared_critic_kwargs)

        # Constant learning rate, or a linear decay to zero over all updates.
        if cfg.anneal_lr:
            num_iterations = cfg.total_time_steps // cfg.num_steps // cfg.num_envs
            num_updates = num_iterations * cfg.num_epochs * cfg.num_mini_batches
            lr = optax.linear_schedule(cfg.lr, 0, num_updates)
        else:
            lr = cfg.lr

        actor_trainstate = _to_train_state(actor_networks, _make_optimizer(lr))
        # The target is never optimised: its transformation zeroes all updates.
        actor_target_trainstate = _to_train_state(
            actor_target_networks, optax.set_to_zero()
        )
        critic_trainstate = _to_train_state(critic_networks, _make_optimizer(lr))

        key, reset_key = jax.random.split(key)
        reset_keys = jax.random.split(reset_key, cfg.num_envs)
        obs, critic_obs, env_state = env.reset(key=reset_keys, params=env_params)

        # randomize initial time step to prevent all envs stepping in tandem
        inner_state = env_state.unwrapped()
        key, randomize_steps_key = jax.random.split(key)
        random_steps = jax.random.randint(
            randomize_steps_key,
            inner_state.info["steps"].shape,
            0,
            cfg.max_episode_steps,
        )
        inner_state.info["steps"] = random_steps.astype(jnp.float32)
        env_state.set_env_state(inner_state)

        return SACTrainState(
            actor=actor_trainstate,
            actor_target=actor_target_trainstate,
            critic=critic_trainstate,
            iteration=0,
            time_steps=0,
            last_env_state=env_state,
            last_obs=obs,
            last_critic_obs=critic_obs,
        )

    return init
|
||||||
|
|
||||||
|
|
||||||
|
def make_train_fn(
    cfg: ReppoConfig,
    env: Environment,
    env_params: EnvParams = None,
    log_callback: Callable[[SACTrainState, dict[str, jax.Array]], None] | None = None,
    num_seeds: int = 1,
    reward_scale: float = 1.0,
):
    """Build the training function for ``env``.

    The environment is wrapped with ``LogWrapper`` and ``ClipAction`` (plus
    ``NormalizeVec`` when ``cfg.normalize_env`` is set) before the rollout,
    learning and evaluation closures are created.

    Args:
        cfg: Algorithm hyperparameters.
        env: Environment to train on; wrapped internally.
        env_params: Forwarded to ``env.action_space`` and ``make_init``.
        log_callback: Optional host callback receiving the seed-batched train
            state and metrics after each train/eval cycle. When ``None`` no
            callback is registered.
        num_seeds: Number of seeds trained in parallel through ``jax.vmap``.
        reward_scale: Forwarded to ``make_eval_fn``.

    Returns:
        ``train_fn(key, cfg)`` returning the final train state and the metrics
        stacked over all train/eval cycles.
    """
    env = LogWrapper(env, cfg.num_envs)
    env = ClipAction(env)
    # env = VecEnv(env, cfg.num_envs)
    if cfg.normalize_env:
        env = NormalizeVec(env)
    print(env)
    eval_fn = make_eval_fn(env, cfg.max_episode_steps, reward_scale=reward_scale)
    # Product of the action-space shape, scaled by cfg.ent_target_mult; used
    # as the offset in the temperature (entropy) loss below.
    action_size_target = (
        jnp.prod(jnp.array(env.action_space(env_params).shape)) * cfg.ent_target_mult
    )

    def collect_rollout(
        key: PRNGKey, train_state: SACTrainState
    ) -> tuple[Transition, SACTrainState]:
        """Step all environments ``cfg.num_steps`` times with the current actor."""
        actor_model = nnx.merge(train_state.actor.graphdef, train_state.actor.params)
        critic_model = nnx.merge(train_state.critic.graphdef, train_state.critic.params)

        # Per-environment noise scale: the first `exploration_base_envs` rows
        # use `exploration_noise_min`, the remaining rows ramp linearly from
        # `exploration_noise_min` towards `exploration_noise_max`.
        offset = (
            jnp.arange(cfg.num_envs - cfg.exploration_base_envs)[:, None]
            * (cfg.exploration_noise_max - cfg.exploration_noise_min)
            / (cfg.num_envs - cfg.exploration_base_envs)
        ) + cfg.exploration_noise_min
        offset = jnp.concatenate(
            [
                jnp.ones((cfg.exploration_base_envs, 1)) * cfg.exploration_noise_min,
                offset,
            ],
            axis=0,
        )

        def step_env(carry, _) -> tuple[tuple, Transition]:
            key, env_state, train_state, obs, critic_obs = carry
            key, act_key, step_key = jax.random.split(key, 3)
            step_key = jax.random.split(step_key, cfg.num_envs)

            # get policy action: `og_pi` is the unscaled policy, `pi` the
            # behaviour policy with the per-env noise scale applied
            og_pi = actor_model.actor(obs)
            pi = actor_model.actor(obs, scale=offset)
            action = pi.sample(seed=act_key)

            next_obs, next_critic_obs, next_env_state, reward, done, info = env.step(
                step_key, env_state, action
            )

            # compute importance weights (stored in log space and clipped to
            # [log(lmbda_min), 0]); actions are pulled away from +-1 first
            action = jnp.clip(action, -0.999, 0.999)
            raw_importance_weight = jnp.nan_to_num(
                og_pi.log_prob(action).sum(-1) - pi.log_prob(action).sum(-1),
                nan=jnp.log(cfg.lmbda_min),
            )
            importance_weight = jnp.clip(
                raw_importance_weight, min=jnp.log(cfg.lmbda_min), max=jnp.log(1.0)
            )

            # compute next state embedding and value
            # NOTE(review): `act_key` was already consumed to sample `action`
            # above; JAX advises against reusing keys. Left unchanged so the
            # random stream matches the original code.
            next_action, log_prob = actor_model.actor(next_obs).sample_and_log_prob(
                seed=act_key
            )
            next_emb, value = critic_model.forward(next_critic_obs, next_action)
            # Fold the discounted, temperature-weighted log-probability of the
            # next action into the reward.
            reward = (
                reward
                - cfg.gamma * log_prob.sum(-1).squeeze() * actor_model.temperature()
            )
            transition = Transition(
                obs=obs,
                critic_obs=critic_obs,
                action=action,
                next_emb=next_emb,
                reward=reward,
                value=value,
                done=done,
                truncated=next_env_state.truncated,
                info=info,
                importance_weight=importance_weight,
            )
            return (
                key,
                next_env_state,
                train_state,
                next_obs,
                next_critic_obs,
            ), transition

        rollout_state, transitions = jax.lax.scan(
            f=step_env,
            init=(
                key,
                train_state.last_env_state,
                train_state,
                train_state.last_obs,
                train_state.last_critic_obs,
            ),
            length=cfg.num_steps,
        )
        _, last_env_state, train_state, last_obs, last_critic_obs = rollout_state
        train_state = train_state.replace(
            last_env_state=last_env_state,
            last_obs=last_obs,
            last_critic_obs=last_critic_obs,
            time_steps=train_state.time_steps + cfg.num_steps * cfg.num_envs,
        )

        return transitions, train_state

    def learn_step(
        key: PRNGKey, train_state: SACTrainState, batch: Transition
    ) -> tuple[SACTrainState, dict[str, jax.Array]]:
        """Compute lambda-return targets and run ``cfg.num_epochs`` update epochs."""
        # compute n-step lambda estimates

        def compute_nstep_lambda(carry, transition):
            # Scanned in reverse, so the carry holds quantities taken from the
            # transition one step later in time.
            lambda_return, truncated, importance_weight = carry
            # combine importance_weights with TD lambda
            done = transition.done
            reward = transition.reward
            value = transition.value
            lambda_sum = (
                jnp.exp(importance_weight) * cfg.lmbda * lambda_return
                + (1 - jnp.exp(importance_weight) * cfg.lmbda) * value
            )
            # Where the carried `truncated` flag is set, bootstrap from this
            # transition's value only; otherwise use the mixed return, zeroed
            # when `done`.
            delta = cfg.gamma * jnp.where(truncated, value, (1.0 - done) * lambda_sum)
            lambda_return = reward + delta
            truncated = transition.truncated
            return (
                lambda_return,
                truncated,
                transition.importance_weight,
            ), lambda_return

        _, target_values = jax.lax.scan(
            compute_nstep_lambda,
            (
                batch.value[-1],
                jnp.ones_like(batch.truncated[0]),
                jnp.zeros_like(batch.importance_weight[0]),
            ),
            batch,
            reverse=True,
        )
        # Reshape data to (num_steps * num_envs, ...)
        jax.debug.print("num trunc {}", batch.truncated.sum(), ordered=True)
        data = (batch, target_values)
        data = jax.tree.map(
            lambda x: x.reshape((cfg.num_steps * cfg.num_envs, *x.shape[2:])), data
        )
        # jax.debug.print("whole data {}", data[0].truncated.sum(), ordered=True)

        # Snapshot the current actor parameters as the KL reference policy.
        train_state = train_state.replace(
            actor_target=train_state.actor_target.replace(
                params=train_state.actor.params
            ),
        )
        actor_target_model = nnx.merge(
            train_state.actor_target.graphdef, train_state.actor_target.params
        )

        def update(carry, key) -> tuple[tuple, dict[str, jax.Array]]:
            """Run one epoch; ``carry`` is the ``(counter, train_state)`` pair."""

            def minibatch_update(carry, indices):
                idx, train_state = carry
                # Sample data at indices from the batch
                minibatch, target_values = jax.tree.map(
                    lambda x: jnp.take(x, indices, axis=0), data
                )

                def critic_loss_fn(params):
                    critic_model = nnx.merge(train_state.critic.graphdef, params)
                    critic_pred = critic_model.critic_cat(
                        minibatch.critic_obs, minibatch.action
                    ).squeeze()
                    if cfg.hl_gauss:
                        target_cat = jax.vmap(
                            utils.hl_gauss, in_axes=(0, None, None, None)
                        )(target_values, cfg.num_bins, cfg.vmin, cfg.vmax)
                        critic_update_loss = optax.softmax_cross_entropy(
                            critic_pred, target_cat
                        )
                    else:
                        critic_update_loss = optax.squared_error(
                            critic_pred,
                            target_values,
                        )

                    # Aux loss: match the predicted embedding to the stored
                    # next-state embedding, masked out on `done` transitions
                    pred, value = critic_model.forward(
                        minibatch.critic_obs, minibatch.action
                    )
                    aux_loss = jnp.mean(
                        (1 - minibatch.done.reshape(-1, 1))
                        * (pred - minibatch.next_emb) ** 2,
                        axis=-1,
                    )

                    # compute l2 error for logging
                    critic_loss = optax.squared_error(
                        value,
                        target_values,
                    )
                    critic_loss = jnp.mean(critic_loss)
                    # Truncated transitions contribute nothing to the loss.
                    loss = jnp.mean(
                        (1.0 - minibatch.truncated)
                        * (critic_update_loss + cfg.aux_loss_mult * aux_loss)
                    )
                    return loss, dict(
                        value_loss=critic_loss,
                        critic_update_loss=critic_update_loss,
                        loss=loss,
                        aux_loss=aux_loss,
                        q=critic_pred.mean(),
                        abs_batch_action=jnp.abs(minibatch.action).mean(),
                        reward_mean=minibatch.reward.mean(),
                        target_values=target_values.mean(),
                    )

                def actor_loss(params):
                    critic_target_model = nnx.merge(
                        train_state.critic.graphdef,
                        train_state.critic.params,
                    )
                    actor_model = nnx.merge(train_state.actor.graphdef, params)

                    # SAC actor loss
                    # NOTE(review): `key` is the epoch-level key, so the same
                    # key is used for this sample, for the KL samples below
                    # and for every mini-batch of the epoch. Left unchanged to
                    # keep the original random stream.
                    pi = actor_model.actor(minibatch.obs)
                    pred_action, log_prob = pi.sample_and_log_prob(seed=key)
                    value = critic_target_model.critic(
                        minibatch.critic_obs, pred_action
                    )
                    log_prob = log_prob.sum(-1)
                    entropy = -log_prob

                    # policy KL constraint, estimated from 16 samples
                    if cfg.reverse_kl:
                        pi_action, pi_act_log_prob = pi.sample_and_log_prob(
                            sample_shape=(16,), seed=key
                        )
                        pi_action = jnp.clip(pi_action, -1 + 1e-4, 1 - 1e-4)

                        old_pi = actor_target_model.actor(minibatch.obs)

                        old_pi_act_log_prob = old_pi.log_prob(pi_action).sum(-1).mean(0)
                        pi_act_log_prob = pi_act_log_prob.sum(-1).mean(0)
                        kl = pi_act_log_prob - old_pi_act_log_prob
                    else:
                        old_pi_action, old_pi_act_log_prob = actor_target_model.actor(
                            minibatch.obs
                        ).sample_and_log_prob(sample_shape=(16,), seed=key)
                        old_pi_action = jnp.clip(old_pi_action, -1 + 1e-4, 1 - 1e-4)

                        old_pi_act_log_prob = old_pi_act_log_prob.sum(-1).mean(0)
                        pi_act_log_prob = pi.log_prob(old_pi_action).sum(-1).mean(0)

                        kl = old_pi_act_log_prob - pi_act_log_prob

                    lagrangian = actor_model.lagrangian()

                    if cfg.actor_kl_clip_mode == "full":
                        actor_loss = (
                            log_prob * jax.lax.stop_gradient(actor_model.temperature())
                            - value
                            + kl * jax.lax.stop_gradient(lagrangian) * cfg.reduce_kl
                        )
                    elif cfg.actor_kl_clip_mode == "clipped":
                        # Per sample: optimise the SAC objective while the KL
                        # is below the bound, otherwise only the KL penalty.
                        actor_loss = jnp.where(
                            kl < cfg.kl_bound,
                            log_prob * jax.lax.stop_gradient(actor_model.temperature())
                            - value,
                            kl * jax.lax.stop_gradient(lagrangian) * cfg.reduce_kl,
                        )
                    elif cfg.actor_kl_clip_mode == "value":
                        actor_loss = (
                            log_prob * jax.lax.stop_gradient(actor_model.temperature())
                            - value
                        )
                    else:
                        raise ValueError(
                            f"Unknown actor loss mode: {cfg.actor_kl_clip_mode}"
                        )

                    # SAC target entropy loss
                    target_entropy = action_size_target + entropy
                    target_entropy_loss = (
                        actor_model.temperature()
                        * jax.lax.stop_gradient(target_entropy)
                    )

                    # Lagrangian constraint (follows temperature update)
                    lagrangian_loss = -lagrangian * jax.lax.stop_gradient(
                        kl - cfg.kl_bound
                    )

                    # total loss
                    loss = jnp.mean(actor_loss)
                    if cfg.update_entropy_lagrangian:
                        loss += jnp.mean(target_entropy_loss)
                    if cfg.update_kl_lagrangian:
                        loss += jnp.mean(lagrangian_loss)

                    return loss, dict(
                        actor_loss=actor_loss,
                        loss=loss,
                        temp=actor_model.temperature(),
                        abs_batch_action=jnp.abs(minibatch.action).mean(),
                        abs_pred_action=jnp.abs(pred_action).mean(),
                        reward_mean=minibatch.reward.mean(),
                        kl=kl.mean(),
                        lagrangian=lagrangian,
                        lagrangian_loss=lagrangian_loss,
                        entropy=entropy,
                        entropy_loss=target_entropy_loss,
                        target_values=target_values.mean(),
                    )

                # Critic first; the actor loss then sees the updated critic.
                critic_grad_fn = jax.value_and_grad(critic_loss_fn, has_aux=True)
                output, grads = critic_grad_fn(train_state.critic.params)
                critic_train_state = train_state.critic.apply_gradients(grads)
                train_state = train_state.replace(
                    critic=critic_train_state,
                )
                critic_metrics = output[1]

                actor_grad_fn = jax.value_and_grad(actor_loss, has_aux=True)
                output, grads = actor_grad_fn(train_state.actor.params)
                actor_train_state = train_state.actor.apply_gradients(grads)
                train_state = train_state.replace(
                    actor=actor_train_state,
                )
                actor_metrics = output[1]
                # Keys present in both dicts take the actor's value.
                return (idx + 1, train_state), {
                    **critic_metrics,
                    **actor_metrics,
                }

            # Shuffle data and split into mini-batches
            key, shuffle_key = jax.random.split(key)
            mini_batch_size = (cfg.num_steps * cfg.num_envs) // cfg.num_mini_batches
            indices = jax.random.permutation(shuffle_key, cfg.num_steps * cfg.num_envs)
            minibatch_idxs = jax.tree.map(
                lambda x: x.reshape(
                    (cfg.num_mini_batches, mini_batch_size, *x.shape[1:])
                ),
                indices,
            )

            # Run model update for each mini-batch
            carry, metrics = jax.lax.scan(minibatch_update, carry, minibatch_idxs)
            # Compute mean metrics across mini-batches
            metrics = jax.tree.map(lambda x: x.mean(0), metrics)
            return carry, metrics

        # Update the model for a number of epochs
        key, train_key = jax.random.split(key)
        (_, train_state), update_metrics = jax.lax.scan(
            f=update,
            init=(1, train_state),
            xs=jax.random.split(train_key, cfg.num_epochs),
        )
        # Get metrics from the last epoch
        update_metrics = jax.tree.map(lambda x: x[-1], update_metrics)

        return train_state, update_metrics

    def train_fn(key: PRNGKey, cfg: ReppoConfig) -> tuple[SACTrainState, dict]:
        """Train for ``cfg.total_time_steps`` steps, evaluating periodically.

        Note that this ``cfg`` shadows the one given to ``make_train_fn`` for
        the schedule computed here; the rollout and learning closures above
        keep using the outer configuration.
        """
        # Number of train steps between evaluations. Clamped to at least one so
        # that `num_eval` larger than the number of train steps neither yields
        # an empty scan nor divides by zero below.
        eval_interval = max(
            1,
            int((cfg.total_time_steps / (cfg.num_steps * cfg.num_envs)) // cfg.num_eval),
        )

        def train_eval_step(key, train_state):
            def train_step(
                state: SACTrainState, key: PRNGKey
            ) -> tuple[SACTrainState, dict[str, jax.Array]]:
                key, rollout_key, learn_key = jax.random.split(key, 3)
                transitions, state = collect_rollout(key=rollout_key, train_state=state)
                state, update_metrics = learn_step(
                    key=learn_key, train_state=state, batch=transitions
                )
                state = state.replace(iteration=state.iteration + 1)
                return state, update_metrics

            train_key, eval_key = jax.random.split(key)
            train_state, train_metrics = jax.lax.scan(
                f=train_step,
                init=train_state,
                xs=jax.random.split(train_key, eval_interval),
            )
            # Only the metrics of the last train step are reported.
            train_metrics = jax.tree.map(lambda x: x[-1], train_metrics)
            policy = make_policy(train_state)
            if cfg.normalize_env:
                norm_state = train_state.last_env_state
            else:
                norm_state = None
            eval_metrics = eval_fn(eval_key, policy, norm_state)
            train_returns = {
                "train/episode_return": train_state.last_env_state.info[
                    "returned_episode_returns"
                ].mean(),
                "train/episode_length": train_state.last_env_state.info[
                    "returned_episode_lengths"
                ].mean(),
            }
            metrics = {
                "time_step": train_state.time_steps,
                **utils.prefix_dict("train", train_metrics),
                **utils.prefix_dict("eval", eval_metrics),
                **train_returns,
            }
            return train_state, metrics

        def loop_body(
            train_state: SACTrainState, key: PRNGKey
        ) -> tuple[SACTrainState, dict]:
            key, subkey = jax.random.split(key)
            train_state, metrics = jax.vmap(train_eval_step)(
                jax.random.split(subkey, num_seeds), train_state
            )
            # `log_callback` defaults to None; only register a real callable.
            if log_callback is not None:
                jax.debug.callback(log_callback, train_state, metrics)
            return train_state, metrics

        num_train_steps = cfg.total_time_steps // (cfg.num_steps * cfg.num_envs)
        # Round up so that a partial final interval still gets its own cycle.
        num_iterations = num_train_steps // eval_interval + int(
            num_train_steps % eval_interval != 0
        )
        key, init_key = jax.random.split(key)
        train_state = jax.vmap(make_init(cfg, env, env_params))(
            jax.random.split(init_key, num_seeds)
        )
        keys = jax.random.split(key, num_iterations)
        state, metrics = jax.lax.scan(f=loop_body, init=train_state, xs=keys)
        return state, metrics

    return train_fn
|
||||||
|
|
||||||
|
|
||||||
|
def plot_history(history: list[dict[str, jax.Array]]):
    """Plot the evaluation return over environment steps with a +-1 std band.

    Args:
        history: One metrics dict per logging call; each must provide
            ``"time_step"`` and ``"eval/episode_return"``.

    Returns:
        A plotly figure with the mean line and a shaded band between
        ``mean - std`` and ``mean + std``.
    """
    step_values, mean_values, std_values = [], [], []
    for entry in history:
        returns = entry["eval/episode_return"]
        step_values.append(entry["time_step"][0])
        mean_values.append(returns.mean())
        std_values.append(returns.std())
    steps = jnp.array(step_values)
    eval_return = jnp.array(mean_values)
    eval_return_std = jnp.array(std_values)

    mean_trace = go.Scatter(
        x=steps,
        y=eval_return,
        name="Mean Episode Return",
        mode="lines",
        line={"color": "blue"},
        showlegend=False,
    )
    upper_trace = go.Scatter(
        x=steps,
        y=eval_return + eval_return_std,
        name="Upper Bound",
        mode="lines",
        line={"width": 0},
        showlegend=False,
    )
    # "tonexty" fills the area between this trace and the one added before it.
    lower_trace = go.Scatter(
        x=steps,
        y=eval_return - eval_return_std,
        name="Lower Bound",
        mode="lines",
        line={"width": 0},
        fill="tonexty",
        fillcolor="rgba(50, 127, 168, 0.3)",
        showlegend=False,
    )

    fig = go.Figure([mean_trace, upper_trace, lower_trace])
    fig.update_layout(
        xaxis={"title": {"text": "Environment Steps"}},
    )

    return fig
|
||||||
|
|
||||||
|
|
||||||
|
# type object
|
||||||
|
def _get_optuna_type(trial: optuna.Trial, name, values: list):
|
||||||
|
if all(isinstance(v, int) for v in values):
|
||||||
|
return trial.suggest_int(name, low=min(values), high=max(values))
|
||||||
|
elif all(isinstance(v, float) for v in values):
|
||||||
|
return trial.suggest_float(name, low=min(values), high=max(values))
|
||||||
|
elif all(isinstance(v, str) for v in values):
|
||||||
|
return trial.suggest_categorical(name, values)
|
||||||
|
elif all(isinstance(v, bool) for v in values):
|
||||||
|
return trial.suggest_categorical(name, [True, False])
|
||||||
|
else:
|
||||||
|
raise ValueError("Values must be of the same type (int, float, or str).")
|
||||||
|
|
||||||
|
|
||||||
|
def run(cfg: DictConfig, trial: optuna.Trial | None) -> float:
    """Run the configured training trials, optionally sampling hyperparameters.

    Progress is tracked in ``completed_trials.txt`` in the current working
    directory, which stores the number of trials already finished so that an
    interrupted run resumes with the first unfinished trial.

    Args:
        cfg (DictConfig): Experiment configuration (environment, W&B settings,
            hyperparameters, number of trials and seeds).
        trial (optuna.Trial | None): When given, every entry of
            ``cfg.trial_spec`` is sampled from it and written into
            ``cfg.hyperparameters`` before training.

    Returns:
        float: ``0.1 * mean(all eval returns) + mean(final eval returns)`` over
        the trials executed by this call, or ``nan`` if no trial was left to run.

    Raises:
        ValueError: If a sampled hyperparameter is not part of
            ``cfg.hyperparameters`` or ``cfg.env.type`` is unknown.
    """
    sweep_metrics = []

    if trial is not None:
        # Set hyperparameters from the trial
        for name, values in cfg.trial_spec.items():
            if name not in cfg.hyperparameters:
                raise ValueError(f"Hyperparameter {name} not found in config.")
            sampled_value = _get_optuna_type(trial, name, values)
            # Sampled floats can arrive as numpy float64; store a builtin float
            # (presumably because the config cannot hold numpy scalars -- TODO confirm).
            if isinstance(sampled_value, np.float64):
                sampled_value = float(sampled_value)
            cfg.hyperparameters[name] = sampled_value

    try:
        with open("completed_trials.txt", "r") as f:
            completed_trials = int(f.read())
    except FileNotFoundError:
        completed_trials = 0

    metric_history = []

    def log_callback(state, metrics):
        """Log evaluation results and throughput for one train/eval cycle."""
        metrics["sys_time"] = time.perf_counter()
        if len(metric_history) > 0:
            # Throughput since the previous logging call of the same trial.
            num_env_steps = state.time_steps[0] - metric_history[-1]["time_step"][0]
            seconds = metrics["sys_time"] - metric_history[-1]["sys_time"]
            sps = num_env_steps / seconds
        else:
            sps = 0

        metric_history.append(metrics)
        episode_return = metrics["eval/episode_return"].mean()
        eval_length = metrics["eval/episode_length"].mean()
        logging.info(
            f"step={state.time_steps[0]} episode_return={episode_return:.3f}, episode_length={eval_length:.3f} sps={sps:.2f}"
        )
        log_data = {
            "eval/episode_return": episode_return,
            "eval/episode_length": eval_length,
            **jax.tree.map(jnp.mean, utils.filter_prefix("train", metrics)),
        }
        wandb.log(log_data, step=state.time_steps[0])

    # Set up the experiment
    if cfg.env.type == "brax":
        env = BraxGymnaxWrapper(
            cfg.env.name,
            episode_length=cfg.env.max_episode_steps,
            reward_scaling=cfg.env.reward_scaling,
            terminate=cfg.env.terminate,
        )
    elif cfg.env.type == "mjx":
        env = MjxGymnaxWrapper(
            cfg.env.name,
            episode_length=cfg.env.max_episode_steps,
            reward_scale=cfg.env.reward_scaling,
            push_distractions=cfg.env.get("push_distractions", False),
            asymmetric_observation=cfg.env.get("asymmetric_observation", False),
        )
    else:
        raise ValueError(f"Unknown environment type: {cfg.env.type}")

    # build algo config with overrides

    train_fn = make_train_fn(
        cfg=ReppoConfig(**cfg.hyperparameters),
        env=env,
        log_callback=log_callback,
        num_seeds=cfg.num_seeds,
        # Undo the environment's reward scaling in the reported eval returns.
        reward_scale=1.0 / cfg.env.reward_scaling,
    )

    # Remember the configured seed: each trial uses `base_seed + i`, so the
    # seed of a trial does not depend on which trials ran before it.
    base_seed = cfg.seed

    for i in range(completed_trials, cfg.num_trials):
        cfg.seed = base_seed + i
        # Start throughput bookkeeping afresh for every trial.
        metric_history.clear()

        wandb.init(
            mode=cfg.wandb.mode,
            project=cfg.wandb.project,
            entity=cfg.wandb.entity,
            tags=[
                cfg.name,
                cfg.env.name,
                cfg.env.type,
                "hp_tune" if trial is not None else "val",
                *cfg.tags,
            ],
            config=OmegaConf.to_container(cfg),
            name=f"resampling-{cfg.name}-{cfg.env.name.lower()}",
            save_code=True,
        )

        logging.info(OmegaConf.to_yaml(cfg))

        key = jax.random.PRNGKey(cfg.seed)
        start = time.perf_counter()
        _, metrics = jax.jit(train_fn, static_argnums=(1,))(
            key, ReppoConfig(**cfg.hyperparameters)
        )
        jax.block_until_ready(metrics)
        duration = time.perf_counter() - start

        # Save metrics and finish the run
        logging.info(f"Training took {duration:.2f} seconds.")
        jnp.savez("metrics.npz", **metrics)
        wandb.finish()

        sweep_metrics.append(metrics["eval/episode_return"])

        # Store the number of finished trials (i + 1), not the index of the
        # last one, so that a resumed run does not repeat trial i.
        with open("completed_trials.txt", "w") as f:
            f.write(str(i + 1))

    if not sweep_metrics:
        # Every trial had already been completed by an earlier invocation.
        logging.warning("No trials left to run; returning NaN as the sweep score.")
        return float("nan")

    sweep_metrics_array = jnp.array(sweep_metrics)
    return (0.1 * sweep_metrics_array.mean() + sweep_metrics_array[:, -1].mean()).item()
|
||||||
|
|
||||||
|
|
||||||
|
@hydra.main(version_base=None, config_path="../../config", config_name="sac")
def main(cfg: DictConfig):
    """Hydra entry point: apply experiment overrides and run without Optuna."""
    merged_hyperparameters = OmegaConf.merge(
        cfg.hyperparameters, cfg.experiment_overrides
    )
    cfg.hyperparameters = merged_hyperparameters

    run(cfg, trial=None)


# Only start training when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||||
136
reppo/jaxrl/utils.py
Normal file
136
reppo/jaxrl/utils.py
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
import distrax
|
||||||
|
import flax
|
||||||
|
import jax
|
||||||
|
import jax.numpy as jnp
|
||||||
|
|
||||||
|
|
||||||
|
def describe(values: jnp.ndarray, axis: tuple | int = 0) -> dict[str, jnp.ndarray]:
|
||||||
|
"""Compute basic statistics for a batch of values."""
|
||||||
|
return {
|
||||||
|
"mean": jnp.mean(values, axis=axis),
|
||||||
|
"std": jnp.std(values, axis=axis),
|
||||||
|
"min": jnp.min(values, axis=axis),
|
||||||
|
"max": jnp.max(values, axis=axis),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def merge_dicts(*prefix_dicts: tuple[str, dict], sep: str = "/") -> dict:
    """Flatten ``(prefix, metrics)`` pairs into one dict with prefixed keys.

    A falsy prefix leaves the keys of its dict unchanged. When two entries
    produce the same key, the later pair wins.
    """
    merged = {}
    for prefix, metrics in prefix_dicts:
        lead = f"{prefix}{sep}" if prefix else ""
        for key, value in metrics.items():
            merged[f"{lead}{key}"] = value
    return merged
|
||||||
|
|
||||||
|
|
||||||
|
def prefix_dict(prefix: str, metrics: dict, sep: str = "/") -> dict:
    """Return a copy of ``metrics`` whose keys are ``prefix + sep + key``."""
    prefixed = {}
    for key, value in metrics.items():
        prefixed[f"{prefix}{sep}{key}"] = value
    return prefixed
|
||||||
|
|
||||||
|
|
||||||
|
def postfix_dict(postfix: str, metrics: dict, sep: str = "/") -> dict:
    """Return a copy of ``metrics`` whose keys are ``key + sep + postfix``."""
    suffixed = {}
    for key, value in metrics.items():
        suffixed[f"{key}{sep}{postfix}"] = value
    return suffixed
|
||||||
|
|
||||||
|
|
||||||
|
def filter_prefix(prefix: str, metrics: dict, sep: str = "/") -> dict:
    """Keep only the entries whose key starts with ``prefix + sep``.

    Keys are returned unchanged; the prefix is not stripped.
    """
    selected = {}
    for key, value in metrics.items():
        if not key.startswith(prefix + sep):
            continue
        selected[key] = value
    return selected
|
||||||
|
|
||||||
|
|
||||||
|
def hl_gauss(inp, num_bins, vmin, vmax, epsilon=0.0):
    """Turn a scalar target into a categorical distribution over ``num_bins`` bins.

    The input is clipped to ``[vmin, vmax]`` and a Gaussian centred on it is
    integrated over each bin using differences of ``erf`` at the bin edges.
    The result is normalised over the covered range and mixed with a uniform
    distribution with weight ``epsilon``.

    NOTE(review): the final reshape uses ``inp.shape[:-1]``, so this looks
    designed for a scalar (or trailing-singleton) input, e.g. under
    ``jax.vmap`` -- confirm before passing whole batches.
    """
    center = jnp.clip(inp, vmin, max=vmax).squeeze() / (1 - epsilon)

    # num_bins + 1 edges; the first and last bins are centred on vmin and vmax.
    width = (vmax - vmin) / (num_bins - 1)
    edges = jnp.linspace(
        vmin - width / 2, vmax + width / 2, num_bins + 1, dtype=jnp.float32
    )

    # Standard deviation of the smoothing Gaussian, as a fraction of a bin.
    sigma_ratio = 0.75
    sigma = width * sigma_ratio

    erf_at_edges = jax.scipy.special.erf((edges - center) / (jnp.sqrt(2) * sigma))
    total_mass = erf_at_edges[-1] - erf_at_edges[0]
    per_bin_mass = erf_at_edges[1:] - erf_at_edges[:-1]
    probs = (per_bin_mass / total_mass).reshape(*inp.shape[:-1], num_bins)

    flat = jnp.ones_like(probs) / num_bins

    return (1 - epsilon) * probs + epsilon * flat
|
||||||
|
|
||||||
|
|
||||||
|
@flax.struct.dataclass
class MultiSampleLogProb:
    """Result bundle returned by ``fast_multi_log_prob``."""

    # tanh-squashed sample drawn with the unmodified ``scale``
    policy_action: jax.Array
    # log-density of ``policy_action`` (summed over the last axis, with the
    # tanh change-of-variables correction applied)
    policy_action_log_prob: jax.Array
    # tanh-squashed sample that reuses the same noise multiplied by ``offset_scale``
    action: jax.Array
|
||||||
|
|
||||||
|
|
||||||
|
def fast_multi_log_prob(
    key: jax.Array,
    loc: jax.Array,
    scale: jax.Array,
    offset_scale: jax.Array,
) -> MultiSampleLogProb:
    """Draw one Gaussian noise sample and reuse it for two tanh-squashed actions.

    A single noise draw ``eps ~ Normal(0, scale)`` produces:

    - ``policy_action = tanh(loc + eps)`` together with its log-density
      (summed over the last axis, including the tanh Jacobian correction);
    - ``action = tanh(loc + offset_scale * eps)``, for which no log-density
      is computed.

    Args:
        key: JAX PRNG key.
        loc: Location of the pre-squash Gaussian.
        scale: Scale of the pre-squash Gaussian.
        offset_scale: Multiplier applied to the shared noise for ``action``.

    Returns:
        A ``MultiSampleLogProb`` holding both actions and the log-density.
    """
    # Zero-mean noise and its elementwise log-density in a single call.
    noise_dist = distrax.Normal(jnp.zeros_like(loc), scale)
    eps, log_density = noise_dist.sample_and_log_prob(seed=key)
    log_density = jnp.sum(log_density, axis=-1)

    pre_tanh = loc + eps

    # numerically stable jax tanh det jacobian https://github.com/tensorflow/probability/commit/ef6bb176e0ebd1cf6e25c6b5cecdd2428c22963f#diff-e120f70e92e6741bca649f04fcd907b7
    log_det_jacobian = 2.0 * (
        jnp.log(2.0) - pre_tanh - jax.nn.softplus(-2.0 * pre_tanh)
    )
    log_density = log_density - jnp.sum(log_det_jacobian, axis=-1)

    return MultiSampleLogProb(
        policy_action=jnp.tanh(pre_tanh),
        policy_action_log_prob=log_density,
        action=jnp.tanh(loc + offset_scale * eps),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def multi_softmax(x, dim=8, get_logits=False):
    """Apply a softmax independently to consecutive groups of ``dim`` features.

    The last axis is split into groups of ``dim`` entries, each group is
    normalised on its own, and the result is returned in the input's shape.
    With ``dim=None`` the whole last axis forms a single group.

    Args:
        x: Input array; its last axis must be divisible by ``dim``.
        dim: Group width, or ``None`` for one group.
        get_logits: Return log-probabilities (``log_softmax``) instead of
            probabilities.
    """
    original_shape = x.shape
    grouped = x if dim is None else x.reshape(*original_shape[:-1], -1, dim)
    normalise = jax.nn.log_softmax if get_logits else jax.nn.softmax
    return normalise(grouped, axis=-1).reshape(*original_shape)
|
||||||
|
|
||||||
|
|
||||||
|
def multi_log_softmax(x, dim=8):
    """Apply ``log_softmax`` independently to groups of ``dim`` features.

    The last axis is split into consecutive groups of ``dim`` entries, each
    group is log-normalised on its own, and the result is returned in the
    **input's** shape (matching ``multi_softmax``). With ``dim=None`` the whole
    last axis is a single group.

    Args:
        x: Input array; its last axis must be divisible by ``dim``.
        dim: Group width, or ``None`` for one group.
    """
    # Remember the caller's shape *before* regrouping; reshaping to the shape of
    # the already-regrouped array would be a no-op and leak the grouped layout.
    input_shape = x.shape
    if dim is not None:
        x = x.reshape(*input_shape[:-1], -1, dim)
    return jax.nn.log_softmax(x, axis=-1).reshape(input_shape)
|
||||||
|
|
||||||
|
|
||||||
|
def simplical_softmax_cross_entropy(pred, target, dim=8):
    """Cross-entropy between grouped softmax predictions and grouped targets.

    The last axis of ``pred`` (logits) and ``target`` (probabilities) is split
    into groups of ``dim`` entries. The cross-entropy is computed per group,
    averaged over every leading axis and group, and finally divided by the
    number of groups (``pred.shape[-1] / dim``).

    With ``dim=None`` the whole last axis is treated as one group, so the
    divisor is 1. (Previously this path raised ``TypeError`` on ``shape / None``
    even though the reshape was already guarded for it.)

    Args:
        pred: Logits; last axis divisible by ``dim``.
        target: Target probabilities with the same shape as ``pred``.
        dim: Group width, or ``None`` for a single group.

    Returns:
        Scalar loss.
    """
    width = pred.shape[-1]
    if dim is not None:
        pred = pred.reshape(*pred.shape[:-1], -1, dim)
        target = target.reshape(*target.shape[:-1], -1, dim)
        num_groups = width / dim
    else:
        num_groups = 1
    per_group = jnp.sum(-target * jax.nn.log_softmax(pred, axis=-1), axis=-1)
    return per_group.mean() / num_groups
|
||||||
270
reppo/network_utils/fast_td3_nets.py
Normal file
270
reppo/network_utils/fast_td3_nets.py
Normal file
@ -0,0 +1,270 @@
|
|||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
class DistributionalQNetwork(nn.Module):
    """MLP mapping ``(observation, action)`` to logits over ``num_atoms`` atoms.

    The atoms are assumed to be evenly spaced on ``[v_min, v_max]``;
    ``projection`` implements the categorical projection of a Bellman-shifted
    distribution back onto that grid.
    """

    def __init__(
        self,
        n_obs: int,
        n_act: int,
        num_atoms: int,
        v_min: float,
        v_max: float,
        hidden_dim: int,
        device: torch.device = None,
    ):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_obs + n_act, hidden_dim, device=device),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim // 2, device=device),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, hidden_dim // 4, device=device),
            nn.ReLU(),
            nn.Linear(hidden_dim // 4, num_atoms, device=device),
        )
        self.v_min = v_min
        self.v_max = v_max
        self.num_atoms = num_atoms

    def forward(self, obs: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
        """Return unnormalised atom logits for the concatenated ``[obs, actions]``."""
        x = torch.cat([obs, actions], 1)
        return self.net(x)

    def projection(
        self,
        obs: torch.Tensor,
        actions: torch.Tensor,
        rewards: torch.Tensor,
        bootstrap: torch.Tensor,
        discount: "float | torch.Tensor",
        q_support: torch.Tensor,
        device: torch.device,
    ) -> torch.Tensor:
        """Project ``rewards + bootstrap * discount * q_support`` onto the atom grid.

        Args:
            obs: Observations fed to this network, first axis is the batch.
            actions: Actions fed to this network.
            rewards: Per-sample rewards, shape ``(batch,)``.
            bootstrap: Per-sample bootstrap mask/weight, shape ``(batch,)``.
            discount: Scalar discount or per-sample tensor of shape ``(batch,)``.
            q_support: Atom values, shape ``(num_atoms,)``.
            device: Device on which the index offsets are created.

        Returns:
            Tensor of shape ``(batch, num_atoms)``; every row carries the same
            total mass as the softmax of this network's logits (i.e. 1).
        """
        delta_z = (self.v_max - self.v_min) / (self.num_atoms - 1)
        batch_size = rewards.shape[0]

        # Per-sample discounts need a trailing axis to broadcast over atoms;
        # Python floats and 0-dim tensors broadcast as they are.
        if torch.is_tensor(discount) and discount.dim() > 0:
            discount = discount.unsqueeze(1)

        target_z = rewards.unsqueeze(1) + bootstrap.unsqueeze(1) * discount * q_support
        target_z = target_z.clamp(self.v_min, self.v_max)
        b = (target_z - self.v_min) / delta_z  # fractional atom index
        lower = torch.floor(b).long()
        upper = torch.ceil(b).long()

        # When b lands exactly on an atom, floor == ceil and both interpolation
        # weights would be zero. Move ONE of the two indices apart. The second
        # test must see the already-updated `lower`; evaluating both masks up
        # front shifts both indices and doubles the probability mass.
        lower = torch.where((upper > 0) & (lower == upper), lower - 1, lower)
        upper = torch.where(
            (lower < (self.num_atoms - 1)) & (lower == upper), upper + 1, upper
        )

        next_dist = F.softmax(self.forward(obs, actions), dim=1)
        proj_dist = torch.zeros_like(next_dist)
        # Start index of every row in the flattened (batch * num_atoms) view;
        # integer arange is exact for any batch size.
        offset = (torch.arange(batch_size, device=device) * self.num_atoms).unsqueeze(1)
        proj_dist.view(-1).index_add_(
            0, (lower + offset).view(-1), (next_dist * (upper.float() - b)).view(-1)
        )
        proj_dist.view(-1).index_add_(
            0, (upper + offset).view(-1), (next_dist * (b - lower.float())).view(-1)
        )
        return proj_dist
|
||||||
|
|
||||||
|
|
||||||
|
class Critic(nn.Module):
    """Two independently parameterised ``DistributionalQNetwork`` heads.

    Both heads are built from the same hyper-parameters and share one
    ``q_support`` buffer holding ``num_atoms`` evenly spaced values on
    ``[v_min, v_max]``.
    """

    def __init__(
        self,
        n_obs: int,
        n_act: int,
        num_atoms: int,
        v_min: float,
        v_max: float,
        hidden_dim: int,
        device: torch.device = None,
    ):
        super().__init__()
        self.qnet1 = DistributionalQNetwork(
            n_obs=n_obs,
            n_act=n_act,
            num_atoms=num_atoms,
            v_min=v_min,
            v_max=v_max,
            hidden_dim=hidden_dim,
            device=device,
        )
        self.qnet2 = DistributionalQNetwork(
            n_obs=n_obs,
            n_act=n_act,
            num_atoms=num_atoms,
            v_min=v_min,
            v_max=v_max,
            hidden_dim=hidden_dim,
            device=device,
        )

        # Registered as a buffer so it follows the module across devices and
        # is stored in the state dict.
        self.register_buffer(
            "q_support", torch.linspace(v_min, v_max, num_atoms, device=device)
        )
        self.device = device

    def forward(
        self, obs: torch.Tensor, actions: torch.Tensor
    ) -> "tuple[torch.Tensor, torch.Tensor]":
        """Return the atom logits of both heads as ``(q1_logits, q2_logits)``."""
        return self.qnet1(obs, actions), self.qnet2(obs, actions)

    def projection(
        self,
        obs: torch.Tensor,
        actions: torch.Tensor,
        rewards: torch.Tensor,
        bootstrap: torch.Tensor,
        discount: float,
    ) -> "tuple[torch.Tensor, torch.Tensor]":
        """Run each head's ``projection`` with this module's ``q_support``.

        All arguments are forwarded unchanged to both heads, together with
        ``self.q_support`` and the device that buffer lives on.

        Returns:
            ``(q1_proj, q2_proj)``, one projected distribution per head.
        """
        q1_proj = self.qnet1.projection(
            obs,
            actions,
            rewards,
            bootstrap,
            discount,
            self.q_support,
            self.q_support.device,
        )
        q2_proj = self.qnet2.projection(
            obs,
            actions,
            rewards,
            bootstrap,
            discount,
            self.q_support,
            self.q_support.device,
        )
        return q1_proj, q2_proj

    def get_value(self, probs: torch.Tensor) -> torch.Tensor:
        """Return ``sum(probs * q_support)`` over dim 1.

        ``probs`` is used as-is (no softmax is applied here), so callers are
        expected to pass probabilities rather than raw logits.
        """
        return torch.sum(probs * self.q_support, dim=1)
|
||||||
|
|
||||||
|
|
||||||
|
class Actor(nn.Module):
    """Deterministic tanh policy with per-environment Gaussian exploration noise.

    A three-layer ReLU trunk feeds a linear + tanh output head. One noise scale
    per environment is drawn uniformly from ``[std_min, std_max)`` and kept in
    the ``noise_scales`` buffer.
    """

    def __init__(
        self,
        n_obs: int,
        n_act: int,
        num_envs: int,
        init_scale: float,
        hidden_dim: int,
        std_min: float = 0.05,
        std_max: float = 0.8,
        device: torch.device = None,
    ):
        super().__init__()
        self.n_act = n_act

        half, quarter = hidden_dim // 2, hidden_dim // 4
        self.net = nn.Sequential(
            nn.Linear(n_obs, hidden_dim, device=device),
            nn.ReLU(),
            nn.Linear(hidden_dim, half, device=device),
            nn.ReLU(),
            nn.Linear(half, quarter, device=device),
            nn.ReLU(),
        )

        # Output head: weights re-drawn from N(0, init_scale), bias zeroed.
        mu_linear = nn.Linear(quarter, n_act, device=device)
        nn.init.normal_(mu_linear.weight, 0.0, init_scale)
        nn.init.constant_(mu_linear.bias, 0.0)
        self.fc_mu = nn.Sequential(mu_linear, nn.Tanh())

        initial_scales = (
            torch.rand(num_envs, 1, device=device) * (std_max - std_min) + std_min
        )
        self.register_buffer("noise_scales", initial_scales)
        self.register_buffer("std_min", torch.as_tensor(std_min, device=device))
        self.register_buffer("std_max", torch.as_tensor(std_max, device=device))

        self.n_envs = num_envs
        self.device = device

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        """Return the noise-free action for ``obs``."""
        return self.fc_mu(self.net(obs))

    def explore(
        self, obs: torch.Tensor, dones: torch.Tensor = None, deterministic: bool = False
    ) -> torch.Tensor:
        """Return an action, optionally perturbed by Gaussian exploration noise.

        If ``dones`` is given and any entry is positive, the noise scales of
        those environments are re-drawn first (this happens even when
        ``deterministic`` is set). The added noise is not clipped here.
        """
        if dones is not None and dones.sum() > 0:
            # One candidate scale per environment; only finished ones adopt it.
            span = self.std_max - self.std_min
            candidate = torch.rand(self.n_envs, 1, device=obs.device) * span + self.std_min
            finished = dones.view(-1, 1) > 0
            self.noise_scales = torch.where(finished, candidate, self.noise_scales)

        mean_action = self(obs)
        if deterministic:
            return mean_action
        return mean_action + torch.randn_like(mean_action) * self.noise_scales
|
||||||
|
|
||||||
|
|
||||||
|
class MultiTaskActor(Actor):
    """``Actor`` whose trailing one-hot task id is replaced by a learned embedding.

    The last ``num_tasks`` observation features are read as a one-hot task
    indicator, turned into a task index and swapped for the matching
    ``task_embedding_dim``-wide embedding before the base actor runs. The
    ``n_obs`` forwarded to ``Actor`` must therefore describe the observation
    width *after* that substitution.
    """

    def __init__(self, num_tasks: int, task_embedding_dim: int, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.num_tasks = num_tasks
        self.task_embedding_dim = task_embedding_dim
        self.task_embedding = nn.Embedding(
            num_tasks, task_embedding_dim, max_norm=1.0, device=self.device
        )

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        """Embed the task id in ``obs`` and return the base actor's action."""
        task_ids_one_hot = obs[..., -self.num_tasks :]
        # Reduce over the feature axis. It is the last axis, the same axis that
        # the slicing above and the concatenation below operate on, so inputs
        # with extra (or no) leading batch axes work too.
        task_indices = torch.argmax(task_ids_one_hot, dim=-1)
        task_embeddings = self.task_embedding(task_indices)
        obs = torch.cat([obs[..., : -self.num_tasks], task_embeddings], dim=-1)
        return super().forward(obs)
|
||||||
|
|
||||||
|
|
||||||
|
class MultiTaskCritic(Critic):
    """``Critic`` whose trailing one-hot task id is replaced by a learned embedding.

    The last ``num_tasks`` observation features are read as a one-hot task
    indicator and swapped for a ``task_embedding_dim``-wide embedding before
    the base critic runs. The ``n_obs`` forwarded to ``Critic`` must therefore
    describe the observation width *after* that substitution.
    """

    def __init__(self, num_tasks: int, task_embedding_dim: int, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.num_tasks = num_tasks
        self.task_embedding_dim = task_embedding_dim
        self.task_embedding = nn.Embedding(
            num_tasks, task_embedding_dim, max_norm=1.0, device=self.device
        )

    def _embed_task(self, obs: torch.Tensor) -> torch.Tensor:
        """Replace the trailing one-hot task id in ``obs`` with its embedding."""
        task_ids_one_hot = obs[..., -self.num_tasks :]
        # Reduce over the feature (last) axis, consistent with the slicing and
        # concatenation around it, so any number of leading axes is supported.
        task_indices = torch.argmax(task_ids_one_hot, dim=-1)
        task_embeddings = self.task_embedding(task_indices)
        return torch.cat([obs[..., : -self.num_tasks], task_embeddings], dim=-1)

    def forward(self, obs: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
        """Embed the task id in ``obs`` and return the base critic's output."""
        return super().forward(self._embed_task(obs), actions)

    def projection(
        self,
        obs: torch.Tensor,
        actions: torch.Tensor,
        rewards: torch.Tensor,
        bootstrap: torch.Tensor,
        discount: float,
    ) -> torch.Tensor:
        """Embed the task id in ``obs`` and delegate to ``Critic.projection``."""
        return super().projection(
            self._embed_task(obs), actions, rewards, bootstrap, discount
        )
|
||||||
424
reppo/network_utils/jax_models.py
Normal file
424
reppo/network_utils/jax_models.py
Normal file
@ -0,0 +1,424 @@
|
|||||||
|
import math
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import distrax
|
||||||
|
import jax
|
||||||
|
import jax.numpy as jnp
|
||||||
|
from flax import nnx
|
||||||
|
|
||||||
|
from reppo.jaxrl import utils
|
||||||
|
|
||||||
|
|
||||||
|
def torch_he_uniform(
    in_axis: Union[int, Sequence[int]] = -2,
    out_axis: Union[int, Sequence[int]] = -1,
    batch_axis: Sequence[int] = (),
    dtype=jnp.float_,
):
    """Build a fan-in, uniform variance-scaling initializer with scale 0.3333.

    All arguments are forwarded to ``nnx.initializers.variance_scaling``.

    NOTE(review): judging by the name and the 1/3 scale this presumably mirrors
    PyTorch's default ``nn.Linear`` weight init -- confirm.

    TODO: push to jax
    """
    return nnx.initializers.variance_scaling(
        0.3333,
        "fan_in",
        "uniform",
        in_axis=in_axis,
        out_axis=out_axis,
        batch_axis=batch_axis,
        dtype=dtype,
    )
|
||||||
|
|
||||||
|
|
||||||
|
class UnitBallNorm(nnx.Module):
    """Parameter-free layer that rescales vectors along the last axis to unit length."""

    def __call__(self, x: jax.Array) -> jax.Array:
        """Divide ``x`` by its L2 norm over the last axis (plus 1e-8 for stability)."""
        length = jnp.linalg.norm(x, axis=-1, keepdims=True)
        return x / (length + 1e-8)
|
||||||
|
|
||||||
|
|
||||||
|
def normed_activation_layer(
    rngs, in_features, out_features, use_norm=True, activation=nnx.swish
):
    """Build ``Linear -> [RMSNorm] -> [activation]`` as one ``nnx.Sequential``.

    Args:
        rngs: ``nnx.Rngs`` used to initialise the linear layer and the norm.
        in_features: Input width of the linear layer.
        out_features: Output width of the linear layer (and of the norm).
        use_norm: Append an ``nnx.RMSNorm`` after the linear layer.
        activation: Callable appended last; ``None`` to omit it.
    """
    linear = nnx.Linear(
        in_features=in_features,
        out_features=out_features,
        kernel_init=torch_he_uniform(),
        rngs=rngs,
    )
    stack = [linear]
    if use_norm:
        stack += [nnx.RMSNorm(out_features, rngs=rngs)]
    if activation is not None:
        stack += [activation]
    return nnx.Sequential(*stack)
|
||||||
|
|
||||||
|
|
||||||
|
class Identity(nnx.Module):
    """No-op module that returns its input unchanged."""

    def __call__(self, x: jax.Array) -> jax.Array:
        """Return ``x`` as-is."""
        return x
|
||||||
|
|
||||||
|
|
||||||
|
class FCNN(nnx.Module):
    """Fully connected network assembled from ``normed_activation_layer`` blocks.

    With ``layers == 1`` the network is a single output block. Otherwise it is
    an input block, ``layers - 2`` hidden blocks and an output block. Hidden
    blocks use ``use_norm`` / ``hidden_activation``; the output block uses
    ``use_output_norm`` / ``output_activation``. When ``input_activation`` is
    set, ``hidden_activation`` is applied to the raw input before the first
    linear layer (ignored for ``layers == 1``).
    """

    def __init__(
        self,
        in_features: int,
        out_features: int,
        hidden_dim: int = 512,
        hidden_activation=nnx.swish,
        output_activation=None,
        use_norm: bool = True,
        use_output_norm: bool = False,
        layers: int = 2,
        input_activation: bool = False,
        *,
        rngs: nnx.Rngs,
    ):
        def hidden_block(width_in):
            return normed_activation_layer(
                rngs,
                width_in,
                hidden_dim,
                use_norm=use_norm,
                activation=hidden_activation,
            )

        def output_block(width_in):
            return normed_activation_layer(
                rngs,
                width_in,
                out_features,
                use_norm=use_output_norm,
                activation=output_activation,
            )

        if layers == 1:
            self.module = output_block(in_features)
            return

        # Blocks are created in input -> hidden -> output order, and the input
        # block keeps its own Sequential wrapper, so parameter paths are stable.
        first = hidden_block(in_features)
        if input_activation:
            input_layer = nnx.Sequential(hidden_activation, first)
        else:
            input_layer = nnx.Sequential(first)
        middle = [hidden_block(hidden_dim) for _ in range(layers - 2)]
        last = output_block(hidden_dim)
        self.module = nnx.Sequential(input_layer, *middle, last)

    def __call__(self, x: jax.Array) -> jax.Array:
        """Run ``x`` through the assembled stack."""
        return self.module(x)
|
||||||
|
|
||||||
|
|
||||||
|
class CriticNetwork(nnx.Module):
    """Scalar critic with a shared encoder, a value head and a prediction head.

    ``feature_module`` encodes the concatenated ``[obs, action]``;
    ``critic_module`` maps features to one value; ``pred_module`` maps features
    to a ``hidden_dim``-wide vector. With ``use_simplical_embedding`` the
    encoder and prediction outputs go through ``utils.multi_softmax``.
    """

    def __init__(
        self,
        obs_dim: int,
        action_dim: int,
        hidden_dim: int = 512,
        use_norm: bool = True,
        use_encoder_norm: bool = False,
        use_simplical_embedding: bool = False,
        encoder_layers: int = 1,
        head_layers: int = 1,
        pred_layers: int = 1,
        *,
        rngs: nnx.Rngs,
    ):
        embedding_activation = utils.multi_softmax if use_simplical_embedding else None

        def build(width_in, width_out, depth, out_activation, out_norm):
            return FCNN(
                in_features=width_in,
                out_features=width_out,
                hidden_dim=hidden_dim,
                hidden_activation=nnx.swish,
                output_activation=out_activation,
                use_norm=use_norm,
                use_output_norm=out_norm,
                layers=depth,
                rngs=rngs,
            )

        # Creation order (encoder, value head, prediction head) is kept so the
        # rng stream is consumed exactly as before.
        self.feature_module = build(
            obs_dim + action_dim,
            hidden_dim,
            encoder_layers,
            embedding_activation,
            use_encoder_norm,
        )
        self.critic_module = build(hidden_dim, 1, head_layers, None, False)
        self.pred_module = build(
            hidden_dim, hidden_dim, pred_layers, embedding_activation, False
        )

    def features(self, obs: jax.Array, action: jax.Array):
        """Encode the concatenation of ``obs`` and ``action`` along the last axis."""
        return self.feature_module(jnp.concatenate([obs, action], axis=-1))

    def critic_head(self, features: jax.Array) -> jax.Array:
        """Map encoder features to the value output."""
        return self.critic_module(features)

    def critic(self, obs: jax.Array, action: jax.Array) -> jax.Array:
        """Return the value output for ``(obs, action)``."""
        return self.critic_head(self.features(obs, action))

    def critic_cat(self, obs: jax.Array, action: jax.Array) -> jax.Array:
        """Same computation as ``critic`` for this scalar critic."""
        return self.critic_head(self.features(obs, action))

    def forward(self, obs, action):
        """Return ``(prediction, value)`` computed from one shared encoding."""
        encoded = self.features(obs, action)
        value = self.critic_head(encoded)
        return self.pred_module(encoded), value
|
||||||
|
|
||||||
|
|
||||||
|
class CategoricalCriticNetwork(nnx.Module):
    """Critic that predicts a categorical distribution over ``num_bins`` values.

    The value head outputs ``num_bins`` logits; scalar values are the
    expectation of their softmax over ``num_bins`` evenly spaced points on
    ``[vmin, vmax]``. A prediction head shares the encoder features.
    """

    def __init__(
        self,
        obs_dim: int,
        action_dim: int,
        hidden_dim: int = 512,
        use_norm: bool = True,
        use_encoder_norm: bool = False,
        use_simplical_embedding: bool = False,
        encoder_layers: int = 1,
        head_layers: int = 1,
        pred_layers: int = 1,
        num_bins: int = 51,
        vmin: float = -10.0,
        vmax: float = 10.0,
        *,
        rngs: nnx.Rngs,
    ):
        self.num_bins = num_bins
        self.vmin = vmin
        self.vmax = vmax

        # The heads apply the hidden activation to their input unless the
        # encoder already ends in the simplicial softmax.
        activate_head_input = not use_simplical_embedding

        self.feature_module = FCNN(
            in_features=obs_dim + action_dim,
            out_features=hidden_dim,
            hidden_dim=hidden_dim,
            hidden_activation=nnx.swish,
            output_activation=utils.multi_softmax if use_simplical_embedding else None,
            use_norm=use_norm,
            use_output_norm=use_encoder_norm,
            layers=encoder_layers,
            rngs=rngs,
        )

        def head(width_out, depth):
            return FCNN(
                in_features=hidden_dim,
                out_features=width_out,
                hidden_dim=hidden_dim,
                hidden_activation=nnx.swish,
                output_activation=None,
                use_norm=use_norm,
                use_output_norm=False,
                layers=depth,
                input_activation=activate_head_input,
                rngs=rngs,
            )

        self.critic_module = head(self.num_bins, head_layers)
        self.pred_module = head(hidden_dim, pred_layers)
        # HL-Gauss encoding of the value 0. Within this class it is only
        # referenced by a commented-out term in ``critic_head``.
        self.zero_dist = nnx.Param(
            utils.hl_gauss(jnp.zeros((1,)), num_bins, vmin, vmax)
        )

    def _bin_values(self) -> jax.Array:
        """Return the ``num_bins`` evenly spaced values on ``[vmin, vmax]``."""
        return jnp.linspace(self.vmin, self.vmax, self.num_bins, endpoint=True)

    def features(self, obs: jax.Array, action: jax.Array):
        """Encode the concatenation of ``obs`` and ``action`` along the last axis."""
        return self.feature_module(jnp.concatenate([obs, action], axis=-1))

    def critic_head(self, features: jax.Array) -> jax.Array:
        """Map encoder features to ``num_bins`` logits."""
        logits = self.critic_module(features)  # + self.zero_dist.value * 40.0
        return logits

    def critic_cat(self, obs: jax.Array, action: jax.Array) -> jax.Array:
        """Return the value logits for ``(obs, action)``."""
        return self.critic_head(self.features(obs, action))

    def critic(self, obs: jax.Array, action: jax.Array) -> jax.Array:
        """Return the expected value under the predicted distribution."""
        probs = jax.nn.softmax(self.critic_cat(obs, action), axis=-1)
        return probs.dot(self._bin_values())

    def forward(self, obs, action):
        """Return ``(prediction, expected_value)`` from one shared encoding."""
        encoded = self.features(obs, action)
        probs = jax.nn.softmax(self.critic_head(encoded), axis=-1)
        value = probs.dot(self._bin_values())
        return self.pred_module(encoded), value

    def __call__(self, obs: jax.Array, action: jax.Array) -> jax.Array:
        """Return ``(expected_value, value_probabilities, prediction)``."""
        encoded = self.features(obs, action)
        probs = jax.nn.softmax(self.critic_head(encoded), axis=-1)
        value = probs.dot(self._bin_values())
        prediction = self.pred_module(encoded)
        return value, probs, prediction
|
||||||
|
|
||||||
|
|
||||||
|
class SACActorNetworks(nnx.Module):
    """Tanh-Gaussian policy plus log-parameterised temperature and Lagrange multiplier.

    ``actor_module`` outputs ``2 * action_dim`` numbers; the first half is the
    pre-squash mean and the second half is treated as a log standard deviation
    by ``actor``. The temperature and Lagrangian are stored as logs and
    exponentiated on access, starting at ``ent_start`` and ``kl_start``.
    """

    def __init__(
        self,
        obs_dim: int,
        action_dim: int,
        hidden_dim: int = 512,
        ent_start: float = 0.1,
        kl_start: float = 0.1,
        use_norm: bool = True,
        layers: int = 2,
        min_std: float = 0.1,
        *,
        rngs: nnx.Rngs,
    ):
        self.actor_module = FCNN(
            in_features=obs_dim,
            out_features=action_dim * 2,
            hidden_dim=hidden_dim,
            hidden_activation=nnx.swish,
            output_activation=None,
            use_norm=use_norm,
            use_output_norm=False,
            layers=layers,
            input_activation=False,
            rngs=rngs,
        )
        log_temperature = math.log(ent_start)
        log_lagrangian = math.log(kl_start)
        self.temperature_log_param = nnx.Param(jnp.ones(1) * log_temperature)
        self.lagrangian_log_param = nnx.Param(jnp.ones(1) * log_lagrangian)
        self.min_std = min_std

    def actor(
        self, obs: jax.Array, scale: float | jax.Array = 1.0
    ) -> distrax.Distribution:
        """Return the tanh-squashed Normal policy for ``obs``.

        The standard deviation is ``(exp(log_std) + min_std) * scale``.
        """
        mean, log_std = jnp.split(self.actor_module(obs), 2, axis=-1)
        std = (jnp.exp(log_std) + self.min_std) * scale
        gaussian = distrax.Normal(loc=mean, scale=std)
        return distrax.Transformed(gaussian, distrax.Tanh())

    def det_action(self, obs: jax.Array) -> jax.Array:
        """Return ``tanh`` of the predicted mean (no sampling)."""
        mean, _ = jnp.split(self.actor_module(obs), 2, axis=-1)
        return jnp.tanh(mean)

    def temperature(self) -> jax.Array:
        """Return ``exp`` of the temperature log-parameter."""
        return jnp.exp(self.temperature_log_param.value)

    def lagrangian(self) -> jax.Array:
        """Return ``exp`` of the Lagrangian log-parameter."""
        return jnp.exp(self.lagrangian_log_param.value)

    def __call__(self, obs: jax.Array) -> jax.Array:
        """Return ``(tanh(mean), raw_second_half, temperature, lagrangian)``.

        The second element is the network's raw second-half output; unlike in
        ``actor`` it is neither exponentiated nor offset by ``min_std``.
        """
        mean, raw_std = jnp.split(self.actor_module(obs), 2, axis=-1)
        return jnp.tanh(mean), raw_std, self.temperature(), self.lagrangian()
|
||||||
|
|
||||||
|
|
||||||
|
class TD3ActorNetworks(nnx.Module):
    """Tanh-Gaussian policy plus log-parameterised temperature and Lagrange multiplier.

    ``actor_module`` outputs ``2 * action_dim`` numbers; the first half is the
    pre-squash mean and the second half is treated as a log standard deviation
    by ``actor``. The temperature and Lagrangian are stored as logs and
    exponentiated on access, starting at ``ent_start`` and ``kl_start``.
    """

    def __init__(
        self,
        obs_dim: int,
        action_dim: int,
        hidden_dim: int = 512,
        ent_start: float = 0.1,
        kl_start: float = 0.1,
        use_norm: bool = True,
        layers: int = 2,
        min_std: float = 0.1,
        *,
        rngs: nnx.Rngs,
    ):
        self.actor_module = FCNN(
            in_features=obs_dim,
            out_features=action_dim * 2,
            hidden_dim=hidden_dim,
            hidden_activation=nnx.swish,
            output_activation=None,
            use_norm=use_norm,
            use_output_norm=False,
            layers=layers,
            input_activation=False,
            rngs=rngs,
        )
        log_temperature = math.log(ent_start)
        log_lagrangian = math.log(kl_start)
        self.temperature_log_param = nnx.Param(jnp.ones(1) * log_temperature)
        self.lagrangian_log_param = nnx.Param(jnp.ones(1) * log_lagrangian)
        self.min_std = min_std

    def actor(
        self, obs: jax.Array, scale: float | jax.Array = 1.0
    ) -> distrax.Distribution:
        """Return the tanh-squashed Normal policy for ``obs``.

        The standard deviation is ``(exp(log_std) + min_std) * scale``.
        """
        mean, log_std = jnp.split(self.actor_module(obs), 2, axis=-1)
        std = (jnp.exp(log_std) + self.min_std) * scale
        gaussian = distrax.Normal(loc=mean, scale=std)
        return distrax.Transformed(gaussian, distrax.Tanh())

    def det_action(self, obs: jax.Array) -> jax.Array:
        """Return ``tanh`` of the predicted mean (no sampling)."""
        mean, _ = jnp.split(self.actor_module(obs), 2, axis=-1)
        return jnp.tanh(mean)

    def temperature(self) -> jax.Array:
        """Return ``exp`` of the temperature log-parameter."""
        return jnp.exp(self.temperature_log_param.value)

    def lagrangian(self) -> jax.Array:
        """Return ``exp`` of the Lagrangian log-parameter."""
        return jnp.exp(self.lagrangian_log_param.value)
|
||||||
|
|
||||||
|
|
||||||
|
class TD3DeterministicDist(distrax.Distribution):
    """``loc`` plus Gaussian exploration noise, reporting a constant zero log-prob.

    Sampling returns ``loc + scale * N(0, 1)``; ``log_prob`` is a placeholder
    that returns zeros shaped like its argument.

    NOTE(review): ``distrax.Distribution`` declares ``_sample_n`` and
    ``event_shape`` as abstract (to be confirmed against the installed distrax
    version); without them this class cannot be instantiated, so both are
    provided here.
    """

    def __init__(self, loc: jax.Array, scale: float | jax.Array):
        super().__init__()
        self.loc = loc
        self.scale = scale

    def _sample_n(self, key, n):
        """Return ``n`` independent samples stacked along a new leading axis."""
        noise = jax.random.normal(key, (n, *self.loc.shape))
        return self.loc + self.scale * noise

    @property
    def event_shape(self):
        """Events are scalars: ``log_prob`` is elementwise, nothing is reduced."""
        return ()

    def sample(self, seed=None):
        """Return one sample shaped like ``loc``; ``seed`` must be a PRNG key."""
        return self.loc + self.scale * jax.random.normal(seed, self.loc.shape)

    def log_prob(self, value: jax.Array) -> jax.Array:
        """Return zeros with the shape of ``value`` (placeholder density)."""
        return jnp.zeros_like(value)

    def sample_and_log_prob(self, *, seed, sample_shape=...):
        """Return ``(sample, zeros_like(sample))``; ``sample_shape`` is ignored."""
        sample = self.sample(seed=seed)
        log_prob = self.log_prob(sample)
        return sample, log_prob
|
||||||
374
reppo/network_utils/torch_models.py
Normal file
374
reppo/network_utils/torch_models.py
Normal file
@ -0,0 +1,374 @@
|
|||||||
|
import torch
|
||||||
|
from torch import nn
|
||||||
|
from torch.distributions import constraints
|
||||||
|
from torch.distributions.transforms import Transform
|
||||||
|
from torch.distributions.normal import Normal
|
||||||
|
|
||||||
|
from reppo.torchrl.reppo import hl_gauss
|
||||||
|
|
||||||
|
|
||||||
|
class TanhTransform(Transform):
    r"""
    Transform via the mapping :math:`y = \tanh(x)`.

    It is equivalent to

    .. code-block:: python

        ComposeTransform(
            [
                AffineTransform(0.0, 2.0),
                SigmoidTransform(),
                AffineTransform(-1.0, 2.0),
            ]
        )

    However this might not be numerically stable, thus it is recommended to use `TanhTransform`
    instead.

    Note that one should use `cache_size=1` when it comes to `NaN/Inf` values.

    """

    domain = constraints.real
    codomain = constraints.interval(-1.0, 1.0)
    bijective = True
    sign = +1
    # Kept as a plain Python float rather than a tensor pinned to a device at
    # import time: a float combines with tensors on any device (CPU, any GPU)
    # and does not initialise CUDA on import. Same float32-rounded value as the
    # former tensor.
    log2 = torch.log(torch.tensor(2.0)).item()

    def __eq__(self, other):
        """All ``TanhTransform`` instances compare equal (the transform is stateless)."""
        return isinstance(other, TanhTransform)

    def _call(self, x):
        """Forward map: ``tanh(x)``."""
        return x.tanh()

    def _inverse(self, y):
        """Inverse map: ``atanh(y)``."""
        # We do not clamp to the boundary here as it may degrade the performance of certain algorithms.
        # one should use `cache_size=1` instead
        return torch.atanh(y)

    def log_abs_det_jacobian(self, x, y):
        """Return ``log|d tanh(x)/dx|`` elementwise; ``y`` is unused."""
        # We use a formula that is more numerically stable, see details in the following link
        # https://github.com/tensorflow/probability/blob/master/tensorflow_probability/python/bijectors/tanh.py#L69-L80
        return 2.0 * (self.log2 - x - torch.nn.functional.softplus(-2.0 * x))
|
||||||
|
|
||||||
|
|
||||||
|
def get_activation(name):
    """Return a fresh activation module for ``name``.

    Supported names are ``"gelu"``, ``"relu"`` and ``"swish"`` (``nn.SiLU``);
    ``None`` yields ``nn.Identity``.

    Raises:
        ValueError: If ``name`` is anything else.
    """
    if isinstance(name, str):
        factories = {"gelu": nn.GELU, "relu": nn.ReLU, "swish": nn.SiLU}
        if name in factories:
            return factories[name]()
    elif name is None:
        return nn.Identity()
    raise ValueError(f"Unknown activation: {name}")
|
||||||
|
|
||||||
|
|
||||||
|
def normed_activation_layer(
    in_features, out_features, use_norm=True, activation="swish", device=None
):
    """Build ``Linear -> [RMSNorm] -> [activation]`` as one ``nn.Sequential``.

    Args:
        in_features: Input width of the linear layer.
        out_features: Output width of the linear layer (and of the norm).
        use_norm: Append an ``nn.RMSNorm`` after the linear layer.
        activation: Name understood by ``get_activation``; ``None`` to omit the
            activation entirely.
        device: Device on which the parameters are created.
    """
    modules = [nn.Linear(in_features, out_features, device=device)]
    if use_norm:
        modules.extend([nn.RMSNorm([out_features], device=device)])
    if activation is not None:
        modules.extend([get_activation(activation)])
    return nn.Sequential(*modules)
|
||||||
|
|
||||||
|
|
||||||
|
class FCNN(nn.Module):
    """Fully connected network assembled from ``normed_activation_layer`` blocks.

    With ``layers == 1`` the network is a single output block. Otherwise it is
    an input block, ``layers - 2`` hidden blocks and an output block. Hidden
    blocks use ``use_norm`` / ``hidden_activation``; the output block uses
    ``use_output_norm`` / ``output_activation``. When ``input_activation`` is
    set, the hidden activation is applied to the raw input before the first
    linear layer (ignored for ``layers == 1``).
    """

    def __init__(
        self,
        in_features,
        out_features,
        hidden_dim=256,
        hidden_activation="swish",
        output_activation=None,
        use_norm=True,
        use_output_norm=False,
        layers=2,
        input_activation=False,
        device=None,
    ):
        super().__init__()

        def block(width_in, width_out, *, final):
            # The final block follows the output settings, all others the hidden ones.
            return normed_activation_layer(
                width_in,
                width_out,
                use_norm=use_output_norm if final else use_norm,
                activation=output_activation if final else hidden_activation,
                device=device,
            )

        if layers == 1:
            stages = [block(in_features, out_features, final=True)]
        else:
            stages = [get_activation(hidden_activation)] if input_activation else []
            stages.append(block(in_features, hidden_dim, final=False))
            stages.extend(
                block(hidden_dim, hidden_dim, final=False) for _ in range(layers - 2)
            )
            stages.append(block(hidden_dim, out_features, final=True))
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the assembled stack."""
        return self.net(x)
|
||||||
|
|
||||||
|
|
||||||
|
class CriticNetwork(nn.Module):
    """Encoder with a scalar critic head and a feature-prediction head.

    Three ``FCNN`` sub-networks are created:

    * ``feature_module``: ``(n_obs + n_act) -> hidden_dim`` encoder,
    * ``critic_module``: ``hidden_dim -> 1`` head,
    * ``pred_module``: ``hidden_dim -> hidden_dim`` head.

    Note that ``forward`` returns the output of ``pred_module``; use
    ``critic`` to obtain the output of the critic head.

    Args:
        n_obs: Observation feature size.
        n_act: Action feature size.
        hidden_dim: Width of the encoder output and of all hidden layers.
        use_norm: Forwarded as ``use_norm`` to every sub-network.
        use_encoder_norm: Forwarded as ``use_output_norm`` to the encoder.
        encoder_layers: Number of layers in ``feature_module``.
        head_layers: Number of layers in ``critic_module``.
        pred_layers: Number of layers in ``pred_module``.
        device: Device forwarded to every sub-network.
    """

    def __init__(
        self,
        n_obs,
        n_act,
        hidden_dim=256,
        use_norm=True,
        use_encoder_norm=False,
        encoder_layers=1,
        head_layers=1,
        pred_layers=1,
        device=None,
    ):
        super().__init__()

        # Settings common to all three sub-networks.
        common = dict(
            hidden_dim=hidden_dim,
            hidden_activation="swish",
            output_activation=None,
            use_norm=use_norm,
            device=device,
        )

        self.feature_module = FCNN(
            in_features=n_obs + n_act,
            out_features=hidden_dim,
            use_output_norm=use_encoder_norm,
            layers=encoder_layers,
            **common,
        )
        self.critic_module = FCNN(
            in_features=hidden_dim,
            out_features=1,
            use_output_norm=False,
            layers=head_layers,
            **common,
        )
        self.pred_module = FCNN(
            in_features=hidden_dim,
            out_features=hidden_dim,
            use_output_norm=False,
            layers=pred_layers,
            **common,
        )

    def features(self, obs, action):
        """Encode ``obs`` and ``action`` concatenated along the last dim."""
        joint = torch.cat([obs, action], dim=-1)
        return self.feature_module(joint)

    def critic_head(self, features):
        """Apply the critic head to already-encoded ``features``."""
        return self.critic_module(features)

    def critic(self, obs, action):
        """Encode ``(obs, action)`` and return the critic head output."""
        return self.critic_head(self.features(obs, action))

    def forward(self, obs, action):
        """Encode ``(obs, action)`` and return the prediction head output."""
        return self.pred_module(self.features(obs, action))
|
||||||
|
|
||||||
|
|
||||||
|
class Critic(nn.Module):
    """Categorical critic over a fixed grid of ``num_atoms`` value atoms.

    An encoder (``feature_module``) maps the concatenated observation and
    action to ``hidden_dim`` features. Two heads consume these features:
    ``critic_module`` produces ``num_atoms`` logits and ``pred_module``
    produces a ``hidden_dim`` prediction.

    Args:
        n_obs: Observation feature size.
        n_act: Action feature size.
        num_atoms: Number of atoms of the categorical value distribution.
        vmin: Value assigned to the first atom.
        vmax: Value assigned to the last atom.
        hidden_dim: Width of the encoder output and of all hidden layers.
        use_norm: Forwarded as ``use_norm`` to every sub-network.
        use_encoder_norm: Forwarded as ``use_output_norm`` to the encoder.
        encoder_layers: Number of layers in ``feature_module``.
        head_layers: Number of layers in ``critic_module``.
        pred_layers: Number of layers in ``pred_module``.
        device: Device used for sub-networks and created tensors.
    """

    def __init__(
        self,
        n_obs,
        n_act,
        num_atoms: int,
        vmin: float,
        vmax: float,
        hidden_dim=256,
        use_norm=True,
        use_encoder_norm=False,
        encoder_layers=1,
        head_layers=1,
        pred_layers=1,
        device=None,
    ):
        super().__init__()
        self.num_atoms = num_atoms
        self.vmin = vmin
        self.vmax = vmax
        self.hidden_dim = hidden_dim
        self.feature_module = FCNN(
            in_features=n_obs + n_act,
            out_features=hidden_dim,
            hidden_dim=hidden_dim,
            hidden_activation="swish",
            output_activation=None,
            use_norm=use_norm,
            use_output_norm=use_encoder_norm,
            layers=encoder_layers,
            device=device,
        )
        # NOTE: FCNN only honours ``input_activation`` when it has more than
        # one layer, so with the default ``head_layers=1`` / ``pred_layers=1``
        # no activation is inserted in front of the heads.
        self.critic_module = FCNN(
            in_features=hidden_dim,
            out_features=num_atoms,
            hidden_dim=hidden_dim,
            hidden_activation="swish",
            output_activation=None,
            use_norm=use_norm,
            use_output_norm=False,
            input_activation=True,
            layers=head_layers,
            device=device,
        )
        self.pred_module = FCNN(
            in_features=hidden_dim,
            out_features=hidden_dim,
            hidden_dim=hidden_dim,
            hidden_activation="swish",
            output_activation=None,
            use_norm=use_norm,
            input_activation=True,
            use_output_norm=False,
            layers=pred_layers,
            device=device,
        )
        # Atom locations. This is a plain tensor attribute (neither a
        # Parameter nor a registered buffer), so it is not in ``state_dict``
        # and is not moved by ``Module.to``.
        self.values = torch.linspace(
            vmin, vmax, num_atoms, device=device, dtype=torch.float32
        )
        # Learnable offset added to the logits in ``forward``; presumably the
        # HL-Gauss encoding of the scalar 0 -- confirm against ``hl_gauss``.
        # (An unused duplicate of this computation was removed here.)
        self.zero_dist = nn.Parameter(
            hl_gauss(
                torch.zeros(1, device=device), self.vmin, self.vmax, self.num_atoms
            )
        )

    def forward(self, obs, action):
        """Evaluate the critic for ``(obs, action)``.

        Returns:
            A tuple ``(value, logits, next_pred, features)`` where ``logits``
            are the categorical logits over the atoms, ``value`` is the
            expectation of ``self.values`` under ``softmax(logits)``,
            ``next_pred`` is the output of ``pred_module`` and ``features``
            is the encoder output.
        """
        inp = torch.cat([obs, action], dim=-1)
        features = self.feature_module(inp)
        next_pred = self.pred_module(features)
        # NOTE(review): 40.9 is an unexplained scale on the learned offset;
        # confirm the intended constant.
        logits = self.critic_module(features) + 40.9 * self.zero_dist
        value_cats = torch.softmax(logits, dim=-1)
        value = value_cats @ self.values
        return value, logits, next_pred, features
|
||||||
|
|
||||||
|
|
||||||
|
class Actor(nn.Module):
    """Gaussian policy squashed through ``tanh``, plus two learnable scalars.

    The backbone ``model`` outputs ``2 * n_act`` values that are split into
    a mean and a log standard deviation. Two scalar parameters are stored in
    log space: ``log_temp`` (initialised to ``log(ent_start)``) and
    ``log_lagrange`` (initialised to ``log(kl_start)``).

    Args:
        n_obs: Observation feature size.
        n_act: Action feature size.
        ent_start: Initial value of ``exp(log_temp)``; must be positive.
        kl_start: Initial value of ``exp(log_lagrange)``; must be positive.
        hidden_dim: Hidden width of the backbone.
        use_norm: Forwarded as ``use_norm`` to the backbone.
        layers: Number of backbone layers.
        min_std: Constant added to the standard deviation.
        device: Device used for the backbone and the scalar parameters.
    """

    def __init__(
        self,
        n_obs,
        n_act,
        ent_start: float,
        kl_start: float,
        hidden_dim=256,
        use_norm=True,
        layers=2,
        min_std=0.1,
        device=None,
    ):
        super().__init__()
        self.model = FCNN(
            in_features=n_obs,
            out_features=2 * n_act,
            hidden_dim=hidden_dim,
            hidden_activation="swish",
            output_activation=None,
            use_norm=use_norm,
            use_output_norm=False,
            layers=layers,
            device=device,
        )
        self.log_temp = nn.Parameter(
            torch.log(torch.tensor(ent_start, device=device, dtype=torch.float32))
        )
        self.log_lagrange = nn.Parameter(
            torch.log(torch.tensor(kl_start, device=device, dtype=torch.float32))
        )
        self.min_std = min_std

    def forward(
        self, obs: torch.Tensor
    ) -> "tuple[torch.distributions.Distribution, torch.Tensor, torch.Tensor, torch.Tensor]":
        """Build the action distribution for ``obs``.

        Returns:
            A 4-tuple ``(distribution, tanh_mean, temperature, lagrange)``:
            the tanh-transformed Normal distribution, ``tanh`` of the
            Gaussian mean, ``exp(log_temp)`` and ``exp(log_lagrange)``.
        """
        x = self.model(obs)
        # First half of the output is the mean, second half the log-std.
        mean, log_std = torch.split(x, x.shape[-1] // 2, dim=-1)
        # ``min_std`` keeps the standard deviation bounded away from zero.
        std = torch.exp(log_std) + self.min_std
        pi = Normal(mean, std, validate_args=False)

        transformed_pi = torch.distributions.TransformedDistribution(
            pi, [torch.distributions.TanhTransform()]
        )
        return (
            transformed_pi,
            torch.tanh(mean),
            torch.exp(self.log_temp),
            torch.exp(self.log_lagrange),
        )
|
||||||
|
|
||||||
|
|
||||||
|
class StochasticPolicy(nn.Module):
    """Wrap an actor with an optional observation normaliser.

    Args:
        actor: Module called on the (optionally normalised) observations.
        normalizer: Module applied to the observations before the actor, or
            ``None`` to pass observations through unchanged.
        *args: Forwarded to ``nn.Module.__init__``.
        **kwargs: Forwarded to ``nn.Module.__init__``.
    """

    def __init__(
        self, actor: Actor, normalizer: "nn.Module | None" = None, *args, **kwargs
    ):
        super().__init__(*args, **kwargs)
        self.actor = actor
        self.normalizer = normalizer

    def forward(
        self, obs: torch.Tensor
    ) -> "tuple[torch.distributions.Distribution, torch.Tensor, torch.Tensor, torch.Tensor]":
        """Normalise ``obs`` if a normaliser is set and return the actor output."""
        # Compare against None explicitly: the truth value of a module may be
        # False (e.g. a container with zero length), which would silently
        # skip a normaliser that was actually provided.
        if self.normalizer is not None:
            obs = self.normalizer(obs)
        return self.actor(obs)
|
||||||
|
|
||||||
|
|
||||||
|
class TD3DeterministicPolicy(nn.Module):
    """Deterministic policy returning ``tanh`` of the first half of the output.

    The backbone produces ``2 * n_act`` outputs; only the first ``n_act``
    are used and the remainder is discarded.

    Args:
        n_obs: Observation feature size.
        n_act: Action feature size.
        hidden_dim: Hidden width of the backbone.
        use_norm: Forwarded as ``use_norm`` to the backbone.
        layers: Number of backbone layers.
        device: Device forwarded to the backbone.
    """

    def __init__(
        self,
        n_obs,
        n_act,
        hidden_dim=256,
        use_norm=True,
        layers=2,
        device=None,
    ):
        super().__init__()
        backbone_cfg = dict(
            in_features=n_obs,
            out_features=2 * n_act,
            hidden_dim=hidden_dim,
            hidden_activation="swish",
            output_activation=None,
            use_norm=use_norm,
            use_output_norm=False,
            layers=layers,
            device=device,
        )
        self.model = FCNN(**backbone_cfg)

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        """Return the squashed action for ``obs``."""
        out = self.model(obs)
        half = out.shape[-1] // 2
        # Keep only the first chunk; the second chunk is unused here.
        mean = torch.split(out, half, dim=-1)[0]
        return torch.tanh(mean)
|
||||||
95
reppo/torchrl/envs.py
Normal file
95
reppo/torchrl/envs.py
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
import torch
|
||||||
|
from omegaconf import DictConfig
|
||||||
|
|
||||||
|
|
||||||
|
def make_envs(cfg: DictConfig, device: torch.device, seed: int = None) -> tuple:
    """Create the training and evaluation environments selected by ``cfg.env.type``.

    Backend packages are imported lazily inside the matching branch so that
    only the selected backend has to be installed.

    Args:
        cfg: Configuration; reads ``cfg.env.type``, ``cfg.env.name``,
            ``cfg.hyperparameters.num_envs`` and backend-specific entries.
        device: Torch device handed to the environment wrappers.
        seed: Seed forwarded to the ``isaaclab`` and ``mjx`` backends.

    Returns:
        A ``(envs, eval_envs)`` pair. For ``humanoid_bench`` and ``isaaclab``
        both entries are the same object.

    Raises:
        ValueError: If ``cfg.env.type`` is not a supported backend.

    Note:
        The ``maniskill`` branch mutates ``cfg``: it sets
        ``cfg.env.max_episode_steps`` and overwrites
        ``cfg.hyperparameters.gamma``.
    """
    if cfg.env.type == "humanoid_bench":
        from reppo.env_utils.torch_wrappers.humanoid_bench_env import (
            HumanoidBenchEnv,
        )

        envs = HumanoidBenchEnv(
            cfg.env.name, cfg.hyperparameters.num_envs, device=device
        )
        return envs, envs
    elif cfg.env.type == "isaaclab":
        from reppo.env_utils.torch_wrappers.isaaclab_env import IsaacLabEnv

        # The seed is the fourth positional argument of IsaacLabEnv (as in
        # the FastTD3 training script); it was previously passed under the
        # unrelated keyword ``cfg``.
        envs = IsaacLabEnv(
            cfg.env.name,
            device.type,
            cfg.hyperparameters.num_envs,
            seed,
            action_bounds=cfg.env.action_bounds,
        )
        return envs, envs
    elif cfg.env.type == "mjx":
        from reppo.env_utils.torch_wrappers.mujoco_playground_env import make_env

        # TODO: Check if re-using same envs for eval could reduce memory usage
        envs, eval_envs = make_env(
            env_name=cfg.env.name,
            seed=seed,
            num_envs=cfg.hyperparameters.num_envs,
            num_eval_envs=cfg.hyperparameters.num_envs,
            device_rank=cfg.platform.device_rank,
            use_domain_randomization=False,
            use_push_randomization=True,
        )
        return envs, eval_envs
    elif cfg.env.type == "maniskill":
        import gymnasium as gym
        import mani_skill.envs  # noqa: F401
        from mani_skill.utils import gym_utils
        from mani_skill.utils.wrappers.flatten import FlattenActionSpaceWrapper
        from mani_skill.vector.wrappers.gymnasium import ManiSkillVectorEnv
        from reppo.env_utils.torch_wrappers.maniskill_wrapper import (
            ManiSkillWrapper,
        )

        # Training and evaluation envs differ only in ``reconfiguration_freq``.
        envs = gym.make(
            cfg.env.name,
            num_envs=cfg.hyperparameters.num_envs,
            reconfiguration_freq=None,
            **cfg.env.env_kwargs,
        )
        eval_envs = gym.make(
            cfg.env.name,
            num_envs=cfg.hyperparameters.num_envs,
            reconfiguration_freq=1,
            **cfg.env.env_kwargs,
        )
        cfg.env.max_episode_steps = gym_utils.find_max_episode_steps_value(envs)
        # heuristic for setting gamma
        cfg.hyperparameters.gamma = 1.0 - 10.0 / cfg.env.max_episode_steps

        if isinstance(envs.action_space, gym.spaces.Dict):
            envs = FlattenActionSpaceWrapper(envs)
            eval_envs = FlattenActionSpaceWrapper(eval_envs)
        envs = ManiSkillVectorEnv(
            envs,
            cfg.hyperparameters.num_envs,
            ignore_terminations=not cfg.env.partial_reset,
            record_metrics=True,
        )
        # Evaluation always ignores terminations.
        eval_envs = ManiSkillVectorEnv(
            eval_envs,
            cfg.hyperparameters.num_envs,
            ignore_terminations=True,
            record_metrics=True,
        )
        return ManiSkillWrapper(
            envs,
            max_episode_steps=cfg.env.max_episode_steps,
            partial_reset=cfg.env.partial_reset,
            device=device.type,
        ), ManiSkillWrapper(
            eval_envs,
            max_episode_steps=cfg.env.max_episode_steps,
            partial_reset=cfg.env.partial_reset,
            device=device.type,
        )
    else:
        raise ValueError(
            f"Unknown environment type: {cfg.env.type}. Supported types are 'humanoid_bench', 'isaaclab', 'maniskill', and 'mjx'."
        )
|
||||||
695
reppo/torchrl/fast_td3.py
Normal file
695
reppo/torchrl/fast_td3.py
Normal file
@ -0,0 +1,695 @@
|
|||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
os.environ["TORCHDYNAMO_INLINE_INBUILT_NN_MODULES"] = "1"
|
||||||
|
os.environ["OMP_NUM_THREADS"] = "1"
|
||||||
|
if sys.platform != "darwin":
|
||||||
|
os.environ["MUJOCO_GL"] = "egl"
|
||||||
|
else:
|
||||||
|
os.environ["MUJOCO_GL"] = "glfw"
|
||||||
|
os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"] = "false"
|
||||||
|
os.environ["JAX_DEFAULT_MATMUL_PRECISION"] = "highest"
|
||||||
|
|
||||||
|
import math
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import tqdm
|
||||||
|
|
||||||
|
import wandb
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Required for avoiding IsaacGym import error
|
||||||
|
import isaacgym
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
import torch.optim as optim
|
||||||
|
from fast_sac_utils import (
|
||||||
|
EmpiricalNormalization,
|
||||||
|
PerTaskRewardNormalizer,
|
||||||
|
RewardNormalizer,
|
||||||
|
SimpleReplayBuffer,
|
||||||
|
save_params,
|
||||||
|
)
|
||||||
|
from hyperparams import get_args
|
||||||
|
from tensordict import TensorDict
|
||||||
|
from torch.amp import GradScaler, autocast
|
||||||
|
|
||||||
|
torch.set_float32_matmul_precision("high")
|
||||||
|
|
||||||
|
try:
|
||||||
|
import jax.numpy as jnp
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
args = get_args()
|
||||||
|
print(args)
|
||||||
|
run_name = f"{args.env_name}__{args.exp_name}__{args.seed}"
|
||||||
|
|
||||||
|
amp_enabled = args.amp and args.cuda and torch.cuda.is_available()
|
||||||
|
amp_device_type = (
|
||||||
|
"cuda"
|
||||||
|
if args.cuda and torch.cuda.is_available()
|
||||||
|
else "mps"
|
||||||
|
if args.cuda and torch.backends.mps.is_available()
|
||||||
|
else "cpu"
|
||||||
|
)
|
||||||
|
amp_dtype = torch.bfloat16 if args.amp_dtype == "bf16" else torch.float16
|
||||||
|
|
||||||
|
scaler = GradScaler(enabled=amp_enabled and amp_dtype == torch.float16)
|
||||||
|
|
||||||
|
if args.use_wandb:
|
||||||
|
wandb.init(
|
||||||
|
project=args.project,
|
||||||
|
name=run_name,
|
||||||
|
config=vars(args),
|
||||||
|
save_code=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
random.seed(args.seed)
|
||||||
|
np.random.seed(args.seed)
|
||||||
|
torch.manual_seed(args.seed)
|
||||||
|
torch.backends.cudnn.deterministic = args.torch_deterministic
|
||||||
|
|
||||||
|
if not args.cuda:
|
||||||
|
device = torch.device("cpu")
|
||||||
|
else:
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
device = torch.device(f"cuda:{args.device_rank}")
|
||||||
|
elif torch.backends.mps.is_available():
|
||||||
|
device = torch.device(f"mps:{args.device_rank}")
|
||||||
|
else:
|
||||||
|
raise ValueError("No GPU available")
|
||||||
|
print(f"Using device: {device}")
|
||||||
|
|
||||||
|
if args.env_name.startswith("h1hand-") or args.env_name.startswith("h1-"):
|
||||||
|
from reppo.env_utils.torch_wrappers.humanoid_bench_env import (
|
||||||
|
HumanoidBenchEnv,
|
||||||
|
)
|
||||||
|
|
||||||
|
env_type = "humanoid_bench"
|
||||||
|
envs = HumanoidBenchEnv(args.env_name, args.num_envs, device=device)
|
||||||
|
eval_envs = envs
|
||||||
|
elif args.env_name.startswith("Isaac-"):
|
||||||
|
from reppo.env_utils.torch_wrappers.isaaclab_env import IsaacLabEnv
|
||||||
|
|
||||||
|
env_type = "isaaclab"
|
||||||
|
envs = IsaacLabEnv(
|
||||||
|
args.env_name,
|
||||||
|
device.type,
|
||||||
|
args.num_envs,
|
||||||
|
args.seed,
|
||||||
|
action_bounds=args.action_bounds,
|
||||||
|
)
|
||||||
|
eval_envs = envs
|
||||||
|
elif args.env_name.startswith("MTBench-"):
|
||||||
|
from reppo.env_utils.torch_wrappers.mtbench_env import MTBenchEnv
|
||||||
|
|
||||||
|
env_name = "-".join(args.env_name.split("-")[1:])
|
||||||
|
env_type = "mtbench"
|
||||||
|
envs = MTBenchEnv(env_name, args.device_rank, args.num_envs, args.seed)
|
||||||
|
eval_envs = envs
|
||||||
|
else:
|
||||||
|
from reppo.env_utils.torch_wrappers.mujoco_playground_env import make_env
|
||||||
|
|
||||||
|
# TODO: Check if re-using same envs for eval could reduce memory usage
|
||||||
|
env_type = "mujoco_playground"
|
||||||
|
envs, eval_envs = make_env(
|
||||||
|
args.env_name,
|
||||||
|
args.seed,
|
||||||
|
args.num_envs,
|
||||||
|
args.num_eval_envs,
|
||||||
|
args.device_rank,
|
||||||
|
use_tuned_reward=args.use_tuned_reward,
|
||||||
|
use_domain_randomization=args.use_domain_randomization,
|
||||||
|
use_push_randomization=args.use_push_randomization,
|
||||||
|
)
|
||||||
|
|
||||||
|
n_act = envs.num_actions
|
||||||
|
n_obs = envs.num_obs if type(envs.num_obs) == int else envs.num_obs[0]
|
||||||
|
if envs.asymmetric_obs:
|
||||||
|
n_critic_obs = (
|
||||||
|
envs.num_privileged_obs
|
||||||
|
if type(envs.num_privileged_obs) == int
|
||||||
|
else envs.num_privileged_obs[0]
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
n_critic_obs = n_obs
|
||||||
|
action_low, action_high = -1.0, 1.0
|
||||||
|
|
||||||
|
if args.obs_normalization:
|
||||||
|
obs_normalizer = EmpiricalNormalization(shape=n_obs, device=device)
|
||||||
|
critic_obs_normalizer = EmpiricalNormalization(
|
||||||
|
shape=n_critic_obs, device=device
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
obs_normalizer = nn.Identity()
|
||||||
|
critic_obs_normalizer = nn.Identity()
|
||||||
|
|
||||||
|
if args.reward_normalization:
|
||||||
|
if env_type in ["mtbench"]:
|
||||||
|
reward_normalizer = PerTaskRewardNormalizer(
|
||||||
|
num_tasks=envs.num_tasks,
|
||||||
|
gamma=args.gamma,
|
||||||
|
device=device,
|
||||||
|
g_max=min(abs(args.v_min), abs(args.v_max)),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
reward_normalizer = RewardNormalizer(
|
||||||
|
gamma=args.gamma,
|
||||||
|
device=device,
|
||||||
|
g_max=min(abs(args.v_min), abs(args.v_max)),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
reward_normalizer = nn.Identity()
|
||||||
|
|
||||||
|
actor_kwargs = {
|
||||||
|
"n_obs": n_obs,
|
||||||
|
"n_act": n_act,
|
||||||
|
"num_envs": args.num_envs,
|
||||||
|
"device": device,
|
||||||
|
"init_scale": args.init_scale,
|
||||||
|
"hidden_dim": args.actor_hidden_dim,
|
||||||
|
}
|
||||||
|
critic_kwargs = {
|
||||||
|
"n_obs": n_critic_obs,
|
||||||
|
"n_act": n_act,
|
||||||
|
"num_atoms": args.num_atoms,
|
||||||
|
"v_min": args.v_min,
|
||||||
|
"v_max": args.v_max,
|
||||||
|
"hidden_dim": args.critic_hidden_dim,
|
||||||
|
"device": device,
|
||||||
|
}
|
||||||
|
|
||||||
|
if env_type == "mtbench":
|
||||||
|
actor_kwargs["n_obs"] = n_obs - envs.num_tasks + args.task_embedding_dim
|
||||||
|
critic_kwargs["n_obs"] = n_critic_obs - envs.num_tasks + args.task_embedding_dim
|
||||||
|
actor_kwargs["num_tasks"] = envs.num_tasks
|
||||||
|
actor_kwargs["task_embedding_dim"] = args.task_embedding_dim
|
||||||
|
critic_kwargs["num_tasks"] = envs.num_tasks
|
||||||
|
critic_kwargs["task_embedding_dim"] = args.task_embedding_dim
|
||||||
|
|
||||||
|
if args.agent == "fasttd3":
|
||||||
|
if env_type in ["mtbench"]:
|
||||||
|
from reppo.network_utils.fast_td3_nets import (
|
||||||
|
MultiTaskActor,
|
||||||
|
MultiTaskCritic,
|
||||||
|
)
|
||||||
|
|
||||||
|
actor_cls = MultiTaskActor
|
||||||
|
critic_cls = MultiTaskCritic
|
||||||
|
else:
|
||||||
|
from reppo.network_utils.fast_td3_nets import Actor, Critic
|
||||||
|
|
||||||
|
actor_cls = Actor
|
||||||
|
critic_cls = Critic
|
||||||
|
|
||||||
|
print("Using FastTD3")
|
||||||
|
elif args.agent == "fasttd3_simbav2":
|
||||||
|
if env_type in ["mtbench"]:
|
||||||
|
from reppo.network_utils.fast_td3_nets_simbav2 import (
|
||||||
|
MultiTaskActor,
|
||||||
|
MultiTaskCritic,
|
||||||
|
)
|
||||||
|
|
||||||
|
actor_cls = MultiTaskActor
|
||||||
|
critic_cls = MultiTaskCritic
|
||||||
|
else:
|
||||||
|
from reppo.network_utils.fast_td3_nets_simbav2 import Actor, Critic
|
||||||
|
|
||||||
|
actor_cls = Actor
|
||||||
|
critic_cls = Critic
|
||||||
|
|
||||||
|
print("Using FastTD3 + SimbaV2")
|
||||||
|
actor_kwargs.pop("init_scale")
|
||||||
|
actor_kwargs.update(
|
||||||
|
{
|
||||||
|
"scaler_init": math.sqrt(2.0 / args.actor_hidden_dim),
|
||||||
|
"scaler_scale": math.sqrt(2.0 / args.actor_hidden_dim),
|
||||||
|
"alpha_init": 1.0 / (args.actor_num_blocks + 1),
|
||||||
|
"alpha_scale": 1.0 / math.sqrt(args.actor_hidden_dim),
|
||||||
|
"expansion": 4,
|
||||||
|
"c_shift": 3.0,
|
||||||
|
"num_blocks": args.actor_num_blocks,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
critic_kwargs.update(
|
||||||
|
{
|
||||||
|
"scaler_init": math.sqrt(2.0 / args.critic_hidden_dim),
|
||||||
|
"scaler_scale": math.sqrt(2.0 / args.critic_hidden_dim),
|
||||||
|
"alpha_init": 1.0 / (args.critic_num_blocks + 1),
|
||||||
|
"alpha_scale": 1.0 / math.sqrt(args.critic_hidden_dim),
|
||||||
|
"num_blocks": args.critic_num_blocks,
|
||||||
|
"expansion": 4,
|
||||||
|
"c_shift": 3.0,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Agent {args.agent} not supported")
|
||||||
|
|
||||||
|
actor = actor_cls(**actor_kwargs)
|
||||||
|
|
||||||
|
if env_type in ["mtbench"]:
|
||||||
|
# Python 3.8 doesn't support 'from_module' in tensordict
|
||||||
|
policy = actor.explore
|
||||||
|
else:
|
||||||
|
from tensordict import from_module
|
||||||
|
|
||||||
|
actor_detach = actor_cls(**actor_kwargs)
|
||||||
|
# Copy params to actor_detach without grad
|
||||||
|
from_module(actor).data.to_module(actor_detach)
|
||||||
|
policy = actor_detach.explore
|
||||||
|
|
||||||
|
qnet = critic_cls(**critic_kwargs)
|
||||||
|
qnet_target = critic_cls(**critic_kwargs)
|
||||||
|
qnet_target.load_state_dict(qnet.state_dict())
|
||||||
|
|
||||||
|
q_optimizer = optim.AdamW(
|
||||||
|
list(qnet.parameters()),
|
||||||
|
lr=torch.tensor(args.critic_learning_rate, device=device),
|
||||||
|
weight_decay=args.weight_decay,
|
||||||
|
)
|
||||||
|
actor_optimizer = optim.AdamW(
|
||||||
|
list(actor.parameters()),
|
||||||
|
lr=torch.tensor(args.actor_learning_rate, device=device),
|
||||||
|
weight_decay=args.weight_decay,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add learning rate schedulers
|
||||||
|
q_scheduler = optim.lr_scheduler.CosineAnnealingLR(
|
||||||
|
q_optimizer,
|
||||||
|
T_max=args.total_timesteps,
|
||||||
|
eta_min=torch.tensor(args.critic_learning_rate_end, device=device),
|
||||||
|
)
|
||||||
|
actor_scheduler = optim.lr_scheduler.CosineAnnealingLR(
|
||||||
|
actor_optimizer,
|
||||||
|
T_max=args.total_timesteps,
|
||||||
|
eta_min=torch.tensor(args.actor_learning_rate_end, device=device),
|
||||||
|
)
|
||||||
|
|
||||||
|
rb = SimpleReplayBuffer(
|
||||||
|
n_env=args.num_envs,
|
||||||
|
buffer_size=args.buffer_size,
|
||||||
|
n_obs=n_obs,
|
||||||
|
n_act=n_act,
|
||||||
|
n_critic_obs=n_critic_obs,
|
||||||
|
asymmetric_obs=envs.asymmetric_obs,
|
||||||
|
playground_mode=env_type == "mujoco_playground",
|
||||||
|
n_steps=args.num_steps,
|
||||||
|
gamma=args.gamma,
|
||||||
|
device=device,
|
||||||
|
)
|
||||||
|
|
||||||
|
policy_noise = args.policy_noise
|
||||||
|
noise_clip = args.noise_clip
|
||||||
|
|
||||||
|
def evaluate():
|
||||||
|
obs_normalizer.eval()
|
||||||
|
num_eval_envs = eval_envs.num_envs
|
||||||
|
episode_returns = torch.zeros(num_eval_envs, device=device)
|
||||||
|
episode_lengths = torch.zeros(num_eval_envs, device=device)
|
||||||
|
done_masks = torch.zeros(num_eval_envs, dtype=torch.bool, device=device)
|
||||||
|
|
||||||
|
if env_type == "isaaclab":
|
||||||
|
obs = eval_envs.reset(random_start_init=False)
|
||||||
|
else:
|
||||||
|
obs = eval_envs.reset()
|
||||||
|
|
||||||
|
# Run for a fixed number of steps
|
||||||
|
for i in range(eval_envs.max_episode_steps):
|
||||||
|
with (
|
||||||
|
torch.no_grad(),
|
||||||
|
autocast(
|
||||||
|
device_type=amp_device_type, dtype=amp_dtype, enabled=amp_enabled
|
||||||
|
),
|
||||||
|
):
|
||||||
|
obs = normalize_obs(obs)
|
||||||
|
actions = actor(obs)
|
||||||
|
|
||||||
|
next_obs, rewards, dones, _, infos = eval_envs.step(actions.float())
|
||||||
|
|
||||||
|
if env_type == "mtbench":
|
||||||
|
# We only report success rate in MTBench evaluation
|
||||||
|
rewards = (
|
||||||
|
infos["episode"]["success"].float() if "episode" in infos else 0.0
|
||||||
|
)
|
||||||
|
episode_returns = torch.where(
|
||||||
|
~done_masks, episode_returns + rewards, episode_returns
|
||||||
|
)
|
||||||
|
episode_lengths = torch.where(
|
||||||
|
~done_masks, episode_lengths + 1, episode_lengths
|
||||||
|
)
|
||||||
|
if env_type == "mtbench" and "episode" in infos:
|
||||||
|
dones = dones | infos["episode"]["success"]
|
||||||
|
done_masks = torch.logical_or(done_masks, dones)
|
||||||
|
if done_masks.all():
|
||||||
|
break
|
||||||
|
obs = next_obs
|
||||||
|
|
||||||
|
obs_normalizer.train()
|
||||||
|
return episode_returns.mean().item(), episode_lengths.mean().item()
|
||||||
|
|
||||||
|
def update_main(data, logs_dict):
|
||||||
|
with autocast(
|
||||||
|
device_type=amp_device_type, dtype=amp_dtype, enabled=amp_enabled
|
||||||
|
):
|
||||||
|
observations = data["observations"]
|
||||||
|
next_observations = data["next"]["observations"]
|
||||||
|
if envs.asymmetric_obs:
|
||||||
|
critic_observations = data["critic_observations"]
|
||||||
|
next_critic_observations = data["next"]["critic_observations"]
|
||||||
|
else:
|
||||||
|
critic_observations = observations
|
||||||
|
next_critic_observations = next_observations
|
||||||
|
actions = data["actions"]
|
||||||
|
rewards = data["next"]["rewards"]
|
||||||
|
dones = data["next"]["dones"].bool()
|
||||||
|
truncations = data["next"]["truncations"].bool()
|
||||||
|
if args.disable_bootstrap:
|
||||||
|
bootstrap = (~dones).float()
|
||||||
|
else:
|
||||||
|
bootstrap = (truncations | ~dones).float()
|
||||||
|
|
||||||
|
clipped_noise = torch.randn_like(actions)
|
||||||
|
clipped_noise = clipped_noise.mul(policy_noise).clamp(
|
||||||
|
-noise_clip, noise_clip
|
||||||
|
)
|
||||||
|
|
||||||
|
next_state_actions = (actor(next_observations) + clipped_noise).clamp(
|
||||||
|
action_low, action_high
|
||||||
|
)
|
||||||
|
discount = args.gamma ** data["next"]["effective_n_steps"]
|
||||||
|
|
||||||
|
with torch.no_grad():
|
||||||
|
qf1_next_target_projected, qf2_next_target_projected = (
|
||||||
|
qnet_target.projection(
|
||||||
|
next_critic_observations,
|
||||||
|
next_state_actions,
|
||||||
|
rewards,
|
||||||
|
bootstrap,
|
||||||
|
discount,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
qf1_next_target_value = qnet_target.get_value(qf1_next_target_projected)
|
||||||
|
qf2_next_target_value = qnet_target.get_value(qf2_next_target_projected)
|
||||||
|
if args.use_cdq:
|
||||||
|
qf_next_target_dist = torch.where(
|
||||||
|
qf1_next_target_value.unsqueeze(1)
|
||||||
|
< qf2_next_target_value.unsqueeze(1),
|
||||||
|
qf1_next_target_projected,
|
||||||
|
qf2_next_target_projected,
|
||||||
|
)
|
||||||
|
qf1_next_target_dist = qf2_next_target_dist = qf_next_target_dist
|
||||||
|
else:
|
||||||
|
qf1_next_target_dist, qf2_next_target_dist = (
|
||||||
|
qf1_next_target_projected,
|
||||||
|
qf2_next_target_projected,
|
||||||
|
)
|
||||||
|
|
||||||
|
qf1, qf2 = qnet(critic_observations, actions)
|
||||||
|
qf1_loss = -torch.sum(
|
||||||
|
qf1_next_target_dist * F.log_softmax(qf1, dim=1), dim=1
|
||||||
|
).mean()
|
||||||
|
qf2_loss = -torch.sum(
|
||||||
|
qf2_next_target_dist * F.log_softmax(qf2, dim=1), dim=1
|
||||||
|
).mean()
|
||||||
|
qf_loss = qf1_loss + qf2_loss
|
||||||
|
|
||||||
|
q_optimizer.zero_grad(set_to_none=True)
|
||||||
|
scaler.scale(qf_loss).backward()
|
||||||
|
scaler.unscale_(q_optimizer)
|
||||||
|
|
||||||
|
critic_grad_norm = torch.nn.utils.clip_grad_norm_(
|
||||||
|
qnet.parameters(),
|
||||||
|
max_norm=args.max_grad_norm if args.max_grad_norm > 0 else float("inf"),
|
||||||
|
)
|
||||||
|
scaler.step(q_optimizer)
|
||||||
|
scaler.update()
|
||||||
|
q_scheduler.step()
|
||||||
|
|
||||||
|
logs_dict["critic_grad_norm"] = critic_grad_norm.detach()
|
||||||
|
logs_dict["qf_loss"] = qf_loss.detach()
|
||||||
|
logs_dict["qf_max"] = qf1_next_target_value.max().detach()
|
||||||
|
logs_dict["qf_min"] = qf1_next_target_value.min().detach()
|
||||||
|
return logs_dict
|
||||||
|
|
||||||
|
def update_pol(data, logs_dict):
|
||||||
|
with autocast(
|
||||||
|
device_type=amp_device_type, dtype=amp_dtype, enabled=amp_enabled
|
||||||
|
):
|
||||||
|
critic_observations = (
|
||||||
|
data["critic_observations"]
|
||||||
|
if envs.asymmetric_obs
|
||||||
|
else data["observations"]
|
||||||
|
)
|
||||||
|
|
||||||
|
qf1, qf2 = qnet(critic_observations, actor(data["observations"]))
|
||||||
|
qf1_value = qnet.get_value(F.softmax(qf1, dim=1))
|
||||||
|
qf2_value = qnet.get_value(F.softmax(qf2, dim=1))
|
||||||
|
if args.use_cdq:
|
||||||
|
qf_value = torch.minimum(qf1_value, qf2_value)
|
||||||
|
else:
|
||||||
|
qf_value = (qf1_value + qf2_value) / 2.0
|
||||||
|
actor_loss = -qf_value.mean()
|
||||||
|
|
||||||
|
actor_optimizer.zero_grad(set_to_none=True)
|
||||||
|
scaler.scale(actor_loss).backward()
|
||||||
|
scaler.unscale_(actor_optimizer)
|
||||||
|
actor_grad_norm = torch.nn.utils.clip_grad_norm_(
|
||||||
|
actor.parameters(),
|
||||||
|
max_norm=args.max_grad_norm if args.max_grad_norm > 0 else float("inf"),
|
||||||
|
)
|
||||||
|
scaler.step(actor_optimizer)
|
||||||
|
scaler.update()
|
||||||
|
actor_scheduler.step()
|
||||||
|
logs_dict["actor_grad_norm"] = actor_grad_norm.detach()
|
||||||
|
logs_dict["actor_loss"] = actor_loss.detach()
|
||||||
|
return logs_dict
|
||||||
|
|
||||||
|
if args.compile:
|
||||||
|
mode = None
|
||||||
|
update_main = torch.compile(update_main, mode=mode)
|
||||||
|
update_pol = torch.compile(update_pol, mode=mode)
|
||||||
|
policy = torch.compile(policy, mode=mode)
|
||||||
|
normalize_obs = torch.compile(obs_normalizer.forward, mode=mode)
|
||||||
|
normalize_critic_obs = torch.compile(critic_obs_normalizer.forward, mode=mode)
|
||||||
|
if args.reward_normalization:
|
||||||
|
update_stats = torch.compile(reward_normalizer.update_stats, mode=mode)
|
||||||
|
normalize_reward = torch.compile(reward_normalizer.forward, mode=mode)
|
||||||
|
else:
|
||||||
|
normalize_obs = obs_normalizer.forward
|
||||||
|
normalize_critic_obs = critic_obs_normalizer.forward
|
||||||
|
if args.reward_normalization:
|
||||||
|
update_stats = reward_normalizer.update_stats
|
||||||
|
normalize_reward = reward_normalizer.forward
|
||||||
|
|
||||||
|
if envs.asymmetric_obs:
|
||||||
|
obs, critic_obs = envs.reset_with_critic_obs()
|
||||||
|
critic_obs = torch.as_tensor(critic_obs, device=device, dtype=torch.float)
|
||||||
|
else:
|
||||||
|
obs = envs.reset()
|
||||||
|
if args.checkpoint_path:
|
||||||
|
# Load checkpoint if specified
|
||||||
|
torch_checkpoint = torch.load(
|
||||||
|
f"{args.checkpoint_path}", map_location=device, weights_only=False
|
||||||
|
)
|
||||||
|
actor.load_state_dict(torch_checkpoint["actor_state_dict"])
|
||||||
|
obs_normalizer.load_state_dict(torch_checkpoint["obs_normalizer_state"])
|
||||||
|
critic_obs_normalizer.load_state_dict(
|
||||||
|
torch_checkpoint["critic_obs_normalizer_state"]
|
||||||
|
)
|
||||||
|
qnet.load_state_dict(torch_checkpoint["qnet_state_dict"])
|
||||||
|
qnet_target.load_state_dict(torch_checkpoint["qnet_target_state_dict"])
|
||||||
|
global_step = torch_checkpoint["global_step"]
|
||||||
|
else:
|
||||||
|
global_step = 0
|
||||||
|
|
||||||
|
dones = None
|
||||||
|
pbar = tqdm.tqdm(total=args.total_timesteps, initial=global_step)
|
||||||
|
start_time = None
|
||||||
|
desc = ""
|
||||||
|
|
||||||
|
while global_step < args.total_timesteps:
|
||||||
|
logs_dict = TensorDict()
|
||||||
|
if (
|
||||||
|
start_time is None
|
||||||
|
and global_step >= args.measure_burnin + args.learning_starts
|
||||||
|
):
|
||||||
|
start_time = time.time()
|
||||||
|
measure_burnin = global_step
|
||||||
|
|
||||||
|
with (
|
||||||
|
torch.no_grad(),
|
||||||
|
autocast(device_type=amp_device_type, dtype=amp_dtype, enabled=amp_enabled),
|
||||||
|
):
|
||||||
|
norm_obs = normalize_obs(obs)
|
||||||
|
actions = policy(obs=norm_obs, dones=dones)
|
||||||
|
|
||||||
|
next_obs, rewards, dones, _, infos = envs.step(actions.float())
|
||||||
|
print(infos["time_outs"])
|
||||||
|
truncations = infos["time_outs"]
|
||||||
|
|
||||||
|
if args.reward_normalization:
|
||||||
|
if env_type == "mtbench":
|
||||||
|
task_ids_one_hot = obs[..., -envs.num_tasks :]
|
||||||
|
task_indices = torch.argmax(task_ids_one_hot, dim=1)
|
||||||
|
update_stats(rewards, dones.float(), task_ids=task_indices)
|
||||||
|
else:
|
||||||
|
update_stats(rewards, dones.float())
|
||||||
|
|
||||||
|
if envs.asymmetric_obs:
|
||||||
|
next_critic_obs = infos["observations"]["critic"]
|
||||||
|
|
||||||
|
# Compute 'true' next_obs and next_critic_obs for saving
|
||||||
|
true_next_obs = torch.where(
|
||||||
|
dones[:, None] > 0, infos["observations"]["raw"]["obs"], next_obs
|
||||||
|
)
|
||||||
|
if envs.asymmetric_obs:
|
||||||
|
true_next_critic_obs = torch.where(
|
||||||
|
dones[:, None] > 0,
|
||||||
|
infos["observations"]["raw"]["critic_obs"],
|
||||||
|
next_critic_obs,
|
||||||
|
)
|
||||||
|
transition = TensorDict(
|
||||||
|
{
|
||||||
|
"observations": obs,
|
||||||
|
"actions": torch.as_tensor(actions, device=device, dtype=torch.float),
|
||||||
|
"next": {
|
||||||
|
"observations": true_next_obs,
|
||||||
|
"rewards": torch.as_tensor(
|
||||||
|
rewards, device=device, dtype=torch.float
|
||||||
|
),
|
||||||
|
"truncations": truncations.long(),
|
||||||
|
"dones": dones.long(),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
batch_size=(envs.num_envs,),
|
||||||
|
device=device,
|
||||||
|
)
|
||||||
|
if envs.asymmetric_obs:
|
||||||
|
transition["critic_observations"] = critic_obs
|
||||||
|
transition["next"]["critic_observations"] = true_next_critic_obs
|
||||||
|
|
||||||
|
obs = next_obs
|
||||||
|
if envs.asymmetric_obs:
|
||||||
|
critic_obs = next_critic_obs
|
||||||
|
|
||||||
|
rb.extend(transition)
|
||||||
|
|
||||||
|
batch_size = args.batch_size // args.num_envs
|
||||||
|
if global_step > args.learning_starts:
|
||||||
|
for i in range(args.num_updates):
|
||||||
|
data = rb.sample(batch_size)
|
||||||
|
data["observations"] = normalize_obs(data["observations"])
|
||||||
|
data["next"]["observations"] = normalize_obs(
|
||||||
|
data["next"]["observations"]
|
||||||
|
)
|
||||||
|
raw_rewards = data["next"]["rewards"]
|
||||||
|
if env_type in ["mtbench"] and args.reward_normalization:
|
||||||
|
# Multi-task reward normalization
|
||||||
|
task_ids_one_hot = data["observations"][..., -envs.num_tasks :]
|
||||||
|
task_indices = torch.argmax(task_ids_one_hot, dim=1)
|
||||||
|
data["next"]["rewards"] = normalize_reward(
|
||||||
|
raw_rewards, task_ids=task_indices
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
data["next"]["rewards"] = normalize_reward(raw_rewards)
|
||||||
|
if envs.asymmetric_obs:
|
||||||
|
data["critic_observations"] = normalize_critic_obs(
|
||||||
|
data["critic_observations"]
|
||||||
|
)
|
||||||
|
data["next"]["critic_observations"] = normalize_critic_obs(
|
||||||
|
data["next"]["critic_observations"]
|
||||||
|
)
|
||||||
|
logs_dict = update_main(data, logs_dict)
|
||||||
|
if args.num_updates > 1:
|
||||||
|
if i % args.policy_frequency == 1:
|
||||||
|
logs_dict = update_pol(data, logs_dict)
|
||||||
|
else:
|
||||||
|
if global_step % args.policy_frequency == 0:
|
||||||
|
logs_dict = update_pol(data, logs_dict)
|
||||||
|
|
||||||
|
for param, target_param in zip(
|
||||||
|
qnet.parameters(), qnet_target.parameters()
|
||||||
|
):
|
||||||
|
target_param.data.copy_(
|
||||||
|
args.tau * param.data + (1 - args.tau) * target_param.data
|
||||||
|
)
|
||||||
|
|
||||||
|
if global_step % 100 == 0 and start_time is not None:
|
||||||
|
speed = (global_step - measure_burnin) / (time.time() - start_time)
|
||||||
|
pbar.set_description(f"{speed: 4.4f} sps, " + desc)
|
||||||
|
with torch.no_grad():
|
||||||
|
logs = {
|
||||||
|
"actor_loss": logs_dict["actor_loss"].mean(),
|
||||||
|
"qf_loss": logs_dict["qf_loss"].mean(),
|
||||||
|
"qf_max": logs_dict["qf_max"].mean(),
|
||||||
|
"qf_min": logs_dict["qf_min"].mean(),
|
||||||
|
"actor_grad_norm": logs_dict["actor_grad_norm"].mean(),
|
||||||
|
"critic_grad_norm": logs_dict["critic_grad_norm"].mean(),
|
||||||
|
"env_rewards": rewards.mean(),
|
||||||
|
"buffer_rewards": raw_rewards.mean(),
|
||||||
|
}
|
||||||
|
|
||||||
|
if args.eval_interval > 0 and global_step % args.eval_interval == 0:
|
||||||
|
print(f"Evaluating at global step {global_step}")
|
||||||
|
eval_avg_return, eval_avg_length = evaluate()
|
||||||
|
if env_type in ["humanoid_bench", "isaaclab", "mtbench"]:
|
||||||
|
# NOTE: Hacky way of evaluating performance, but just works
|
||||||
|
obs = envs.reset()
|
||||||
|
logs["eval_avg_return"] = eval_avg_return
|
||||||
|
logs["eval_avg_length"] = eval_avg_length
|
||||||
|
|
||||||
|
if args.use_wandb:
|
||||||
|
wandb.log(
|
||||||
|
{
|
||||||
|
"speed": speed,
|
||||||
|
"frame": global_step * args.num_envs,
|
||||||
|
"critic_lr": q_scheduler.get_last_lr()[0],
|
||||||
|
"actor_lr": actor_scheduler.get_last_lr()[0],
|
||||||
|
**logs,
|
||||||
|
},
|
||||||
|
step=global_step,
|
||||||
|
)
|
||||||
|
|
||||||
|
if (
|
||||||
|
args.save_interval > 0
|
||||||
|
and global_step > 0
|
||||||
|
and global_step % args.save_interval == 0
|
||||||
|
):
|
||||||
|
print(f"Saving model at global step {global_step}")
|
||||||
|
save_params(
|
||||||
|
global_step,
|
||||||
|
actor,
|
||||||
|
qnet,
|
||||||
|
qnet_target,
|
||||||
|
obs_normalizer,
|
||||||
|
critic_obs_normalizer,
|
||||||
|
args,
|
||||||
|
f"models/{run_name}_{global_step}.pt",
|
||||||
|
)
|
||||||
|
|
||||||
|
global_step += 1
|
||||||
|
pbar.update(1)
|
||||||
|
|
||||||
|
save_params(
|
||||||
|
global_step,
|
||||||
|
actor,
|
||||||
|
qnet,
|
||||||
|
qnet_target,
|
||||||
|
obs_normalizer,
|
||||||
|
critic_obs_normalizer,
|
||||||
|
args,
|
||||||
|
f"models/{run_name}_final.pt",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: start training only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||||
550
reppo/torchrl/hyperparams.py
Normal file
550
reppo/torchrl/hyperparams.py
Normal file
@ -0,0 +1,550 @@
|
|||||||
|
import os
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import tyro
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class BaseArgs:
|
||||||
|
# Default hyperparameters -- specifically for HumanoidBench
|
||||||
|
# See MuJoCoPlaygroundArgs for default hyperparameters for MuJoCo Playground
|
||||||
|
# See IsaacLabArgs for default hyperparameters for IsaacLab
|
||||||
|
env_name: str = "HumanoidRun"
|
||||||
|
"""the id of the environment"""
|
||||||
|
agent: str = "fasttd3"
|
||||||
|
"""the agent to use: currently support [fasttd3, fasttd3_simbav2]"""
|
||||||
|
seed: int = 1
|
||||||
|
"""seed of the experiment"""
|
||||||
|
torch_deterministic: bool = True
|
||||||
|
"""if toggled, `torch.backends.cudnn.deterministic=False`"""
|
||||||
|
cuda: bool = True
|
||||||
|
"""if toggled, cuda will be enabled by default"""
|
||||||
|
device_rank: int = 0
|
||||||
|
"""the rank of the device"""
|
||||||
|
exp_name: str = os.path.basename(__file__)[: -len(".py")]
|
||||||
|
"""the name of this experiment"""
|
||||||
|
project: str = "FastTD3"
|
||||||
|
"""the project name"""
|
||||||
|
use_wandb: bool = True
|
||||||
|
"""whether to use wandb"""
|
||||||
|
checkpoint_path: str = None
|
||||||
|
"""the path to the checkpoint file"""
|
||||||
|
num_envs: int = 128
|
||||||
|
"""the number of environments to run in parallel"""
|
||||||
|
num_eval_envs: int = 128
|
||||||
|
"""the number of evaluation environments to run in parallel (only valid for MuJoCo Playground)"""
|
||||||
|
total_timesteps: int = 50000
|
||||||
|
"""total timesteps of the experiments"""
|
||||||
|
critic_learning_rate: float = 3e-4
|
||||||
|
"""the learning rate of the critic"""
|
||||||
|
actor_learning_rate: float = 3e-4
|
||||||
|
"""the learning rate for the actor"""
|
||||||
|
critic_learning_rate_end: float = 3e-4
|
||||||
|
"""the learning rate of the critic at the end of training"""
|
||||||
|
actor_learning_rate_end: float = 3e-4
|
||||||
|
"""the learning rate for the actor at the end of training"""
|
||||||
|
buffer_size: int = 1024 * 50
|
||||||
|
"""the replay memory buffer size"""
|
||||||
|
num_steps: int = 1
|
||||||
|
"""the number of steps to use for the multi-step return"""
|
||||||
|
gamma: float = 0.99
|
||||||
|
"""the discount factor gamma"""
|
||||||
|
tau: float = 0.1
|
||||||
|
"""target smoothing coefficient (default: 0.005)"""
|
||||||
|
batch_size: int = 32768
|
||||||
|
"""the batch size of sample from the replay memory"""
|
||||||
|
policy_noise: float = 0.001
|
||||||
|
"""the scale of policy noise"""
|
||||||
|
std_min: float = 0.001
|
||||||
|
"""the minimum scale of noise"""
|
||||||
|
std_max: float = 0.4
|
||||||
|
"""the maximum scale of noise"""
|
||||||
|
learning_starts: int = 10
|
||||||
|
"""timestep to start learning"""
|
||||||
|
policy_frequency: int = 2
|
||||||
|
"""the frequency of training policy (delayed)"""
|
||||||
|
noise_clip: float = 0.5
|
||||||
|
"""noise clip parameter of the Target Policy Smoothing Regularization"""
|
||||||
|
num_updates: int = 2
|
||||||
|
"""the number of updates to perform per step"""
|
||||||
|
init_scale: float = 0.01
|
||||||
|
"""the scale of the initial parameters"""
|
||||||
|
num_atoms: int = 101
|
||||||
|
"""the number of atoms"""
|
||||||
|
v_min: float = -250.0
|
||||||
|
"""the minimum value of the support"""
|
||||||
|
v_max: float = 250.0
|
||||||
|
"""the maximum value of the support"""
|
||||||
|
critic_hidden_dim: int = 1024
|
||||||
|
"""the hidden dimension of the critic network"""
|
||||||
|
actor_hidden_dim: int = 512
|
||||||
|
"""the hidden dimension of the actor network"""
|
||||||
|
critic_num_blocks: int = 2
|
||||||
|
"""(SimbaV2 only) the number of blocks in the critic network"""
|
||||||
|
actor_num_blocks: int = 1
|
||||||
|
"""(SimbaV2 only) the number of blocks in the actor network"""
|
||||||
|
use_cdq: bool = True
|
||||||
|
"""whether to use Clipped Double Q-learning"""
|
||||||
|
measure_burnin: int = 3
|
||||||
|
"""Number of burn-in iterations for speed measure."""
|
||||||
|
eval_interval: int = 2500
|
||||||
|
"""the interval to evaluate the model"""
|
||||||
|
render_interval: int = 500000
|
||||||
|
"""the interval to render the model"""
|
||||||
|
compile: bool = True
|
||||||
|
"""whether to use torch.compile."""
|
||||||
|
compile_mode: str = "reduce-overhead"
|
||||||
|
"""the mode of torch.compile."""
|
||||||
|
obs_normalization: bool = True
|
||||||
|
"""whether to enable observation normalization"""
|
||||||
|
reward_normalization: bool = False
|
||||||
|
"""whether to enable reward normalization"""
|
||||||
|
use_grad_norm_clipping: bool = False
|
||||||
|
"""whether to use gradient norm clipping."""
|
||||||
|
max_grad_norm: float = 0.0
|
||||||
|
"""the maximum gradient norm"""
|
||||||
|
amp: bool = True
|
||||||
|
"""whether to use amp"""
|
||||||
|
amp_dtype: str = "bf16"
|
||||||
|
"""the dtype of the amp"""
|
||||||
|
disable_bootstrap: bool = False
|
||||||
|
"""Whether to disable bootstrap in the critic learning"""
|
||||||
|
|
||||||
|
use_domain_randomization: bool = False
|
||||||
|
"""(Playground only) whether to use domain randomization"""
|
||||||
|
use_push_randomization: bool = False
|
||||||
|
"""(Playground only) whether to use push randomization"""
|
||||||
|
use_tuned_reward: bool = False
|
||||||
|
"""(Playground only) Use tuned reward for G1"""
|
||||||
|
action_bounds: float = 1.0
|
||||||
|
"""(IsaacLab only) the bounds of the action space (-action_bounds, action_bounds)"""
|
||||||
|
task_embedding_dim: int = 32
|
||||||
|
"""the dimension of the task embedding"""
|
||||||
|
|
||||||
|
weight_decay: float = 0.1
|
||||||
|
"""the weight decay of the optimizer"""
|
||||||
|
save_interval: int = 5000
|
||||||
|
"""the interval to save the model"""
|
||||||
|
|
||||||
|
|
||||||
|
def get_args():
    """
    Parse command-line arguments and return the appropriate Args instance based on env_name.

    The command line is parsed twice: once with ``BaseArgs`` purely to read
    ``env_name``, then again with the Args class selected for that
    environment, so that class's defaults apply to every option not given
    explicitly.

    Returns:
        An instance of the selected ``BaseArgs`` subclass.
    """
    # First, parse all arguments using the base Args class
    base_args = tyro.cli(BaseArgs)

    # Map environment names to their specific Args classes
    # For tasks not here, default hyperparameters are used
    # See below links for available task list
    # - HumanoidBench (https://arxiv.org/abs/2403.10506)
    # - IsaacLab (https://isaac-sim.github.io/IsaacLab/main/source/overview/environments.html)
    # - MuJoCo Playground (https://arxiv.org/abs/2502.08844)
    env_to_args_class = {
        # HumanoidBench
        # NOTE: These tasks are not full list of HumanoidBench tasks
        "h1hand-reach-v0": H1HandReachArgs,
        "h1hand-balance-simple-v0": H1HandBalanceSimpleArgs,
        "h1hand-balance-hard-v0": H1HandBalanceHardArgs,
        "h1hand-pole-v0": H1HandPoleArgs,
        "h1hand-truck-v0": H1HandTruckArgs,
        "h1hand-maze-v0": H1HandMazeArgs,
        "h1hand-push-v0": H1HandPushArgs,
        "h1hand-basketball-v0": H1HandBasketballArgs,
        "h1hand-window-v0": H1HandWindowArgs,
        "h1hand-package-v0": H1HandPackageArgs,
        # MuJoCo Playground
        # NOTE: These tasks are not full list of MuJoCo Playground tasks
        "G1JoystickFlatTerrain": G1JoystickFlatTerrainArgs,
        "G1JoystickRoughTerrain": G1JoystickRoughTerrainArgs,
        "T1JoystickFlatTerrain": T1JoystickFlatTerrainArgs,
        "T1JoystickRoughTerrain": T1JoystickRoughTerrainArgs,
        # These two classes are defined in this file but were never
        # registered, so their per-task defaults could not be selected.
        "T1LowDofJoystickFlatTerrain": T1LowDofJoystickFlatTerrainArgs,
        "T1LowDofJoystickRoughTerrain": T1LowDofJoystickRoughTerrainArgs,
        "LeapCubeReorient": LeapCubeReorientArgs,
        "LeapCubeRotateZAxis": LeapCubeRotateZAxisArgs,
        "Go1JoystickFlatTerrain": Go1JoystickFlatTerrainArgs,
        "Go1JoystickRoughTerrain": Go1JoystickRoughTerrainArgs,
        "Go1Getup": Go1GetupArgs,
        "CheetahRun": CheetahRunArgs,  # NOTE: Example config for DeepMind Control Suite
        # IsaacLab
        # NOTE: These tasks are not full list of IsaacLab tasks
        "Isaac-Lift-Cube-Franka-v0": IsaacLiftCubeFrankaArgs,
        "Isaac-Open-Drawer-Franka-v0": IsaacOpenDrawerFrankaArgs,
        "Isaac-Velocity-Flat-H1-v0": IsaacVelocityFlatH1Args,
        "Isaac-Velocity-Flat-G1-v0": IsaacVelocityFlatG1Args,
        "Isaac-Velocity-Rough-H1-v0": IsaacVelocityRoughH1Args,
        "Isaac-Velocity-Rough-G1-v0": IsaacVelocityRoughG1Args,
        "Isaac-Repose-Cube-Allegro-Direct-v0": IsaacReposeCubeAllegroDirectArgs,
        "Isaac-Repose-Cube-Shadow-Direct-v0": IsaacReposeCubeShadowDirectArgs,
        # MTBench
        "MTBench-meta-world-v2-mt10": MetaWorldMT10Args,
        "MTBench-meta-world-v2-mt50": MetaWorldMT50Args,
    }

    env_name = base_args.env_name
    # If the provided env_name has a specific Args class, use it
    specific_args_class = env_to_args_class.get(env_name)
    if specific_args_class is None:
        # Otherwise fall back to the suite-level defaults, chosen by prefix.
        if env_name.startswith(("h1hand-", "h1-")):
            # HumanoidBench
            specific_args_class = HumanoidBenchArgs
        elif env_name.startswith("Isaac-"):
            # IsaacLab
            specific_args_class = IsaacLabArgs
        elif env_name.startswith("MTBench-"):
            # MTBench
            specific_args_class = MTBenchArgs
        else:
            # MuJoCo Playground
            specific_args_class = MuJoCoPlaygroundArgs

    # Re-parse with the specific class, maintaining any user overrides
    return tyro.cli(specific_args_class)
|
||||||
|
|
||||||
|
|
||||||
|
# Suite-level defaults for HumanoidBench tasks: everything is inherited from
# BaseArgs except the training length.
@dataclass
class HumanoidBenchArgs(BaseArgs):
    # See HumanoidBench (https://arxiv.org/abs/2403.10506) for available task list
    total_timesteps: int = 100_000
|
||||||
|
|
||||||
|
|
||||||
|
# h1hand-reach-v0: HumanoidBench defaults with a wider value support.
@dataclass
class H1HandReachArgs(HumanoidBenchArgs):
    env_name: str = "h1hand-reach-v0"
    v_max: float = 2_000.0
    v_min: float = -2_000.0
|
||||||
|
|
||||||
|
|
||||||
|
# h1hand-balance-simple-v0: HumanoidBench defaults with a longer run.
@dataclass
class H1HandBalanceSimpleArgs(HumanoidBenchArgs):
    env_name: str = "h1hand-balance-simple-v0"
    total_timesteps: int = 200_000
|
||||||
|
|
||||||
|
|
||||||
|
# h1hand-balance-hard-v0: HumanoidBench defaults with a longer run.
@dataclass
class H1HandBalanceHardArgs(HumanoidBenchArgs):
    env_name: str = "h1hand-balance-hard-v0"
    total_timesteps: int = 1_000_000
|
||||||
|
|
||||||
|
|
||||||
|
# h1hand-pole-v0: HumanoidBench defaults with a longer run.
@dataclass
class H1HandPoleArgs(HumanoidBenchArgs):
    env_name: str = "h1hand-pole-v0"
    total_timesteps: int = 150_000
|
||||||
|
|
||||||
|
|
||||||
|
# h1hand-truck-v0.
# This class used to be defined twice in this file (once with
# total_timesteps, once with v_min/v_max). The later definition rebound the
# name, so the total_timesteps override was silently lost. Both sets of
# overrides now live in this single definition.
@dataclass
class H1HandTruckArgs(HumanoidBenchArgs):
    env_name: str = "h1hand-truck-v0"
    total_timesteps: int = 500000
    v_min: float = -1000.0
    v_max: float = 1000.0


# h1hand-maze-v0: HumanoidBench defaults with a wider value support.
@dataclass
class H1HandMazeArgs(HumanoidBenchArgs):
    env_name: str = "h1hand-maze-v0"
    v_min: float = -1000.0
    v_max: float = 1000.0


# h1hand-push-v0: wider value support and a longer run.
@dataclass
class H1HandPushArgs(HumanoidBenchArgs):
    env_name: str = "h1hand-push-v0"
    v_min: float = -1000.0
    v_max: float = 1000.0
    total_timesteps: int = 1000000


# h1hand-basketball-v0: wider value support and a longer run.
@dataclass
class H1HandBasketballArgs(HumanoidBenchArgs):
    env_name: str = "h1hand-basketball-v0"
    v_min: float = -2000.0
    v_max: float = 2000.0
    total_timesteps: int = 250000


# h1hand-window-v0: HumanoidBench defaults with a longer run.
@dataclass
class H1HandWindowArgs(HumanoidBenchArgs):
    env_name: str = "h1hand-window-v0"
    total_timesteps: int = 250000


# h1hand-package-v0: HumanoidBench defaults with a much wider value support.
@dataclass
class H1HandPackageArgs(HumanoidBenchArgs):
    env_name: str = "h1hand-package-v0"
    v_min: float = -10000.0
    v_max: float = 10000.0
|
||||||
|
|
||||||
|
|
||||||
|
# Suite-level defaults for MuJoCo Playground; also the fallback get_args()
# uses for environment names that match no other prefix.
@dataclass
class MuJoCoPlaygroundArgs(BaseArgs):
    # Default hyperparameters for many of Playground environments
    v_min: float = -150.0
    v_max: float = 150.0
    gamma: float = 0.99
    num_envs: int = 1024
    num_eval_envs: int = 1024
    buffer_size: int = 10 * 1024
|
||||||
|
|
||||||
|
# Suite-level defaults for MTBench; reward normalization is switched on here.
@dataclass
class MTBenchArgs(BaseArgs):
    # Default hyperparameters for MTBench
    reward_normalization: bool = True
    gamma: float = 0.97
    num_steps: int = 8
    v_max: float = 10.0
    v_min: float = -10.0
    num_envs: int = 4096
    num_eval_envs: int = 4096
    buffer_size: int = 2048  # 2K is usually enough for MTBench
|
||||||
|
|
||||||
|
|
||||||
|
# Meta-World MT10 preset. Apart from env_name, every override below repeats
# the value already set in MTBenchArgs.
@dataclass
class MetaWorldMT10Args(MTBenchArgs):
    # This config achieves 97 ~ 98% success rate within 10k steps (15-20 mins on A100)
    env_name: str = "MTBench-meta-world-v2-mt10"
    gamma: float = 0.97
    num_steps: int = 8
    num_eval_envs: int = 4096
    num_envs: int = 4096
|
||||||
|
|
||||||
|
|
||||||
|
# Meta-World MT50 preset: twice as many environments as MTBenchArgs and a
# higher discount factor.
@dataclass
class MetaWorldMT50Args(MTBenchArgs):
    # FastTD3 + SimbaV2 achieves >90% success rate within 20k steps (80 mins on A100)
    # Performance further improves with more training steps, slowly.
    env_name: str = "MTBench-meta-world-v2-mt50"
    gamma: float = 0.99
    num_steps: int = 8
    num_eval_envs: int = 8192
    num_envs: int = 8192
|
||||||
|
|
||||||
|
|
||||||
|
# G1JoystickFlatTerrain preset on top of the MuJoCo Playground defaults.
# NOTE(review): batch_size=8129 is not a multiple of num_envs=1024 and is a
# digit swap away from 8192 -- confirm the value is intentional.
@dataclass
class G1JoystickFlatTerrainArgs(MuJoCoPlaygroundArgs):
    env_name: str = "G1JoystickFlatTerrain"
    total_timesteps: int = 100_000
    gamma: float = 0.97
    v_max: float = 10.0
    v_min: float = -10.0
    num_eval_envs: int = 1024
    num_envs: int = 1024
    buffer_size: int = 128  # 1024 * 10
    batch_size: int = 8129
    critic_hidden_dim: int = 1024
|
||||||
|
|
||||||
|
|
||||||
|
# G1JoystickRoughTerrain preset on top of the MuJoCo Playground defaults.
# NOTE(review): batch_size=8129 is not a multiple of num_envs=1024 and is a
# digit swap away from 8192 -- confirm the value is intentional.
@dataclass
class G1JoystickRoughTerrainArgs(MuJoCoPlaygroundArgs):
    env_name: str = "G1JoystickRoughTerrain"
    total_timesteps: int = 100_000
    gamma: float = 0.97
    v_max: float = 10.0
    v_min: float = -10.0
    num_eval_envs: int = 1024
    num_envs: int = 1024
    buffer_size: int = 128  # 1024 * 10
    batch_size: int = 8129
    critic_hidden_dim: int = 1024
|
||||||
|
|
||||||
|
|
||||||
|
# T1JoystickFlatTerrain preset on top of the MuJoCo Playground defaults.
# NOTE(review): batch_size=8129 is not a multiple of num_envs=1024 and is a
# digit swap away from 8192 -- confirm the value is intentional.
@dataclass
class T1JoystickFlatTerrainArgs(MuJoCoPlaygroundArgs):
    env_name: str = "T1JoystickFlatTerrain"
    total_timesteps: int = 100_000
    gamma: float = 0.97
    v_max: float = 10.0
    v_min: float = -10.0
    num_eval_envs: int = 1024
    num_envs: int = 1024
    buffer_size: int = 128  # 1024 * 10
    batch_size: int = 8129
    critic_hidden_dim: int = 1024
|
||||||
|
|
||||||
|
|
||||||
|
# T1JoystickRoughTerrain preset on top of the MuJoCo Playground defaults.
# NOTE(review): batch_size=8129 is not a multiple of num_envs=1024 and is a
# digit swap away from 8192 -- confirm the value is intentional.
@dataclass
class T1JoystickRoughTerrainArgs(MuJoCoPlaygroundArgs):
    env_name: str = "T1JoystickRoughTerrain"
    total_timesteps: int = 100_000
    gamma: float = 0.97
    v_max: float = 10.0
    v_min: float = -10.0
    num_eval_envs: int = 1024
    num_envs: int = 1024
    buffer_size: int = 128  # 1024 * 10
    batch_size: int = 8129
    critic_hidden_dim: int = 1024
|
||||||
|
|
||||||
|
|
||||||
|
# T1LowDofJoystickFlatTerrain: MuJoCo Playground defaults with a longer run.
@dataclass
class T1LowDofJoystickFlatTerrainArgs(MuJoCoPlaygroundArgs):
    env_name: str = "T1LowDofJoystickFlatTerrain"
    total_timesteps: int = 1_000_000
|
||||||
|
|
||||||
|
|
||||||
|
# T1LowDofJoystickRoughTerrain: MuJoCo Playground defaults with a longer run.
@dataclass
class T1LowDofJoystickRoughTerrainArgs(MuJoCoPlaygroundArgs):
    env_name: str = "T1LowDofJoystickRoughTerrain"
    total_timesteps: int = 1_000_000
|
||||||
|
|
||||||
|
|
||||||
|
# CheetahRun preset: 3-step returns, wider value support and more noise than
# the MuJoCo Playground defaults.
@dataclass
class CheetahRunArgs(MuJoCoPlaygroundArgs):
    # NOTE: This config will work for most DMC tasks, though we haven't tested DMC extensively.
    # Future research can consider using LayerNorm as we find it sometimes works better for DMC tasks.
    env_name: str = "CheetahRun"
    num_steps: int = 3
    v_max: float = 500.0
    v_min: float = -500.0
    policy_noise: float = 0.1
    std_min: float = 0.1
|
||||||
|
|
||||||
|
|
||||||
|
# Go1JoystickFlatTerrain preset: larger noise scales and 8 updates per step.
@dataclass
class Go1JoystickFlatTerrainArgs(MuJoCoPlaygroundArgs):
    env_name: str = "Go1JoystickFlatTerrain"
    total_timesteps: int = 50_000
    num_updates: int = 8
    policy_noise: float = 0.2
    std_max: float = 0.8
    std_min: float = 0.2
|
||||||
|
|
||||||
|
|
||||||
|
# Go1JoystickRoughTerrain preset: larger noise scales and 8 updates per step.
@dataclass
class Go1JoystickRoughTerrainArgs(MuJoCoPlaygroundArgs):
    env_name: str = "Go1JoystickRoughTerrain"
    total_timesteps: int = 50_000
    num_updates: int = 8
    policy_noise: float = 0.2
    std_max: float = 0.8
    std_min: float = 0.2
|
||||||
|
|
||||||
|
|
||||||
|
# Go1Getup preset: larger noise scales and 8 updates per step.
@dataclass
class Go1GetupArgs(MuJoCoPlaygroundArgs):
    env_name: str = "Go1Getup"
    total_timesteps: int = 50_000
    num_updates: int = 8
    policy_noise: float = 0.2
    std_max: float = 0.8
    std_min: float = 0.2
|
||||||
|
|
||||||
|
|
||||||
|
# LeapCubeReorient preset: 3-step returns, clipped double Q disabled.
@dataclass
class LeapCubeReorientArgs(MuJoCoPlaygroundArgs):
    env_name: str = "LeapCubeReorient"
    use_cdq: bool = False
    num_steps: int = 3
    gamma: float = 0.99
    v_max: float = 50.0
    v_min: float = -50.0
    policy_noise: float = 0.2
|
||||||
|
|
||||||
|
|
||||||
|
# LeapCubeRotateZAxis preset: 1-step returns, clipped double Q disabled.
@dataclass
class LeapCubeRotateZAxisArgs(MuJoCoPlaygroundArgs):
    env_name: str = "LeapCubeRotateZAxis"
    use_cdq: bool = False
    num_steps: int = 1
    gamma: float = 0.99
    v_max: float = 10.0
    v_min: float = -10.0
    policy_noise: float = 0.2
|
||||||
|
|
||||||
|
|
||||||
|
# Suite-level defaults for IsaacLab tasks.
@dataclass
class IsaacLabArgs(BaseArgs):
    total_timesteps: int = 100_000
    num_envs: int = 4096
    num_eval_envs: int = 4096
    buffer_size: int = 10 * 1024
    num_atoms: int = 251
    v_max: float = 10.0
    v_min: float = -10.0
    std_max: float = 0.4
    action_bounds: float = 1.0
    render_interval: int = 0  # IsaacLab does not support rendering in our codebase
|
||||||
|
|
||||||
|
|
||||||
|
# Isaac-Lift-Cube-Franka-v0 preset: fewer environments, 8 updates per step
# and bootstrapping disabled.
@dataclass
class IsaacLiftCubeFrankaArgs(IsaacLabArgs):
    # Value learning is unstable for Lift Cube task Due to brittle reward shaping
    # Therefore, we need to disable bootstrap from 'reset_obs' in IsaacLab
    # Higher UTD works better for manipulation tasks
    env_name: str = "Isaac-Lift-Cube-Franka-v0"
    total_timesteps: int = 20_000
    disable_bootstrap: bool = True
    num_updates: int = 8
    num_eval_envs: int = 1024
    num_envs: int = 1024
    action_bounds: float = 3.0
    std_max: float = 0.8
    v_max: float = 50.0
    v_min: float = -50.0
|
||||||
|
|
||||||
|
|
||||||
|
# Isaac-Open-Drawer-Franka-v0 preset: 8 updates per step, wider value
# support and action bounds than the IsaacLab defaults.
@dataclass
class IsaacOpenDrawerFrankaArgs(IsaacLabArgs):
    # Higher UTD works better for manipulation tasks
    env_name: str = "Isaac-Open-Drawer-Franka-v0"
    total_timesteps: int = 20_000
    num_updates: int = 8
    action_bounds: float = 3.0
    v_max: float = 50.0
    v_min: float = -50.0
|
||||||
|
|
||||||
|
|
||||||
|
# Isaac-Velocity-Flat-H1-v0 preset: 8-step returns, 4 updates per step.
@dataclass
class IsaacVelocityFlatH1Args(IsaacLabArgs):
    env_name: str = "Isaac-Velocity-Flat-H1-v0"
    total_timesteps: int = 75_000
    num_updates: int = 4
    num_steps: int = 8
|
||||||
|
|
||||||
|
|
||||||
|
# Isaac-Velocity-Flat-G1-v0 preset: 8-step returns, 4 updates per step.
@dataclass
class IsaacVelocityFlatG1Args(IsaacLabArgs):
    env_name: str = "Isaac-Velocity-Flat-G1-v0"
    total_timesteps: int = 50_000
    num_updates: int = 4
    num_steps: int = 8
|
||||||
|
|
||||||
|
|
||||||
|
# Isaac-Velocity-Rough-H1-v0 preset: 8-step returns, 4 updates per step and
# half the IsaacLab replay buffer.
@dataclass
class IsaacVelocityRoughH1Args(IsaacLabArgs):
    env_name: str = "Isaac-Velocity-Rough-H1-v0"
    total_timesteps: int = 50_000
    num_updates: int = 4
    num_steps: int = 8
    buffer_size: int = 1024 * 5  # To reduce memory usage
|
||||||
|
|
||||||
|
|
||||||
|
# Isaac-Velocity-Rough-G1-v0 preset: 8-step returns, 4 updates per step and
# half the IsaacLab replay buffer.
@dataclass
class IsaacVelocityRoughG1Args(IsaacLabArgs):
    env_name: str = "Isaac-Velocity-Rough-G1-v0"
    total_timesteps: int = 50_000
    num_updates: int = 4
    num_steps: int = 8
    buffer_size: int = 1024 * 5  # To reduce memory usage
|
||||||
|
|
||||||
|
|
||||||
|
# Isaac-Repose-Cube-Allegro-Direct-v0: IsaacLab defaults with a wider value
# support.
@dataclass
class IsaacReposeCubeAllegroDirectArgs(IsaacLabArgs):
    env_name: str = "Isaac-Repose-Cube-Allegro-Direct-v0"
    total_timesteps: int = 100_000
    v_max: float = 500.0
    v_min: float = -500.0
|
||||||
|
|
||||||
|
|
||||||
|
# Isaac-Repose-Cube-Shadow-Direct-v0: IsaacLab defaults with a wider value
# support.
@dataclass
class IsaacReposeCubeShadowDirectArgs(IsaacLabArgs):
    env_name: str = "Isaac-Repose-Cube-Shadow-Direct-v0"
    total_timesteps: int = 100_000
    v_max: float = 500.0
    v_min: float = -500.0
|
||||||
743
reppo/torchrl/reppo.py
Normal file
743
reppo/torchrl/reppo.py
Normal file
@ -0,0 +1,743 @@
|
|||||||
|
from dataclasses import dataclass, replace
|
||||||
|
import functools
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
import sys
|
||||||
|
import copy
|
||||||
|
import time
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import tqdm
|
||||||
|
from omegaconf import DictConfig, OmegaConf
|
||||||
|
|
||||||
|
import wandb
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Required for avoiding IsaacGym import error
|
||||||
|
import isaacgym
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
import hydra
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
import torch.optim as optim
|
||||||
|
from torchinfo import summary
|
||||||
|
from tensordict import TensorDict
|
||||||
|
from torch.amp import GradScaler
|
||||||
|
from reppo.torchrl.envs import make_envs
|
||||||
|
from reppo.network_utils.torch_models import Actor, Critic
|
||||||
|
from reppo.torchrl.reppo import (
|
||||||
|
EmpiricalNormalization,
|
||||||
|
hl_gauss,
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
import jax.numpy as jnp
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
# Process-wide runtime configuration, applied once when this module is
# imported.
torch.set_float32_matmul_precision("medium")
os.environ["TORCHDYNAMO_INLINE_INBUILT_NN_MODULES"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"
# MUJOCO_GL is "glfw" on macOS and "egl" on every other platform.
os.environ["MUJOCO_GL"] = "glfw" if sys.platform == "darwin" else "egl"
os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"] = "false"
os.environ["JAX_DEFAULT_MATMUL_PRECISION"] = "highest"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class TrainState:
|
||||||
|
device: torch.device
|
||||||
|
obs: torch.Tensor
|
||||||
|
critic_obs: torch.Tensor
|
||||||
|
actor: Actor
|
||||||
|
old_actor: Actor
|
||||||
|
critic: Critic
|
||||||
|
normalizer: EmpiricalNormalization
|
||||||
|
critic_normalizer: EmpiricalNormalization
|
||||||
|
actor_optimizer: optim.Optimizer
|
||||||
|
critic_optimizer: optim.Optimizer
|
||||||
|
scaler: GradScaler
|
||||||
|
|
||||||
|
def compile(self):
|
||||||
|
self.actor.compile()
|
||||||
|
self.old_actor.compile()
|
||||||
|
self.critic.compile()
|
||||||
|
self.normalizer.compile()
|
||||||
|
self.critic_normalizer.compile()
|
||||||
|
|
||||||
|
|
||||||
|
def get_autocast_context(cfg: DictConfig):
    """Build a factory for ``torch.amp.autocast`` contexts from ``cfg.platform``.

    Reads ``cfg.platform.amp_enabled``, ``cfg.platform.cuda`` and
    ``cfg.platform.amp_dtype``.

    Returns:
        A ``functools.partial`` of ``torch.amp.autocast`` with ``device_type``,
        ``dtype`` and ``enabled`` already bound; call it with no arguments to
        obtain a context manager.
    """
    platform = cfg.platform
    cuda_ready = platform.cuda and torch.cuda.is_available()

    # Autocast is only switched on when CUDA is both requested and available.
    amp_enabled = platform.amp_enabled and cuda_ready

    if cuda_ready:
        amp_device = "cuda"
    elif platform.cuda and torch.backends.mps.is_available():
        amp_device = "mps"
    else:
        amp_device = "cpu"

    # Any amp_dtype other than "bf16" resolves to float32.
    if platform.amp_dtype == "bf16":
        amp_dtype = torch.bfloat16
    else:
        amp_dtype = torch.float32

    return functools.partial(
        torch.amp.autocast,
        device_type=amp_device,
        dtype=amp_dtype,
        enabled=amp_enabled,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def make_collect_fn(cfg: DictConfig, env):
    """Build the rollout function that steps ``env`` for one training iteration.

    Args:
        cfg: Configuration; ``hyperparameters.num_steps`` and
            ``hyperparameters.gamma`` are read, as are the optional
            ``env.has_final_obs`` and ``env.partial_reset`` flags.
        env: Vectorized environment exposing ``asymmetric_obs``, ``num_envs``
            and ``step(actions)``.

    Returns:
        ``collect_fn(train_state)``, which returns the updated train state,
        the stacked transitions and the list of per-step info dicts.
    """
    autocast = get_autocast_context(cfg)
    asymmetric_obs = env.asymmetric_obs

    def collect_fn(
        train_state: TrainState,
    ) -> tuple[TrainState, TensorDict, list[dict]]:
        """Roll the current policy forward for ``num_steps`` environment steps."""
        hp = cfg.hyperparameters
        # The configuration does not change during a rollout, so decide once
        # whether a reported final observation should be used for bootstrapping.
        use_final_obs = cfg.env.get("has_final_obs", False) and cfg.env.get(
            "partial_reset", False
        )

        rollout = []
        step_infos = []
        obs, critic_obs = train_state.obs, train_state.critic_obs

        for _ in range(hp.num_steps):
            # Act in the environment with a sample from the current policy.
            with autocast():
                norm_obs = train_state.normalizer(obs)
                norm_critic_obs = train_state.critic_normalizer(critic_obs)
                with torch.inference_mode():
                    pi, _, _, _ = train_state.actor(norm_obs)
                    actions = pi.sample()

            next_obs, rewards, dones, truncations, infos = env.step(actions)

            next_critic_obs = (
                infos["observations"]["critic"] if asymmetric_obs else next_obs
            )

            with torch.inference_mode(), autocast():
                if use_final_obs and "final_observation" in infos:
                    boot_obs = infos["final_observation"]
                    boot_critic_obs = boot_obs
                else:
                    boot_obs, boot_critic_obs = next_obs, next_critic_obs

                # Evaluate the policy and the critic at the successor state.
                norm_next_obs = train_state.normalizer(boot_obs)
                next_pi, _, temperature, _ = train_state.actor(norm_next_obs)
                next_actions = next_pi.sample()
                next_log_probs = next_pi.log_prob(
                    next_actions.clip(-1 + 1e-6, 1 - 1e-6)
                ).sum(-1)
                norm_next_critic_obs = train_state.critic_normalizer(boot_critic_obs)
                next_value, _, _, next_embedding = train_state.critic(
                    norm_next_critic_obs, next_actions
                )
                # Subtract the discounted, temperature-weighted log-probability
                # of the successor action from the reward.
                rewards = rewards - hp.gamma * next_log_probs * temperature

            step_data = {
                "observations": norm_obs,
                "critic_observations": norm_critic_obs,
                "actions": actions,
                "log_probs": pi.log_prob(actions.clip(-0.999, 0.999)).sum(-1),
                "rewards": rewards.unsqueeze(-1),
                "next_embeddings": next_embedding,
                "next_values": next_value.unsqueeze(-1),
                "dones": dones.unsqueeze(-1).float(),
                "truncations": truncations.unsqueeze(-1).float(),
            }
            rollout.append(TensorDict(step_data, batch_size=(env.num_envs,)))
            step_infos.append(infos)
            obs, critic_obs = next_obs, next_critic_obs

        # Carry the final observations over to the next rollout.
        train_state = replace(train_state, obs=obs, critic_obs=critic_obs)
        return train_state, torch.stack(rollout, dim=0), step_infos

    return collect_fn
|
||||||
|
|
||||||
|
|
||||||
|
def make_postprocess_fn(cfg: DictConfig, env):
    """Build the function that turns a stacked rollout into a flat training batch.

    Args:
        cfg: Configuration; ``hyperparameters.num_steps``, ``num_envs``,
            ``gamma`` and ``lmbda`` are read.
        env: Currently unused.

    Returns:
        ``postprocess(train_state, transition)``, which returns a detached
        float ``TensorDict`` with the first two batch dimensions flattened
        into one and an added ``"gve"`` entry.
    """

    @torch.compiler.disable()
    def compute_gve(rewards, dones, truncated, next_values, device: torch.device):
        """Compute the backward lambda-weighted value targets, one per step.

        ``truncated`` is modified in place: its last step is marked as
        truncated so that step bootstraps directly from ``next_values``.
        ``device`` is currently unused.

        Returns:
            A list with one target tensor per time step, in time order.
        """
        num_steps = cfg.hyperparameters.num_steps
        gamma = cfg.hyperparameters.gamma
        lmbda = cfg.hyperparameters.lmbda

        # Filled by index from the back; avoids the quadratic cost of
        # inserting at the front of a list on every step.
        gves = [None] * num_steps
        last_gve = 0
        truncated[-1] = 1.0
        for t in reversed(range(num_steps)):
            lambda_sum = lmbda * last_gve + (1.0 - lmbda) * next_values[t]
            # Truncated steps bootstrap from the value estimate alone; other
            # steps use the lambda mixture, zeroed where the episode is done.
            delta = gamma * torch.where(
                truncated[t].bool(), next_values[t], (1.0 - dones[t]) * lambda_sum
            )
            last_gve = rewards[t] + delta
            gves[t] = last_gve
        return gves

    def postprocess(train_state: TrainState, transition: TensorDict):
        """Attach value targets to ``transition`` and flatten it into one batch."""
        gve = compute_gve(
            rewards=transition["rewards"],
            dones=transition["dones"],
            truncated=transition["truncations"],
            next_values=transition["next_values"],
            device=train_state.device,
        )

        # Flatten all time and environment dimensions into a single batch dimension
        data = TensorDict(
            {
                "observations": transition["observations"],
                "critic_observations": transition["critic_observations"],
                "actions": transition["actions"],
                "rewards": transition["rewards"],
                "next_embeddings": transition["next_embeddings"],
                "next_values": transition["next_values"],
                "dones": transition["dones"],
                "truncations": transition["truncations"],
                "gve": torch.stack(gve),
            },
            batch_size=(
                cfg.hyperparameters.num_steps,
                cfg.hyperparameters.num_envs,
            ),
            device=train_state.device,
        )
        return data.float().flatten(0, 1).detach()

    return postprocess
|
||||||
|
|
||||||
|
|
||||||
|
def make_critic_update_fn(cfg: DictConfig, train_state: TrainState):
    """Build the function that performs one critic optimization step.

    Args:
        cfg: Configuration; ``hyperparameters.vmin``, ``vmax``, ``num_bins``,
            ``aux_loss_mult`` and ``max_grad_norm`` are read, as is the
            optional ``env.partial_reset`` flag.
        train_state: Supplies the critic, its optimizer, the gradient scaler
            and the device.

    Returns:
        ``update(data)``, which applies one gradient step to the critic and
        returns a dict of detached scalars for logging.
    """
    autocast = get_autocast_context(cfg)

    def update(data: TensorDict):
        """Run one critic update on the mini-batch ``data``."""
        hp = cfg.hyperparameters
        critic = train_state.critic
        optimizer = train_state.critic_optimizer

        with autocast():
            critic_observations = data["critic_observations"]
            actions = data["actions"]
            targets = data["gve"]
            target_embeddings = data["next_embeddings"]
            truncations = data["truncations"].squeeze(-1)

            # With partial resets every sample contributes to the loss;
            # otherwise truncated samples are weighted by zero.
            if cfg.env.get("partial_reset", False):
                truncation_mask = torch.ones_like(
                    truncations, dtype=torch.bool, device=train_state.device
                )
            else:
                truncation_mask = 1.0 - truncations

            qf_target_dist = hl_gauss(targets, hp.vmin, hp.vmax, hp.num_bins)

            _, qf1, embedding, _ = critic(critic_observations, actions)

            # Masked cross-entropy between the target distribution and the
            # critic's predicted logits.
            target_log_lik = torch.sum(
                qf_target_dist * F.log_softmax(qf1, dim=-1), dim=-1
            )
            qf_loss = -(truncation_mask * target_log_lik).mean()

            # Masked auxiliary regression onto the stored embeddings.
            per_sample_mse = F.mse_loss(
                embedding,
                target_embeddings,
                reduction="none",
            ).mean(dim=-1)
            embedding_loss = (truncation_mask * per_sample_mse).mean()

            qf_loss = qf_loss + hp.aux_loss_mult * embedding_loss

        optimizer.zero_grad(set_to_none=True)
        train_state.scaler.scale(qf_loss).backward()
        # Unscale before clipping so the norm is measured on true gradients.
        train_state.scaler.unscale_(optimizer)

        critic_grad_norm = torch.nn.utils.clip_grad_norm_(
            critic.parameters(), max_norm=hp.max_grad_norm
        )
        train_state.scaler.step(optimizer)
        train_state.scaler.update()

        return {
            "critic_grad_norm": critic_grad_norm.detach(),
            "qf_loss": qf_loss.detach(),
            "qf_max": targets.max().detach(),
            "qf_min": targets.min().detach(),
            "qf_mean": targets.mean().detach(),
            "embedding_loss": embedding_loss.detach(),
        }

    return update
|
||||||
|
|
||||||
|
|
||||||
|
def make_actor_update_fn(cfg: DictConfig, train_state: TrainState):
    """Build the function that performs one actor optimization step.

    Args:
        cfg: Configuration; ``hyperparameters.actor_kl_clip_mode``,
            ``kl_bound``, ``ent_target_mult`` and ``max_grad_norm`` are read.
        train_state: Supplies the actor, the reference actor, the critic, the
            actor optimizer and the gradient scaler.

    Returns:
        ``update(data)``, which applies one gradient step to the actor and
        returns a dict of detached tensors for logging.

    Raises:
        ValueError: From ``update`` when ``actor_kl_clip_mode`` is not one of
            ``"clipped"``, ``"full"`` or ``"value"``.
    """
    autocast = get_autocast_context(cfg)

    def update(data: TensorDict):
        """Run one actor update on the mini-batch ``data``."""
        hp = cfg.hyperparameters
        actor = train_state.actor
        old_actor = train_state.old_actor
        qnet = train_state.critic
        actor_optimizer = train_state.actor_optimizer
        scaler = train_state.scaler
        critic_obs = data["critic_observations"]

        with autocast():
            pi, _, temperature, beta = actor(data["observations"])
            actions = pi.rsample()
            log_probs = pi.log_prob(actions.clip(-1 + 1e-6, 1 - 1e-6)).sum(-1)
            entropy = -log_probs
            qf, _, _, _ = qnet(critic_obs, actions)
            actor_loss = -qf + temperature.detach() * log_probs

            # Estimate the KL from the reference policy to the current one
            # with 16 actions sampled from the reference policy.
            old_pi, _, _, _ = old_actor(data["observations"])
            old_pi_actions = old_pi.sample((16,)).clip(-1 + 1e-6, 1 - 1e-6)
            old_log_probs = old_pi.log_prob(old_pi_actions).sum(-1).mean(0)
            new_pi_log_probs = pi.log_prob(old_pi_actions).sum(-1).mean(0)
            kl = old_log_probs - new_pi_log_probs

            kl_mode = hp.actor_kl_clip_mode
            if kl_mode == "clipped":
                # Where the bound is violated, optimize the weighted KL
                # instead of the value objective.
                actor_loss = torch.where(
                    kl < hp.kl_bound,
                    actor_loss,
                    kl * beta.detach(),
                ).mean()
            elif kl_mode == "full":
                actor_loss = actor_loss + kl * beta.detach()
            elif kl_mode != "value":
                # "value" leaves the loss untouched; anything else is invalid.
                raise ValueError(
                    f"Unknown actor kl clip mode: {cfg.hyperparameters.actor_kl_clip_mode}"
                )

            # Temperature update: only ``temperature`` receives gradient here.
            target_entropy = (
                actions.shape[-1] * hp.ent_target_mult
            )  # -0.5 * np.prod(envs.action_space.shape)
            entropy_loss = (target_entropy + entropy).detach().mean() * temperature

            # Multiplier update: only ``beta`` receives gradient here.
            lagrangian_loss = -beta * (kl - hp.kl_bound).mean().detach()

            actor_loss = (actor_loss + entropy_loss + lagrangian_loss).mean()

        actor_optimizer.zero_grad(set_to_none=True)
        scaler.scale(actor_loss).backward()
        # Unscale before clipping so the norm is measured on true gradients.
        scaler.unscale_(actor_optimizer)
        actor_grad_norm = torch.nn.utils.clip_grad_norm_(
            actor.parameters(), max_norm=hp.max_grad_norm
        )
        scaler.step(actor_optimizer)
        scaler.update()

        return {
            "actor_grad_norm": actor_grad_norm.detach(),
            "actor_loss": actor_loss.detach(),
            "kl": kl.detach(),
            "entropy": entropy.detach(),
            "temperature": temperature.detach(),
            "lagrangian": beta.detach(),
            "entropy_loss": entropy_loss.detach(),
            "lagrangian_loss": lagrangian_loss.detach(),
        }

    return update
|
||||||
|
|
||||||
|
|
||||||
|
def make_evaluate_fn(cfg: DictConfig, eval_envs):
    """Build the function that evaluates the current policy on ``eval_envs``.

    Args:
        cfg: Configuration; ``env.type`` and ``env.asymmetric_observation``
            are read.
        eval_envs: Vectorized environment exposing ``num_envs``,
            ``max_episode_steps``, ``reset`` and ``step``.

    Returns:
        ``evaluate(train_state, stochastic_eval=False)``, which returns the
        mean episode return, the mean episode length and an info dict.
    """
    autocast = get_autocast_context(cfg)

    @torch.inference_mode()
    def evaluate(
        train_state: TrainState, stochastic_eval: bool = False
    ) -> tuple[float, float, dict]:
        """Run one evaluation episode per environment.

        Args:
            train_state: Supplies the actor, the observation normalizer and
                the device.
            stochastic_eval: Sample actions from the policy distribution
                instead of using the actor's second output.

        Returns:
            ``(mean_return, mean_length, info)``. ``info`` is empty unless
            ``cfg.env.type == "maniskill"``.
        """
        train_state.normalizer.eval()
        try:
            num_eval_envs = eval_envs.num_envs
            episode_returns = torch.zeros(num_eval_envs, device=train_state.device)
            episode_lengths = torch.zeros(num_eval_envs, device=train_state.device)
            done_masks = torch.zeros(
                num_eval_envs, dtype=torch.bool, device=train_state.device
            )

            if cfg.env.type == "isaaclab" or cfg.env.asymmetric_observation:
                obs, _ = eval_envs.reset(random_start_init=False)
            else:
                obs = eval_envs.reset()

            # Run for a fixed number of steps
            for _ in range(eval_envs.max_episode_steps):
                with autocast():
                    obs = train_state.normalizer(obs)
                    action_dist, det_actions, _, _ = train_state.actor(obs)
                    if stochastic_eval:
                        actions = action_dist.sample()
                    else:
                        actions = det_actions

                next_obs, rewards, dones, _, infos = eval_envs.step(actions)

                # Only environments that have not finished yet keep
                # accumulating return and length.
                episode_returns = torch.where(
                    ~done_masks, episode_returns + rewards, episode_returns
                )
                episode_lengths = torch.where(
                    ~done_masks, episode_lengths + 1, episode_lengths
                )
                done_masks = torch.logical_or(done_masks, dones)
                if done_masks.all():
                    break
                obs = next_obs
        finally:
            # Switch the normalizer back to training mode even if the
            # rollout raised.
            train_state.normalizer.train()

        if cfg.env.type == "maniskill":
            # combine log_infos
            info = {
                "info_return": infos["log_info"]["return"].mean(),
                "episode_len": infos["log_info"]["episode_len"].float().mean(),
                "success": infos["log_info"]["success"].float().mean(),
                "return": episode_returns.mean().item(),
            }
        else:
            info = {}

        return episode_returns.mean().item(), episode_lengths.mean().item(), info

    return evaluate
|
||||||
|
|
||||||
|
|
||||||
|
def configure_platform(cfg: DictConfig) -> DictConfig:
    """Resolve the mixed-precision settings in ``cfg.platform`` in place.

    ``amp_enabled`` stays set only when ``cfg.platform.cuda`` is set and CUDA
    is available. ``amp_device`` becomes ``"cuda"`` in that same case,
    ``"mps"`` when ``cfg.platform.cuda`` is set and MPS is available, and
    ``"cpu"`` otherwise.

    Args:
        cfg: Configuration exposing ``platform.amp_enabled`` and
            ``platform.cuda``.

    Returns:
        The same ``cfg`` object, after modification.
    """
    wants_gpu = cfg.platform.cuda
    cuda_ready = wants_gpu and torch.cuda.is_available()

    if cuda_ready:
        amp_device = "cuda"
    elif wants_gpu and torch.backends.mps.is_available():
        amp_device = "mps"
    else:
        amp_device = "cpu"

    cfg.platform.amp_enabled = cfg.platform.amp_enabled and cuda_ready
    cfg.platform.amp_device = amp_device
    return cfg
|
||||||
|
|
||||||
|
|
||||||
|
@hydra.main(
    version_base=None,
    config_path="../../config",
    config_name="sac",
)
def main(cfg):
    """Train an agent: build everything from ``cfg`` and run the training loop.

    Each loop iteration collects one rollout, post-processes it into a flat
    batch, runs ``num_epochs`` passes of mini-batch critic and actor updates,
    copies the actor weights into the reference actor, and (once timing has
    started) logs metrics to Weights & Biases, evaluating periodically.

    Args:
        cfg: Hydra configuration. ``cfg.experiment_overrides`` is merged into
            ``cfg.hyperparameters`` before anything else is read.

    Raises:
        ValueError: If ``cfg.platform.cuda`` is set but neither CUDA nor MPS
            is available.
    """
    cfg.hyperparameters = OmegaConf.merge(cfg.hyperparameters, cfg.experiment_overrides)
    cfg = configure_platform(cfg)
    run_name = f"{cfg.env.name}_torch_{cfg.seed}"

    # NOTE(review): get_autocast_context compares cfg.platform.amp_dtype with
    # the string "bf16"; if it is a string, this comparison with torch.float16
    # is always False and the scaler is always disabled -- confirm intent.
    scaler = GradScaler(
        enabled=cfg.platform.amp_enabled and cfg.platform.amp_dtype == torch.float16
    )

    # Number of transitions gathered per loop iteration.
    steps_per_iter = cfg.hyperparameters.num_envs * cfg.hyperparameters.num_steps
    num_batches = cfg.hyperparameters.num_mini_batches
    batch_size = steps_per_iter // num_batches

    wandb.init(
        project=cfg.wandb.project,
        name=run_name,
        config=OmegaConf.to_container(cfg),
        save_code=True,
    )

    random.seed(cfg.seed)
    np.random.seed(cfg.seed)
    torch.manual_seed(cfg.seed)
    torch.backends.cudnn.deterministic = cfg.platform.torch_deterministic

    if not cfg.platform.cuda:
        device = torch.device("cpu")
    elif torch.cuda.is_available():
        device = torch.device(f"cuda:{cfg.platform.device_rank}")
    elif torch.backends.mps.is_available():
        device = torch.device(f"mps:{cfg.platform.device_rank}")
    else:
        raise ValueError("No GPU available")
    print(f"Using device: {device}")

    envs, eval_envs = make_envs(cfg=cfg, device=device, seed=cfg.seed)

    # Observation sizes may be reported as an int or as a sequence whose
    # first entry is the size.
    n_act = envs.num_actions
    n_obs = envs.num_obs if isinstance(envs.num_obs, int) else envs.num_obs[0]
    if envs.asymmetric_obs:
        n_critic_obs = (
            envs.num_privileged_obs
            if isinstance(envs.num_privileged_obs, int)
            else envs.num_privileged_obs[0]
        )
    else:
        n_critic_obs = n_obs

    if cfg.hyperparameters.normalize_env:
        obs_normalizer = EmpiricalNormalization(shape=n_obs, device=device)
        critic_obs_normalizer = EmpiricalNormalization(
            shape=n_critic_obs, device=device
        )
    else:
        obs_normalizer = nn.Identity()
        critic_obs_normalizer = nn.Identity()

    actor = Actor(
        n_obs=n_obs,
        n_act=n_act,
        ent_start=cfg.hyperparameters.ent_start,
        kl_start=cfg.hyperparameters.kl_start,
        hidden_dim=cfg.hyperparameters.actor_hidden_dim,
        use_norm=cfg.hyperparameters.use_actor_norm,
        layers=cfg.hyperparameters.num_actor_layers,
        min_std=cfg.hyperparameters.actor_min_std,
        device=device,
    )
    old_actor = copy.deepcopy(actor)
    qnet = Critic(
        n_obs=n_critic_obs,
        n_act=n_act,
        num_atoms=cfg.hyperparameters.num_bins,
        vmin=cfg.hyperparameters.vmin,
        vmax=cfg.hyperparameters.vmax,
        hidden_dim=cfg.hyperparameters.critic_hidden_dim,
        use_norm=cfg.hyperparameters.use_critic_norm,
        use_encoder_norm=False,
        encoder_layers=cfg.hyperparameters.num_critic_encoder_layers,
        head_layers=cfg.hyperparameters.num_critic_head_layers,
        pred_layers=cfg.hyperparameters.num_critic_pred_layers,
        device=device,
    )

    q_optimizer = optim.AdamW(
        list(qnet.parameters()),
        lr=torch.tensor(cfg.hyperparameters.lr, device=device),
    )
    actor_optimizer = optim.AdamW(
        list(actor.parameters()),
        lr=torch.tensor(cfg.hyperparameters.lr, device=device),
    )

    if envs.asymmetric_obs:
        obs, critic_obs = envs.reset_with_critic_obs()
        critic_obs = torch.as_tensor(critic_obs, device=device, dtype=torch.float)
    else:
        obs = envs.reset()
        critic_obs = obs

    train_state = TrainState(
        obs=obs,
        critic_obs=critic_obs,
        actor=actor,
        old_actor=old_actor,
        critic=qnet,
        normalizer=obs_normalizer,
        critic_normalizer=critic_obs_normalizer,
        actor_optimizer=actor_optimizer,
        critic_optimizer=q_optimizer,
        device=device,
        scaler=scaler,
    )

    print(
        summary(
            train_state.critic,
            input_data=(critic_obs[:1], torch.zeros((1, n_act), device=device)),
            depth=10,
        )
    )
    print(summary(train_state.actor, input_data=(obs[:1],), depth=10))
    # create functions
    collect_fn = make_collect_fn(cfg, envs)
    postprocess_fn = make_postprocess_fn(cfg, envs)
    update_critic = make_critic_update_fn(cfg, train_state)
    update_actor = make_actor_update_fn(cfg, train_state)
    evaluate = make_evaluate_fn(cfg, eval_envs)

    if cfg.platform.compile:
        mode = "max-autotune-no-cudagraphs"
        update_critic = torch.compile(update_critic, mode=mode)
        update_actor = torch.compile(update_actor, mode=mode)
        postprocess_fn = torch.compile(postprocess_fn, mode=mode)
        train_state.compile()

    # TODO: Support checkpoint loading
    # if cfg.checkpoint_path:
    #     # Load checkpoint if specified
    #     torch_checkpoint = torch.load(
    #         f"{cfg.checkpoint_path}", map_location=device, weights_only=False
    #     )
    #     actor.load_state_dict(torch_checkpoint["actor_state_dict"])
    #     obs_normalizer.load_state_dict(torch_checkpoint["obs_normalizer_state"])
    #     critic_obs_normalizer.load_state_dict(
    #         torch_checkpoint["critic_obs_normalizer_state"]
    #     )
    #     qnet.load_state_dict(torch_checkpoint["qnet_state_dict"])
    #     qnet_target.load_state_dict(torch_checkpoint["qnet_target_state_dict"])
    #     global_step = torch_checkpoint["global_step"]
    # else:
    global_step = 0
    # Despite the name, this counts loop iterations: the loop below adds one
    # to global_step per collected rollout.
    total_env_steps = cfg.hyperparameters.total_time_steps // steps_per_iter + 1

    pbar = tqdm.tqdm(total=cfg.hyperparameters.total_time_steps, initial=global_step)
    start_time = None
    desc = ""

    # An interval of 0 disables evaluation. num_eval == 0 maps to that case
    # instead of raising ZeroDivisionError.
    num_eval = cfg.hyperparameters.num_eval
    eval_interval = total_env_steps // num_eval if num_eval > 0 else 0
    stochastic_eval = cfg.env.get("stochastic_eval", False)

    while global_step < total_env_steps:
        if start_time is None and global_step >= cfg.measure_burnin:
            start_time = time.time()
            measure_burnin = global_step

        train_state, transition, infos = collect_fn(train_state)
        data = postprocess_fn(train_state, transition)

        for _ in range(cfg.hyperparameters.num_epochs):
            # Reshuffle the whole batch, then walk it in contiguous slices.
            indices = torch.randperm(steps_per_iter, device=device)
            data = data[indices].contiguous()
            for j in range(num_batches):
                mini_batch = data[j * batch_size : (j + 1) * batch_size]
                critic_logs_dict = update_critic(mini_batch)
                actor_logs_dict = update_actor(mini_batch)
                # Only the last mini-batch's metrics survive to be logged.
                logs_dict = {
                    **critic_logs_dict,
                    **actor_logs_dict,
                }

        # Copy the updated actor into the reference actor used for the KL term.
        for param, target_param in zip(actor.parameters(), old_actor.parameters()):
            target_param.data.copy_(param.data)
        if start_time is not None:
            # @TODO: shouldn't that be env_steps per second?
            speed = (
                steps_per_iter
                * (global_step - measure_burnin)
                / (time.time() - start_time)
            )
            pbar.set_description(f"{speed: 4.4f} sps, " + desc)
            with torch.inference_mode():
                logs = {
                    "critic/qf_loss": logs_dict["qf_loss"].mean(),
                    "critic/qf_max": logs_dict["qf_max"].mean(),
                    "critic/qf_min": logs_dict["qf_min"].mean(),
                    "critic/qf_mean": logs_dict["qf_mean"].mean(),
                    "critic/embedding_loss": logs_dict["embedding_loss"].mean(),
                    "critic/critic_grad_norm": logs_dict["critic_grad_norm"].mean(),
                    "actor/actor_loss": logs_dict["actor_loss"].mean(),
                    "actor/actor_grad_norm": logs_dict["actor_grad_norm"].mean(),
                    "actor/kl": logs_dict["kl"].mean(),
                    "actor/entropy": logs_dict["entropy"].mean(),
                    "actor/temperature": logs_dict["temperature"].mean(),
                    "actor/lagrangian": logs_dict["lagrangian"].mean(),
                    "actor/entropy_loss": logs_dict["entropy_loss"].mean(),
                    "actor/lagrangian_loss": logs_dict["lagrangian_loss"].mean(),
                    "train/rewards_batch": data["rewards"].mean(),
                }

                if cfg.env.type == "maniskill":
                    logs.update(
                        {
                            "train/return": torch.stack(
                                [info["log_info"]["return"] for info in infos]
                            ).mean(),
                            "train/episode_len": torch.stack(
                                [info["log_info"]["episode_len"] for info in infos]
                            )
                            .float()
                            .mean(),
                            "train/success": torch.stack(
                                [info["log_info"]["success"] for info in infos]
                            )
                            .float()
                            .mean(),
                        }
                    )

                if eval_interval > 0 and global_step % eval_interval == 0:
                    print(f"Evaluating at global step {global_step}")
                    if stochastic_eval:
                        # Only the info dict of the stochastic run is kept;
                        # the reported return and length come from the
                        # deterministic run that follows.
                        _, _, stoch_eval_info = evaluate(
                            train_state, stochastic_eval=stochastic_eval
                        )
                        eval_avg_return, eval_avg_length, eval_info = evaluate(
                            train_state
                        )
                        eval_info = {
                            **eval_info,
                            **{f"stoch/{k}": v for k, v in stoch_eval_info.items()},
                        }
                    else:
                        eval_avg_return, eval_avg_length, eval_info = evaluate(
                            train_state
                        )
                    if cfg.env.type in [
                        "humanoid_bench",
                        "isaaclab",
                        "mtbench",
                    ]:
                        # NOTE: Hacky way of evaluating performance, but just works
                        # NOTE(review): the returned observation is not written
                        # back to train_state, so the next rollout resumes from
                        # the pre-reset observation -- confirm this is intended.
                        obs, _ = envs.reset()
                    logs["eval/avg_return"] = eval_avg_return
                    logs["eval/avg_length"] = eval_avg_length
                    for key, value in eval_info.items():
                        if isinstance(value, torch.Tensor):
                            logs[f"eval/{key}"] = value.mean().item()
                        elif isinstance(value, np.ndarray):
                            logs[f"eval/{key}"] = value.mean()
                        else:
                            logs[f"eval/{key}"] = value
                    print(
                        f"Eval return: {eval_avg_return:.2f}, length: {eval_avg_length:.2f}, env steps: {global_step * steps_per_iter} success rate: {eval_info.get('success', 0.0):.2f}"
                    )
            wandb.log(
                {
                    "speed": speed,
                    "frame": global_step * steps_per_iter,
                    **logs,
                },
                step=global_step * steps_per_iter,
            )

        global_step += 1
        pbar.update(n=steps_per_iter)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point; main is called without arguments because the
    # hydra.main decorator provides the configuration.
    main()
|
||||||
777
reppo/torchrl/reppo_util.py
Normal file
777
reppo/torchrl/reppo_util.py
Normal file
@ -0,0 +1,777 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
from tensordict import TensorDict
|
||||||
|
|
||||||
|
|
||||||
|
class SimpleReplayBuffer(nn.Module):
|
||||||
|
def __init__(
    self,
    n_env: int,
    buffer_size: int,
    n_obs: int,
    n_act: int,
    n_critic_obs: int,
    asymmetric_obs: bool = False,
    playground_mode: bool = False,
    n_steps: int = 1,
    gamma: float = 0.99,
    device=None,
):
    """
    A simple replay buffer that stores transitions in a circular buffer.
    Supports n-step returns and asymmetric observations.

    When playground_mode=True, critic_observations are treated as a concatenation of
    regular observations and privileged observations, and only the privileged part is stored
    to save memory.

    Every storage tensor is zero-initialised and laid out as
    (n_env, buffer_size, ...) on ``device``. ``playground_mode`` only takes
    effect when ``asymmetric_obs`` is also set.

    TODO (Younggyo): Refactor to split this into SimpleReplayBuffer and NStepReplayBuffer
    """
    super().__init__()

    self.n_env = n_env
    self.buffer_size = buffer_size
    self.n_obs = n_obs
    self.n_act = n_act
    self.n_critic_obs = n_critic_obs
    self.asymmetric_obs = asymmetric_obs
    self.playground_mode = playground_mode and asymmetric_obs
    self.gamma = gamma
    self.n_steps = n_steps
    self.device = device

    def _storage(*trailing, dtype=torch.float):
        # One zero-filled slot per (environment, buffer position).
        return torch.zeros(
            (n_env, buffer_size, *trailing), device=device, dtype=dtype
        )

    self.observations = _storage(n_obs)
    self.actions = _storage(n_act)
    self.rewards = _storage()
    self.dones = _storage(dtype=torch.long)
    self.truncations = _storage(dtype=torch.long)
    self.next_observations = _storage(n_obs)

    if asymmetric_obs:
        if self.playground_mode:
            # Only store the privileged part of observations (n_critic_obs - n_obs)
            self.privileged_obs_size = n_critic_obs - n_obs
            self.privileged_observations = _storage(self.privileged_obs_size)
            self.next_privileged_observations = _storage(self.privileged_obs_size)
        else:
            # Store full critic observations
            self.critic_observations = _storage(n_critic_obs)
            self.next_critic_observations = _storage(n_critic_obs)

    # Total number of extend() calls so far; the write slot is ptr % buffer_size.
    self.ptr = 0
|
||||||
|
|
||||||
|
def extend(
    self,
    tensor_dict: TensorDict,
):
    """Write one transition per environment into the current buffer slot.

    The slot is ``self.ptr % self.buffer_size``, so the oldest entries are
    overwritten once the buffer has wrapped around. ``self.ptr`` is
    incremented by one afterwards.

    Args:
        tensor_dict: Mapping with ``"observations"``, ``"actions"`` and a
            nested ``"next"`` mapping holding ``"rewards"``, ``"dones"``,
            ``"truncations"`` and ``"observations"``. When the buffer uses
            asymmetric observations, ``"critic_observations"`` is read from
            both levels as well.
    """
    successor = tensor_dict["next"]
    slot = self.ptr % self.buffer_size

    self.observations[:, slot] = tensor_dict["observations"]
    self.actions[:, slot] = tensor_dict["actions"]
    self.rewards[:, slot] = successor["rewards"]
    self.dones[:, slot] = successor["dones"]
    self.truncations[:, slot] = successor["truncations"]
    self.next_observations[:, slot] = successor["observations"]

    if self.asymmetric_obs:
        critic_observations = tensor_dict["critic_observations"]
        next_critic_observations = successor["critic_observations"]

        if self.playground_mode:
            # Extract and store only the privileged part
            self.privileged_observations[:, slot] = critic_observations[
                :, self.n_obs :
            ]
            self.next_privileged_observations[:, slot] = next_critic_observations[
                :, self.n_obs :
            ]
        else:
            # Store full critic observations
            self.critic_observations[:, slot] = critic_observations
            self.next_critic_observations[:, slot] = next_critic_observations

    self.ptr += 1
|
||||||
|
|
||||||
|
def sample(self, batch_size: int):
    """Sample ``batch_size`` transitions from every environment.

    Args:
        batch_size: Number of start indices drawn per environment. The
            returned batch has ``n_env * batch_size`` rows.

    Returns:
        A ``TensorDict`` with ``observations``, ``actions`` and a ``next``
        entry holding ``rewards``, ``dones``, ``truncations``,
        ``observations`` and ``effective_n_steps``. With
        ``self.asymmetric_obs`` it also carries ``critic_observations`` and
        ``next/critic_observations``.

    For ``n_steps == 1`` plain transitions are returned. For ``n_steps > 1``
    the reward is the discounted sum over the following ``n_steps`` slots,
    with rewards after the first ``done`` masked out, and the "next" fields
    are taken at the first done/truncation in the window (or at its last
    step when there is none).
    """
    # we will sample n_env * batch_size transitions
    flat_size = self.n_env * batch_size

    def gather_rows(storage, idx, width):
        # storage is indexed along dim 1 by idx ([n_env, batch_size]); the
        # (env, sample) axes are flattened into one batch axis.
        expanded = idx.unsqueeze(-1).expand(-1, -1, width)
        return torch.gather(storage, 1, expanded).reshape(flat_size, width)

    if self.n_steps == 1:
        indices = torch.randint(
            0,
            min(self.buffer_size, self.ptr),
            (self.n_env, batch_size),
            device=self.device,
        )
        observations = gather_rows(self.observations, indices, self.n_obs)
        next_observations = gather_rows(
            self.next_observations, indices, self.n_obs
        )
        actions = gather_rows(self.actions, indices, self.n_act)

        rewards = torch.gather(self.rewards, 1, indices).reshape(flat_size)
        dones = torch.gather(self.dones, 1, indices).reshape(flat_size)
        truncations = torch.gather(self.truncations, 1, indices).reshape(
            flat_size
        )
        effective_n_steps = torch.ones_like(dones)
        if self.asymmetric_obs:
            if self.playground_mode:
                # Gather privileged observations
                privileged_observations = gather_rows(
                    self.privileged_observations,
                    indices,
                    self.privileged_obs_size,
                )
                next_privileged_observations = gather_rows(
                    self.next_privileged_observations,
                    indices,
                    self.privileged_obs_size,
                )

                # Concatenate with regular observations to form full critic observations
                critic_observations = torch.cat(
                    [observations, privileged_observations], dim=1
                )
                next_critic_observations = torch.cat(
                    [next_observations, next_privileged_observations], dim=1
                )
            else:
                # Gather full critic observations
                critic_observations = gather_rows(
                    self.critic_observations, indices, self.n_critic_obs
                )
                next_critic_observations = gather_rows(
                    self.next_critic_observations, indices, self.n_critic_obs
                )
    else:
        buffer_full = self.ptr >= self.buffer_size
        if buffer_full:
            # When the buffer is full, there is no protection against sampling across different episodes
            # We avoid this by temporarily setting self.pos - 1 to truncated = True if not done
            # https://github.com/DLR-RM/stable-baselines3/blob/b91050ca94f8bce7a0285c91f85da518d5a26223/stable_baselines3/common/buffers.py#L857-L860
            # TODO (Younggyo): Change the reference when this SB3 branch is merged
            newest = self.ptr % self.buffer_size - 1
            saved_truncations = self.truncations[:, newest].clone()
            self.truncations[:, newest] = torch.logical_not(
                self.dones[:, newest]
            )

        # The override above mutates shared buffer state; the finally clause
        # guarantees it is rolled back even if sampling raises.
        try:
            # Sample base indices
            if buffer_full:
                indices = torch.randint(
                    0,
                    self.buffer_size,
                    (self.n_env, batch_size),
                    device=self.device,
                )
            else:
                # Buffer not full - ensure n-step sequence doesn't exceed valid data
                max_start_idx = max(1, self.ptr - self.n_steps + 1)
                indices = torch.randint(
                    0,
                    max_start_idx,
                    (self.n_env, batch_size),
                    device=self.device,
                )

            # Get base transitions
            observations = gather_rows(self.observations, indices, self.n_obs)
            actions = gather_rows(self.actions, indices, self.n_act)
            if self.asymmetric_obs:
                if self.playground_mode:
                    # Gather privileged observations
                    privileged_observations = gather_rows(
                        self.privileged_observations,
                        indices,
                        self.privileged_obs_size,
                    )

                    # Concatenate with regular observations to form full critic observations
                    critic_observations = torch.cat(
                        [observations, privileged_observations], dim=1
                    )
                else:
                    # Gather full critic observations
                    critic_observations = gather_rows(
                        self.critic_observations, indices, self.n_critic_obs
                    )

            # Create sequential indices for each sample
            # This creates a [n_env, batch_size, n_step] tensor of indices
            seq_offsets = torch.arange(self.n_steps, device=self.device).view(
                1, 1, -1
            )
            all_indices = (
                indices.unsqueeze(-1) + seq_offsets
            ) % self.buffer_size  # [n_env, batch_size, n_step]

            # Gather all rewards and terminal flags
            # Result shapes: [n_env, batch_size, n_step]
            all_rewards = torch.gather(
                self.rewards.unsqueeze(-1).expand(-1, -1, self.n_steps),
                1,
                all_indices,
            )
            all_dones = torch.gather(
                self.dones.unsqueeze(-1).expand(-1, -1, self.n_steps),
                1,
                all_indices,
            )
            all_truncations = torch.gather(
                self.truncations.unsqueeze(-1).expand(-1, -1, self.n_steps),
                1,
                all_indices,
            )

            # Create masks for rewards *after* first done
            # This creates a cumulative product that zeroes out rewards after the first done
            all_dones_shifted = torch.cat(
                [torch.zeros_like(all_dones[:, :, :1]), all_dones[:, :, :-1]],
                dim=2,
            )  # First reward should not be masked
            done_masks = torch.cumprod(
                1.0 - all_dones_shifted, dim=2
            )  # [n_env, batch_size, n_step]
            effective_n_steps = done_masks.sum(2)

            # Create discount factors
            discounts = torch.pow(
                self.gamma, torch.arange(self.n_steps, device=self.device)
            )  # [n_steps]

            # Apply masks and discounts to rewards
            masked_rewards = all_rewards * done_masks
            discounted_rewards = masked_rewards * discounts.view(1, 1, -1)

            # Sum rewards along the n_step dimension
            n_step_rewards = discounted_rewards.sum(dim=2)  # [n_env, batch_size]

            # Find index of first done or truncation or last step for each sequence
            first_done = torch.argmax((all_dones > 0).float(), dim=2)
            first_trunc = torch.argmax((all_truncations > 0).float(), dim=2)

            # Handle case where there are no dones or truncations
            no_dones = all_dones.sum(dim=2) == 0
            no_truncs = all_truncations.sum(dim=2) == 0

            # When no dones or truncs, use the last index
            first_done = torch.where(no_dones, self.n_steps - 1, first_done)
            first_trunc = torch.where(no_truncs, self.n_steps - 1, first_trunc)

            # Take the minimum (first) of done or truncation
            final_indices = torch.minimum(first_done, first_trunc)

            # Buffer positions from which the "next" fields are read
            final_next_obs_indices = torch.gather(
                all_indices, 2, final_indices.unsqueeze(-1)
            ).squeeze(-1)  # [n_env, batch_size]

            # Gather final values, already flattened to the batch dimension
            next_observations = gather_rows(
                self.next_observations, final_next_obs_indices, self.n_obs
            )
            dones = self.dones.gather(1, final_next_obs_indices).reshape(
                flat_size
            )
            truncations = self.truncations.gather(
                1, final_next_obs_indices
            ).reshape(flat_size)

            if self.asymmetric_obs:
                if self.playground_mode:
                    # Gather final privileged observations
                    next_privileged_observations = gather_rows(
                        self.next_privileged_observations,
                        final_next_obs_indices,
                        self.privileged_obs_size,
                    )

                    # Concatenate with next observations to form full next critic observations
                    next_critic_observations = torch.cat(
                        [next_observations, next_privileged_observations],
                        dim=1,
                    )
                else:
                    # Gather final next critic observations directly
                    next_critic_observations = gather_rows(
                        self.next_critic_observations,
                        final_next_obs_indices,
                        self.n_critic_obs,
                    )

            rewards = n_step_rewards.reshape(flat_size)
            effective_n_steps = effective_n_steps.reshape(flat_size)
        finally:
            if buffer_full:
                # Roll back the truncation flags introduced for safe sampling
                self.truncations[:, newest] = saved_truncations

    out = TensorDict(
        {
            "observations": observations,
            "actions": actions,
            "next": {
                "rewards": rewards,
                "dones": dones,
                "truncations": truncations,
                "observations": next_observations,
                "effective_n_steps": effective_n_steps,
            },
        },
        batch_size=self.n_env * batch_size,
    )
    if self.asymmetric_obs:
        out["critic_observations"] = critic_observations
        out["next"]["critic_observations"] = next_critic_observations

    return out
|
||||||
|
|
||||||
|
|
||||||
|
class EmpiricalNormalization(nn.Module):
    """Normalize mean and variance of values based on empirical values."""

    def __init__(self, shape, device, eps=1e-2, until=None):
        """Initialize EmpiricalNormalization module.

        Args:
            shape (int or tuple of int): Shape of input values except batch axis.
            device: Device on which the running statistics are stored.
            eps (float): Small value for stability.
            until (int or None): If this arg is specified, the link learns input values until the sum of batch sizes
            exceeds it.
        """
        super().__init__()
        self.eps = eps
        self.until = until
        self.device = device
        # Statistics carry a leading axis of size 1 so they broadcast over the batch.
        self.register_buffer("_mean", torch.zeros(shape).unsqueeze(0).to(device))
        self.register_buffer("_var", torch.ones(shape).unsqueeze(0).to(device))
        self.register_buffer("_std", torch.ones(shape).unsqueeze(0).to(device))
        self.register_buffer("count", torch.tensor(0, dtype=torch.long).to(device))

    @property
    def mean(self):
        """Copy of the running mean without the leading broadcast axis."""
        return self._mean.squeeze(0).clone()

    @property
    def std(self):
        """Copy of the running standard deviation without the leading broadcast axis."""
        return self._std.squeeze(0).clone()

    def forward(self, x: torch.Tensor, center: bool = True) -> torch.Tensor:
        """Normalize ``x``; in training mode the statistics are updated first.

        Args:
            x: Input whose last dimension matches the tracked feature size.
            center: If True subtract the running mean before scaling.

        Raises:
            ValueError: If the last dimension of ``x`` does not match.
        """
        if x.shape[-1:] != self._mean.shape[-1:]:
            raise ValueError(
                f"Expected input of shape (*,{self._mean.shape[-1:]}), got {x.shape}"
            )

        if self.training:
            self.update(x)
        if center:
            return (x - self._mean) / (self._std + self.eps)
        else:
            return x / (self._std + self.eps)

    @torch.jit.unused
    def update(self, x):
        """Fold a batch of samples into the running mean / variance / count."""
        x = x.flatten(end_dim=-2)

        if self.until is not None and self.count >= self.until:
            return

        batch_size = x.shape[0]
        if batch_size == 0:
            # An empty batch carries no information and would yield NaN statistics.
            return

        batch_mean = torch.mean(x, dim=0, keepdim=True)
        batch_var = torch.mean((x - batch_mean) ** 2, dim=0, keepdim=True)

        # Update count
        new_count = self.count + batch_size

        # Difference between the batch mean and the mean *before* this update.
        # Chan's formula needs this value, so it must be taken before _mean changes.
        delta = batch_mean - self._mean

        # Update variance using Chan's parallel algorithm
        # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
        if self.count > 0:  # Ensure we're not dividing by zero
            m_a = self._var * self.count
            m_b = batch_var * batch_size
            M2 = m_a + m_b + (delta**2) * (self.count * batch_size / new_count)
            new_var = M2 / new_count
        else:
            # For first batch, just use batch variance
            new_var = batch_var

        # Update mean
        self._mean += (batch_size / new_count) * delta

        self._var = new_var
        self._std = torch.sqrt(self._var)
        self.count = new_count

    @torch.jit.unused
    def inverse(self, y):
        """Undo the centered normalization applied by ``forward``."""
        return y * (self._std + self.eps) + self._mean
|
||||||
|
|
||||||
|
|
||||||
|
class RewardNormalizer(nn.Module):
    """Scale rewards by statistics of a running discounted return ``G``."""

    def __init__(
        self,
        gamma: float,
        device: torch.device,
        g_max: float = 10.0,
        epsilon: float = 1e-8,
    ):
        """
        Args:
            gamma: Discount used when accumulating the running return.
            device: Device on which the buffers are allocated.
            g_max: Divisor applied to the running max of ``|G|`` to obtain the
                lower bound of the scaling denominator.
            epsilon: Small constant added to the standard deviation.
        """
        super().__init__()
        self.register_buffer(
            "G", torch.zeros(1, device=device)
        )  # running estimate of the discounted return
        self.register_buffer("G_r_max", torch.zeros(1, device=device))  # running-max
        self.G_rms = EmpiricalNormalization(shape=1, device=device)
        self.gamma = gamma
        self.g_max = g_max
        self.epsilon = epsilon

    def _scale_reward(self, rewards: torch.Tensor) -> torch.Tensor:
        """Divide ``rewards`` by ``max(std(G) + epsilon, G_r_max / g_max)``."""
        var_denominator = self.G_rms.std[0] + self.epsilon
        min_required_denominator = self.G_r_max / self.g_max
        denominator = torch.maximum(var_denominator, min_required_denominator)

        return rewards / denominator

    def update_stats(
        self,
        rewards: torch.Tensor,
        dones: torch.Tensor,
    ):
        """Advance the running return and refresh its statistics.

        Args:
            rewards: Rewards of the current step.
            dones: Done flags of the current step; where set, the running
                return restarts from the current reward.
        """
        # Cast so that boolean done flags are accepted (``1 - bool_tensor`` raises).
        self.G = self.gamma * (1 - dones.float()) * self.G + rewards
        self.G_rms.update(self.G.view(-1, 1))
        # Single tensor reduction instead of a Python-level max over elements;
        # broadcasting against the existing buffer also keeps its shape stable.
        self.G_r_max = torch.maximum(self.G_r_max, self.G.abs().max())

    def forward(self, rewards: torch.Tensor) -> torch.Tensor:
        """Return the scaled rewards (statistics are not updated here)."""
        return self._scale_reward(rewards)
|
||||||
|
|
||||||
|
|
||||||
|
class PerTaskEmpiricalNormalization(nn.Module):
    """Running mean/variance normalizer that keeps separate statistics per task."""

    def __init__(
        self,
        num_tasks: int,
        shape: tuple,
        device: torch.device,
        eps: float = 1e-2,
        until: int = None,
    ):
        """
        Set up one row of statistics for each task.

        Args:
            num_tasks (int): The total number of tasks.
            shape (int or tuple of int): Shape of input values except batch axis.
            device (torch.device): Device on which the buffers are stored.
            eps (float): Small value for stability.
            until (int or None): If specified, a task stops learning once the
                number of samples seen for it reaches this value.
        """
        super().__init__()
        shape = shape if isinstance(shape, tuple) else (shape,)
        self.num_tasks = num_tasks
        self.shape = shape
        self.eps = eps
        self.until = until
        self.device = device

        # Every statistic is indexed by task along its first dimension.
        stat_shape = (num_tasks, *shape)
        self.register_buffer("_mean", torch.zeros(*stat_shape).to(device))
        self.register_buffer("_var", torch.ones(*stat_shape).to(device))
        self.register_buffer("_std", torch.ones(*stat_shape).to(device))
        self.register_buffer(
            "count", torch.zeros(num_tasks, dtype=torch.long).to(device)
        )

    def forward(
        self, x: torch.Tensor, task_ids: torch.Tensor, center: bool = True
    ) -> torch.Tensor:
        """
        Normalize `x` row by row with the statistics of each row's task.

        The statistics used for the returned values are read before the
        training-mode update is applied.

        Args:
            x (torch.Tensor): Input tensor of shape [num_envs, *shape].
            task_ids (torch.Tensor): Tensor of task indices, shape [num_envs].
            center (bool): If True, center the data by subtracting the mean.

        Raises:
            ValueError: On a feature-shape or batch-size mismatch.
        """
        if x.shape[1:] != self.shape:
            raise ValueError(f"Expected input shape (*, {self.shape}), got {x.shape}")
        if x.shape[0] != task_ids.shape[0]:
            raise ValueError("Batch size of x and task_ids must match.")

        # Broadcast the task index over the feature axes so gather picks, for
        # every element of x, the statistic belonging to that row's task.
        trailing_ones = (1,) * len(self.shape)
        gather_index = task_ids.view((task_ids.shape[0],) + trailing_ones).expand_as(x)

        row_mean = self._mean.gather(0, gather_index)
        row_std = self._std.gather(0, gather_index)

        if self.training:
            self.update(x, task_ids)

        scale = row_std + self.eps
        if not center:
            return x / scale
        return (x - row_mean) / scale

    @torch.jit.unused
    def update(self, x: torch.Tensor, task_ids: torch.Tensor):
        """Merge the batch into the running statistics of each task it contains."""
        for tid in torch.unique(task_ids):
            if self.until is not None and self.count[tid] >= self.until:
                continue

            # Rows of the batch that belong to this task
            samples = x[task_ids == tid]
            n_batch = samples.shape[0]
            if n_batch == 0:
                continue

            n_prev = self.count[tid].clone()
            n_total = n_prev + n_batch

            # Mean update; `shift` is measured against the previous mean
            prev_mean = self._mean[tid]
            shift = torch.mean(samples, dim=0) - prev_mean
            self._mean[tid] = prev_mean + (n_batch / n_total) * shift

            # Variance update (Chan's parallel algorithm once history exists)
            sample_var = torch.var(samples, dim=0, unbiased=False)
            if n_prev > 0:
                m_prev = self._var[tid] * n_prev
                m_batch = sample_var * n_batch
                merged = m_prev + m_batch + (shift**2) * (n_prev * n_batch / n_total)
                self._var[tid] = merged / n_total
            else:
                # First batch seen for this task
                self._var[tid] = sample_var

            self._std[tid] = torch.sqrt(self._var[tid])
            self.count[tid] = n_total
|
||||||
|
|
||||||
|
|
||||||
|
class PerTaskRewardNormalizer(nn.Module):
    """Reward normalizer that keeps return statistics separately for each task."""

    def __init__(
        self,
        num_tasks: int,
        gamma: float,
        device: torch.device,
        g_max: float = 10.0,
        epsilon: float = 1e-8,
    ):
        """
        Per-task reward normalizer, motivation comes from BRC (https://arxiv.org/abs/2505.23150v1)

        Args:
            num_tasks (int): The total number of tasks.
            gamma (float): Discount used when accumulating the running return.
            device (torch.device): Device on which the buffers are allocated.
            g_max (float): Divisor applied to the running max of ``|G|`` to
                obtain the lower bound of the scaling denominator.
            epsilon (float): Small constant for numerical stability.
        """
        super().__init__()
        self.num_tasks = num_tasks
        self.gamma = gamma
        self.g_max = g_max
        self.epsilon = epsilon
        self.device = device

        # Per-task running estimate of the discounted return
        self.register_buffer("G", torch.zeros(num_tasks, device=device))
        # Per-task running-max of the discounted return
        self.register_buffer("G_r_max", torch.zeros(num_tasks, device=device))
        # Use the new per-task normalizer for the statistics of G
        self.G_rms = PerTaskEmpiricalNormalization(
            num_tasks=num_tasks, shape=(1,), device=device
        )

    def _scale_reward(
        self, rewards: torch.Tensor, task_ids: torch.Tensor
    ) -> torch.Tensor:
        """
        Scales rewards using per-task statistics.

        Args:
            rewards (torch.Tensor): Reward tensor, shape [num_envs].
            task_ids (torch.Tensor): Task indices, shape [num_envs].
        """
        # Gather stats for the tasks in the batch
        std_for_batch = self.G_rms._std.gather(0, task_ids.unsqueeze(-1)).squeeze(-1)
        g_r_max_for_batch = self.G_r_max.gather(0, task_ids)

        var_denominator = std_for_batch + self.epsilon
        min_required_denominator = g_r_max_for_batch / self.g_max
        denominator = torch.maximum(var_denominator, min_required_denominator)

        # Add a small epsilon to the final denominator to prevent division by zero
        # in case g_r_max is also zero.
        return rewards / (denominator + self.epsilon)

    def update_stats(
        self, rewards: torch.Tensor, dones: torch.Tensor, task_ids: torch.Tensor
    ):
        """
        Updates the running discounted return and its statistics for each task.

        Args:
            rewards (torch.Tensor): Reward tensor, shape [num_envs].
            dones (torch.Tensor): Done tensor, shape [num_envs].
            task_ids (torch.Tensor): Task indices, shape [num_envs].

        Raises:
            ValueError: If the three tensors do not share one shape.
        """
        if not (rewards.shape == dones.shape == task_ids.shape):
            raise ValueError("rewards, dones, and task_ids must have the same shape.")

        # === Update G (running discounted return) ===
        # Gather the previous G values for the tasks in the batch
        prev_G = self.G.gather(0, task_ids)
        # Update G for each environment based on its own reward and done signal
        new_G = self.gamma * (1 - dones.float()) * prev_G + rewards
        # Scatter the updated G values back to the main buffer.
        # NOTE(review): G has one slot per task, so when several environments
        # share a task id only one of their values is kept here - confirm
        # that this is intended.
        self.G.scatter_(0, task_ids, new_G)

        # === Update G_rms (statistics of G) ===
        # The update function handles the per-task logic internally
        self.G_rms.update(new_G.unsqueeze(-1), task_ids)

        # === Update G_r_max (running max of |G|) ===
        # Reduce with "amax" so that, when a task id occurs several times in
        # the batch, the stored value is the maximum over the old value and
        # every environment of that task, not an arbitrary one of them.
        self.G_r_max.scatter_reduce_(
            0, task_ids, torch.abs(new_G), reduce="amax", include_self=True
        )

    def forward(self, rewards: torch.Tensor, task_ids: torch.Tensor) -> torch.Tensor:
        """
        Normalizes rewards with the current per-task statistics.

        The statistics are not modified here; call ``update_stats`` to
        advance them.

        Args:
            rewards (torch.Tensor): Reward tensor, shape [num_envs].
            task_ids (torch.Tensor): Task indices, shape [num_envs].
        """
        return self._scale_reward(rewards, task_ids)
|
||||||
|
|
||||||
|
|
||||||
|
def cpu_state(sd):
    """Return a copy of state dict ``sd`` with every tensor detached and on the CPU.

    Args:
        sd: Mapping from names to tensors (e.g. a module ``state_dict()``).

    Returns:
        A new dict with the same keys whose values are detached CPU tensors.
    """
    # detach & move to host without locking the compute stream
    host_state = {k: v.detach().to("cpu", non_blocking=True) for k, v in sd.items()}
    # A non-blocking device-to-host copy may still be in flight when the call
    # returns; wait for it so callers never read or serialize partial data.
    if any(v.is_cuda for v in sd.values()):
        torch.cuda.synchronize()
    return host_state
|
||||||
|
|
||||||
|
|
||||||
|
def save_params(
    global_step,
    actor,
    qnet,
    qnet_target,
    obs_normalizer,
    critic_obs_normalizer,
    args,
    save_path,
):
    """Save model parameters and training configuration to disk.

    Args:
        global_step: Training step stored alongside the weights.
        actor: Module whose ``state_dict()`` is saved as ``actor_state_dict``.
        qnet: Module saved as ``qnet_state_dict``.
        qnet_target: Module saved as ``qnet_target_state_dict``.
        obs_normalizer: Observation normalizer; its state is saved only if it
            exposes ``state_dict`` (otherwise ``None`` is stored).
        critic_obs_normalizer: Same as above for the critic observations.
        args: Configuration object; ``vars(args)`` is stored.
        save_path: Destination file. Missing parent directories are created.
    """
    # A bare file name has an empty dirname, which os.makedirs rejects.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    save_dict = {
        "actor_state_dict": cpu_state(actor.state_dict()),
        "qnet_state_dict": cpu_state(qnet.state_dict()),
        "qnet_target_state_dict": cpu_state(qnet_target.state_dict()),
        "obs_normalizer_state": (
            cpu_state(obs_normalizer.state_dict())
            if hasattr(obs_normalizer, "state_dict")
            else None
        ),
        "critic_obs_normalizer_state": (
            cpu_state(critic_obs_normalizer.state_dict())
            if hasattr(critic_obs_normalizer, "state_dict")
            else None
        ),
        "args": vars(args),  # Save all arguments
        "global_step": global_step,
    }
    torch.save(save_dict, save_path, _use_new_zipfile_serialization=True)
    print(f"Saved parameters and configuration to {save_path}")
|
||||||
|
|
||||||
|
|
||||||
|
def hl_gauss(inp, vmin, vmax, num_atoms):
    """Turn scalar values into a smoothed categorical distribution over bins.

    ``inp`` is clipped to ``[vmin, vmax]``. ``num_atoms + 1`` evenly spaced
    bin edges are laid out from half a bin below ``vmin`` to half a bin above
    ``vmax``; each bin receives the difference of ``erf`` evaluated at its two
    edges (centred on the clipped value, width 0.75 bins), and the result is
    renormalized by the total mass between the outermost edges.

    Args:
        inp: Values to encode; the last dimension is replaced by the bins
            (presumably of size 1 - confirm against callers).
        vmin: Lower end of the value range.
        vmax: Upper end of the value range.
        num_atoms: Number of bins.

    Returns:
        Tensor of shape ``(*inp.shape[:-1], num_atoms)``.
    """
    clipped = torch.clip(inp, vmin, max=vmax)
    bin_width = (vmax - vmin) / (num_atoms - 1)
    half_bin = bin_width / 2
    edges = torch.linspace(
        vmin - half_bin,
        vmax + half_bin,
        num_atoms + 1,
        device=inp.device,
    )

    # Smoothing width expressed as a fraction of the bin width
    sigma_to_final_sigma_ratio = 0.75
    sigma = bin_width * sigma_to_final_sigma_ratio
    erf_scale = torch.sqrt(torch.tensor(2.0)) * sigma + 1e-6

    # erf of the scaled distance from the clipped value to every bin edge
    edge_offsets = (edges.unsqueeze(0) - clipped).squeeze()
    cdf_at_edges = torch.erf(edge_offsets / erf_scale)

    per_bin_mass = cdf_at_edges[..., 1:] - cdf_at_edges[..., :-1]
    total_mass = cdf_at_edges[..., -1] - cdf_at_edges[..., 0]
    normalized = per_bin_mass / (total_mass.unsqueeze(-1) + 1e-6)

    return normalized.reshape(*inp.shape[:-1], num_atoms)
|
||||||
0
reppo/torchrl/tensordict_replay_buffer.py
Normal file
0
reppo/torchrl/tensordict_replay_buffer.py
Normal file
21
results/AcrobotSwingupSparse_fasttd3_large.csv
Normal file
21
results/AcrobotSwingupSparse_fasttd3_large.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20
|
||||||
|
0.0,0.1748046875,0.328125,0.177734375,0.095703125,0.1328125,0.2919921875,0.310546875,0.1845703125,0.802734375,0.0966796875,0.3037109375,0.189453125,0.212890625,0.203125,0.2333984375,0.103515625,0.4228515625,0.3291015625,0.2216796875,0.2509765625,0.5458984375
|
||||||
|
2631578.947368421,0.1840884560032895,0.3241930509868421,0.19501856753700658,0.09837903474506579,0.13401392886513158,0.2982177734375,0.308990478515625,0.18287739000822367,0.787798429790296,0.09864566200657895,0.3184011358963816,0.18890702097039475,0.21338211862664475,0.21437474300986842,0.23593782123766446,0.10291491056743421,0.4170628597861842,0.3284462376644737,0.22214387592516446,0.2504031532689145,0.5388810007195723
|
||||||
|
5263157.894736842,0.48848684210526316,0.19525467722039475,0.7890721371299342,0.18916722347861842,0.17818410773026316,0.5055285002055921,0.2550466437088816,0.12730006167763158,0.26593338815789475,0.16524465460526316,0.7958984375,0.17063181023848684,0.23991634971217107,0.5994616056743421,0.32465563322368424,0.08607241981907895,0.2334048622532895,0.2993292557565789,0.24789268092105265,0.23150634765625,0.31081350226151316
|
||||||
|
7894736.842105264,0.19452064915707243,0.337476228412829,0.6555609452097039,0.15161293431332237,0.21129567999588814,0.34132304944490127,0.2538805509868421,0.20350566663240138,0.22061478464226975,0.14499865080180924,0.2727050781250001,0.18040546618009867,0.37197233501233545,0.501569245990954,0.3275258917557566,0.15708200555098686,0.5201978181537829,0.21253405119243426,0.39639442845394735,0.2802332827919409,0.5455161646792762
|
||||||
|
10526315.789473685,0.3692626953125,0.431993986430921,0.48030170641447373,0.1561536287006579,0.13720060649671054,0.24969803659539475,0.22721140008223686,0.4422350431743421,0.2071276212993421,0.2340794613486842,0.7223864103618421,0.15472090871710525,0.10044459292763161,0.542223478618421,0.27034719366776316,0.19580078125,0.41299599095394735,0.43228952508223684,0.24915193256578946,0.641627261513158,0.22753263774671056
|
||||||
|
13157894.736842105,0.36103098016036184,0.20296759354440788,0.6290267141241777,0.18700850637335525,0.19858912417763158,0.3980054353412829,0.34470407586348684,0.4927817896792763,0.19361154656661184,0.22700420178865127,1.0422427528782894,0.15350020559210525,0.3347312525699013,0.5613451505962171,0.2114000822368421,0.2524349814967105,0.4295397306743421,0.42914139597039475,0.2937798751027961,0.8010012978001645,0.5903866416529605
|
||||||
|
15789473.684210528,0.24551873458059223,0.2576486687911186,0.4504651521381582,0.2689305355674342,0.2162925318667763,0.30931653474506576,0.3723979749177635,0.3467503597861844,0.2554000051398028,0.7705078125,0.8545403731496707,0.25297787314967113,0.44676128186677644,0.5716103001644737,0.2556473581414474,0.2074360094572369,0.40404630962171056,0.4277472245065789,0.4140303762335526,0.7098388671874998,0.43176590768914475
|
||||||
|
18421052.63157895,0.370945980674342,0.5088757966694079,0.8132741827713814,0.18149124948601975,0.20238133480674342,0.503620348478619,0.7649504009046051,0.5583913702713814,0.5247963353207237,0.6386236893503286,0.26464522512335525,0.4163352564761513,0.6332831131784539,0.556126644736842,0.34198801141036184,0.25974153217516444,0.45157181589226975,0.43826454564144735,0.41545185289884873,0.42344424599095404,0.3832509894120065
|
||||||
|
21052631.57894737,0.14837325246710528,0.5150981702302632,0.8915758634868425,0.17590974506578946,0.23987458881578952,1.4712556537828947,0.3565481085526316,0.324976870888158,0.5488666735197368,0.19694438733552644,0.31960577713815796,0.3659025493421052,0.5372378700657896,0.5097913240131579,0.3429918791118421,0.22906815378289475,0.4058773643092105,0.489026521381579,0.6003417968750001,0.655029296875,0.31978567023026316
|
||||||
|
23684210.52631579,0.1733494808799342,0.4554989463404605,1.982894094366776,0.18209999486019737,0.3596352025082237,0.935869718852796,0.40523488898026316,1.0166497481496712,0.603607177734375,0.5259993703741777,0.6458178068462171,0.1646824886924342,0.8397361353824013,0.535479093852796,0.3265477230674342,0.22773983604029605,0.39511269017269735,0.6545426218133223,0.8703372353001645,0.974953099300987,0.39999550267269735
|
||||||
|
26315789.47368421,0.2528236790707237,0.5258339329769737,0.6802593030427632,0.16102680407072367,0.30910451788651316,0.5595638877467105,0.4016209652549342,1.3741390830592106,0.5273533871299342,0.43395674856085525,0.813672517475329,0.2581947728207237,0.618250796669408,0.5655549701891447,0.5877621299342104,0.20391203227796054,0.39954576994243424,0.5987516704358553,0.7362285413240132,1.4764693410773027,0.44620554070723684
|
||||||
|
28947368.42105263,0.3994333367598685,0.9606403551603617,0.7533151726973685,0.13401714124177633,0.36729511461759873,0.7690285130550986,0.41407213712993424,0.5455129523026315,0.5043591951069079,0.27301507247121715,1.0926545795641447,0.34652870579769735,0.47317665501644735,0.545074462890625,1.0787048339843752,0.20206973427220393,0.37504497327302627,0.4582310726768092,0.4192456697162829,0.6424303556743421,0.4377328973067434
|
||||||
|
31578947.368421055,0.2634727076480262,1.0799175061677624,0.6831632915296052,0.1430599814967106,0.32739900287828994,1.113345497532895,0.3898347553453948,0.5399748149671051,0.6196481805098688,0.14470471833881587,1.0172311883223688,0.39645867598684226,0.4613165604440788,0.5476266961348685,0.5277099609375004,0.1705643503289472,0.5230006167763157,0.4680882504111842,0.42450914884868424,0.6711747018914475,0.3981226870888158
|
||||||
|
34210526.315789476,0.6013986687911195,0.5434104517886513,0.7230706465871712,0.17978065892269735,0.45540418122944043,0.9985865542763152,0.4253459729646381,0.45985653525904613,0.7354077790912825,0.472365529913652,1.0472074809827299,0.4830450760690789,0.3867155376233553,0.5802548057154605,0.6627133018092101,0.1357148822985199,0.4906776829769737,0.5056650262129935,0.4311089766652961,0.7133516010485196,0.6203822085731914
|
||||||
|
36842105.2631579,0.8890316611842097,0.544324372944079,0.7553582442434209,0.16457326788651316,0.19467323704769735,0.5453298468338815,0.5648996453536188,0.48393490439967096,0.4972405684621711,0.8378906249999998,0.7346609015213815,0.38804546155427616,0.42256244860197373,0.5904830129523027,0.6089991519325663,0.25921630859375,0.46831632915296045,0.5201897872121709,0.4439600894325658,0.6452122738486842,0.7665148283305916
|
||||||
|
39473684.21052632,0.21754857113486836,1.9864807128906272,1.1047122353001653,0.169921875,0.18989482678865133,1.1913709138569089,0.7123075786389801,0.3249945389597038,0.4986138594777961,0.5544224789268091,0.5165485582853616,0.20894743266858545,0.41566386975740127,0.7173156738281252,0.7566480134662825,0.23446494654605257,0.4399269505550987,0.4410609195106908,1.0174897846422706,1.3514934339021392,0.3843544407894737
|
||||||
|
42105263.15789474,0.27564761513157904,2.5264956825657876,1.2739771792763153,0.1703587582236842,0.21910978618421056,2.3172800164473686,0.4689555921052631,0.6794819078947374,0.5032894736842105,0.43744860197368424,1.7810701069078962,0.2857216282894738,0.42917351973684215,0.7801706414473684,0.42120682565789475,0.19855057565789475,0.4755088404605263,0.44449013157894735,1.3637438322368416,1.5823139391447363,0.44397615131578955
|
||||||
|
44736842.10526316,0.41042769582648025,0.61688232421875,0.7045384457236843,0.28601716694078955,0.29529772306743424,1.5944294176603613,0.4669269762541119,0.8442077636718748,0.48609201531661184,0.40747712787828944,2.7471891704358544,0.31205990439967096,0.4864293148643092,0.5992511950041118,0.3608735737047697,0.2589898360402961,0.7906783254523029,0.43679488332648025,0.703727320620888,1.0970715974506582,0.3933555201480262
|
||||||
|
47368421.05263158,0.4002942537006579,0.5106843647203947,0.9419876901726975,0.3354299444901315,0.4366037469161185,0.702235171669408,0.4573524876644737,1.1186812551398029,0.4933825041118421,0.3423012181332237,1.0062480725740128,0.18820351048519737,0.41626297800164475,0.5100964997944079,0.664014314350329,0.23045911287006576,1.3811709755345396,0.44484670538651316,0.5263093647203947,1.3432681435032894,0.22702186986019737
|
||||||
|
50000000.0,0.3857421875,0.501953125,1.103515625,0.2587890625,0.5224609375,0.7890625,0.3583984375,1.8603515625,0.5263671875,0.3447265625,0.25390625,0.220703125,0.3798828125,0.486328125,1.025390625,0.1552734375,1.6494140625,0.4482421875,0.517578125,1.123046875,0.22265625
|
||||||
|
21
results/AcrobotSwingupSparse_fasttd3_large_fasttd.csv
Normal file
21
results/AcrobotSwingupSparse_fasttd3_large_fasttd.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20
|
||||||
|
0.0,0.1748046875,0.328125,0.177734375,0.095703125,0.1328125,0.2919921875,0.310546875,0.1845703125,0.802734375,0.0966796875,0.3037109375,0.189453125,0.212890625,0.203125,0.2333984375,0.103515625,0.4228515625,0.3291015625,0.2216796875,0.2509765625,0.5458984375
|
||||||
|
2631578.947368421,0.1840884560032895,0.3241930509868421,0.19501856753700658,0.09837903474506579,0.13401392886513158,0.2982177734375,0.308990478515625,0.18287739000822367,0.787798429790296,0.09864566200657895,0.3184011358963816,0.18890702097039475,0.21338211862664475,0.21437474300986842,0.23593782123766446,0.10291491056743421,0.4170628597861842,0.3284462376644737,0.22214387592516446,0.2504031532689145,0.5388810007195723
|
||||||
|
5263157.894736842,0.48848684210526316,0.19525467722039475,0.7890721371299342,0.18916722347861842,0.17818410773026316,0.5055285002055921,0.2550466437088816,0.12730006167763158,0.26593338815789475,0.16524465460526316,0.7958984375,0.17063181023848684,0.23991634971217107,0.5994616056743421,0.32465563322368424,0.08607241981907895,0.2334048622532895,0.2993292557565789,0.24789268092105265,0.23150634765625,0.31081350226151316
|
||||||
|
7894736.842105264,0.19452064915707243,0.337476228412829,0.6555609452097039,0.15161293431332237,0.21129567999588814,0.34132304944490127,0.2538805509868421,0.20350566663240138,0.22061478464226975,0.14499865080180924,0.2727050781250001,0.18040546618009867,0.37197233501233545,0.501569245990954,0.3275258917557566,0.15708200555098686,0.5201978181537829,0.21253405119243426,0.39639442845394735,0.2802332827919409,0.5455161646792762
|
||||||
|
10526315.789473685,0.3692626953125,0.431993986430921,0.48030170641447373,0.1561536287006579,0.13720060649671054,0.24969803659539475,0.22721140008223686,0.4422350431743421,0.2071276212993421,0.2340794613486842,0.7223864103618421,0.15472090871710525,0.10044459292763161,0.542223478618421,0.27034719366776316,0.19580078125,0.41299599095394735,0.43228952508223684,0.24915193256578946,0.641627261513158,0.22753263774671056
|
||||||
|
13157894.736842105,0.36103098016036184,0.20296759354440788,0.6290267141241777,0.18700850637335525,0.19858912417763158,0.3980054353412829,0.34470407586348684,0.4927817896792763,0.19361154656661184,0.22700420178865127,1.0422427528782894,0.15350020559210525,0.3347312525699013,0.5613451505962171,0.2114000822368421,0.2524349814967105,0.4295397306743421,0.42914139597039475,0.2937798751027961,0.8010012978001645,0.5903866416529605
|
||||||
|
15789473.684210528,0.24551873458059223,0.2576486687911186,0.4504651521381582,0.2689305355674342,0.2162925318667763,0.30931653474506576,0.3723979749177635,0.3467503597861844,0.2554000051398028,0.7705078125,0.8545403731496707,0.25297787314967113,0.44676128186677644,0.5716103001644737,0.2556473581414474,0.2074360094572369,0.40404630962171056,0.4277472245065789,0.4140303762335526,0.7098388671874998,0.43176590768914475
|
||||||
|
18421052.63157895,0.370945980674342,0.5088757966694079,0.8132741827713814,0.18149124948601975,0.20238133480674342,0.503620348478619,0.7649504009046051,0.5583913702713814,0.5247963353207237,0.6386236893503286,0.26464522512335525,0.4163352564761513,0.6332831131784539,0.556126644736842,0.34198801141036184,0.25974153217516444,0.45157181589226975,0.43826454564144735,0.41545185289884873,0.42344424599095404,0.3832509894120065
|
||||||
|
21052631.57894737,0.14837325246710528,0.5150981702302632,0.8915758634868425,0.17590974506578946,0.23987458881578952,1.4712556537828947,0.3565481085526316,0.324976870888158,0.5488666735197368,0.19694438733552644,0.31960577713815796,0.3659025493421052,0.5372378700657896,0.5097913240131579,0.3429918791118421,0.22906815378289475,0.4058773643092105,0.489026521381579,0.6003417968750001,0.655029296875,0.31978567023026316
|
||||||
|
23684210.52631579,0.1733494808799342,0.4554989463404605,1.982894094366776,0.18209999486019737,0.3596352025082237,0.935869718852796,0.40523488898026316,1.0166497481496712,0.603607177734375,0.5259993703741777,0.6458178068462171,0.1646824886924342,0.8397361353824013,0.535479093852796,0.3265477230674342,0.22773983604029605,0.39511269017269735,0.6545426218133223,0.8703372353001645,0.974953099300987,0.39999550267269735
|
||||||
|
26315789.47368421,0.2528236790707237,0.5258339329769737,0.6802593030427632,0.16102680407072367,0.30910451788651316,0.5595638877467105,0.4016209652549342,1.3741390830592106,0.5273533871299342,0.43395674856085525,0.813672517475329,0.2581947728207237,0.618250796669408,0.5655549701891447,0.5877621299342104,0.20391203227796054,0.39954576994243424,0.5987516704358553,0.7362285413240132,1.4764693410773027,0.44620554070723684
|
||||||
|
28947368.42105263,0.3994333367598685,0.9606403551603617,0.7533151726973685,0.13401714124177633,0.36729511461759873,0.7690285130550986,0.41407213712993424,0.5455129523026315,0.5043591951069079,0.27301507247121715,1.0926545795641447,0.34652870579769735,0.47317665501644735,0.545074462890625,1.0787048339843752,0.20206973427220393,0.37504497327302627,0.4582310726768092,0.4192456697162829,0.6424303556743421,0.4377328973067434
|
||||||
|
31578947.368421055,0.2634727076480262,1.0799175061677624,0.6831632915296052,0.1430599814967106,0.32739900287828994,1.113345497532895,0.3898347553453948,0.5399748149671051,0.6196481805098688,0.14470471833881587,1.0172311883223688,0.39645867598684226,0.4613165604440788,0.5476266961348685,0.5277099609375004,0.1705643503289472,0.5230006167763157,0.4680882504111842,0.42450914884868424,0.6711747018914475,0.3981226870888158
|
||||||
|
34210526.315789476,0.6013986687911195,0.5434104517886513,0.7230706465871712,0.17978065892269735,0.45540418122944043,0.9985865542763152,0.4253459729646381,0.45985653525904613,0.7354077790912825,0.472365529913652,1.0472074809827299,0.4830450760690789,0.3867155376233553,0.5802548057154605,0.6627133018092101,0.1357148822985199,0.4906776829769737,0.5056650262129935,0.4311089766652961,0.7133516010485196,0.6203822085731914
|
||||||
|
36842105.2631579,0.8890316611842097,0.544324372944079,0.7553582442434209,0.16457326788651316,0.19467323704769735,0.5453298468338815,0.5648996453536188,0.48393490439967096,0.4972405684621711,0.8378906249999998,0.7346609015213815,0.38804546155427616,0.42256244860197373,0.5904830129523027,0.6089991519325663,0.25921630859375,0.46831632915296045,0.5201897872121709,0.4439600894325658,0.6452122738486842,0.7665148283305916
|
||||||
|
39473684.21052632,0.21754857113486836,1.9864807128906272,1.1047122353001653,0.169921875,0.18989482678865133,1.1913709138569089,0.7123075786389801,0.3249945389597038,0.4986138594777961,0.5544224789268091,0.5165485582853616,0.20894743266858545,0.41566386975740127,0.7173156738281252,0.7566480134662825,0.23446494654605257,0.4399269505550987,0.4410609195106908,1.0174897846422706,1.3514934339021392,0.3843544407894737
|
||||||
|
42105263.15789474,0.27564761513157904,2.5264956825657876,1.2739771792763153,0.1703587582236842,0.21910978618421056,2.3172800164473686,0.4689555921052631,0.6794819078947374,0.5032894736842105,0.43744860197368424,1.7810701069078962,0.2857216282894738,0.42917351973684215,0.7801706414473684,0.42120682565789475,0.19855057565789475,0.4755088404605263,0.44449013157894735,1.3637438322368416,1.5823139391447363,0.44397615131578955
|
||||||
|
44736842.10526316,0.41042769582648025,0.61688232421875,0.7045384457236843,0.28601716694078955,0.29529772306743424,1.5944294176603613,0.4669269762541119,0.8442077636718748,0.48609201531661184,0.40747712787828944,2.7471891704358544,0.31205990439967096,0.4864293148643092,0.5992511950041118,0.3608735737047697,0.2589898360402961,0.7906783254523029,0.43679488332648025,0.703727320620888,1.0970715974506582,0.3933555201480262
|
||||||
|
47368421.05263158,0.4002942537006579,0.5106843647203947,0.9419876901726975,0.3354299444901315,0.4366037469161185,0.702235171669408,0.4573524876644737,1.1186812551398029,0.4933825041118421,0.3423012181332237,1.0062480725740128,0.18820351048519737,0.41626297800164475,0.5100964997944079,0.664014314350329,0.23045911287006576,1.3811709755345396,0.44484670538651316,0.5263093647203947,1.3432681435032894,0.22702186986019737
|
||||||
|
50000000.0,0.3857421875,0.501953125,1.103515625,0.2587890625,0.5224609375,0.7890625,0.3583984375,1.8603515625,0.5263671875,0.3447265625,0.25390625,0.220703125,0.3798828125,0.486328125,1.025390625,0.1552734375,1.6494140625,0.4482421875,0.517578125,1.123046875,0.22265625
|
||||||
|
21
results/AcrobotSwingupSparse_fasttd3_small.csv
Normal file
21
results/AcrobotSwingupSparse_fasttd3_small.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23
|
||||||
|
0.0,0.4677734375,0.23828125,0.314453125,0.548828125,0.3359375,0.783203125,0.60546875,0.408203125,0.939453125,0.1728515625,1.0234375,0.2978515625,0.3779296875,0.3232421875,0.3056640625,0.1416015625,0.25,0.6064453125,0.578125,0.2294921875,0.0869140625,0.255859375,0.4873046875,0.419921875
|
||||||
|
2631578.947368421,0.47522775750411184,0.24128482216282895,0.3237368935032895,0.5765429045024671,0.3386134097450658,0.7858517295435855,0.610547517475329,0.4032335783305921,0.9323537726151315,0.1807154605263158,1.0213623046875,0.2971689324629934,0.3782846551192434,0.3709443744860198,0.31467477898848684,0.15934994346217107,0.25442344263980265,0.6057899876644737,0.5763501619037829,0.2389124820106908,0.08705058850740131,0.2628495065789474,0.518732974403783,0.42560135690789475
|
||||||
|
5263157.894736842,0.7239990234375,0.3556422183388158,0.6561504163240132,1.6154014185855263,0.43136757298519735,0.8792403371710527,0.7970484683388158,0.24428518194901316,0.6889327199835527,0.4528455232319079,0.949109529194079,0.28201133326480265,0.39837967722039475,1.9478181537828947,0.627820466694079,0.7923680355674342,0.4122989052220395,0.5696828741776315,0.5241506476151316,0.5731779399671053,0.09092310855263158,0.5571385433799343,1.5620149311266447,0.615892912212171
|
||||||
|
7894736.842105264,0.5552175421463816,0.5139352898848684,0.8040996350740132,2.707399067125822,0.4177470960115131,0.897844816509046,0.9603383917557565,0.48065185546875,0.7569066097861843,0.4377023797286184,0.9480028654399671,0.4363419382195724,0.529706453022204,0.6677792197779608,0.6335127981085527,1.0373519094366777,0.4820187217310855,0.3935482627467107,0.6843245656866777,0.6967564633018093,0.07813784950657895,1.3374955026726971,0.7738085295024673,0.5054385536595395
|
||||||
|
10526315.789473685,0.6130949321546052,0.41277754934210525,0.6388967413651315,0.7638389185855263,0.3223941200657895,0.855115388569079,0.9168122944078947,0.5195633737664473,0.8672453227796053,0.49309339021381576,0.960205078125,0.5501901726973685,0.5303569592927632,1.663098787006579,0.7229196648848685,0.7749280427631579,0.4860968338815789,0.9209691097861842,0.687975431743421,0.7988345497532895,0.09818950452302631,0.4064748663651316,1.2171823601973684,0.609754060444079
|
||||||
|
13157894.736842105,0.5152780633223685,0.4240369294819079,0.6664589329769737,1.134177760074013,0.35869397615131576,0.8262039987664473,0.9690134148848685,0.55340576171875,0.8097598427220395,0.4206061112253289,0.9967137386924342,0.6259139211554277,0.47010080437911184,1.312556216591283,0.7749280427631579,0.8951432077508223,0.4720266241776316,0.8452373303865132,0.737871671977796,0.8056849429481908,0.08759669253700658,0.4248898154810855,0.9582888954564145,0.5981252569901315
|
||||||
|
15789473.684210528,0.49380653782894735,0.38178132709703955,0.7684968647203947,2.090666118421052,0.42508095189144735,0.8543155067845395,1.0089271946957237,0.534754702919408,0.8336502878289473,0.5039126747532895,0.9122282329358549,0.6273353978207237,0.31382028680098695,1.5233090049342106,0.7577129163240132,1.263851768092106,0.4648276881167762,0.6634200246710527,0.7173301295230262,0.7930876079358552,0.09032239412006579,0.4714644582648026,1.348619962993421,0.8901527806332236
|
||||||
|
18421052.63157895,0.4867312782689145,0.45927991365131576,0.7473546078330593,1.5748001901726971,0.39897236071134873,0.846234773334704,0.987749601665296,0.6122452585320723,0.8326913934004935,0.5028638337787829,0.378649259868421,0.5401852256373355,0.4641691509046053,1.6434165553042763,0.8027970163445723,2.168865003083881,0.34897974917763164,0.7622407612047699,0.6462787828947368,0.7712338096217105,0.08281667608963816,0.4293469880756579,1.3432633249383223,0.645525480571546
|
||||||
|
21052631.57894737,0.4948601973684211,0.4323473478618421,0.8724943462171053,1.1141293174342106,0.4361379523026316,0.8806923314144737,0.9792865953947368,0.6245502672697368,0.8836477179276316,0.501233552631579,0.5627826891447371,0.5207776521381579,0.5234760485197368,1.6124588815789473,0.7833701685855263,1.710809004934211,0.4428967927631579,1.060778166118421,0.7079178659539473,0.8216359991776316,0.08334189967105263,0.42953330592105265,1.373560855263158,0.6098375822368421
|
||||||
|
23684210.52631579,0.5287925318667763,0.40749158357319076,0.971117521587171,1.9908029656661186,0.4106862921463816,0.9523106625205592,0.9765046772203947,0.5373470908717105,0.9531844289679277,0.568036531147204,1.2821815892269737,0.5697166041324013,0.536572908100329,1.8591902883429277,0.7674030504728618,3.4662186472039473,0.4816653603001645,0.8218046489514803,0.6790787546258223,0.8194917377672697,0.08887361225328948,0.47222900390625,1.5904958624588816,0.6262287340666118
|
||||||
|
26315789.47368421,0.5157502826891447,0.41016588712993424,0.9317659076891447,4.164682488692434,0.41548879523026316,0.9117624383223685,0.9239823190789473,0.746264005962171,0.8560341282894737,0.5797376130756579,1.2406937448601973,0.6211869089226973,0.4827013517680921,1.7503469366776316,0.7565050627055921,9.043595163445723,0.4403943513569079,0.9655183490953947,0.7088655170641447,0.803052400287829,0.09070466694078948,0.4915353875411184,2.077427914268092,0.6154110557154605
|
||||||
|
28947368.42105263,0.5105879934210527,0.3934791966488487,0.9548371967516447,8.639731959292762,0.39632696854440785,0.8801928068462171,0.8585478130139803,1.0641286749588816,0.930394222861842,0.6612998560855263,1.5844967490748352,0.5991564298930921,0.4045956260279605,1.955535888671875,0.6564716539884868,22.66107980828536,0.3779232627467105,0.8646963019120066,0.6451303582442434,0.8990992495888157,0.08098240902549343,0.45110441509046056,1.9963186163651316,0.5763228567023027
|
||||||
|
31578947.368421055,0.5164987664473684,0.3476690995065788,1.0118600945723684,13.908832750822377,0.4312872635690789,0.8241930509868421,0.8999794407894738,1.2379086143092106,1.0876529091282892,0.5647486636513158,2.105622944078946,0.6060855263157895,0.40881990131578944,1.953690378289474,0.6823280736019737,25.559685958059212,0.4373329564144738,0.5644145764802633,0.6739501953125001,0.8933298211348681,0.08574475740131585,0.44882684004934226,2.2047954358552637,0.5725932874177632
|
||||||
|
34210526.315789476,0.5630894711143094,0.2600290398848684,1.0167541503906252,16.49586085269325,0.4207217567845395,0.7727452328330591,0.9708814119037829,1.3840299907483553,0.896049097964638,0.6018098530016448,1.7122144197162839,0.5768898411800987,0.4026971114309211,1.8417840254934206,0.7199819464432566,26.103899504009043,0.5059396844161185,0.6886387875205593,0.7080977590460527,0.7641890676398028,0.1082281815378289,0.5099503366570723,2.496940211245888,0.5686276084498355
|
||||||
|
36842105.2631579,0.6720773797286184,0.3225386770148027,1.2828369140625002,12.712983783922697,0.4250199167351974,0.7478958932976975,0.9975907175164475,1.4516665810032894,0.8807822779605263,0.6336862664473684,2.3220953690378288,0.6012862356085528,0.39142488178453944,1.8977693256578956,0.7333438270970394,23.689783999794408,0.4865626284950657,0.9328099300986845,0.6889809056332237,0.9455245168585529,0.12684390419407904,0.4512039987664473,2.2551655016447367,0.5685746042351973
|
||||||
|
39473684.21052632,0.6212816740337171,0.3954756887335526,1.5771227384868423,13.892448023745889,0.4088295384457236,0.8461464329769737,1.004625822368421,1.4943430047286186,0.922254060444079,0.5547501413445723,2.579575388055099,0.7662016216077303,0.37623034025493424,2.360335500616776,0.7340666118421053,25.917310212787832,0.4614177503083883,1.1736658999794407,0.6215290270353617,1.054962158203125,0.17379278885690785,0.4379449141652961,2.445249858655428,0.5527938039679275
|
||||||
|
42105263.15789474,0.6224814967105263,0.4769222861842106,1.8918328536184212,11.578150699013154,0.4010331003289474,0.8413856907894737,0.9921361019736843,1.2933028371710522,0.9226716694078947,0.5861687911184211,4.3059210526315805,0.7286698190789472,0.3924496299342105,2.6044150904605265,0.8162520559210527,23.7096525493421,0.5126953125,1.3959960937500004,0.6367958470394738,0.8885433799342104,0.1440686677631579,0.4612458881578947,2.6436574835526314,0.5451274671052632
|
||||||
|
44736842.10526316,0.5815766987047697,1.0654441431949015,3.4354970831620073,7.4593505859375,0.4371803685238487,0.8028307462993421,1.02630615234375,1.562397203947369,0.9073229337993421,0.6526280453330592,5.857717413651316,0.5847633763363487,0.4012049624794408,2.7349853515625,0.8401938990542763,13.982601768092103,0.6160422877261514,1.697239925986842,0.7134383352179277,1.1035284745065792,0.17635465923108554,0.45049727590460525,2.6348636024876644,0.6605754651521382
|
||||||
|
47368421.05263158,0.48906506990131576,1.138620476973684,5.909616570723685,15.516659385279608,0.43015329461348684,0.7811215049342105,0.9449077405427632,3.0830270867598686,0.9180522717927632,0.7229517886513158,16.223035310444082,0.7626503392269737,0.40914113898026316,2.274693539268092,0.8495836759868421,28.019084729646387,1.2442755448190792,3.7878578587582243,0.6643741005345395,1.1563784950657894,0.2154637386924342,0.4481329666940789,2.837942023026316,0.7490491365131579
|
||||||
|
50000000.0,0.4599609375,0.6884765625,7.0078125,23.0458984375,0.4267578125,0.76171875,0.8837890625,3.8232421875,0.9306640625,0.767578125,26.6025390625,0.9033203125,0.4033203125,2.2431640625,0.845703125,45.2783203125,1.70703125,5.8916015625,0.6376953125,0.880859375,0.2353515625,0.431640625,2.845703125,0.7529296875
|
||||||
|
21
results/AcrobotSwingupSparse_fasttd3_small_fasttd.csv
Normal file
21
results/AcrobotSwingupSparse_fasttd3_small_fasttd.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23
|
||||||
|
0.0,0.4677734375,0.23828125,0.314453125,0.548828125,0.3359375,0.783203125,0.60546875,0.408203125,0.939453125,0.1728515625,1.0234375,0.2978515625,0.3779296875,0.3232421875,0.3056640625,0.1416015625,0.25,0.6064453125,0.578125,0.2294921875,0.0869140625,0.255859375,0.4873046875,0.419921875
|
||||||
|
2631578.947368421,0.47522775750411184,0.24128482216282895,0.3237368935032895,0.5765429045024671,0.3386134097450658,0.7858517295435855,0.610547517475329,0.4032335783305921,0.9323537726151315,0.1807154605263158,1.0213623046875,0.2971689324629934,0.3782846551192434,0.3709443744860198,0.31467477898848684,0.15934994346217107,0.25442344263980265,0.6057899876644737,0.5763501619037829,0.2389124820106908,0.08705058850740131,0.2628495065789474,0.518732974403783,0.42560135690789475
|
||||||
|
5263157.894736842,0.7239990234375,0.3556422183388158,0.6561504163240132,1.6154014185855263,0.43136757298519735,0.8792403371710527,0.7970484683388158,0.24428518194901316,0.6889327199835527,0.4528455232319079,0.949109529194079,0.28201133326480265,0.39837967722039475,1.9478181537828947,0.627820466694079,0.7923680355674342,0.4122989052220395,0.5696828741776315,0.5241506476151316,0.5731779399671053,0.09092310855263158,0.5571385433799343,1.5620149311266447,0.615892912212171
|
||||||
|
7894736.842105264,0.5552175421463816,0.5139352898848684,0.8040996350740132,2.707399067125822,0.4177470960115131,0.897844816509046,0.9603383917557565,0.48065185546875,0.7569066097861843,0.4377023797286184,0.9480028654399671,0.4363419382195724,0.529706453022204,0.6677792197779608,0.6335127981085527,1.0373519094366777,0.4820187217310855,0.3935482627467107,0.6843245656866777,0.6967564633018093,0.07813784950657895,1.3374955026726971,0.7738085295024673,0.5054385536595395
|
||||||
|
10526315.789473685,0.6130949321546052,0.41277754934210525,0.6388967413651315,0.7638389185855263,0.3223941200657895,0.855115388569079,0.9168122944078947,0.5195633737664473,0.8672453227796053,0.49309339021381576,0.960205078125,0.5501901726973685,0.5303569592927632,1.663098787006579,0.7229196648848685,0.7749280427631579,0.4860968338815789,0.9209691097861842,0.687975431743421,0.7988345497532895,0.09818950452302631,0.4064748663651316,1.2171823601973684,0.609754060444079
|
||||||
|
13157894.736842105,0.5152780633223685,0.4240369294819079,0.6664589329769737,1.134177760074013,0.35869397615131576,0.8262039987664473,0.9690134148848685,0.55340576171875,0.8097598427220395,0.4206061112253289,0.9967137386924342,0.6259139211554277,0.47010080437911184,1.312556216591283,0.7749280427631579,0.8951432077508223,0.4720266241776316,0.8452373303865132,0.737871671977796,0.8056849429481908,0.08759669253700658,0.4248898154810855,0.9582888954564145,0.5981252569901315
|
||||||
|
15789473.684210528,0.49380653782894735,0.38178132709703955,0.7684968647203947,2.090666118421052,0.42508095189144735,0.8543155067845395,1.0089271946957237,0.534754702919408,0.8336502878289473,0.5039126747532895,0.9122282329358549,0.6273353978207237,0.31382028680098695,1.5233090049342106,0.7577129163240132,1.263851768092106,0.4648276881167762,0.6634200246710527,0.7173301295230262,0.7930876079358552,0.09032239412006579,0.4714644582648026,1.348619962993421,0.8901527806332236
|
||||||
|
18421052.63157895,0.4867312782689145,0.45927991365131576,0.7473546078330593,1.5748001901726971,0.39897236071134873,0.846234773334704,0.987749601665296,0.6122452585320723,0.8326913934004935,0.5028638337787829,0.378649259868421,0.5401852256373355,0.4641691509046053,1.6434165553042763,0.8027970163445723,2.168865003083881,0.34897974917763164,0.7622407612047699,0.6462787828947368,0.7712338096217105,0.08281667608963816,0.4293469880756579,1.3432633249383223,0.645525480571546
|
||||||
|
21052631.57894737,0.4948601973684211,0.4323473478618421,0.8724943462171053,1.1141293174342106,0.4361379523026316,0.8806923314144737,0.9792865953947368,0.6245502672697368,0.8836477179276316,0.501233552631579,0.5627826891447371,0.5207776521381579,0.5234760485197368,1.6124588815789473,0.7833701685855263,1.710809004934211,0.4428967927631579,1.060778166118421,0.7079178659539473,0.8216359991776316,0.08334189967105263,0.42953330592105265,1.373560855263158,0.6098375822368421
|
||||||
|
23684210.52631579,0.5287925318667763,0.40749158357319076,0.971117521587171,1.9908029656661186,0.4106862921463816,0.9523106625205592,0.9765046772203947,0.5373470908717105,0.9531844289679277,0.568036531147204,1.2821815892269737,0.5697166041324013,0.536572908100329,1.8591902883429277,0.7674030504728618,3.4662186472039473,0.4816653603001645,0.8218046489514803,0.6790787546258223,0.8194917377672697,0.08887361225328948,0.47222900390625,1.5904958624588816,0.6262287340666118
|
||||||
|
26315789.47368421,0.5157502826891447,0.41016588712993424,0.9317659076891447,4.164682488692434,0.41548879523026316,0.9117624383223685,0.9239823190789473,0.746264005962171,0.8560341282894737,0.5797376130756579,1.2406937448601973,0.6211869089226973,0.4827013517680921,1.7503469366776316,0.7565050627055921,9.043595163445723,0.4403943513569079,0.9655183490953947,0.7088655170641447,0.803052400287829,0.09070466694078948,0.4915353875411184,2.077427914268092,0.6154110557154605
|
||||||
|
28947368.42105263,0.5105879934210527,0.3934791966488487,0.9548371967516447,8.639731959292762,0.39632696854440785,0.8801928068462171,0.8585478130139803,1.0641286749588816,0.930394222861842,0.6612998560855263,1.5844967490748352,0.5991564298930921,0.4045956260279605,1.955535888671875,0.6564716539884868,22.66107980828536,0.3779232627467105,0.8646963019120066,0.6451303582442434,0.8990992495888157,0.08098240902549343,0.45110441509046056,1.9963186163651316,0.5763228567023027
|
||||||
|
31578947.368421055,0.5164987664473684,0.3476690995065788,1.0118600945723684,13.908832750822377,0.4312872635690789,0.8241930509868421,0.8999794407894738,1.2379086143092106,1.0876529091282892,0.5647486636513158,2.105622944078946,0.6060855263157895,0.40881990131578944,1.953690378289474,0.6823280736019737,25.559685958059212,0.4373329564144738,0.5644145764802633,0.6739501953125001,0.8933298211348681,0.08574475740131585,0.44882684004934226,2.2047954358552637,0.5725932874177632
|
||||||
|
34210526.315789476,0.5630894711143094,0.2600290398848684,1.0167541503906252,16.49586085269325,0.4207217567845395,0.7727452328330591,0.9708814119037829,1.3840299907483553,0.896049097964638,0.6018098530016448,1.7122144197162839,0.5768898411800987,0.4026971114309211,1.8417840254934206,0.7199819464432566,26.103899504009043,0.5059396844161185,0.6886387875205593,0.7080977590460527,0.7641890676398028,0.1082281815378289,0.5099503366570723,2.496940211245888,0.5686276084498355
|
||||||
|
36842105.2631579,0.6720773797286184,0.3225386770148027,1.2828369140625002,12.712983783922697,0.4250199167351974,0.7478958932976975,0.9975907175164475,1.4516665810032894,0.8807822779605263,0.6336862664473684,2.3220953690378288,0.6012862356085528,0.39142488178453944,1.8977693256578956,0.7333438270970394,23.689783999794408,0.4865626284950657,0.9328099300986845,0.6889809056332237,0.9455245168585529,0.12684390419407904,0.4512039987664473,2.2551655016447367,0.5685746042351973
|
||||||
|
39473684.21052632,0.6212816740337171,0.3954756887335526,1.5771227384868423,13.892448023745889,0.4088295384457236,0.8461464329769737,1.004625822368421,1.4943430047286186,0.922254060444079,0.5547501413445723,2.579575388055099,0.7662016216077303,0.37623034025493424,2.360335500616776,0.7340666118421053,25.917310212787832,0.4614177503083883,1.1736658999794407,0.6215290270353617,1.054962158203125,0.17379278885690785,0.4379449141652961,2.445249858655428,0.5527938039679275
|
||||||
|
42105263.15789474,0.6224814967105263,0.4769222861842106,1.8918328536184212,11.578150699013154,0.4010331003289474,0.8413856907894737,0.9921361019736843,1.2933028371710522,0.9226716694078947,0.5861687911184211,4.3059210526315805,0.7286698190789472,0.3924496299342105,2.6044150904605265,0.8162520559210527,23.7096525493421,0.5126953125,1.3959960937500004,0.6367958470394738,0.8885433799342104,0.1440686677631579,0.4612458881578947,2.6436574835526314,0.5451274671052632
|
||||||
|
44736842.10526316,0.5815766987047697,1.0654441431949015,3.4354970831620073,7.4593505859375,0.4371803685238487,0.8028307462993421,1.02630615234375,1.562397203947369,0.9073229337993421,0.6526280453330592,5.857717413651316,0.5847633763363487,0.4012049624794408,2.7349853515625,0.8401938990542763,13.982601768092103,0.6160422877261514,1.697239925986842,0.7134383352179277,1.1035284745065792,0.17635465923108554,0.45049727590460525,2.6348636024876644,0.6605754651521382
|
||||||
|
47368421.05263158,0.48906506990131576,1.138620476973684,5.909616570723685,15.516659385279608,0.43015329461348684,0.7811215049342105,0.9449077405427632,3.0830270867598686,0.9180522717927632,0.7229517886513158,16.223035310444082,0.7626503392269737,0.40914113898026316,2.274693539268092,0.8495836759868421,28.019084729646387,1.2442755448190792,3.7878578587582243,0.6643741005345395,1.1563784950657894,0.2154637386924342,0.4481329666940789,2.837942023026316,0.7490491365131579
|
||||||
|
50000000.0,0.4599609375,0.6884765625,7.0078125,23.0458984375,0.4267578125,0.76171875,0.8837890625,3.8232421875,0.9306640625,0.767578125,26.6025390625,0.9033203125,0.4033203125,2.2431640625,0.845703125,45.2783203125,1.70703125,5.8916015625,0.6376953125,0.880859375,0.2353515625,0.431640625,2.845703125,0.7529296875
|
||||||
|
21
results/AcrobotSwingupSparse_full_large_data.csv
Normal file
21
results/AcrobotSwingupSparse_full_large_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31,trial_32,trial_33,trial_34,trial_35,trial_36,trial_37,trial_38,trial_39
|
||||||
|
0.0,0.125,0.134765625,0.1162109375,0.1923828125,0.28125,0.1572265625,0.1875,0.150390625,0.1943359375,0.337890625,0.3193359375,0.15234375,0.1875,0.1669921875,0.1474609375,0.21484375,0.1650390625,0.15234375,0.2451171875,0.1640625,0.119140625,0.24609375,0.2158203125,0.1318359375,0.1865234375,0.2822265625,0.24609375,0.2578125,0.1806640625,0.2802734375,0.2412109375,0.1640625,0.140625,0.173828125,0.1552734375,0.1376953125,0.1240234375,0.1337890625,0.2451171875,0.3427734375
|
||||||
|
10526315.789473685,0.16475843920932254,0.16256330812406675,0.1471649312576759,0.21082231146476937,0.3008523502508359,0.1596630128136632,0.18949345934754264,0.16595068268498556,0.19173336557404155,0.3226628105396049,0.31468453235573385,0.15345122741530146,0.19923926060219552,0.16538634524781287,0.15958781519755103,0.23200964993717269,0.20352390268172588,0.15948697932869443,0.25286952940711027,0.17884732349427454,0.1445018578104035,0.24565075903387942,0.2509827204358215,0.13632122103197092,0.21598233674701894,0.2877639495765073,0.24565075903387942,0.2625746528857963,0.1901329944008275,0.27567740622649894,0.26773502159647006,0.17546951737760508,0.1581785170325282,0.17931013820574224,0.19231855704183395,0.1713626259251645,0.13160965779481504,0.13600401733060294,0.24650153426912683,0.3472587210319709
|
||||||
|
21052631.57894737,0.7925045615748355,0.5630920124846481,0.6950014542344535,0.49077717154970457,0.5689213084382033,0.2200191582338962,0.21778334937267357,0.4004401843633678,0.17800714698854908,0.08284716196667785,0.31826128655853697,0.16810957678797503,0.4033910693224117,0.14731019883935143,0.40518423064593795,0.49110941477429504,0.80587427239669,0.28817686149618305,0.3775167755803243,0.4189350571989022,0.6233305891464952,0.2653036989333557,0.7989477543289312,0.21880058964864038,0.706829921690711,0.5544212531515107,0.2653036989333557,0.3322725692284074,0.3443338177540956,0.1967822996863368,0.6891605292661038,0.38284326590329326,0.43524436739343025,0.28479427115738887,0.739708654768249,0.7331065286229522,0.32104946041371357,0.2075156055989358,0.2680915293601081,0.435496972208208
|
||||||
|
31578947.368421055,0.6475754629541964,0.14571632903038312,1.2066467475362765,0.37095567063941864,0.18533769538858272,0.42766055109758466,0.2752074270697513,0.20049200559917246,0.5050318577943418,0.22435883075576746,1.380596242783143,0.19614995285414602,0.4484024946048979,0.28610992167464927,0.7551901043286945,0.3142476993254348,0.6037904575590943,0.33533248478686045,0.32132358234014535,0.38949291461722674,1.3610656558641772,0.4393512588458708,0.5167427327163993,0.3133316383467487,0.8130619572140175,1.6885346362465303,0.4393512588458708,0.24269543328113502,0.3343070497803411,0.21428112970494834,0.5474694164836176,0.49210101679751744,0.3997589650246577,0.3478107425975006,0.20216860467377132,0.7321395345672015,0.783700459551613,0.5459609995918592,0.24637027592540117,0.6135906023992397
|
||||||
|
42105263.15789474,0.9944755681003559,0.49971903328089834,2.449571160398362,0.7257544393354505,0.5020932649311266,1.0112761500139318,0.6091996295960656,0.19593802581533504,0.9587020345672016,0.421342115322969,3.89199744565335,0.5073503721453807,0.45322961358152275,0.5812946330147106,1.2126524441790385,0.42372625884587084,0.9366905656217537,0.2864681465804082,0.3970842863384047,0.5450350267404995,3.9674184698807573,0.28084803023827043,0.5824551040776218,0.4568897363551766,1.309953145373231,1.2081189987732106,0.28084803023827043,0.2714977211569155,0.49561317211373035,0.3573603431934135,0.6778673504858467,0.3867399474590439,0.7534618853201827,0.1588590732902041,0.29717991160553914,0.9121374410275278,0.7950278622952195,0.9647056706394186,0.25135085721425404,1.0601271420634686
|
||||||
|
52631578.94736842,0.48655342197154033,0.7309763636285248,2.781361138721582,0.5829015293279843,0.7452132866983598,1.8104764140543845,0.15890281814617463,0.36336260555193367,0.8197659257376293,0.5689103900560711,5.588392791325366,0.7706739578881092,0.8531122815245736,0.4759909981175473,3.2873511327600875,0.3098878596297922,0.7088196508772155,0.4845670795176498,0.9125889226009971,0.7796082641939708,7.270052067130557,0.5578903266927873,0.6884916020232225,0.5075089858839716,1.491553116373078,2.6708549171933837,0.5578903266927873,0.6377059112295219,0.8262337502350107,0.27412831155877365,1.0517072466271735,0.5382079708279005,0.901442089239316,0.33363230803006244,0.6568204953729941,2.2373502208255336,1.5775630903376106,1.7442312768952006,0.4072204045641786,1.8016450755153666
|
||||||
|
63157894.73684211,1.0420178019769302,1.8103189970317652,5.545044193637671,1.7648259635777368,1.4199973922687221,1.5551879967348736,0.2229905537951357,0.36925956747208266,1.1995302342972276,1.5327045356137612,7.523034003302663,1.158937681414744,0.805724564021314,0.6856986439459212,4.756751147663824,0.2955948996081576,1.8540393797644628,0.20788887002791734,0.8148117171099976,1.0256419036527094,7.311451592273663,0.8515860539063849,0.798549905708292,0.7053573745769806,2.282229357479022,2.193101169660153,0.8515860539063849,1.109277989395438,0.8124757116851382,0.6429493764100647,1.667533314459212,0.5593464130179703,1.7188528964394019,0.3305101738081744,0.9957177856952533,3.8924312116036464,1.8392909940259934,3.733806340648198,0.3027439434442493,2.8920462706082417
|
||||||
|
73684210.5263158,1.3830839933781087,5.100327803487595,13.43898136001545,3.7048471086243184,1.6210746844389439,2.6957534541713897,0.44708755472030015,0.20198059874558377,2.7214035525546514,2.2359649969930455,11.240130662257652,1.3772276379064843,0.8208262477885323,0.824473415385323,3.905194047415355,0.20836535054891064,3.6135144458252975,0.5216871007987999,1.031909335022818,1.270225081087149,12.81709700344012,1.2153364192085592,0.9017106925351467,0.46933654140567505,7.1658145714334545,4.9027279565869275,1.2153364192085592,3.705652979272226,0.717919415714338,0.944001266500626,3.466930384120784,0.5417748131580301,1.5883855317768294,0.4837573730384216,1.7244084376707636,4.0685841858882315,1.1316749197624396,7.66055482246209,0.5972840634079195,3.450969931161305
|
||||||
|
84210526.31578948,4.014672276716154,12.016582013497395,23.181006053808325,3.612718534601692,3.1542508450241304,2.7617867593950183,0.46156744283322143,0.19719761560498184,3.8793698466715725,1.4529278403834294,14.404343877142487,2.9885022488327246,0.6980767289687391,0.755399603592722,8.341413820219175,0.31318774447876996,6.770764527888841,1.1650599640822479,0.7818840639743118,1.2950600494638378,16.08718688204018,2.8845401034791056,0.7440910656366322,0.7709036956533503,16.82825472084109,4.51887216330235,2.8845401034791056,5.652058567036551,1.4381194392069556,0.643355076332832,5.181146996833611,0.8622281399460051,2.1362054882947765,0.5899076382539278,2.9367576873863834,3.752791703242675,0.8795601377196589,9.607740534309535,1.2595998705919431,6.307293815295783
|
||||||
|
94736842.10526316,7.206987671574727,20.472733000969292,27.658756911259278,4.630834341709634,4.179353563409102,4.193443348533229,0.39152398466073246,0.34336410004676543,5.037000735380643,1.9169747439778082,18.562820011889176,8.97109312496027,0.593006905426279,0.3599191464875874,14.953387540463265,0.4500801239647694,13.060928857227442,1.1082677946856807,1.0216030215952865,2.47150509905617,17.433356670791753,4.715693341728063,0.7305781240278334,1.1777716184917248,27.34507172920037,5.5128052677143975,4.715693341728063,5.500632465711261,2.582225485191451,1.1256173786364103,9.04507803982975,1.893804344113844,3.3445423342844784,1.0345239982710652,3.8706329958590775,6.168476234182426,2.845837072652463,10.392869431556427,1.6238624673140676,10.91194662955329
|
||||||
|
105263157.89473684,8.615425152131396,16.700722343043278,44.27714042980585,9.23390510537948,7.044667981007753,6.372787887700047,0.6249945526968409,0.35853823400270246,8.856565808325263,8.447798821404369,21.272593569557422,17.973017911831757,1.7495619596867018,0.3543151475055726,13.826643724520782,0.5491730275246575,18.733312997791575,1.2025583536671138,0.8180130512100179,8.685578496832596,22.984352798673253,8.626647954502264,0.6113759672212469,1.0337551465655297,35.809171766455485,7.8992716612247875,8.626647954502264,10.082749300716326,3.2425235209372567,1.1161777200461096,16.167401453794866,3.1296436013937656,3.4180094277759667,2.1925999385168016,4.249838710161457,15.802954702826417,4.4951492795653625,21.30028856229914,1.143182014824611,13.192193160757133
|
||||||
|
115789473.68421052,9.898732985816173,33.9311675444204,40.799380836064145,17.085271122052728,10.80816331266366,13.7724383821778,0.42226769719427654,0.5424167522102841,12.583843027785875,12.710444487363018,27.33187628716973,19.28526973196014,4.420468710796324,0.28842294117090117,8.44861547306304,0.3847382458293208,15.227359153557355,0.8635894532348974,0.6124045168593979,14.121717038247063,34.8746522602282,9.723575922260654,0.6863716923298928,2.341130198534175,24.876571800570083,7.922004789526773,9.723575922260654,9.378431602858441,5.896153542473705,2.052391723252399,16.594550243705264,5.894031205005592,10.69688388581421,3.521378897563902,6.193912344956331,19.67007807153084,5.128197794145494,28.35557448897005,2.1761116704121846,36.51867231701879
|
||||||
|
126315789.47368422,19.2578698683974,31.084221160973183,50.78754471543756,32.0201824325604,19.85560531827553,10.872660935420399,0.364966754437814,1.7530347871648324,19.50133777523306,27.96229583867041,12.770933029724262,30.135337607682267,5.330696095390002,0.314728987844367,17.700676323634454,0.43210962142310344,31.615508378047394,3.5079920760812575,2.3745504257751664,22.52807503063594,34.323667933075704,7.553489547687222,1.135703617846207,3.462618587419928,31.315759949406782,16.31396907056139,7.553489547687222,14.806945304130926,9.015886724160321,2.898692165385323,21.914943642233226,11.082360972988315,13.824822888149777,4.217304145200101,8.77485626093899,23.833055480365285,5.954192534047811,33.394083134025095,1.8438913234383096,48.044342484830814
|
||||||
|
136842105.2631579,22.636791448513886,49.85415743724795,74.40764710909774,25.756220624717635,21.905454680529985,51.89257555721215,0.546974485930974,2.8361672853168693,35.280834916555996,36.29879818860844,20.875748681890027,57.31053744921066,6.776159595584607,0.5280735459684337,21.09539140294463,0.31771365823507947,36.920059040312616,5.794329294537574,2.7835130929286453,26.970919955140005,49.179430689507925,13.340963487810054,1.8276130406810314,6.754651193803701,62.92153591462453,27.47999400247168,13.340963487810054,32.18803154266443,6.306751879958891,4.95727184802872,27.558249251664183,21.43056178687353,22.402516034831642,10.409063709079401,14.328873272417662,28.695540142851854,24.797155361756747,50.35903336268715,2.4002308277542257,62.39681279031856
|
||||||
|
147368421.0526316,19.679168220372077,45.91240576429709,82.62553129962276,36.09019663063114,39.86967425465255,34.59419177575782,0.8402443935996611,6.6791896820068395,53.77405348907219,27.635101984082155,41.16208487006107,70.66680231384954,14.320707321167,0.7969724491362429,38.61753103937801,0.3336952135503458,36.217906164660675,10.376314918750545,1.6846200726369072,21.955583213108724,39.30254156371562,22.60398905429154,1.567710887031872,15.147874599678701,102.52527457789374,56.207678142346865,22.60398905429154,24.089488016271183,15.12185784654275,9.406208175701451,45.29535850461502,22.676075277566248,19.71807641533933,12.892043914160901,22.201287990791982,21.644894444050873,31.721617516388193,30.133124502081596,4.7340741514168965,70.84579688095981
|
||||||
|
157894736.84210527,20.93860469564507,62.022362362975244,85.7289114500347,69.55898959088526,26.26827265615277,73.04686395481356,0.8089229023687726,8.604140950041796,64.26762188570652,30.216566548122927,50.80094005658687,75.29341152962556,22.38469854286173,0.3882291561348613,59.69005033382089,0.40628179618856586,52.88322378723907,16.521912056983677,1.1338901942456525,30.577961406549264,29.94445240200391,31.17389853046872,1.1057848758644673,19.46067774394873,104.03078655390858,41.552028037834674,31.17389853046872,37.56119133428855,16.692208673154877,11.793678569001175,46.478927374546544,18.78170444034143,31.220727645789495,14.545244100681634,32.50277000120803,27.967346059318402,38.47296557069816,37.630641369278095,6.612353773988846,87.07085215550052
|
||||||
|
168421052.63157895,21.833397904921767,44.453782226900636,99.24632165966932,89.49713815282257,34.06695036742826,87.55973976536801,0.1390235034382571,10.39505617770462,69.08605289195053,36.850569030254505,48.081508393432955,48.87818507144324,40.697050818445945,0.34828177581533504,22.084190558858836,1.0502881079169195,53.87098237352028,16.334211861988184,1.9927300957761647,29.734603966372163,83.20763551495415,26.171153525566456,0.5283944085033974,26.566136537166187,96.23106113761415,40.090914316784975,26.171153525566456,30.54276045273545,26.484150218171102,16.057352884984745,57.90814340015528,32.40687594849648,39.966278287512445,14.001846355744675,36.12048597679244,34.88797026657993,31.660333818345844,97.46280114340323,13.646429680060816,73.59145023419916
|
||||||
|
178947368.42105263,39.62290905453162,72.01485320537705,118.38136280672703,93.51537311902668,67.66455685729136,30.89697310270693,1.4005186630418098,17.01636497201682,93.24313926696777,72.31672083307834,69.29196774596323,108.4303757067863,42.841104417626546,0.5671681855854236,71.44353934734484,0.5683066534533724,73.4847314126604,28.15911824551316,1.0777454349803128,43.60697280535078,72.00125672612494,26.871987110359846,1.2908000629034073,30.494818246265528,114.58213631268023,42.717250821332854,26.871987110359846,98.78742092269941,33.78274945605165,34.93246751346747,55.59523259899953,45.13029844278775,56.46899472709508,26.481303296921332,60.838417283055534,15.159094945215447,63.26216114392903,76.43822607373266,13.146239798485077,115.92416219367877
|
||||||
|
189473684.21052632,82.69990513661561,83.0333305475124,157.83095485549885,95.52357798864307,83.45088402914539,138.61260967307473,1.0363624551619848,15.196170442322288,101.8697741289218,35.694249816218246,57.25018566268964,54.851957389852686,56.98084063543177,0.43187644937362035,94.84579836034379,0.8730964819149958,40.54276743754126,13.943557649438072,0.7616605732249431,70.40712352868923,89.06160556972853,42.06132338780115,3.9075127281970925,33.37288742910793,233.75306191272682,48.45792490359489,42.06132338780115,61.43184522908811,41.0789123440053,20.362468804018654,60.239098646634176,42.86614551174343,62.56966509515229,59.172065502388655,32.57754192722141,46.780091901234975,65.14912636986729,109.84514912211664,30.141611965739497,108.45218757248982
|
||||||
|
200000000.0,87.70932333092941,126.6426611950523,189.91159408970884,145.40460666857268,65.90140613756682,141.93519145564028,0.7105246594077662,37.30758431083277,116.48193359375,66.90076230701648,80.59385495436818,96.4539737701416,46.427630173532584,0.26785945892333984,99.67988074453254,1.172507737812243,84.90093833521793,17.944659383673418,7.393907848157381,40.68786365107486,117.31464355870297,46.02576140353554,4.258270364058645,66.42470209222091,114.95296382904053,86.08775515305369,46.02576140353554,94.29793714222156,55.6609713905736,56.793220269052604,66.8853511308369,26.22753499683581,89.84934094077663,56.36625440497147,31.966515290109736,56.7328139857242,46.12269250970138,128.4853956322921,39.81086439835398,144.08709255017732
|
||||||
|
21
results/AcrobotSwingupSparse_full_medium_data.csv
Normal file
21
results/AcrobotSwingupSparse_full_medium_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31,trial_32,trial_33,trial_34,trial_35,trial_36,trial_37,trial_38,trial_39
|
||||||
|
0.0,0.33203125,0.2978515625,0.3330078125,0.0908203125,0.1884765625,0.1005859375,0.1171875,0.2021484375,0.283203125,0.154296875,0.1748046875,0.408203125,0.162109375,0.2685546875,0.2568359375,0.3017578125,0.1552734375,0.2822265625,0.279296875,0.2353515625,0.0908203125,0.326171875,0.232421875,0.1708984375,0.13671875,0.171875,0.08984375,0.146484375,0.1689453125,0.0634765625,0.236328125,0.1982421875,0.1953125,0.28515625,0.2314453125,0.3134765625,0.130859375,0.25,0.0966796875,0.185546875
|
||||||
|
10526315.789473685,0.32483264680054047,0.3168448001724201,0.32691668671584195,0.10045536601312273,0.19545367021639923,0.12489506676586712,0.12820690028224957,0.21792999066804586,0.281708030489343,0.18779806681286926,0.18560259229918927,0.39945405341911844,0.16293998306147608,0.26700421911857797,0.2598261265213139,0.30951015440711027,0.1767584993568484,0.31290368690385056,0.282342437892079,0.23645903991530146,0.10106447859153854,0.31902864567130557,0.23917748723333893,0.1780970406994595,0.14679679447924332,0.17674790062732643,0.09615637126721835,0.15988485172514771,0.16811470443852392,0.06812796764426615,0.23455616113551767,0.1988513000784158,0.21425036380165502,0.2921887315871643,0.23814555086257386,0.3035092657622868,0.13534465853197092,0.25016612161229523,0.10354604747486908,0.18670972628606652
|
||||||
|
21052631.57894737,0.22102579978034106,0.6066760329988855,0.3322360125935309,0.333392705943776,0.3031066091437089,0.48544076935406205,0.3072042755803243,0.46806500294862363,0.2539564962202162,0.7089026760196422,0.3567067989021787,0.25811466417814555,0.20643820723007922,0.2354520549404324,0.3159936815087485,0.4239686083595508,0.5076002295327648,0.7554645696835505,0.37974335942572174,0.24624448866064857,0.2785722304579294,0.25025329167162613,0.3618620399623036,0.3030567063519169,0.33250500686941387,0.2565942748431684,0.2779200598803914,0.36332089749069424,0.17855063039510205,0.15757931632678596,0.25923377060824154,0.2092058704830603,0.5108020021644656,0.38647640048632,0.389700057433913,0.1424574653857963,0.21749178392404994,0.31395169308311066,0.2618510386289982,0.2770439898208238
|
||||||
|
31578947.368421055,0.3153028699499748,0.3760965352573553,1.2761542539517308,0.7925575314466311,0.2778383609000336,0.7975956422800513,0.2608122574655633,0.33870003560243217,0.2689887968787197,0.42046535840655297,0.3855589895697513,0.31859181264100644,0.4079101858376796,0.20421778132050322,0.31438099446389145,0.3142015504704948,0.3951584929574561,0.23804505842214146,0.798774468271356,0.36426877843376043,0.3155448760352306,0.5999050378138998,0.4598255817909981,0.37547922794838695,0.5081861078574057,0.26168901438197933,0.8005170161704276,0.34715722208208016,0.37248322441967574,0.29381577129839537,0.6774674175188482,0.28137306361317316,0.359399951395896,0.1980203485885155,0.6351812605712551,0.20792817409018735,0.36457776362876165,0.6969907924409057,0.6679103090492313,0.7610973215499411
|
||||||
|
42105263.15789474,0.19290715901805425,0.269974927822969,1.8890065222235597,0.27080006480547203,0.42508496736225326,4.282658658859803,0.2829440425967906,0.3337174412946623,0.4273409433972472,0.38146301649944275,0.7645147751573049,0.434962370389056,0.29090590754374246,0.285881581398919,0.10250010186615416,0.28706766693876057,0.7192589397905935,0.27025520768522227,1.004508517785746,1.2163723340655297,0.2802953218158923,0.3364590990906607,0.5748237652131395,0.5539734541874514,0.7181774678322748,0.36246671174701894,0.518980602148167,0.8323074087211629,0.3776070127196589,0.4887440647114677,0.9923393164975493,0.548584058344199,0.3177550466437089,0.357166913737881,0.37126705983338926,0.422119066655801,0.6651255219266685,0.22144759328741775,0.896673387437646,0.40042036061802067
|
||||||
|
52631578.94736842,0.25633175815571707,0.3007043352417669,1.982854586889209,0.28929869157785854,0.346788398446799,4.640484078108769,0.5248908864494176,0.7023599973345728,0.33446735688523904,0.42052860735526043,0.5691076210000838,0.3497898492786693,0.5105846278224956,0.4141455726940546,0.3324469093470692,0.2871746845192526,0.2989156384877551,0.25923480089351414,0.9541261942432857,0.7953873895872333,0.4166835219576088,0.16949016962024974,0.8531235908867579,0.6909701038265492,0.7876658373592302,0.36100270279226543,0.7829997361201659,0.20907461213933465,0.3531440777131395,0.3963968615122449,1.749352885745569,0.2847635052540956,0.3001372741530146,0.8835159233072127,0.6799165271325784,0.5923577781529308,0.19995296298632,0.2648429447924331,0.6243949847868605,0.38962451638937656
|
||||||
|
63157894.73684211,0.18771124018196253,0.41903319319199367,1.6856210661066537,0.2883788766623204,0.5540824847868605,3.662441026470999,0.3429422827638746,0.43071808643288245,0.2756008348966899,1.056775021751172,0.7039265355244896,0.27706140956720154,0.9086700608525582,0.5890073274311266,0.47117344758517177,0.6422448514901369,0.45912554521639926,0.8386874476298076,1.2766782332655464,0.26387560730825843,0.29502271086885656,0.3616634590804083,1.112771919229354,0.8366624879704948,0.5449803159507687,0.10584132757213308,0.5443253979458371,0.25610344165580107,0.39880904728686045,0.3900155677689742,0.7875540831082416,0.22952089283274804,0.2832721646802907,0.5668954135968748,0.6702776721309753,0.2891708437425607,0.2133459318377636,0.45507606442945475,0.5604566378606654,0.3856180936015542
|
||||||
|
73684210.5263158,0.16900547059288976,0.6859539486364643,1.96036018361015,0.3662618705770646,0.5802066596921462,9.480657101998373,0.38026601257746917,0.5093280813370383,0.2145521845513764,1.37036766041679,0.485417535100287,0.32364724811754736,1.0526359193542987,0.47608224176634045,0.32072097112597525,0.5349132136294716,0.4028851728360078,1.334013056556934,1.2206429669071102,0.3810699283251141,0.3481006146798174,0.6800440315394521,0.6058214800509718,0.8670070257213307,0.4502011270073973,0.26811172030969355,0.3245588445267188,0.3081678353518329,0.418731348666458,0.4965008236364643,1.3588814907126814,0.263484579704475,0.2214841736981083,0.8548473215499413,0.19819466493136328,0.23619517328996747,0.2982576296270058,0.3924844773522375,0.8822482101144555,0.23536319151479457
|
||||||
|
84210526.31578948,0.3034135813197932,0.3512483633786357,1.3963632768541159,0.4689537323082584,0.7979349786224789,14.164057702569089,0.47443263088236876,0.4813205792963341,0.4207836161690076,0.43259186889986573,0.6113011368093729,0.44130575755956764,1.048298674607211,0.29916787609829465,0.3151565720830267,0.3703154249534712,0.4851461606012488,1.6298015520513225,1.079589991688398,0.3172512107278501,0.3507147918447563,0.6799127256440983,0.27968618546166246,0.6210444865134284,0.3468765512397744,0.38244605658787434,0.37698835547280785,0.21172436320550556,0.49768317901526793,0.6269357528052504,2.1927492559121258,0.4238875962360414,0.31735445059567613,1.2255005559102323,0.13441458831533504,0.5421887553629784,0.27500985269731437,0.4239569793447563,1.3506467824497381,0.2608413062267356
|
||||||
|
94736842.10526316,0.42417369060569193,0.25796802288277326,3.7093396411377975,0.6298599216746491,0.7482683467072463,10.994220559286608,0.27164571239017055,0.5853092267572715,0.35743276960631815,0.4231947716583506,0.8141537082492479,0.3560446926761532,1.488667884361711,0.21109883739017055,0.30298219559265305,0.4064260572607828,0.8439807442746995,3.2509093588409,0.640345016014543,0.4061580694943584,0.19467568859829465,0.47928197760331004,0.9432084117899973,0.2692690767409729,0.2843844500935309,0.31267878091236234,0.6267083952631647,0.2914189684754263,0.32909265713678504,0.9214712893203355,2.3621894989647694,0.3826914969573721,0.40166969510656975,1.2336218984503495,0.1581569761450601,1.4035378057210401,0.23811682175401172,0.4523128657459883,1.1867794290473916,0.2166010177696841
|
||||||
|
105263157.89473684,0.3910283434754263,0.27153565150548875,5.3618300535672265,0.9706816316641598,0.8912686799701891,10.375362961576256,0.23942768804914732,0.30341387719658935,0.4643727881096076,0.6525064611038673,0.5487063875488958,0.24598231540162147,1.5324709250325972,0.3477711188826204,0.2980214380491473,0.4811711667977542,0.6395673778248626,4.643279950044161,0.3999377221612059,0.4754939832185444,0.17852158163392973,0.47963198400270246,1.3341740267428666,0.6560815457161773,0.30144298902178734,0.405022206398919,0.6214440998278166,0.26901647192619516,0.21565931853825365,1.5403868075553069,2.1788756616227847,0.4971144245602087,1.1098502256863663,1.134236222158839,0.2579574479290653,1.5722607694504334,0.17147337366669463,0.9840968282599197,1.0413588103825366,0.4419969363225794
|
||||||
|
115789473.68421052,0.3355929409037667,0.7794080374973961,11.520479833650455,0.9612577470055579,0.5783645307588448,21.252192354598527,0.2211666265683161,0.6031465292637366,0.41975501897922857,0.3417756299893283,0.4560752218780094,0.319640849105539,1.2349902467384233,0.6211043725053358,0.30107931681286926,0.8204862008134413,0.3221528166879247,5.6784187802977835,0.3564186439619831,0.2484433499069426,0.3598323674083086,0.5739709531831609,2.8284233283468225,0.5296857944816105,0.6097141355688881,0.5293105645853396,0.850388410679191,0.21781308499069427,0.21229142429425776,3.2014772726888467,5.442512189912662,0.3354763548790254,1.0993655804451814,1.8291128956379983,0.30783218161881465,1.433882615572858,0.36659892179959364,1.2793662581087148,1.1087925559596012,0.34890656722219376
|
||||||
|
126315789.47368422,0.3268575589082248,1.8058586648956902,15.014279838414073,0.5682704640227337,0.38481008048863313,49.53502071729331,0.3607484283869949,0.6275095635834159,0.16399036946389145,0.20528185730825843,0.5678965602885323,0.4437945146639923,1.319280587405049,0.5252231058981938,0.1923786913589095,0.7197056609177518,0.2107741020392842,7.531840876529094,0.9573936673742923,0.2059463437574393,0.5251315425967907,0.7909900105230698,2.0790736021427536,0.373855622521398,0.8934357450279173,0.3943233965506511,1.5405289605053516,0.4067295951526254,0.3565003667181551,4.914086930969747,8.72921926228954,0.34807431334603883,0.5009980558358397,11.709253126234245,0.3939006203099303,1.6896886534968243,0.28202930778017304,2.5007142360190624,0.8111203899013694,0.7195923084037135
|
||||||
|
136842105.2631579,1.374333275982549,3.7887927131969863,16.22817917873985,0.4539508238393514,0.3851009353045941,71.47758424711361,0.30119516842913413,0.7542057143023804,0.21379405045443306,0.3964440644282715,1.2784485988669787,0.686041853104272,1.6512646820406507,0.49833093257491945,0.3024428333271906,1.0183780384856254,0.4503047340794617,13.654052610212423,3.899136223621319,0.48740316692151553,0.24915430486367307,1.7978161550294671,2.2338535435642246,0.3523052511452967,1.3312188198691925,0.9042306540745455,6.348607520317443,0.7039771727247585,0.380430049843405,8.306098599843375,8.641696145329778,0.25733393513264724,0.24635145868951247,11.307902201390988,0.2320273893361604,1.5378476615757821,0.18065859142102692,2.158244003549506,0.658203125,0.5036699870946989
|
||||||
|
147368421.0526316,1.5809387883321069,2.942739793138159,24.46977044208559,0.19035068839540747,0.301262514743118,92.9651444083766,0.1802890809288975,0.6619124637085974,0.19759649451089362,0.2397342692451792,1.025595979347123,0.5971677732599738,1.6888976691502282,0.459666968052407,0.5338327719564255,1.2614897466432355,1.1703646176409528,10.691914133087746,3.6039492007437826,0.37673535967797767,0.42433063475379024,3.383265249616883,1.7619237133670702,0.45273637639518605,0.7433922838966595,0.7100183006138678,9.00354647438282,0.4848791317926546,0.4673639598645663,9.506287693647137,15.884732909479968,0.23525210034484023,0.2523373315869276,25.088568727065336,0.1296907092065362,5.779394598878986,0.394397291780509,0.7482237723395424,0.502381052667084,0.6993510861806265
|
||||||
|
157894736.84210527,2.9801674866610295,7.916251938098689,30.649839908462486,0.5139713630781944,0.1331549747498741,105.55140764495343,0.5455583905249094,0.9567602427052001,0.2646005952787532,0.3000248330451776,1.2309275148978196,0.767323163737881,2.1125351358979034,0.23736857575392775,0.9186743052052001,0.5886688311674584,0.7597422956429686,20.325805722181165,7.5241419324584315,0.27535882881143414,0.37027918797120485,5.258753063275875,6.455622688885215,0.20460603772107894,0.730484500155885,0.27127240040956085,5.145708179209699,0.5080220468156557,0.35044543036463505,10.744365034341152,27.385786008966935,0.2085704327950517,0.21080486794257755,26.10407848041143,0.2019546408402293,4.837990808354849,0.8756125944142861,1.6123513462140628,0.5092587220041376,0.6135574087211628
|
||||||
|
168421052.63157895,2.2012294864390363,12.794544420744245,67.40622362469705,0.3322749256757486,0.39704738627510405,84.11273159544882,1.4717977423416946,0.6923321752997315,0.1968099747338123,0.44612193437824627,0.6601059086765276,1.862549927095958,5.20887955007791,0.29065605015635826,0.7924594192293541,1.0231935284474551,0.337568246096455,20.755509817699316,4.054029216396509,0.2967249481961998,0.4415703348175641,4.988143075536162,8.905153015643936,0.18628280512844098,0.23633146418098572,0.6656273923752383,5.213852266855848,0.7734655660275278,0.25874666758191217,22.028875715514637,21.01273990536,0.23670173285740567,0.24812891740878204,34.86719265670988,0.3735250964389284,4.803399252429233,3.130596847745521,0.9514161424293408,0.5211220527289647,0.3366142547691957
|
||||||
|
178947368.42105263,3.8616866114397137,9.760375588224203,79.11486676393123,0.12495895501979495,0.4003998500158252,127.34924911594129,1.1208985082990903,0.6215254553797502,0.1734148696519001,0.45145352857595006,0.5392217768196254,0.5643175553086717,10.181054773092931,0.6368100649762353,0.6783605638963693,0.48419990169704763,0.32069054865110613,22.161459478975335,2.53295685511877,0.4315944999208741,0.12075598814480842,10.938467210679836,5.85453545683969,0.3720761507832113,0.41795613825156097,0.44268630284021426,19.249580298764556,0.20200854589404138,0.20394397904668157,16.98866281218806,78.90181455849942,0.468042479327511,0.37248187448179293,61.57476029435685,0.24279693223102594,4.159137884335505,1.8680561678561471,5.944719874627705,1.4571920431882064,0.5195630699643798
|
||||||
|
189473684.21052632,1.3317146037093823,9.608790579925284,99.66854551954613,1.0431200804142409,0.4725663087374615,135.6688920916613,1.9935955301216102,0.44902561642126365,0.1685823033721163,0.5113464683046631,1.5655018180361084,0.7143379486168522,13.703403087203853,0.34229869789694156,0.42022511701504617,0.7113548289375622,0.18600527269358122,29.755003783841545,6.139354283129411,0.3477779874511043,0.23627998690195698,12.614510528268578,10.36519875354714,0.6902294845792396,0.6257143892409729,0.8616120874716634,24.394445683487234,0.557663703559178,0.21136645795235673,21.502688843787872,84.2396581021042,0.19899691108851553,1.6890823121216159,82.73021098319182,0.5526971315082752,19.676021480824478,0.30468205269684095,8.50576306710283,5.344249160005776,0.1122596402577747
|
||||||
|
200000000.0,2.8858423734966077,13.503776801259894,96.9629494516473,0.44552270989668996,0.20568897849635073,156.40123573102449,0.7199370233636153,0.3921194076538086,0.18664485529849403,0.21627355876721835,1.7926802886159796,1.2530435260973478,10.787358334189967,0.5791486940885845,0.720414211875514,0.5491467024150648,0.3646584560996608,36.99791065015291,15.644497871398926,0.6704824347245065,0.6044921875,20.147124089692767,9.650274226540013,1.3059104618273283,0.6453861939279657,0.33368873596191406,32.88845037159167,0.7028142025596217,0.1171731948852539,26.57983011948435,53.57619697169254,0.19112049905877365,2.0969105268779553,85.3630492561742,0.6357739599127519,20.90488092522872,0.4946517442402087,16.90790030830785,8.808686206215306,0.5628930643985146
|
||||||
|
21
results/AcrobotSwingupSparse_full_small_data.csv
Normal file
21
results/AcrobotSwingupSparse_full_small_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31,trial_32,trial_33,trial_34,trial_35,trial_36,trial_37,trial_38,trial_39
|
||||||
|
0.0,0.248046875,0.2529296875,0.31640625,0.154296875,0.2734375,0.2958984375,0.1181640625,0.236328125,0.173828125,0.1982421875,0.2265625,0.15625,0.49609375,0.150390625,0.23046875,0.33984375,0.3173828125,0.0830078125,0.15234375,0.1865234375,0.3037109375,0.2880859375,0.271484375,0.3203125,0.12890625,0.1845703125,0.3896484375,0.1875,0.1103515625,0.2978515625,0.3642578125,0.279296875,0.23046875,0.3330078125,0.203125,0.2392578125,0.677734375,0.2509765625,0.337890625,0.2216796875
|
||||||
|
10526315.789473685,0.25303052336885656,0.2818348480393682,0.3151880248431684,0.1530232759724033,0.27127791904016213,0.29373885654016213,0.20886646281318988,0.26102487136122265,0.17427111596612058,0.24669432441943903,0.2251227793601081,0.15901869353825365,0.5368489188830938,0.1704359662169565,0.2254851016311434,0.3528566096297922,0.3232524428010978,0.08483515023524742,0.16336315028224957,0.18967974813360916,0.2997240188049147,0.28160719462048645,0.27569278917814555,0.33570643607269035,0.14363569962350947,0.19442686149618302,0.3857168926756798,0.2030046838142205,0.15304481685987142,0.29547048605710186,0.36292883960163824,0.30105880621067377,0.2335696867628441,0.3363856186166695,0.19908270743414966,0.25321202793279846,0.6628941776349604,0.2595041385978213,0.3368385214554636,0.24250026290766752
|
||||||
|
21052631.57894737,0.3247519781054552,0.7037775140059621,0.2755410202322244,0.15343174709837853,0.25894083144592117,0.2796298050814388,1.5344859252676077,0.6107422466753593,0.1783181927540956,0.9530614076228683,0.19264429890217874,0.22412673654318516,1.153600248933829,0.47345062107920977,0.13438279212676918,0.5467433982278501,0.44637041805193367,0.13694093233990867,0.3770245686792601,0.26190059799236604,0.24535919358525582,0.18512439463607494,0.3808101590650564,0.5481622648371224,0.48889858108478246,0.37235265293279846,0.3824419829984121,0.48751695796723515,0.798936812170985,0.2401331957027192,0.31014318809614944,0.6102592647901202,0.28714970934754264,0.3766304502196589,0.14612239615738887,0.4649739780584531,0.3896576625158252,0.38885269270709344,0.32919248649618305,0.5444573431464114
|
||||||
|
31578947.368421055,0.2180345804737545,0.2539619672991893,0.17020524571807108,0.5804186105067715,0.4357177808344199,0.4126518281212803,0.2439287217370033,0.1284933380803243,0.17643138378280687,0.16602040922212466,0.18611258615086954,0.3673987401819625,0.64457055406227,0.2227263199655633,0.11632818660577586,0.3601498907622868,0.6028145581401285,0.32325278622952197,0.5918313948401454,0.40721117954835334,0.3053106218163656,0.25549636587211627,0.6572668968475426,0.21616793867623707,1.1649802543449927,0.4694845564147442,0.7458985558507183,0.6225512338146938,0.29157927558032426,0.1670239836885659,0.17164668350008405,0.20955862430984626,0.28296590313686887,0.2850113258467487,0.2701054993098463,0.30184326541720996,0.20336222186313113,0.3003184352885323,0.41007079708279004,0.21720021847542637
|
||||||
|
42105263.15789474,0.13598419358525582,0.3002172559912515,0.36671339970216194,1.5596519639287298,0.3951150677540956,0.22173984559288976,0.7765633643829261,0.11783216270383376,0.3005901769918088,0.17488672990878204,0.41149307420049014,0.31990915652457363,0.4778002054737545,0.1575888847710353,0.39087829167162613,0.28472248404970457,0.4007331235256882,0.37771741703276485,0.4939396401191352,0.3401148048464281,0.18324271331533504,0.1648678132371559,0.773753010335061,0.2722599526191352,0.6262277936010809,0.3841103635666443,0.25329544405527726,0.2946005739333557,0.24392835453276487,0.33839171348846514,0.5588118111988183,0.25602002553332215,0.3825469400083591,0.4070843619983282,0.34956972750930576,0.3018087455770646,0.2827495353043575,0.29542365034531354,0.38108940864203705,0.29717647732129715
|
||||||
|
52631578.94736842,0.17786528991530146,0.26807614640846145,0.2984565777131395,0.48377714923214055,0.414798430128441,0.23352047082790048,0.19221566060243223,0.16633317212979218,0.3055225488551766,0.3150263440245737,0.21540364664347217,0.2422626976161122,0.41017128952322246,0.19617490425004191,0.7063261095506663,0.21717013942898145,0.350719576066881,0.29652293303006244,0.5154834863551766,0.24064660006282731,0.3836147461571522,0.3076824970192526,1.2968951909495852,0.37243469534158047,0.5339568183032429,0.2966989665150312,0.49501291734690156,0.3737058903702078,0.1516590963770478,0.19563792220773457,0.22353813456696486,0.2891455726940546,0.8566958329684187,0.2955870483060293,0.401618418601081,0.34142361057101855,0.2852437634877551,0.1619545389740751,0.23120912092214146,0.25987705959837853
|
||||||
|
63157894.73684211,0.17999957605081912,0.27707303858199617,0.4620678920164665,0.19764880130165505,0.41907382341633204,0.2666196691032261,0.16284599198528943,0.23775896405249097,0.39197826121322354,0.2673063832967235,0.27476544261308916,0.24036597677214988,0.3899902491688398,0.3345182899623036,0.7855397221784508,0.31222928858199617,0.16956946460163824,0.6608618052052004,0.37667829244090567,0.18065346377047803,0.5571649715180543,0.5401549141162648,1.7198178299246072,0.5094227830458876,0.38991914891800383,0.344143775031177,0.2856922255328489,0.3368333938049147,0.17669834126395867,0.42355535301145086,0.3971519523049988,0.4327764643196253,0.3738525316655801,0.2834526389589599,0.37055026659344703,0.21014994993764596,0.4328284276819625,0.2617994186950853,0.4068888719722507,0.3450195016623204
|
||||||
|
73684210.5263158,0.21902311541697328,0.4048072122801044,0.5649795083127853,0.21383129394615785,0.44245359309822585,0.13812804816502283,0.36271556262494464,0.2710249707969602,0.42825595031484653,0.26083242463933465,0.3028328306457013,0.46015784879140265,0.37508101212350947,0.37426065936313113,0.3477154015834313,0.5182621155419179,0.182884488409576,0.9708832082986167,0.17069129468331373,0.31162429714467077,0.47253112581628165,0.29546229132654933,1.4554862051459228,0.477337734190711,0.3807958063986824,0.5142539993877887,0.4302764454046445,0.32755109411857797,0.3380550532459884,0.33365110149013694,0.5805108606650228,0.28719379763193736,0.4491401418754599,0.21764971080579262,0.3998526126724201,0.17420892107849978,0.40753316747184604,0.3288366655893934,0.8167627815394521,0.38297793475544684
|
||||||
|
84210526.31578948,0.27672950977103533,0.49609375,0.4557121889743118,0.26988640782575535,0.5044739741697866,0.23841559919954342,0.39831306267313016,0.24123213495904391,0.12370213643335572,0.25069490892404994,0.3359644881906271,0.5955277545960656,0.39878991039836176,0.3431641216753592,0.5136110300502619,0.534042590873063,0.32353787408971396,0.37822979110760035,0.13255067016939706,0.583048833704391,0.2897276733060293,0.4774314056142876,1.5770261558469314,0.39611930530157113,0.3430311461895126,0.5164658395867598,0.30432105658787434,0.3429149511448234,0.4549861707185444,0.34398346792627904,0.821226547959769,0.28823873673119377,0.5228529953890559,0.3338640588142205,0.4216230820420706,0.4141558279951524,0.4650375704355848,0.36534340559940914,0.8170744666102189,0.3284363891939707
|
||||||
|
94736842.10526316,0.26697892429425774,0.4612082114180039,0.39491714995323457,0.25240225012612805,0.41098619498044175,0.4563544714549902,0.23331195429751747,0.37045730580253283,0.3072545455763545,0.261260719510657,0.19042387299260274,0.4474874639114845,0.8299811143954376,0.2510917510352306,0.5663430248271065,0.279371729187688,0.36818113881795356,0.4780982960624378,0.12739098831557172,0.43092005404739164,0.26806486082209113,0.4701555204523567,1.7963611808840256,0.2634845559286609,0.2651844130328488,0.5682158483362595,0.3905419748575734,0.2895030869671512,0.4257265629860833,0.35372002038929273,0.7144280431013028,0.26426798484992453,0.43419567435732176,0.5150059993247247,0.30494076789581215,0.46327652495323457,0.39209956013264746,0.3391553424401957,0.9518784604904725,0.3575746742311937
|
||||||
|
105263157.89473684,0.24289163393987512,0.5008278844098966,0.47332485759027115,0.2709853470820799,0.3951205150572547,0.3517679018987513,0.3550103998580467,0.301561588065446,0.5977817176121424,0.38230287335255797,0.12546178442619516,0.3411689451856957,1.0323205298003728,0.15366006359829468,0.6383498870765073,0.4585578210466126,0.3223275002350107,0.5460966749534713,0.15379441279783804,0.28494569667488584,0.26985495506561363,0.4252097481175473,1.820428790147945,0.2713144994508527,0.3380215399153015,0.6999406471146771,0.5722037497649893,0.46640211723517844,0.6511723843307706,0.41687530559846236,0.2804733920955922,0.6346997237271549,0.4913859644755102,0.5701179927075669,0.33130047129792184,0.37445514287975024,0.3506300495602087,0.4928154298143043,0.6927053921770853,0.38953700290162147
|
||||||
|
115789473.68421052,0.3960215840643462,0.3349236464566472,0.5759203004704948,0.2933789144922822,0.21887885434475635,0.3572403942118721,0.2640881736522897,0.39678315244553153,0.4300108140855616,0.26592542326021074,0.15344303268474885,0.30311858158692756,0.5973605396344722,0.11043770227405833,0.3837052791732831,0.6310613901661374,0.30808375614831984,0.3443751586110969,0.2165917927538589,0.1960237983851552,0.3364871413423744,0.27921993646595294,3.6862943046971366,0.21039092573762935,0.3821141805675221,0.5022108336895127,0.47497989200158797,0.8974585810526585,0.5633630435552625,0.3803920957190179,0.30859068291999625,0.5726148969909162,0.5711930108532681,0.6564062335154356,0.32955137448297644,0.33839377405901033,0.4429919250784158,0.7014173153694977,0.5292815395999813,0.3206631932562408
|
||||||
|
126315789.47368422,0.38875902128351675,0.08688020574088884,1.105062754200437,0.3690640774460052,0.22917157329020416,0.5642152400558347,0.5831729751544653,0.6456626723017391,0.27235053318689406,0.2748512627345375,0.145597019354062,0.3213591324655633,0.7293305542330337,0.28322844360013444,0.35445597429354786,0.45018913078836426,0.17931183157204905,0.525398867282181,0.20267072344750903,0.738552399949685,0.3066491631589767,0.19045088495904394,3.86860421093547,0.29548007827716544,0.4999022549869613,0.3194634181310594,0.5797001001247081,1.2835128446034783,0.25055849915396117,0.31238959568689406,0.3922171764426615,0.41658236643614216,0.6757747724115686,1.259228027428287,0.22270512250651936,0.22567308378351675,0.3357125702326976,0.48903729975058385,0.4417398193866588,0.2923849322459044
|
||||||
|
136842105.2631579,0.3119756534819457,0.13614315075227112,0.7586542985445898,0.7804659317735164,0.30378306803610855,0.4147002941353497,0.5311460494995114,0.6222439419859993,0.4090781885501092,0.22487255476848556,0.17107106047654091,0.4090115290929737,1.6516287927812496,0.2456370694485396,0.39678585232129715,0.23052000272967463,0.11553585232129715,0.691115714836649,0.2565563444945952,0.5896184292526451,0.38927929421210894,0.3687431198077851,4.022914598523085,0.32317552804286453,0.6394300328727576,0.7446390611643282,0.24048315455048314,0.603918162739507,0.17213170218005405,0.721224451989679,0.6768115167802724,0.40813099446389145,0.3729046269466998,1.3564408479304852,0.42936759642286676,0.3605286500460556,0.2506029546425943,0.43681296615389253,0.478324575767623,0.1874059851479991
|
||||||
|
147368421.0526316,0.29885820404644486,0.4263531428625051,2.003540044346016,0.4488618274804953,0.5765424395532162,0.4208707624525245,0.26172316701788634,0.34955122992602716,0.4289214908251141,0.18844648345355514,0.10687564416605341,0.37881360886169596,0.8840979050401165,0.5205984406193871,0.39605163933497717,0.6280042695867062,0.3760555616045925,0.4822133346938028,0.2651714103043574,0.2632435563528635,0.31279838646547936,0.2988441710326809,5.082009164910568,0.1723538607441486,0.6521215676600913,0.4365182385220087,0.3794388388002351,0.1592651166413958,1.0618438509362564,0.49622290022155213,0.9170270745443837,0.8664174647872803,0.20655680627373774,1.7252670879839536,0.4688795412016047,0.3593124379081408,0.5739576545778737,0.5410180289989694,0.40144339162557063,0.1866253036541292
|
||||||
|
157894736.84210527,0.6699639740412918,0.5021547967377131,1.0038571635111537,0.48041818412717374,0.21481497333980965,0.5784565801435562,0.258181323635281,0.3168653107746156,0.9343601739307523,0.6354896540126646,0.2379387752501259,0.2194401009261112,1.1198062130618958,0.5236312966597707,0.3077552668605814,0.488167387626838,0.5433242512541796,0.33163063017615313,1.585562045554376,0.30679445451646636,0.49251635649197667,0.5521847812092537,4.242530862380262,0.3227749557706459,0.71459423604104,0.22030250517615313,0.5270491174713727,0.09725290652457365,0.597749802544506,0.19523043381540384,0.7216772597252166,1.0209668732746155,0.663090793049567,3.5608185348088073,0.33175538715563313,0.24212390762286826,1.2001974443979875,0.3304684419711209,0.4459493114017054,0.2718743960943249
|
||||||
|
168421052.63157895,0.31982603627889084,1.0665047769731433,0.6447691349441654,0.6184696609623875,0.7359142778983079,0.41471089286487184,0.6811505262211092,0.6376751928778568,0.266899510433799,0.3568164688068082,0.18756800939501816,0.07738632244416545,1.4490854786373573,0.17220650881611388,0.45589884496461663,0.32995368767313016,0.4728346637081241,0.8372730455900497,0.5452033383694375,1.0780082132017186,0.1881938310871492,1.229814270526749,9.025932121805209,0.14106112239763666,0.42089873337679606,0.3812979489482342,0.30436786852384856,0.26385853099030476,0.49507754000930576,0.2610676097077346,0.5956097970048476,2.245478326263851,0.4926123368112663,7.2247872814907605,0.28564880032948836,0.30168980310498184,1.03001327725989,0.5584915879690748,0.6912022218149456,0.503107805331328
|
||||||
|
178947368.42105263,0.16908783265428196,1.0148261643512757,0.8389694208583673,0.865221763251561,0.19346593027299774,0.35314680136471904,0.21247661146760966,0.6793354330300624,0.536813416309304,0.1998189572152008,0.1476814026977877,0.5351662332001157,1.7974592087341479,0.1697014098022123,0.5027796356961998,0.40778817628559316,0.13721672385683337,0.7889585732753257,0.5600078561629616,0.4549047200633547,1.3676692983780545,0.6321721829866106,11.988294265937277,0.424139905174023,0.995881574636021,0.3862350812579126,0.26050496563686887,0.5617740292958606,0.5940299602096432,0.5821317899920604,0.4861889915783319,1.1209380369107145,1.0099591733346027,6.493331047966869,1.109713520039482,0.6578719596123102,1.4754903006091344,0.7525574771321052,0.6301217277294381,0.41901741926029445
|
||||||
|
189473684.21052632,0.5497069424869612,1.1784656873370143,0.9500587019563713,0.5782917846933296,0.7417523603360078,0.3938052317442326,0.3029391375935309,0.34313644662788373,0.44362463911484484,0.10192933439217783,0.18164371585581773,0.4847572886712665,2.629460189481191,0.3306831402131395,0.27274474674975124,0.5544075160145429,0.3823742694801901,1.4530799527577747,0.4352265566670003,0.7203882539701594,1.04530342926279,0.6526284468801398,19.052833002359918,0.47603741907346936,0.9852716282133911,0.6048613783395191,0.12569495647567797,0.27798909956068213,0.32314444248696117,0.43579123753259713,1.1412307496216159,0.90483520698019,0.38890225207046125,4.863410484757781,0.22094895634954992,0.4729533103034107,0.12646513434328213,0.3890868474902209,1.088537596599547,0.26863609060356164
|
||||||
|
200000000.0,0.6222849394145765,2.4296290247063888,1.6548071409526623,0.42989118475663035,0.7013195941322728,0.35227414181357936,0.4102591464394017,0.36949202888890315,0.5796488711708471,0.5352263199655634,0.289144566184596,1.376618134348016,1.9230283436022306,0.34192501871209396,0.9165915439003393,0.32346986469469574,0.2478114178306178,1.9161021082024825,0.20343368931820519,0.36306220606753703,1.6887894680625515,0.4443906482897307,46.86712370420757,0.13684900183426707,1.2336734470568205,0.8227253964072779,0.4287427099127519,0.30051085823460627,0.42170855873509455,0.23243934229800575,1.4553125281082957,0.7481388292814556,0.37096560628790604,7.274050461618524,0.6028052380210475,0.6749271593595806,0.8799737127203691,0.44110303176076787,0.5555447528236791,0.6040071688200298
|
||||||
|
21
results/AcrobotSwingupSparse_large_data.csv
Normal file
21
results/AcrobotSwingupSparse_large_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20
|
||||||
|
0.0,0.2802734375,0.17578125,0.2060546875,0.3681640625,0.1787109375,0.234375,0.2314453125,0.4072265625,0.1015625,0.1044921875,0.1455078125,0.1748046875,0.2431640625,0.158203125,0.3388671875,0.265625,0.1357421875,0.263671875,0.125,0.3681640625,0.1669921875
|
||||||
|
10526315.789473685,0.27517904138961324,0.21664716662462402,0.20527945330928898,0.35913812156529307,0.19925464355384215,0.23110794162486067,0.24473504148361758,0.40484548605710186,0.10831811223333893,0.13793800544210422,0.14761201958907277,0.1737525839554636,0.25252224665929734,0.1676720569008275,0.3313917149467151,0.26280093259098125,0.1611034203104035,0.27098122594098967,0.14105842252187123,0.3725939721612059,0.19274103740575904
|
||||||
|
21052631.57894737,0.22520447834046595,0.9249465445732477,0.1866239299404324,0.22127737430984623,0.6085717473333893,0.23866580001535181,0.4757876092377132,0.3481792228043575,0.2669956809596012,0.6319849590185276,0.200779817110944,0.1488299013174802,0.412743782402736,0.3288499879704948,0.23516117634865719,0.23265396921258225,0.5676138764273096,0.4159458984628609,0.43910174488691084,0.481839087531177,0.6044815887704781
|
||||||
|
31578947.368421055,0.5593659171107074,1.0562601723499243,0.1592104534032933,0.35176998624511047,1.1225984129549063,0.9223344279788541,0.5239751688991556,0.31717466317385534,0.5847179011294715,0.19785628754676554,0.44676556679680757,0.10377779825902714,0.4440891471926195,0.38466581941641603,0.5153802702631648,0.4778849953099303,0.42733103152457363,0.6790356728508864,0.6700322793131059,0.6997957229614257,0.48823134720820804
|
||||||
|
42105263.15789474,0.9246354988077011,0.5353551743103195,0.3642817573864375,0.4714667301759165,0.942520205003733,1.6597057036085472,0.3733688629234927,0.677971573087317,0.36227426502513094,0.3999759959382033,0.8719267382846314,0.22967540920606283,0.39769165799888545,0.5895641569945951,0.873139516138304,1.0678873352727072,0.293752865778112,1.0677451347710354,0.6325612688989191,0.4016891754234927,0.4499201365124815
|
||||||
|
52631578.94736842,0.9867714356187308,0.49509397702203894,0.6614212870928059,0.3461020276817258,0.6589964895697512,0.8486628915464449,0.3478473230081912,0.22737362774455316,0.4114069582022458,0.3873740352091697,0.6964534907459883,0.4021058412800205,0.3044407096926195,0.8141957359630976,1.3091242782296897,2.0485787299201097,0.1649382266311434,0.9983521062581493,1.0205287140822477,0.3650928613551766,0.11630733257515609
|
||||||
|
63157894.73684211,1.6591425866631608,1.7824413320694616,0.44587821115086956,0.40847383641800383,1.6665435009055534,1.1877225733199614,0.28022147413766274,0.3380635914049651,0.5179783776196085,1.0235838216427624,0.7296962156850542,0.23140291758191212,0.4407968177689742,1.0733628946658322,1.792181199937646,4.195472807104898,0.14171196681310594,0.9286025937574394,2.554397059939906,0.2698115298622532,0.17777470934754264
|
||||||
|
73684210.5263158,6.238765642583537,3.59658643083229,0.6551404857899674,0.3245403944950685,3.8729239302658973,1.9344165067593477,0.26265259792930207,0.46801065936313113,0.4373612337825702,1.8031213765659495,0.3401677984940379,0.2482830903536727,0.8480729396323419,1.7971204403694978,1.7015861606333724,3.5507319389617997,0.12613277223962166,1.9583733445059237,3.313483999046262,0.23699467201972607,0.26459068340607966
|
||||||
|
84210526.31578948,11.33134506944144,6.717954979048542,0.8468423986038673,0.5004795951526252,7.705900599091338,4.249325591111118,0.5948339235089163,0.40457578114855647,0.5547483449497381,3.5057508304838993,0.40089308720216194,0.4130237188365651,1.177265769556949,1.7268967562435074,3.9254876134138037,3.7854948175911103,0.14663580051749697,6.488634822771491,2.5694901104448906,0.2593000628611387,0.6267391849422721
|
||||||
|
94736842.10526316,15.202643405037243,15.685721064538507,0.7532116607285603,0.5572941455154208,9.217766674601801,8.15010813134529,0.6283958889440816,0.4634296913886665,0.5443773613081745,4.50728197177031,0.42302452890496506,0.3292772525565446,1.4781875029165,1.9640556081840537,10.410305865913877,9.901060608945727,0.21344095319922282,10.328643532010657,6.292152698020196,0.1799496732590271,0.8677750716909477
|
||||||
|
105263157.89473684,25.396041542539304,21.818484546735345,0.4865458427703942,0.39040384794536387,12.66807343755072,8.59216557975621,0.3923658545327649,0.9828761991041188,0.44712239421305566,3.5509889607944647,0.6431069360875687,0.21256520279226543,3.083921020381008,2.7742421739319356,14.99383551262092,22.727189605586084,0.17520459669118443,19.471750449605928,12.117786713914528,0.334693660366238,0.6805385948878576
|
||||||
|
115789473.68421052,42.63585940754645,27.735247804847777,0.2831894354146604,0.4100106389899003,15.06661862613752,9.918502606843646,0.6394707344245381,2.134073925810837,0.772664373931462,4.038864748629837,1.9573007779108187,0.14512053883306866,6.462227501697487,4.790906855934544,9.35677797840573,18.159722560660665,0.1370267286855428,62.73333005825897,17.51345433686909,0.39889415677564627,0.31168131683011474
|
||||||
|
126315789.47368422,27.23198478043571,20.460909581910855,0.2832858542656305,0.5299548838607494,12.117384765286852,7.166928024503326,0.5250193735899353,2.0424152596175165,0.7614090303965219,5.575143491792549,4.446013484965404,0.2297765885033437,9.887922588147617,6.148144264960884,8.634028109817299,22.693724391863306,0.10977628290488112,80.47438623106049,16.407796558580895,0.33552355779505166,0.31173982910832565
|
||||||
|
136842105.2631579,16.033454924078857,28.467217551043838,0.25496142062454,0.6837882572924332,20.969380410424243,10.766100595532368,0.4718130064142709,2.0120291564603265,1.349581451627357,13.320209347310168,7.357996776823855,0.29395625465794617,17.497463712401675,6.682625110129571,13.461418257525757,23.01459267000742,0.17884972749324396,108.81330713076608,16.185095536081416,0.3766680609156221,0.4783474665929737
|
||||||
|
147368421.0526316,37.59070293394815,45.247559222488206,0.5489187531193871,0.2677650293154725,23.18473077282681,13.554130813091417,0.3090910607757989,1.9922845581561903,1.973272254922714,20.619481776229566,9.112065228068598,0.4696110543121593,27.280923315032375,6.106955874329458,18.48774893594251,33.81049475313225,0.07217057986272657,110.47327014151702,14.82299238823127,0.37575438016009133,0.4326448625474755
|
||||||
|
157894736.84210527,36.77550039793315,41.5172875739861,0.6474715362295219,0.09058684457372096,34.07405397304208,16.38433054609642,0.17504360272943803,3.4255914212594085,3.3810120210092824,28.359679169271796,8.069284127359573,0.5641930123115181,38.486584512810964,9.454989784642272,34.8980696326808,30.82037958213827,0.2703642858362596,93.73673477595531,29.67569741647991,0.7430602071027679,0.48935592471727707
|
||||||
|
168421052.63157895,39.882253588731935,87.91054042488587,0.4652204011615953,0.25461656565151064,29.904271683204204,35.48649123765096,0.2992554133618638,6.534593027384329,3.874464758875628,46.74972572220991,18.78880846863639,0.35086072250746614,41.29111356021955,18.411489893524937,76.87768503965765,41.29712421279866,0.24596077451415335,80.2293726110062,43.59896161615684,0.5038159653090373,0.38863801824088895
|
||||||
|
178947368.42105263,52.231146648650025,119.5222034692104,0.42840330224288137,0.25080975403085637,46.23489197865749,43.12637632111103,0.40478807539160566,9.235101374892977,10.007000925798497,29.432354541366443,18.630783300320527,0.8863489911827026,64.01402699848292,27.234131459053867,60.27029654854222,48.25192117823128,0.2684398423931936,134.70386039385176,65.43712950677423,0.5545542048615432,0.4268449825593309
|
||||||
|
189473684.21052632,72.26899539598799,96.20847314744776,0.6031557067279341,0.528297054470411,49.87977726373646,53.568232951071785,0.5338306638342522,11.775390931443825,6.6547322894067324,49.90554913655543,22.6669396429511,0.6017369352219178,55.97077158349373,31.042400545030425,76.6398057382853,55.76528996179639,0.2646484375,167.1103262597504,86.8981981568059,0.613903341531093,0.28348001812963936
|
||||||
|
200000000.0,77.25336019616378,148.55915451049805,0.6621674487465307,0.4916038011249743,50.23296411413895,78.22769566586143,0.3491790671097605,11.36713665410092,12.129742672568874,54.4974018900018,22.20694471660413,1.2743200000963713,54.5428665060746,58.93288255992689,61.13555807816355,56.83450663717169,0.2646484375,135.05298509095846,83.42306859869706,0.5416407836110968,0.4620617816322728
|
||||||
|
21
results/AcrobotSwingupSparse_large_data_no_aux_data.csv
Normal file
21
results/AcrobotSwingupSparse_large_data_no_aux_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14
|
||||||
|
0.0,0.1865234375,0.240234375,0.310546875,0.1474609375,0.4345703125,0.3525390625,0.2548828125,0.126953125,0.15234375,0.2109375,0.2216796875,0.203125,0.1416015625,0.197265625,0.318359375
|
||||||
|
10526315.789473685,0.19034423458279004,0.2400682533877048,0.34593077841888176,0.16407309872952194,0.4529544375940043,0.35159770669699375,0.25659940249371727,0.13492696239017055,0.15411571386448233,0.21215572515683162,0.22998576811476099,0.19686775260354672,0.16352961532296897,0.20850652076530984,0.30794908729616627
|
||||||
|
21052631.57894737,0.27528056411531826,0.2597864790306197,0.8654637217851887,0.5270344213765744,0.8641101646951692,0.32198328456720154,0.3099637441027528,0.2998057177852726,0.22246620727708138,0.24094945109782126,0.3316173077950517,0.1113790580770646,0.534411438284158,0.3847652815715758,0.19556613510005036
|
||||||
|
31578947.368421055,0.38988838301471057,0.4313938346926195,0.3783542284344703,1.2504494923303664,1.6605882988081744,0.23598152910903553,0.5223457489647694,0.5761776895073973,0.5469662674246072,0.2747251082655465,0.09806062376069899,0.26480977489017055,0.5896728441655801,0.3286896808655969,0.6154138189933966
|
||||||
|
42105263.15789474,0.34708921268706183,0.43128890145848664,0.6115588454988855,2.902771915425225,1.7377241245597355,0.36834591049236604,0.613855155881422,0.6140236577168726,0.6988478895699879,0.10362602931310595,0.38753534882352625,0.34801003411205855,0.9399262425641937,0.49164470345029543,0.3360882386606486
|
||||||
|
52631578.94736842,1.127931404642121,0.45348594855733854,0.8702088459046593,9.160069980779843,2.5894455579509366,0.7035564095029541,0.8960099788253657,0.4340952056927033,0.7486016202171093,0.21301609261214235,0.4492471119043239,0.436174094181642,1.9846863891939708,0.4783511967540118,0.3387574700437424
|
||||||
|
63157894.73684211,1.2623429021016381,0.28477204341307233,2.1772849539970762,12.486180773071965,4.034385623033689,1.6144263618870789,1.9671510500921112,1.1361896905872637,0.5804855896165169,0.4604503969736705,0.38344931140170546,0.6079257584674873,2.65433014000552,0.19061016177866924,0.5152756804574561
|
||||||
|
73684210.5263158,1.0310715387402476,0.3584240995285585,5.58352809863738,13.306633970413843,6.781386137669107,2.3914307042172083,2.9402263461717943,1.4218407284850223,0.8730167959535551,0.33432005250883245,0.48149384157809516,1.713388717735904,4.933274284954548,0.2823366471604959,0.7346932339866405
|
||||||
|
84210526.31578948,0.9781827424701893,0.4988282433507184,9.453214206854062,21.745892046561206,8.408636986053551,3.943866444426561,3.8482770629206526,0.5594711937732644,0.6924070532632336,1.1217781552977844,0.40613959568689406,2.4955827496388614,8.980877968743238,0.6492681318372902,1.0405002076209748
|
||||||
|
94736842.10526316,1.973406627898071,0.32963444717703105,14.503148189872256,28.405982165455487,9.733883551283226,4.645091738397064,9.383900547291764,1.562067063561437,1.3817745483482975,1.3647147413766283,0.5345659546574728,3.2113450359439586,18.489804283733847,0.5906411643833995,1.4129823779795638
|
||||||
|
105263157.89473684,4.259074044689907,0.570512158718796,21.455445884007165,27.151618418601082,10.064860198636465,5.120512478899758,13.860042186324947,1.3542048964143791,2.1629922660764236,1.1493925691641598,1.9927540882142296,5.49791963212708,28.61485353401163,0.5250136304099804,2.7843624125557263
|
||||||
|
115789473.68421052,6.041937915241949,0.4458508082043762,21.408805841884455,44.59048214645597,9.67897478034952,10.779392289983267,20.029696041857434,1.4827501635141975,1.7389949048324966,1.4774170529479134,5.255100200050755,7.8835120161484475,30.77901814584917,0.8043901438197931,5.036998474036557
|
||||||
|
126315789.47368422,10.123932740694935,0.5530058208264808,18.56505785490337,59.18716700123288,19.130813989612882,20.732716547154997,25.11263677750268,6.196380604667353,1.2243379291735192,2.957851473314282,12.702524665980468,15.74330357897646,35.41263929034205,1.4215093385479796,6.627211319772822
|
||||||
|
136842105.2631579,20.491153437014773,1.1387464570867063,35.89996609463258,91.50769230797684,22.253454163464152,27.70701300338365,33.45144478171817,15.03411338732184,2.0689597486459,6.677827124450349,19.041428716559164,33.84444712007477,53.581838776860565,2.9289575745854695,6.342605847070751
|
||||||
|
147368421.0526316,22.066439182144123,2.5543048573332823,35.63914051214413,120.60290105190965,14.671598091019812,25.899213632388125,38.15370446096827,23.35743813765677,4.728770623246721,10.481099120797875,21.96640780229648,45.589437283967676,73.74559278831589,4.027575939315839,8.25748878436736
|
||||||
|
157894736.84210527,28.339502936915352,2.394123050975007,39.988073660726364,85.0292111737576,21.776703509597574,26.101378414439363,61.39263042122375,28.699849287228574,5.639603023053537,23.323222477350217,15.353265041129406,38.02090155847184,64.42699340439898,10.968236587714625,23.005829718634704
|
||||||
|
168421052.63157895,31.184271476935812,10.029926289481804,38.03403259446416,91.53893328637628,21.498702537980435,27.58700620698797,53.512879474671585,53.38297607760021,13.046214011237236,19.53149350660329,18.620361919878594,28.519923836240473,76.54258398336057,19.977879658960575,30.549120451274675
|
||||||
|
178947368.42105263,31.010932526099715,9.284925978599823,37.51186296352059,122.61154211789288,19.267769575779457,75.70819789484929,69.04460650467807,78.92181762631911,23.29142483581797,33.112013201304094,25.141503347254197,40.65754468553285,88.30279642194922,19.76915168498031,40.55523753760595
|
||||||
|
189473684.21052632,56.85672156104091,10.940894235204132,28.893765473299744,219.25523408165927,55.27034931764048,60.26909399693032,81.47037080780622,54.63399636118036,33.22709029665285,54.299327073665204,2.546875,62.02290531803036,101.4423805458724,33.632486797766006,41.771661684453655
|
||||||
|
200000000.0,66.80308221515857,18.400088360435085,62.4457819587306,156.69830794083444,74.55105696226421,75.79422358462685,95.60859529595626,82.23799554925216,77.2982702255249,40.56042756532368,2.546875,70.23708228061074,114.39200953433388,22.08554292979993,57.08792816965204
|
||||||
|
21
results/AcrobotSwingupSparse_large_no_gauss_data.csv
Normal file
21
results/AcrobotSwingupSparse_large_no_gauss_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2
|
||||||
|
0.0,0.4296875,0.2314453125,0.314453125
|
||||||
|
10526315.789473685,0.5006768023208238,0.22707077670955922,0.34136482619182557
|
||||||
|
21052631.57894737,1.5581568931938867,0.17489595492460722,0.8084433547677756
|
||||||
|
31578947.368421055,0.5316406841753591,0.4312913054574561,1.0742526067591112
|
||||||
|
42105263.15789474,0.2484601303480999,0.8415534952010474,1.9649651440226803
|
||||||
|
52631578.94736842,0.48032350619413844,0.5554545156843445,2.6935879873767123
|
||||||
|
63157894.73684211,0.8109354986048138,0.3501846545951189,2.071372526174107
|
||||||
|
73684210.5263158,0.629401547756882,0.7175458316327465,2.3725279282334766
|
||||||
|
84210526.31578948,0.6040252516474419,1.019607797553995,1.487511463112448
|
||||||
|
94736842.10526316,0.4250753989180039,0.6952112969269052,1.5457347436624882
|
||||||
|
105263157.89473684,0.29857345961467713,0.4873436481338459,1.4422716737784178
|
||||||
|
115789473.68421052,0.41575484923048356,0.3451961982283236,1.4957208765510706
|
||||||
|
126315789.47368422,0.7033923246853903,0.2284838288114341,1.0474488107781652
|
||||||
|
136842105.2631579,0.6116699128930256,0.3368125635501092,1.361418113814167
|
||||||
|
147368421.0526316,1.110299007384071,0.36130727096938026,1.2535032024013697
|
||||||
|
157894736.84210527,2.123014191181046,0.3911848966434722,1.4056686100206879
|
||||||
|
168421052.63157895,2.7653386335293675,0.40809272068689406,1.6191461198547872
|
||||||
|
178947368.42105263,5.326025614117652,0.36117054154668154,1.9360698953559856
|
||||||
|
189473684.21052632,6.177909697852306,0.6335446999674028,1.2216595203262286
|
||||||
|
200000000.0,6.039200331035413,0.8152918564645868,1.1759828266344572
|
||||||
|
21
results/AcrobotSwingupSparse_large_no_norm_data.csv
Normal file
21
results/AcrobotSwingupSparse_large_no_norm_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9
|
||||||
|
0.0,0.1630859375,0.552734375,0.2333984375,0.2080078125,0.251953125,0.5595703125,0.7138671875,0.400390625,0.126953125,0.2041015625
|
||||||
|
10526315.789473685,0.1597635052540956,0.5452589024467152,0.22913464945108936,0.2255613295325282,0.3501863717372398,0.6899204042809822,0.747146883829809,0.40548502111038676,0.170089370325992,0.2005576347710353
|
||||||
|
21052631.57894737,0.11313051133935141,0.42554984264426615,0.25178496659297367,0.6881296469564254,2.14318726135423,3.008129384048758,1.3229307983060292,0.5040778426912683,0.9340986162011313,0.15887821017870282
|
||||||
|
31578947.368421055,0.23266012714840378,0.4303509561639083,0.9543271553483366,2.5293181437864867,3.3130010884884653,4.112760657418797,2.1725409552661348,0.6777566027443166,1.4066813857271403,0.340791607167252
|
||||||
|
42105263.15789474,0.5359431132055056,0.26064100820271924,0.8523691869508527,5.16609118982035,3.650855595385269,4.733257409938485,5.523157220137747,0.9523282249218209,2.9470863236614875,0.5248153454048812
|
||||||
|
52631578.94736842,1.5639268605662844,0.5079882376081726,1.3875268582161773,7.055676912006579,7.237852801906765,8.839877382209757,7.123404627031237,0.9122136451530984,7.8915293262936075,0.5596568194782965
|
||||||
|
63157894.73684211,4.293208103761119,2.019424375074393,2.7456720040445517,11.772341841806007,11.974730959229195,16.662337517143957,11.629478581394187,1.3368279465017558,14.29436954699065,0.8960270075916912
|
||||||
|
73684210.5263158,10.072159001041321,4.708208300730529,5.944688384882964,14.994564917609303,20.681688979722132,29.30764139392039,18.207377209227506,1.8838079035117028,32.15873111450112,1.2167651739146907
|
||||||
|
84210526.31578948,14.138785238081066,9.861402791622936,11.042435368672633,21.759455778592184,38.02457535762206,36.183964885172756,24.745262336202604,3.5783671962917682,46.36677377970265,2.390926181444501
|
||||||
|
94736842.10526316,24.571982333534642,14.093079403166625,9.755608402791115,36.66465025397219,72.03410663499066,57.151690226842824,36.82278372640425,5.869220038860458,38.14601104345348,7.161767542197103
|
||||||
|
105263157.89473684,41.98139735404144,24.809139146038696,16.508829449682683,52.23029834609943,114.84467322542396,89.42307763878989,54.86347759595538,9.268700702698936,48.87880932921518,18.40993717436645
|
||||||
|
115789473.68421052,68.94661910514091,34.38870800631198,29.521901233704796,70.47003637192321,134.27849453638134,116.05059737313817,100.47973870570638,26.87508471586697,55.5563349657772,37.55725796401005
|
||||||
|
126315789.47368422,86.41138893613525,54.63687204588156,49.136337998831394,80.27194482269711,131.4345456067875,116.38952452852453,113.59657228422296,44.8046686167202,60.507944070071076,49.21556445304047
|
||||||
|
136842105.2631579,106.32890335938939,72.70863331982305,48.91356672408507,104.94321046179354,142.38893879285482,124.69046738801572,108.07506098707627,57.7786672333271,61.429169321984794,51.20308814748833
|
||||||
|
147368421.0526316,123.66593588620343,108.39993884226625,73.06872711287313,105.70160243914067,147.66078628793647,164.9942044868364,118.36307599603965,74.51562498414947,82.70189710072866,55.39326099279515
|
||||||
|
157894736.84210527,111.944893990197,130.71012265001968,91.21438978781661,92.43176300703983,159.05557620030032,180.06277841213998,115.48762584327,95.42183693756358,90.75954311244045,58.449131889026255
|
||||||
|
168421052.63157895,127.30031668116182,136.00661521341002,81.91565571201144,95.40619708032159,161.72888010294483,183.1228608952995,130.0786024606129,126.38154001843567,97.09616443919343,106.65009804511666
|
||||||
|
178947368.42105263,142.86994731987613,185.8866277599599,88.67486125016147,128.87277666815763,179.6187286826052,153.9956542118104,147.17875872952786,118.22051210482695,95.54079551379766,83.66468143727309
|
||||||
|
189473684.21052632,159.15779095805584,173.78900422283817,115.49254558489262,131.10272004927955,232.66658780845577,203.0620319017743,156.8713545812464,119.69465374616375,101.49783433507355,97.80115297692635
|
||||||
|
200000000.0,211.78176593780518,225.41251006879304,95.78061359807064,118.19754419828716,203.89517046275893,204.22622771012155,130.77916682393928,121.89682659349944,126.48130356638055,119.69654289044831
|
||||||
|
21
results/AcrobotSwingupSparse_medium_data.csv
Normal file
21
results/AcrobotSwingupSparse_medium_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31,trial_32,trial_33,trial_34,trial_35,trial_36,trial_37,trial_38,trial_39
|
||||||
|
0.0,0.2275390625,0.2060546875,0.1025390625,0.220703125,0.318359375,0.3076171875,0.27734375,0.1376953125,0.1279296875,0.26171875,0.1572265625,0.220703125,0.1416015625,0.064453125,0.09375,0.3427734375,0.12109375,0.1435546875,0.419921875,0.146484375,0.3701171875,0.123046875,0.1904296875,0.126953125,0.1943359375,0.1904296875,0.0732421875,0.1455078125,0.1318359375,0.2138671875,0.2568359375,0.107421875,0.20703125,0.1513671875,0.15234375,0.3974609375,0.1669921875,0.265625,0.1806640625,0.4072265625
|
||||||
|
10526315.789473685,0.25256805208581307,0.20826964233060294,0.11892972824646167,0.2250222869196757,0.3165320372647526,0.3171414932715926,0.27933720934754264,0.13697545218005405,0.1429913803480999,0.2621617409661206,0.15440249509098128,0.21655008469261952,0.14437025603825365,0.07846271430356351,0.13101661502489426,0.3510241442439959,0.12469305159972975,0.1576196506743286,0.41017607374534715,0.15440283851940545,0.36845597137704783,0.12409897854453639,0.20477152002815396,0.13226901659344703,0.20928688260656975,0.18511379590655297,0.07850270522268195,0.15265104182869443,0.13897916682869443,0.20766531397431182,0.26541888746858633,0.11356837465492312,0.21489433964864038,0.16637350647733484,0.17166923089701053,0.3915359333281372,0.1795620561636716,0.2752600535131227,0.18874864763170068,0.39947422059288973
|
||||||
|
21052631.57894737,0.6342812694010642,0.26416579904318516,0.38739314832185445,0.3957727331864207,0.2758332725376964,0.45764480337211627,0.36344396110386734,0.12898691869508527,0.3908967417032765,0.300540617628441,0.1503919987136968,0.1546595406994595,0.22808391962024976,0.29923043819015377,0.7253938342065362,0.4731088094103699,0.18833573571202497,0.39669595861038676,0.2507048207967235,0.286908046690711,0.3288595564147442,0.14935390440711024,0.42696870925353836,0.257803618412599,0.4474867770546361,0.17431385431263258,0.16368992242786692,0.2869890588142205,0.2800727960475594,0.12797070870439103,0.38084843284205383,0.287695596753065,0.3395012514743118,0.4474679835945616,0.48441499091911844,0.30360053318689406,0.3917728117628441,0.43798332531366324,0.36189314929402105,0.24359611130817443
|
||||||
|
31578947.368421055,0.3498253994040872,0.3549433731966731,0.37774921322133076,1.0402010629712046,0.22255644441641603,0.369544998760699,0.6786579438523901,0.21265067948528943,0.4280936301910317,0.5398124393663908,0.4577552076852222,0.2614384701377468,0.49887166855407883,0.19764298679425774,0.488345505458166,0.3782475780582164,0.2481736925500252,0.39283345346635723,0.2801226988393514,0.28250308842540117,0.2599375611196925,0.20115649204835334,0.27822904507539276,0.5606429266467319,0.4127451561164328,0.8448745775090691,0.1666052572614929,0.4018905037327817,0.46988311566804586,0.40820621585581784,0.20021650995904394,0.8358212811794967,0.31394040749674035,0.7036589149623036,0.4319632997803411,0.33588553663766274,0.37348849225242375,0.4581941012860665,0.6310042991532513,0.10114686442874486
|
||||||
|
42105263.15789474,0.31806849640822477,0.07151055798306029,0.4362800577010474,0.4209691943224117,0.3561332364491809,0.5947860337360413,0.2849552651191352,0.441506055583584,0.6402804409037666,0.48122244330324293,0.28668552092237815,0.505597545169397,0.5332985135656976,0.32151536598099906,0.2854676391939707,0.4303499258786357,0.38408062794862363,0.18500339159344703,0.2786310148371223,0.2349892402289647,0.34156409393056936,0.15931060241530146,0.17507642920327648,0.6638020438830938,0.5966882256589767,1.277095953183161,0.25712750294862363,0.3859339473650396,0.5371291987453471,0.7148686776200819,0.41922902664649525,0.689923791013596,0.4045176836262119,0.7694109338142205,0.46480204193876057,0.08634015661857795,0.17908385850055727,0.4975345009251645,0.5306378203415805,0.38499565864203705
|
||||||
|
52631578.94736842,0.4482678257527444,0.14452852634842045,0.24210170365436587,0.41767068318713074,0.5677116689919764,0.48819545365436584,0.3175738855081912,0.3406036012390644,0.35040818687291025,0.30685117832511416,0.15739199725544684,0.449594738080561,0.9795656679739912,0.6436442140066723,0.24983012445085268,0.2195354894918088,0.3465036540150312,0.21170794996858633,0.17861181877326437,0.22713571902457366,0.3089492512541795,0.17886749066804586,0.15466398149316002,0.708045055991725,0.3843110050190849,2.244460304027779,0.2854454352254683,0.4088365021174634,0.5351924869822663,0.17836946925958438,0.5028103778236791,0.5827360945725375,0.2932695404644488,1.2547355556752213,0.2702699037800205,0.155255674325198,0.22418655120765074,0.8493048543745131,0.841276649623036,0.21518077744671513
|
||||||
|
63157894.73684211,0.4295347007688062,0.30621711435080234,0.20832945699506847,0.30310592228686045,0.8611192412653784,0.38166194080976223,0.3134064925344367,0.3136676355081912,0.3328803318689406,0.28246959887052847,0.11680535398361758,0.5158625890673696,0.8010625601475259,0.3455479693214652,0.3236742363081744,0.3207179040459716,0.28131800939501805,0.3144657843000672,0.3050802447459044,0.17443210992786692,0.4905242233065026,0.24101029604755944,0.15488172303936823,0.3737284377671343,0.26674273271639926,2.792095971569789,0.4950980506115013,0.281389109645854,0.6222316498901705,0.3351752685377804,0.2236328125,0.8736371703425271,0.33585138400175585,2.463276892157473,0.15405486891474418,0.32283884402457363,0.30632104107547675,1.520006724011535,1.2819705709526086,0.23117460108199614
|
||||||
|
73684210.5263158,0.2705112230084279,0.35051314388285726,0.21319381946341817,0.21449543696691453,0.5097287249366994,0.302012477885323,0.2882756367944944,0.3608618997801044,0.43091733039581204,0.4181666678008611,0.22203962954788004,1.0264171140676062,1.1404507312087806,0.5620474168138159,0.3052070622959296,0.48204929742786695,0.2954103517400262,0.39570673681031954,0.3029958614021787,0.1881398784817091,0.4770393477252317,0.22991329415022826,0.28148381135470324,0.20030640366995434,0.3236865521798174,1.4399255557073454,0.5312441854926027,0.18736464429099806,0.6931245571358383,0.4508628660291846,0.24378890145848664,0.728100332857169,0.4541268335485061,3.702669172736087,0.15411640072133076,0.3717811510503458,0.3663739682564774,1.5483211982283234,1.6849394795637052,0.29650242242786695
|
||||||
|
84210526.31578948,0.19070142920327648,0.492338925517497,0.23267241924423257,0.14271826493112663,0.8555350898045253,0.3414714527922654,0.2616196312732644,0.38304184678518866,0.2617286618726736,0.4175031878611387,0.2767677835480328,1.3311480154951525,1.4286616740134284,0.45591729499626693,0.3480284841437089,0.35757909124908016,0.4469302909195918,0.30640990450111455,0.2568147400409561,0.2401848156366322,0.8117299172356519,0.24270399521592587,0.48214296885144353,0.31292591464816705,0.3422484041250974,2.5021138706365784,0.4130732781999329,0.39150656491435476,0.639573559536498,0.6972865265822479,0.24118974004095609,0.6500822791102189,0.256051821721888,6.55459848335245,0.19078209789836176,0.5345632072300792,0.3229434576060964,1.170348085524963,2.3973301176879547,0.34834262076507316
|
||||||
|
94736842.10526316,0.17653597358851553,0.34783738735970365,0.25224709444759297,0.12027852489017053,2.3185349118346323,0.30297365743367627,0.2515381525097791,0.318742551301655,0.2329345925032597,0.28731958489668996,0.32036173971075765,1.3045172572466144,0.7365030806480681,0.5753911697303159,0.28257212810569193,0.49299183050351136,0.27475962810569193,0.29303778968029076,0.22925599592214146,0.37817478444107355,1.6268421289333017,0.3673891717377132,0.8493909941485714,0.3928286692442326,0.4869940089386916,2.9320835684144924,0.31343622815245736,0.41808357133099244,1.134464586210383,0.8802078437276823,0.23555904874511044,0.4649360477098798,0.3257196590180542,7.203382246382018,0.392227112091149,0.47462645131795356,0.39892423582209113,1.0136547986820463,2.2716120801804136,0.2869592994203858
|
||||||
|
105263157.89473684,0.3387821017870282,0.1782775149781288,0.20618216813105958,0.17444373894266144,5.464725092837685,0.2481230791255708,0.1905305233688566,0.3568486559754263,0.4291105508143882,0.29361444298910633,0.31665404681683906,1.170143027054636,0.3531618408879415,0.7902175237597521,0.35131470318316094,0.5860526647594166,0.19953013919397075,0.25166121612295217,0.3724470112132233,0.4541812009098127,2.2518734865901875,0.41294614099729754,0.6939506530761719,0.22826984120207813,0.5904046531529308,4.055337160908284,0.23344627972124687,0.2560008886448234,1.0088251806031965,0.4132188654342187,0.24862009402457366,0.5294684676912683,0.28801346353546736,10.039715241197072,0.4992623527294381,0.2189058425353835,0.43283117510935604,0.8640386734642811,3.2897665758212185,0.38168181210673746
|
||||||
|
115789473.68421052,0.35314439736574976,0.1806343268819793,0.15484651634237442,0.3762486714075146,8.285631570789622,0.15975906446039512,0.22905535446970085,0.5511494105542465,0.4683985961110969,0.4266754504386077,0.19865919678495203,1.6609725767225438,0.4454222412320715,0.7236351689473415,0.7038178959051327,0.42741648444178365,0.30322147802632926,0.2803397535287112,0.28444046326951644,0.3528234397275296,1.9664447604784345,0.46053040258772154,0.3381155309914882,0.17458969338118532,0.40575298510099717,8.439830299229502,0.2024564161855428,0.22262958146198306,1.191928998255003,0.1278763504239661,0.2206070733532681,0.6033867706552436,0.6720951930968053,12.632351354879026,0.26584612827882215,0.23871879366296145,0.5428696312732644,0.4622343095055578,14.473562156064355,0.27923291541863027
|
||||||
|
126315789.47368422,0.4955267126870621,0.5303346734297909,0.17218707605081912,0.6911916230854238,8.054654530871275,0.27792109016566413,0.2247495149311266,0.5840927187425606,0.2945865409195917,0.6587421913886667,0.16299467007539276,1.9765327643819792,0.8072135481477779,0.5083539228360077,0.5573457416735192,0.30477464628351675,0.38727283213607494,0.3374086734000336,0.31044059182798456,1.3167003621024786,3.5454615225752333,0.5424273271639923,0.3866910749525246,0.4254158844247753,0.418333819698429,16.77756688337248,0.18916018583767957,0.2667649604607158,0.7247206669434931,0.14106013966399228,0.36904494055750653,0.8748363061624884,0.9645125370606822,15.219834303921944,0.1547596897114677,0.3760083586885659,0.43936833516382423,0.3190939314147442,19.21645830740889,0.226827764114845
|
||||||
|
136842105.2631579,1.0477754059260578,0.2945065115297265,0.14739974912183765,0.4752536467237812,16.179841926553586,0.5601767252026506,0.36046199058892014,0.6316120617937845,0.3598819267716766,0.663672478905675,0.22098718257491948,2.795400878399034,0.9910766120762706,0.6800262683646503,0.4020815767409729,0.463385603104272,0.31555307076578315,0.9395321716562209,0.3716574005803243,5.374017139551056,3.463400132768371,0.4060281373127013,0.49077513475497353,0.4028581608695665,0.39656298312454,24.874373198215988,0.13089320798329693,0.42243764407086587,0.22859423312454002,0.13495566772291862,0.19331114179870068,2.966870321131151,0.6447571387251324,15.260037863353611,0.3267564033867582,0.4562830277757302,0.49860951907086587,0.25948703850405364,26.696052022918117,0.5007571513632991
|
||||||
|
147368421.0526316,1.0926992463933463,0.5765981806282195,0.1859611606333725,0.3561028139743118,41.093773791664546,0.24100681421168912,0.351067202243118,1.1386770739779912,0.5901216734149122,0.8747156465482846,0.307135922756882,2.0717298502406907,0.7917234891009131,0.659126350423966,0.30908423975894317,0.8706614053150297,0.3741369326689238,0.5032413250191383,0.6344720465324594,7.085498334298174,3.316975738863536,0.5100527734307373,0.3526244916413958,0.4341700598803915,0.5145421543279844,23.39264359830819,0.1958587308339466,0.2851907460643312,0.26207287754048275,0.22575036574598836,0.17116232790114813,4.22579486878625,0.5021335992786693,18.357474704858674,0.4059283555049315,0.28120348394082173,0.9598434162932423,0.40768496019358136,57.03829002908725,0.37231536188944536
|
||||||
|
157894736.84210527,1.639772602725888,0.4988060156064019,0.20239522780738048,0.23239280246301364,72.52874119724265,0.15250030316804586,0.258544995844199,1.3656907121230362,0.5142266202171092,1.0361964048771317,0.4328824516148449,4.004721464542802,0.7054496722868605,0.9512272964223933,0.3218489591434722,0.6488756591923677,0.2818604625162985,0.6897765341888177,0.5278666990285433,10.423493290211688,4.304360183652419,0.5510731588770478,0.39443828920908586,0.1891375908891248,1.2933448569596315,35.360429372813904,0.2608191022582332,0.5173655586559689,0.7763932405086109,0.3218701566025161,0.1403012949343863,6.449813818997625,1.0750275688488402,17.91763835336363,0.4890640158402293,0.28611949011889853,0.4271299515404525,0.35066829956139217,64.87515323776287,0.46869943412717374
|
||||||
|
168421052.63157895,3.470647669234766,0.3745641734791594,0.17654554203276487,0.12145332484364174,80.5721263515652,0.05958738881795356,0.2672667358060293,1.9835988306272725,0.24854246863367804,1.4488588792795623,0.7675580053778568,2.7533303892183163,0.5557802575446892,4.063473603732039,0.3130633071519001,0.15882345183734392,0.3768964011913522,0.4170519546128374,0.23659162442109577,10.894548326317954,6.646584170016556,0.3771287912807305,0.19211411410091314,0.1514379443224117,0.6397341149665637,38.19280401425348,0.23665933793931784,0.5858978525093057,0.2458424237956628,0.3995470379858467,0.21600422106291123,5.846165453628159,0.9703240222878072,14.001885918699141,0.9476926174850677,0.20375631654691823,0.7716198020364442,0.364017180128441,46.202826164435805,0.4118348858693299
|
||||||
|
178947368.42105263,6.0064985798336465,0.45278046467958066,0.25760879146755566,0.24032907670884915,97.45902445838063,0.33503549581089187,0.20646041119858166,7.178135068793048,0.6067646243235413,3.0048406553400526,0.6228297965348262,3.9085424032237723,0.8300118565229169,2.9102855772192786,0.6108745828559855,0.045538838880544205,0.392437298212025,1.1848476441613198,0.18454433081883143,19.95002200795013,7.838737102096431,0.5937725949485547,0.6001214769738534,0.3368494873892238,0.42213236526108844,51.87713701771237,0.2597563999841748,0.32713506756727045,0.0903643188053881,0.3074076406843444,0.4260595168433362,12.892902213120397,1.13466584319223,15.739320340249018,0.9442135607436754,0.21316305356012488,0.46911637208468354,0.332357335288769,64.48933502313504,0.3148328670174131
|
||||||
|
189473684.21052632,8.321682552221409,0.6011560656687559,0.3187053078099301,0.2965383397575231,130.28715179105214,0.3714202500446352,0.21862354965421305,6.121536112227928,0.5079816411406709,1.2045939596075763,0.40828997540672074,4.862701511118883,0.9616751472705619,3.463323561438563,0.42859375972166625,0.19506740305892648,0.3542501814147442,0.8875673933372602,0.2313243094573721,37.27420115801106,3.635099194386659,0.7943415866334024,0.3458326662016047,0.27558920588189545,0.5955401655709646,68.15315711002931,0.13331123204112386,0.23034539051003078,0.20494313649523624,0.16380326437487847,0.2419827374064691,7.683193037714655,3.0830784742191555,50.46679675347917,0.3673837244345541,0.47952324928008944,0.3603686388179536,0.3961104712657982,76.8800411118695,0.33204670427908867
|
||||||
|
200000000.0,8.098408096715023,0.4952255550183748,0.4190471548783152,0.4697484468158923,125.78070580331902,0.7740988480417352,0.3726384514256528,4.909173463520251,0.17950128254137543,2.5410545750668176,0.6388261192723325,16.00847379784835,0.9926584143387643,2.7348364779823706,0.8201275373760023,0.10200038709138569,0.33960071362947164,0.5774664125944439,0.22437728078741775,45.82475441380551,4.395343830710964,1.529719402915553,0.3008294356496711,0.21285443556936162,1.135622877823679,57.51070986296001,0.2877400046900699,0.4311261428029914,0.47156775625128494,1.3624275609066612,0.1381376166092722,11.237925830640291,2.47349708958676,48.58576794674522,0.5196286753604287,0.20546677238062808,0.2910659187718442,0.6272422388980263,125.72831244217723,0.7643978219283255
|
||||||
|
21
results/AcrobotSwingupSparse_medium_data_no_aux_data.csv
Normal file
21
results/AcrobotSwingupSparse_medium_data_no_aux_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9
|
||||||
|
0.0,0.224609375,0.3408203125,0.2080078125,0.1796875,0.2685546875,0.0888671875,0.146484375,0.1318359375,0.33984375,0.2138671875
|
||||||
|
10526315.789473685,0.22510773983688565,0.33993433056775885,0.20025547059288976,0.22658916853801697,0.2804600697144907,0.10221229035438263,0.14388180307404155,0.15376399032296897,0.3297103316499916,0.21381181362923493
|
||||||
|
21052631.57894737,0.3166807391306701,0.3151172680207567,0.18225829951320666,0.9231106449032094,0.4551478515371391,0.36209447760331004,0.12306772903061972,0.6181169071356016,0.1823963788737881,0.2773458105705452
|
||||||
|
31578947.368421055,0.9142822783409392,0.2590867858844451,0.9679067326384567,0.35643302040417124,0.7362724790282534,0.5962397636138831,0.3642800402443166,1.3062803395236957,0.3571050147270562,0.6820753974597539
|
||||||
|
42105263.15789474,0.6692679872803411,0.3916022493568484,0.468464912139808,0.6781739316818787,2.9221645048780784,0.3776706050967906,0.6561283101005237,0.8281137144136297,0.3951150677540956,0.2409908157306365
|
||||||
|
52631578.94736842,0.718372294777318,0.4174990904958625,0.24892159512168482,0.26779311911881465,2.945608553793952,0.8854120286217686,0.6837687769755102,0.6910435843005405,0.48152394440035406,0.38395005646174635
|
||||||
|
63157894.73684211,0.9034273300804924,0.2529987271802907,0.6161524965492313,0.2088250981803747,4.935496818986298,1.1256541835964546,0.8413894103834831,0.5931131885982945,1.077703254044552,0.8919301231151812
|
||||||
|
73684210.5263158,1.3466161188986825,0.21097920806123943,0.7481429266467319,0.29587758346938026,8.721545895711207,0.3519453090974167,0.8345320033234576,0.6842258009553949,1.863027982104188,1.2890432680082446
|
||||||
|
84210526.31578948,1.6357787916865045,0.3488464566809319,0.567789199279616,0.9115864972988988,11.355629495636578,0.16477860638309386,0.8188183644471734,1.5626268888774675,3.3678344134808915,1.168839811288089
|
||||||
|
94736842.10526316,1.732427322303159,0.4037120269606318,0.4447625390050154,1.6205779416409225,13.694336518686564,0.23978215901805425,0.5164177014887168,1.22849053094922,4.495021064525826,2.215204035476304
|
||||||
|
105263157.89473684,2.7986854278479916,0.27076620804636103,0.5858578615901873,2.787410699099385,17.964216591578772,0.406959285366238,0.6013768742949679,2.4444842615946505,3.9979955984945112,2.497227576300708
|
||||||
|
115789473.68421052,5.457766411377122,0.2647414220667281,0.6744242586257385,4.404882161571048,24.736210902311793,0.47481445724614113,0.44614651856990406,3.7558747104000187,7.985983153789657,2.7482061755954392
|
||||||
|
126315789.47368422,14.534496730054197,0.331498365322969,0.7826213678164496,10.120458090404401,22.44869295579904,0.49205795879839537,1.4207191361614888,9.139373430584946,26.36291551457881,4.325928389530766
|
||||||
|
136842105.2631579,19.74368827957196,0.9666356327130862,1.7617893060488725,16.279786561664785,29.869162609702677,0.6924036903064339,1.0287457410648577,11.495013236999512,32.650216118450636,4.208086988602318
|
||||||
|
147368421.0526316,28.052590959290065,1.8088652433781087,4.110566865704398,22.06466674804688,26.32343972224607,0.601789099357795,0.9641266608832618,19.081813643183413,38.351053776833496,4.105917922677756
|
||||||
|
157894736.84210527,38.69553768337599,2.323565596688818,6.552347730071262,29.182168255222145,63.26096472383539,1.2492669124022089,3.2766303804772727,14.857409318728456,49.98374962212307,11.615941000116834
|
||||||
|
168421052.63157895,61.47175534742362,9.410692928240245,9.445706312015778,44.69571459656608,73.19101260705668,3.1925820221200882,2.835508055964335,60.245032566736306,49.94152971516025,8.155745107381295
|
||||||
|
178947368.42105263,35.859508575164696,28.185993263265768,14.05515370408584,36.75653987842253,78.38034054975431,2.9439998690111153,5.779282974074093,69.12962228769742,47.880713822108554,31.729023074839592
|
||||||
|
189473684.21052632,63.92569595822997,40.39670869187965,17.14929752402689,37.35419879395546,80.63813571454415,2.2480240449350632,7.785219319309227,59.180295077717545,71.6997069572808,46.92400834038648
|
||||||
|
200000000.0,76.36808375308388,45.91506902795089,36.48639729148463,69.086468144467,88.02534053200169,5.3373210806595655,24.41777645914178,109.24024807779412,60.212072121469596,70.15842924619976
|
||||||
|
21
results/AcrobotSwingupSparse_medium_no_gauss_data.csv
Normal file
21
results/AcrobotSwingupSparse_medium_no_gauss_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23
|
||||||
|
0.0,0.1767578125,0.2744140625,0.361328125,0.455078125,0.30078125,0.5244140625,0.0634765625,0.0986328125,0.2626953125,0.6044921875,0.5205078125,0.1982421875,0.302734375,0.2197265625,0.232421875,0.25390625,0.08984375,0.564453125,0.982421875,0.45703125,0.0859375,0.2568359375,0.24609375,0.2705078125
|
||||||
|
10526315.789473685,0.3161338452156892,0.2847689763330687,0.41598213544512724,0.4476580263174802,0.2959083493726736,0.5056423203106402,0.07416371955765912,0.1159648340494679,0.2553305876882453,0.5797400672680123,0.523553375392079,0.22980529383609172,0.3064444243412599,0.22947236375465288,0.2568417520073973,0.2724564967062995,0.10352109607897307,0.5620720485571018,0.9470379715811182,0.47818406863225793,0.09684615254071942,0.2590508923306029,0.25152038933497717,0.28523726212350947
|
||||||
|
21052631.57894737,2.3593359918145262,0.4637868622333389,1.2159980393512757,0.33341081254700217,0.22503254222077346,0.2134047162169565,0.2521746204830603,0.44704550323063647,0.2283878009405163,0.1918902621705116,0.6692403122328656,0.7345052280584531,0.42730335647709816,0.38982959863551764,0.7089355024934806,0.6550341788421378,0.3177649585163824,0.5513660980393682,0.44666438749952664,1.1429404948226634,0.40457273784436687,0.4705476258930408,0.35154267625465285,0.6444543590175809
|
||||||
|
31578947.368421055,0.22349745679099808,0.553646682041834,0.3217710140999664,0.3529451296270058,0.29970385163114344,0.3815795787483702,0.2369618455458876,0.6780884787646686,0.8566428630966227,0.543034402947677,1.3416738113868267,0.581175690542628,0.8065876313523901,0.4206632762074141,0.9520944135671175,1.0611303255498572,0.25390249606314796,0.7592364942598211,0.9634539083760858,3.095153755758607,1.2809645687113838,1.562169082937478,0.41809932148687723,1.428964205395812
|
||||||
|
42105263.15789474,0.3400461085945616,0.6973852543289312,0.24562445770010072,0.22612363017496978,0.2237821774469518,0.5092645365115348,0.2639371391148449,0.20581405512844098,0.41723831472634615,1.1612377219583188,0.8915075476479992,0.5738048077950517,0.32716620067480195,0.6656943239333557,0.4755886849273936,0.22779098045792937,0.5659468695727743,0.6199544763961327,0.15692577203555122,0.64864770989669,0.6946968860573386,0.24671858895848664,0.31173296053984156,0.7616035376889553
|
||||||
|
52631578.94736842,0.34759265762286834,0.7349188506107911,1.2518260591248065,0.5857084728674216,0.0946424832964868,0.6381748601009971,0.6700890268975678,0.2879895424248439,0.8357190715969435,0.5797622950123288,0.6468203959372565,0.5403087674085453,0.16724685288532287,0.6448498036392508,0.8059320502664243,0.2059928122649893,0.9467012875628273,0.5498382008306868,0.4216829204823502,0.46055807763519707,0.8949554950576739,0.36914234214212094,0.1877290271325785,0.3795686996544497
|
||||||
|
63157894.73684211,0.14055596031970932,1.239804386762371,1.8848991446878096,0.3660995029016214,0.18033078899013694,0.30616618127373774,0.5987889984638074,0.24579227267870285,0.6580916904016214,0.5281240880654456,0.7599006135047635,1.0030780697133077,0.5292606617935478,0.4670218047673024,1.1296384499674033,0.2237377695099471,0.8308912895392842,0.6719812831720154,0.6246031341130052,0.28494397953276485,1.7024170883474585,0.7975890933641772,0.31464213743764596,0.5454027963146938
|
||||||
|
73684210.5263158,0.17908692558056094,1.5906446462192696,0.25824182515659494,0.26297524893382906,0.15246065567735162,0.22135222849753425,0.29981972702322246,0.3909305984623876,0.6402431974120417,0.2606878439145075,0.4174307376724201,1.2653320370618655,0.6668578383004565,0.5656979949190345,1.3024991661557859,0.3686637772747683,0.6377137625316504,0.34438747448273965,0.589105806852642,0.36790156958836284,1.5427130781052185,1.1799534034200652,0.17249402445108936,0.7457875360082065
|
||||||
|
84210526.31578948,0.1785622594098966,2.000679206319793,0.33290732005956764,0.4312865450111454,0.31724541999626693,0.2608143180361085,0.2516899214557003,0.6615488152754935,1.0253917028369006,0.4486694758618638,0.4950180687732644,0.8128370036709011,0.5347344564929233,0.3062953790469182,1.4841423140338257,0.2337621334847321,0.9855464607724855,0.42614906712582246,0.8344345304114006,0.39728419338236876,1.5910014736355176,0.6937756736522895,0.30245684256514027,1.3257811908246409
|
||||||
|
94736842.10526316,0.2426800503294884,1.6305499512733184,0.9309044465463908,0.4176713224923511,0.3230921119203858,0.3820492382525077,0.2750105395541627,0.6277132958916746,0.982829931370109,0.4397897852424769,0.531782884677031,1.1669645837799665,0.3888270306785351,0.3657532742148952,1.5892000224781828,0.6016985901174784,1.795510181099424,0.6981789623271065,0.8422033093312441,0.32207867313289906,1.0660824736069443,0.6939562192584009,0.6945496054567459,1.5778286741050658
|
||||||
|
105263157.89473684,0.21695786896174635,0.7520525396035319,0.8739833514776257,0.23599009104382626,0.25390796714212094,0.47114714625139315,0.3795112652131395,0.5205997667814556,1.1299017010633303,0.3142531704044078,0.33409953975941664,1.7502936260191688,0.5237998698226632,0.38864724325671424,0.7122168659833661,0.597978533800289,1.7381294335024509,0.6599918217540117,0.8682720628141366,0.3342020927703942,0.5555933056775882,0.9622271688360917,0.4551440262728451,1.5579835833604976
|
||||||
|
115789473.68421052,0.23677180282296895,0.43027985591307244,0.4394897054106905,0.26418800301168766,0.1852816108851552,1.4377817962308335,0.3322958034821825,0.4985233555209934,1.2618796964101184,0.9969702585912475,0.19800052484316838,1.917952703967319,0.609808055317633,0.5370772829346379,0.8167187170308712,0.40298944298910633,1.3323286061802069,0.5544953967065362,0.4841684964885343,0.5267688613849333,0.5713652904013847,0.89922502628654,0.5324566674694788,0.7688864921929105
|
||||||
|
126315789.47368422,0.3403701570885995,1.9111207343864995,0.3179922447310258,0.4330267126870621,0.48859509074456864,1.2343906075009996,0.24485398395570024,0.7900896997002689,1.3171893830444679,0.6710265098846508,0.5388116598789717,2.8476533017990677,0.3926546725539948,0.7917437038263127,0.662158543383315,0.33302488881795345,1.5650875799543644,0.3157950530752247,0.6348671952773335,1.7312065399254475,0.49525526686058136,1.5251203521136771,0.4066303813226329,0.9682653960758969
|
||||||
|
136842105.2631579,0.36841529360108094,2.5102352607283236,1.1373310723133045,0.38851120069086365,0.5037401046118906,2.843767728831962,0.32297353665254136,0.9927730348962166,0.9037765545197795,0.4196477530735683,0.4141540633014031,1.2554735093896054,0.4757968580293525,0.8338281652603783,0.6050493604918927,0.36583085205416277,0.8325516999263171,0.25484897951670304,0.6605646973501611,0.7907535811870694,0.646767426065461,1.651547294574431,0.21720431584070263,0.7042453850405361
|
||||||
|
147368421.0526316,0.5400346454821137,2.0474252568717803,1.050204549139556,0.24156025704254394,0.3457099460168558,3.3522912181315325,0.4894130157301632,1.168114728214338,1.3668157043879718,0.7254034502024138,0.5280151050176649,2.1767898464467073,0.9341403718139991,0.5955848218331374,0.7830968418279844,0.7020978954029878,1.5633690482691724,0.5015276013318855,0.770452462405049,0.3837822414831442,0.4623187321374949,1.2318267241079057,0.7627329945234056,0.6892473796728247
|
||||||
|
157894736.84210527,0.3625039314629297,3.268964722546185,0.3406298312454008,0.6786149571807104,0.2470730361515795,1.5275789931870551,0.4038299391474419,3.2265313563254416,1.6590628055984624,0.27857556963891494,0.7991118629223093,3.4037839329473867,0.6180901672701423,0.890510545873246,1.6308400135621475,0.4458302976021804,0.8548434249582045,0.8480832187092537,0.5299473046596028,0.28141200047120485,0.6440055773198772,1.5193822760331006,0.3097274574216379,0.4006165137251326
|
||||||
|
168421052.63157895,0.16110749389986573,3.249538854879025,0.30259560878257014,0.4984009549888546,0.33980684993669935,3.6939117769785548,0.3642068794229354,2.4833230734531893,1.6372509478201827,0.5516385741511212,0.2034369809475629,3.6158004295792936,0.751265084644434,0.7307435350074661,0.6482404460537132,0.3840608042032765,1.128266099417309,0.5245531245942259,0.9154858787304149,0.7620268000129851,0.47929424592332487,1.4681019030119242,0.35303710768427554,0.8184551545457499
|
||||||
|
178947368.42105263,0.4382342605379481,2.7228315253006783,0.6550478922032914,0.19586243721917057,0.38259203480221227,3.3852762106052725,0.3001752045015879,1.1059115507596082,1.3729982983702766,0.5097933000474755,0.3947206534176983,5.565063896601881,1.0247442927056734,0.2533329596479842,1.1610874456051645,0.3574311000158252,1.0589827170332382,0.8656945136115162,0.7067362264913204,0.6383843831408387,1.2826354959334696,0.6869669964438988,0.6948145261431666,0.9989773124208741
|
||||||
|
189473684.21052632,0.52915790800903,1.649729942681057,0.5648479303164495,0.3455014532622868,0.46547128883425215,5.4922756660017615,0.327526166498496,1.6249489242680515,1.438724015888415,0.4364567542670506,0.4130859375,9.49447930354491,0.17243187711509647,0.2770846675967906,1.4154466592043726,0.6171635075619346,0.5331689723640928,0.4021885943214649,0.7982105931416774,0.49493494852758174,0.560486373478686,1.4939863569518534,0.6872207504230193,1.0232785190571707
|
||||||
|
200000000.0,1.4102090534410978,6.100927001551578,0.6284475326538086,0.3231710634733501,0.26835541976125615,7.329010210539165,0.5117963991667095,0.8290902187949732,1.3261784001400596,0.27206892716257197,0.4130859375,16.774827806573164,0.5401770943089536,0.35032101681357936,4.510709561799702,0.19597048508493523,0.7176278264899003,0.5826465205142373,1.605031916969701,0.2057720987420333,0.6009777470638877,0.8068737732736688,0.7291332044099507,0.7881078218158922
|
||||||
|
21
results/AcrobotSwingupSparse_medium_no_norm_data.csv
Normal file
21
results/AcrobotSwingupSparse_medium_no_norm_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11
|
||||||
|
0.0,0.201171875,0.1474609375,0.2626953125,0.2509765625,0.3779296875,0.3515625,0.5283203125,0.1025390625,0.2001953125,0.1552734375,0.201171875,0.40625
|
||||||
|
10526315.789473685,0.20726300078415805,0.15554552263170068,0.2598158712202162,0.2521394137860665,0.38446380425027865,0.35028890097240334,0.5169132951223949,0.11560729600055727,0.20440372667814555,0.16485311714235765,0.21346487430984623,0.42801193121067377
|
||||||
|
21052631.57894737,0.3095904796737713,0.2963516837672183,0.20460471155901033,0.26616848340655297,0.48818041413114344,0.3360786702163992,0.3345685361825198,0.31905359706720154,0.3026547365901873,0.31956734485573385,0.40822294874534715,0.788488594118578
|
||||||
|
31578947.368421055,0.2770152607122616,0.37145197490575904,0.22537197366645806,0.223114280489343,0.4533717903073805,0.3713548929737545,0.3598521673778417,0.2385338548147778,0.5887232936320214,0.2654281124844115,0.3582784647426447,0.8210539012074141
|
||||||
|
42105263.15789474,0.10577160103499396,0.4357160636922989,0.5325967360731637,0.24195677950111455,0.5679898882836847,0.2910764699497381,0.2839206050967906,0.32787170832837387,1.038794879437814,0.18668477489017055,0.48278419727103533,1.4864833757817912
|
||||||
|
52631578.94736842,0.26562227634842045,0.28747375784158047,0.7030830198377783,0.38061430183474043,0.7938516410764235,0.3176791621707483,0.7576070981012487,0.15861090926912683,1.325336115512161,0.18147347310243223,0.4715337330614761,3.8417730727684463
|
||||||
|
63157894.73684211,0.4130244056934134,0.419478197177031,2.552789867749836,0.5317743465180543,0.7291756944312946,0.2742431566655801,1.2819059482902042,0.15480002405901033,0.9884446479607158,0.16506129024431657,0.8301297924855415,6.062050874873872
|
||||||
|
73684210.5263158,0.4416308363388781,0.9029495233974303,4.578024922315434,0.5052830651196085,1.9550710794337909,0.3313127633937508,1.843442779498748,0.18985237111015008,1.7661487547644623,0.35013475180332687,2.1102747428450233,9.047234659379871
|
||||||
|
84210526.31578948,0.703158856759111,2.1783637260796294,8.389273244588331,0.6207991412471867,5.681757200457714,0.36671339970216194,4.353583232847939,1.3504192954615548,2.7489937018819792,0.43980553539836176,3.964401245117188,12.114645263164658
|
||||||
|
94736842.10526316,0.7625770331089516,3.050972027131395,15.596520246891433,1.548264912951356,10.712566909367357,0.7441789701044395,5.035858796243852,3.253250740241476,3.1388800956535867,0.5963977142714397,6.16468988561234,12.802119413571345
|
||||||
|
105263157.89473684,0.7522874287951355,3.85286762311518,16.892098931394457,5.454146150076488,12.15275656681642,3.5993557768845488,5.601042216504379,4.577857131112646,4.791842325902712,0.8901967289044916,10.639487623177736,16.14216986785635
|
||||||
|
115789473.68421052,1.770855924759545,4.962410142216986,30.214912644383645,11.192647178417426,19.69669155797139,7.527202888869182,20.23894223876276,5.842226638688275,9.328721656693645,1.4675567975665063,16.8366599439584,20.60467112427603
|
||||||
|
126315789.47368422,6.458008087242747,6.281620659656474,41.81310676933987,13.325664308923104,26.887397945752767,12.208369627553676,35.33012637405185,10.183598441760633,12.488072213043468,2.5151763134055534,25.50017838200705,54.93078991580874
|
||||||
|
136842105.2631579,10.757425239541858,21.18903042801201,65.53680683305062,16.855921145621434,53.91626050003351,14.447411331113356,48.61652792193554,10.42493467780031,26.932806942271412,2.0457209114222636,36.71975068966769,44.305423652036005
|
||||||
|
147368421.0526316,17.106903144857565,19.493297022135295,74.71719628754084,22.694302107158464,60.21679335617953,32.81872818542651,73.32166379574595,18.40202304597047,74.92617977491051,3.3872946107816846,48.90670771612025,83.19890364649558
|
||||||
|
157894736.84210527,24.359525931508923,36.21356267744156,74.32568160979041,16.51164893628487,84.45178987180759,33.07684492074221,88.7824796349058,23.264234556055467,91.39014486328718,3.3141351631143414,42.45797127435742,115.99503649502913
|
||||||
|
168421052.63157895,48.62123260075367,46.22311392913565,64.09681967949273,33.462527626439154,128.38521421649122,45.96464652698126,102.65875184965266,42.08960518876602,111.09692758998713,3.5145066279783803,74.90180981918718,120.84549241845298
|
||||||
|
178947368.42105263,44.80953861933996,46.58364661314481,70.36745623802544,79.8354273661352,106.02552816808388,70.05180360735949,127.37867529438475,53.92728262660907,138.49816179407603,6.6758360691017735,74.96363218999636,130.2065561484762
|
||||||
|
189473684.21052632,80.78561290413388,50.879144642161535,65.52772759202445,113.54444279234825,128.26982405707446,69.0160346150068,117.7153404320376,49.49558243262801,153.671208717156,7.646348741906503,118.21148424861833,163.8649747431113
|
||||||
|
200000000.0,75.92054613013016,66.98146318134509,109.06625958492882,101.48150519320839,126.20168585526316,83.08265148965936,115.53981620387027,91.26483646192048,162.5011793939691,25.507937581915606,111.37292104018361,154.62860072286506
|
||||||
|
21
results/AcrobotSwingupSparse_no_gauss_small_data.csv
Normal file
21
results/AcrobotSwingupSparse_no_gauss_small_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9
|
||||||
|
0.0,0.1015625,0.3388671875,0.4326171875,0.2109375,0.955078125,0.4345703125,0.3984375,0.388671875,0.525390625,0.2646484375
|
||||||
|
2631578.947368421,0.11994662509400429,0.34396158361038676,0.5194434168596347,0.22716204413416644,0.9299383876726568,0.42554437156529307,0.38907931584070266,0.3846849563049147,0.5970444137700047,0.2624334826693971
|
||||||
|
5263157.894736842,0.4377420060852558,0.4179684065715758,1.7919866926452128,0.4814548809442494,0.5468257840650564,0.28768362430984623,0.24026479747486906,0.45491134030667046,1.7831928696989023,0.28062756504048275
|
||||||
|
7894736.842105264,0.5687934843787197,0.31969791634261113,0.40986809505980426,0.41984326687546,0.7867357671425943,0.375482981885239,0.4064096440238637,1.3354080020555825,1.5299849153555658,0.6551206382688062
|
||||||
|
10526315.789473685,0.8009036267563246,0.2659476747803411,0.25698804987434537,0.6692156804895797,0.5164976357753257,0.3327459826693971,1.007351059002229,0.4339263366530146,0.4346078994201491,0.3746171671267692
|
||||||
|
13157894.736842105,0.8069123429274625,0.34273891765985465,0.3983879406366322,0.6205434455765911,0.17684360884563413,0.43400496855336873,0.9540840952019942,0.5550173630014351,0.24008125611619607,0.37602783900548875
|
||||||
|
15789473.684210528,0.4758098369820297,0.3824696580458876,0.46029318072459047,0.8411207595360245,0.36577344138866674,0.5807384903080903,0.3193041413114341,0.6842254099753423,0.4427745269606318,0.8528741007036126
|
||||||
|
18421052.63157895,0.7026946683339466,0.3174371560854925,0.3322486243419699,0.5016936516167382,0.6090902555682322,0.332806484190711,0.43623326954088737,0.42868767947041087,0.3677255123275799,1.4322281729151336
|
||||||
|
21052631.57894737,0.6812232485108097,0.3168407028071438,0.3072702719564254,0.21290328430006714,0.5520422135033436,0.47707045705694906,0.622971997036498,0.6103017072598361,0.35348588938197933,1.0626288543447564
|
||||||
|
23684210.52631579,0.4968484735885155,0.42882340238365113,0.4110288857753257,0.45058739416487004,0.6469827398368857,0.5656887699032094,0.5646780547673023,0.6071271711439306,0.432392624936936,0.9504458567442327
|
||||||
|
26315789.47368421,0.6087043793908117,0.4738238855081912,0.25644147561197467,0.8457718688034945,0.36560793530577773,0.8602326675795451,0.5361789375791259,0.20382913393987512,0.35720617024852297,1.0799222824646164
|
||||||
|
28947368.42105263,1.007922951864734,0.41575720567782504,0.25065560486177985,0.6191521826873527,0.4847934305502766,0.7377698348829953,0.7388849390510706,0.2613284092530649,0.30422093135168016,1.4377591299548373
|
||||||
|
31578947.368421055,1.0422088749851213,0.6821434781822147,0.21164026022617827,0.5682954867460724,0.5795411191818786,0.49586234264426615,0.5351438865767285,0.42828197954764347,1.031030660190742,0.9182899559633872
|
||||||
|
34210526.315789476,0.3950715712232926,0.776914424843405,0.17613844462048645,0.6794362688989191,0.6833469834684337,0.6096252245916225,0.24337699604826937,0.3430759213307556,0.717002892428157,0.9107397718772998
|
||||||
|
36842105.2631579,0.41892244545046337,0.6398056537490802,0.16995195404644486,0.4791332519615784,0.49253201154460513,0.6532902334535551,0.5630349927992044,0.19508791366112188,1.0247433099720291,0.7518741258954075
|
||||||
|
39473684.21052632,0.5524397424713727,0.8534278156354489,0.2461918622172771,0.47644834175004197,0.36392350870486434,0.7182536957336595,0.25672752242999697,0.44319192722563616,0.654069150253676,0.8105308289673191
|
||||||
|
42105263.15789474,1.3406638782110245,0.5544953491549081,0.42190031860013427,0.5385104784344703,0.3862063759251645,0.8659334050651403,0.7730434766436551,0.18643315280903727,1.3308274963886126,1.37788670082832
|
||||||
|
44736842.10526316,1.1469286921281894,0.4291610929114006,0.2925664130340322,0.9289598359295537,2.1716349725908195,0.5767662439319895,0.9920969432080552,0.3802574744184923,1.1568963322943266,0.7181633397152547
|
||||||
|
47368421.05263158,0.6630999229621358,0.4158324983971932,0.23266152463791448,0.39704905586559686,0.7588860731045625,0.7202478181622365,0.7219760133949343,0.26065298064593795,0.7539636606654962,0.5497162150544144
|
||||||
|
50000000.0,0.4712636847245066,0.9249690206427323,0.46508573230944183,0.45525319952713816,0.35294959419652033,0.6721355036685341,0.5371083209389135,0.29072334891871404,0.6535622948094418,0.9252316324334395
|
||||||
|
21
results/AcrobotSwingupSparse_no_norm_small_data.csv
Normal file
21
results/AcrobotSwingupSparse_no_norm_small_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11
|
||||||
|
0.0,0.248046875,0.0625,0.125,0.2177734375,0.1640625,0.263671875,0.142578125,0.1875,0.478515625,0.2314453125,0.40234375,0.2861328125
|
||||||
|
2631578.947368421,0.25125855950437426,0.07003084642404996,0.14266426477405836,0.21860404556147608,0.17630012543908116,0.264447109190711,0.15265616947924332,0.1891612161229522,0.47076328309288973,0.23604134377350108,0.4022883761292349,0.2895106186166695
|
||||||
|
5263157.894736842,0.3036907703262287,0.1955339954830603,0.417585573698345,0.2400634691655801,0.3735701912327817,0.27346757904644486,0.33570643607269035,0.21690762274153014,0.3714772697300792,0.3006499916562743,0.37301576566828254,0.3378400353513596
|
||||||
|
7894736.842105264,0.29091478913114344,0.21520675912788373,0.25125787264752586,0.284687620781135,0.331029736104104,0.28718628975823324,0.41236470346635723,0.2380402742000168,0.5673011135196421,0.2003662183344199,0.19073800961396709,0.27287863364180037
|
||||||
|
10526315.789473685,0.23802864518522224,0.3445970212983953,0.18891032845029543,0.20229917616064857,0.39216624336559686,0.45654655097264,0.23332905439128507,0.29350094782018266,0.34632590338794145,0.18567847677214985,0.46343685583394656,0.2711425633311602
|
||||||
|
13157894.736842105,0.14090186935382537,0.41421117809010344,0.27307483430054047,0.15208636096309758,0.4189179571051347,0.4396387507050321,0.32275304767893953,0.14495407379234926,0.20444918867623707,0.3202841380956761,1.1782266532285062,0.41820837586210047
|
||||||
|
15789473.684210528,0.20871469386726876,0.28198661698528943,0.22730662485899353,0.23915152932798456,0.2703475053951021,0.6747909979146601,0.2827912433655969,0.165482740322969,0.22189092768196253,0.4306025306934134,2.911001649259531,0.5465694015376124
|
||||||
|
18421052.63157895,0.31909871563686887,0.3659586761136464,0.24902584149896942,0.37836342967448133,0.4092480640992564,0.46554378657459883,0.31598273935080234,0.2607237374683497,0.19426996489971296,0.4440918946200131,6.176969705195971,0.5010172640517809
|
||||||
|
21052631.57894737,0.3532961900874849,0.3432970971612059,0.3083503702010474,0.25003248304541426,0.3858074494676246,0.45309389066828254,0.26247313016009133,0.5184596899143548,0.2748201534028199,0.4376770399944273,9.152308519527192,0.9481273661690076
|
||||||
|
23684210.52631579,0.4062441854926027,0.32371457065571707,0.2666073770073973,0.3100225284819458,0.16453488315571707,0.48871638966399217,0.2891547977098798,0.6601589736515795,0.23289970545887614,0.22545330544257758,11.488105253499631,2.6669291763094325
|
||||||
|
26315789.47368421,0.6048617217679433,0.7771274534949305,0.2667820367786693,2.0100523243320283,0.2862582801121424,0.6492459040929737,0.4212433400246575,0.39811924223754547,0.1760765931612897,0.26034227830881557,13.632602242551682,3.112846997966397
|
||||||
|
28947368.42105263,0.8280159694005907,3.2479128586618518,0.2203951463144572,3.6560692298445345,0.45045373182217496,0.4112739351647714,0.3031011142889219,0.363053276928508,0.5425097367770122,0.2138534979146604,15.559125250396304,4.053317080574352
|
||||||
|
31578947.368421055,1.134784466011703,7.669549429515728,0.32472602020010083,8.23079994180527,0.32095166784904644,0.8598584204169202,0.3451743614640595,0.342951851208124,0.3340930146193564,0.33820888276245487,21.842560554145123,6.406826156658481
|
||||||
|
34210526.315789476,1.8827517501535187,8.867948677401133,0.441584344055514,10.450260965447677,0.39772757532854175,2.7829278671180133,0.4073773011606486,0.23049164082535079,1.1378827742262243,0.6605059605225965,30.34590503713762,8.07602402642163
|
||||||
|
36842105.2631579,1.2499827810271618,13.4577449788017,0.5287855312104371,13.08799321499558,0.3087301122184604,4.908896897968495,0.404296875,0.09344440153761242,0.9425163797394385,0.3322899414231568,36.82283538860628,10.160734514780653
|
||||||
|
39473684.21052632,2.4262336799642727,13.912452441503467,0.44871729430729657,25.01421294912408,0.1585216786392507,7.548204992616608,1.8462324697225059,0.16994034880746442,1.0017884008772158,0.6442124004839533,38.800063912558095,26.62291460750507
|
||||||
|
42105263.15789474,1.8957548273567344,12.724887795065248,0.37405179940432387,32.843212180520695,0.4806609008450919,10.410637895156142,1.7354708008488788,0.4507272144433866,0.8014381810238486,0.7009397807874178,46.07278522692229,23.83986615210028
|
||||||
|
44736842.10526316,4.238514731135066,16.38166935622197,0.595972166827511,23.140149697702675,0.22474605687107063,14.50005219055345,1.1324873498932475,0.24690689076347025,1.1698891198536037,0.69168790357595,42.606099791804176,33.06238525527996
|
||||||
|
47368421.05263158,8.035884746223937,23.544428865004775,0.5621408161364104,32.238068741774626,0.1405993855230696,28.6749156835667,1.1710396077163991,0.931688763098043,1.1358143180361084,0.5324972739180039,81.18519864386138,33.74080153383377
|
||||||
|
50000000.0,12.403944969177246,28.882382643850224,0.9095641186362818,50.32430417914139,0.27952896921258225,28.698694680866442,1.2693656620226408,0.534015555130808,1.2294362218756425,0.47636112413908305,97.38083723971718,41.65443470603541
|
||||||
|
21
results/AcrobotSwingupSparse_ppo.csv
Normal file
21
results/AcrobotSwingupSparse_ppo.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31,trial_32,trial_33,trial_34,trial_35,trial_36,trial_37,trial_38,trial_39
|
||||||
|
0.0,0.494140625,0.9501953125,0.72802734375,0.23046875,0.484375,0.24462890625,0.5361328125,0.7392578125,0.220703125,0.208984375,0.43701171875,0.54833984375,0.31982421875,1.18408203125,0.13232421875,0.70751953125,0.2265625,0.0966796875,0.25537109375,0.5419921875,0.55859375,0.251953125,0.142578125,0.20703125,1.1865234375,0.564453125,0.24951171875,0.6279296875,0.27783203125,0.1484375,0.18310546875,0.22607421875,0.501953125,0.8427734375,0.33984375,0.28662109375,0.583984375,0.75830078125,0.23876953125,1.1474609375
|
||||||
|
10526315.789473685,0.49652170144289814,0.9977614674871979,0.8302475091823251,0.2288075338770478,0.5258777161384224,0.26179480618717266,0.5844465147425264,0.8223186186476098,0.21613478066188146,0.20795995839084613,0.44014034244822664,0.5774664997724285,0.3131793542581912,1.2775808120368262,0.14068567323552605,0.7143582042894865,0.22722698644918088,0.10673004504386079,0.2541805555285509,0.5312496565715759,0.5797465686322579,0.24948898775095424,0.15077345787323082,0.20888627467062995,1.2291059441183412,0.5879870200751561,0.25258496857746154,0.6318335453889377,0.28727327621544496,0.15109544579672352,0.18903047292186284,0.23094711937732643,0.5055524265997298,0.8665842019289814,0.33557996195108936,0.28424001730710186,0.6033929167031582,0.8217869240821564,0.23627770706557172,1.248075260680138
|
||||||
|
21052631.57894737,0.6849224032457516,2.0354762249045755,2.899330202562327,0.2083704781994595,1.5123926807308463,0.5181258774860413,1.8486727030323484,2.3948645472856773,0.15702574933334731,0.19307754970983784,0.48653936518196256,1.4385540399524976,0.2176984115980999,2.9038863670792936,0.2726973007920706,1.1934817993079527,0.28839460304239123,0.26190965129397914,0.23969089159344703,0.38155497078089834,1.128383987828305,0.23446643913881932,0.2949170907778753,0.2448958637311518,2.122116485130754,1.428297410050918,0.289313731101081,0.7537016828964952,0.43387027592540117,0.19929941869508527,0.27259133727266516,0.3060867145781372,0.5700257187735011,1.4916662681135775,0.2568407217221247,0.24473965399153014,1.2262851630551665,2.124011169179985,0.19919875454044078,3.114077705425569
|
||||||
|
31578947.368421055,2.24118826594049,4.627148058936207,6.956321308157122,0.2571729649467151,4.4241064512828725,0.30242520620287955,6.9978975642090715,4.626302251525204,0.2688787241061308,0.2056417755803243,0.4190427259725216,6.324368927287266,0.3265617124922057,4.997290672027506,0.2129659943303243,4.843616811852708,0.9436446583502185,0.18441478961722674,0.3087431268348589,0.8372309584366652,3.5895586449683883,0.45294930994345545,0.35903028107746154,0.27153427779179196,4.1995158816308535,5.935984034287304,0.26729409120089476,1.6712633949237525,0.3758061718082164,0.2163484129549063,0.1468012352729438,0.2813614345983786,0.7173415722939448,4.086182188789602,0.2085921453967319,0.22982391558195414,4.048961675068019,5.133871602879999,0.23165416057090019,4.987565718199078
|
||||||
|
42105263.15789474,5.191192600535555,10.389190272281045,15.12841030237087,0.22302882757213308,10.069237051247892,0.4511746224273936,16.082432427234597,10.240132302788817,0.17627978655109774,0.20904556337816235,0.33917533789975496,16.848162975998136,0.2657049818382369,14.023747753238414,0.17384264938058613,10.538108941920907,2.820839836987102,0.2887914451535719,0.423933229948345,4.502140309341728,10.21979456132799,0.49398851262565463,0.3515416459693803,0.1941140985885155,9.77537937639823,12.380305530622065,1.0742304265664226,5.395573312225765,0.5510600082101584,0.17625535029783806,0.14290148663718946,0.5723319648045253,2.1806129286493956,9.85613180461683,0.234670834528112,0.14906660803797503,7.448105281079576,12.058902481586319,0.20896882033414127,7.8542944361298375
|
||||||
|
52631578.94736842,10.83784787608646,20.385555940982048,23.572254878332078,0.22182034257376293,19.97872336617467,1.615725350842251,23.36556761416702,18.36757633758714,0.19532001976161123,0.2862509082889293,1.101568066182229,22.791972205249227,0.22986545192898145,20.553699007324894,0.1492667462356863,18.53281970697757,5.4521982636808355,0.32914804289545707,0.4060274861195741,15.137717310411448,19.36185924630416,0.699090618175813,0.2563520970437005,0.20477511413870095,19.20060614667771,18.830435314336974,5.432178082558586,11.44808975018953,1.4109826840852435,0.2726144117001351,0.2742455487766424,2.2851454742727517,6.6287669871322334,17.796421831664617,0.15306242020836827,0.13872229293442828,12.601487184825697,18.7865858936574,0.19123859484770292,13.378934909101998
|
||||||
|
63157894.73684211,17.681571799302038,24.965509924531975,25.528548031962814,0.18676971266474418,23.436591069123754,5.921458986657481,30.736879105713232,22.626395431581958,0.2350659832729858,0.22715007169094764,7.6920882301647655,30.14843616855442,0.40067605628861636,26.028098610959887,0.20446781042209947,25.811571707685903,8.995880586618865,0.34856634853289087,0.6106412628681047,21.48754160820282,25.737043610570176,2.587658182075481,0.21728037202787534,0.2831463774155381,22.96093462312651,23.087736861527464,14.609693767621579,16.40369266023927,5.065548767343456,0.24314816440571704,0.2669131234081828,7.416600829676582,14.068949282004239,23.940860328251638,0.28999975843772996,0.14751135543442828,17.590556440591154,24.487356196480118,0.2859974567909981,18.806900661077528
|
||||||
|
73684210.5263158,22.41108472855798,30.021490996564197,30.333554796234726,0.19962260861806264,29.265969341151273,12.091581846538345,35.01257228455055,27.039359401797988,0.37813803231617094,0.15081874815711024,19.19548369114419,34.45088687695955,1.1774729718131707,31.30721900205533,0.1648690152366406,34.464571133875125,15.36987778222462,1.1226384870893744,1.3629475564507572,26.888942303749996,30.506537101935812,5.227423442037482,0.35820362244286374,0.31114694616470984,29.957185218208718,28.480166279378032,18.83023749163937,21.563904739813132,10.990942068707582,0.21749981882829747,0.23049882904644486,14.673485847061032,22.614783775773404,29.367993566137933,0.310452528607482,0.1395523858532681,21.66815598717687,30.919340112532936,0.26071261170828436,22.64625679422944
|
||||||
|
84210526.31578948,27.914187317739895,38.72586870061394,34.37427745829659,0.25018611707185445,34.87612768157367,16.360396504071943,35.12214121726081,31.569806550678454,0.7241392479048543,0.16149017883469854,23.976321466081362,37.202182146320716,3.1595932809930103,36.29751395650848,0.28417679213420843,40.665853104102645,20.017524243722,3.322405807199241,5.020510792402021,33.043249019295224,33.58857228287039,6.7285567413076475,0.4223994944564523,0.4629706068382369,36.326130462815556,32.31678976973008,24.615091128362515,26.16703098962842,16.228713851886443,0.2158493612611723,0.2527393013486572,20.926730813742346,27.030312429834932,34.8623172833979,0.23361429018987512,0.22156211874161405,26.893434751727245,34.799212468958295,0.21807697539184234,30.454888457406593
|
||||||
|
94736842.10526316,35.796448780228886,46.719436846281354,35.544743354300714,0.21197592584710373,38.41740090166763,18.951874665606386,36.41043453533564,38.42468954387464,2.397318435838018,0.20849763917790887,30.508521716680555,39.469043739614726,5.325915806841652,41.73014003087939,0.4447336500701482,41.8525629479469,24.35030221939087,6.963049008905723,12.455735194716098,36.966595233642494,39.92939883916332,8.246406448184619,0.2892166610569835,0.4603340302147694,38.96102768562507,35.913605004466476,30.72795726321741,31.332497328602376,22.743064679597556,0.2022651714631395,0.27341442557253004,26.362160670790317,35.19011972749662,41.105935195806616,0.2338622824967403,0.3123530152762035,33.57906621578988,40.10294982667115,0.29732876140985465,38.12677005915761
|
||||||
|
105263157.89473684,42.99383903672491,48.78390027149232,36.582929270419385,0.21699940530877365,39.39383727097445,26.90067602458753,44.39998707810928,50.317437911628026,4.624118086373707,0.24574612382376293,34.63773770741809,46.11670608996024,6.189395542620292,46.03257439737505,0.7025569324017892,45.70444963513319,31.311860348709402,9.772873133503499,17.79793688597111,40.246998491049474,45.88934720322035,8.619014446755195,0.27764983982920977,0.45255536319806633,41.12428070765783,38.60599930728902,34.659614916983735,35.5420840836628,27.53532337283824,0.24175493604918927,0.23388586017893953,32.44746919351932,39.44407709813844,47.280881060127406,0.2550287669385239,0.33040183006561363,41.19357739128895,40.48108042508281,0.41852062362713166,46.02484375750259
|
||||||
|
115789473.68421052,46.656092494478514,49.3003974172217,40.88618461180922,0.25334020730861334,41.910199469146306,34.03389628243909,49.00269958253052,53.758933121477796,2.3499170736592903,0.3511508019677159,40.18175835094293,50.15375549971562,5.919387569057644,51.30893965507148,1.5903333402406472,51.3686523807346,35.98736973152266,14.98820638656616,19.71185778911094,43.69592754068137,51.63654300975007,8.098332146198135,0.22332978975079398,0.42853712441188146,43.709000233467926,40.5388273638041,36.95644398079024,41.39879911428013,32.6011972374533,0.41385128359385137,0.23226139618088992,39.2308258759348,42.3913648927641,49.19575073580332,0.2872866104844534,0.4159165062732644,50.36735196126795,44.85200709319181,0.3679334899395127,55.25720063735243
|
||||||
|
126315789.47368422,50.7570367110403,52.976161645059776,47.366475002257125,0.31013072428610855,44.14990248640488,34.19732289301061,49.38634661095955,58.1743103460592,0.4272218054351382,0.37309574750651936,45.080737893270985,55.0537393258219,5.963009271595287,54.12758286705968,3.548206762593871,56.25744196714788,40.83990379185558,21.697760658581178,22.474877867342038,45.57133719531453,55.972243332796815,8.525807214245573,0.2298403288188734,0.677621614239553,47.102193710876634,46.08562893748614,41.119568093001355,43.526330535761865,37.3657715181895,0.4213284257376292,0.3566150401107494,42.9950479945978,46.90616961133117,54.337957281815385,0.24798961227290178,1.2862201014384023,56.409569034946266,46.74809531425835,0.20578515430566663,63.949041636036384
|
||||||
|
136842105.2631579,53.9350477414118,60.46156376946997,50.39507691285617,0.23333228129759376,47.89338325067241,36.87274535805235,52.60365415610105,68.70991566967106,0.4601527092529467,0.18887406769221488,49.668169630531466,57.84740869622482,5.950966425549621,60.1147542858388,5.4075075358234965,59.828689462920636,43.388983726501465,27.622071433925896,26.959531791982894,50.158386805050924,60.51397173820771,8.5146870190417,0.16035500259610744,1.9383167652542255,48.087698569258166,51.13319751364373,43.27336407896554,47.40165236650082,40.66711062803824,0.22400793012159334,0.185431479086836,47.22096720692854,51.11769243884945,62.04953011251223,0.25980716134702736,3.4394111752179874,61.05483244396643,55.152264300475835,0.2738663219018658,70.90816192283525
|
||||||
|
147368421.0526316,55.426549438624505,67.26765538582842,54.96629402445954,0.20269226433497717,47.025430993690385,48.85590636499042,52.11518048051322,73.96457697744185,0.30798375507470954,0.19287811025688198,53.99958121017076,62.600232671172336,6.394833828934012,58.921657126365936,5.569509199781761,60.1442669686188,46.26557719476335,32.55457249110425,31.540916662136937,54.30574820246393,63.68158021329843,8.601941087569557,0.2437566139030986,4.160913982549864,49.16665086191447,55.80555607011114,42.6528623348458,52.275333370198176,50.99232035660678,0.2255374084219048,0.1864807229293021,50.14173426060135,54.075917346986046,71.33212466517314,0.2398154768587149,4.5656570688179,64.05983274729299,60.19737185005336,0.17605878243485962,59.13145895876051
|
||||||
|
157894736.84210527,64.25255515502761,74.46061060105005,63.49411354038524,0.09404599435441703,51.2797619484138,53.28576902273289,55.91283242920429,74.41781764413511,0.3046184603197093,0.18032839687907462,56.76242933907337,68.94851316837723,6.424376209026558,66.99848009542745,5.472073794071694,68.08767717498822,52.67536304796172,36.49175333910702,35.18614392795722,56.681206433726814,63.8464490911637,8.304588321833728,0.2629819220122868,7.969707409760961,56.353007364140986,59.631344845420436,43.98708957582299,50.09347830981098,55.1231301854522,0.1679714736515795,0.2552794947848756,51.8840199850933,57.710881399646034,76.3658868493796,0.2791322988156137,4.517977678874853,67.03021792889962,57.27909845790704,0.19710634818037465,78.43667520776681
|
||||||
|
168421052.63157895,66.25667553000834,77.70913510309362,76.04937911099675,0.07780796801284409,51.94030814553892,61.43769618744996,58.442686783640006,81.16076535465315,0.2247877887081241,0.21604813763309386,51.86910890278063,66.78434825273762,5.698441016707063,61.01313853594074,5.6116096306375525,75.84686744246126,53.525400684811075,37.03903276438198,34.26640053535102,57.08660542073342,68.74459581031694,8.277319657175164,0.24926971266474418,18.56597621421075,61.868744424835796,62.780055756714205,49.807558033274816,50.50667508619314,55.40086335158414,0.22349404628257014,0.12058579294305091,50.24718676115337,60.91642773910903,82.71585998112475,0.24551351446854439,5.083082246648308,69.59498067839985,62.904787628934656,0.1977284397114677,79.53383008793121
|
||||||
|
178947368.42105263,65.73572593382521,83.04939186143743,82.21289736015976,0.14307855040743084,58.02044856977595,71.76870822642319,58.05174952266619,81.45012706006332,0.2177030241060125,0.19056213595530333,54.24838673441034,70.10384824335411,5.556786910979041,67.51318451572324,5.438664361050255,74.89859534894991,55.49365537648716,43.63048864269521,37.042173616773866,61.76772238871398,76.03766362092502,8.568345916568408,0.24663484129549063,21.676341284014843,60.74922025897166,66.91877703785566,52.28359889059516,55.67508761704464,63.00039149387391,0.12880232327532568,0.2288039635423148,51.25826370418898,61.06184660364716,83.92521700766608,0.22897331206091884,6.270845864948473,70.55238805781441,68.32308550092323,0.22078191672665923,85.00873951238279
|
||||||
|
189473684.21052632,70.03839490156095,84.11796303270927,78.2207005387198,0.25439144039418227,59.15385215632473,67.67184759441174,63.57085884971302,68.03014485789798,0.17987481907133912,0.18274555047793403,53.248326032115486,71.29009116323371,5.828619095757397,68.09218141444832,5.7173553741539616,73.84181186961335,55.026133030075115,46.10458964191976,37.11788458309015,65.59806084831006,79.93420678294596,8.045755721855691,0.1616916788581996,23.026484946464898,67.00361081173546,69.48359441625114,52.17803209310093,56.08022738956018,67.23691428863441,0.1402910634091026,0.15547510923771315,56.131559609706386,65.1895280399481,92.5978728714412,0.21482203739831982,7.0279121795189345,74.47953558100228,71.90119198616853,0.2425749453811434,96.88982743239468
|
||||||
|
200000000.0,74.1640023181313,88.20140344218204,80.21218297356053,0.16139447061639084,61.21819438432392,72.84553038446526,64.21980923100521,80.9699421179922,0.2037934002123381,0.2522574976870888,56.771503498679714,73.2979884398611,5.8505777308815405,60.75899395189787,5.52407149264687,77.37758588790894,60.568656620226406,45.00232500779001,38.86378034792448,64.92305840943989,80.85791045741031,8.019417311015882,0.16745637592516446,24.14631193562558,66.12484121322632,74.87561394038953,53.085997129741465,56.55158790789152,68.79039694133557,0.23664617538452148,0.16919381994950144,60.330911761836,67.12918286574514,90.9565927354913,0.16649843517102694,8.009520580894069,80.6888095203199,77.13992292002628,0.2391904780739232,98.43607809669093
|
||||||
|
21
results/AcrobotSwingupSparse_ppo_3.csv
Normal file
21
results/AcrobotSwingupSparse_ppo_3.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31
|
||||||
|
0.0,0.21337890625,0.21826171875,0.6845703125,0.41455078125,0.439453125,0.23779296875,0.291015625,0.294921875,0.22900390625,0.55078125,0.380859375,0.1962890625,0.44189453125,0.36474609375,0.29833984375,0.119140625,0.32421875,0.54736328125,0.26513671875,0.298828125,0.2861328125,0.3330078125,0.29638671875,0.826171875,0.396484375,0.54052734375,0.3359375,0.578125,0.65966796875,0.2255859375,0.3486328125,0.234375
|
||||||
|
10526315.789473685,0.21357271479767775,0.2228854369588836,0.7019023340494679,0.43993970099578605,0.4457103723964533,0.2440779030818358,0.28445382131433883,0.3020097304579294,0.23659012654481504,0.5688885057401789,0.4093215445732476,0.20769607987760508,0.46548380019592117,0.37152939291872145,0.3158102999763806,0.12055265870450936,0.324938610319946,0.5766560588847237,0.26416767601161123,0.31300383591585873,0.2913102694165343,0.34159076246858633,0.29804793487295217,0.8444452523524741,0.3994468770859314,0.5618462839945532,0.33319649339712887,0.5817519885351123,0.674480479179657,0.22874224813360916,0.34287392994043236,0.24068762126721835
|
||||||
|
21052631.57894737,0.21463729404016216,0.2847429946519001,1.2539801214540436,0.9711697174241338,0.5922764463768111,0.3674045546893598,0.19389858932706458,0.44744268877024135,0.3761380716043826,0.9280382563202665,0.9076199941027527,0.3867184065715758,1.0954640618321638,0.5039219763800709,0.6472004813830938,0.16120886868717269,0.38553232103173424,1.1500712127896888,0.25701744206394184,0.5633914530112143,0.4337566328180794,0.5333808756270898,0.36920745617134754,1.2941815278536726,0.46334917063197933,1.0828470140282798,0.30708925875930576,0.7122485380093477,1.0490335744503794,0.2927677651191352,0.2670602798461914,0.3565010297991893
|
||||||
|
31578947.368421055,0.20323591945574226,0.16300526880491473,4.20973530370443,1.9141308765992564,1.1303072054960723,0.47055323302250485,0.38454054729429976,0.6999750190164244,0.6068042361505146,1.9838808276316475,1.047240732779463,0.32258590766927875,3.300744557314633,0.820451130166939,1.2534598841891729,0.2838647041954822,0.8028362411541292,2.05955018098995,0.3327965723180375,0.7174475239254432,1.0429691172042386,1.0538034188119991,0.7856394154873583,2.6947369245280854,0.8043927195329749,2.747439355401122,0.49086964097379654,1.582021385678955,2.4064476219240656,0.3838922904799189,0.3872914538819373,0.4644241768897736
|
||||||
|
42105263.15789474,0.2500258099669565,0.16924816353499395,10.562775117868863,3.0364261901940006,2.5617434588825936,0.3306845139268363,0.6573641504937592,1.4167928193744863,1.5554985035819693,6.665296026214008,1.5902420847039478,0.5595294867856351,6.122559275323335,2.002881549401957,3.353673818699211,0.22144793671584195,1.312077567187703,3.7189625135088895,0.419380108735568,1.469084890265214,2.406639748993343,2.07946530603636,1.6489465348938497,5.053634876029314,2.681116569075228,5.717011200754267,0.684133651183913,4.126098395054361,4.518586874668618,0.42747887481943064,0.24465010370904391,0.5900655889114845
|
||||||
|
52631578.94736842,0.32888041044536387,0.2715100132527444,17.733460875429277,7.464843899259277,6.074702384399245,0.6656232355704268,1.4731523178290793,4.60719631253187,3.4147048775839344,14.414384933059566,6.434121141143122,1.7846566303284874,11.545327676630416,4.058305542224662,7.028147891617878,0.22477122753280682,3.516465171222211,6.5624148852277,0.9232236724811247,4.439537885776847,5.61043294843214,3.9165439922723744,5.178697201683911,10.395311174630457,6.622331314139749,11.543822865737113,1.2768046070003773,9.706652687527136,6.612446320023893,1.2700750121119277,0.41397620039963656,0.7761202492542214
|
||||||
|
63157894.73684211,0.5350462128911325,0.29962318293605816,19.781360399029595,15.19260876198555,12.498151311583799,0.9643479608762959,4.202989639007486,10.342126384005986,6.6210016662724485,19.07600781395825,12.518384072258863,5.658649148703285,16.467369206394185,7.578091576489058,12.856702910235718,0.22467789953765438,8.977922814704709,12.485103358852571,2.445170590091611,9.169885431960683,10.9500342604196,6.319276756857242,11.627668977774414,16.484819478275377,10.808324359460553,17.468208276003683,3.136212800678455,17.003232776293135,9.315752800812023,3.66637701208902,1.0774520467192852,1.8907644002391373
|
||||||
|
73684210.5263158,0.7852336561250554,0.2384258307248271,22.641011190546518,21.417244698532404,17.225132876155783,2.4014786405906796,8.874928936733767,17.268501571010688,12.455725827045391,20.028633709429375,15.736250269776237,11.799598576289467,20.206166419296057,13.716660425603557,19.380570267706368,0.13551659465166344,16.52533781627539,19.908879957701032,4.471161410418905,15.018252133662683,15.222444074636021,10.52112705753781,17.137029601596403,22.15234950638874,14.156446965447424,22.020987118380223,7.051836641211262,22.444470619560942,12.252371070787849,6.747187160058697,3.2735256950610907,5.643231167357387
|
||||||
|
84210526.31578948,1.0865307858115751,0.1625721897114677,24.922460529612703,26.028391206693783,21.931540470704476,7.427022326355827,15.49045526816244,23.244672812253157,18.479983554322303,23.264797252961475,20.318913594507446,17.263236872707378,24.036384677622788,19.661253857810742,22.65865376649471,0.19408470639891903,21.799998254326905,23.554826160547147,7.174491523045253,19.62782619402349,18.426508631402438,16.64619142527065,20.932012954246964,26.657700557127555,17.2458696021928,26.194125621933026,12.5986764754615,28.700997275989142,17.94214692472421,13.128704757901772,7.179397445636444,12.70919324288408
|
||||||
|
94736842.10526316,2.6582788205873276,0.23384656800457648,27.00726091696615,30.072886935891866,24.903228779578804,16.55449330971842,22.605368816291197,28.305850891525395,21.575907559275958,27.155014569079118,24.751330803636037,21.787139946734147,27.175793199988284,23.45812468383451,25.65341528763071,0.2079372273918004,26.26315112391337,26.554582058227624,15.029219785885799,22.826661364853877,22.64420315829671,22.053757330717474,22.635709687283164,30.819130501258407,19.947592706231198,30.637604253774207,17.19585534542221,33.978571811211076,23.49253982850389,19.76031255589958,13.753053688936948,20.234403689482207
|
||||||
|
105263157.89473684,5.158810243051798,0.2324192992868186,28.257206893033267,33.07061711044523,28.04394555025814,24.2341824185485,27.88982900159841,31.484295480469257,26.272313741435635,32.21083139646747,27.41051373521377,25.068851772107575,30.700618521988886,27.733159168275108,28.51371868165246,0.23559921137844095,29.91249806464874,28.84397799156379,21.897442701450675,25.46100950109001,26.490569035432344,29.707195385010948,25.814450047352967,34.304045774930074,23.846011732423733,35.449990415176856,21.909407502066067,39.53630973625712,26.544865684826288,23.703709055512235,20.16929844673981,25.416740750341866
|
||||||
|
115789473.68421052,8.622501515945899,0.17303033152445532,30.4624208954893,34.4494729240185,32.32523063939694,29.10033617653675,32.68394685518048,35.03421277022428,29.25066611509244,36.44389487765832,31.956153038796295,27.203258828773393,32.48497857677639,31.02924344414159,32.335136919470706,0.2611046814852474,34.55427443617929,29.749940528763958,26.3664939489391,28.684378358465814,32.14956315138333,33.532602390754256,31.03434411178335,35.64516809706543,27.293726334611463,41.36981221909668,25.84496872973244,43.52629251163091,28.518349736020834,27.90022700381081,24.67630119270895,29.86176488274022
|
||||||
|
126315789.47368422,12.953163553803256,0.1676715417581912,33.4014120762368,36.214981232323474,36.62185140593891,33.39649893636519,37.98510644970839,37.83161627222626,32.14927361612505,37.06019579076371,37.07880169136703,28.69297118992687,36.598861699619455,35.13885881101656,37.91701086472277,0.24924954549097286,39.300789333776756,31.456702678817795,32.177538573246586,31.831940267885162,38.08447337480794,36.686553305205884,35.872348959756366,38.176747055265054,31.759865414733046,44.80667683292294,30.703182098938164,46.604350858778176,33.42258303845689,33.616753813302424,27.639321192479862,34.44234290611711
|
||||||
|
136842105.2631579,17.76422202752238,0.2581326228429737,35.67636442316536,38.85061561499937,39.09070651617077,36.82817951662059,43.90963821199793,40.68667470451207,40.04966179221621,41.721462842830334,39.7955187258628,32.505569522730866,40.11072799571664,39.583316231038104,40.304967028282356,0.1549811733066209,45.480007874338256,35.54768012567241,35.17292557660893,31.51637954526991,43.55536895841773,41.5929385367523,39.77065433095367,39.490757211753866,37.01248428762125,49.20768334000395,33.773233650133555,49.02499226784112,37.10005671297745,37.34949378491769,29.527241902338172,39.6905295723363
|
||||||
|
147368421.0526316,20.60570672476391,0.260494538621559,38.31259859301707,42.231486737893235,41.38264921563484,40.73568734832088,50.646387562527224,44.740820213698285,44.63164416706793,48.643137432531645,41.64506741573936,40.34868815466969,43.2403947190895,41.13025597654221,41.12120321913109,0.25687543705229626,50.849281604270196,37.71554495951476,41.43600509833762,33.58173339188595,45.139807246728616,43.83206370398609,42.460580910341896,44.589737517021376,36.15181583182633,47.555083031799654,38.0724171059944,51.82304479805056,41.23192626551578,42.69152521888966,32.82670638964117,40.99125826457861
|
||||||
|
157894736.84210527,23.47368086342006,0.2752831279405927,41.03937353519852,42.48044663453036,45.95535014144602,42.57561094278774,51.33014775643388,44.734889303878404,50.75987037199026,47.9832610674512,44.06926891215951,46.44779079706716,44.76039503551917,48.13988857057947,45.47479902938463,0.2263011734241263,56.45151233937272,39.8602860469237,46.56666281216693,40.77834847098903,48.16133561094712,47.558508133293856,46.317501591183145,44.29969079474663,35.5516636378217,53.21788925741518,42.643767492923054,57.00895313807142,46.58676381784793,45.043329876875944,37.123653786994744,45.95667480431766
|
||||||
|
168421052.63157895,26.715029264751234,0.1977865372338123,40.6483221925857,46.188406376297124,47.896664352628335,44.920449106316816,57.66966511734305,47.51291468401035,55.239545975365466,58.429957276235996,46.36995255319696,49.251544962959606,42.72431286418207,49.434618350211274,46.68246104908782,0.1842773733376796,58.893274301967466,42.532215805265054,52.034736886909464,45.06114534922254,46.15300290009982,50.499782615091,56.667042190678565,49.380164931025206,37.5579634056197,45.91710508563181,44.579420158407366,58.330935301212726,47.099042094645405,48.48191630609148,39.709610872981955,50.18605172535059
|
||||||
|
178947368.42105263,28.837614544210673,0.21486973168116857,40.59004576516614,43.8768165065311,48.7303252946637,47.438980399736735,57.06440764450961,51.27431836220696,59.70055551740271,63.84417723156409,49.136360402252535,50.296337965122554,48.62306642400261,53.7552396306701,48.46115139382698,0.19997945981012488,63.31777568405025,47.1586730949106,61.79786850805098,51.845167277592374,48.759547636449504,49.15821553465402,61.260983023286855,50.79944745193228,39.82006362666714,57.308786665633775,45.93147571199158,59.319026052786704,50.68093549247594,46.91568566029092,43.8149001208699,49.19773148829917
|
||||||
|
189473684.21052632,31.99867078670174,0.1767333762467403,40.67618104295387,47.60233529320714,55.792366305216525,49.51787988234755,61.44642992627257,59.45849196204188,65.56626956284542,65.54279105484981,48.73579462743532,58.54962596734805,43.62327897977961,56.37715196477409,47.4302944886057,0.09778306755002517,64.34230051806759,52.26438877391023,64.09345422359054,53.15896337250263,51.936688737525834,53.397176240619864,69.45459241417967,52.13961957894534,46.31354668794246,62.875351150280224,49.85243285593894,63.38545828578875,56.71937128051166,50.516936875446355,46.2262927369728,53.113862914722056
|
||||||
|
200000000.0,35.77912157460263,0.19589767957988538,44.0443098419591,54.61983675705759,57.44728095907914,52.52605199813843,63.98921911340011,61.216058580498945,67.1724002486781,69.0598918262281,53.56565031252409,61.13422780287893,51.03659190629658,63.258964111930446,50.16908838874415,0.13556480407714844,69.07298070506046,51.32774358046682,73.0192215317174,52.29111724150808,54.54280444195396,53.68163495314749,70.86406047720658,53.29583193126478,50.12250875171862,64.98147881658454,49.4239363921316,66.28706073760986,57.06801630321302,55.12897709796303,48.3109793412058,56.84779302697433
|
||||||
|
21
results/AcrobotSwingupSparse_ppo_4.csv
Normal file
21
results/AcrobotSwingupSparse_ppo_4.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31
|
||||||
|
0.0,0.669921875,0.41015625,0.28955078125,0.96240234375,0.70751953125,0.54833984375,0.244140625,0.27587890625,0.94970703125,0.54345703125,0.66162109375,0.55517578125,1.17578125,0.5283203125,0.2998046875,0.2255859375,0.17626953125,0.43408203125,0.861328125,0.83251953125,0.53271484375,0.46826171875,0.341796875,0.2470703125,0.22265625,0.2255859375,0.06494140625,0.33447265625,0.3935546875,0.4658203125,1.8935546875,0.3349609375
|
||||||
|
10526315.789473685,0.865474699606856,0.3998013361669313,0.29874284379700217,1.1285239560452196,0.7520678102805014,0.6072576422440379,0.2548277820576591,0.2726395348102432,1.0543636469959883,0.5373935924012245,0.6946792945967487,0.6053998820339214,1.3062974633932776,0.5653377451064514,0.3004968608845634,0.22409084298934304,0.17538354931775882,0.4936643161932187,0.915816013832832,0.8665467748351374,0.6190427082727491,0.4997694512153266,0.3458945414366154,0.2626303701849856,0.23040859190711024,0.2665902888015367,0.07100484509877551,0.3421696242863452,0.41304629000930576,0.592958719776608,1.9742067302693291,0.3309186449341497
|
||||||
|
21052631.57894737,5.136534836153575,0.22488111670327648,0.5026594912246324,4.130086740298284,1.8879797610549718,2.0788454467900244,0.4548774835475594,0.21061004099753425,2.9655740611110697,0.42079725820271924,1.5062150717442058,1.916081153785093,3.849706847912057,1.551160548202219,0.33034046997323924,0.22601970345029543,0.16557280574809152,1.8085789297426178,2.100740821077553,1.9634311126539914,2.2377988070332115,1.4297608800872212,0.47918217201972607,0.616062946266745,0.3447009242472556,1.2252122054800103,0.18206379222077346,0.4925305136683245,0.8832702108367328,3.2497303901947108,3.4376297366916306,0.2527078485885155
|
||||||
|
31578947.368421055,15.197919318550515,0.22378697355698354,1.3661754587020247,6.927482427982744,6.3490165343245,7.307432474522051,0.8697825878280685,0.15494649364017055,4.531375822928474,0.32218837870124967,3.7291027172120326,7.42123505151173,9.158737911742154,4.685554602139545,0.7266360280256197,0.4987561484783311,0.2185541427696841,4.798017240297102,5.0678233585199175,7.230864396716091,3.833476293780467,5.180974516511955,1.8104171092490424,1.6031560739321726,0.21909950314466312,4.6760716438293475,0.2543263382528627,1.025731809251527,2.5542589449156035,8.921898288409798,4.890016817320087,0.16151017429425774
|
||||||
|
42105263.15789474,26.886371681234515,0.28006577954067746,4.051972534517833,12.986229735398227,15.3105737046852,16.205443332069798,2.820488308935615,0.19838831365273601,6.520308148497691,0.7131359174310998,4.890604193521009,18.5083759920749,22.218747981697568,6.822381814761176,2.765962833182634,1.632642571615711,0.2859615632371559,8.982257584125382,10.841354856200496,14.396594525704424,4.711085343294857,10.07568441797822,7.683180093104821,3.3054583647244526,0.16479739984316838,12.753998637529623,0.24661314058171746,3.3447900544903617,5.629994146711609,17.25693868071749,8.768676076239167,0.170315650031177
|
||||||
|
52631578.94736842,32.37576011805653,0.1493764636919439,6.265924547517729,19.996075070135483,24.29708250473741,26.6585715655805,6.10779984704015,0.2001621544856444,11.340636197880034,3.9239011035401403,6.068390164679108,29.12424737396663,28.99433048544168,12.393097345188384,6.198827989213685,6.135541967407818,0.2632083734316839,18.486392133453876,18.32487551591403,22.473236131536,5.31832713499624,18.098600483997377,17.17883172920206,6.341349301906174,0.16022922721926194,20.96771786946009,0.2881647173387522,6.350126699727658,7.671597331514649,23.07540059684056,16.858973849182973,0.2855512151427546
|
||||||
|
63157894.73684211,35.80207819687693,0.15696968860573385,9.528466628859249,25.93645130168038,29.322925760475226,35.98826725925435,7.909809506170638,0.18297011304099808,19.6207221868626,11.841488027176371,7.1471137498554445,39.51367347590482,35.41464697555162,19.702830066311066,8.714586651556381,12.551106637864896,0.21265340313686887,27.894111997863266,22.868072269365733,32.63707952948489,8.056146447348135,22.01666963793895,28.06944986063358,11.067505207748626,0.17562572711722674,30.657168763496216,0.16387672635656977,10.259124906439531,9.602591168517222,27.649575761810897,27.45019500605618,0.26402906962048645
|
||||||
|
73684210.5263158,42.097688450377404,0.17120624447133076,15.057362825916748,30.882598448988475,35.760426465824374,42.40087026374162,9.17602476056593,0.19896648861364644,27.669871694823712,19.465457654725814,9.453078845861548,49.16960808312794,39.52166929139325,26.007552211634675,11.258532858291161,19.211206028005755,0.20455053968772996,32.89088746385231,29.191360765546975,41.76788461769717,14.800956604553395,29.16591713633234,38.925531135041304,15.4980167378349,0.1867820166484801,40.24636071408555,0.18143638943701237,15.044918992842996,13.2366541283943,33.978920569380236,34.34210071008952,0.23402207445900194
|
||||||
|
84210526.31578948,46.634757477821076,0.22714204867460722,23.433133714417014,39.82416696099364,42.69144009619208,51.36190829712929,11.780941009521484,0.21573110564593795,33.41706395875714,28.193304352482933,14.012693188527285,56.81790659896555,45.67534055207905,35.214311974860955,14.276992068726601,27.023425115442674,0.3060836474981335,37.56818266258345,40.38045466837791,45.11509157217771,23.64768246468415,37.93268641210329,47.03214918015076,19.808247024662936,0.2035594528071438,43.760102227123824,0.20526065984921443,23.391231906711234,19.02653037412015,41.596011365219496,41.010670775521824,0.24761021368391298
|
||||||
|
94736842.10526316,50.64325562210294,0.2145757622335756,32.08468913899895,48.85426952171854,50.23131015492278,56.09562193587877,14.05611093816995,0.2411446214712888,42.76877510250441,35.598343096281354,20.581640330708257,62.13237791114236,51.9025301352102,42.574058651593916,16.533297886148382,33.64211386656827,0.3660697672836008,43.986269673482205,51.553368676732454,51.51794774380417,32.328315868272014,43.17469909250571,50.10774188688917,23.728150213194027,0.22334465755980432,49.67813453515811,0.24267287399630136,30.732595606193648,29.080394035561262,48.49142870546378,45.09694130995267,0.22637654275445065
|
||||||
|
105263157.89473684,54.601308101432146,0.18895510359153853,39.23376072014468,58.98587759445909,56.80859798473664,62.75700563869318,16.11361351171689,0.23699329830602925,47.99425019451786,39.281661633309234,28.58030585238808,63.22890748211552,60.81936539837528,50.32442856627488,18.42635929551481,40.3814683898334,0.4651390931612897,49.987598197281855,54.254583562179945,56.942457360243864,36.61246023415859,47.325207718191386,58.199025074861055,28.678021755905363,0.14748485861062344,53.398840843475426,0.2090843522977961,37.445197042005546,38.78517117328591,50.80505324334649,48.90324298562766,0.1840324718866322
|
||||||
|
115789473.68421052,57.84781972060904,0.20530491984782126,45.051278077334246,71.35784784116242,62.39978758978381,65.62058491587969,19.464806851257578,0.2115729258001016,49.98107045641236,47.75888700168218,38.56563757397131,75.38356621179554,71.22856314492688,55.8925410954906,20.78477737315804,44.015384160250505,0.35737809448030855,52.25931945996271,59.534807744118645,61.15149383465669,37.80827363376142,53.19825499738022,62.83921505671789,34.050563824143765,0.19229207210593605,55.12566884534841,0.24111181877326435,43.069155896469496,44.387714350322604,52.746815015734725,54.64979370545152,0.22365110270534524
|
||||||
|
126315789.47368422,63.7530689239502,0.20912024585163824,49.7047985689792,77.98938513462564,72.51944951403506,76.08099383726676,23.967050790126308,0.18138375110573385,57.328001170277275,55.48598247221633,47.13993561234832,82.54383255173956,74.57033384539744,61.94208381314688,24.488641448298324,52.13795906552979,0.28518274682380496,52.93552567225744,63.90966310263341,63.06423820220863,43.33337992512289,54.80695270633433,67.55776363594711,35.703730311089934,0.2230182288426111,56.68115396050535,0.2678498667032765,45.94595931309412,45.57512038698487,58.373956442539715,60.34076965416568,0.19446547870160463
|
||||||
|
136842105.2631579,64.805707676589,0.178621890472243,49.96782849700167,80.56781150495577,78.12921948181956,85.5370428304593,27.81163813598929,0.1914368441890812,59.664371242153344,57.8341243260455,54.869540851202046,92.0400896930959,74.93563837357836,64.55782760477463,27.63025651017715,58.11468744674218,0.2078662869672695,56.362652410100374,65.16038157074736,66.9500071784466,52.422797210989245,56.566700552308994,72.18032006741892,41.70922832673937,0.18247515871254028,71.61550764099715,0.30871339121683816,48.557396640407745,48.194826695397296,61.731607083138336,64.70955352175599,0.19316112565862176
|
||||||
|
147368421.0526316,67.4204287145937,0.13463367979942592,57.43094876994718,83.65467017641359,86.24994984095778,93.20914219885321,34.10741448864713,0.25084240879048275,63.46955779648884,58.9085684615159,60.04024309861033,99.38487338491424,76.63446779594527,69.46521978827396,30.494680172188463,60.46085262826936,0.24427377220005878,64.16851718141763,68.4582290966425,66.04493196915391,55.48238676340626,62.94392488801909,72.47080420919403,49.12282633583302,0.21459850512052842,77.12562282593957,0.291945866930848,56.65066451445181,56.30040905548265,62.69338523251859,70.59848607345963,0.19311754062895628
|
||||||
|
157894736.84210527,75.11101093714917,0.1446652438832122,57.72943945934898,92.0367622269818,93.08306409513521,98.0535777744494,38.80749326135313,0.3016834734549483,69.24901885140966,64.78287652192684,61.80310367283068,100.59991703403293,83.68771514601985,72.4255261711797,32.22426858833292,64.52351888725302,0.29508902689756783,64.9916165304316,70.51700372246825,66.72272138252153,58.48017675394497,65.68075569184533,78.80507130768162,51.575137197806235,0.16543983026224485,86.74412872916774,0.2097672887786273,66.2101844327932,61.90613622876746,68.62450473526509,72.37640202144506,0.10793013982165217
|
||||||
|
168421052.63157895,68.86239824796978,0.18781840570085268,65.21267645246765,102.54309010175457,96.48281004422259,103.50117281335213,44.30333961212074,0.24209981479803278,69.11582266260713,69.78110601367052,63.69698371781537,99.09766764125665,93.42920648886557,75.96690554632045,35.926341154568746,67.52480664874048,0.2804036655584531,70.10055818848333,74.4431859489293,72.2640332885066,64.72849307496132,62.235762725576464,81.15476841807696,53.232106258994655,0.1822505961494763,90.6124653274663,0.20475118114017055,65.16558201755512,71.49719470755876,75.32608146350469,74.67137184037396,0.3231740301665838
|
||||||
|
178947368.42105263,70.68408412880514,0.18012758371242196,65.07637613962231,105.87991427846893,95.10705247017816,102.94097472161798,46.1325226701858,0.18922512815269404,76.6458372713126,72.20303079097886,65.94735959362126,97.87624455554993,100.94739993061056,78.89789389375174,38.8776149247822,67.36650774287385,0.23784201297073151,71.51746272380332,73.53825630969948,73.58519468413165,61.01770497557199,73.00694562589693,86.19878709481364,62.40378995351184,0.18552927165150312,96.37348061453272,0.21309725267404994,64.1080729083011,68.78358301891845,80.49347682804942,79.12744323830856,0.14146412244463885
|
||||||
|
189473684.21052632,74.7780111793666,0.29193595505817443,69.56358931797693,111.95183009255956,101.04187843608064,109.19837586040973,47.49925733339093,0.3356706904572463,70.09297890280092,71.74511948318693,69.71643940563678,103.18923526457472,92.56247736806685,81.79213520958814,38.75901080894999,67.30001951782988,0.35302543111785295,71.10964035657635,75.65704755175477,79.51700720166235,67.64783785547907,75.54478327521326,87.4395329060647,62.86759142466199,0.26673964186058147,102.41595607186949,0.16846711483688565,68.2066048624773,77.31482650830804,87.25618293741073,80.94310684151266,0.18061518999348058
|
||||||
|
200000000.0,74.8451529553062,0.25142054808767217,70.38147190997475,114.76372851823506,107.85900936628643,107.15486784985191,50.362035149022155,0.18676373833104185,76.77851872695119,69.44340331930863,65.98918714021382,102.84718960209896,100.46886825561523,79.57574284704108,42.47697817651849,70.82723800759567,0.15393611004478053,67.85379028320312,77.56115122845299,77.00273456071552,68.15039315976594,76.66461758864553,88.43965093713058,64.11918256157323,0.18549868935032895,104.17709260237844,0.16769888526514956,69.53055404361926,78.6344186130323,86.94301341709338,81.56379895461232,0.22411835821051346
|
||||||
|
21
results/AcrobotSwingupSparse_ppo_brax.csv
Normal file
21
results/AcrobotSwingupSparse_ppo_brax.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4
|
||||||
|
0.0,0.1640625,0.5546875,1.5,0.3671875,0.3125
|
||||||
|
2631578.947368421,0.15360554477624727,0.5546875,1.5104569552237528,0.32745107014973956,0.27067217910498903
|
||||||
|
5263157.894736842,0.14314858955249452,0.5546875,1.5209139104475056,0.28771464029947913,0.22884435820997806
|
||||||
|
7894736.842105264,0.13269163432874176,0.5546875,1.5313708656712581,0.24797821044921875,0.1870165373149671
|
||||||
|
10526315.789473685,0.12223467910498903,0.5546875,1.541827820895011,0.20824178059895831,0.14518871641995615
|
||||||
|
13157894.736842105,0.1117777238812363,0.5546875,1.5522847761187637,0.1685053507486979,0.10336089552494518
|
||||||
|
15789473.684210528,0.10132076865748355,0.5546875,1.5627417313425165,0.12876892089843747,0.06153307462993418
|
||||||
|
18421052.63157895,0.0908638134337308,0.5546875,1.5731986865662693,0.08903249104817706,0.01970525373492321
|
||||||
|
21052631.57894737,0.1351602119311952,0.5154199432908443,1.5571085611979167,0.13446794476425442,0.033183850740131596
|
||||||
|
23684210.52631579,0.22822711342259458,0.4411755612021998,1.5173721313476562,0.2557686253597862,0.09592558208264804
|
||||||
|
26315789.47368421,0.321294014913994,0.3669311791135554,1.4776357014973958,0.37706930595531796,0.1586673134251645
|
||||||
|
28947368.42105263,0.41436091640539335,0.2926867970249109,1.4378992716471355,0.49836998655084974,0.22140904476768092
|
||||||
|
31578947.368421055,0.5074278178967928,0.21844241493626637,1.398162841796875,0.6196706671463817,0.28415077611019746
|
||||||
|
34210526.315789476,0.6004947193881923,0.14419803284762195,1.3584264119466145,0.7409713477419134,0.3468925074527139
|
||||||
|
36842105.2631579,0.6935616208795917,0.06995365075897747,1.318689982096354,0.8622720283374452,0.4096342387952303
|
||||||
|
39473684.21052632,0.7760527761358963,0.0013295223838404742,1.3068735223067436,0.9783754850688734,0.46705788060238485
|
||||||
|
42105263.15789474,0.6861229612116228,0.024334823876096503,1.7502484237938598,1.0097463507401316,0.4377784059758772
|
||||||
|
44736842.10526316,0.5961931462873492,0.047340125368352526,2.193623325280976,1.0411172164113898,0.4084989313493695
|
||||||
|
47368421.05263158,0.5062633313630756,0.07034542686060856,2.6369982267680925,1.0724880820826481,0.37921945672286184
|
||||||
|
50000000.0,0.41633351643880206,0.09335072835286458,3.080373128255208,1.1038589477539062,0.34993998209635413
|
||||||
|
21
results/AcrobotSwingupSparse_ppo_brax_full.csv
Normal file
21
results/AcrobotSwingupSparse_ppo_brax_full.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4
|
||||||
|
0.0,0.1640625,0.5546875,1.5,0.3671875,0.3125
|
||||||
|
26315789.47368421,0.321294014913994,0.3669311791135554,1.4776357014973958,0.37706930595531796,0.1586673134251645
|
||||||
|
52631578.94736842,0.3264037015145285,0.11635602984512061,3.5237480297423245,1.1352298134251644,0.3206605074698465
|
||||||
|
78947368.42105263,1.6835218730725743,0.3886911492598684,12.659094559518914,0.08980520148026319,0.0007251940275493496
|
||||||
|
105263157.89473684,4.5589449698464914,0.1716040895696272,19.572329872532894,0.4963261119106359,0.14642655222039472
|
||||||
|
131578947.36842105,8.834401716265761,0.11901507461280149,26.827797872978344,0.7295549626935992,0.2523881343373081
|
||||||
|
157894736.84210527,12.662828947368423,0.411084626850329,35.43735223067434,0.546875,0.017888119346217292
|
||||||
|
184210526.31578946,16.5390625,0.5625,42.0390625,0.546875,0.578125
|
||||||
|
210526315.78947368,16.5390625,0.5625,42.0390625,0.546875,0.578125
|
||||||
|
236842105.2631579,16.5390625,0.5625,42.0390625,0.546875,0.578125
|
||||||
|
263157894.7368421,16.5390625,0.5625,42.0390625,0.546875,0.578125
|
||||||
|
289473684.2105263,16.5390625,0.5625,42.0390625,0.546875,0.578125
|
||||||
|
315789473.68421054,16.5390625,0.5625,42.0390625,0.546875,0.578125
|
||||||
|
342105263.15789473,16.5390625,0.5625,42.0390625,0.546875,0.578125
|
||||||
|
368421052.6315789,16.5390625,0.5625,42.0390625,0.546875,0.578125
|
||||||
|
394736842.1052632,16.5390625,0.5625,42.0390625,0.546875,0.578125
|
||||||
|
421052631.57894737,16.5390625,0.5625,42.0390625,0.546875,0.578125
|
||||||
|
447368421.05263156,16.5390625,0.5625,42.0390625,0.546875,0.578125
|
||||||
|
473684210.5263158,16.5390625,0.5625,42.0390625,0.546875,0.578125
|
||||||
|
500000000.0,16.5390625,0.5625,42.0390625,0.546875,0.578125
|
||||||
|
21
results/AcrobotSwingupSparse_ppo_long.csv
Normal file
21
results/AcrobotSwingupSparse_ppo_long.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25
|
||||||
|
0.0,0.27001953125,0.24658203125,23.2275390625,17.623046875,6.58544921875,21.76416015625,15.46875,19.1357421875,0.88720703125,4.16650390625,15.2255859375,4.2294921875,9.0263671875,21.91015625,10.80078125,23.7861328125,13.20458984375,1.65966796875,9.07958984375,7.3349609375,8.68212890625,15.86279296875,6.17041015625,20.71728515625,17.98291015625,23.9736328125
|
||||||
|
10526315.789473685,0.27001953125,0.24658203125,23.2275390625,17.623046875,6.58544921875,21.76416015625,15.46875,19.1357421875,0.88720703125,4.16650390625,15.2255859375,4.2294921875,9.0263671875,21.91015625,10.80078125,23.7861328125,13.20458984375,1.65966796875,9.07958984375,7.3349609375,8.68212890625,15.86279296875,6.17041015625,20.71728515625,17.98291015625,23.9736328125
|
||||||
|
21052631.57894737,0.27001953125,0.24658203125,23.2275390625,17.623046875,6.58544921875,21.76416015625,15.46875,19.1357421875,0.88720703125,4.16650390625,15.2255859375,4.2294921875,9.0263671875,21.91015625,10.80078125,23.7861328125,13.20458984375,1.65966796875,9.07958984375,7.3349609375,8.68212890625,15.86279296875,6.17041015625,20.71728515625,17.98291015625,23.9736328125
|
||||||
|
31578947.368421055,0.27001953125,0.24658203125,23.2275390625,17.623046875,6.58544921875,21.76416015625,15.46875,19.1357421875,0.88720703125,4.16650390625,15.2255859375,4.2294921875,9.0263671875,21.91015625,10.80078125,23.7861328125,13.20458984375,1.65966796875,9.07958984375,7.3349609375,8.68212890625,15.86279296875,6.17041015625,20.71728515625,17.98291015625,23.9736328125
|
||||||
|
42105263.15789474,0.8943599241262,0.24306579045641785,25.237499823530626,19.23520174845434,7.728061355051903,22.67129490712343,16.71391454495882,20.23336305380528,2.5632356646318524,6.353273436633504,16.327553652660338,5.911999563761366,10.551945014013148,23.355165094549967,12.266499922216104,25.086062115645476,14.392525493273116,3.426952741152693,10.641049938518917,9.445480647839998,10.307102830786455,17.915973035912767,7.094101694482186,22.349596118662827,19.487473598807803,25.051236024523707
|
||||||
|
52631578.94736842,3.8031305692202495,0.22668380057052231,34.60180642003828,26.746150623067926,13.051434115689878,26.897590313591785,22.51507384526102,25.3471237391316,10.371769190164814,16.54132324110438,21.461566089262924,13.750717228139207,17.659535759703935,30.087388985374957,19.09520986370763,31.14236768361934,19.927058233778894,11.660644051440865,17.915814376273644,19.278286356674997,17.877775022858067,27.481636216765956,11.397534344665232,29.95445120301603,26.497159381322252,30.071735460342133
|
||||||
|
63157894.73684211,6.7119012143143015,0.21030181068462678,43.96611301654595,34.25709949768151,18.374806876327856,31.123885720060144,28.31623314556323,30.460884424457923,18.18030271569778,26.72937304557526,26.59557852586551,21.58943489251705,24.767126505394728,36.819612876199955,25.92391980519916,37.19867325159321,25.461590974284675,19.89433536172904,25.190578814028374,29.11109206551,25.448447214929686,37.047299397619156,15.70096699484828,37.55930628736924,33.50684516383671,35.09223489616056
|
||||||
|
73684210.5263158,9.62067185940835,0.19391982079873124,53.33041961305359,41.7680483722951,23.698179636965833,35.3501811265285,34.117392445865434,35.57464510978424,25.988836241230743,36.91742285004614,31.729590962468095,29.428152556894894,31.874717251085514,43.551836767024945,32.75262974669069,43.254978819567086,30.996123714790453,28.128026672017214,32.4653432517831,38.943897774345004,33.0191194070013,46.61296257847234,20.004399645031327,45.16416137172244,40.51653094635115,40.112734331978984
|
||||||
|
84210526.31578948,12.874864710334926,0.1886679789366154,59.44870990531266,46.972177164706494,28.099632823236103,38.582515552763795,39.13550983967873,40.75352546713029,32.1027078258694,44.21202178542964,37.319496477079525,36.1907344168243,37.35287647564326,49.373326605376775,38.60244040079724,47.815746885917854,35.26912893152633,35.20672978588749,38.545084184556785,46.105744826826694,40.07420548837931,54.238712944812725,23.80769549446423,51.91509599500746,45.475943789918006,44.53735554449446
|
||||||
|
94736842.10526316,16.58828774053304,0.19821339036619234,61.25150274503925,49.10944638060731,31.275417004265613,40.493403727328015,43.11259261260733,46.01898079895907,35.96356876504058,47.65985019220209,43.51550043124571,41.522620652521084,40.664747265567414,53.98401830995512,43.15082894699065,50.388235266188836,37.86496487218587,40.749904993498426,43.036071270126385,49.71661918018003,46.44383273692672,59.28539776603931,26.94607198205351,57.53076473266464,47.709570064439006,48.16977156958752
|
||||||
|
105263157.89473684,20.301710770731155,0.20775880179576928,63.054295584765825,51.246715596508125,34.451201185295126,42.40429190189224,47.08967538553592,51.28443613078785,39.82442970421175,51.10767859897455,49.7115043854119,46.85450688821787,43.97661805549156,58.59471001453347,47.69921749318405,52.96072364645982,40.46080081284541,46.29308020110936,47.527058355695985,53.327493533533364,52.81345998547414,64.3320825872659,30.084448469642787,63.146433470321824,49.94319633896001,51.802187594680575
|
||||||
|
115789473.68421052,24.01513380092927,0.21730421322534618,64.85708842449242,53.38398481240893,37.62698536632464,44.315180076456464,51.06675815846451,56.54989146261664,43.68529064338293,54.555507005747,55.907508339578094,52.186393123914655,47.28848884541572,63.20540171911181,52.24760603937746,55.533212026730794,43.056636753504954,51.8362554087203,52.018045441265585,56.9383678868867,59.183087234021556,69.3787674084925,33.22282495723206,68.76210220797901,52.17682261348101,55.43460361977363
|
||||||
|
126315789.47368422,28.018189862164107,0.20982215939466312,69.27305728642895,57.55134319136348,40.54619471716419,45.733268488146926,55.375502730340514,59.18589085423055,48.44428807942822,58.32552827956604,60.45724711589867,56.33956345957072,49.66293537715796,70.63442963708472,54.96068301293328,58.80017180720195,45.65313718061368,56.00901037570182,54.44322721713798,58.29854357605826,63.530433035293115,72.29884640992184,36.19034536657571,72.91590075743827,53.18829218634608,62.088771772516736
|
||||||
|
136842105.2631579,32.18140815796945,0.19292420132338503,75.13406889623553,62.841307011997934,43.323522467032035,46.8788468517425,59.86765074432722,60.3678449748956,53.69994001573473,62.27371683411321,64.09662871149439,59.84092356557662,51.51900193202529,79.62195046621677,56.65886330307356,62.45116268696878,48.25000505764399,59.42394581065614,55.726053118045314,58.41412044958395,66.75949093146338,74.04294786856116,39.063385214832024,76.26130947941229,53.52392949093742,70.41391844366396
|
||||||
|
147368421.0526316,36.344626453774794,0.17602624325210697,80.9950805060421,68.1312708326324,46.100850216899886,48.02442521533808,64.35979875831393,61.54979909556064,58.955591952041246,66.22190538866039,67.73601030709011,63.34228367158251,53.375068486892616,88.60947129534884,58.35704359321383,66.10215356673561,50.846872934674295,62.83888124561046,57.00887901895264,58.52969732310964,69.98854882763364,75.78704932720046,41.936425063088336,79.6067182013863,53.859566795528764,78.7390651148112
|
||||||
|
157894736.84210527,40.50784474958013,0.1591282851808289,86.85609211584868,73.42123465326685,48.87817796676773,49.17000357893365,68.85194677230064,62.731753216225684,64.21124388834777,70.17009394320755,71.37539190268583,66.8436437775884,55.23113504175995,97.59699212448089,60.0552238833541,69.75314444650243,53.4437408117046,66.25381668056478,58.291704919859974,58.64527419663532,73.2176067238039,77.53115078583978,44.80946491134464,82.95212692336032,54.1952041001201,87.06421178595842
|
||||||
|
168421052.63157895,42.81027949219595,0.16637333476312274,91.44583040063071,77.8666361969924,52.66906904711948,53.65252214257407,72.654025608813,66.19455707370409,67.99217889843885,71.89236364074031,72.92751081366288,69.74785206126374,57.07690205666497,95.61483840731042,63.08016211702553,72.95405650469075,55.955775918722814,68.77922625554896,63.57263503774712,64.59282713913852,77.09542488985775,83.51467579025311,49.043151512040325,86.22809681139495,57.27627791724377,89.66458620192932
|
||||||
|
178947368.42105263,44.80617342530195,0.1775956427951929,95.82614231043576,82.17290667219505,56.62693200514257,58.68475985460995,76.34242418202007,70.03310238885747,71.5301727456069,73.24794105578658,74.13577931178244,72.55368705337398,58.92097235122216,91.82556805386108,66.3236671223865,76.08082370810892,58.453835888252364,71.15809757425514,69.5122028037782,71.5011251732253,81.08011819352073,90.19659278300331,53.5009876416991,89.49262752031025,60.80962805074338,91.32187601337803
|
||||||
|
189473684.21052632,46.802067358407946,0.18881795082726308,100.2064542202408,86.4791771473977,60.58479496316566,63.71699756664582,80.03082275522713,73.87164770401085,75.06816659277496,74.60351847083285,75.344047809902,75.35952204548421,60.765042645779346,88.03629770041172,69.56717212774747,79.20759091152709,60.95189585778191,73.53696889296133,75.45177056980926,78.40942320731207,85.06481149718371,96.8785097757535,57.95882377135787,92.75715822922555,64.342978184243,92.97916582482674
|
||||||
|
200000000.0,48.896693436723005,0.1997758652034559,104.5010648150193,90.58705795438667,64.50065710042652,68.46793931408932,83.59973428123875,77.3779918331849,78.29683874782764,76.33946393038097,76.87886135829122,78.05110120773315,62.766795428175676,84.84851347145282,72.68968870765285,82.17928179314262,63.28797694256431,75.96145755993693,81.1731002581747,84.95365117725574,88.90131215672744,103.1232140879882,62.464156332768894,95.96283302181645,67.76959826444325,94.57043276335064
|
||||||
|
21
results/AcrobotSwingupSparse_sac_brax_full.csv
Normal file
21
results/AcrobotSwingupSparse_sac_brax_full.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4
|
||||||
|
0.0,1.28125,0.7578125,0.515625,0.390625,0.40625
|
||||||
|
26315789.47368421,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
52631578.94736842,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
78947368.42105263,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
105263157.89473684,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
131578947.36842105,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
157894736.84210527,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
184210526.31578946,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
210526315.78947368,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
236842105.2631579,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
263157894.7368421,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
289473684.2105263,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
315789473.68421054,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
342105263.15789473,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
368421052.6315789,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
394736842.1052632,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
421052631.57894737,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
447368421.05263156,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
473684210.5263158,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
500000000.0,1.0625,0.0,0.421875,1.015625,0.0
|
||||||
|
21
results/AcrobotSwingupSparse_small_data.csv
Normal file
21
results/AcrobotSwingupSparse_small_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9
|
||||||
|
0.0,0.2783203125,0.091796875,0.171875,0.3818359375,0.21875,0.35546875,0.1865234375,0.24609375,0.2041015625,0.212890625
|
||||||
|
10526315.789473685,0.27588386218633676,0.11123310363854069,0.16949392355710186,0.39722987357269035,0.2185285045169397,0.3702535734942745,0.19399891005328487,0.24675823644918088,0.2134043727885323,0.2186495075595676
|
||||||
|
21052631.57894737,0.23269361670327648,0.4130852506431516,0.14195431632678596,0.6502193757371559,0.2253647854453639,0.5689216518666275,0.33231359043279846,0.279740552822969,0.3543205948087317,0.3130134043601081
|
||||||
|
31578947.368421055,0.3549447706861842,0.23365479625162983,0.2346669088770478,0.582120456854062,0.34416394220494834,0.22662881602871127,0.42412960232129715,0.4523347262860665,0.2637026409032933,0.27600896259424096
|
||||||
|
42105263.15789474,1.005112846142037,0.177734375,0.3142747112918759,0.3776883682715926,0.4250029249534712,0.33089266325298106,0.29222222114203705,0.3975395456245401,0.3139486022272929,0.2261420802066201
|
||||||
|
52631578.94736842,0.7126448412020782,0.22314094902735998,0.516376976161122,0.36558028403411613,0.17140433398640387,0.3210050287008946,0.14261880277596684,0.3921365315233901,0.20871810437569657,0.46139484363249467
|
||||||
|
63157894.73684211,0.32863668721798706,0.329584544385239,0.4572083375460556,0.27618671322133076,0.3325737031212803,0.5208995269606318,0.22065425249348064,0.6791368045965387,0.2755827282934638,0.36644131457046125
|
||||||
|
73684210.5263158,0.2131521114021787,0.4537939510186955,0.7790087200598043,0.3030710590182909,0.22022868127373774,0.5303510391150816,0.27933617906227004,0.37754106389518605,0.2988373500158252,0.4220602822766081
|
||||||
|
84210526.31578948,0.28702154185963474,0.48004999979711294,0.6799479561169062,0.2665308056775883,0.18673570796723513,0.5049593600539949,0.24491620261913521,0.4376770399944273,0.4191517922356519,0.4145594673473748
|
||||||
|
94736842.10526316,0.39209992733688565,0.2832896082024825,0.6632530893975678,0.2802792520073973,0.24366824184428293,0.5344018936157227,0.31410755939430807,0.29422765293279846,0.4184407659514789,0.18032703505328487
|
||||||
|
105263157.89473684,0.3189308768824527,0.34432493616669463,0.6948896762076507,0.23338612162835712,0.19060844463654833,0.424624509098127,0.27803556806823226,0.26719426184149664,0.2379592620765073,0.27997436417767213
|
||||||
|
115789473.68421052,0.18594712761960863,0.3494073598338627,0.788506724497618,0.21318869181286926,0.38461765754255883,0.2656318447926698,0.2075244871863368,0.6468700028522522,0.2427966125784158,0.28784734192317213
|
||||||
|
126315789.47368422,0.36690280311986057,0.3176439554737545,0.586905820217819,0.40084902269358147,0.6420982339705789,0.18484853179170796,0.18279766177866924,0.5915254291735192,0.14947147316549608,0.2752504850688734
|
||||||
|
136842105.2631579,0.38437013282670196,0.4021755915929737,0.7278494689603265,0.22538149455907902,0.39770323946205166,0.31802305818594745,0.2798540479918928,0.4880912072770814,0.1400220453574057,0.1942473937269722
|
||||||
|
147368421.0526316,0.258735725423966,0.2200317699823351,0.6398842618736202,0.32082183077064597,0.4030520050809655,0.2824347118261447,0.26254251326880623,1.3957981468898115,0.14149047133004566,0.535570926613425
|
||||||
|
157894736.84210527,0.1604508825286273,0.25127634645499025,0.6484241776188986,0.2179699578113502,0.4424385297991893,0.4070300421886498,0.19239613488110144,0.8106113550075197,0.24997335523779712,0.3686791364506009
|
||||||
|
168421052.63157895,0.2670684983525581,1.0941086872132535,0.9665248374199273,0.16305482816828254,0.7880969272095744,0.2891786950446892,0.2791892181142876,0.6554744223808647,0.3687612264110111,0.37808248673119377
|
||||||
|
178947368.42105263,0.3207165303322747,0.8661808109019271,0.384282891439929,0.35125147801026746,0.4297554618433901,0.17086492416931318,0.11020489742881368,0.6477073843789563,0.7083807810521854,0.7800998555656287
|
||||||
|
189473684.21052632,0.42074637267728265,1.1703677560483983,0.6889311349292871,0.260232560852558,0.5879302249390662,0.43638844899523627,0.18866115791975957,0.20022710868856583,0.43562680928660885,0.9370145190125356
|
||||||
|
200000000.0,0.6556753359342876,2.1284826680233606,0.42468537782367904,0.3749244589554636,0.28398147382234273,0.6110040765059622,0.29132326025711863,0.2444209048622533,0.1256590391460218,0.3028667349564402
|
||||||
|
21
results/AcrobotSwingupSparse_small_data_no_aux_data.csv
Normal file
21
results/AcrobotSwingupSparse_small_data_no_aux_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19
|
||||||
|
0.0,0.2802734375,0.091796875,0.05859375,0.2421875,0.1259765625,0.2880859375,0.263671875,0.2119140625,0.22265625,0.421875,0.2109375,0.2177734375,0.341796875,0.0908203125,0.123046875,0.4404296875,0.1298828125,0.1025390625,0.1328125,0.1591796875
|
||||||
|
10526315.789473685,0.28758278844098967,0.11976067973636195,0.06402038933497717,0.25387138673143045,0.18279015390496509,0.34373667761889853,0.2692092620765073,0.24862693881724351,0.21850320969261952,0.4140672842221247,0.22256601286066535,0.2365451796893598,0.3378099563049147,0.10228270374837015,0.11961369501256547,0.4309053817284074,0.1335928618412599,0.10619373797049483,0.14084171126093561,0.1686486194008275
|
||||||
|
21052631.57894737,0.3839292024311266,0.57743009107595,0.19261559356943064,0.5659670367465456,1.0205043782157581,1.1957732541409225,0.37218140366995434,0.8698960354453639,0.17488604305193367,0.2786423004234927,0.46363715385796295,0.6179347157148113,0.2674974563049147,0.2925257590338794,0.1477757609782127,0.3654890403853229,0.2923275215804082,0.1853763125940043,0.3377617706552437,0.33857562205137637
|
||||||
|
31578947.368421055,0.2126045306303495,0.5026070127196589,0.4888650677540956,1.279838960917042,0.19395379148361758,0.5609980606966731,0.4797899108844451,0.9360346411073637,0.41140080026642445,0.21873940127047803,0.8287942706713055,1.3322985746853901,0.3418153488074645,0.3179676605393682,0.8413648261918256,1.0228362387237124,0.9529356203581156,0.3234534276819625,0.7296801696523734,0.47923685903364277
|
||||||
|
42105263.15789474,0.3887942517563247,0.41705234559288973,0.5096459956710688,0.32849929471425404,0.26033953088142203,0.34866529554541426,0.5578174855240163,0.7391432870458037,0.564955587202162,0.30337393382909894,0.6829760754867934,2.06801344219007,0.8283153861513428,0.42177313384587084,0.8114075383321069,0.5746583066818787,0.4941915580770646,0.35348726309567613,0.3793038027438431,0.961056833452135
|
||||||
|
52631578.94736842,0.6312493960943248,0.49278979156155994,0.6800412841120586,0.5893023271639921,0.3010642772896468,0.5378073510040536,0.47881369181286926,0.44447028669954336,0.5033436297049483,0.6582970923003728,0.4572425139577765,1.4813181277457368,0.6200129173469016,0.2974136991844283,0.33481225940989656,0.27890140660251606,0.6442772952473395,0.498306987688482,0.7873120532471718,1.512039316658168
|
||||||
|
63157894.73684211,1.1202877974576242,1.5383930047793415,0.674045842770394,0.5935230096621524,0.4095628638016551,0.46083872470168863,0.5780952643819793,1.1702685422184071,0.8259476796411745,0.44023831861501306,0.9055674069475931,1.130370945811602,0.4110244687574393,0.3093590723180375,0.5427882757213307,0.518909158468907,1.010797241718155,0.36384490058032426,0.46324200511308916,0.9539823241511216
|
||||||
|
73684210.5263158,1.7893322049085454,1.9711866273113885,0.32166264658159155,0.35822036722030015,0.5943587929258057,0.5040750952638747,0.670014516138304,2.1637340344880758,1.2464905921111806,1.1629588002973648,1.2048723796728247,1.163565804753607,0.45741064858898883,0.32225058547677754,0.4537751100069927,0.7476524368877887,2.1073823463883765,0.34422785423469016,0.3773499671111807,1.4520058697940899
|
||||||
|
84210526.31578948,2.7291050327121393,0.6669734415915536,0.556845435145159,0.7301939649925339,0.8042742446519001,0.36219290947319727,0.7090127369043239,1.9028834081422588,1.5614240228964684,1.3554390143819792,1.425437377760615,0.8146158361038673,0.428463140683161,0.21754850773269774,0.32309350940989656,1.2003860896313951,2.4577581558861556,0.6638215717516449,0.36181213717051164,0.9332953159828925
|
||||||
|
94736842.10526316,4.128411710427408,1.1404954587983953,1.3649267872945094,0.7453759026989712,0.6700138055056416,1.410175896747621,0.7642396467214145,1.5908323087190328,1.5163297494692816,1.3750403343475426,1.0019062417365836,0.9033866285287112,0.5278506529958624,0.2583313992148952,0.549678237154213,1.1528844384275314,1.4348584674401956,0.8126780465038859,0.3437787053327481,1.485840134343282
|
||||||
|
105263157.89473684,5.40820052022749,1.450888551833557,1.8518205996695647,0.6507779224427452,0.9129865228964681,2.640842421893598,0.5947374571063182,1.3147295795979592,1.3276099260493988,1.1499202548632002,1.4740919921536855,0.9701104256585034,1.2847892623859098,0.29562257465563324,0.7761990290929737,0.6266065053332215,1.3194015428960488,0.8731774940385052,0.27390988315571707,1.9808829880817447
|
||||||
|
115789473.68421052,9.572825701282955,1.909399169964143,3.257218273722894,2.304726698391985,0.7261502036097309,1.881474846287778,0.4483351720667281,1.4334533379678909,1.314280134819221,1.060620035821381,2.061361069824557,1.7897773515484667,1.2524107169576633,0.30459522606593414,1.3599199284476915,0.355322428357238,2.629666658649814,1.397359462325923,0.4351801357110781,1.628594493601791
|
||||||
|
126315789.47368422,8.36041634895134,2.1618608913263127,3.9502335862769975,2.2868173234680667,1.6050727611763673,1.8917602475660338,0.4652556316344031,3.7007114061688484,1.8497212481300598,1.638930365649618,1.3035163351043098,4.065054304381819,1.933104433181215,0.5728258888477107,0.7625857958172814,0.7349944392069563,5.23650476344735,2.100532352098799,0.560557473729522,3.160721512052164
|
||||||
|
136842105.2631579,7.470201101329519,2.540120122175138,2.0528609072402553,2.5199052963890867,1.802647844245889,0.8292404819393404,0.7516103305975159,6.420845042305312,1.6085367889615632,1.6002143426615112,2.1647504109094693,2.949976598787173,3.490142243720819,0.5336115723501611,1.4347010504175766,0.8320825027296747,9.47237784314354,2.374547784018054,0.6540227530735683,2.6016511732191243
|
||||||
|
147368421.0526316,20.081148152866533,4.925763132829747,2.9526918152362702,6.1510426317886004,2.364219290397835,0.9721825882338427,1.4343974411982914,7.127217015401148,3.260841591536505,1.2065588192926546,5.1095862771665645,2.3029172532776387,3.263078737126823,0.7179218197133075,2.663424800967907,0.629535505976373,11.074170611901957,1.2480709731083486,0.5163444931157076,4.723242508737667
|
||||||
|
157894736.84210527,24.720929962115935,3.9580889302937927,6.551401780252644,8.312047630796142,2.2396508531227006,1.2979575497952196,1.711067408405843,11.47199925029047,4.747077299947555,3.8261825344899374,4.410465388416913,10.360845325396008,2.1869621884459596,0.7193471401351971,2.337641702794632,0.5016533410450097,11.735319174557842,5.449206436770117,0.28244700392197364,4.985333384569332
|
||||||
|
168421052.63157895,31.57667499235793,5.839278371710527,13.627815817201569,24.30758776889284,2.9883370861782597,2.0607414773956894,2.5759672550613537,22.271483149224707,8.224110822598362,0.9676266403409564,4.7425379449310725,12.595719313687566,2.076777577069988,1.4851045476432656,2.003858360227125,0.4119056426917416,17.390863054016624,10.157069916870457,0.6610050359921444,7.172339864714986
|
||||||
|
178947368.42105263,40.50576080575875,5.724447646629777,20.450412264160835,36.863183084947586,2.614696291344978,1.8787520532792954,1.5635471528917135,11.971458540728873,7.453547657361651,0.630718548212025,9.172437599160997,39.72295700876337,2.034963459849688,2.880468537602729,1.5775332596163338,0.5727995637381176,27.8347481043385,16.08902388596469,0.9789422502808294,10.399655833468874
|
||||||
|
189473684.21052632,75.71465896965725,7.929584001239978,15.501922644406475,55.83949986719359,2.628800701236461,2.788790594507783,0.7577746436206265,19.06275972170843,13.41016861342327,0.7510913838309924,9.921009798129182,45.88625667498053,6.527735464460632,4.097421587999508,1.5876355210829969,0.41316718738164954,39.68276319741542,25.172200184449594,1.562894070907973,8.840237144618156
|
||||||
|
200000000.0,83.22050260242663,10.766298796001234,18.28678567785966,58.48859044125206,3.0183227940609583,7.341066260086863,5.686492719148335,37.594172628302324,13.746350037424188,0.5452274021349455,26.481724789268092,74.8992950037906,6.290600224545128,7.62923160352205,0.8570639459710372,2.1128625367817127,16.455182025307103,23.770082975688734,1.3139089785124127,25.105711435016833
|
||||||
|
21
results/AcrobotSwingupSparse_small_no_gauss_data.csv
Normal file
21
results/AcrobotSwingupSparse_small_no_gauss_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9
|
||||||
|
0.0,0.1015625,0.3388671875,0.4326171875,0.2109375,0.955078125,0.4345703125,0.3984375,0.388671875,0.525390625,0.2646484375
|
||||||
|
10526315.789473685,0.11994662509400429,0.34396158361038676,0.5194434168596347,0.22716204413416644,0.9299383876726568,0.42554437156529307,0.38907931584070266,0.3846849563049147,0.5970444137700047,0.2624334826693971
|
||||||
|
21052631.57894737,0.4377420060852558,0.4179684065715758,1.7919866926452128,0.4814548809442494,0.5468257840650564,0.28768362430984623,0.24026479747486906,0.45491134030667046,1.7831928696989023,0.28062756504048275
|
||||||
|
31578947.368421055,0.5687934843787197,0.31969791634261113,0.40986809505980426,0.41984326687546,0.7867357671425943,0.375482981885239,0.4064096440238637,1.3354080020555825,1.5299849153555658,0.6551206382688062
|
||||||
|
42105263.15789474,0.8009036267563246,0.2659476747803411,0.25698804987434537,0.6692156804895797,0.5164976357753257,0.3327459826693971,1.007351059002229,0.4339263366530146,0.4346078994201491,0.3746171671267692
|
||||||
|
52631578.94736842,0.8069123429274625,0.34273891765985465,0.3983879406366322,0.6205434455765911,0.17684360884563413,0.43400496855336873,0.9540840952019942,0.5550173630014351,0.24008125611619607,0.37602783900548875
|
||||||
|
63157894.73684211,0.4758098369820297,0.3824696580458876,0.46029318072459047,0.8411207595360245,0.36577344138866674,0.5807384903080903,0.3193041413114341,0.6842254099753423,0.4427745269606318,0.8528741007036126
|
||||||
|
73684210.5263158,0.7026946683339466,0.3174371560854925,0.3322486243419699,0.5016936516167382,0.6090902555682322,0.332806484190711,0.43623326954088737,0.42868767947041087,0.3677255123275799,1.4322281729151336
|
||||||
|
84210526.31578948,0.6812232485108097,0.3168407028071438,0.3072702719564254,0.21290328430006714,0.5520422135033436,0.47707045705694906,0.622971997036498,0.6103017072598361,0.35348588938197933,1.0626288543447564
|
||||||
|
94736842.10526316,0.4968484735885155,0.42882340238365113,0.4110288857753257,0.45058739416487004,0.6469827398368857,0.5656887699032094,0.5646780547673023,0.6071271711439306,0.432392624936936,0.9504458567442327
|
||||||
|
105263157.89473684,0.6087043793908117,0.4738238855081912,0.25644147561197467,0.8457718688034945,0.36560793530577773,0.8602326675795451,0.5361789375791259,0.20382913393987512,0.35720617024852297,1.0799222824646164
|
||||||
|
115789473.68421052,1.007922951864734,0.41575720567782504,0.25065560486177985,0.6191521826873527,0.4847934305502766,0.7377698348829953,0.7388849390510706,0.2613284092530649,0.30422093135168016,1.4377591299548373
|
||||||
|
126315789.47368422,1.0422088749851213,0.6821434781822147,0.21164026022617827,0.5682954867460724,0.5795411191818786,0.49586234264426615,0.5351438865767285,0.42828197954764347,1.031030660190742,0.9182899559633872
|
||||||
|
136842105.2631579,0.3950715712232926,0.776914424843405,0.17613844462048645,0.6794362688989191,0.6833469834684337,0.6096252245916225,0.24337699604826937,0.3430759213307556,0.717002892428157,0.9107397718772998
|
||||||
|
147368421.0526316,0.41892244545046337,0.6398056537490802,0.16995195404644486,0.4791332519615784,0.49253201154460513,0.6532902334535551,0.5630349927992044,0.19508791366112188,1.0247433099720291,0.7518741258954075
|
||||||
|
157894736.84210527,0.5524397424713727,0.8534278156354489,0.2461918622172771,0.47644834175004197,0.36392350870486434,0.7182536957336595,0.25672752242999697,0.44319192722563616,0.654069150253676,0.8105308289673191
|
||||||
|
168421052.63157895,1.3406638782110245,0.5544953491549081,0.42190031860013427,0.5385104784344703,0.3862063759251645,0.8659334050651403,0.7730434766436551,0.18643315280903727,1.3308274963886126,1.37788670082832
|
||||||
|
178947368.42105263,1.1469286921281894,0.4291610929114006,0.2925664130340322,0.9289598359295537,2.1716349725908195,0.5767662439319895,0.9920969432080552,0.3802574744184923,1.1568963322943266,0.7181633397152547
|
||||||
|
189473684.21052632,0.6630999229621358,0.4158324983971932,0.23266152463791448,0.39704905586559686,0.7588860731045625,0.7202478181622365,0.7219760133949343,0.26065298064593795,0.7539636606654962,0.5497162150544144
|
||||||
|
200000000.0,0.4712636847245066,0.9249690206427323,0.46508573230944183,0.45525319952713816,0.35294959419652033,0.6721355036685341,0.5371083209389135,0.29072334891871404,0.6535622948094418,0.9252316324334395
|
||||||
|
21
results/AcrobotSwingupSparse_small_no_norm_data.csv
Normal file
21
results/AcrobotSwingupSparse_small_no_norm_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11
|
||||||
|
0.0,0.248046875,0.0625,0.125,0.2177734375,0.1640625,0.263671875,0.142578125,0.1875,0.478515625,0.2314453125,0.40234375,0.2861328125
|
||||||
|
10526315.789473685,0.25125855950437426,0.07003084642404996,0.14266426477405836,0.21860404556147608,0.17630012543908116,0.264447109190711,0.15265616947924332,0.1891612161229522,0.47076328309288973,0.23604134377350108,0.4022883761292349,0.2895106186166695
|
||||||
|
21052631.57894737,0.3036907703262287,0.1955339954830603,0.417585573698345,0.2400634691655801,0.3735701912327817,0.27346757904644486,0.33570643607269035,0.21690762274153014,0.3714772697300792,0.3006499916562743,0.37301576566828254,0.3378400353513596
|
||||||
|
31578947.368421055,0.29091478913114344,0.21520675912788373,0.25125787264752586,0.284687620781135,0.331029736104104,0.28718628975823324,0.41236470346635723,0.2380402742000168,0.5673011135196421,0.2003662183344199,0.19073800961396709,0.27287863364180037
|
||||||
|
42105263.15789474,0.23802864518522224,0.3445970212983953,0.18891032845029543,0.20229917616064857,0.39216624336559686,0.45654655097264,0.23332905439128507,0.29350094782018266,0.34632590338794145,0.18567847677214985,0.46343685583394656,0.2711425633311602
|
||||||
|
52631578.94736842,0.14090186935382537,0.41421117809010344,0.27307483430054047,0.15208636096309758,0.4189179571051347,0.4396387507050321,0.32275304767893953,0.14495407379234926,0.20444918867623707,0.3202841380956761,1.1782266532285062,0.41820837586210047
|
||||||
|
63157894.73684211,0.20871469386726876,0.28198661698528943,0.22730662485899353,0.23915152932798456,0.2703475053951021,0.6747909979146601,0.2827912433655969,0.165482740322969,0.22189092768196253,0.4306025306934134,2.911001649259531,0.5465694015376124
|
||||||
|
73684210.5263158,0.31909871563686887,0.3659586761136464,0.24902584149896942,0.37836342967448133,0.4092480640992564,0.46554378657459883,0.31598273935080234,0.2607237374683497,0.19426996489971296,0.4440918946200131,6.176969705195971,0.5010172640517809
|
||||||
|
84210526.31578948,0.3532961900874849,0.3432970971612059,0.3083503702010474,0.25003248304541426,0.3858074494676246,0.45309389066828254,0.26247313016009133,0.5184596899143548,0.2748201534028199,0.4376770399944273,9.152308519527192,0.9481273661690076
|
||||||
|
94736842.10526316,0.4062441854926027,0.32371457065571707,0.2666073770073973,0.3100225284819458,0.16453488315571707,0.48871638966399217,0.2891547977098798,0.6601589736515795,0.23289970545887614,0.22545330544257758,11.488105253499631,2.6669291763094325
|
||||||
|
105263157.89473684,0.6048617217679433,0.7771274534949305,0.2667820367786693,2.0100523243320283,0.2862582801121424,0.6492459040929737,0.4212433400246575,0.39811924223754547,0.1760765931612897,0.26034227830881557,13.632602242551682,3.112846997966397
|
||||||
|
115789473.68421052,0.8280159694005907,3.2479128586618518,0.2203951463144572,3.6560692298445345,0.45045373182217496,0.4112739351647714,0.3031011142889219,0.363053276928508,0.5425097367770122,0.2138534979146604,15.559125250396304,4.053317080574352
|
||||||
|
126315789.47368422,1.134784466011703,7.669549429515728,0.32472602020010083,8.23079994180527,0.32095166784904644,0.8598584204169202,0.3451743614640595,0.342951851208124,0.3340930146193564,0.33820888276245487,21.842560554145123,6.406826156658481
|
||||||
|
136842105.2631579,1.8827517501535187,8.867948677401133,0.441584344055514,10.450260965447677,0.39772757532854175,2.7829278671180133,0.4073773011606486,0.23049164082535079,1.1378827742262243,0.6605059605225965,30.34590503713762,8.07602402642163
|
||||||
|
147368421.0526316,1.2499827810271618,13.4577449788017,0.5287855312104371,13.08799321499558,0.3087301122184604,4.908896897968495,0.404296875,0.09344440153761242,0.9425163797394385,0.3322899414231568,36.82283538860628,10.160734514780653
|
||||||
|
157894736.84210527,2.4262336799642727,13.912452441503467,0.44871729430729657,25.01421294912408,0.1585216786392507,7.548204992616608,1.8462324697225059,0.16994034880746442,1.0017884008772158,0.6442124004839533,38.800063912558095,26.62291460750507
|
||||||
|
168421052.63157895,1.8957548273567344,12.724887795065248,0.37405179940432387,32.843212180520695,0.4806609008450919,10.410637895156142,1.7354708008488788,0.4507272144433866,0.8014381810238486,0.7009397807874178,46.07278522692229,23.83986615210028
|
||||||
|
178947368.42105263,4.238514731135066,16.38166935622197,0.595972166827511,23.140149697702675,0.22474605687107063,14.50005219055345,1.1324873498932475,0.24690689076347025,1.1698891198536037,0.69168790357595,42.606099791804176,33.06238525527996
|
||||||
|
189473684.21052632,8.035884746223937,23.544428865004775,0.5621408161364104,32.238068741774626,0.1405993855230696,28.6749156835667,1.1710396077163991,0.931688763098043,1.1358143180361084,0.5324972739180039,81.18519864386138,33.74080153383377
|
||||||
|
200000000.0,12.403944969177246,28.882382643850224,0.9095641186362818,50.32430417914139,0.27952896921258225,28.698694680866442,1.2693656620226408,0.534015555130808,1.2294362218756425,0.47636112413908305,97.38083723971718,41.65443470603541
|
||||||
|
21
results/AcrobotSwingupSparse_value_large_data.csv
Normal file
21
results/AcrobotSwingupSparse_value_large_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31,trial_32,trial_33,trial_34,trial_35,trial_36,trial_37,trial_38,trial_39
|
||||||
|
0.0,0.2060546875,0.310546875,0.162109375,0.7412109375,0.216796875,0.2841796875,0.09375,0.2978515625,0.2666015625,0.1640625,0.3251953125,0.2177734375,0.3271484375,0.220703125,0.3154296875,0.7177734375,0.17578125,0.3427734375,0.060546875,0.3466796875,0.078125,0.1884765625,0.1708984375,0.228515625,0.0732421875,0.361328125,0.44921875,0.23828125,0.255859375,0.2412109375,0.1298828125,0.2978515625,0.3076171875,0.83203125,0.423828125,0.1923828125,0.138671875,0.240234375,0.4736328125,0.4345703125
|
||||||
|
10526315.789473685,0.2118135700595676,0.309827014680054,0.1672591449811518,0.7317973794699375,0.22122678466120585,0.27808856171584195,0.10172383739017053,0.2989036660445364,0.2818293769603951,0.17425129222077346,0.3266350331398919,0.2282944729453639,0.33362718037951355,0.2271264940087485,0.3453315777131395,0.6961776279016214,0.19366701025711863,0.33806665848496875,0.0710679104453639,0.40028159440059086,0.08726168867623707,0.20962938113225796,0.19659191353499395,0.23444062917186284,0.0897436009879918,0.3609405079046445,0.4458963177540956,0.25589014090329326,0.2610091449811518,0.24641608135191687,0.13519870409344703,0.29463987799562574,0.3138744348964533,0.8152529671581829,0.4520134252194222,0.23042466171560527,0.15279221204509366,0.2468792394918088,0.4610629438363284,0.4318569928325114
|
||||||
|
21052631.57894737,0.2927769901349604,0.32288055208581307,0.26201100230547203,0.5259125675190849,0.3001270188519169,0.20776718012844098,0.23404275677540956,0.31363755646174635,0.5216450255333221,0.3244798929737545,0.364983143898919,0.36455860296444875,0.43343241260982945,0.35569093233990867,0.8980365605235431,0.32783274769452797,0.4986310361825198,0.28147218233990867,0.26337039767870285,1.1860065988556499,0.2600783879076675,0.5349439795327648,0.5920173401977877,0.3222652815715758,0.3668050105551934,0.3563748991060125,0.3617198394606318,0.5455056927540957,0.36019091196667785,0.30620685904970457,0.2128902815715758,0.2638034767721499,0.39736967007539276,0.5721258284972975,0.8771530795955922,0.8021484966753593,0.37728502479616627,0.3725943155896301,0.2530606024153015,0.37586547133004566
|
||||||
|
31578947.368421055,0.2332391606803747,0.48416751375488953,0.4107527270541628,0.16305141765985465,0.4571054411066538,0.4224530032466983,0.25619743273198775,0.2826893772114677,0.42216212465492314,0.19176925912788373,0.4893336969729606,0.23811071136981854,0.44335665134842056,0.5305403949811518,1.3076517073401452,0.26181958586885656,0.5064718980868437,0.6136388118247249,0.3721902852573553,0.333998727666374,0.39477701002210785,0.3102040330458876,0.3756856839082248,0.25921257314919766,0.43090709887052847,0.3905299786385407,0.15384944324017896,0.4942807649311266,0.44405838128932623,0.11810937548608334,0.19219858428447872,0.4058763921425943,0.2501162188205032,0.8388969231180207,0.6187154209844955,0.31458369648687723,0.31998607128280687,0.6352383753599555,0.25224981809917246,0.44733606216980176
|
||||||
|
42105263.15789474,0.43082265246277696,0.37244460721425404,0.724254904031093,0.38941055255583457,0.8952821958758493,0.31834023811150125,0.34572226188849875,0.23198879590655297,0.41272224151526793,0.2885996852885323,0.45229642873325504,1.306264660695253,0.8862685945885994,0.7317529715329327,1.2945927226312273,0.30961133370439103,0.2506439758469854,1.3530910840655297,0.32188553955416277,0.3413275827001007,0.30875646110386734,0.24614468307706458,0.47301651170049014,0.4842581180984624,0.6128170615748355,0.4231113317600578,0.2669949941027528,0.3591329939147442,0.3290308056775883,0.3311890366995434,0.5226831198399087,0.2995449182399422,0.3530435852727071,0.6904976968950182,1.4620488026795957,0.2972585197300792,0.4525616928480999,1.1407596450763395,0.1912144901349604,1.0498197032474084
|
||||||
|
52631578.94736842,0.19438721400548875,0.5479718786857796,0.5071986031994595,0.6670940298783152,0.46532229109153855,0.544412248352558,0.22292252062430343,0.365314724042475,0.32787992683474043,0.4293990254071941,0.5465711900071755,1.7668202867798528,1.3394154789044916,1.08229509284952,1.8495260064291492,0.4306251018661541,0.19212542346309758,1.1372663307718294,0.6112785656366322,0.6913488155586898,0.4276017429425776,0.3104009205614761,0.27554273737434537,0.8042010838305191,0.7090213226149287,0.24748425470494828,0.2884362873278166,0.3571210845355512,0.28083161700135123,0.6189416769138664,0.5435207715655298,0.4377946325286274,0.4328021025723698,0.4033698718633678,2.9511417959535553,0.3701968259098127,0.509997375784158,0.5432278561790234,0.1688656740901873,0.7397989394592116
|
||||||
|
63157894.73684211,0.40211646378535665,0.45695911946389145,0.48651816772291856,0.35069634181310594,0.7141461412001848,0.7260753969736705,0.22915140611643284,0.5460429944490128,0.4650857560852558,0.5572298900572544,1.2152039641488617,1.1181630322147273,1.709709362970495,0.47841581941641603,3.0052941023808115,0.5139128507999832,0.31972426522801817,1.491355932980694,0.8555853360247416,0.5113759080458876,0.297368580614761,0.32412851286066535,0.4390528723803915,0.5042066970360245,1.3392025215804084,0.28469137471798706,0.216618461291876,0.3985967768196253,0.34366285371648325,0.4300706763016551,0.417485768114761,0.2037980246081576,0.8567441374971604,0.31820760605407883,4.014480765176284,0.2865351256901537,0.4968177076852222,1.0241515933641772,0.25751102267870285,1.4193305493722004
|
||||||
|
73684210.5263158,0.6832269869352642,0.3502434389743118,0.42469941083744295,0.330853359190711,1.0170202651512588,0.5658347481175474,0.28252735296444875,0.2504259146481671,0.4948707406210437,0.2926019869352642,0.6511313631263798,1.0467488693068232,2.124570059313999,0.9452250578396872,4.0766381002198955,0.5680250949806784,0.6984042117470189,1.908098463866849,1.0677054635045269,0.48898128657459883,0.5072851101777562,0.3462920704046445,0.47044264510727973,0.35083579488738414,1.5618416714866405,0.21649608453555125,0.4809097041713895,0.2367341921270058,0.5707660896956426,0.6125409028536727,0.43746992095355514,0.11579324135819961,1.339187043525505,0.503868343427241,6.064206678121043,0.3072712784658839,0.4929593236822831,1.168906623967136,0.31078409686313113,1.7903679189919768
|
||||||
|
84210526.31578948,0.6135799798939036,0.5060559428629784,0.46436111154318516,0.5034687538886665,0.9443499705137639,0.47750245831349536,0.3175468973175641,0.33357965781087684,0.7759932837657982,0.25440733848846514,0.7407016067293541,0.7270963674106754,2.426959093257661,1.6659178799869612,4.122235771031261,0.8202656880640258,0.4769264918615282,0.9828566236839399,0.913581235256882,0.4038808722245065,0.8500438278071438,0.41037980605360547,0.2831323681775883,0.29202329683171746,1.3165678832669665,0.6550338829653417,0.6089617446519001,0.18336302950111455,0.495234756258386,0.6062870184140191,0.6077090234307372,0.27082469654875785,0.49931500294862363,0.7141546318075332,6.310332934942272,0.29747658092889756,0.5628722341437089,0.6313560464705787,0.33018852931310594,1.6888859925838056
|
||||||
|
94736842.10526316,0.4518804497335756,0.6751762347868605,0.39460882783926754,0.6890296143508023,0.9811195468638412,0.30161391863202125,0.37235537658437795,0.2548223109786861,0.9490426927392173,0.21032976113528096,0.46526998430077715,0.24377452501629857,2.310550333060056,2.155622828369986,5.291314521324602,1.2526422669682806,0.24994258933450378,0.9459554931133408,1.1397841604132402,0.2403639162016047,0.8175007246868101,0.5286488017877383,0.26299200559917246,0.41787100498696117,1.189747461651831,1.154665698635281,0.5933121366844283,0.26866715238365113,0.4059389542344535,0.3046211839712888,0.5315634022128879,0.30814116681381604,0.4888602835319709,0.48115134305240703,7.620361396810685,0.2364952768975678,0.5756591574967402,0.754822310978686,0.45194131845912777,1.2548599216746492
|
||||||
|
105263157.89473684,0.41989179595355514,0.6957671199809151,0.4225029298143043,1.0848030819456993,0.8830141717377131,0.35927416413114344,0.3296959789836176,0.348857398838878,0.8554704671421209,0.22495084324041562,0.5843521921258223,0.17642694298910633,2.2409388048166714,2.4818770178797505,6.150779935461663,1.410464204909729,0.9096408801726026,0.8624748589259436,1.279065124215842,0.2494780574809151,0.79857861104104,0.5009341675819122,0.27660953701367996,0.45934532355733854,1.4530860393661542,1.0253147167512255,0.43124272882773274,0.448226141467319,0.4496822515683161,0.4034652604290653,0.5288118800958437,0.1902792922677756,0.3730027629397913,0.8583520165440779,8.402377559207483,0.27477672819945953,0.4730650407785854,0.7624741604453639,0.41050316554357474,1.331548268114761
|
||||||
|
115789473.68421052,0.6805061831698853,0.5512737765536745,0.42278831355129254,1.1264157519776405,0.9162077282934638,0.32135844560871496,0.3349708493726736,0.45696186689128504,0.8605467328734675,0.21299658851940545,0.7853750218314808,0.20045679890217874,2.040065731038017,2.166842088144572,5.689375475833291,1.2688855570108937,1.1741750405435747,0.7905032747009784,1.4370471209370197,0.22376166684475635,0.9797178041241505,0.39902985591307244,0.27749927288277326,0.4366194891467319,1.4673198478043574,1.1793542740417648,0.36596620776316463,0.4112845576701072,0.3462985479930762,0.43689395450158797,0.811596598321381,0.22761529040138476,0.24774880818713074,1.43480961870949,6.809932077360286,0.30402882805821635,0.4491958591746491,0.6760953628455504,0.30524705321504797,1.26967001621743
|
||||||
|
126315789.47368422,0.7980617723966899,0.7165036399608838,0.3159817090655297,1.3453630780249097,0.8412704679114005,0.32802519441641603,0.39050605752791734,0.5406652751721834,0.915283129155801,0.23438559872952197,0.6583285450605141,0.3275453033869949,2.3128903882985634,2.378213792626548,5.559986315275493,1.611289555346207,0.6407018434307905,1.010574372521398,1.276671021268638,0.23671233158692767,0.8981404396965891,0.5143510813197933,0.26226223340655297,0.42639480337211627,1.3241339601638242,1.3685270716278837,0.33190818638682684,0.5307530326526254,0.33015025553610855,0.6034114023985298,0.8802458216278837,0.15881461780157102,0.33252792147057886,1.4760658285294213,7.563065262052164,0.30108272732129715,0.5659454958590775,0.5819293838458708,0.3262928780426279,1.5169027439445015
|
||||||
|
136842105.2631579,0.6816525736674046,0.6894934355717285,0.44002671122881204,1.4486222729458371,0.9096866142716766,0.46679586849054155,0.3459827180054048,0.5479968300816755,0.8691077985261615,0.15796384256632362,0.8307959723010291,0.464513910774379,2.427734375,2.544916403921027,5.056008869921401,1.620051534552323,0.41832047354151336,1.194904052649839,1.5468774515505979,0.3190508734156221,1.0586365121223262,0.6312384539363788,0.33391123795443306,0.3553409021647022,1.6547624846904898,1.274013099247729,0.2901898011606486,0.6357979047991893,0.3062615460636212,0.6200341148059453,0.9117074052382704,0.21225930845308183,0.4898149854918928,1.36628954562454,7.060144028174909,0.3498377152757302,0.4148619749539444,0.7364997414670825,0.3758924832964869,1.5767584164056752
|
||||||
|
147368421.0526316,0.7728475480858971,0.5616922827638747,0.48851265735573385,1.4488068683655968,0.8492135393982778,0.4528009515059621,0.2296100996868101,0.4609651750474755,0.8119353191344031,0.3581742421411744,0.7743791754556163,0.48035765883004566,2.212994504173046,2.012104578625792,5.265597715932577,1.467720100423966,0.3576416533409393,1.092768629502061,1.7314870205612394,0.3049582114180039,0.8650442847254531,0.6616164812420874,0.41461795584977174,0.5268163601777562,1.5701039596938027,1.5939124152270714,0.4863445620127334,0.6644559316027527,0.25882836656227004,0.404190200847932,0.9695368632055056,0.2234106301600913,0.46242540628956286,1.4524841150088323,9.68162809778779,0.275538616233255,0.49670254292580573,0.9451460587350947,0.423587492628441,1.3606475449665099
|
||||||
|
157894736.84210527,0.7192808241064859,0.6800190801435562,0.5728573178320381,1.2741515220367345,0.8556359018975678,0.4740467547049483,0.36303961111898253,0.2809529396965892,1.1061982263158234,0.48192557073365955,0.39411760367184767,0.4335168352417668,2.4064089809428295,2.379179093315991,4.795482363397064,1.7448701013158234,0.5381122863854069,1.12182079854104,1.77721899302052,0.25270681830324293,1.0745448828403972,0.6176944029958624,0.5059574872172771,0.3636821657006428,1.6147141496230362,2.2455226628734137,0.43924357818434434,0.4325139238563599,0.5666076258609172,0.48670482371322343,1.0987070036066537,0.45532734308216394,0.5657506689140341,1.7637060989633493,8.537530539768884,0.21775224004095609,0.5235277371393346,0.7250793303809335,0.3510716430368185,1.8314840839840372
|
||||||
|
168421052.63157895,0.5446248146965893,0.6323370259884652,0.34956148522712505,1.4158399587192694,1.0243488005323753,0.44768741850707666,0.4242923134251645,0.3743092122170404,0.9926150842386599,0.36292471846054786,0.9373380708562373,0.4372990151191352,2.4998103007055055,2.07439745958492,4.894567854186505,1.4142213858395731,0.3392523768205721,0.988488025612448,2.074691180707345,0.35675091096238754,0.978722400612448,0.5606459937267356,0.46017046053984156,0.5432374483990868,1.3767637215823016,2.1346582639910836,0.3882076865748355,0.3480978672524238,0.5813151436169062,0.5434329384251645,0.8143628403090373,0.29506643194901305,0.5372990742944944,2.3193722458097086,8.333616261997381,0.34481135233617566,0.7004527253127165,0.6991791025093057,0.29527291168466496,1.8121038209698537
|
||||||
|
178947368.42105263,0.6591687691178679,0.4084881653085639,0.4136827579825869,1.317806986230232,0.7200132961748709,0.3972681473496878,0.4004350567128189,0.4140180682871811,1.0433635553164495,0.26677006433545053,1.1722329052531488,0.3622267424564942,2.2064730229470206,2.3212932074169044,4.882091609394782,1.0492908921598396,0.3220191781210437,0.8889107902294381,1.5547403985443538,0.18930645992881368,1.2453325725988669,0.7985338596756109,0.23758972780856394,0.4891798674564942,1.7873750300948972,1.7150877453283588,0.5259689954509366,0.2044293411550759,0.33710270773340784,0.5139275468947815,0.7210736420015879,0.36572557539160566,0.3731374317919448,1.6895521486234795,8.086900692567271,0.09764288006727045,0.4156334314319895,0.45265394300635164,0.3729135798615432,1.0947643977453172
|
||||||
|
189473684.21052632,0.6422042212657981,0.5305526870769807,0.5760128940571708,1.1364200676577243,0.9701035570900196,0.38320730531644953,0.30219295216399217,0.4370074496705116,1.0593717316181046,0.3910546923608331,0.9566542554099804,0.45032176284578695,2.449823030804663,2.4222031220835003,4.834490910791624,1.6343475616539613,0.2616466194638915,1.306795780678535,1.667646075219659,0.1756230034656472,1.191112280552407,0.6465099894769304,0.29377072405617,0.5455416100837518,1.876787227937059,2.7795739213515516,0.4502092979621359,0.36123891814593795,0.3971498917344535,0.4487176615115348,0.8336244567279341,0.2872303780426279,0.4650105346933296,1.638438407073721,6.3392494761712666,0.23643205172467432,0.5543819015376125,0.8812192932720659,0.2366795051130892,1.4850172587048645
|
||||||
|
200000000.0,0.8797569776836195,0.45245607275711863,0.34197531248393814,1.5450302425183748,0.8130079570569491,0.44204782184801605,0.36642114739668996,0.4647955643503289,1.1184805317928916,0.5815714785927221,1.2139172804983038,0.3197245848806281,2.1900277890657125,2.3103103637695312,4.651352882385254,1.4646324860422235,0.2180328871074476,1.1175750933195416,1.630814552307129,0.2488067024632504,1.108574766861765,0.5167208721763209,0.7578312221326327,0.6886364786248458,1.155426627711246,1.8101054241782741,0.2778626492148952,0.38942076030530426,0.42765336287649053,0.514480942174008,0.9143920195730109,0.3007494775872481,0.34607094212582235,1.5790150792975175,7.278399015727796,0.3382902145385742,0.45935219212582235,0.7952393983539782,0.21919335817035876,1.434862588581286
|
||||||
|
21
results/AcrobotSwingupSparse_value_medium_data.csv
Normal file
21
results/AcrobotSwingupSparse_value_medium_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31,trial_32,trial_33,trial_34,trial_35,trial_36,trial_37,trial_38,trial_39
|
||||||
|
0.0,0.4970703125,0.2109375,0.29296875,0.15234375,0.3076171875,0.3876953125,0.1015625,0.1220703125,0.669921875,0.2431640625,0.2548828125,0.126953125,0.2109375,0.25,0.3779296875,0.431640625,0.2421875,0.423828125,0.51171875,0.2763671875,0.7109375,0.2705078125,0.1689453125,0.31640625,0.2216796875,0.6982421875,0.30859375,1.2939453125,0.181640625,0.6875,0.16015625,0.2568359375,0.375,0.458984375,0.3271484375,0.3935546875,0.078125,0.2783203125,0.390625,0.5048828125
|
||||||
|
10526315.789473685,0.4929172721926195,0.2264421838142205,0.3127925957338962,0.15765964159344703,0.3105520026505489,0.3796107273682993,0.10372208095983786,0.1406205592062995,0.6496550382999832,0.2448252786229522,0.25322159637704783,0.12866971499371727,0.20711670291720996,0.2533778061166695,0.3785941739491809,0.4132564999059957,0.2630080754076675,0.4130855940715758,0.4925593907152847,0.27110666977731807,0.6936608523212971,0.27000944766311435,0.20754090042325601,0.3134160609786861,0.2247806242628441,0.6750405356494343,0.31280216417814555,1.236411860775089,0.18380020595983784,0.6765359735885155,0.15594783582185445,0.2676338422991893,0.3978694086259752,0.45289324921584195,0.32371525751256547,0.41077596130793775,0.11212455664975493,0.2816427447459044,0.39051425225846986,0.48810452965818285
|
||||||
|
21052631.57894737,0.43612112430984623,0.46610162264752586,0.6272949366688398,0.28729190984921443,0.3353427400879582,0.31898593110060763,0.16512041805193367,0.43448930037649053,0.3180373870765073,0.2894053419541124,0.261911196721888,0.16420367021639923,0.2680351489798845,0.3254072395388109,0.3856615188049147,0.12403403622952196,0.6241044496235094,0.23703294579672352,0.2352931215492312,0.2542508090962334,0.39196354134261113,0.28663699184428293,0.7726396946365482,0.2622321543601081,0.33761068856617094,0.29681484190711027,0.3706302484316839,0.2822870640213139,0.22980460697924332,0.46612144639287295,0.09712302189454478,0.43087495925353836,0.7051160553485732,0.3513410045169397,0.280223534708208,0.6536310148371224,0.6220282610103364,0.314986009677031,0.3989714149623036,0.20809839306775882
|
||||||
|
31578947.368421055,0.4798982546270058,0.30513493175982104,0.43554959865157944,0.5545548679425775,0.14364014041720996,0.7742188683507183,0.32588372005980426,0.4281455935533689,0.3309278699499748,0.40875139262867766,0.5120096285917752,0.3254000750935311,1.2878911105549566,0.4734725053951021,0.31030383862947153,0.30424109852545156,0.6860988965655297,0.27591975574017896,0.6359158169859993,0.7300585617319038,0.32844870506561374,0.45652707065571707,0.21276108379839537,0.23270318514752583,0.7795272520704613,0.33682279507539276,0.2675829092221247,0.3074914002352474,0.2908187374844115,0.30753857937546,0.1717789483532681,0.28977688924097283,0.1361349322459044,0.3437951185696673,0.40997577572133076,0.2909041904016214,0.2378550156992229,0.1789385909188818,0.477997092989343,0.27243804667464927
|
||||||
|
42105263.15789474,0.3247253333432523,0.22632118077159258,0.16629011413067007,0.252334584159534,0.23862718280993017,0.1927960678480999,0.15772048654318516,0.8096964194173627,0.18416765588142206,0.30685083489668996,0.4211021698082584,0.7878928039212636,1.2606836989976031,0.5113940146491137,0.10735180503443668,0.6468197090804082,0.5186784379700214,0.3704596860256882,0.37212467986130654,0.4138126426126158,0.6877355284968242,0.2984141827950517,0.3658267309130724,0.2968295617777226,0.5588542061169062,0.23787859338142206,0.2950548504858467,0.39437266003722304,0.2289090566027528,0.5544745426759164,0.16740681656179668,0.23453633739017055,0.37760465627231765,0.3331493261448234,0.550681444416416,0.2163347471453807,0.5305868872645159,0.37703448055193367,0.538037751850329,0.5237666523687726
|
||||||
|
52631578.94736842,0.5788017785450098,0.2895215607746156,0.42749202548632004,0.26031835719819213,0.5171669064466313,0.28096424905877365,0.3259285189768614,0.25731276144941756,0.22599987970494828,0.22978581351916877,0.605864218397484,0.4936391693072967,0.5059486056298761,0.46710625117505356,0.15675862013798342,0.922995799796403,0.6684014618892089,0.37360505450135123,0.7283874337362781,0.5947009242472556,1.0952697024781288,0.22920199576507314,0.31046007214490723,0.6634584912963191,0.6646743837816232,0.36803214107523996,0.42608032332232787,0.5906620421898332,0.7708647350195041,0.42080375956696486,0.3572051637390644,0.2810996047677756,0.891895484395965,0.49793443389216285,0.672349100297838,0.33734748502187123,0.6864332003606654,0.47328074553006244,1.0516460820248252,0.2050029273838878
|
||||||
|
63157894.73684211,0.7102440123412753,0.41137584887052847,0.3511793236983449,0.5651271798934302,0.27707303858199617,0.3617304381901537,0.5540219832655465,0.32454657620670424,0.3615509941967571,0.49057073936568096,0.48048037901479457,0.2420664969573721,0.5663385364817777,1.3289293305035117,0.28780771820829204,0.6771772020081072,0.3130253768033269,0.3962348610410401,1.0994761511889861,0.4257166273375955,1.249049786385407,0.4139934603197093,0.5435505071835504,0.3579079477410569,0.9808617906227002,0.5327553260359406,0.1721159757999832,0.4555727359000336,1.418757922101218,0.4455726126884819,0.34754584568689406,0.34735477267870285,0.20280129493438628,0.5550634643047467,0.5417843340506511,0.4094927938360917,0.8360923598017391,0.6312965752345375,2.3436475420922793,0.2630487531836344
|
||||||
|
73684210.5263158,1.1584573994052705,0.42419829857316366,0.2702647761294716,0.520240511590424,0.3005382136294716,0.6537985577147426,0.44178223808056094,0.8365821864796479,0.45694101286066535,0.7962525650404828,0.5197660678641617,0.40412120871926954,0.25367383613480765,1.4740025950931113,0.44445966419420735,0.6018889524930073,0.4067880123275799,0.6484816120602088,1.4740230581436784,0.36489327396382276,1.488372493648793,0.7808361053466799,0.4400178058655969,0.561081476819152,0.5061649734623875,0.3627213771323419,0.25026047989272027,0.4911541899155381,0.3797149737455838,0.3205080138018917,0.305370093052407,0.3742698843789563,0.4101658659958777,0.24073548726427926,0.2513423190552773,0.40597107007562944,1.1239899479451272,0.9245490156052185,2.559697674251989,0.297115632371559
|
||||||
|
84210526.31578948,0.844375525815335,0.3338541469415469,0.343110145294105,0.47660214749069424,0.4881182430193366,0.9530487483228012,0.25697333000373307,1.0861285730081913,0.44891589896500605,0.7128752182725393,0.4714291432557674,0.6613238855081912,0.42424000663440314,0.9286726637230025,0.447409886072217,0.5376545279970458,0.29154644910648586,0.7442284581403653,0.4095221860256882,0.764792698572217,1.3847287249366993,1.0000638882539277,0.7553043101302805,0.5593170208283739,0.5007998183823688,0.49123929940432387,0.31707721403761246,0.4173644216437089,0.2686141587360414,0.5382753409176984,0.3313404622170404,0.6553028772412246,0.6607537097878072,0.33042916168466496,0.6023237487285752,0.289454557889056,1.636732487136968,0.8620896696053713,1.2946330569787698,0.3248654732743789
|
||||||
|
94736842.10526316,1.6106815206046912,0.4127202047205368,0.37367169018267266,0.35793731615483926,0.7695052683188314,0.7174104283721163,0.4823679223945597,1.2655101786690075,0.45584447760331004,0.5067351016311434,0.43042478006632373,0.713124092926279,0.32533920636797875,0.9740043597868605,0.5119634797368353,0.37033357910832543,0.1909885538581996,0.641143532010657,0.39755595886145934,0.7326635138810176,1.6248475679730445,0.61191260583513,1.2848254042649203,0.3853962546900699,0.28206522510982945,0.4662294704828236,0.25039480531644953,0.3927739822303159,0.4775972075739726,0.9358869933025329,0.4431518887548896,0.6968731761308918,0.3966600650565446,0.26681758690408724,0.6834580270867598,0.26209611179425774,1.9973119513810176,1.0260797669682806,1.7709118903839027,0.34726144468355047
|
||||||
|
105263157.89473684,1.5309234524037372,0.3983951050819122,0.19968053442619516,0.26715530120765074,0.7650517809754264,0.549578384019001,0.5774122803495201,1.2204945846938031,0.37977854234690156,0.6176845148990029,0.3302285202322244,0.7217788062267356,0.32748274129513555,0.939221374215842,0.36162719832232787,0.4420078309288976,0.15945039891800392,0.57446826395896,0.4724880915929736,0.6661995335629112,1.6547675410135962,0.5102096462513932,1.217835608611807,0.33340227438802533,0.4355520264263628,0.38585906940153764,0.27002311347264,0.5810208782925169,0.5961826726009971,1.0901479747486906,0.36364219855733854,0.5543299857269033,0.31594103128956297,0.2378478274781288,0.5065228311639083,0.25295976654644486,1.7614039265217873,1.26806998847264,2.3667626631887337,0.4710514618088994
|
||||||
|
115789473.68421052,1.1999215577778062,0.35740205125465285,0.31415576881979307,0.1841297017570348,0.7656547356180207,0.4759899678322747,0.31301612801168766,1.1280127601940546,0.42551362943781373,1.0100018641294863,0.46344404405504047,0.6437597697461411,0.4433463960473227,0.9053329087360414,0.37020026019405455,0.5204719427219718,0.13159427484316838,0.5809357450279172,0.39714918110179115,0.6872217569324779,1.6812205486350442,0.40358793306218627,1.1986103480542465,0.30020494011960863,0.5049918430994093,0.4205149415457347,0.24509736375465288,0.6549487021491138,0.5059851622647525,0.9695901527299116,0.36040796665603764,0.45746737239763685,0.24247872202019943,0.24264108969564252,0.45196624607920977,0.2475635496863368,1.6110133015217873,1.1109668092384233,2.216639930851902,0.5585869052073302
|
||||||
|
126315789.47368422,1.4080035138328322,0.45896729868204655,0.3545007256589767,0.19093489712955558,0.8013565295951188,0.4192447292507519,0.2692191739491809,1.082088660665496,0.47498092228686045,0.9461704159047131,0.5686820497803411,0.6231275436950853,0.5511760553164496,0.8584579087360413,0.4771080439770982,0.43805849915396117,0.24780899005583468,0.6089015865590104,0.5185714679411577,0.5760785470048475,1.6961871963458708,0.3501337215180543,1.0807152322785012,0.47783818337395617,0.4474057649311266,0.6096805984623876,0.14864564932614466,0.6615976877820129,0.40255189932614466,0.8845044820262455,0.29682200635239014,0.3852708346295556,0.34347178070829204,0.27685119967051164,0.49738444912136437,0.20290213080324287,1.6539273882836847,1.0005626202950517,2.0815301615115347,0.46660374142126354
|
||||||
|
136842105.2631579,1.685544861981083,0.4700728149625403,0.3125950213614593,0.16522363414394553,0.8884927744350276,0.42189141323691925,0.4494963299864879,1.1374266616525412,0.4926113540776219,0.8855073696358383,0.6481579069945952,0.8028054990266501,0.6110823649778921,0.815212289382216,0.43217692018545883,0.4928058613700551,0.30057479404016213,0.4730144511299449,0.3645165276989709,0.8226372198384888,1.5476963592698367,0.41557435117600044,1.2330392298605968,0.36129083395664685,0.2951741126105394,0.6426731463614593,0.2899092016457854,0.6068756441660534,0.34729358430054047,0.7682989918294044,0.31180202399594636,0.5258473055514604,0.33267350870486434,0.1815466101479991,0.5071876610415134,0.2517237782148114,1.651451266703513,0.789756011434539,1.7992156945437272,0.43369424244043236
|
||||||
|
147368421.0526316,1.6932158007846314,0.47971436983990867,0.2692280555365818,0.5914688004681282,0.8043518224911675,0.39853696215515977,0.7656229869810831,1.3139441852094065,0.4294837676917416,0.750226599357795,0.766856227885323,0.863885234927867,0.46217074090423993,0.8744585771639922,0.4837734190711025,0.5152315683972473,0.19127190080045653,0.42965742095355514,0.32945569004048275,0.6457203788440311,1.5226831198399087,0.24773887253864302,1.0275053832669665,0.3256396771798174,0.37768223411158525,0.5706225154802739,0.2287329993419699,0.6734788054574561,0.4284518550967907,0.9492259619969082,0.3992752962825702,0.502061812170985,0.35453389556123943,0.17146311836559688,0.38371487139334626,0.30582608674701894,1.3928925574981603,1.1509581008446188,1.6121009551917416,0.3820256367944944
|
||||||
|
157894736.84210527,1.2549097531389988,0.5027943317909981,0.27886961041395025,0.664670238864719,0.6509453702171092,0.43518592644266146,0.8185955665778585,1.2093825670490634,0.42178476286066535,0.9018313997009786,0.7295506284507688,0.608284575126838,0.4582758002664244,0.9714858908402293,0.359380447303159,0.37672238072530045,0.24863856783203808,0.5427961507992732,0.4268108061476098,0.7069700853976517,1.7525844890985465,0.3420833365738888,1.0291030783402293,0.36598534465166344,0.19615127901621457,0.6143453259877552,0.1521208808032429,0.5312899671433043,0.3372326161392507,1.0011676355081913,0.23841145428263896,0.49033521086885656,0.26487675399991606,0.1753885052540956,0.4606640649304166,0.25533399819667313,1.3010068428483366,1.2435913270860497,1.7181952547828907,0.4073485958279005
|
||||||
|
168421052.63157895,1.0124180759419363,0.45978831452345914,0.2999462011448234,0.7901238523361757,0.8078746003127165,0.4522191943224117,1.013980220889781,1.2829143069787698,0.2519476301452128,0.9913637367311937,0.469789077040231,0.8026916842051162,0.26904174297470124,1.0704058279951525,0.36056383296723515,0.5218521921258225,0.30974293547654086,0.697169253700658,0.44317821386448236,0.6082032433507184,1.4083620108395731,0.2360621740283067,0.8826060202643481,0.2195908395867598,0.34425933077064585,0.6453775844415469,0.13832937647431182,0.6383943425651403,0.4458501926749698,0.7669867518205721,0.39574540156736937,0.5160184316027527,0.18859550497208272,0.4025020440859809,0.36608650017313016,0.2628368261448234,1.4146200164203169,0.8654640176619848,1.8252886404951525,0.43709803023827043
|
||||||
|
178947368.42105263,1.540162197440615,0.6781794503124798,0.3318811981961998,0.8592509298773684,0.6906166393671009,0.33169010141219457,0.8082542947785014,0.9945589604470206,0.49701326903874204,0.726901197037208,0.49893457407436215,0.81961759107595,0.4947173020846296,1.0621571580458875,0.5449629199802051,0.5494174138330686,0.18831144739716332,0.784363252634487,0.6636366329034611,0.3829567135205889,1.4962236584058428,0.18594337368275649,1.0215346449960303,0.4902737266138981,0.2427877072152008,0.8300839632832113,0.19700072808939337,0.5525600348813382,0.5907741874208741,1.186542645715941,0.4018600812579126,0.41406419336630695,0.29463852805774304,0.23845144520175748,0.4947542021479303,0.2025189545015879,1.438054082136075,0.9608209615268866,2.1982890707634164,0.3453175446993756
|
||||||
|
189473684.21052632,1.2474309651805424,0.22603579703460444,0.38242623284252725,0.7634182398999497,0.7871124658558177,0.610305046440822,1.0380975665147945,1.1121658737309421,0.4745372444638915,1.0006996693703607,0.4901133719573721,0.6686776919378138,0.2518988051903215,0.7926987729904725,0.47779505404739164,0.217048153652709,0.1977209318377635,0.5529709337820967,0.5048571980230696,0.7586704159047134,1.9725345125488958,0.2896589770541627,0.9778788135982946,0.5875942991050657,0.30496439312963936,0.6392621703425273,0.09999873301329049,0.8237719813212133,0.47105995241624826,0.9593535658395191,0.44698873187036064,0.5362910590343528,0.22654233282622868,0.2911622425195583,0.40635083586885656,0.3520519594736707,1.4246286302392173,0.6529200598803914,1.7648292076554655,0.37195549116900756
|
||||||
|
200000000.0,1.4503914180554842,0.42453429573460627,0.27918956154271174,0.6980528078581157,0.8648501446372584,0.4159773274471885,1.0562553405761719,0.8974411613062808,0.43092351210744756,0.8383592304430509,0.4834685074655633,0.7888030503925524,0.4189333162809673,1.1257642444811369,0.31813380592747736,0.5106129897268195,0.24825055975663035,0.6962507649471885,0.6283008675826224,0.5188404384412264,1.6482437033402293,0.3647110587672183,0.9155689038728413,0.7550875513177169,0.4355020523071289,0.5397270102249948,0.2954014226009971,0.7015758815564608,0.33128888983475535,1.1314397109182257,0.2894829197933799,0.4487545615748355,0.2337443703099301,0.16643162777549342,0.4068269729614258,0.25219174435264186,1.3003082275390625,0.7267135820890728,1.4779998879683645,0.5472253498278166
|
||||||
|
21
results/AcrobotSwingupSparse_value_small_data.csv
Normal file
21
results/AcrobotSwingupSparse_value_small_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31,trial_32,trial_33,trial_34,trial_35,trial_36,trial_37,trial_38,trial_39
|
||||||
|
0.0,0.5126953125,0.341796875,0.1689453125,0.232421875,0.267578125,0.3916015625,0.1044921875,0.1884765625,0.3212890625,0.2333984375,0.4599609375,0.384765625,0.3291015625,0.5283203125,0.1376953125,0.455078125,0.25390625,0.1982421875,0.087890625,0.4580078125,0.140625,0.220703125,0.23046875,0.3603515625,0.333984375,0.2783203125,0.078125,0.1767578125,0.2294921875,0.2021484375,0.2421875,0.4267578125,0.796875,0.8671875,0.13671875,0.24609375,0.1328125,0.318359375,0.2880859375,0.1767578125
|
||||||
|
10526315.789473685,0.49093338128932623,0.3462821585319709,0.1852252305049315,0.2928347680046948,0.27106667885819963,0.38623029703578793,0.11983074970192527,0.1988314763330687,0.3117093828576423,0.23267857718005405,0.442573542079767,0.38980464723962166,0.3179714144762203,0.521509326395896,0.155857942110944,0.44749190470518496,0.27505906863225793,0.21468822711722674,0.09868852979918927,0.4620501050658503,0.1616670708907278,0.22291807983060294,0.25549773958581307,0.36278801281366324,0.32900072663114344,0.279040172819946,0.1068086650563079,0.19270548728034106,0.26188590189756783,0.21333395939454478,0.263395692503023,0.4162367770546361,0.7937186893663908,0.8380054701068065,0.1567087173461914,0.25711315028224957,0.15335620605384215,0.31996521725218713,0.2936233245765073,0.17786528991530146
|
||||||
|
21052631.57894737,0.14773336606012488,0.41170637495299783,0.43445887790162147,1.1673907229774876,0.3169193109316839,0.31370865671258225,0.387846738017497,0.40387508149292334,0.2171311787951356,0.2135045218005405,0.23726710057985093,0.4631117770546361,0.15972420119182557,0.426365754610944,0.4283274177699208,0.3453213224120418,0.6104517115120082,0.47045086361364646,0.27034647510982945,0.5193295782622869,0.510279029360108,0.24381795021965894,0.637100209159534,0.4475086613705284,0.2694615234628609,0.26598663541418693,0.5406820080617127,0.4469330383469854,0.7313424160605982,0.3769322709693803,0.5845370834223782,0.232129622694528,0.6846188415780952,0.38532039399292334,0.4707629396644656,0.4800773789677924,0.4946741965338794,0.34535071460163824,0.38374872815245736,0.19174840509726399
|
||||||
|
31578947.368421055,0.30206510432869443,0.5240726180353987,0.25200302779179196,0.16374085550493153,0.24011918646476937,0.4396643651819625,0.47582212907785854,0.5759325925663236,0.6530311272745316,0.17334035889263633,0.7837483017728598,0.3632504840967067,0.341378148572927,0.5027413381433882,0.21605753436313113,0.5028028699499748,0.4777906370295052,0.2948401760196421,0.2864172135033437,0.4068529546425943,0.47393907404342184,0.14073643459837856,0.35161240279179196,0.6927974177859825,0.40297644026061497,0.09805480925330169,0.21826223389263633,0.36566745409344703,0.20924176403690248,0.22758521135493992,0.3536923453418172,0.2098064449024994,0.25531110737132234,0.5960842407311098,0.4220240452943418,0.7280757011138831,0.489715842989343,0.3941002314770982,0.359857981885239,0.1763507150877216
|
||||||
|
42105263.15789474,0.3601710882213307,1.1924962601172955,0.18665435241530146,0.2274378832027192,0.36839684356943064,0.38353645768522227,0.45003534882352625,0.17367738633935143,0.34137508149292334,0.28982885260331004,0.27263287361969246,0.29196245188197933,0.21424010850055727,0.2311246982902041,0.33199160250930576,0.40100623894266146,0.4240786692442326,0.1392061333907278,0.5756957141316168,0.24957751963607494,0.32276808720216194,0.2485302003136632,0.5412050284176984,0.23178849788253658,0.15444761366064857,0.21346384402457366,0.329434836009863,0.38119127479616627,0.29703565053332215,0.3316378183972472,0.3469486817758829,0.4120478431934135,0.35543970123882773,0.3525383756431516,0.717103456196032,0.40850460232129715,0.6356967492777226,0.6420835140999664,0.3197789284661206,0.2659972341437089
|
||||||
|
52631578.94736842,0.5717966761285248,0.45180252846588387,0.28262718232384676,0.20317627650548875,0.4386659183660703,0.26370639484014535,0.5657366359002702,0.265974343318358,0.3811389680054049,0.4391567753292517,0.510938411934554,0.27700944620486445,0.2651570576379834,0.40340610884563416,0.3701968259098127,0.47318707410648586,0.2735171384098127,0.23087209347542634,0.3386583513170068,0.30774366162160094,0.22156381210792098,0.33242127109432484,1.064376506118563,0.3128821460163824,0.1856343884877551,0.1527064156994595,0.39336189734968785,0.3171855815559873,0.33348362993995895,0.2634056043756966,0.14376423431565555,0.3315845050970273,0.40819080912835715,0.2981239910601249,0.19203246267218338,0.3540392846612059,0.9713163111678782,0.5147041547991893,0.3093836802855093,0.26526849223636195
|
||||||
|
63157894.73684211,0.7125522000637738,0.7319037101935815,0.4711044079048811,0.24843858946063177,0.21935192435732168,0.22663360025083593,0.510458816781929,0.47032200926889023,0.23310755890822482,0.4480394854770142,0.8734587566344031,0.14717005890822482,0.24536773174423254,0.5740625640361926,0.3370360958279005,0.24657673188523901,0.23138584126395867,0.2938752425344367,0.40674902791791984,0.3378195247491641,0.2892863994820297,0.42775145131795345,0.2716987060377804,0.30853427876395867,0.38471675249348064,0.23302792049841217,0.43555202642636315,0.20956716246882298,0.5333538636606485,0.3262833095983786,0.3460439063835672,0.4598812990901874,0.21585004811802067,0.29453766841307233,0.483130816937814,0.3874946710475594,0.389383173385155,0.4164042486048139,0.23577301257865244,0.28111736794257747
|
||||||
|
73684210.5263158,0.5028141793121593,0.7853086582511413,0.32119642136169596,0.19974753731175474,0.22512211627907383,0.33674521723612527,0.437565972600287,0.3969574212367514,0.5267469770690413,0.4076859667030398,0.5995570444334245,0.3140586868877887,0.16802890809288976,0.6648814552708674,0.30337525999116755,0.19752129689478143,0.3245219444634182,0.2280445917821656,0.2014959234940379,0.3265502670795304,0.4518267930049315,0.336766071266745,0.41025571215515977,0.4350054521639922,0.6852717465641098,0.34111187794862363,0.745721172427867,0.19450377625441623,0.437915292142831,0.3584736351161122,0.21514454046444875,0.4708477057248271,0.2247112411541292,0.21236765219564255,1.0193036441327465,0.4368659360256882,0.19613866726777562,0.22169131651479457,0.27811797768125257,0.14612102244369207
|
||||||
|
84210526.31578948,0.6498939535294213,0.34513844413440314,0.32915524300445814,0.28137306361317305,0.44040986375465285,0.39313014656552975,0.7127375298920102,0.2532551097077346,0.6116822049888546,0.40558345298027393,0.4897784288570162,0.3734163617163153,0.24351922032575532,0.6532393479281187,0.15692508517870282,0.25244979647057875,0.49373149079299045,0.19049327987713172,0.2821558056775883,0.33839308720216194,0.43713630401526793,0.40073037609829465,0.3272885774311266,0.39881552487529204,0.8610919096463274,0.3377703088142205,0.8777427356328991,0.2923148622803411,0.31304348340655297,0.4358921209530818,0.2709846602252316,0.3591117964557003,0.19841068933545053,0.2856812833749026,2.245444765381536,0.41749464970216194,0.37531688404875785,0.24894139509121796,0.42374608259121793,0.478638392736377
|
||||||
|
94736842.10526316,0.5020078120139166,0.24723437354175007,0.3673659137081241,0.3348139527762035,0.431715479187688,0.3390688592377132,0.6717935968964384,0.457708395749248,0.35966896944759297,0.3585454222237965,0.6291940377359575,0.46708539714443387,0.29879632881143414,3.3344964188551973,0.1513210386450601,0.3669454939147442,0.8411693361657478,0.21884774501303886,0.28045494206394184,0.31766515293279846,0.35177270989668996,0.39040625194433326,0.3227899715180542,0.4911740612125133,0.7692721675967906,0.3718946224434554,0.8825356068703606,0.3802977849902209,0.3524901899934806,0.4965401514745485,0.35784810930077715,0.48360047644195137,0.2077747117779592,0.4977649017714397,2.5750844696552138,0.4186947682557674,0.32242931883751186,0.2232814323869108,0.32183629984340506,0.5832836040169247
|
||||||
|
105263157.89473684,0.47092427705463613,0.1789020105081912,0.41831536966677846,0.26426457434149664,0.5466678809591277,0.3619225414836176,0.6388927311778398,0.3057775101172957,0.308620394762203,0.3875643975846985,0.39149729234690156,0.6554945895546361,0.27589480434428293,5.434002540778585,0.18356538809567607,0.4280085207022458,0.8192583121091045,0.3302053097542633,0.44632429297280773,0.39916454854103994,0.3065698681775883,0.44995296298632004,0.4168735884563414,0.3975001940106422,0.8313359976475259,0.2914756922840742,0.7520645595983785,0.39257640785787906,1.4076112551702356,0.36185894910648586,0.3285283434754263,1.142009761524993,0.21171955520756688,0.5883418106966732,3.8587911241272477,0.438727793601081,0.18723100572411702,0.3022124324809151,0.37799327987713166,0.2224439795327649
|
||||||
|
115789473.68421052,0.5792311274774187,0.35534367336790973,0.4392541769138664,0.28334566893009594,0.6810879165776218,0.2916674521491138,0.6729964628774374,0.22855905020336031,0.38809320867226715,0.9873037523179831,0.8258462044670969,0.6791347915776218,0.30101300078415805,4.686507616016673,0.2811409694005908,0.40969581551168766,0.46326727616159546,0.5475969208904911,0.28012333814457197,0.4383805108532681,0.37350730948831234,0.3248042611204026,0.35399517260099717,0.11088444717703104,0.8374835275877216,0.4063566741520678,0.7801278264899003,0.4228286716746491,1.2008435864858025,0.24473298091307244,0.3711600660287112,1.3431203530435747,0.16948504196970085,0.5506455508625738,3.350528561177347,0.5580844667809821,0.27620175274455316,0.5384144505635522,0.45958357570574226,0.3081340261443499
|
||||||
|
126315789.47368422,0.6972570868410233,0.31267635313757874,0.33449606221798706,0.3799252074180879,1.8770008139993366,0.24781546764426615,0.5310377295327648,0.2528681556934134,0.30656437332280095,1.6705563299543644,0.8751828307260102,0.4370990129719146,0.2601669079048811,3.6776542980585063,0.401188773791876,0.38317550912788373,0.3322923929737545,0.4699644474441653,0.23832570548863313,0.4153808741688398,0.5083389070885995,0.3155824867311938,0.38053022263122743,0.25248159265914477,0.8476159156524574,0.4444442812425607,0.7804645105081912,0.4246221526507855,0.5917459419229354,0.5932889499822818,0.34516817975242375,1.0056706584391497,0.2681746020541628,0.45302107327532554,2.6140705647561036,0.5044859466130052,0.4146504388951861,0.4203169761932454,0.45823921985573385,0.2870160470048475
|
||||||
|
136842105.2631579,0.960159909361948,0.450690290604272,0.23012897512589126,0.3031537645081072,1.2008859338522602,0.31478742879513566,1.3341564299987634,0.32415108403340614,0.3303785958118387,1.8565622181773516,0.8373598008935143,0.5340105225835153,0.2586940411385407,2.385186588995343,0.4632587617784326,0.42975415945713547,0.3700331082964869,0.4503122181773516,0.3638722559751897,0.3153411437269722,0.5860544056773517,0.32595994796118904,0.5114446280735683,0.4670077955293525,0.8243640175816755,1.0210609594540592,0.8471702010347574,0.32536894198599936,1.3395810087631954,0.5504872567765928,0.2546490011452967,1.0243628097703255,0.2852512713614593,0.4995751394129197,2.809771297380865,0.37543580274502647,0.5132251300970273,0.2985157055207567,0.5266009988547032,0.3262549476940547
|
||||||
|
147368421.0526316,1.0055862595830267,0.6440014561787868,0.2858737300967907,0.3863602054416309,0.8479710734782128,0.4378958356017222,1.530421428733255,0.54462589253349,0.2567316673469014,1.553620309380613,1.45811728163109,0.37198208837958235,0.1532160423469014,2.7359078003098767,0.47535453014426615,0.3418036960168558,0.7782120163090676,1.4861997567385528,0.4293566304891064,0.44270003997718216,0.6637524845197261,0.36605131725195045,0.44276221108898883,0.329971450847932,0.7993510978043574,1.0415730938686887,0.6488028655752248,0.3565680326847489,1.4946403397747683,0.7484382935838358,0.5302830534958777,2.565739253881567,0.5837625836401438,0.4866890735573385,2.587823254910202,0.4213335771639922,0.45447173607316366,0.31662843233990867,0.4362479180840573,0.30952860443876057
|
||||||
|
157894736.84210527,0.9976090116844283,0.7096902260819962,0.49118156908621763,0.30583666170072676,2.533825134636624,0.43479590534833656,1.5473897780738048,0.47497304720891803,0.24082879148361758,1.6901296076682137,2.1395948520987984,0.4781837489796479,0.20542302752465758,2.631700877667794,0.3386815617949678,0.29715631014752586,0.5002949284054232,2.789382593783646,0.6829672414510206,0.5018177692909981,0.7628942605861337,0.3123116506433883,0.3546965828892927,0.3742781028853229,0.6146799256596868,1.1904109178157396,0.7768765182706459,0.6362036760493991,1.4573682774467152,0.5246160301145092,0.43812378489739995,3.921759954119654,0.4363299367170253,0.45301356540162147,2.1907006947948,1.1487924095005875,0.3566391091597707,0.3147901524467151,0.35226047600405364,0.43407128458208005
|
||||||
|
168421052.63157895,0.8819832418764066,0.6183622999534713,0.22751953463144897,0.3871347527754935,3.2939703352233383,0.2267976375167719,1.07802922045425,0.40803050202345914,0.42567634054168124,1.8516671135815228,1.5821111367349807,0.4174423429114006,0.3464219788104874,2.970000755753874,0.5660719937564925,0.4968351749832281,0.47434891888309383,4.935901631278676,0.5953465934605479,0.3941350709698537,0.5912875442293541,0.31495696091585873,0.4686734524460051,0.2391220895867598,0.6104732523994763,1.2840607450279173,0.6961646251731302,0.45934974057522493,1.0166076491083793,0.6046675579039344,0.39845732374534715,3.7994228373604138,0.38673857374534715,0.5138954310536055,2.1964220379858466,0.697549244043239,0.3853005702475762,0.2744933574813885,0.35074727489017055,0.6157125964389284
|
||||||
|
178947368.42105263,1.036737164632105,0.6620557182713559,0.30928696555774304,0.4473527950593309,2.256413634133801,0.32278280707277424,1.700483562543452,0.3760209942128189,0.4509254281210437,1.8510788312579125,1.4233721313053882,0.47974616602847453,0.3697299138330687,1.7596451186077082,0.290563018038002,0.3500167920648886,0.25259023227850147,4.41752291650323,0.5792071825909813,0.41858402051423726,0.5553212205458876,0.3079340477729438,0.39578193442643184,0.455206340039536,0.9051015489319356,1.1262431527769137,0.6798635334849688,0.8544854140347722,1.3124504881882602,0.3912973139754953,0.3701616192128189,4.179021024307716,0.26242457730618207,0.48850171519778773,1.7418606565269406,0.8900138744026671,0.3708213214398751,0.30816204462024976,0.40239571336233715,0.4134413912025514
|
||||||
|
189473684.21052632,1.237837572177031,0.5930154435852558,0.27973201466399217,0.4655953827326978,4.113130653994236,0.3874246010819961,1.522287971095035,0.5005732190245736,0.27539680671163547,1.5777712634395695,1.0808048776642438,0.5301438962983953,0.22412845368530612,0.3852786172459997,0.5128333918605815,0.2688028752968913,0.22640970076880626,2.662768128836254,0.4151313602098798,0.40941006457046125,0.5537225665147946,0.3220383387853564,0.4326978561950853,0.3504680253131898,0.9074135143671009,0.8324415095955382,0.9398728103849036,0.6542906932883645,1.1128848226446855,1.7575621089776796,0.4881168217540117,3.0816953241659997,0.5057438192605311,0.4500168987918759,2.6855392244714116,0.41633903418881746,0.3249307827936315,0.25339971420837576,0.2916004730393682,0.46179850676053114
|
||||||
|
200000000.0,1.1967784981978566,0.610686553151984,0.3437335867630808,0.37152069493343953,5.123102439077277,0.4199164039210269,1.4689820942125822,0.4834093796579461,0.44998404854222346,1.8275360308195416,1.4237405877364309,0.5493120896188837,0.29364314832185445,4.355327355234246,0.4096855364347759,0.4686908721923828,0.23630603991056742,1.0983883205213045,0.4730722527754934,0.36480743006656047,0.5995283628764906,0.43493602150364924,0.4425667210629112,0.44316557834022924,0.7875417910124126,1.3248418004889237,0.5543754477249948,0.4814408954821135,1.5615148042377673,0.5799588153236791,0.3838427192286441,3.5419572529039884,0.36495409513774674,0.5220931203741777,1.8803436379683645,0.7313085355256733,0.536768913269043,0.46199507462350947,0.32877856806704875,0.3125481856496711
|
||||||
|
21
results/AcrobotSwingup_fasttd3_large.csv
Normal file
21
results/AcrobotSwingup_fasttd3_large.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20
|
||||||
|
0.0,9.888734817504883,9.284497261047363,6.875021934509277,6.710487365722656,6.043292999267578,9.820124626159668,16.537796020507812,6.202973365783691,13.810953140258789,10.509878158569336,14.101040840148926,18.845413208007812,7.062399864196777,5.953526020050049,3.237598419189453,15.201939582824707,26.584564208984375,4.742987632751465,9.663023948669434,11.021127700805664,10.703713417053223
|
||||||
|
2631578.947368421,10.10537327277033,9.283902492962385,6.847756677552273,6.843581103964856,6.502802102189316,9.706934806547666,16.38613082546937,6.16217115521431,13.814598011343103,10.559696472004841,13.84272244807921,18.931698924616764,7.653529451081628,5.921330041006992,3.2969599005423094,15.300211296269769,26.58416327049858,4.995440086251811,9.93426430068518,10.886806641754351,11.527277304937966
|
||||||
|
5263157.894736842,17.75651438612687,9.914393174020868,5.956438364166963,11.625398419405284,22.537136435508728,5.852523133942955,11.18851415734542,4.71972844475194,13.519483784311696,12.223783179333335,4.817055474770696,21.689254810935573,28.765877899370697,4.9972495577837295,5.481313225470092,18.717195812024567,26.083637112065365,14.647366856273853,19.33682950546867,7.828131286721482,39.55978362183822
|
||||||
|
7894736.842105264,19.44448771131666,22.67342510348873,6.805056731167592,13.944712709439427,23.3878490266047,7.180790741976939,12.523353957816175,4.340164582980306,6.309299673689039,11.574244839580437,4.36824749174871,19.894484808570468,40.80036094941592,8.495043572626617,8.15164659525219,19.003879700836382,17.92720954041732,30.071821808815006,20.095726013183597,34.50449174956271,30.197650106329668
|
||||||
|
10526315.789473685,16.578101691446808,39.64854281827023,6.440792052369369,12.181270567994371,22.96099554864984,7.186699704120034,13.097670335518686,4.722508173239858,5.906746431400902,17.147330208828574,7.697360220708345,45.72458196941175,73.38811226894981,11.500692028748363,15.430165278284173,22.40508878858466,19.284776336268376,37.79571555790148,37.32427807858116,28.69454835590563,37.2967561295158
|
||||||
|
13157894.736842105,22.251839186015882,28.554213194470655,13.777229503581399,24.66804232095417,34.30007964686344,11.680851812425413,12.848895167049609,5.975417475951345,11.150909117962184,20.87581067336233,7.345379272573872,52.023328028227155,109.53894695482757,17.527399414464046,19.268240897279036,26.273384464414494,27.957743011022867,47.231798115529514,68.03209223872737,29.05422474208631,27.824859227004804
|
||||||
|
15789473.684210528,19.25550873656022,71.7214200873124,20.747600367194728,27.79745068989301,40.710212456552604,21.770903882227447,23.17712328308508,13.038783038917355,18.8152778430989,35.476379708239904,11.309572819032168,53.50583972428975,97.65787807263825,16.068686836644225,26.065466604734723,31.93282687036615,46.68305125989412,75.91218122683074,64.00905655559741,39.98445173313744,22.702275276184096
|
||||||
|
18421052.63157895,24.159047685171437,68.98515194340757,26.65492335118746,19.256001913233813,38.468064841471225,23.646104894186323,41.1612764408714,33.52059091392316,36.403322169655254,32.9737615146135,17.57789507508279,70.32805327365276,88.26997340352912,18.196330208527414,36.305894876781274,36.24021483095069,51.50751520458021,98.42132879558363,74.45273170973127,48.47059553861617,39.84074703643196
|
||||||
|
21052631.57894737,38.28674632624576,88.61726499858655,38.54707135652241,32.62826583259984,51.20372069509406,31.695851978502777,49.42035880841706,25.077611722444235,41.08017173566316,36.683732635096504,41.534054354617474,113.68805142452842,81.35287575972707,23.39544168271517,51.68273172880473,48.26376187173944,67.90966686449553,102.67105975903962,117.30013686732242,38.7832320614865,22.784806954233275
|
||||||
|
23684210.52631579,35.66065367272026,105.75405805989315,46.75854296433298,35.39752160875421,63.491124046476266,40.093452623015956,47.46454145406422,37.4789982093008,43.01326150015781,46.46034582665092,42.260203116818474,113.42279282369113,63.84576871520595,38.857609259454826,45.14887053088138,57.85811551621086,96.90247347480373,112.75141496407359,146.27356973447297,75.26643239824396,37.109446475380345
|
||||||
|
26315789.47368421,31.79018675653558,150.29831253854852,61.520441795650285,65.30472025118377,74.5737415614881,63.103047270523874,74.47154268465545,56.1377770022342,110.07281300896092,49.43393192793194,56.55144682683442,149.9051325446681,95.1966239778619,43.00841266230533,121.19370550858346,54.78556738401714,107.96995740187795,144.9742881875289,158.83448455208227,112.80655241012573,34.726112842559814
|
||||||
|
28947368.42105263,34.886604942773516,182.68540041070236,52.63218028294413,74.24405795649479,98.45311747099223,83.63654175557589,98.15259555766457,83.00712973193119,152.04949614876193,60.033586916170634,104.44645261764528,183.34212626908953,145.3882823994285,55.8120873350846,155.29590370780545,52.760886982867596,135.8035633689479,149.5857477941011,174.62654038479454,119.70657352397316,56.90697970515803
|
||||||
|
31578947.368421055,42.14771624615319,202.42679094013414,52.38267727902063,86.16361492558535,116.33401408948396,82.56993238549485,123.30445560656098,90.39040706032202,137.23200105365945,40.95219484128452,74.46030948036596,199.67874667519,197.26049192328207,95.94568357969584,134.3780511554919,49.24194752542601,126.69745851817879,172.39319590518346,173.57390514173008,134.63159445712444,63.719783306121826
|
||||||
|
34210526.315789476,43.65013820246645,217.50729831896336,62.131120148457974,115.80259094740214,128.52460966612168,95.68277384105482,124.53052769209206,97.55255219810887,81.50745273891249,47.45693647861479,80.01052587910702,156.18299102783214,207.39208640550308,110.61193823814396,156.5341895505002,80.74786253979332,121.23273363866309,162.08628305635955,190.38883532975854,143.8943206385562,77.16892081812811
|
||||||
|
36842105.2631579,41.94657889165378,245.93722017187818,55.185322447826984,135.67933988571173,147.6677642621492,95.9600271425749,107.22564328344245,93.87127048090883,117.41437731291124,48.61179826134132,79.67655578412509,242.42287349700928,182.9434824491802,137.05576708442285,161.4447242837203,81.30902413318032,131.1828931256344,160.34503550278512,207.20630570461876,137.0641293776663,112.80630558415466
|
||||||
|
39473684.21052632,58.79436187367691,212.42795580311824,56.41915478204427,164.73655364387912,139.29991523843063,94.7229819925208,131.44584129986015,105.2594486035799,168.36056754463598,73.8486905223445,83.05311863045942,250.90402429982237,201.20104995526768,139.37954770891292,153.56256239037768,83.17541534022281,118.41643432566998,162.2819593329179,186.46040652927596,159.94275539799742,120.8470532266717
|
||||||
|
42105263.15789474,64.42797510247482,147.64111830058846,60.26418444984837,158.13086258737667,136.83324552837172,105.07864440114875,175.86086995978107,145.38353729248047,174.84317819695724,85.31340006778115,98.05986544960426,264.37591231496714,194.34444628263773,150.8557458174856,168.78205148797284,93.2750462983784,154.0627132214998,184.64032062731292,187.95712240118732,183.14200230648646,126.56950057180306
|
||||||
|
44736842.10526316,63.05825469368383,119.0508030213808,67.56669671911943,184.729963001452,119.09256009051673,98.55030614451357,131.52183355783157,156.8359883208024,151.64848545977944,83.73348729233992,125.89496096811797,249.2035153539557,159.97292945259494,135.16470550235948,164.3975305557251,91.16130136188707,160.4443457001134,171.18832497847706,200.91943379452354,192.18329652987026,154.0876652817977
|
||||||
|
47368421.05263158,67.45054091905293,188.57196054960554,69.25454774655793,166.7225661026804,122.31587982177736,94.99036314612941,136.75466273960316,167.56895547164115,163.80847815463417,76.43427557694285,99.01034643775537,248.06674073871815,183.3102160002056,154.41465392865632,173.13884579507928,68.79246320222553,186.41125699093467,166.97562890303763,184.13067767494604,180.09653121546694,120.06530277352583
|
||||||
|
50000000.0,69.65415954589844,222.86355590820312,61.62987518310547,135.8297119140625,148.7770538330078,99.98258209228516,207.14266967773438,184.39578247070312,193.70370483398438,78.28141784667969,70.91169738769531,270.7571105957031,221.07476806640625,182.5941162109375,166.03514099121094,53.1497802734375,204.63912963867188,178.64535522460938,186.98333740234375,188.1245574951172,85.73209381103516
|
||||||
|
21
results/AcrobotSwingup_fasttd3_large_fasttd.csv
Normal file
21
results/AcrobotSwingup_fasttd3_large_fasttd.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20
|
||||||
|
0.0,9.888734817504883,9.284497261047363,6.875021934509277,6.710487365722656,6.043292999267578,9.820124626159668,16.537796020507812,6.202973365783691,13.810953140258789,10.509878158569336,14.101040840148926,18.845413208007812,7.062399864196777,5.953526020050049,3.237598419189453,15.201939582824707,26.584564208984375,4.742987632751465,9.663023948669434,11.021127700805664,10.703713417053223
|
||||||
|
2631578.947368421,10.10537327277033,9.283902492962385,6.847756677552273,6.843581103964856,6.502802102189316,9.706934806547666,16.38613082546937,6.16217115521431,13.814598011343103,10.559696472004841,13.84272244807921,18.931698924616764,7.653529451081628,5.921330041006992,3.2969599005423094,15.300211296269769,26.58416327049858,4.995440086251811,9.93426430068518,10.886806641754351,11.527277304937966
|
||||||
|
5263157.894736842,17.75651438612687,9.914393174020868,5.956438364166963,11.625398419405284,22.537136435508728,5.852523133942955,11.18851415734542,4.71972844475194,13.519483784311696,12.223783179333335,4.817055474770696,21.689254810935573,28.765877899370697,4.9972495577837295,5.481313225470092,18.717195812024567,26.083637112065365,14.647366856273853,19.33682950546867,7.828131286721482,39.55978362183822
|
||||||
|
7894736.842105264,19.44448771131666,22.67342510348873,6.805056731167592,13.944712709439427,23.3878490266047,7.180790741976939,12.523353957816175,4.340164582980306,6.309299673689039,11.574244839580437,4.36824749174871,19.894484808570468,40.80036094941592,8.495043572626617,8.15164659525219,19.003879700836382,17.92720954041732,30.071821808815006,20.095726013183597,34.50449174956271,30.197650106329668
|
||||||
|
10526315.789473685,16.578101691446808,39.64854281827023,6.440792052369369,12.181270567994371,22.96099554864984,7.186699704120034,13.097670335518686,4.722508173239858,5.906746431400902,17.147330208828574,7.697360220708345,45.72458196941175,73.38811226894981,11.500692028748363,15.430165278284173,22.40508878858466,19.284776336268376,37.79571555790148,37.32427807858116,28.69454835590563,37.2967561295158
|
||||||
|
13157894.736842105,22.251839186015882,28.554213194470655,13.777229503581399,24.66804232095417,34.30007964686344,11.680851812425413,12.848895167049609,5.975417475951345,11.150909117962184,20.87581067336233,7.345379272573872,52.023328028227155,109.53894695482757,17.527399414464046,19.268240897279036,26.273384464414494,27.957743011022867,47.231798115529514,68.03209223872737,29.05422474208631,27.824859227004804
|
||||||
|
15789473.684210528,19.25550873656022,71.7214200873124,20.747600367194728,27.79745068989301,40.710212456552604,21.770903882227447,23.17712328308508,13.038783038917355,18.8152778430989,35.476379708239904,11.309572819032168,53.50583972428975,97.65787807263825,16.068686836644225,26.065466604734723,31.93282687036615,46.68305125989412,75.91218122683074,64.00905655559741,39.98445173313744,22.702275276184096
|
||||||
|
18421052.63157895,24.159047685171437,68.98515194340757,26.65492335118746,19.256001913233813,38.468064841471225,23.646104894186323,41.1612764408714,33.52059091392316,36.403322169655254,32.9737615146135,17.57789507508279,70.32805327365276,88.26997340352912,18.196330208527414,36.305894876781274,36.24021483095069,51.50751520458021,98.42132879558363,74.45273170973127,48.47059553861617,39.84074703643196
|
||||||
|
21052631.57894737,38.28674632624576,88.61726499858655,38.54707135652241,32.62826583259984,51.20372069509406,31.695851978502777,49.42035880841706,25.077611722444235,41.08017173566316,36.683732635096504,41.534054354617474,113.68805142452842,81.35287575972707,23.39544168271517,51.68273172880473,48.26376187173944,67.90966686449553,102.67105975903962,117.30013686732242,38.7832320614865,22.784806954233275
|
||||||
|
23684210.52631579,35.66065367272026,105.75405805989315,46.75854296433298,35.39752160875421,63.491124046476266,40.093452623015956,47.46454145406422,37.4789982093008,43.01326150015781,46.46034582665092,42.260203116818474,113.42279282369113,63.84576871520595,38.857609259454826,45.14887053088138,57.85811551621086,96.90247347480373,112.75141496407359,146.27356973447297,75.26643239824396,37.109446475380345
|
||||||
|
26315789.47368421,31.79018675653558,150.29831253854852,61.520441795650285,65.30472025118377,74.5737415614881,63.103047270523874,74.47154268465545,56.1377770022342,110.07281300896092,49.43393192793194,56.55144682683442,149.9051325446681,95.1966239778619,43.00841266230533,121.19370550858346,54.78556738401714,107.96995740187795,144.9742881875289,158.83448455208227,112.80655241012573,34.726112842559814
|
||||||
|
28947368.42105263,34.886604942773516,182.68540041070236,52.63218028294413,74.24405795649479,98.45311747099223,83.63654175557589,98.15259555766457,83.00712973193119,152.04949614876193,60.033586916170634,104.44645261764528,183.34212626908953,145.3882823994285,55.8120873350846,155.29590370780545,52.760886982867596,135.8035633689479,149.5857477941011,174.62654038479454,119.70657352397316,56.90697970515803
|
||||||
|
31578947.368421055,42.14771624615319,202.42679094013414,52.38267727902063,86.16361492558535,116.33401408948396,82.56993238549485,123.30445560656098,90.39040706032202,137.23200105365945,40.95219484128452,74.46030948036596,199.67874667519,197.26049192328207,95.94568357969584,134.3780511554919,49.24194752542601,126.69745851817879,172.39319590518346,173.57390514173008,134.63159445712444,63.719783306121826
|
||||||
|
34210526.315789476,43.65013820246645,217.50729831896336,62.131120148457974,115.80259094740214,128.52460966612168,95.68277384105482,124.53052769209206,97.55255219810887,81.50745273891249,47.45693647861479,80.01052587910702,156.18299102783214,207.39208640550308,110.61193823814396,156.5341895505002,80.74786253979332,121.23273363866309,162.08628305635955,190.38883532975854,143.8943206385562,77.16892081812811
|
||||||
|
36842105.2631579,41.94657889165378,245.93722017187818,55.185322447826984,135.67933988571173,147.6677642621492,95.9600271425749,107.22564328344245,93.87127048090883,117.41437731291124,48.61179826134132,79.67655578412509,242.42287349700928,182.9434824491802,137.05576708442285,161.4447242837203,81.30902413318032,131.1828931256344,160.34503550278512,207.20630570461876,137.0641293776663,112.80630558415466
|
||||||
|
39473684.21052632,58.79436187367691,212.42795580311824,56.41915478204427,164.73655364387912,139.29991523843063,94.7229819925208,131.44584129986015,105.2594486035799,168.36056754463598,73.8486905223445,83.05311863045942,250.90402429982237,201.20104995526768,139.37954770891292,153.56256239037768,83.17541534022281,118.41643432566998,162.2819593329179,186.46040652927596,159.94275539799742,120.8470532266717
|
||||||
|
42105263.15789474,64.42797510247482,147.64111830058846,60.26418444984837,158.13086258737667,136.83324552837172,105.07864440114875,175.86086995978107,145.38353729248047,174.84317819695724,85.31340006778115,98.05986544960426,264.37591231496714,194.34444628263773,150.8557458174856,168.78205148797284,93.2750462983784,154.0627132214998,184.64032062731292,187.95712240118732,183.14200230648646,126.56950057180306
|
||||||
|
44736842.10526316,63.05825469368383,119.0508030213808,67.56669671911943,184.729963001452,119.09256009051673,98.55030614451357,131.52183355783157,156.8359883208024,151.64848545977944,83.73348729233992,125.89496096811797,249.2035153539557,159.97292945259494,135.16470550235948,164.3975305557251,91.16130136188707,160.4443457001134,171.18832497847706,200.91943379452354,192.18329652987026,154.0876652817977
|
||||||
|
47368421.05263158,67.45054091905293,188.57196054960554,69.25454774655793,166.7225661026804,122.31587982177736,94.99036314612941,136.75466273960316,167.56895547164115,163.80847815463417,76.43427557694285,99.01034643775537,248.06674073871815,183.3102160002056,154.41465392865632,173.13884579507928,68.79246320222553,186.41125699093467,166.97562890303763,184.13067767494604,180.09653121546694,120.06530277352583
|
||||||
|
50000000.0,69.65415954589844,222.86355590820312,61.62987518310547,135.8297119140625,148.7770538330078,99.98258209228516,207.14266967773438,184.39578247070312,193.70370483398438,78.28141784667969,70.91169738769531,270.7571105957031,221.07476806640625,182.5941162109375,166.03514099121094,53.1497802734375,204.63912963867188,178.64535522460938,186.98333740234375,188.1245574951172,85.73209381103516
|
||||||
|
21
results/AcrobotSwingup_fasttd3_small.csv
Normal file
21
results/AcrobotSwingup_fasttd3_small.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24
|
||||||
|
0.0,16.287094116210938,9.519023895263672,4.587231159210205,20.571439743041992,27.30125617980957,13.915904998779297,43.75962448120117,7.398332595825195,11.781911849975586,5.428969383239746,9.077356338500977,33.518253326416016,10.437522888183594,11.8486328125,6.978132247924805,33.936607360839844,18.351730346679688,5.592342376708984,7.198254585266113,17.855012893676758,18.936899185180664,4.449044227600098,14.971601486206055,5.235752582550049,6.302183151245117
|
||||||
|
2631578.947368421,16.421380137142382,10.056043079024867,4.662623287031525,21.04422082712776,27.586748220418627,14.44254457950592,43.875450799339696,7.770613846025969,11.924832394248561,5.583928081550097,9.636894740556416,33.95280931497875,10.44215615956407,12.334181158166183,7.188560103115282,34.73577610442513,18.441976864086953,5.693842956894323,7.268998602503224,17.906796514987946,19.13224710288801,4.521706488571669,15.107418784969731,5.526400786481406,6.619853305189233
|
||||||
|
5263157.894736842,21.361631732237967,28.318635200199328,7.797155866497442,37.381322434074,37.75667086400484,33.560849014081455,48.312501995187056,20.992702251986454,17.82285982056668,11.236212222199692,29.6345266229228,48.92006230354309,11.31641540715569,29.244621910546954,14.7860443717555,61.56724217063502,22.18982083546488,10.517626210262902,10.45357128193504,20.043819207894174,26.367242380192405,7.0919759022562125,20.14383306001362,15.893065179649152,18.051533636293914
|
||||||
|
7894736.842105264,26.056687750314413,22.142802790591592,16.60792303085327,36.10195835013139,43.11333872142591,48.11219581804778,55.122068442796404,25.632349409555133,34.312123913513986,16.133165528899745,39.53154394501134,46.8566004288824,25.81226831360868,30.529674677472368,19.601410341890237,45.22117100891314,34.154705210735926,32.26232831729086,23.132305261335876,26.22709031481492,34.101866264092294,8.21936057115856,25.312087752317126,20.015055273708544,25.26831566660028
|
||||||
|
10526315.789473685,27.38552354511462,30.38068505337364,20.162110504351165,43.34544969859876,57.31569322786833,56.92737629539088,54.04297878867702,26.17458702388563,46.291572169253705,21.158217405018053,48.94438896681133,51.25651866511295,52.84580353686684,38.500591805106716,22.67075528596577,42.50031285536917,52.803925464027806,33.53385511197542,32.36817188011973,33.982845168364676,36.969710902163854,14.15533999392861,25.29677721073753,20.041901350021366,34.27549982070923
|
||||||
|
13157894.736842105,30.80395347821085,36.85353053870954,42.187363963378104,66.43124038294742,66.12823299357765,49.86494440781443,56.754242966049596,38.46823332811657,76.4212149067929,27.647250194298593,63.36426373531944,62.121768487127206,59.19179628397289,36.88565060339476,44.99805409029911,48.193437701777405,56.20987471781279,54.703254982044825,69.65226446954827,52.16454800806547,39.51263521846972,29.383961279141275,39.60771564433449,53.71195552223607,53.58391092325512
|
||||||
|
15789473.684210528,43.96609288767765,46.346220719186896,82.26313608571104,66.0013697272853,84.34098047959179,47.33048078888342,64.4124425210451,46.21369021817259,107.1608110227083,52.70972959618822,65.18659048331413,75.09479919232821,88.80393269187526,48.97378349304199,69.7719448491147,63.881811016484306,51.58831567513315,61.8734909735228,83.2185062107287,54.7519380418878,46.02017393865084,42.461149341181724,50.340063534284894,95.75055320639359,89.4950034994828
|
||||||
|
18421052.63157895,50.23525526649074,61.50189724721406,103.80597896324961,80.63195464485571,117.34456777572633,59.19724694051241,73.52777947877583,68.9801967269496,134.1733152991847,85.3421406871394,100.76200361000866,85.67804071777746,99.9848475456238,54.716643691062934,110.73270924467789,63.25767571047733,54.80461528426723,80.97548851213958,103.54271512282523,72.19664953256908,55.49492140192735,73.77047834898296,52.65499827108886,103.44908603868987,104.0086105246293
|
||||||
|
21052631.57894737,67.74022042123896,57.66859104758816,133.05308592946906,104.85747789081775,143.54523468017578,63.94674115431937,81.64914924220035,91.02104006315533,142.51776283665706,95.18521449440405,137.3905721965589,106.51264732762388,126.02176565872996,71.34742295114617,108.72206627695184,74.0683728268272,63.01840455908525,96.61264098318,132.38460601003547,71.63434545617356,65.51847979896947,82.19315830029939,68.17025305095471,125.74210197047182,140.32913770173727
|
||||||
|
23684210.52631579,89.8769181402106,84.87032641862568,152.11012310730783,149.80162592938072,159.56479511762922,89.89506904702438,89.92486411646793,117.66022782576712,128.3806041918303,137.5425827126754,152.77642390602514,128.68560053172865,137.7969227088125,83.73427398581254,126.21492806233859,82.64666007694446,65.93250309793572,120.96357506199887,139.9765312797145,116.575527078227,84.5733994308271,86.28288592790302,67.83383710760819,115.67570334986637,153.62613208670365
|
||||||
|
26315789.47368421,114.26601520337556,93.76450779563501,190.09130211880333,173.77456926044664,177.81273691277755,127.27064479024787,87.39835774271111,137.6591692472759,151.1868812159488,155.24181029671118,170.6516284942627,145.53067297684518,160.40461650647615,92.1872336738988,162.85533905029297,99.92150472339831,68.8617030946832,156.52020916185882,151.68310536836321,131.58652451163843,104.94969415664673,109.96378218500237,72.29348523993241,131.29652801312898,163.15333235891242
|
||||||
|
28947368.42105263,122.41626327916195,133.84884420194123,207.31056414152448,198.09824790452657,170.06713194596142,127.26082776722154,91.81295121343513,145.97237960915817,167.87781328904,145.58369874954224,188.92073912369577,133.8795719774146,169.23453162845811,96.16378713908948,173.42941249044318,100.4667820930481,90.6223213672638,174.71190690994263,161.78474742487856,135.2476440479881,96.78860313013979,130.6538378564935,82.91003312562641,111.33236460936696,175.15652127014963
|
||||||
|
31578947.368421055,145.70292944657174,154.49581999527786,196.8035216080515,206.22886898643094,152.96537298905224,146.67974883631658,101.19485046989043,156.73033664101052,177.02410768207747,169.2260831531726,198.04608967429715,127.78581794939554,184.50897548073218,109.48155041744835,167.64149655793844,103.55727562151453,91.79166778765226,178.19568603917173,160.45962825574372,133.88398933410645,125.93390976755246,136.4324505454616,79.36228762174906,120.42866807234914,168.53483611659
|
||||||
|
34210526.315789476,145.13476906324686,181.0156959985432,196.96632676375538,219.36357698942484,174.92123664052866,160.18036340412343,116.44782897045737,184.47489683251632,155.5202418628492,204.00147480713696,200.63506276983964,191.83462117847643,192.65076958505733,110.6756746392501,180.92265563262137,96.38060310012419,96.83216679723641,186.28922005703572,169.99148961117396,146.65858916232463,151.2074738050762,164.81994503422789,69.92559983855799,127.15519405666153,166.59142622194793
|
||||||
|
36842105.2631579,153.25818342911572,177.61119591562368,185.54612691778885,205.48416830364025,194.97514383416427,169.0116389927111,112.93625131406283,191.1707026331048,169.76137090984145,218.34308072140342,211.42340118006658,190.43515556736995,194.09800233339007,97.26928690860143,184.24711483403257,115.22883121590864,109.81641312649376,172.25729094053568,183.7776959067897,157.71640115035208,151.01735260612085,169.25614894063847,77.43704221123146,146.09945086428993,170.15413394727207
|
||||||
|
39473684.21052632,174.00993527864154,173.4664195462277,190.27979253467763,187.4821865182174,207.23032710426733,168.04096028679297,108.28894090652463,188.1237645149231,181.1585824615077,208.1002385992753,223.78642079704684,196.18918883173092,187.94592272607903,86.08651335615863,195.87506048302902,123.7816006384398,114.92910612256905,177.02735617286282,194.24528091832212,171.77286777998273,152.0886357959948,155.6707747108058,103.52223518020229,148.2211524812799,185.68032312393188
|
||||||
|
42105263.15789474,149.12492571379008,189.59115680895354,211.65512727436268,188.90756024812399,222.34170532226562,172.42721637926604,99.95464967426503,186.3722775107936,193.89571179841695,185.30610977975945,209.62792085346422,185.85879155209187,201.30312467876234,133.71206564652294,208.58982889275802,145.61541587428044,129.02138880679482,185.5816899349815,202.50917936626234,194.06424311587685,163.40277862548828,156.7138214111328,106.52769289518658,148.12367047761617,192.17261023270456
|
||||||
|
44736842.10526316,112.16595446436028,195.17130994796753,217.09464494805587,203.96229997434114,218.59926120858444,184.17645692825317,116.61314033207141,189.38473520780866,215.5696160918788,190.83929910157858,195.87477217222514,182.3937087811922,205.65431386546084,160.96340656280518,201.25900002529747,155.24908404601248,129.4638685929148,197.94397042926988,198.7030984226026,191.8459028695759,185.69810520975216,166.76698185268202,95.27756620708264,140.59051365601388,195.0851933077762
|
||||||
|
47368421.05263158,146.875292627435,204.24813155124062,194.46876992677386,228.11076786643582,218.22915333195738,196.66796232524672,109.01915881508275,204.0876811178107,209.54677260549445,206.95813450060393,203.44305565482694,192.86889181639017,197.8119333166825,165.4711187262284,174.31547531328704,159.99122619628906,129.88634305251273,208.15185125250565,198.8398228695518,188.9094945004112,200.28832897387053,190.1343819467645,93.6343687960976,137.6984423085263,202.6962440390336
|
||||||
|
50000000.0,189.524169921875,208.85409545898438,187.1713104248047,241.01425170898438,217.15151977539062,205.97315979003906,101.03228759765625,212.29312133789062,203.9116668701172,204.74691772460938,216.1698455810547,173.5381317138672,196.7125701904297,169.46923828125,159.1580810546875,159.99122619628906,131.65098571777344,206.62130737304688,198.2906036376953,195.96603393554688,198.23983764648438,207.55056762695312,99.96365356445312,143.13748168945312,201.30123901367188
|
||||||
|
21
results/AcrobotSwingup_fasttd3_small_fasttd.csv
Normal file
21
results/AcrobotSwingup_fasttd3_small_fasttd.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24
|
||||||
|
0.0,16.287094116210938,9.519023895263672,4.587231159210205,20.571439743041992,27.30125617980957,13.915904998779297,43.75962448120117,7.398332595825195,11.781911849975586,5.428969383239746,9.077356338500977,33.518253326416016,10.437522888183594,11.8486328125,6.978132247924805,33.936607360839844,18.351730346679688,5.592342376708984,7.198254585266113,17.855012893676758,18.936899185180664,4.449044227600098,14.971601486206055,5.235752582550049,6.302183151245117
|
||||||
|
2631578.947368421,16.421380137142382,10.056043079024867,4.662623287031525,21.04422082712776,27.586748220418627,14.44254457950592,43.875450799339696,7.770613846025969,11.924832394248561,5.583928081550097,9.636894740556416,33.95280931497875,10.44215615956407,12.334181158166183,7.188560103115282,34.73577610442513,18.441976864086953,5.693842956894323,7.268998602503224,17.906796514987946,19.13224710288801,4.521706488571669,15.107418784969731,5.526400786481406,6.619853305189233
|
||||||
|
5263157.894736842,21.361631732237967,28.318635200199328,7.797155866497442,37.381322434074,37.75667086400484,33.560849014081455,48.312501995187056,20.992702251986454,17.82285982056668,11.236212222199692,29.6345266229228,48.92006230354309,11.31641540715569,29.244621910546954,14.7860443717555,61.56724217063502,22.18982083546488,10.517626210262902,10.45357128193504,20.043819207894174,26.367242380192405,7.0919759022562125,20.14383306001362,15.893065179649152,18.051533636293914
|
||||||
|
7894736.842105264,26.056687750314413,22.142802790591592,16.60792303085327,36.10195835013139,43.11333872142591,48.11219581804778,55.122068442796404,25.632349409555133,34.312123913513986,16.133165528899745,39.53154394501134,46.8566004288824,25.81226831360868,30.529674677472368,19.601410341890237,45.22117100891314,34.154705210735926,32.26232831729086,23.132305261335876,26.22709031481492,34.101866264092294,8.21936057115856,25.312087752317126,20.015055273708544,25.26831566660028
|
||||||
|
10526315.789473685,27.38552354511462,30.38068505337364,20.162110504351165,43.34544969859876,57.31569322786833,56.92737629539088,54.04297878867702,26.17458702388563,46.291572169253705,21.158217405018053,48.94438896681133,51.25651866511295,52.84580353686684,38.500591805106716,22.67075528596577,42.50031285536917,52.803925464027806,33.53385511197542,32.36817188011973,33.982845168364676,36.969710902163854,14.15533999392861,25.29677721073753,20.041901350021366,34.27549982070923
|
||||||
|
13157894.736842105,30.80395347821085,36.85353053870954,42.187363963378104,66.43124038294742,66.12823299357765,49.86494440781443,56.754242966049596,38.46823332811657,76.4212149067929,27.647250194298593,63.36426373531944,62.121768487127206,59.19179628397289,36.88565060339476,44.99805409029911,48.193437701777405,56.20987471781279,54.703254982044825,69.65226446954827,52.16454800806547,39.51263521846972,29.383961279141275,39.60771564433449,53.71195552223607,53.58391092325512
|
||||||
|
15789473.684210528,43.96609288767765,46.346220719186896,82.26313608571104,66.0013697272853,84.34098047959179,47.33048078888342,64.4124425210451,46.21369021817259,107.1608110227083,52.70972959618822,65.18659048331413,75.09479919232821,88.80393269187526,48.97378349304199,69.7719448491147,63.881811016484306,51.58831567513315,61.8734909735228,83.2185062107287,54.7519380418878,46.02017393865084,42.461149341181724,50.340063534284894,95.75055320639359,89.4950034994828
|
||||||
|
18421052.63157895,50.23525526649074,61.50189724721406,103.80597896324961,80.63195464485571,117.34456777572633,59.19724694051241,73.52777947877583,68.9801967269496,134.1733152991847,85.3421406871394,100.76200361000866,85.67804071777746,99.9848475456238,54.716643691062934,110.73270924467789,63.25767571047733,54.80461528426723,80.97548851213958,103.54271512282523,72.19664953256908,55.49492140192735,73.77047834898296,52.65499827108886,103.44908603868987,104.0086105246293
|
||||||
|
21052631.57894737,67.74022042123896,57.66859104758816,133.05308592946906,104.85747789081775,143.54523468017578,63.94674115431937,81.64914924220035,91.02104006315533,142.51776283665706,95.18521449440405,137.3905721965589,106.51264732762388,126.02176565872996,71.34742295114617,108.72206627695184,74.0683728268272,63.01840455908525,96.61264098318,132.38460601003547,71.63434545617356,65.51847979896947,82.19315830029939,68.17025305095471,125.74210197047182,140.32913770173727
|
||||||
|
23684210.52631579,89.8769181402106,84.87032641862568,152.11012310730783,149.80162592938072,159.56479511762922,89.89506904702438,89.92486411646793,117.66022782576712,128.3806041918303,137.5425827126754,152.77642390602514,128.68560053172865,137.7969227088125,83.73427398581254,126.21492806233859,82.64666007694446,65.93250309793572,120.96357506199887,139.9765312797145,116.575527078227,84.5733994308271,86.28288592790302,67.83383710760819,115.67570334986637,153.62613208670365
|
||||||
|
26315789.47368421,114.26601520337556,93.76450779563501,190.09130211880333,173.77456926044664,177.81273691277755,127.27064479024787,87.39835774271111,137.6591692472759,151.1868812159488,155.24181029671118,170.6516284942627,145.53067297684518,160.40461650647615,92.1872336738988,162.85533905029297,99.92150472339831,68.8617030946832,156.52020916185882,151.68310536836321,131.58652451163843,104.94969415664673,109.96378218500237,72.29348523993241,131.29652801312898,163.15333235891242
|
||||||
|
28947368.42105263,122.41626327916195,133.84884420194123,207.31056414152448,198.09824790452657,170.06713194596142,127.26082776722154,91.81295121343513,145.97237960915817,167.87781328904,145.58369874954224,188.92073912369577,133.8795719774146,169.23453162845811,96.16378713908948,173.42941249044318,100.4667820930481,90.6223213672638,174.71190690994263,161.78474742487856,135.2476440479881,96.78860313013979,130.6538378564935,82.91003312562641,111.33236460936696,175.15652127014963
|
||||||
|
31578947.368421055,145.70292944657174,154.49581999527786,196.8035216080515,206.22886898643094,152.96537298905224,146.67974883631658,101.19485046989043,156.73033664101052,177.02410768207747,169.2260831531726,198.04608967429715,127.78581794939554,184.50897548073218,109.48155041744835,167.64149655793844,103.55727562151453,91.79166778765226,178.19568603917173,160.45962825574372,133.88398933410645,125.93390976755246,136.4324505454616,79.36228762174906,120.42866807234914,168.53483611659
|
||||||
|
34210526.315789476,145.13476906324686,181.0156959985432,196.96632676375538,219.36357698942484,174.92123664052866,160.18036340412343,116.44782897045737,184.47489683251632,155.5202418628492,204.00147480713696,200.63506276983964,191.83462117847643,192.65076958505733,110.6756746392501,180.92265563262137,96.38060310012419,96.83216679723641,186.28922005703572,169.99148961117396,146.65858916232463,151.2074738050762,164.81994503422789,69.92559983855799,127.15519405666153,166.59142622194793
|
||||||
|
36842105.2631579,153.25818342911572,177.61119591562368,185.54612691778885,205.48416830364025,194.97514383416427,169.0116389927111,112.93625131406283,191.1707026331048,169.76137090984145,218.34308072140342,211.42340118006658,190.43515556736995,194.09800233339007,97.26928690860143,184.24711483403257,115.22883121590864,109.81641312649376,172.25729094053568,183.7776959067897,157.71640115035208,151.01735260612085,169.25614894063847,77.43704221123146,146.09945086428993,170.15413394727207
|
||||||
|
39473684.21052632,174.00993527864154,173.4664195462277,190.27979253467763,187.4821865182174,207.23032710426733,168.04096028679297,108.28894090652463,188.1237645149231,181.1585824615077,208.1002385992753,223.78642079704684,196.18918883173092,187.94592272607903,86.08651335615863,195.87506048302902,123.7816006384398,114.92910612256905,177.02735617286282,194.24528091832212,171.77286777998273,152.0886357959948,155.6707747108058,103.52223518020229,148.2211524812799,185.68032312393188
|
||||||
|
42105263.15789474,149.12492571379008,189.59115680895354,211.65512727436268,188.90756024812399,222.34170532226562,172.42721637926604,99.95464967426503,186.3722775107936,193.89571179841695,185.30610977975945,209.62792085346422,185.85879155209187,201.30312467876234,133.71206564652294,208.58982889275802,145.61541587428044,129.02138880679482,185.5816899349815,202.50917936626234,194.06424311587685,163.40277862548828,156.7138214111328,106.52769289518658,148.12367047761617,192.17261023270456
|
||||||
|
44736842.10526316,112.16595446436028,195.17130994796753,217.09464494805587,203.96229997434114,218.59926120858444,184.17645692825317,116.61314033207141,189.38473520780866,215.5696160918788,190.83929910157858,195.87477217222514,182.3937087811922,205.65431386546084,160.96340656280518,201.25900002529747,155.24908404601248,129.4638685929148,197.94397042926988,198.7030984226026,191.8459028695759,185.69810520975216,166.76698185268202,95.27756620708264,140.59051365601388,195.0851933077762
|
||||||
|
47368421.05263158,146.875292627435,204.24813155124062,194.46876992677386,228.11076786643582,218.22915333195738,196.66796232524672,109.01915881508275,204.0876811178107,209.54677260549445,206.95813450060393,203.44305565482694,192.86889181639017,197.8119333166825,165.4711187262284,174.31547531328704,159.99122619628906,129.88634305251273,208.15185125250565,198.8398228695518,188.9094945004112,200.28832897387053,190.1343819467645,93.6343687960976,137.6984423085263,202.6962440390336
|
||||||
|
50000000.0,189.524169921875,208.85409545898438,187.1713104248047,241.01425170898438,217.15151977539062,205.97315979003906,101.03228759765625,212.29312133789062,203.9116668701172,204.74691772460938,216.1698455810547,173.5381317138672,196.7125701904297,169.46923828125,159.1580810546875,159.99122619628906,131.65098571777344,206.62130737304688,198.2906036376953,195.96603393554688,198.23983764648438,207.55056762695312,99.96365356445312,143.13748168945312,201.30123901367188
|
||||||
|
21
results/AcrobotSwingup_full_large_data.csv
Normal file
21
results/AcrobotSwingup_full_large_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31,trial_32,trial_33,trial_34,trial_35,trial_36,trial_37,trial_38,trial_39,trial_40,trial_41,trial_42,trial_43,trial_44,trial_45,trial_46,trial_47,trial_48,trial_49
|
||||||
|
0.0,13.131850242614746,19.088336944580078,18.534748077392578,9.736234664916992,26.78424072265625,14.534576416015625,14.604934692382812,30.73688316345215,30.384143829345703,13.160032272338867,16.67830467224121,32.946624755859375,16.287212371826172,11.276420593261719,20.979473114013672,31.550506591796875,10.902191162109375,25.663440704345703,12.663228988647461,20.764907836914062,15.645150184631348,9.958779335021973,16.15315818786621,13.715818405151367,21.309791564941406,17.166309356689453,24.48415756225586,31.02811622619629,21.977977752685547,15.927885055541992,10.854205131530762,43.514427185058594,36.47843933105469,18.705127716064453,9.01789665222168,14.974609375,28.061336517333984,14.599555015563965,29.38121795654297,16.62999725341797,28.061494827270508,23.176982879638672,18.30661392211914,21.846527099609375,31.955196380615234,5.222922325134277,24.149133682250977,14.076574325561523,4.547811508178711,12.979728698730469
|
||||||
|
10526315.789473685,14.578516896496788,21.27217122720467,20.773169099861484,11.257422922499003,28.163551497802015,16.108522453425334,16.305835927426404,31.914930359112134,31.654923331921204,14.13211430079843,16.807565564310327,34.1331594031913,17.809194357606515,12.24502856024332,21.403429960971312,32.16119472987929,11.984503532494122,26.32972139672188,13.772950235387915,22.997165271697614,16.157506279041087,10.109449690636092,17.32452764079862,15.751903628300969,24.12469719399871,16.5026787961214,26.012534148588653,31.82195441520301,23.63026851614798,17.90123860484861,12.514019008542714,44.73422487329826,38.433206619008594,20.170953891459348,9.993477400836028,16.077604318723868,29.58831166886722,15.352139616774659,31.53048250469227,17.583394926577974,29.377068005971754,24.721233597329697,19.636690188116727,24.486644240379995,34.68676498308365,6.277898443292651,25.389025488300792,15.99386529158543,5.199360155281186,14.043891634059415
|
||||||
|
21052631.57894737,41.9699082075724,60.350450819982555,56.61529349665727,38.22343111665626,49.91949587340088,44.55280600962877,44.23908254229708,54.835552537375214,53.801255558563234,31.40268119545318,21.69965727408507,56.909939696068086,45.43346053659091,31.12955591002082,31.30991787213698,45.604040135306995,31.727594151453133,40.37715117086417,34.95416063912357,63.39542901804903,27.402238714036603,14.382858662409426,38.32895942775331,56.13486680842503,73.60611767398188,5.658291847615341,50.882046328522165,47.28833870546392,52.344143393256,51.342227033135636,41.6641316787234,64.9429931675298,75.4204476021251,46.08789759267565,31.102428219582983,38.78266359739769,56.30345803088511,30.133151580133266,69.80889788439234,34.4516262237674,53.719430947526675,54.61095216531833,41.507692631715884,68.94802934005651,80.59594458799778,25.674573244480545,50.033174255341706,52.402261338034165,19.086892241988735,33.04253505725527
|
||||||
|
31578947.368421055,71.82923842570294,89.20940393795598,51.86431584007771,53.814702484065315,49.7774284479629,66.03723991206148,55.40710610526918,83.80683302745182,67.82404819045247,46.04743041713153,51.355510519874244,88.07472382372355,65.4418163264578,58.76778662556634,62.40782748641566,75.9722700920214,49.72562147342598,74.92825485667902,62.36576134101631,92.06652362562119,53.2729889456998,39.39577382732793,55.57664816308551,106.88883059851598,101.97433286661753,6.791727328639467,50.71064958073344,79.33624879409074,65.38555961405638,68.41926057046473,61.29456652914513,71.38773377045702,112.8603146817752,62.09150616413917,75.47236712142785,71.8000953380132,71.04090361107255,49.3084470618444,93.13644092860233,47.45984061021059,81.9267141872661,89.51191180896974,45.86237815521927,82.18942804762533,87.71783483899367,43.57849113421304,90.9641745901174,88.39200949594586,50.5276216131292,47.68582182062837
|
||||||
|
42105263.15789474,102.5467359871085,139.96036380843114,84.70142667164762,71.85725436332814,100.21967437582664,97.97964815849083,132.59601531183952,107.53529505188115,108.07909900032583,83.49446996815317,94.61736524187627,128.3878988167916,82.7371334286608,93.14794400160993,103.8310020354646,112.06424977079324,89.71856673082488,127.82209061395758,85.05144835137594,109.76285609776293,81.2382294116258,98.9263033129667,79.95390604556103,113.77420615778405,150.723114723479,5.329706375494888,78.32454566603883,156.57926234759782,93.74829114284212,129.1020234189205,107.35965868838937,108.43264617236368,127.01024321547176,94.02454384517472,105.26959627008173,81.21135687943641,97.52814887682817,60.013150132635296,123.49805149196588,82.51805546417461,128.46684939948807,104.98790308494648,88.01630801771486,135.03749597122135,108.96623901217929,65.43625644868926,143.03488636132423,121.14057619584896,86.40538589784313,84.16920916459567
|
||||||
|
52631578.94736842,144.13979146908196,185.54390390105854,98.2669547105884,92.4629613315052,131.94131366485456,154.6929700062133,173.89696222490056,143.52236334624547,138.64938037747575,126.52989631653905,117.31610031694257,132.54583404933481,119.88830440726719,116.41639239524706,114.50272682162615,183.89190203549953,162.20418576380223,147.86475339579368,75.40463155439933,119.0273981627665,107.91543696585454,143.05149236911717,88.33033488051052,148.59873948945892,195.68585212896076,7.092535126448668,113.32339880446236,172.85401617308403,141.3737898319547,149.02812729375515,141.0219228257763,127.66075503962357,155.6178410835461,150.22101113361168,120.67198953189347,149.87697301156962,130.38291655509755,78.48837357414808,175.19228991828962,129.24023185977893,144.72595483101802,136.07077052173852,127.4604304423788,163.9138241316143,164.69921980249254,106.42125280686444,166.73256900518555,219.82055856060452,100.12516405369436,148.2886260774327
|
||||||
|
63157894.73684211,135.7850016231682,186.65099440816368,131.3567391421409,111.08136356372253,159.88894375151543,177.29128979340484,227.09807373521406,163.60770072591933,126.39248155982045,160.13549264472937,155.82748588275712,133.4752865526155,149.64404376309335,155.55924040632237,163.01035111993966,248.5377308453054,201.9842170992882,232.17162564586735,129.61881628772892,130.58126931102984,142.20630389410707,215.9150707041458,136.95182849404887,210.03073203084878,222.81034131599927,7.556761387714967,135.04575778172452,151.86440633835886,163.43747750494288,201.3468350323944,193.7429336574435,161.28514621883548,205.6992639995843,205.4439744288902,178.9154333227064,193.28771711237872,171.36529199461197,100.57917860686945,207.75617558292407,176.79813569676844,155.1181606532299,174.86386330363825,134.68320627820128,183.36056676828963,273.55055915104055,171.79813773744326,183.36939666698845,229.14018003514602,147.75354186814909,201.9667309184154
|
||||||
|
73684210.5263158,137.4941390458897,213.71141381697973,171.73881925444357,147.01024638058573,167.2011874746583,189.50615948368474,269.9545277954799,184.40571114317697,150.01981112195847,179.63344627045035,206.2057923066567,184.64895688075768,157.94594263039798,219.3244845986449,180.9082957308048,249.44355805461757,240.05342299472593,257.1974964406847,165.72504083127032,160.59547806747898,170.02064665615393,228.51437470634724,170.99889279183753,277.1303765950441,251.35156073489347,7.395047835505198,144.02190146446642,169.80719205852694,184.7310018231499,235.6958190701345,262.6069913634468,175.63935190488758,198.98004524987655,260.599305703039,208.86079193449416,199.70475362142204,187.74416078457875,132.40912145433995,227.03161739733412,209.68270899640225,163.7439955285134,188.33631447461173,159.8010989797,221.83098979125063,271.07631585034966,200.3558374828249,211.7923857372801,259.0435818443668,171.81979007032442,237.65909953683698
|
||||||
|
84210526.31578948,190.6054479266798,236.52964512793312,194.63286740264735,165.86403157341184,201.63207120578375,233.25441333296556,273.13909049839856,211.0717185121494,197.94882644807865,236.06067579274693,263.21099116333306,217.37264484051525,202.85230090875706,245.39041472967313,193.4165912770829,257.1536286276793,276.4397002848232,270.20338615221993,179.17934460223876,207.43956020532224,177.23825545654404,265.18099309169685,184.90645077089854,256.629547566258,274.482604477214,7.136222053195383,213.64018115574635,204.45596312188707,218.21476130142108,254.27167203221626,270.367992820502,184.1062668436452,237.02938870379802,281.225585975145,242.78301077470226,239.44694030780212,217.7870300767164,163.65623399821675,232.40816722153957,228.95322476793856,206.36649878566615,207.62960287012223,193.73775794466448,224.61237972628047,286.75268723429735,249.82306382266438,242.21513200730828,290.3233504810492,206.97715675599687,259.40182058831
|
||||||
|
94736842.10526316,218.6093478828999,260.4235771085417,239.87491197194748,202.4542374744227,207.7642742862662,268.02659345870205,290.23976702662054,251.39520895018802,230.5822905680149,259.58972346543277,264.7963478177208,245.4519365710152,245.5206414316252,271.71887550476185,230.6868558583002,269.9336736153945,327.2937162067752,286.6697595445568,199.68922275746462,259.1918790185633,191.22278106906077,291.9408217033686,204.12259862751512,279.28572648763657,299.20330514892976,8.207333623845688,274.6120527493161,220.67371542929283,192.45959641002221,284.0899253319175,305.4111643114909,184.23090640990029,282.07258640407196,307.097376135908,301.8436694941032,267.99307128654954,224.09875334625926,182.24871467553348,235.69674135880788,250.1833234303876,208.02950419956133,239.22385424152637,235.79763301340168,235.00211449462788,317.38724400313606,253.67784693527585,252.19745760214957,316.03926547915984,227.03805852526608,283.6554968045854
|
||||||
|
105263157.89473684,233.7070014661699,236.4588412384082,227.86542598552322,259.2314850423805,203.80724647558628,310.5145723827988,324.4786443622819,260.90657832781034,253.22746316077306,305.4351500477487,264.23625691511626,255.15465027622238,264.0947916591927,298.43461567328575,241.13049423612057,287.9545368621554,326.44009964783106,280.21253364089455,223.60327494375593,262.24277615464626,199.62979901885393,313.0425877153378,232.80092922042942,297.56597193968264,347.3181179527431,8.553666785209751,284.90756135931304,240.02197442581448,215.9974652583249,326.78153358073774,330.0559985472555,181.14412818762403,294.22331209872897,328.49864373081607,318.23630509317087,279.33334811869753,227.04866879509757,204.29448835646676,243.1798177373046,255.5653329281926,211.90322889449524,225.5689285417343,251.21839637967688,266.3837017137918,364.3349415684671,266.09452537180977,310.4641024762574,332.3559288919137,263.4665492849832,244.38903130058438
|
||||||
|
115789473.68421052,276.5488656624697,295.13956372745315,235.62601564597554,300.2803348636363,232.76246946587787,315.18976279308924,347.12224755417606,314.5844160807925,298.56801354365007,321.80716903883336,333.4719323181544,274.66721459604037,301.0560708523127,315.30347500019127,299.7768020316174,309.4053590409148,337.83500375856653,329.3117125771547,240.79432223671196,251.3774658264876,189.86827175575115,351.90785649757305,252.71035989466796,275.21311055383853,375.1182126095751,6.754631191361799,272.794443650424,310.3462960501291,238.49437050100346,324.63535441437585,313.85982853091656,231.43473563737484,311.3839022262935,359.72830884592025,287.3158475172157,284.8178030305292,267.83785231093617,224.5355625346592,277.10290653718806,273.2543876255318,258.7496281216679,290.93817221457937,269.5797844337624,273.58192458377323,406.38162375602695,317.9204851158603,366.94428695503035,386.90347374674354,264.0266434691867,321.07867583615956
|
||||||
|
126315789.47368422,274.51146465985727,356.3357103802161,302.566576299575,303.1054287861589,307.09774730096575,306.7411860452134,316.7586757433381,392.011989816074,293.8702307817348,330.68698898576963,383.5234444352729,265.70808594312695,301.0508737871191,353.41227520205643,307.05298712180917,316.43596926059087,304.7796152146568,383.14256066992016,309.95309426150504,282.07487103813577,202.81493695010113,361.9679604471555,260.12213428462974,300.47154465144365,397.6514699752972,9.66206645684374,311.40442080114696,315.9928800749316,264.36154607111735,318.15073041110156,325.49586192227474,280.4558618659789,350.588253217721,382.790769190009,327.63382848907384,328.59098073848406,324.6893846384377,259.1152530725313,314.23965203729034,295.8492951360106,284.54310578124347,316.6982381257984,361.0939780519941,228.82659471976126,430.73620994269356,356.08496555727277,371.2050633602195,433.10619049026036,287.51413181300313,414.49608479114124
|
||||||
|
136842105.2631579,295.8215606684005,327.52961309051904,372.79350010914493,337.1520495822556,347.35989879306993,335.94174062801204,311.34117528853983,360.7743715569583,261.0578707109032,357.6900739304082,378.1977548696658,283.28047672054447,336.9097182389937,388.5829952481381,323.7213373757136,389.4055765404927,335.8389141993015,333.0175647288147,313.72139500358094,294.3657518128279,259.0122548280248,335.72703765625766,264.21126048271014,319.0928314717027,383.63559428080293,10.211789956460821,320.43016694770955,370.8177819940523,241.37559441615335,374.3172611656943,320.056471314209,319.0331632962188,332.984270778967,377.43805342433853,344.35092811455684,348.98795242331033,340.4962717146094,325.69641256332403,340.0157494001772,281.3487462096762,340.022980649468,316.21160365282003,365.0943860438887,283.4469771658863,450.34652956601985,394.07381074852896,414.6581850288813,390.0941564739575,336.4237602438128,436.99119754395656
|
||||||
|
147368421.0526316,374.0069051256141,382.3320039012096,338.93124933998996,343.19654505834023,339.66070390308994,366.0526170763613,398.92071783971926,368.25265716416686,298.43632176625766,378.7850583355843,345.08800805275456,367.1963991775408,381.6090762050529,444.43383028269477,343.74826998740355,383.33993569074244,383.6911936925719,390.5027536956228,303.2677666812392,288.3322585948286,274.87440212743763,370.8470226250528,286.07339236709885,396.60684442553173,359.0064918633313,8.374565310260264,366.009077102854,362.6107564698956,334.14413163734616,432.1543775815053,333.7108969302059,381.19876484824687,360.5529421954274,357.8067900173552,345.8540894498125,352.8169089920633,368.2098680338371,303.30917481289674,372.478919465456,409.9435138652861,327.4440968452728,325.09087151751294,400.0577256953618,307.5088636112345,369.9902180070031,402.74654010111607,387.627925438729,443.03470360704415,358.61981710469627,344.57160458901575
|
||||||
|
157894736.84210527,319.4371367487385,438.8960359651296,335.93308903248027,335.33164424166455,377.79123810354696,421.27432754767904,388.8802927061296,407.54358048600835,360.80726494650435,305.40035057439366,341.5672449482775,414.31668923394835,412.78808228741724,379.2012600539628,375.4074236975978,410.36587960931405,436.19841861345105,450.76744908721827,350.559084634454,285.4714257022846,217.08753348808534,417.39305680106884,324.2628801504661,394.5788551277566,354.9956076909631,10.619728304296114,334.2232698433287,430.5284344669525,337.3103654057695,369.5456475475983,412.953185415334,380.24785154901053,434.92694193950325,326.12409786454856,406.38966707872885,414.9345823381086,382.84838017581905,291.42020477408187,345.0068082401627,402.705297378044,354.4140480656043,338.21994598948726,447.5115759876296,305.68947555384807,379.82456534704676,371.5780525730092,430.40356882160063,414.55645047949616,323.8916691483883,407.7883412755428
|
||||||
|
168421052.63157895,359.9550312826839,356.08956733502833,387.15063165495604,314.36751110467884,423.2236417561687,335.90961637391274,369.4368377418729,353.9260972662315,408.7101379312637,310.6273126892766,421.4139342572221,362.9750264173069,441.14840749492276,369.1346163564772,368.1828386565655,403.6807277143167,372.8448023716828,425.7752558399105,417.2959520625276,308.55465713432295,347.0596264775771,391.7235765760955,351.8399038711083,327.7099130543315,342.1851323714217,10.851874733730696,331.604548842623,437.52680651699075,347.54702171262284,428.9528711231792,476.58806410918936,354.2465774240256,431.91340778541036,406.8365396951375,314.2840775888712,432.46192768339967,399.6192189134719,323.48628729268125,400.52607633268406,345.84652253069044,402.5480046945926,338.3828897185603,414.76752688812087,427.88150703874,367.0536686667445,345.73365704702866,350.4733094746386,349.17639883997697,378.95735212046026,453.15244681022835
|
||||||
|
178947368.42105263,424.1862716408978,429.5524229884478,430.3123633185417,390.67559176031244,453.1678893142625,358.8468738827349,425.82012368156643,360.54822518175,424.43842392615005,312.8583559502855,455.43650840705783,416.2684081839888,344.4092289948232,400.416601183672,420.6666319183696,443.16356836643246,412.2710924451536,330.1673371512804,436.6210455428035,291.73771567432175,338.29016915582883,411.74352206401215,392.2218581018686,449.42959771187685,341.7862329938735,8.032284210549559,401.14537573182685,387.03339982676704,403.91989352779046,433.5477445312154,418.250723131971,369.31728773277223,398.2252201601409,424.3135207842757,438.15765449545063,429.67754491494964,393.81013845068264,375.688338032885,437.01060984900784,355.7741284737792,298.3854456870014,313.70276352655856,389.4333854320967,450.6199733239297,467.6602152320982,394.4946728019338,421.4124408141398,479.807273483425,379.86767738496167,443.0825177725002
|
||||||
|
189473684.21052632,359.3258810434645,432.6553371269617,435.3871922674602,398.98991420592625,460.6816918128415,470.9855809632761,404.2624583510151,325.3524176059007,261.3128609076101,431.16090246614,458.3621986724663,470.5636248974919,412.2272390866874,448.3094219941842,470.9078854649351,438.91100251542565,458.0437994643922,471.1209647756204,380.87846808469857,308.2953936943057,369.45044155596366,369.60889945961435,310.60073033726445,442.6246018825806,432.8135956378855,8.83426236120824,443.8104823538801,430.6483456809435,397.1606999028092,426.6657524432502,341.84687051614566,381.31631883112016,378.68289827243774,455.13216278906344,385.9636085672392,455.7124659457035,366.83695224299,394.1719890448195,456.90076180689886,459.6371409292036,411.5598555766975,393.2819310040355,364.16218982492455,444.16721364632866,470.61743459807207,390.71233736907345,450.98516168736353,468.0763415600124,413.6251806238682,415.5071907162336
|
||||||
|
200000000.0,332.2180620714238,394.53579566666957,449.4133102486008,427.2775546233905,326.10394409612604,485.5193360573367,422.4752121457928,449.85337178801234,395.59909669976486,410.41199009355745,400.621340826938,469.92181377975567,418.13865042673916,454.0215923990074,441.5672948391814,359.02545880016527,363.5961473756715,474.97294805081265,461.20647636997074,278.60043747644676,328.6688987989175,418.5614862598871,395.1100215221706,373.59512803899617,413.9753353187912,6.649205576481395,428.64490203637826,473.9824835893355,365.0857575959281,386.9181992615524,463.28309116551753,354.24556859072885,445.01417607539577,446.487778963227,391.882612062128,384.9880537296596,424.6376469511735,392.9517190189738,444.9073190124411,446.20645395391864,354.5054558641032,425.5205393637481,389.0466575355906,463.7441430750646,440.292606507477,422.71178463101387,390.3505477920959,443.23616884256666,398.3211184802808,395.9731214140591
|
||||||
|
21
results/AcrobotSwingup_full_medium_data.csv
Normal file
21
results/AcrobotSwingup_full_medium_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31,trial_32,trial_33,trial_34,trial_35,trial_36,trial_37,trial_38,trial_39,trial_40,trial_41,trial_42,trial_43,trial_44,trial_45,trial_46,trial_47,trial_48,trial_49
|
||||||
|
0.0,12.017431259155273,23.287109375,10.888079643249512,14.205253601074219,15.467559814453125,20.44513511657715,12.091629981994629,9.831884384155273,15.037577629089355,22.472427368164062,17.966447830200195,18.04360008239746,52.83140182495117,30.300392150878906,14.272525787353516,28.776683807373047,14.87478256225586,26.560134887695312,22.791263580322266,13.031448364257812,7.465834617614746,15.859904289245605,15.654475212097168,35.36785888671875,41.679527282714844,13.489083290100098,12.026243209838867,10.55536937713623,10.937093734741211,20.357519149780273,9.97134017944336,6.53872013092041,22.88406753540039,16.057342529296875,8.918428421020508,31.07950782775879,12.095285415649414,11.247681617736816,17.473600387573242,14.213282585144043,21.809486389160156,16.25005340576172,18.457508087158203,14.448582649230957,19.33466339111328,21.741180419921875,36.0016975402832,7.962129592895508,11.582380294799805,12.768638610839844
|
||||||
|
10526315.789473685,12.776854911937773,24.663676115284336,11.50460004508547,14.671225372474858,17.374185145685548,20.682800526965483,13.288055084003368,11.440362194656212,16.873823473567963,23.09021612365119,18.51907138163714,20.278866849122494,52.724657441233994,31.825402611520897,14.734285683709086,29.528678925609903,15.09234152549563,27.171897841261448,23.14398767460148,13.934958505942138,7.821502990352501,16.55533397105107,16.789932060112193,36.67604927920899,41.60809650355718,14.087474481321335,13.008655385302582,11.17730214223111,11.321997185972961,21.453200092023224,10.50211116821709,6.6034694361033495,24.67641698757904,16.979849585824727,10.544376255139461,33.28799029645559,13.000915446476057,11.913011274697812,18.361130300697734,16.198423144201183,23.528672465677904,18.851392397878904,18.425226634631443,15.230230421186464,19.828224328405888,21.80370530696622,37.00908387222242,8.38620989546541,12.546300841990725,13.912173933393408
|
||||||
|
21052631.57894737,27.850291000271522,48.93607658308794,22.555204084251397,25.16136723975561,50.47484804072786,25.466752017850773,36.19210741423339,39.069919163005174,46.128476019030295,35.92472362260327,30.057665656952814,59.709366985924355,56.79487052413318,59.196705338883106,31.906915928724736,44.597132171702846,21.837189617703498,38.07948978734396,33.9796350759627,28.935405494340817,14.325688842611795,28.278194154192207,35.13875224691019,61.76754178412238,44.6775886771215,28.01277049410046,31.604132814853973,25.329280523939314,20.83117309613076,44.08581161208018,19.868308472057542,7.786522062392157,59.196783292615514,33.437312922319215,37.86525742288607,72.11422021674647,28.45282693481759,26.283618867180603,38.233814525141945,50.490736919922185,55.499477194740834,65.00387222594337,19.20063659396033,29.938730225751275,31.415738094856042,23.71012478055551,55.03615078812986,16.187582322780735,29.75866186424801,32.77275201298524
|
||||||
|
31578947.368421055,49.20190873289084,63.3407311417018,33.114977303696314,48.51630176246744,66.29822704721232,34.01225583028781,65.06097386341925,50.047275764748996,43.282776942099986,61.40209562979495,57.700768603533106,82.31168388243033,105.80648966121211,88.26221817525473,110.843572811221,71.62837815443649,49.18897271648545,46.72247282685996,86.72634276908688,33.328892795028494,23.67281438119277,31.959782852453294,39.07779858889798,96.04975107021198,83.06608436031686,66.41134040049405,52.577474701324086,58.82678742664467,47.34819447817772,84.11480306084798,27.042457599915217,10.702444755188463,96.48672616551953,42.9159596922325,43.724018014337965,88.44290469566211,39.80999278071805,62.12509418082866,81.39175821873786,63.63133665324909,84.58056680833369,90.41854720717487,31.23196938182963,47.711000206728066,64.42433359302643,33.52378338944797,72.37739086817756,25.263175579370973,44.26823562103998,34.745594789106356
|
||||||
|
42105263.15789474,71.2761652676435,103.37331421421507,56.98527450756353,62.14653069354656,97.10758482889786,52.89649554009748,101.0744354151953,84.57153450130096,81.48520412421954,105.53959711222109,101.08103186271858,116.84015828420581,131.2956676815025,161.26585437468876,151.61537382906494,113.44135146111333,67.29765375082347,68.58153648769427,171.36852356717196,62.00626942862104,51.36441795994371,48.637713735701304,83.4602617826158,135.34337045793058,106.06452643252146,122.26506322721366,77.94572525556069,88.73507968375557,59.07073416674401,109.31621462420414,46.10797188734414,28.4949539547523,94.59404101852235,58.56205767252769,71.30583339764472,109.5969239019291,85.18315022746282,110.43338953870816,100.53123858059212,94.27747407828012,102.29093465382373,106.8593352884467,44.443592740021586,84.56197012015657,106.86370780626493,57.23514194889743,110.73687602121414,45.42995975593781,75.10924024479542,52.518669980549745
|
||||||
|
52631578.94736842,116.56954062430812,200.24208642311208,91.63478704172488,100.53739246073852,114.46677843487494,86.82538705147864,145.64239079833362,148.20972216240753,94.29939152531512,161.23961707986788,117.83669799729314,157.88332560774032,150.64240087599717,202.0456057157873,231.11175816516467,142.2224185919993,93.33254251982036,104.95370235605748,220.70153244062638,101.58419977784817,75.52134488704627,80.48859411405807,111.8477973121933,164.0769980673232,125.8043303962931,189.7006752178527,115.91802844983059,145.39265588313918,76.79280047851339,122.56216236536193,65.62835249701035,82.54330217770179,93.81866740257868,109.42892080456679,125.24353733316187,191.41724494537158,117.23467583401217,135.91432102994574,141.40902455706262,156.8251056132059,123.78489379805623,129.66109143585544,69.6442174114225,162.3300856070753,166.4248887533296,92.33557594592304,130.96027772283372,70.40188753720466,116.44582591835317,83.51644768956997
|
||||||
|
63157894.73684211,155.68236535350044,239.25845356983157,146.60758282574423,164.6354726203756,119.17300705492003,115.60162661078071,173.94746751088513,208.19938650512634,127.03410804288212,169.36822957939722,197.56866733279918,174.5068757060162,147.7696934534325,238.3855649325326,277.6416850435106,164.07564736981143,142.16748480362577,139.51655532597175,221.027061882772,157.86828469239444,126.15439900821931,142.31899524853668,168.10890295160448,259.72631814753913,214.67234853463162,179.63523435031277,137.29928255064667,185.92074385780705,118.64928261097779,173.9366375515831,87.4481535860211,150.83401271849456,152.18064737047519,162.13419690670403,185.54342678544265,216.91481358374256,181.28867939056786,197.47478968037132,233.22489004509933,189.45276868855194,132.4331818908618,176.6262832676935,117.018150313244,239.9683915706883,212.05294656415063,121.92457645627934,167.89475403705464,123.00701815418265,176.3371164409903,149.3586106997943
|
||||||
|
73684210.5263158,188.6502753069774,236.06987952154103,157.39433862002687,186.66364885676768,156.72891684792876,187.4181134075835,200.8435320484754,258.0318562030132,174.47490193242842,201.55724401511975,229.08333583101836,186.79538948875222,215.11149580793204,224.4069714883853,286.45698238583157,184.66906194083413,192.61128096158652,158.9001218145739,248.0090168571703,186.20706609246474,186.54174038524278,180.4862363494912,220.86154558396075,287.48900658569175,270.17734908042195,205.149702065209,156.5826320784409,190.99208637890393,142.8722244101961,169.21395700220586,137.41018920996515,164.412160163606,207.25356921718722,228.95310728635815,222.19102848963067,218.29636236926194,228.72617721656684,214.3128368296452,271.1694789425653,192.2755204891564,161.6754708725784,200.72027487078697,175.93144433464536,271.11137532023844,224.79094732216362,187.7839266554223,172.97055842217645,201.38173093892368,180.84034712988256,193.04960143661074
|
||||||
|
84210526.31578948,203.86209836197693,284.69398957078147,212.2996325360771,232.92239413829392,176.04226196439643,234.01833025900612,240.9273930649348,269.76537692778,191.80755229339707,203.48332648462207,229.24116860499342,212.80748128857968,241.1983528404685,229.99508955273933,304.1023236291891,182.74417670876036,212.4863110916106,162.07548295725084,253.88107839119402,203.60452846080642,188.8442091713982,177.49857212136658,257.1731001135385,324.0044686932973,276.16961056836095,276.5629439453009,203.1669735885393,222.6841661857436,172.51373754445865,178.3794994129699,184.87722887854167,191.68113153991277,243.93536417140857,257.17001851509815,276.60933772084456,235.94803331697418,243.5014506707231,259.4381450404751,292.5352912451092,202.75665405252303,243.62081868760805,204.4980077370382,213.31540870204196,281.39510426362796,287.01321238361896,254.7712791775072,181.18540515199592,257.1377680714441,209.59565546605066,244.95822023982157
|
||||||
|
94736842.10526316,221.76365231258222,298.44623046337404,225.5962761084954,263.1635334493381,192.5403528766206,252.85596271911817,240.2570946445673,291.16103165640067,230.3001519537781,218.93743998332369,262.8539800021457,231.51076867136268,258.9324501816421,261.97373524448545,311.88752959524163,213.3737466722892,217.86249587814893,164.6451902902605,321.27542467378186,219.87435967233702,217.34156211484174,193.3396997034467,267.4996712301907,391.4363280450538,296.0965765049253,275.96148516547316,226.7879831254317,261.11786638328243,174.60653318113899,198.0473997109154,204.17909564036577,199.0112266571898,270.19472741023986,281.7424192036453,300.30018441003443,248.97953229531686,237.83056088481254,280.8931333558051,355.5319778854827,220.38083039905226,258.79200364191115,215.7094383073398,268.5995612194829,285.1399150184482,298.1157929243474,274.5396528453708,229.65018444155393,260.6920795028312,238.34592995055826,266.393293495241
|
||||||
|
105263157.89473684,245.35491304945748,364.31667623113725,239.16689980352024,292.85095850929326,209.13734311979894,291.7582185681177,294.80305819456925,316.62698328973846,257.2564549506371,200.58218662593504,288.03024431642075,239.05698390540323,276.06873063945375,314.9276267980274,353.70784453870186,269.759808546701,251.11507031098628,193.25933077196666,363.0210942720112,248.17318240245623,258.87329045540736,233.39723021535002,264.084143219397,414.2178097969938,325.3885057649454,337.0668087867488,224.15583896381042,272.8901896906031,181.93902114479496,194.69881453689115,245.03934105207054,229.72719440953884,279.4478513626511,277.8076484269383,322.78119783256193,312.6036096735344,243.64709245580715,295.8488897459989,373.86842227444424,258.369452648711,266.8204641612943,236.13185130707774,324.044835117385,301.04168546695126,339.1355337043218,283.0219517756367,226.35342628176522,272.6518562146336,241.85981405904087,305.05292836566383
|
||||||
|
115789473.68421052,266.799933626381,297.22971822754835,261.2805058757602,310.35724036639084,223.7405827696799,343.20311494349113,301.1583439559157,312.18926526123136,298.58264072276546,239.98498249161273,284.83406036101553,262.26198046875294,308.5391135595512,376.90335008006673,340.0420200265015,300.0000998102067,275.3919342841303,246.53034022904498,355.8592794014146,287.0505564011365,260.20867862249014,263.67540143062865,286.6119846154283,399.51523782705004,352.7916221132404,342.731029956955,270.10488644134966,270.6986888677625,196.22677843875337,250.6378657192074,255.4672039059143,255.49418821918502,327.43856407186,320.6331213520339,346.39125615738106,345.4912432643515,258.5572632840961,326.6639678924863,391.0978296762358,316.57403531943,347.21038744802945,260.61276194094125,325.1980462995593,378.30549113786776,311.30939930619627,342.9487895093796,262.5119135733126,305.02191326261556,254.73453559167166,342.3123120395596
|
||||||
|
126315789.47368422,271.3524932161263,320.7873489780771,298.90931839625927,346.8220421171585,242.54962994005544,349.6517234192,299.3558407490274,388.93619583683346,341.031371523799,275.71578407089464,319.10226869913356,333.42872072884256,257.7790326339055,411.54482385491406,345.8125582576789,297.90487721488086,279.4960406642211,254.2968361598963,330.1279817324266,310.06161374035304,294.09684144856203,337.6745081129498,324.94982235814734,434.03228495920143,376.2682174127188,341.6737316632205,302.57796462619075,289.6701769343374,199.77932009637522,284.9327006023016,264.7638128574866,282.01549374793046,348.51115873249614,355.99555546557144,398.95423698788545,330.2747906284649,319.92821790637083,358.4613528552148,390.2342778942921,330.15719127060635,333.894176150623,323.17206675557225,352.52024125194293,418.9016897585914,373.5275361442171,338.1935064227957,306.68087078263557,344.82413530118583,286.94550957739193,351.8441617346867
|
||||||
|
136842105.2631579,297.2721224268718,376.6337987147375,330.50617138129195,344.5550482439862,251.33251910163426,383.3552640318211,335.21915383104476,438.22977767657704,365.34676718687086,264.9363139811645,359.036309464734,350.1226638596804,285.58465833099274,428.1497014372963,345.8279276070007,261.092950361092,281.61397434032193,230.0082749237314,393.73593705455033,350.4029616794428,324.6779472279747,364.2673981259074,326.8073693636051,449.0447615683904,416.23431921698716,356.7385176269962,315.8569057514463,358.79769443020933,238.81872825448374,318.630122147934,291.5059439486249,300.7436602405399,333.75592076208784,413.4255971162274,393.59042667723423,319.4989490870624,354.7795290034729,404.17495688985923,412.6509597500441,385.41001742723256,348.9382036526448,244.10422467508462,402.9348775415044,358.8729444223757,425.4869883682094,343.94898409328306,309.83586655974057,305.12867254258185,225.398809883568,397.3069331453613
|
||||||
|
147368421.0526316,370.3942853876099,341.2557415130065,384.47379720425675,395.7937452228446,253.81901774157117,398.5665183508165,380.54408885237257,432.25985739551425,402.16225101990716,289.558074584463,333.1293403476559,345.9917329052809,373.18766408300144,453.0235213899877,343.9136790307935,332.00041786059126,236.9182789447895,273.0091116958378,402.3976468655211,319.8198612436363,341.0698728407849,356.8327299688331,296.1200630909188,434.86471840863084,441.19100512774696,381.0524857095404,366.1001347113515,358.6435228746353,278.96238650914046,326.7365833193972,379.69252581460995,352.68463947072917,404.6530310683304,436.63432705105174,413.19101009375504,346.4137888773657,358.03829574122653,375.7055694201646,403.512131685035,432.76447856310665,420.7434611452584,280.8673328042361,355.20467194081004,406.01481324714973,446.8363766077483,380.79529509683067,318.95795463244343,396.39719896831684,246.33690655115905,401.98747768808266
|
||||||
|
157894736.84210527,399.07695006251004,408.57639275743037,299.7582718746483,400.4445620312915,227.29237436509362,404.55028432186623,298.62492275130705,390.3508182280281,425.57856892268086,409.71991330624627,276.4244173179868,338.7914892716586,343.64085562457007,425.3494745476424,312.77514500416544,319.4544134759176,337.7000562287765,319.3332904039823,416.9436507120192,395.89678544185836,371.5304383441517,412.5474703913746,375.8705816346523,472.8032948153501,447.0053145694766,425.6691376086418,337.5347253185725,407.17998473589773,267.1954396183801,350.53344924875904,372.4398675392539,377.5436311585421,351.30124496844,393.3341468016196,414.7628950601304,331.9626589612287,321.1367172037465,347.4960473004801,378.1563288552939,427.13315910117444,417.3272790754436,282.9820552824111,394.83927507397215,446.2759765244587,405.0859293798165,439.56592535906555,351.3937716141467,370.16757454865524,350.61212696617997,317.5515082533834
|
||||||
|
168421052.63157895,405.66319603801105,436.92694349104016,371.0781460376328,436.085471494046,313.6614704488718,431.83860445286757,388.7779165304929,391.15441200805833,439.57102705567166,409.0776826285259,358.70014174559117,369.72447547225744,310.683538335182,455.6376533204499,399.6960995454868,322.52053370171967,323.93608347795015,244.5564996912208,352.69593422828945,385.4130186228871,373.22122557605735,360.1646905579395,353.46507820198076,473.2671767060446,439.74098881063696,467.58912187798205,397.3972447268521,424.1238789716916,309.34182845984805,315.16203010511526,281.6992490377452,361.0641497865608,350.1424059458387,429.14449884620734,451.26876141424,379.01408825950944,316.16020478964515,432.196519804133,396.7129494107001,447.0729596119508,409.9787525509863,334.0932743093644,432.1989708210953,436.9457453545441,469.29303387591716,436.3535582444674,267.05532896749855,450.26014419407727,387.5445527554879,334.56755607081914
|
||||||
|
178947368.42105263,374.5632440644288,450.06489664671165,439.15150136688413,476.84046223553264,366.6310531429307,474.15093061162827,369.5149481791869,430.7493337189722,461.8469791498211,388.4465658482257,249.30757973134678,308.8934753428205,353.9047789252531,470.41065144051805,326.6880696727133,434.30225151645186,362.04361415280863,283.59574180659826,410.2621727873909,351.2370315819896,426.4339595463468,445.1974868125533,437.7607866135991,397.1426794160766,355.78668926379686,405.54767805486506,363.14088455048955,444.0159104274911,380.11345006769056,284.96332408013103,377.7035250941968,378.2071454263295,449.48486753200234,447.5252899607628,405.52003547169494,409.8556373455353,410.2689578122049,433.56355019256347,465.5651950213718,407.7810002547222,456.44013229507823,336.4349845822003,400.4434332218527,473.3829805204578,419.1451347264722,436.2796764652815,357.51336995312056,448.9562657846969,270.1528899316312,399.75885224697333
|
||||||
|
189473684.21052632,414.13150791448237,477.1833577823111,456.33780543989093,471.43761618249636,312.56730236224524,447.7417323557954,399.54536268007723,480.2824562745081,407.9549190740506,446.518902373446,418.60946643088334,364.35016300787225,452.0663475466897,462.3839974119392,419.90909223807483,314.4026516537257,352.6561743346278,338.135948294913,438.9825474261577,418.2522636138501,445.6926105436856,404.29945301250075,443.92523782273076,510.89616100296087,462.37512547603274,419.4611658964131,401.1209657860925,380.046299821287,429.2635412879928,289.97788792270705,405.04890485978854,387.0189599084392,456.03291114875816,410.56268372347483,479.0863113768213,454.90363978596605,391.9438576729674,421.4198047013494,465.99211775034746,452.20524171349746,431.7272804914419,422.94254108479146,381.6554382743928,479.548941893921,434.72673203525784,453.3807408248288,413.0242045165099,451.7109863409705,393.6158449306382,365.2227183553981
|
||||||
|
200000000.0,445.8689920039553,477.52782797186,471.2671553147467,488.94760429545454,439.9456694549636,480.6451155574698,398.4015148564389,465.5332888772613,437.0121902619538,403.0043455064297,401.88093809861886,326.7413901420016,423.57061845534724,429.4419846864123,362.4472447006326,389.56077324246104,357.7941360301093,339.4551521100496,380.07093806172674,413.3284254325064,408.90006056898517,420.30478690486206,467.6191948526784,496.21221445108716,387.0091969229673,439.06786423921585,433.34362527571227,455.5856535262183,423.07452937019497,364.95605689600893,447.886041175378,406.9709141426965,455.93409534033975,464.5003368148678,374.2333729910223,484.88693250951013,441.21791495618066,412.7518442429994,439.16432979859803,399.8890632610572,450.18339968511935,397.31223195791245,450.47006618976593,453.3742038058607,457.3968785514957,441.6868673343408,350.8426554516742,392.3017089006148,438.31347545824553,407.5624998290288
|
||||||
|
21
results/AcrobotSwingup_full_small_data.csv
Normal file
21
results/AcrobotSwingup_full_small_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31,trial_32,trial_33,trial_34,trial_35,trial_36,trial_37,trial_38,trial_39,trial_40,trial_41,trial_42,trial_43,trial_44,trial_45,trial_46,trial_47,trial_48,trial_49,trial_50,trial_51
|
||||||
|
0.0,5.659590244293213,26.06021499633789,24.206058502197266,3.551205635070801,24.381572723388672,25.24798583984375,22.904619216918945,23.205829620361328,20.361244201660156,5.436698913574219,23.75769805908203,23.605976104736328,14.900619506835938,4.284371376037598,39.48065185546875,11.61447811126709,11.175546646118164,13.062265396118164,20.859821319580078,12.516582489013672,30.621231079101562,16.870220184326172,12.008752822875977,16.727144241333008,30.366870880126953,27.533985137939453,4.029427528381348,23.41136932373047,19.937116622924805,4.43222713470459,17.344379425048828,30.81283950805664,29.214832305908203,8.560333251953125,19.13614845275879,17.62757110595703,9.370306015014648,9.371320724487305,15.618865966796875,7.7243499755859375,19.97760009765625,12.78360366821289,9.0764799118042,14.895511627197266,13.307378768920898,16.089614868164062,3.8655829429626465,12.270563125610352,10.179352760314941,12.686513900756836,15.486466407775879,13.59727954864502
|
||||||
|
10526315.789473685,6.832179842082077,26.655761841632984,24.647387386978167,3.7951000110837025,24.625391447013683,26.989194710416477,23.52083598175847,23.68593773531658,21.505964871134786,6.178805253491673,24.291254072359806,24.02885478349542,15.25842534343085,5.6363645011167005,39.64680201991608,13.029074858113031,11.729220786970368,13.583531654648834,21.267860990647133,13.081201621509283,31.193171783724484,17.06456462561651,12.291960430014209,17.29320882652935,30.71530728474507,27.631295198425033,4.4305144800286005,23.872031043750592,21.072399616453,4.8248382069536815,18.455150071531534,31.757999885331973,29.120529306170187,8.690298646745218,19.795293378300574,18.47134473781589,10.036576541094238,10.714540822297295,16.085801157027152,8.390602115810001,21.141984764073776,14.268853959212096,11.663462633089038,16.354732738069963,13.993466110848448,17.041158165565985,5.519646887000589,12.77408562757983,10.878759518975182,12.874184006110474,16.49670249324581,15.477476564457852
|
||||||
|
21052631.57894737,28.147902433747564,37.22287948023001,33.6056732494704,8.76476968623636,31.189594262524658,57.853027596863356,35.680787060564576,33.39733524088054,41.07930637002404,20.98607490638947,35.37923510168316,31.28637695119528,23.60581588421089,29.482394350914497,43.46011415307624,36.97067757039602,22.661797813967986,23.621173048500292,30.616702312742905,23.55001003704862,43.07789826620154,21.385546275958873,19.710106381563435,27.632518249339427,38.985538164127895,29.504928318726883,11.446981073855074,31.393746199514563,43.00993741780437,14.809376867389995,39.59122657057652,51.235432717319675,30.593241051479225,13.376692822486824,33.07524504178201,34.797295711109015,22.10745354431199,33.52466998808602,26.082018015147742,20.24374039771443,41.394080985182704,39.4924360788595,56.2871973983056,41.513682820336314,31.357747317660717,33.01974145325102,32.66231393183574,23.712765755271153,22.531451086712345,16.743698832963354,34.49079740631531,50.443144228484826
|
||||||
|
31578947.368421055,41.92667163968334,47.081286884803035,53.31386547188929,15.734067988522618,50.92809671742475,82.65884691344735,55.39503933964014,50.652260361739806,49.82079556129275,40.99985525575454,54.66093580208079,35.508166313924704,46.11705220614527,41.22772324692014,56.816872034387,43.2228327223056,40.19663441021233,38.10794743479661,52.762171613780005,33.09947887429364,67.04472125703444,34.54369067007527,44.49081858767615,39.34411290972146,58.049930083908535,34.19552375136816,17.889195941370552,36.579066074445564,65.77434147692122,39.26487075412579,61.8704024942339,82.76812107233626,57.317669620598124,34.596338665479195,54.706077099001234,53.91412353304078,33.3593971645279,37.66723245261573,44.58308358777346,28.64237396818924,47.31604417122467,44.35868179637144,66.696906809806,53.72041435680067,89.43141169008128,44.489799806616,34.633037358083854,46.95936085364248,24.94592719816105,24.373070298593465,49.16865390767228,80.42390218921976
|
||||||
|
42105263.15789474,45.03420286743265,73.27525277117944,98.7415206533885,19.6023741904801,54.754309569080455,129.565376188121,78.54606731264874,81.8389400313931,77.15637892253511,44.15434618599692,68.51232993272534,50.155750891549765,67.66363900520135,50.85221590237935,88.36148197944806,62.49077229123367,71.73133739701103,64.20776773993329,65.93678008329505,43.32574237387762,102.95464571394089,64.28326158097576,67.88477996462271,67.6132686489837,54.94492945396999,57.28846560926319,42.152913433115245,66.50812302835266,76.14029259265625,30.933653839118264,76.36194617513804,92.22426402089998,83.57488780495534,47.93303664771639,74.12341055158433,72.26059684528869,52.23324800008222,46.95458285799482,58.34815755808453,43.72658182639329,49.740356901676044,50.15222640356199,66.78518838796589,85.88943360650968,140.95735328176013,119.37172471354214,65.49967622670276,62.988179742381845,40.105039974494,36.616043191578584,83.02653971157577,90.17971566170867
|
||||||
|
52631578.94736842,68.19982863906925,98.62902046740055,149.89563639292757,37.31145393718395,90.8538058003684,128.68682862083503,97.18668391627783,150.89161663157788,104.50409683882364,47.27786670418823,98.00339147436156,70.17052016663172,91.46424767593763,75.87357258311681,127.03667593448115,99.75405900906823,118.32361865695823,99.97577654869602,88.9501178143675,72.77759236561707,142.65313143012762,108.78447872647948,122.99013573985928,99.45447295109568,69.59848316240839,111.86518009029679,67.02958871088819,91.4430535141265,124.42946462968875,57.23680565259587,103.33874670533757,102.40711969170214,132.04876374395516,81.83370796749962,96.88298358625818,122.74303608513605,77.01863054526645,80.91640323483053,98.80274882351262,64.57210949987916,117.0932100008234,80.69520676672623,102.77821842197153,117.83863189564683,141.4332256555103,207.55719856368867,88.07973560133634,95.4651102644915,69.42282263108031,55.72770765317485,138.5041259121449,102.79626683517259
|
||||||
|
63157894.73684211,107.91899601388675,143.40176084695432,202.07172927747476,53.905907596040976,171.61163277994234,153.5450500102915,158.0027304757335,219.43945966054198,143.7749312539015,80.56461809305193,138.67242715736836,107.37028623312466,149.7808669954126,106.92668054329226,189.67774393692244,167.86396317840286,166.89563976133303,129.80476129983768,127.62876651095553,122.30031400762108,164.95216065835095,154.1963369258553,213.7809501466989,137.0633972818419,133.79106724489264,158.77895214328143,90.50216631364296,178.06996485698258,116.29014581247876,98.66104619972761,115.10021218579067,151.11044837489024,167.1813571567846,119.88039637140291,158.57706770986076,143.5445758474996,137.01065986015297,121.12346531413601,135.48091573101001,103.78291744695478,235.38945437592153,154.3679414960321,112.85369830011001,148.27576443858425,195.8013524470072,226.92724116546958,107.9768022773504,185.39239127847299,115.60783792883572,78.35283062774225,216.65210434654082,125.26208101093277
|
||||||
|
73684210.5263158,159.27213348667047,164.69235548550404,237.1428833089542,87.11527654062184,220.34895413102703,183.06053682344444,187.50815415200765,244.20376514538174,169.89169147573514,134.44562729344887,161.52370494434876,155.60801137554517,206.13444026908385,118.16709765420397,229.82704253040689,185.92471832164932,220.7179796937595,163.12295127269965,140.9532556124341,159.75447376629654,180.6877703009137,157.72269000822985,224.0692377513961,166.8458060386521,143.45255696424732,167.9714248380585,120.2691682845024,227.39046660973756,149.21119205164533,133.88103827830003,160.8760729027381,165.72361888433096,186.84776258163174,148.98402629351023,225.57498357253064,165.6156990489719,164.65016914796797,157.21168921594807,143.17523509206202,175.8885006343229,256.5494777203184,169.17628211880984,139.77598481714068,143.35689478010516,235.67630765993675,249.74661437585084,160.81853491881054,234.92553995694152,160.46660868703827,100.40524795591585,232.56014445836854,186.19126910879342
|
||||||
|
84210526.31578948,181.49334535110032,202.86809865226377,244.54835048276632,136.9738449730041,242.64559251581863,192.88516068260427,196.84616251533382,269.4718436574011,181.06986464027554,144.14859775294886,210.91982869857569,172.87517659584898,213.34077877813428,133.1486908771985,212.35303221779185,194.55898791997387,227.30744709301524,227.87541759905724,167.39779695513508,178.91156548146066,207.38431391161234,181.85070799757568,253.32659885229498,188.17152256450495,169.64988594454744,211.84758900308213,149.92824863925205,235.58682354731573,176.24888158108718,169.49650049638882,217.37305591179063,174.74953899786414,202.85201661738665,193.5160693376018,277.8905470549565,187.2670950717873,183.05075054287582,203.59228640481047,165.07513461416778,236.19514031456447,270.7366297529015,168.42355418403395,167.69085103124792,154.60634244842214,273.64416068478636,258.0338005814856,180.11805761058577,278.42032172541207,233.7866994251505,129.33775326915065,265.3804964094611,212.68048018497774
|
||||||
|
94736842.10526316,217.02513688414712,217.40887945081388,251.11152075486996,215.00448604307843,256.1565973265019,224.0008576076447,212.8130595597369,283.8795618796282,211.2892646598849,138.55405252032662,230.05362433942236,187.6279875877574,249.96236565820564,146.20013928817912,206.56550075311907,208.8869472236019,257.2059580760897,261.8355242179867,206.87350549872892,197.80102841865653,277.776471017718,197.17316929286042,268.01165100222147,203.4566460744248,225.54916045309105,218.13186058967563,199.26705589056675,238.4805408087959,175.1814991003754,189.9897630881322,239.72997726945832,182.3316647547434,243.28823338684282,219.53104953471973,311.4989064796479,184.86817445872234,219.15327002871732,263.98296699282866,209.68148471394403,233.19880699488098,307.6828768263233,197.80799960726847,162.5171639018607,178.39798249997756,276.5798949677693,280.23315778638846,196.88992108264293,272.6524273077867,251.78043104638022,164.45755950780458,283.25361567133024,230.48192281505078
|
||||||
|
105263157.89473684,234.10990122084473,217.3358140208054,268.8515427618641,256.3500708089806,289.30000748000316,220.46724514229805,246.1834609686833,288.6386668810884,226.11516149858028,179.30007085625155,234.0672507999346,268.1379677846491,272.1014634734045,178.48134565848721,226.5820130066033,216.65648028626006,284.4021098225731,252.4125197247455,236.43471294674515,204.11757548017184,303.5332206807969,234.57752460612815,292.4457925383074,236.94915928487302,258.0473381690371,240.2055082372351,232.65203548732558,280.9704074772111,190.91949436093302,200.74843618711276,257.47322858221975,179.76330357426752,257.6140380349021,239.513289012407,299.66237203500276,204.41949953877693,237.0964844879021,278.9093321056577,248.92210537360316,244.09034333846574,320.38562923339595,262.20589205564886,193.83135211368676,194.8333185571878,279.9745952908022,302.37976678306046,240.22126082064702,310.4425104144868,272.914372193351,208.41189419694885,319.2198248177024,226.18180656895413
|
||||||
|
115789473.68421052,249.18590030519914,201.9511145972892,287.2572264526029,267.4537149961635,322.91802707950166,212.04232808382062,253.9414979251633,277.4374415159556,245.63929299952414,200.66878382836848,299.10941293596227,323.05420755093445,315.3025903497871,205.4430230187247,261.71017522486625,242.5056815896173,343.54271122549045,270.9586794495252,261.1006143945076,197.63392752256254,335.2822739715556,290.29829008163176,287.67242846099293,282.4304266203804,309.06754603148164,270.7695217532134,263.9483717367094,326.35631039152514,207.29549197410944,220.54112385452783,317.8353903697303,184.20524202014767,217.4636432468231,243.75496271492042,343.43285579504726,232.37027203174011,244.5884135654841,309.7953513684035,255.47565112826402,281.1268229476303,289.6322519325153,276.7330301683035,207.6586179891782,240.92947756542392,333.9844641084486,296.96585101789054,296.62612143456107,319.80459124774484,329.46133025745934,229.17594435972023,316.6618266349189,251.2355852050299
|
||||||
|
126315789.47368422,275.9971774727354,241.99168589537834,313.75812100637654,279.6528471430913,304.3596527021677,227.34184290662697,323.7295767570798,302.88170019972694,273.12861756109464,172.9358307937175,310.04793748102685,368.80631197984866,347.8431623767948,221.48816011982282,298.2685445692401,268.7836603342331,337.6478552217298,319.1258326544326,275.55963378269587,280.2497144638668,345.62822697466424,299.2109466309032,351.6162366814232,329.6497399978691,311.6484397705902,293.0560598096029,295.722064181378,332.50370447886615,257.1094944529917,234.14091986302194,345.1080124731539,177.2222673440574,223.79750408376688,220.62658190463054,376.7927564058938,211.49110627190882,273.6205906425487,357.2806487849545,293.5841209632539,324.8032245044894,353.39509516343526,295.26515137984154,269.06375472664513,277.3217741280712,373.429230363085,336.0682568289237,294.9049922211348,344.5549608944526,328.5805262115853,262.09640103908794,329.7033198355308,230.26065806833984
|
||||||
|
136842105.2631579,331.1310984597973,330.92062750037695,350.31771540072157,241.92275971545732,349.487158518749,253.6812473855851,325.71914522254895,325.98606509638955,299.67226434183254,222.7171929314362,362.22500239704794,353.7206253434977,329.8485732653134,240.04395158617783,375.9206986509038,296.1192307444986,317.76374119866915,332.76844176856434,266.33326857316194,317.83344951684785,335.4382505423475,269.0314322388898,337.316036642836,350.5986721150763,353.6266821601385,360.60048108269314,321.1663005097751,314.1888553346623,274.78550352903284,272.76473444873614,288.8268783533341,222.18471603817892,264.06045392060713,170.12259723795083,380.4427074806512,266.2235197476074,319.71874896377085,363.64526832351396,281.31195467149115,346.50876504405716,382.92176924897694,282.25554529261717,273.57407037917926,305.9969841263301,394.5893140506216,370.8762834500573,332.9856849477067,363.31866019089136,362.5416107225617,269.99199316788906,389.3279319475728,308.12263722532026
|
||||||
|
147368421.0526316,334.1331815575959,331.26054902420145,381.4375261449748,303.58033058682975,401.2678463894905,278.4019773907939,414.0427268372019,369.7423321454479,282.71922560246696,273.4214107034279,361.65964628751914,372.1824835423288,318.56753216737525,210.9307676490984,356.5701974561009,352.06374148252604,406.5566897681216,333.9498800973813,246.20663672388423,328.3490646717621,387.3416802528163,327.51076342805277,385.68170533995885,253.16799918981133,358.00645590753106,336.27955926702947,313.1054222923897,375.3290775827754,280.4674377538821,257.16616971241797,318.7566214383805,338.27643510055356,314.57207713794185,309.70972638645344,344.50422472405626,303.4549404699717,343.41138735685985,326.9862472304346,333.69467143058125,371.663311329905,396.60525569509605,333.56858942300664,311.51673168356734,330.65806122070535,365.5853701407229,398.0602120017411,327.3454002835414,389.85550036010983,364.51443585398454,333.3668585472491,416.26006033100253,293.7112717917421
|
||||||
|
157894736.84210527,363.0588874188652,348.01363353600465,350.85800904307996,358.95551380325224,415.44099752196314,330.59946830360184,368.9097922313081,360.74902897726463,298.0753274592997,278.57638661988557,395.7277124602544,355.3894465778673,359.38338084745936,248.06845572632108,423.7281991743646,380.3995349169438,397.4260149710396,343.7821053395311,331.15995204671606,339.8947229533975,418.3897465968561,328.4362782049212,360.40792845507406,277.0002353487748,393.35680071394535,346.9166771864462,382.6530473823363,370.72555134220465,272.7918648605036,348.9737502551971,348.5698393718688,327.3293010350574,322.3626938407771,322.2339335977205,383.3609695101049,292.453846626252,381.693540019672,307.4503969633513,295.532632318395,372.8014120229228,380.804421746995,370.52682173821734,311.2337816863674,339.06866188103805,399.200470263608,377.8003832427088,411.4315186640728,384.6761060342234,398.4817227789735,302.1114491755942,435.55380212410336,352.27379932801483
|
||||||
|
168421052.63157895,374.20749622831056,300.8698926645633,393.02563713660203,344.3874844910365,421.54784317227944,386.81971816409,422.2265746111355,366.76998138427734,362.695848941803,341.3760617829426,410.4994723961954,423.44265975846486,403.233764142541,274.3367646473597,432.61980298219294,285.7924808557674,336.5352416698952,382.75353345976646,359.34346648002264,331.80881489280847,422.6831429605669,285.9784820126034,425.2275680465382,365.71028386060556,381.67256126641564,363.7023679331729,296.40756576411275,348.2884794708104,262.71160837818053,352.1786316649735,418.6676336827371,348.0470139115141,296.0973640227912,377.61352123778283,400.28931817453656,334.1318756338632,400.72983025447814,348.9818155323039,326.57440268762224,408.8445852800089,387.8502867281272,410.54368397403624,361.45651005906086,389.7398920138457,433.7308813052825,410.21367164992233,387.79224080458243,412.08511918007173,354.6767572260299,294.55531499656615,423.0860895542557,425.9015865008917
|
||||||
|
178947368.42105263,398.01404096074714,343.0975108570174,352.34444543983466,341.7874634552365,344.21429625739677,282.29907252947044,438.95200730748786,415.22266582224177,384.4037126287859,316.2727703955695,444.0847602010269,437.8799759261826,456.83454141136355,312.02938371863723,405.02045424146337,416.5735928360115,400.2747476744025,379.6909807688477,365.98834967109633,402.3423815390245,401.33779452134365,424.40306465158505,359.80635938188703,393.3847072848488,426.8554985335824,391.3710435760153,373.8681989105785,343.2895198200218,295.2621848998473,288.74132352192316,461.13590629064475,368.64064342188044,426.87068316637647,418.08778811112006,430.0503562543531,258.3060365077366,427.9050599097215,390.7223211107492,377.3263620782922,409.5508105064693,433.06401254760925,387.0293493639894,390.24354464632984,344.2173246845314,403.5795052540269,392.1088207283178,415.9190718847794,417.3374117802715,420.3628288448022,241.277399306152,403.91413826881353,394.01589216485905
|
||||||
|
189473684.21052632,385.2378914937418,323.18988571256153,371.40428784943686,337.7934095258528,409.0873669016064,350.0865441392333,386.67445789958634,438.3240877908353,348.0832089961731,393.9125860939396,441.1994237678533,432.8470368479428,354.38452062448306,362.3283076091486,423.22816584463595,389.48347414299394,460.3501194213566,376.732465519139,371.7489857191524,347.0449151358776,438.2062873435813,311.6641447796716,437.89386574119084,280.44927952890583,417.02974923363684,404.2108649728701,354.4117709971201,362.40806556325873,341.3597019408878,415.0093542162401,422.3552487041811,434.64257056082386,406.5943000494113,354.17875053430197,432.9376538988626,324.46613526938694,447.33863975830025,413.11104356450056,381.02392887606845,348.64680599885634,445.56152537687996,426.4302560138901,325.9977320700141,394.3317614556019,458.34285525601985,444.51259330237014,449.12709821211666,411.45975269934473,453.96155503450007,275.59770568850297,439.5296161207796,435.0594251375449
|
||||||
|
200000000.0,378.5197869319665,301.70494612737707,399.60696692372625,422.10956559526295,445.1377922955312,357.91642107304773,432.5184326407157,368.815842484173,417.42577834505784,376.1266447588017,387.5552142591853,417.07823802295485,486.6005549744556,263.66496690950896,464.6004346781655,411.08793175534197,463.1029889740442,399.80591059515353,385.4797520292433,410.11976740862195,460.4791630302605,359.92517642598403,453.4604589970488,437.12201295871483,449.5832874790618,429.516524446638,416.6843957383382,453.37773589397733,293.39611906754345,441.9698517746047,384.04286868007557,369.98577942816837,236.3446388048561,420.255173156136,461.411853818517,378.4628292855464,473.1667629103911,403.2194635993556,362.9362511697568,385.4418494277879,425.4589729607105,410.18619725421854,390.9898659649648,405.15032039974864,474.73755304907496,440.46052292930455,462.5142033633433,326.40245916812046,401.2219400923503,360.6054227493311,468.72995001548213,437.466227181648
|
||||||
|
21
results/AcrobotSwingup_large_data.csv
Normal file
21
results/AcrobotSwingup_large_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19
|
||||||
|
0.0,24.3169002532959,30.271114349365234,14.60433292388916,13.657001495361328,15.904391288757324,25.168766021728516,16.74152183532715,18.805774688720703,19.843246459960938,17.792728424072266,28.703142166137695,14.102890014648438,19.939960479736328,28.929645538330078,21.75873565673828,16.539203643798828,22.264387130737305,9.800703048706055,26.894725799560547,32.5867805480957
|
||||||
|
10526315.789473685,25.762511829450894,32.70700162324095,15.36123117126287,14.20298699588822,16.977084233606146,27.206356181229086,17.98280382835902,20.616100748913194,21.258175612259606,18.936890379843042,30.64417727490232,14.807493751677697,21.088349884453326,29.458373642250525,22.333996846869464,17.71215568628379,23.927833025356616,10.598851963769325,28.481777491094416,34.02556069740133
|
||||||
|
21052631.57894737,52.802071999108364,75.04354749463107,30.267817541069437,25.288711316099295,38.012291869298245,64.85328723063017,42.042211070986994,52.47638418300495,46.05687695710695,40.28956040942273,65.02992827057261,27.856611354529363,42.9228491768695,41.54559324632226,35.195920338075084,36.787317652822864,52.719722148247705,26.782328497897062,57.22022142766916,60.33896303517419
|
||||||
|
31578947.368421055,84.52866227786545,93.41449945824546,53.97153302651529,45.28002020109889,70.69617120562499,99.69342987901865,72.42625242659508,68.00371379939803,64.11864963197603,62.46243620925994,89.8273081147316,41.82106544763634,70.481348370189,70.67493571762068,66.56983537647946,36.14575144182117,68.54771589807693,51.84800787573459,78.83731343615749,84.86028521503852
|
||||||
|
42105263.15789474,136.46442768068525,118.56856599243724,84.13958934849319,70.35050390516292,110.45823494136499,130.1910540372381,100.96277890145944,82.83822812697234,112.17398193189642,86.58933386214883,132.764583095289,62.33591435507064,114.69852565588053,96.91321978394015,109.08007701034362,60.729169925989545,108.36251401166507,72.40169971529137,109.2776334890038,120.25627023031177
|
||||||
|
52631578.94736842,191.51177546324163,137.22077021597164,106.23935222382195,87.97524075899427,124.61782275210457,150.22093490071575,156.29813172224485,111.41645518593346,140.3074722253302,115.78617410634197,142.5488815822347,89.10208167889029,173.57055290959713,139.70335072068463,164.9673718009299,105.62017759594065,137.53337764397386,112.23756559820552,160.7608490121084,142.42668136504383
|
||||||
|
63157894.73684211,245.1352228659342,161.29114831633188,151.03451491021386,114.68412031303485,142.5274221086436,173.06202095614907,201.11275127546608,132.21238535436237,151.62592553242092,167.03167329039272,160.90778474282692,131.00744026156343,212.27889946763537,170.4728685084968,184.39648098100255,146.1250135044147,166.03865516903988,167.42173638188612,178.72940225771262,151.3985947890625
|
||||||
|
73684210.5263158,249.27998496492978,197.6212415887956,193.27252344361963,154.02377205500974,154.01569777826194,187.11299054287477,219.33901325359406,147.0810632117485,176.47623613344666,205.58722066045468,190.6559267956712,166.33897041415906,236.29937467821088,194.84835322436535,206.21267260978428,203.2651285276106,177.8257181515654,236.9242750494599,203.80678897654417,168.88351923441954
|
||||||
|
84210526.31578948,250.05047207426827,210.2934632373979,213.88962149289836,180.53644787010396,177.26351460294381,207.80069080746406,276.4014260207517,200.0965092172253,188.13320777951185,211.06099299942028,201.7714645083261,223.98842413636785,238.30537913182434,223.48484866506837,237.02326212034993,222.3086790936145,193.48866763801786,264.0820230691387,226.79453508213288,172.07964834744251
|
||||||
|
94736842.10526316,294.5232759923816,223.19554733751224,235.73416188788548,213.279825863787,189.82826234396146,246.07355888324102,297.954089518234,214.21506824222627,198.96135158915268,208.75851002686406,206.32593528183378,230.2881534641552,240.64756978346534,241.2582411316128,241.21360000629505,238.9790445958313,211.31644999138868,300.5051646675925,252.83925076551384,176.75785493268697
|
||||||
|
105263157.89473684,312.4253567648727,238.1863949382239,251.5679695626045,241.14316543235012,223.94594137275647,303.7945728098917,332.209810426856,233.15935256581886,203.5995933150981,226.25263951648634,212.51013239400868,243.95576424611903,279.6204641749489,247.2354274813158,240.10190256480695,256.00152441380425,247.13229266560307,300.38466452016723,252.686404169679,187.82689217228307
|
||||||
|
115789473.68421052,324.32314211856624,237.47856798784554,258.63967081153163,257.984210342499,266.8685342215022,340.9500689688152,351.23310166845033,248.54146900318997,211.9989426432389,224.41028554192707,238.99214235393953,281.8342053910867,345.6648225276589,244.35925231700625,273.7139865590925,253.95985308863285,265.05688116474494,311.2039019866663,284.98313567029965,214.07060069382356
|
||||||
|
126315789.47368422,328.4579964219038,265.98104875734975,243.26051586998463,256.57419957199915,244.8864680743283,318.77284372810504,394.7993501223356,261.81227400956726,278.32483848772563,292.56170795465783,240.0545474553372,290.1369752414999,386.78039891369787,311.5420004150875,278.0904132616487,328.6489255517475,295.55978971009773,345.9444580137565,268.62290552349293,258.3312623302693
|
||||||
|
136842105.2631579,386.5121980803166,239.07506338521378,331.90584531356274,323.1668021887376,212.48024862327733,352.3716851145938,413.4421160075803,290.00070445260184,252.08410596665908,340.22277878592223,289.2531877444475,287.8541851756976,330.58639109085135,289.1148046866843,312.307009290295,360.1367129089925,296.49360630675693,365.03487656733995,279.28444127222514,261.75725084163474
|
||||||
|
147368421.0526316,361.05900273990096,301.8800847148962,253.2253218192472,305.0527645443615,276.21071555277655,392.3359279617709,380.6932613095088,279.04291102760715,264.13008727525414,334.346609600363,337.41002282211326,299.2138455381354,384.5172859598726,325.11798095323377,396.3171929241548,367.8603964660967,320.75403278058917,387.44701564031294,315.8281090880365,284.67108203532626
|
||||||
|
157894736.84210527,404.4930629034287,292.4332255012771,288.68266117250823,308.6885727366582,286.4357508496069,383.74028185175064,380.6973844009588,318.5002830120996,278.03917474652593,317.5929606722002,344.69870302832356,295.88010700851925,375.0334378563466,351.6014518791454,381.85084546875424,315.02968522658637,272.46775056103917,428.92158800635974,405.14476261558303,302.25783008427834
|
||||||
|
168421052.63157895,374.8085286808806,355.71886519117703,309.6698876100894,287.3859699188507,307.9483720113696,392.82563340829023,439.3696628633959,381.80300312095073,307.7508173385155,337.42833154287365,338.2521870658008,313.3624106510194,453.6570545312771,347.35719539584215,357.43786984863704,359.55321236792696,367.05916958436416,369.01141390443837,379.42239585939865,301.02159678440677
|
||||||
|
178947368.42105263,460.9219853769049,332.91101660160473,328.08016123178925,334.6597258260376,306.66642621894295,396.5225643712067,412.2956154452136,395.0246918517632,371.71390475130477,396.97516485173617,359.83094309480896,393.2392986393702,432.46183522396467,373.2472983816654,418.69614904384207,391.24885557430935,359.5810155994014,374.80882815921734,417.0380227778757,348.08028226843174
|
||||||
|
189473684.21052632,445.403046762514,369.06941627366393,325.17104752027427,389.51752933570884,359.73629539428987,442.19938579870393,348.85825205658284,388.412877708756,385.20297224353226,301.5269393285226,433.8218935376389,395.19299865297336,401.06453073916344,426.29952765369677,405.1504339382589,426.04661484025524,398.5646058910441,441.7296423570271,345.92167439503686,388.9061690963536
|
||||||
|
200000000.0,457.7804379604365,413.48223425055807,382.80279507448796,318.8119400786726,300.1966344792592,432.9982435436625,474.76589046967655,414.3723743855953,365.78045196125385,396.0776034248503,360.99074424567976,430.58092873504285,448.0698297494336,371.9452913874074,464.32688662566636,433.2630072267432,408.1235684172103,456.26760264917425,381.1985447312656,332.8810343899225
|
||||||
|
21
results/AcrobotSwingup_large_data_no_aux_data.csv
Normal file
21
results/AcrobotSwingup_large_data_no_aux_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20
|
||||||
|
0.0,21.610992431640625,12.831076622009277,22.645374298095703,24.391294479370117,22.992084503173828,12.81808853149414,37.963375091552734,22.544424057006836,16.202733993530273,23.449268341064453,13.088092803955078,22.53523826599121,20.679716110229492,34.1524658203125,14.650665283203125,15.53592300415039,10.70332145690918,20.658796310424805,25.913219451904297,13.82458782196045,26.401182174682617
|
||||||
|
10526315.789473685,22.41193523967943,14.45298730078672,23.397280299009296,26.074390311160453,24.62118378108228,14.982204308521798,39.6959994980276,23.05934070625505,17.743822235918483,24.85454470290702,15.142398085476124,23.36049636796509,21.687236010001513,35.35177344593563,16.17592492626356,16.165059293364884,11.702182483944496,21.705337917359955,27.303222359103923,15.15046654306507,28.6969198039333
|
||||||
|
21052631.57894737,40.79906564278121,44.02001809855082,38.13520043229793,56.58670376521894,54.53531184691389,52.66748450040157,69.97559111261963,33.301039160794254,46.1014306535351,51.170813437623494,51.04267114378161,39.4623782393262,42.28172382291334,59.688220804954504,46.20431178996025,30.19921978959334,31.83290208050584,42.20484041172877,50.650370432791455,40.19813596962892,68.82591259586845
|
||||||
|
31578947.368421055,83.48029852798854,71.42716294209218,60.208085722994284,80.64523967400896,83.58353635631606,66.8220415576632,87.86678010122104,48.60553289235794,76.56246859421361,82.17909373782597,74.726910017534,63.86109954232243,78.61012017528768,103.84328855025143,83.48160858751582,63.033240238550306,65.85023978386602,70.2576532346555,57.68446594735717,67.60731699348652,88.89712046165216
|
||||||
|
42105263.15789474,107.05441837058173,109.22754788192356,84.95466328476274,111.74366020561916,134.69698853307816,82.91211065897652,122.68001001297272,67.64301741374993,122.96247341213464,122.43333788616505,131.86015850668798,102.61340505000297,115.44744241105553,159.3875340485837,100.5190369061651,92.59806219560619,105.33561165469835,97.81943799138399,96.7416164041226,95.61071000204853,116.03128647597873
|
||||||
|
52631578.94736842,139.6138235641401,121.89494468998215,103.29961518728666,151.2265343372594,191.29332295303695,146.00007384315833,132.88405823802518,97.6371394774549,135.39485554203102,127.16516508302365,177.18048834041215,143.5402870183176,134.7923942197145,191.73671049630872,144.2158865408554,138.75660055933565,135.29246141181594,148.02283831539245,157.73530145715972,130.73649917939198,133.9815715961055
|
||||||
|
63157894.73684211,170.3429649975822,143.2800540683012,154.04453367135204,167.2998684477938,205.39389118874175,201.9366151853116,148.99072973178366,110.19512084299839,154.63160377476686,141.2415219756705,240.58056296965424,160.34284779445946,147.13358185132788,220.49876853939242,160.96652047381508,218.8542649250612,157.85891454801006,216.7285834085248,206.39621523418916,152.34932186175914,160.14713825826172
|
||||||
|
73684210.5263158,228.5315419176279,169.93997071390336,211.68098963367808,179.46582664437904,245.2006250089886,218.4695902901013,189.08388967708868,132.20846538450002,171.50688353213908,164.48499347917593,276.0288330216652,172.6474497385963,183.64116299082204,240.7638072047042,211.62110029222896,247.61203689109587,196.17974738863367,263.8485144171606,253.31047026032888,164.71186413560218,182.08168633203758
|
||||||
|
84210526.31578948,259.1321320038423,181.96615564261776,239.02872035311862,191.08008256546347,274.609939149212,284.748096922428,206.4433287751972,149.69543390657103,199.72760233522453,178.36193088679434,304.4095695467867,196.29681246465594,200.0162832700645,257.96001057545567,222.94310007267052,267.55511673533687,207.7870992610329,312.53448946654305,297.4343499430659,175.57665131296807,221.9817456261273
|
||||||
|
94736842.10526316,296.6687648603461,221.49823807146907,277.041304075074,212.18550774665587,273.18764455166547,286.15569140359634,200.71863160887088,187.36285618541973,211.57831607383375,199.52524940764474,305.83255155710634,200.99791841795076,202.30563136477053,273.06776336083124,232.53886667174646,291.5535795155324,234.56952473595533,351.7853661546912,289.1564731310609,205.5126913332708,275.84541509793735
|
||||||
|
105263157.89473684,324.48257222053417,231.7651404156414,306.81241932536096,269.255733566271,298.07831579859567,298.0356811255299,230.61287658003226,209.10014182073257,234.28644683420492,207.38414472325027,316.3947759179858,251.42642624399667,195.753275454292,310.3774922542955,285.31686083415207,321.5605350861589,275.34006026610115,338.67813204018364,313.65629755294884,210.4008385203882,301.076096476115
|
||||||
|
115789473.68421052,367.3442906617623,219.84164580050597,345.13440378972035,332.7896038771006,296.4041903067659,306.61689214098817,231.56420993747145,242.10165482037118,235.5086769459155,212.10088086408922,366.38260980961724,296.7957857188426,205.015015164901,364.9787299211996,310.7806308968741,357.09526838845164,272.329053916099,328.55025403395587,320.15509409389335,217.62655573158713,327.2179374358
|
||||||
|
126315789.47368422,387.81980651237296,242.4190111754674,321.5440418274448,310.4686062699209,295.1930680030601,294.20799525986087,248.3542656792829,276.94726205037244,253.16322388576341,277.46598616036033,416.3253717597502,314.3584618324058,307.51572765141657,393.82295390179286,318.7585819233157,352.4195145745026,272.8340030000481,350.0113900269168,352.67373348735384,247.0423063241214,377.89684267453543
|
||||||
|
136842105.2631579,360.5452750781731,297.59659717750026,339.7523420553789,319.1244146493664,339.63422903071813,364.65336778396727,277.89726539818867,302.21982924289324,305.09305035184633,260.9120794635812,410.97650443186717,319.3477561794985,351.2585127518778,429.39801137153466,365.2351153790126,352.8615815582533,274.4488868510294,378.97896192897724,360.52939680969945,304.4890717450437,416.87276996304786
|
||||||
|
147368421.0526316,423.4084374450581,262.2428390258236,325.90747956622005,304.740076175687,390.4999509633744,355.2166017303836,245.75528988349467,314.93174012050736,299.39047493218055,253.49294891499417,433.6390623402398,337.1239616501034,293.587706336024,400.1815115421432,309.453990480411,410.9386006950342,290.63954413085764,378.5202755515265,377.21458991247533,359.4732504105634,423.34830109861747
|
||||||
|
157894736.84210527,422.0363495603327,264.8404110479388,416.66744748220225,339.34646954661923,370.2233793194769,375.4528701961701,298.73418296027387,419.43396981064643,294.9413085915871,285.47883170252857,413.55021470570495,352.7250564147891,345.4000576292049,387.517175076163,352.57678318378674,366.0361401962936,315.60907201778525,428.1646152990677,428.13534441763676,323.58518047189114,441.82827221241024
|
||||||
|
168421052.63157895,451.0411295811556,311.6226501227086,445.36453021992605,446.4801021890297,375.01774800715356,427.4875720795502,260.1504218003756,434.43734819671124,361.2766098698751,314.59049651101026,461.1648308827937,397.9799725447996,401.75175168705783,412.2983792138562,425.8300286990454,391.21541197742454,310.95432871002237,366.61256378311197,460.27283658875655,348.1527973093155,401.4362179130068
|
||||||
|
178947368.42105263,479.48699818142893,340.0148061013618,419.57024528651687,474.4810860798465,395.29241936664175,432.54540615819826,311.9640049630585,445.3570511786892,397.2670020097015,332.0863424083037,364.2173112567276,380.08874290496357,330.054766972557,389.05330242369314,454.1317445924738,462.27170595130434,346.9222939464359,463.6604668631779,403.89540820636904,397.0810687823474,450.2487043713599
|
||||||
|
189473684.21052632,413.08813853640305,394.0170016808853,435.6358130097059,431.35094690075215,403.11936367383623,433.13129679565617,346.3964480444005,373.1244920064538,455.71847329014224,411.3638133338944,415.65434852118636,394.58050807526234,447.70624533997346,397.5156592883562,394.02988219971144,385.4444457629381,341.7352494876471,373.3940197354869,450.10979184467044,391.59509962830185,441.37998921058846
|
||||||
|
200000000.0,453.1080168030764,270.5458061036311,433.28080914051907,474.24051839113235,374.2362935433262,468.5098656776704,343.5146443828156,470.801119156574,413.1319475017096,406.00079163124684,467.3962741572606,396.4504235559388,406.2538125954176,420.8539507294956,453.02776546227307,427.4598434771362,317.38688211221444,449.5443787057149,426.8843891855918,346.8315834183442,442.5394962022179
|
||||||
|
21
results/AcrobotSwingup_large_net.csv
Normal file
21
results/AcrobotSwingup_large_net.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9
|
||||||
|
0.0,14.997875213623047,22.64777374267578,9.069900512695312,16.02065086364746,8.749673843383789,7.985396385192871,19.399980545043945,25.466238021850586,10.052742004394531,5.448733329772949
|
||||||
|
10526315.789473685,16.421361143084198,24.323532428437655,10.503180479243852,18.04650749460668,10.653193803617624,9.970870268995515,20.84834999952806,26.65931922342329,12.512713234881947,6.73330184546173
|
||||||
|
21052631.57894737,43.786364010496484,54.70056261741389,36.34793663522296,54.9251366256842,44.66604045202693,47.888765687138424,47.09318577062721,47.46074067568019,54.11783599948454,32.433691136848566
|
||||||
|
31578947.368421055,81.4291699336135,80.37104636868283,54.112331292522335,86.19867213408868,65.76718337053738,98.13559486892417,68.48915848791229,61.90312541777739,64.06631099612264,75.72074066783584
|
||||||
|
42105263.15789474,129.09014698730942,115.75289394700296,76.77785130980273,127.82936567406576,88.20409984651366,166.64402191427607,98.09406462823586,103.9076435153174,91.3852965499886,130.47929174649087
|
||||||
|
52631578.94736842,150.20228605112706,136.44764815799252,136.12977260640122,171.5069108321396,125.93061622527333,200.90067326048404,150.78127188349035,144.71472782709756,120.93527968409484,167.46285533029948
|
||||||
|
63157894.73684211,171.26101931351704,149.6937772024041,188.44195035760424,212.81370249407115,147.7683667471204,213.70612012671305,231.04858050080549,196.19003152954613,165.7872897303996,188.89658950298116
|
||||||
|
73684210.5263158,200.03432094245903,190.0013426940197,213.74224950137892,271.4228479868818,157.33899557309798,233.81502917710267,237.5772905211452,240.1017610574363,192.9443470110441,204.56975645499713
|
||||||
|
84210526.31578948,231.871344522426,243.87249409657107,256.36375859371515,308.2390486883655,178.40187106337245,246.52417189592802,232.52239951664723,301.094158884561,210.77860412472174,221.35252691999366
|
||||||
|
94736842.10526316,272.0726253861288,272.26729493202265,296.86755699216496,312.9224369275768,209.70951966132318,292.8027400372761,233.73923662915786,370.69949457866664,250.53326213194723,239.97383977262268
|
||||||
|
105263157.89473684,263.80536321697144,311.40825209541663,292.478055374444,364.12767256676653,232.32620461140644,281.1642440796889,270.43256771448904,364.5955663533092,309.34206971815087,315.7421812136913
|
||||||
|
115789473.68421052,263.4286612659032,368.9433611179818,337.2539028925744,374.04440748642025,249.305725843416,303.6707088641017,318.8717477775677,371.341216411617,333.80851950448965,306.72116583866426
|
||||||
|
126315789.47368422,290.39447543924865,393.3537955726613,353.98339856331364,384.85330331028337,278.4916819307613,310.6599941682947,307.7627760822423,355.27924364227334,363.4503751289812,311.8980857685333
|
||||||
|
136842105.2631579,286.2507367091991,345.88199391647385,376.70570696671587,386.11578597883766,307.3992125968194,369.25360026823523,329.67726773708813,340.7835824281555,333.17629646726584,337.07161045272596
|
||||||
|
147368421.0526316,283.16624622860115,366.9028443401871,395.8961625282454,423.6658177455047,319.46504163643,400.1386920880743,400.0631105944391,390.9987303386765,401.3115719381793,323.7255555939476
|
||||||
|
157894736.84210527,310.7397505134922,398.7199938134639,361.5912186629719,400.0206518485275,331.0902439907317,406.24151489287203,424.18774740220437,443.47634984713847,416.64415379268974,316.58676440563886
|
||||||
|
168421052.63157895,317.1652906938273,353.31107945587496,402.1310449663622,444.15464952315654,334.0951133395166,395.45394100442815,432.74129373148867,443.27765228741714,444.0412761986751,349.49186711826485
|
||||||
|
178947368.42105263,344.2673961201533,466.8299807719577,412.93315888177654,438.2525572678554,390.5275672771594,479.9510474495611,437.00640374455094,437.52690303796214,438.4576053308153,354.47546324786055
|
||||||
|
189473684.21052632,351.79817867691827,389.941742487066,454.1092313133448,453.94111891747184,364.4102775087977,449.97805472226025,445.0261040863238,369.98232314609754,445.83513682981607,361.7291163216007
|
||||||
|
200000000.0,372.8020444449626,374.4920755609086,444.57303354457804,499.9412924349308,264.1695065106216,490.0502140271036,446.8819844769804,470.0402107317197,378.77983861063655,353.42701962433364
|
||||||
|
21
results/AcrobotSwingup_large_no_gauss_data.csv
Normal file
21
results/AcrobotSwingup_large_no_gauss_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2
|
||||||
|
0.0,15.827186584472656,41.008358001708984,20.326847076416016
|
||||||
|
10526315.789473685,16.102586151376737,40.78576780318017,20.700928504932207
|
||||||
|
21052631.57894737,20.984169122213473,37.24624818101154,28.7622204513142
|
||||||
|
31578947.368421055,25.82256714434175,38.424179403952365,42.341857699176856
|
||||||
|
42105263.15789474,39.48025258162015,40.716315923841705,40.002442529079325
|
||||||
|
52631578.94736842,43.98444773285673,52.99415687117674,45.703030203930886
|
||||||
|
63157894.73684211,39.27736930485164,55.82250727950453,49.03863238927153
|
||||||
|
73684210.5263158,31.23113632902833,46.201162365520595,48.75332018531797
|
||||||
|
84210526.31578948,31.826514436349974,51.90484813293262,47.91930922213684
|
||||||
|
94736842.10526316,32.53973341677493,55.130621864824164,50.7900108620318
|
||||||
|
105263157.89473684,41.36943751460049,53.401270436117855,48.44223592207455
|
||||||
|
115789473.68421052,44.714684227558884,60.012540589130566,60.92519295511772
|
||||||
|
126315789.47368422,42.98434026097657,59.22982636841215,61.84121357259492
|
||||||
|
136842105.2631579,62.99426908009892,56.587936767580764,66.59265598977744
|
||||||
|
147368421.0526316,77.92241949874939,61.252681688609385,90.14325877082977
|
||||||
|
157894736.84210527,70.37777244506037,77.21775193396864,78.70447894526319
|
||||||
|
168421052.63157895,74.66005499673352,77.70293356441064,72.44301822938417
|
||||||
|
178947368.42105263,71.79797424576206,105.96007828187416,88.03305291743573
|
||||||
|
189473684.21052632,67.53728627341276,144.68886711541305,78.867013961861
|
||||||
|
200000000.0,107.00897326241983,185.26694365394744,75.47260350067364
|
||||||
|
21
results/AcrobotSwingup_large_no_norm_data.csv
Normal file
21
results/AcrobotSwingup_large_no_norm_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13
|
||||||
|
0.0,34.07563018798828,27.39107894897461,32.80567169189453,11.473906517028809,32.6915283203125,41.52360534667969,10.221007347106934,26.018232345581055,31.15937042236328,48.56639099121094,38.697105407714844,37.666847229003906,35.27167510986328,15.40784740447998
|
||||||
|
10526315.789473685,34.693340425057094,28.466272316099122,33.767087112193785,12.005439707666243,33.91737214306632,42.32214588046569,10.915249518949508,27.72251344163105,32.51209914389162,49.26564487689007,39.760808047743055,38.449470154290964,36.636273307111786,16.540240736355763
|
||||||
|
21052631.57894737,46.605396069317976,47.38682833124066,53.10789091414527,21.82512508974594,57.11047710593883,57.38776610069328,25.719181836992917,56.66012974326961,56.70195945948775,60.52967690337976,59.367012935951145,52.29420167548257,62.194769111407766,39.843515834154516
|
||||||
|
31578947.368421055,61.968588091887106,58.24731394931303,85.26092298027552,31.33459617987977,81.91333190423961,77.7057583341143,51.54766722090563,66.77549569180799,72.17442518026876,60.328853091467536,75.74856087581934,64.19739945540385,88.35469828818479,75.29088707734674
|
||||||
|
42105263.15789474,74.66357869405165,75.67919393754732,113.25921304214364,45.147771838711904,102.20273710147497,108.11046912531444,59.752879019919526,96.7805045069585,87.43265620195965,78.97479865788753,88.9880455654744,96.83215563746371,118.5438042279756,75.7881770619395
|
||||||
|
52631578.94736842,84.38171850854835,95.46336910881907,122.19150933140368,68.83146035619637,125.15598079184498,121.37136074962544,85.46424122311566,110.78881949954086,106.9781938457134,102.29312318148622,118.2612948984403,127.04891613806879,165.90072834355513,80.1240680132505
|
||||||
|
63157894.73684211,123.67189274468252,128.1529060870079,156.09194638111586,97.64308609146822,147.72156348370453,170.9338825688468,99.98624851077878,120.44933217999183,117.98263260495133,128.94726856639016,172.74328770242906,162.9343608088606,180.4248389148481,132.76766757320337
|
||||||
|
73684210.5263158,189.44089225260672,174.60681953382294,166.6089508168585,123.16339941591107,204.33567395748526,203.2163374052071,117.81474217694057,131.1928168328639,123.96500625182719,172.14135926850452,237.1300986244903,192.18679065172694,182.64418950817262,191.21565810836586
|
||||||
|
84210526.31578948,253.90335122733237,209.84569630761555,182.1127264123214,155.9007204186553,223.43832868915516,232.48352513551052,151.67509922624626,159.19444194923148,148.44663414697567,188.46213115516463,244.5940771317548,207.41653629022952,174.54944984569443,230.48675230830662
|
||||||
|
94736842.10526316,282.23801198502656,240.32092558165337,215.28548624426375,222.94720855850593,223.134930211091,248.48728750918545,168.44175812073692,191.83701406423404,153.52879191658502,245.16496599331458,257.64387554974604,234.5667038270146,217.1242185006759,238.60946873036778
|
||||||
|
105263157.89473684,305.2189948101453,260.2947119100437,220.2285285872766,264.80692383688245,272.08870855237967,277.20314323959917,175.79611189601493,233.0521011908959,197.3219953489766,299.6245480924432,260.8581662620534,274.84370013006503,291.4581044337756,238.92622016250593
|
||||||
|
115789473.68421052,306.8129309732828,273.96435129427846,219.93118351400724,298.0797184146837,298.53007116690895,310.30800368003236,234.46898241010894,269.80816667025437,235.87325119922696,296.2212512142275,331.9783073717372,290.51499383807845,302.25550691275714,252.14250298376888
|
||||||
|
126315789.47368422,324.2546781159504,307.9851759500451,290.2495415382439,333.827041007145,300.6597147621937,330.46183921359585,274.2407316411962,292.814144066166,282.712822567393,345.21611341049805,345.9078377211192,320.2862079856799,326.65752984744364,239.48515251534795
|
||||||
|
136842105.2631579,339.4164805219114,311.8416078567835,343.17631575234054,338.82721692043,319.2855823922521,340.98611734473144,273.45342522248666,315.33359135369517,311.196819595683,383.29897767140267,295.34617374172825,338.84044231767473,246.53617612712094,270.37147120673245
|
||||||
|
147368421.0526316,329.71180019649444,344.8635403893661,345.2689325896987,360.44592779395987,340.9938912584841,352.1536906028058,330.62013896465965,333.79402634509717,312.9454590630333,404.7856094171796,337.3338157097059,340.11112961709665,364.9926313684424,293.35603970239697
|
||||||
|
157894736.84210527,335.3094508413463,285.7293966853387,335.0984972187852,350.11661431969367,343.54242439910644,365.5857397980802,308.43075415764486,326.7233897539222,294.8016482623,363.86529065829563,347.4976780896702,342.44104630200815,398.03893738060447,336.786482956601
|
||||||
|
168421052.63157895,349.09581163649415,323.6347668943643,372.01395059358384,359.0228395646959,391.23012770840336,389.57356282152296,341.8318839046764,323.18437809164834,335.49868272216037,388.48498515871427,391.7063230794553,353.39474565368613,436.52063496819494,336.22423563241296
|
||||||
|
178947368.42105263,425.58226314385183,359.20615979121004,467.4081493313128,370.8203348896345,437.96621125812675,375.5092861071188,332.4503836569859,343.17310706548744,319.7443415611735,387.809891144242,424.17714543165926,367.9691014586228,372.8765975498592,311.2588231146666
|
||||||
|
189473684.21052632,401.3172480181644,451.3966784729852,373.37162428764094,432.3465845749979,378.6558635910132,460.46913416696054,421.02904573669065,416.94284818542303,415.5061075892145,469.5598077610589,458.18582062542936,411.85529273996065,470.4415187763045,411.9905258696165
|
||||||
|
200000000.0,424.9263538373144,302.3178994608553,416.2834781361254,398.13912684666485,415.9868317575831,449.3516834597838,422.10039517440293,437.11209053428547,389.64984067176516,426.2361208357309,452.2273449348776,407.2533179050998,441.1629843805966,346.9261227673606
|
||||||
|
21
results/AcrobotSwingup_medium_data.csv
Normal file
21
results/AcrobotSwingup_medium_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31,trial_32,trial_33,trial_34,trial_35,trial_36,trial_37,trial_38,trial_39,trial_40,trial_41,trial_42,trial_43,trial_44,trial_45,trial_46,trial_47,trial_48,trial_49,trial_50,trial_51,trial_52,trial_53,trial_54,trial_55,trial_56,trial_57,trial_58,trial_59
|
||||||
|
0.0,11.785539627075195,20.944839477539062,23.589067459106445,4.790501117706299,22.078189849853516,12.267173767089844,14.26957893371582,21.009227752685547,19.944807052612305,18.501026153564453,25.534420013427734,7.656429290771484,11.978395462036133,30.33766746520996,21.306434631347656,19.665122985839844,14.038528442382812,23.657506942749023,23.632793426513672,10.11609935760498,12.907320022583008,23.30079460144043,41.12226104736328,11.626574516296387,15.905196189880371,7.155399799346924,20.783321380615234,15.256168365478516,14.738500595092773,12.945549011230469,22.072715759277344,18.5211181640625,17.723697662353516,20.409257888793945,20.436363220214844,23.49307632446289,22.489397048950195,6.639081954956055,14.212964057922363,9.581293106079102,10.174055099487305,33.60285949707031,29.099143981933594,23.81216812133789,20.28188705444336,3.913780450820923,7.905327796936035,5.953383445739746,19.593669891357422,18.583890914916992,11.005389213562012,8.893831253051758,7.85148811340332,34.27123260498047,18.74211311340332,2.6329994201660156,16.824853897094727,12.390493392944336,14.423970222473145,11.830799102783203
|
||||||
|
10526315.789473685,12.521245742531447,22.256282868632567,24.55918144395993,5.219488091394047,22.75625219845169,13.046124171811334,15.63517742965711,21.76460932920638,20.91708797276784,19.44213320676062,26.659236155708328,8.161998571069535,11.932931026639906,30.80523804897685,22.326499849258617,20.497244311377944,14.615842791815957,23.89519117160125,24.992365579773516,10.1887474426267,13.24259702148757,24.313732488380122,41.98179503458029,12.7265004822004,16.209588141985485,7.572079551724816,21.81090338517837,16.833174458160542,15.520192655329684,14.400838793583029,23.092565321918176,19.737784632953748,19.30846388389323,21.658559575702718,20.937224207368583,25.03969297730403,22.628890859293353,7.072514006635819,15.42048837645676,10.056394211813766,10.732709103385167,34.123126862885385,29.98765126550751,25.267955187006425,21.884178240863488,4.624378612145674,9.476415969408183,6.272993208236807,20.70372067752018,18.8132984805233,11.570610807282423,10.485109203858844,8.054299781924511,35.11227128675357,20.14071335479968,3.0408982231341035,16.79412118251593,13.366995056616426,14.714745934430892,11.781103757573833
|
||||||
|
21052631.57894737,27.359905527455737,49.479318940552815,41.858806247835844,13.839042351031651,34.94364612770378,27.764799880612014,38.974657996663424,37.03851566007593,39.81399748218439,35.757845150239746,47.356898906942554,18.77251762203315,12.216029037987179,39.274909262553145,40.153752324674436,38.17362284631445,26.165192488838926,32.342252766326524,50.03848534758484,13.340847326829577,20.57834614280849,45.55574190827123,55.86350044758779,31.95929233429546,22.4659324892051,15.345003433045711,42.527423986375666,44.96653949669523,31.570928304458757,40.67427721549434,42.73474488706635,40.36491466885293,46.17703216735511,45.47381193942063,32.415705007075275,52.75080671444164,26.9227886835624,16.150873613128454,37.98825536172971,20.124975139940794,22.849271784621678,44.29268284994397,48.548042858674755,49.458809519697425,51.59208857794383,16.537196890448435,35.62109787171897,12.408426848748427,40.29429169090005,25.139777841495345,21.60870932931096,37.92850724483874,15.746648905370542,51.34906750471638,45.55639018467176,12.545405540114832,18.44907268771381,30.79988395992326,22.440436530600294,11.963267911586735
|
||||||
|
31578947.368421055,50.19984218562822,95.7627434473165,57.44908338787236,27.490606025702366,46.31622227355632,46.42271329362431,48.928204724776705,63.00824045449744,66.87143983536109,45.18657296955834,66.36613860713973,38.22286283433272,21.760822755684487,47.23665355575217,49.001069785753614,67.30844743241275,46.722584111394156,70.83334652181726,74.37853096312021,28.897429747997148,36.768034405469734,87.24772420106586,63.39912562013458,38.99176335574187,34.148407234183395,23.03103437141575,81.61494452539327,61.37473770072091,57.3178523594818,61.37094538101613,70.54656969803358,45.375053538561616,55.36329639108641,70.50818928267157,57.26184309821917,70.64682318784111,47.023234357040934,40.141925605751815,58.13240398075483,39.35413919562613,46.422374490710425,57.443876964883714,78.75672523852778,54.05312604729572,77.97363730860549,16.882866950478622,39.18201143632017,21.974023716095996,52.12763368309453,46.8986691621821,29.569498028224366,47.83528931583395,50.204762281514604,75.5490172869446,62.50741971310981,35.72511847835251,41.644241008009786,41.4900454131066,47.981161816680284,20.521151958208872
|
||||||
|
42105263.15789474,71.7205637589386,118.302632463936,101.3021050756823,41.85173461216804,75.62955314020041,77.47314474226036,80.08230674894233,94.32690992340487,97.53938523190834,74.81742938395352,92.46111631979574,49.13239003230825,31.620140676869084,63.71013470691658,68.76809601126615,70.0006458041493,78.44450518480629,93.16119912332775,108.79861944261681,30.88315011556789,60.91928814652885,125.7514024069765,114.6559663261403,45.80562259574677,53.19936465218127,30.523363000137984,144.6606092083157,74.9237344968352,77.54316764508589,87.09789742838973,107.27080620036891,56.77543164269581,87.56150563445118,82.08937283376247,68.4969939636144,87.654038446927,77.37261080023654,90.40144906768839,73.7096477901754,52.11238302661937,53.35492494999537,70.59013371743325,94.91051908378127,91.85290913015523,99.01999473720377,17.911084433816313,56.15153539684341,39.55601663288978,76.28479543113643,60.075686107547,45.84764881770532,70.11813512910932,59.67213562618002,117.8326490924299,74.94229583070715,47.04145131621335,79.2642127621372,57.320525521799475,67.37550589916944,27.81922529158995
|
||||||
|
52631578.94736842,109.78308910789829,129.26172621110635,143.15915463621266,70.35662907563211,109.9685235915917,102.62451407835506,114.99960360120868,126.93669588186899,128.24316364514365,112.73028668901597,135.06876942480287,54.42914115612774,43.77966931216687,107.9545935538089,121.60569097819916,105.77133458014835,102.41545599575188,136.87449794553652,146.31507720490737,36.22220644149465,101.05313528900827,142.94836430338282,120.70437212296635,83.93320925632837,92.38767512342854,42.51138626750032,185.94786720824044,103.72868996765722,130.7558528629449,105.18739593431478,124.67650402260949,74.32735683133315,119.36600999503172,117.20324663696114,117.50127174950867,114.04186146522657,114.02449641890638,127.76820277305026,121.96723127621033,61.59412244536045,61.62199675129763,102.32576925821417,150.62751116878107,154.5477755263324,140.28159557518205,36.001864991019225,62.11321556833353,49.448539615595855,120.0330300271057,81.38011535655428,68.79558897083444,113.73096821822453,88.71656178920388,151.19500614170224,114.94237780649411,61.21451652956058,108.6644107342757,97.49090110531226,106.07466130311552,47.94170696323433
|
||||||
|
63157894.73684211,130.09573982561065,134.49164372715595,143.8041810326464,112.35440654594484,133.75277536496563,114.61320994587983,165.90329273123493,155.9917827339384,195.1349673762381,135.91990100057834,171.5284713233442,73.74760650218029,61.678494699196165,193.3532841608465,194.2357592271471,135.63697518404172,117.92179158144215,205.42551425627727,165.26021125128395,49.21965828575091,120.97795466637021,164.03316881303311,157.47767474421835,122.64910768777382,133.74001378687797,67.7638516430008,213.72451048536317,138.0508896129366,173.3742485086013,123.12391150199971,149.00704184397438,121.37970130694544,146.97009355754403,169.96131970463037,154.77483620356324,135.1718171618652,137.94731099569236,204.70469173863327,143.14453568410676,99.241259826435,98.25607884590647,134.35030867146986,195.68202826702696,192.57857306974418,162.87413804369288,56.59804398355969,86.14122690751613,59.62274795285636,153.94289153195155,117.11878372749464,119.8802781827397,144.6688488854761,115.76022774203993,164.79035527934,117.54453968366097,88.95088400792879,144.44883853874052,116.27139919017985,160.63054338221406,70.4290694657125
|
||||||
|
73684210.5263158,142.05119361358996,141.2739432390129,153.58242456284754,129.3530168546534,155.0370520251362,156.3233502617008,198.3129222332358,206.60054306590987,227.26284406096818,150.54703921389216,189.54458045369205,79.4871461187332,83.15570831575387,215.0559924036017,245.31307558826794,151.79886007036532,129.05333838950318,229.52851920795737,161.55290783149218,61.09510687228881,175.4535785511838,169.4010533898491,175.45690722800688,161.86104411968236,153.39613515741277,95.343237056255,250.12611998233768,175.67181219148176,186.91283098837346,138.30727918957905,168.6634191151636,166.74818455842725,185.13076690169586,196.06328659296202,159.00106996958277,147.53227785943784,144.29380331149227,252.92380828755054,161.83021639927273,158.37642650789172,134.95020751960556,161.92485472865383,219.44614581024877,204.8939925621669,163.87479544961388,62.28339529161309,114.36035383346174,73.26770117102899,184.90799060567102,133.55651756993126,149.25193044167642,172.65737803536274,127.64791199507145,187.47890424728394,127.4667265804438,100.58226624824664,192.5378447574179,122.82138388193216,166.08783444630143,97.68133479833521
|
||||||
|
84210526.31578948,156.98382791852026,171.49205648370727,167.96290058541496,169.58965982195414,165.14236203686352,190.55570776706918,213.81650753067473,227.07257700461759,244.72462381154216,172.39616706166572,210.56055105525041,86.66916269460214,97.76003269343497,216.62488078542694,274.47087871863243,187.37852013342268,174.906962667806,234.72810939771648,175.35142183634053,71.09189684123544,200.36326198828849,177.65034631480802,192.88590952960408,191.87133305488862,181.823534754835,121.08353809372541,260.37844490344503,197.72782769427735,199.1000146198801,155.6172585609547,188.3333606706762,216.63439847656895,237.46044434173615,219.88954279231234,167.9451337943777,184.12010345003281,171.86817226713714,255.55514081099025,192.7677592491509,177.55450556681097,172.40522007433663,184.57939248732254,211.90057499223798,220.12632777188955,176.32980910298568,66.19763033723567,147.89932422294513,91.83561926452738,227.15824218866237,148.3560436947524,177.9017477345929,176.77912095940346,148.9836894897873,186.8724168013998,138.17522092803364,103.79728987573587,196.31196953286093,181.15175239960575,176.38314548654898,134.11560053574414
|
||||||
|
94736842.10526316,177.50703368738417,180.07163142811228,170.19788439596624,230.17199015212853,186.7152641999672,202.32136867358415,242.52110207510128,246.5426962615463,275.2746549791081,188.79378135617915,248.76160812353163,114.50940864853087,118.83732693227539,222.19905515460923,316.12739749001,186.60792090547713,221.96615643378274,279.25633912597505,208.70377108124484,75.55631991562504,233.0685064294414,206.0960170440479,226.05493699930545,213.6214276380899,209.034883158235,146.54324803218617,263.45004389953084,218.7632884669667,220.1425207066156,167.06603481944578,227.15617701213117,256.5004989509355,239.16989794150615,228.28495687547647,178.50618889956758,207.98876260939232,185.47858896352082,286.9083827366624,206.889544396024,186.57727016318702,199.4551273523275,207.09057484373162,224.79322197220168,252.97702940002372,167.3617957905967,73.43771980579045,176.89539620552696,117.57588874870638,230.66892304091903,160.3239713432964,189.4453628971306,212.48472390012233,187.18904126363282,203.79207052084053,158.7002833554124,110.15281374267511,201.272744946739,225.88635731172693,185.86140196875687,158.33122381151054
|
||||||
|
105263157.89473684,187.4822156936178,168.81655327872556,183.96053641076892,259.9417879020078,197.78698479980642,200.9632009822576,252.2136458176655,260.4845025263665,312.45136727520634,200.15711312272543,269.80010229596803,127.38773361030047,121.39551918328304,254.8371876923166,307.9046038826747,216.54950229489242,235.1073884034421,306.84854726315865,218.3854012038569,117.49342332053877,249.15439719770754,231.85994625562117,247.3223593897106,235.77957139929907,245.79210845527555,144.97472224489803,289.6301629508631,246.8354558282612,249.4250682894543,186.52417960787744,261.16505889978436,270.0852087121921,231.29987899905427,260.187934553359,192.83863506843838,214.1980406263033,191.01341612765003,331.32259322170404,212.06754543279348,198.13864791566644,207.2465890195231,235.2270682571337,243.0753881094198,303.44697698942514,179.47131669958873,94.66741403656653,197.60686232657312,149.48907171086591,264.85335073527204,175.20055618139185,208.9073012083191,236.6283622346096,212.45229872062265,212.37005870139168,168.72790700131176,111.51848024596303,218.2907471880523,229.14288076940005,190.20035747709036,182.12844445558466
|
||||||
|
115789473.68421052,204.33618636458203,208.97937327392214,203.26508745633663,262.53028909197474,228.25160244742918,239.83261680223274,307.8960346965083,291.83622635550114,318.1760849858585,189.93487178135942,272.6035751122517,147.29058455785224,131.56432024395696,284.9344873376335,334.6258354376062,242.79576449764897,276.9140351096431,332.6072866247962,271.86184722048915,146.20610334276327,288.3537132783775,223.48604851317208,279.3637355322075,272.9982034846769,264.4160739589596,215.220591410623,326.7301407122876,260.74825026016487,279.50744292878875,191.47800940857203,308.1976659703288,287.3858875845278,237.58001807282506,268.49753534917687,236.11503538427425,263.1190391036777,217.44360463634916,353.33575785234365,209.34603341167323,225.0007956305369,197.30059046362246,260.6699765298175,287.34515616925137,323.88860872057666,182.6750216743705,139.93921522926178,225.1112028043356,196.24649667013384,303.7260773763927,204.40207447499614,214.62775435998336,231.77151989115407,263.9149778497268,263.2551559860356,187.07528430326659,124.55479483455007,215.15620480618153,264.0115539427444,205.88322556943444,208.21420388790543
|
||||||
|
126315789.47368422,224.27290287258884,213.94084420857996,212.1724990754907,267.4558044209705,288.073020624322,294.9917391604664,281.01059953856,285.1447145780367,353.9158715306888,238.73978208521407,259.65260402251477,172.69965283131006,134.63715194533077,287.3527122666631,377.128634861962,243.58635720899562,304.760246725624,371.4857180663423,286.60728735342576,174.12479818593766,310.08049941161994,230.00405312393514,316.123489933331,295.7356850334812,287.8535455402575,266.5603330914664,350.49364153235905,273.65543075710787,301.18170159774473,229.5719277843875,330.85581437735675,277.90954718926605,248.3884120443191,290.59740516584674,232.19719084577213,289.21906135683247,219.5619089050306,328.959751304497,278.1110437177226,209.61117229402225,213.5325285718052,297.6727019769664,297.0418804474484,331.8711062081963,238.0768505499965,169.32207323094815,231.79873708030854,199.25926227450697,340.2057539125559,218.2615093436928,225.0138333202069,293.3826207906587,274.8790082218244,276.67763111862115,208.71575643596887,144.980948954573,262.5158955570404,329.17942566191397,270.61722003455975,231.61985431566134
|
||||||
|
136842105.2631579,204.38010185201574,233.880563097441,251.9661883152093,288.11714197897516,301.35627189839977,302.1487548557675,283.20209472430383,308.6939438982684,312.7614157887541,289.99760320378147,316.69549368689275,186.9147924491078,135.96301089714274,335.63530246158064,421.7878439278649,276.05110477860455,336.82433988471774,379.132361794195,266.17172882051676,217.702279388368,308.3334091058068,283.4567099456725,307.4117430953768,312.57243897280864,330.12590011690133,268.43772835381475,393.60475318179243,283.05019038767034,378.11122529311865,245.33570881852484,384.1772100708327,277.21013088049654,264.6887591629808,308.60342785938957,257.6098503650391,294.82469220892875,266.72664497004325,308.0634727055346,306.3786484911171,219.61969740849787,269.2697040357749,333.9351530465558,264.68840490087575,367.447809548094,257.10414108910555,180.92165099823245,201.71280920592204,232.5189837320525,362.8353548965461,219.4646458754992,248.13732507407997,301.658544296538,286.99322312277775,269.58465626629436,258.1865828535894,141.41744397242147,241.2628508800449,387.8847122683585,315.2520664345855,234.0036096096534
|
||||||
|
147368421.0526316,209.01412421563987,269.1088501650046,277.62140132680827,286.3812462845998,304.92271114749593,304.5740827620855,311.66304726977097,363.82818948859324,355.8465009590926,337.3051970617593,363.53718839160626,193.68300695837038,155.41928049030398,358.85109749576725,353.6260774142193,313.36778022031046,334.61180174268185,396.17468280739405,253.3930328178769,242.55612659479112,316.19599434097717,283.3968463571448,271.94968776227364,279.3344040309953,373.79561375713087,360.1616574241845,416.8008358777395,188.8068022620644,402.13552527051223,260.17150159852986,344.99628295330456,263.9971654071702,304.5801596341041,320.50696164666783,301.2938507281843,321.0130844740326,297.0878530549211,316.5158826345552,303.0372310698528,246.50805036009515,339.1457488394841,293.5122398260227,330.566440542484,354.39843065883974,276.27822203244864,184.7441100037329,223.14476646272433,261.6821691578775,330.4065732496927,229.5302502490642,264.84120489000617,291.0414294877871,273.45942357438423,310.54708901402694,286.3682355510891,158.15299937236344,302.50970089270476,374.0639517163636,304.0609434238761,268.9613715258332
|
||||||
|
157894736.84210527,215.1030660117267,289.763091495823,305.73230834185586,316.6466577644989,331.2296462570862,277.30848388658666,344.0689388087582,336.82872416487686,337.24014171206716,393.52752171419667,310.6581031430791,191.41533756218953,179.36305265396916,356.5080643377971,410.7615501263136,354.4903463050104,387.5390976777203,418.3770597886676,243.5269136584449,274.0524370534103,355.5813758495442,296.22212664968754,208.55149613163968,325.721032025081,374.74396902231956,333.0372606325182,361.22691870775907,340.33279469122203,432.9252212346426,277.5082257670709,364.7816114776518,287.8339680674334,295.9136459311455,282.7450615151932,338.94229687614126,299.85669886289867,262.1307636130384,435.8061072797492,236.99161491652433,277.8856634701141,315.0719299669741,339.4638539322361,359.3706164003409,371.2352041375109,304.5772513735658,209.5413842926478,246.9610502694287,284.7601460336483,386.50880843813735,238.10788500812575,310.003999425929,288.3094627606241,285.3150259283606,327.36437099742756,241.41174038113647,134.67995923278733,282.21453795868933,328.59135473558774,288.3819711018467,327.2083716369402
|
||||||
|
168421052.63157895,291.8934986716823,292.8032875404464,303.6196940453759,310.7215798026637,338.8408755542829,338.22096556507654,377.3762954516424,363.7390118305703,388.64554795664105,420.05065055963405,378.4726949213615,207.1002508576887,192.2559747293055,362.39914556487446,393.45044374862204,377.83641522214685,370.9075768396795,448.9780755109074,272.7927068779013,286.2883612904852,385.07544864776065,294.31664315221053,341.0183441467233,360.32159218167334,350.3668509203311,307.06587868185915,422.5287315997391,238.88580998819614,402.5153885759475,262.52917306905306,414.1949773812228,278.41245042716366,284.3255839545971,355.59817077974867,319.1476166202091,378.6395647796568,283.31147490710106,411.21407767478115,319.01904493223606,309.0330916705885,245.08074630066295,353.4125390845323,337.34525808535125,355.4833657946283,331.7615326197194,258.2921636117793,250.11618260003192,265.5470217007349,396.1393336377976,280.4855040473621,335.74812761238076,312.44806572076686,318.4781535721882,330.91542130097787,275.1197319413817,176.67198796021313,349.0430592275393,383.1076037177089,284.15803462844804,350.77298175595143
|
||||||
|
178947368.42105263,323.4867910341543,306.59477210053115,338.9742059683701,272.44762310014204,349.6202698180715,356.21242533438425,415.3843328445077,406.45503196706403,363.5525320651128,368.83238811341016,322.88656639103414,210.61926726819405,195.73444180021326,393.5083012118564,353.9615836264024,354.97095883817224,420.2413749036036,402.9848650800389,278.0039345709571,266.9015686836434,405.04649478519065,278.25217117133894,363.5661960510006,299.80019076601945,366.57334774542716,399.6004834398009,315.69167166857505,367.6714489875243,363.48987470785994,266.5817015587953,450.8775974322224,296.9991092657944,344.28410071522575,365.41132432461774,332.17150423071064,341.8766899444885,281.2321838731581,384.17475245880615,253.21313249643816,361.2629809699891,367.6414440762964,363.0169169056614,308.1073904992636,356.2537413179214,326.57385522655505,254.51422596563924,280.81164139855935,293.0028404293794,428.4441351449721,239.6546129898021,311.5046668029558,326.63328962271567,282.41087649659437,367.72824119555656,271.8521433210109,206.54136326640761,333.58757517542534,422.88554984826465,284.63246393500935,331.5176258027719
|
||||||
|
189473684.21052632,291.1987604441075,392.81539487475504,303.93205805663587,314.49674255233725,370.4853718543317,358.1279445888263,370.9162837828295,408.34209462199516,376.9015187413078,421.59953840039776,401.91046565655523,229.21167370074343,215.54053490074386,384.1058138057466,407.30039984912423,448.3455321224112,373.25967925357685,357.59249067256985,291.3433544710072,328.6641294546735,394.9899036950352,273.3378584819157,385.9936726593575,319.5205416507668,326.6827584921158,385.2930974958676,335.6819358636468,362.249475315337,435.5602965889875,245.25459116218494,410.8941179183712,348.55669626403716,384.0513943154726,385.2983011131141,297.47666549534017,423.8761345331358,244.62352379116325,447.90094579454933,306.7216131251274,286.28587409928235,377.71188819028663,425.4318806633064,357.0208544665096,424.4997699016349,343.72069834523586,275.1054221634389,223.85850632116404,360.19619662593277,452.4859390943995,300.5850230996959,331.72917448351586,325.9052586149311,306.93792577496527,352.26298673199153,313.6327243098922,180.516361028699,378.100378198802,377.6579695760048,298.44257219990203,367.06861152708365
|
||||||
|
200000000.0,323.80988609477095,410.82074248947595,338.916478557022,342.94881952122637,366.69170150788204,377.3954415462519,355.05356556490847,451.865214932906,408.574218640202,425.3116951286793,352.9965223356297,279.53458236863736,198.7256152974932,438.31003459817487,481.4843272660908,369.4874466045907,425.41700550756957,441.6266595438907,282.14364730841237,295.30975777381343,368.03513252578284,406.2630075451575,334.8723376722712,336.7419066240913,414.0972129012409,410.93163867843776,434.83542439341545,328.48616526628797,431.39962866118077,281.5225012929816,424.4315608786909,320.1624299036829,371.7119784135568,407.4015055690941,345.5004364236405,419.4131177130498,225.1177396366471,445.5111840022238,337.37999242544174,309.45874505293995,389.9827517616121,383.01435690804533,375.1896372296308,409.1512322927776,348.62199642626865,280.99005907146557,273.87161913043576,364.73685055971146,427.10471819576463,351.24566193160257,387.2563348522312,326.1249270596002,335.38179971983556,355.090489161642,341.9574250124003,187.0701408150949,331.78061578618855,444.7386715020004,318.50800225138664,380.48608528940304
|
||||||
|
21
results/AcrobotSwingup_medium_data_no_aux_data.csv
Normal file
21
results/AcrobotSwingup_medium_data_no_aux_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9
|
||||||
|
0.0,17.307758331298828,37.547828674316406,27.00891876220703,17.306774139404297,21.148475646972656,27.11147689819336,12.319110870361328,12.338271141052246,9.167003631591797,23.911279678344727
|
||||||
|
10526315.789473685,17.864419524761697,39.06803220710678,27.329142395974525,17.728836702482152,23.123769769254153,27.811965015538842,12.99973274635311,12.584402318618201,9.805771621604581,24.471356855612505
|
||||||
|
21052631.57894737,29.213341455008845,67.41065765318778,33.44884940292531,27.89234048440805,55.86079928016811,44.259765687308324,25.886394279134407,19.283138154461525,22.713503041167918,35.75108932222851
|
||||||
|
31578947.368421055,47.85005487071304,94.13677850582346,44.793857524475904,55.930509796144236,62.01391163921382,90.32462446694973,43.72811458105031,39.81319235120784,43.460683282237724,54.5183416753801
|
||||||
|
42105263.15789474,65.17444399457395,114.06009310937985,72.8059797296894,83.03748225653932,112.3448231834454,130.0692117328954,81.23104351825002,44.70019634076268,59.90238050773864,71.60687793745889
|
||||||
|
52631578.94736842,93.09273944642405,139.96234875847261,92.08393842274958,120.52496370991015,166.27367624178157,129.4817153519417,129.5151534611746,61.83638626024375,78.03499792798982,84.329490289381
|
||||||
|
63157894.73684211,127.35687766453239,151.7497832778418,118.49689206381915,140.2242629423201,173.43377406783713,150.53439266207806,186.04898702190195,90.96116083699418,124.220185591491,134.87419924222536
|
||||||
|
73684210.5263158,178.63772263677168,159.28665598459193,144.99850499357547,161.67004588097745,207.0625903145346,162.01191289029458,229.92634071422086,144.09537592864598,156.24973023911923,174.31626097779525
|
||||||
|
84210526.31578948,222.58004579781826,181.84535330550491,177.88066614004384,205.6838482700887,226.51914660223963,184.2652988281937,282.8511699834359,177.0481427574422,166.61861493977153,203.33858888631383
|
||||||
|
94736842.10526316,235.2771218713796,191.76186071802704,184.81355733195335,288.1168865873543,213.87271481888612,262.9539938649147,329.8786438130606,165.87362296371577,206.2428021466468,223.4308408890487
|
||||||
|
105263157.89473684,277.33492727986334,185.62132651802577,238.3797582576975,329.5792514057701,230.78671148031373,294.2913171176765,321.5439626822181,172.01154988855535,242.44967724543858,219.0033588662042
|
||||||
|
115789473.68421052,307.9460944888334,196.83660464645092,249.20223194220392,365.42608106177596,244.3459545349563,246.85476109393747,347.049113987638,246.1799794863961,298.40788930472905,257.67848279502584
|
||||||
|
126315789.47368422,347.809756225166,242.00587657482015,248.1962690478877,344.42295861640457,288.91935710695645,261.3101041660415,393.9906976361685,306.75502706498656,294.1474340985686,280.94859180655175
|
||||||
|
136842105.2631579,364.8341567673181,243.57412176903262,316.8440445559178,371.4005094036832,292.3935806733089,250.12750055221966,413.14024274418557,325.0262529718744,343.15706575709373,330.67676127964114
|
||||||
|
147368421.0526316,376.5973664989432,234.88572824942437,342.1860892234417,367.29504693825817,312.1661194916247,285.9846863287638,443.6323015727825,318.17135258196464,331.9041370843586,358.2829684725759
|
||||||
|
157894736.84210527,391.15023676873574,299.9427468393649,362.61438464210306,430.82592337580604,334.20080076219966,325.78666906327095,397.1084731393738,343.20987882054413,347.52650701289696,355.7689700402381
|
||||||
|
168421052.63157895,431.4388437244701,289.24209603576446,372.89117574955947,464.0627012543401,341.34415942215855,334.354972790483,430.0620384599364,375.63122438922153,389.5687670456736,354.5264400128182
|
||||||
|
178947368.42105263,352.4585110446092,273.3356909482763,355.1215074213423,374.1417406955089,403.7611349403363,313.9370236661791,407.1033625446693,347.8211592581959,368.04207233007264,344.00061052608356
|
||||||
|
189473684.21052632,417.68170600502117,367.0253049960757,396.2376656786557,426.6167923804796,440.9451707977998,309.5667177953879,440.08899143990385,364.85273147322795,404.96625137890476,361.7230547717734
|
||||||
|
200000000.0,422.54157597924535,403.6577642889399,362.1906981876022,428.67707180192593,449.54696711113576,330.7473110933053,422.07590990003786,366.151764852436,425.28433222206013,422.3892536116274
|
||||||
|
21
results/AcrobotSwingup_medium_no_gauss_data.csv
Normal file
21
results/AcrobotSwingup_medium_no_gauss_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23
|
||||||
|
0.0,20.56792640686035,20.694686889648438,27.230201721191406,19.650291442871094,25.915163040161133,22.800601959228516,16.541336059570312,28.28856658935547,25.41530418395996,14.245672225952148,12.60469913482666,18.117097854614258,12.392337799072266,17.59383773803711,17.011587142944336,15.569441795349121,26.47991180419922,26.600513458251953,29.363418579101562,19.30875015258789,18.296459197998047,19.347705841064453,18.06810760498047,15.1729154586792
|
||||||
|
10526315.789473685,20.455478189998097,20.959935733867898,27.619092415574514,20.65590083594963,25.82039531088664,23.03290929870778,16.91543911850477,28.276127692874407,26.26887520562104,14.66263271412113,13.103804543157755,18.24747444365762,12.340303664887712,17.722366440447868,17.767642831450477,15.994366657600281,26.591578622201556,27.16763295894721,29.601874473125484,19.339223734755844,18.515797695748695,19.498976440785498,17.81188729734591,15.337385750767824
|
||||||
|
21052631.57894737,19.98474667805384,26.599500297757068,34.884383246756656,39.083428752616335,25.117819380995474,27.67150148094072,23.928852499098905,28.6789212039984,40.93171528212912,22.318657095618857,21.28815513432356,20.61353594037882,11.972843739062217,20.61344607008255,30.3429830559114,23.834606995563593,28.673132827593182,37.16521374196062,34.06019354258713,19.57685927047789,23.185324332586532,23.54986958903289,16.266374356095316,18.451559335891147
|
||||||
|
31578947.368421055,29.89590784257221,36.22366988143969,40.422587253658975,51.34459084172287,31.460397925093726,34.738744744932966,30.434024901069968,35.644452533295116,44.433872102421816,26.775225594201203,21.1771018187776,24.79223196313735,16.660436427633478,30.446008705882328,29.46459187197801,31.24291776439531,31.873688664411162,40.20703274927971,37.4359847060366,18.482554976578154,31.58732871631279,34.52606377186166,36.60535631793192,21.650181599528917
|
||||||
|
42105263.15789474,25.693784470703463,41.688210355442976,40.120689346891034,48.32060510877757,29.01029781101483,39.10866658620722,31.395369531581608,44.62680800312774,45.921255773248106,21.032803263381084,24.542978595044474,38.85680985013204,21.10714446503535,45.150223923914155,28.38664605491214,37.04762179724398,41.084357627004465,34.84686585286648,37.095269872541245,23.25593162193853,36.12825788103478,30.585764462432703,33.46833162236742,25.5586665504692
|
||||||
|
52631578.94736842,29.98410180230674,58.00781450843712,50.20170002527679,54.7083921126526,29.99497980334463,40.30055198098154,29.30138546984818,39.204851754835275,52.31242519428236,27.14147968511316,22.793377828322495,42.177156947398366,25.261695368808805,31.703358406056534,39.04563715144827,34.41158092603954,47.885991559670906,42.57657494333023,44.17904169815193,24.27075033290234,46.46227927326413,39.24186738295651,35.876836068463874,34.802591774963524
|
||||||
|
63157894.73684211,37.1513207678856,60.42686261744382,51.55921792934476,56.98365892399712,45.19351288771202,48.27100860907431,38.555211217119435,46.135930781714485,51.40481214790793,43.884528475496246,26.49556779081307,42.29194941619345,33.098247912575665,38.8559821986921,41.27332652750273,35.249141859071244,47.23070071129918,50.616800725769636,47.431705986797155,32.39677495160592,51.14264327826345,54.8855280071043,31.801127468057782,33.5308085133288
|
||||||
|
73684210.5263158,49.26544851495,72.5665120457059,54.79502974843756,60.3390900096095,78.69969272229646,49.482111447777115,55.16856996091779,57.573415225624544,50.90916306066257,43.175314648288435,31.26030163649067,42.747942266588474,36.05703218482496,45.1938174999528,49.664824606432816,33.43029099490256,57.870604091713474,46.025410462748724,49.12948376064155,38.61497010935583,53.32324912316499,60.31599218153764,31.12590754141004,35.77825318453344
|
||||||
|
84210526.31578948,55.723247431239265,90.70969067220874,54.84655732370479,51.41228573490708,90.21230147155698,52.66307671851068,54.436093773372946,67.33914173326335,48.451507732313424,38.26900785963291,32.70394029462106,40.638775928281355,34.16304710259728,52.05742089825984,54.01781494160108,34.127236337873086,70.39194189098733,45.52619843775216,45.07086507303232,35.37879041539004,65.15607181761074,73.32002550264475,27.97100908975852,41.001384672778464
|
||||||
|
94736842.10526316,53.65552625116838,119.57116091055306,63.67593567709018,43.95159997830265,94.79055795048743,53.58576762402776,52.065969897459915,76.87933387954067,47.30876932842167,42.40506146933853,33.70902853536515,38.806042419349225,32.99063246687384,54.654408582638425,52.91051000997507,40.050437248180366,73.2189207081154,60.02974500961169,49.98471180293864,48.22730687217287,78.71822929795098,88.9526270310139,33.87797114861371,36.35797570120553
|
||||||
|
105263157.89473684,47.47443305546227,148.13899773293255,61.61426323503669,45.788186711370116,117.0812779000839,63.84442637530555,51.76095406758571,72.52540222982621,40.606140865513495,50.042788749689706,41.255762772361166,32.880488908026685,30.62345660875172,54.25343176747293,62.724123380190775,45.95945261963682,75.76374827482198,70.8202111878306,57.14868385408724,54.657851671000266,95.58515290366975,102.84635889158685,36.60072861950319,35.43645039277146
|
||||||
|
115789473.68421052,44.535915883531445,164.82720318684287,64.3932838883674,52.10600800475833,127.98372183740139,93.77632044348607,55.341098148097124,93.41681771597456,56.43406792224444,50.50122993452851,44.65405796299021,50.973083292864196,32.55388350563944,59.9133537098497,75.7428189076669,55.140545387966476,80.8822842591811,87.3021288963236,66.77246057960383,51.14197987084821,120.97803712745618,118.79421289995767,45.34466498959139,37.189526810860286
|
||||||
|
126315789.47368422,75.81319968418902,182.57978809507273,73.35451628280148,61.34368185389406,110.73743592669096,121.93784806512072,68.49931875436758,143.28141910580723,68.89877746904325,56.49389529678136,47.967624305110235,65.11861085866957,43.44680387049998,65.01857131244898,106.05033310975399,55.09855855411109,111.40372865005217,113.57915672412213,80.53304962141033,57.55933259431676,141.57789722241856,123.65863279531868,45.65078847651005,38.39826784338648
|
||||||
|
136842105.2631579,93.10308516182398,196.30341812183983,83.9718467786578,65.13426848569075,131.0771817835332,133.80897478363025,81.53090939130067,113.99729728310717,88.03930388345614,71.30945767449872,58.2717024388509,86.07419400992406,43.610368646454276,84.8982153143992,113.31184844765554,55.792270862133634,149.76182951371595,125.51064462691464,79.83867698730359,74.7188103430159,161.5680981325889,142.72949009884016,36.32292799885872,39.173593412496544
|
||||||
|
147368421.0526316,107.82091506250678,203.20631955559895,134.84592485811743,70.9217153331497,146.82890448023738,164.2989379612528,83.06711849260694,117.28062497632493,85.3776231844009,63.62366568352871,58.73154523524964,117.0072014481655,48.85834209449652,113.4731640345585,117.42279105239298,65.21350358325442,174.03436241899502,125.53260034231928,77.42364286699454,92.31308578074476,145.05393613292898,160.44351972817054,53.31616459334906,37.4685000033012
|
||||||
|
157894736.84210527,132.3860596024594,210.68172586331573,155.47957892212347,66.57274173833738,161.28191481167426,173.6719290096096,109.13726121921951,183.694379375809,103.82564183266788,73.55421182474025,66.26501727413769,131.9231748201593,52.591262064977364,167.26997951182884,143.63448359175405,55.71610990632067,212.47818753800237,128.376423681645,91.2211974010986,112.1541069736111,169.60154679516677,188.11068707960465,62.62832676220964,36.958442079207735
|
||||||
|
168421052.63157895,129.3990554248197,202.39567406487927,190.3892880555996,83.04692684448328,180.50324491780881,168.4348282497015,135.22031588270394,191.88012423607782,119.62647360183526,80.45935500295539,74.34429707949842,107.99215858266624,59.44362404339862,185.14248205882362,148.64435478855037,81.82859276090633,206.11789878385548,131.96695798321775,61.96289417964928,130.12336313559408,159.84511655189323,171.2107347187243,59.16434668470948,49.84168935152303
|
||||||
|
178947368.42105263,130.14830403929767,238.74713514947496,193.5877089175161,98.45961401438201,170.09788606904056,191.2685152322797,141.23556813127115,207.35462354486833,114.41394540696427,72.64258853296576,113.46922786556536,128.85966795290773,62.42799756514977,213.08014986884893,120.7555092415486,77.83668065770759,226.45436477000695,157.28596079853105,90.02678856231954,109.7011419024081,157.77175503283988,176.71767578606296,85.91141996712237,45.8612212943135
|
||||||
|
189473684.21052632,148.10580558005793,257.848876953125,209.7747942681458,112.5199505459734,186.80492281055186,203.39924788640147,180.05037021001291,193.46168203301048,91.2469565833292,94.94104739548922,138.1341755989516,141.10012071218517,51.52152940643296,195.55914593040117,166.75279750412852,93.69295750727613,241.95503717528817,138.8859730967194,128.40282541430886,150.54730322362644,163.56386923872532,208.545816238402,68.5810945502939,47.12030731725643
|
||||||
|
200000000.0,163.92602742424137,257.848876953125,222.84618224752577,120.7504091686324,202.18845235125013,218.98799946904182,192.41719100114545,223.38270798796103,119.5254408921066,90.19675819458146,169.40402983285878,141.0112056222401,67.9249422052189,226.41533396745982,183.46244250394795,102.75374621231305,221.57285341542018,151.18313093326594,110.10695993272881,203.97469048907882,162.42532920994256,197.72680628770277,74.30333949625492,64.3154486315815
|
||||||
|
21
results/AcrobotSwingup_medium_no_norm_data.csv
Normal file
21
results/AcrobotSwingup_medium_no_norm_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14
|
||||||
|
0.0,47.869720458984375,58.712181091308594,44.333343505859375,41.355712890625,37.90257263183594,34.95667266845703,45.589073181152344,24.459699630737305,35.678009033203125,16.657047271728516,27.218734741210938,32.52488708496094,36.20814895629883,26.279478073120117,39.244510650634766
|
||||||
|
10526315.789473685,49.29063777627625,58.76334741311225,45.03257943829134,42.02801873600962,38.448579330172734,36.199575969490155,46.56510167942483,24.837706763864883,35.28293939353769,18.001048768398423,28.850799982072246,33.40719292761753,37.014281983200945,26.961627381097866,39.94536994595772
|
||||||
|
21052631.57894737,73.45035568829059,60.87957780814402,58.132507757074784,53.558576345485,49.752640293864665,59.215450858723095,62.62452806441573,31.785702308139395,30.832541590045693,41.35304042323392,55.98932058159829,48.09765991704781,51.33298249183599,38.26314907021552,51.61325548838958
|
||||||
|
31578947.368421055,76.99637896988612,74.89629091429744,78.2164982913934,59.68905180519308,66.87411823336272,79.9125827435724,66.29432197724189,40.908907871301246,51.470578287818434,50.66871893464735,56.99951398651273,49.470479488445044,58.108238176914796,39.935585397473666,55.92852660124075
|
||||||
|
42105263.15789474,77.20904692702017,95.6457178643866,114.61005032838547,80.43264315300041,69.67968304021868,91.29662574575879,87.2257849085364,58.16996546097904,67.96853664059223,55.81807043942058,64.16077246981314,56.73561223037025,62.73421761657723,53.19722038854192,79.3265990124515
|
||||||
|
52631578.94736842,93.50889701864726,90.52608063949113,98.7775371313673,99.8161049724203,85.53871632026834,114.28390605731188,80.21875930959332,60.86613331931202,66.6982865639939,52.776405683473534,78.89242282631572,63.45055898058117,82.06539993768253,69.35131658630192,96.08941618869096
|
||||||
|
63157894.73684211,109.64508339506767,86.83300030652508,114.31641004290279,122.29602599053173,104.2039977047087,178.79898665007462,92.65466195526545,94.88769507986028,120.22093783347896,67.7084982640773,113.80443493893935,75.44789627033423,83.91133156905876,104.04209863689636,107.37283942037341
|
||||||
|
73684210.5263158,135.0752723853922,130.5833125542901,145.55241227348097,142.50321737555092,160.42165486539338,191.55847590018342,96.83154423621554,133.60035480794156,192.03457746194508,83.07782337032363,155.14148683301136,106.94744174913029,124.88237557628317,163.59441809046632,119.80007983174518
|
||||||
|
84210526.31578948,195.86231121627247,143.80465263739185,169.00912825024358,165.55034481653547,229.1272350817176,212.1457455382783,131.9723182404801,175.7012977322713,230.4742121554478,111.33295470707304,188.8701921595761,142.50506885758398,164.68399536510583,210.84611961848188,171.88463378778124
|
||||||
|
94736842.10526316,234.05469998969102,155.6639017773137,211.0865517506309,180.67522689790937,260.9845077515144,230.5395382791345,181.58756950196303,227.05604486732932,250.69576037458435,137.21576921144433,215.4093741190896,175.93486869603478,190.22174822925035,258.1007847446649,176.14627765977647
|
||||||
|
105263157.89473684,279.72017166877055,178.68899161605952,221.06454092057788,182.79071971328304,273.07925085843107,232.15890297145063,220.90591024155432,258.0034095441866,267.8309465451914,147.9430823000019,190.49524683214293,221.20472686384852,217.81632852892795,272.7247296703819,179.29262215097194
|
||||||
|
115789473.68421052,321.0871794149486,228.37973195673518,206.03242298527272,190.8365536065478,305.66065317961977,241.17183250556195,242.6524470752296,281.70605709059083,284.8530318427449,175.53607803872086,208.67567991405477,209.90171087580705,244.65127274195902,246.61435033220005,200.41697646710514
|
||||||
|
126315789.47368422,367.754370141888,230.6884329021802,278.35264386992054,230.2395017351801,325.0279333152269,261.89122750290215,275.95443328331714,314.7859843256071,307.28986924572996,220.26971481414387,225.67105506859986,231.3499813609837,260.7052821270977,278.55216303393456,225.71498346262695
|
||||||
|
136842105.2631579,338.52611540856446,251.85102954333513,277.34812283202217,229.3739144752973,325.48060944833253,305.4015089587162,286.66453712204486,315.3246169192639,329.99774053866184,261.38205052247673,248.6789461872915,231.1932914988486,273.56350744885094,326.1203416556203,228.32358028370257
|
||||||
|
147368421.0526316,348.01561527635255,284.17108497180436,304.46867742696963,206.65898752699596,344.23603294166503,288.33661475769367,295.8969949275833,339.92593602510044,301.8182663498162,310.3573374977733,277.3077840369164,262.10900957688403,293.04451926395177,374.99705746299344,242.19735510544103
|
||||||
|
157894736.84210527,350.9310190581879,310.1460168905701,302.0861082121606,219.26373640404515,411.6105357624818,322.1885147923578,321.6463864575298,327.980167345327,344.08887564706674,318.9533957161236,368.32841468286654,241.97992110764218,307.8654899496451,355.3286603512526,253.80407772545504
|
||||||
|
168421052.63157895,370.2034518236599,322.8737896129365,306.88093436127554,253.36878476842952,393.8295918462019,357.47758338457993,334.1633609906458,410.40711471050406,394.16926754478607,313.4923239784558,344.1847378553776,260.907170821425,301.92927812803487,373.24019814330126,268.9494959456108
|
||||||
|
178947368.42105263,406.05958974022946,326.7172720072342,279.0484546545965,259.8890382025876,359.51827348797605,368.6971468941987,358.2286658567735,392.57389515613585,405.38095781281385,311.75182260535763,305.1717284006425,267.3274316261845,321.5086544816514,397.01163309482325,304.7654493612926
|
||||||
|
189473684.21052632,383.4264807597092,381.7778935812187,376.6378812003994,361.9220677984058,363.92390738795007,391.75030517578125,336.1125650480183,416.18784044637573,374.8191024907078,308.32443611212386,389.25533058570693,257.59616719900407,318.48103226849247,427.7011272955139,311.1310159334516
|
||||||
|
200000000.0,389.8529172727936,338.4693834687534,411.97202535993176,369.41083181845516,409.6536834177218,391.75030517578125,423.62656088565524,419.11572526003187,436.28846347488854,381.3566862093775,417.6742971335587,356.52561697207,415.26328250608947,397.3572099836249,341.0073424248319
|
||||||
|
21
results/AcrobotSwingup_no_gauss_small_data.csv
Normal file
21
results/AcrobotSwingup_no_gauss_small_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11
|
||||||
|
0.0,7.036541938781738,8.972885131835938,9.178613662719727,13.057394027709961,20.935565948486328,3.738186836242676,16.270450592041016,11.112337112426758,9.143903732299805,15.158102035522461,13.173521041870117,8.069116592407227
|
||||||
|
2631578.947368421,7.215907369497803,9.241187055517823,9.167839875836906,13.311776398704323,21.537728781664843,3.9355120302843565,16.329040148937843,11.088037770925157,9.40808370940057,15.310171122133855,13.318510708886707,8.425649370735
|
||||||
|
5263157.894736842,10.592409572783351,14.956911815000167,9.01192895974706,18.56523179388773,33.28107177039964,7.291756598108611,17.798330840652092,10.581063712438842,14.074436097697372,20.035543768400963,16.218544683205042,14.219530749219855
|
||||||
|
7894736.842105264,14.063325709970334,25.282323430331008,9.52039750133721,25.54513647164272,44.54037490888521,8.226915177753726,22.350000371573458,10.27999373221992,16.715215160679126,36.575896050770695,20.566712171473952,16.224200203627806
|
||||||
|
10526315.789473685,18.34656747323242,29.089509322083558,12.467276938816848,24.825184412981667,36.97605146946504,9.433160418205974,25.81803375082168,12.41754551004835,21.193595239371476,33.17040234073542,25.17867946187215,29.949910223071264
|
||||||
|
13157894.736842105,32.020697502821804,33.986002416058426,20.35991486470496,34.59705371256412,49.31650784692193,10.296928980023988,28.313154635440114,15.33887879452939,31.736783649937514,35.54791069805746,31.27345040610829,29.094816251918466
|
||||||
|
15789473.684210528,43.52815986245127,41.881233053379944,28.39564776394795,24.724392373465687,51.094941011777046,16.706243691682154,31.458539126538913,26.257584598823517,37.216238371099124,42.38832273695442,31.733172776399396,37.3430446306259
|
||||||
|
18421052.63157895,43.66899180166692,48.694136310513244,23.871280860682106,27.98739899137158,52.73351259404727,16.390817073274434,28.64950056116089,29.436779262838563,28.613390124101098,37.201772456096066,37.72564058303008,35.50624589801578
|
||||||
|
21052631.57894737,49.92106935092947,48.08083232799726,23.502787706718223,32.105897068688414,60.80239459137508,16.923600884685886,31.256639556582616,34.42222876240012,30.21620967410443,33.04852811661949,43.1367643331887,38.48255201266083
|
||||||
|
23684210.52631579,46.47420726997205,51.44839063204226,29.92109089440194,38.6935820988898,64.6127330626147,25.192530730455456,40.236983463694436,38.524427103399816,34.20377650650585,35.16773167849516,48.19439941448601,49.8213740398803
|
||||||
|
26315789.47368421,50.21257188286725,53.634558237903335,33.86004211193265,33.451480138489664,68.93566209717636,27.950928788291616,39.733823403158844,44.71752654804417,37.748818752858945,41.38564315987261,46.704095466067585,52.327457077243984
|
||||||
|
28947368.42105263,55.81991646284047,48.363020469927314,31.36212024015279,35.23886922061175,72.16539797781248,33.33408810877321,34.34389020725465,46.055876391787606,46.847281174956095,46.01799565462855,55.54399822848407,48.98097906517603
|
||||||
|
31578947.368421055,61.79718457703115,45.574108651140065,31.655370813864096,34.73509823479315,82.80617212762134,24.660010304436106,47.03059691087528,59.00430963400991,46.76823886197029,51.40030662769096,83.85806302812955,48.08269762864899
|
||||||
|
34210526.315789476,59.88604153638029,40.36125620760002,33.93677319101267,31.00203024850864,100.04191914291594,32.43643586658501,68.26518329979024,69.71039359065139,49.77841519886355,44.87758626543253,77.99151494758527,72.2935736235283
|
||||||
|
36842105.2631579,77.80048581908287,47.797998232834075,40.14916849959838,33.63892320673552,86.84277946648504,46.42326783535389,55.40742590354749,59.515931518908346,51.91601159355482,54.790484925799106,82.3933804247353,70.39881617103256
|
||||||
|
39473684.21052632,93.28066287804756,63.164702514759234,38.7822109650191,38.33385589644519,97.66979808352085,52.989114820152274,58.69004093442845,69.62327660308569,52.84236831358348,76.55681243421218,91.50552462813266,71.4651162714344
|
||||||
|
42105263.15789474,117.93345735806179,75.07464471847398,34.36888893423318,35.31494313065695,112.62410892442983,54.40473152957134,86.95128916736456,82.44475145485264,47.126046129210835,80.84268295467726,99.57014869586914,53.988663404767195
|
||||||
|
44736842.10526316,132.81895365987046,86.45211965497842,47.013377541216954,33.65556947484489,112.48178938429524,84.81238428389763,95.00780731390057,88.7324321864178,57.255329771147956,106.9411708811231,99.83027172198008,58.750367225465105
|
||||||
|
47368421.05263158,144.5577440834772,76.31703876004325,42.24173354609274,34.24696775498978,101.40433938586646,94.2619216374413,114.43639502507168,117.1517905439779,61.62054878547581,127.65004437592222,115.40699528364098,46.094077137079594
|
||||||
|
50000000.0,174.30350200361326,97.29701978517205,60.68100835245691,37.6703331070511,101.96988257020712,72.06400879041145,91.03422615677118,95.1696273437456,71.02403727526728,148.64710405076804,96.5444134893386,61.41081830114126
|
||||||
|
21
results/AcrobotSwingup_no_norm_small_data.csv
Normal file
21
results/AcrobotSwingup_no_norm_small_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20
|
||||||
|
0.0,32.4077033996582,36.155517578125,25.75421142578125,29.606937408447266,27.17302703857422,16.026126861572266,13.865445137023926,31.84101104736328,30.781850814819336,18.447200775146484,27.443321228027344,23.80484390258789,23.779891967773438,27.92617416381836,31.59305191040039,39.16869354248047,33.610191345214844,36.45005416870117,20.54007911682129,38.84449005126953,34.24555969238281
|
||||||
|
2631578.947368421,33.296037207277614,36.785427371096745,26.648522366880545,30.070928266142957,27.658375122354798,16.909636004397083,14.5849724026046,32.483172309241795,31.134407273356977,18.28971532364879,27.769625172369864,24.67473756232998,24.22445243704352,27.639669323871505,32.86372066523436,39.94947247461021,34.48854379632466,36.91144894359638,22.061570849171595,39.57076941414058,35.02109238286593
|
||||||
|
5263157.894736842,47.85268858083398,48.244665590267104,42.69550373105957,39.864313121606436,37.426982542794,33.6573874229911,27.179000907564674,43.72329865292829,38.284123305014624,15.910192525081357,36.48614875559497,40.41140686373879,32.42776290967111,24.889028077021532,53.30326504027084,53.15286421536408,50.76808675710845,44.85972475671207,46.95364156745147,51.82332628598009,48.673801405484326
|
||||||
|
7894736.842105264,46.289031177160645,55.042522301737456,54.09337787392067,56.42648777237724,53.144822111121094,50.22762564730694,33.501808243249016,49.271767463927205,49.36965923597443,19.09700184072422,59.83897444905089,50.45623192499673,38.46070101455762,41.67901350319427,49.6850867264819,54.63208278024007,63.855021779825314,46.707314440304636,44.729709547167836,54.444386954762436,54.15614254775759
|
||||||
|
10526315.789473685,52.056275916066525,54.81231460751259,74.05637095030656,63.5784007053296,66.31524625313249,60.829015134609314,41.481055804071666,58.02204051089584,54.31379683443714,27.48259941575187,52.55809385921817,51.009362222167596,39.46608662262683,51.2882578602458,57.61938410989135,53.74984419733864,66.73611811273976,45.4475195833438,51.68484967947006,61.35790378470335,53.40799793085068
|
||||||
|
13157894.736842105,65.33051521372349,64.54581476310449,101.39453396258922,82.1989485288756,69.99954431695951,92.37778505030761,53.489512749841836,65.90184293982477,58.01873857391756,37.75656481033547,64.60210800154387,51.54042098185156,51.49881111311409,69.99729087472045,61.28976902142786,59.26768115029729,81.32731635404178,50.51788533190033,61.01289577361124,66.07327938235656,56.682834893857176
|
||||||
|
15789473.684210528,62.288964994437485,74.67267434872753,117.30094707945051,85.57247418495427,70.64805491359445,99.1536963618363,76.71144264556695,70.18010282099578,77.18331596618546,46.548501034006684,66.02018492836042,62.126340740605414,68.03360012194292,73.19627934867654,79.54463746351219,77.18552978357451,99.40059083489666,64.95080359490625,85.92186557210552,73.2946830179305,74.03522284188597
|
||||||
|
18421052.63157895,74.93735160680689,107.81465900012412,139.2873089840207,96.71375098380769,86.87840321203927,113.47289029651239,93.44800781175867,97.10603002875715,96.94276465686075,51.13459859087733,84.31004567019167,85.57612551139911,87.22768531824332,79.04176998198281,80.70959805724033,93.1210169256394,116.47493457860234,79.01124874019474,122.10848017792293,73.69862018011696,113.93861476468165
|
||||||
|
21052631.57894737,97.10230527558156,93.96989148475457,96.52644659277475,116.53272218007461,113.20687297011347,174.57018046960278,153.29147939602757,141.83370591646417,106.18938247516876,56.92739289635767,88.59680833182506,98.30280876638487,116.02657619374611,102.99297806893028,95.20292315324589,94.353535294863,122.6360879416611,110.15239549921492,138.70916615166493,85.8180380409114,147.08891463263214
|
||||||
|
23684210.52631579,116.27735660887656,116.03497409178692,128.7576511145629,148.53811679045745,147.7007118017307,180.4411057644934,218.819252871658,178.99188914985868,144.46769188926493,102.97105440930812,107.9235355442911,124.23589051595356,148.52493570150597,141.86054870983654,114.07920847511193,115.40575793876708,159.42236704381384,135.99380406923075,180.29462874637417,116.24597945662829,188.09331493859807
|
||||||
|
26315789.47368421,131.7258633593113,170.21886141296898,192.91515870132275,187.39992579083034,202.23182970912833,207.65967708380268,238.20913633349198,208.33421997944734,208.33317792514686,153.72142048283297,151.02752303053467,181.4257410114987,205.62764731066048,184.45465234458612,191.9764393842121,164.1790215488616,241.4987096171465,155.8979871799741,238.7906318729604,140.21512545286123,200.8675222978037
|
||||||
|
28947368.42105263,158.33178354729574,202.37646896315744,227.33339871899574,216.93278682467184,211.331247086629,248.69978439911085,246.6390119787316,253.57222508443027,235.39637340138825,180.44286899309904,189.12914332953846,230.71455120359266,239.18017664613652,190.25308435701268,272.071464554755,170.13196379375592,248.429662619849,141.57710379225892,260.24952784708995,191.00687052491133,233.41085929751725
|
||||||
|
31578947.368421055,154.90293820891682,259.51886878713685,238.00155754465806,230.06350551450683,204.04818964120093,268.90613665128353,278.3264847906343,297.9188012779585,263.6566409047621,233.0199083050203,187.61077770929256,248.62723383299203,286.4701455133114,232.60310746163879,239.4052901964768,236.2464176310728,265.57598176194034,147.03692383069415,277.6510531974962,227.1836707213909,257.47360423760404
|
||||||
|
34210526.315789476,185.3483334431771,288.244503821445,200.98698775078117,262.38887776108334,234.9703059204728,300.94985060984084,285.87677064917756,294.33157443199457,279.39890948160865,259.3397508126547,236.45249789400123,271.87452206742074,303.82141308936383,244.9907631181036,278.33346704069606,268.4523852427911,303.5926700412732,189.8020199536452,273.70698676413116,265.58575844009334,302.29368292558894
|
||||||
|
36842105.2631579,217.15594078315593,313.3082447720697,242.89344419353233,264.4147583865393,269.04264408052796,311.566828934935,285.8760857961845,345.1251253437798,287.88174234070607,267.13349160510745,288.908934617637,254.7768222093582,299.069269646568,258.82332522543845,286.61811474733406,278.37361948642047,313.9588218418845,245.4122034481358,295.88553080961645,285.4122809714888,306.8542738398356
|
||||||
|
39473684.21052632,223.78152848932882,311.36378852218144,290.2786089949661,268.27760689707674,288.8315125849934,331.56831176228144,316.60071037895466,327.69570335721045,335.2528018445355,312.5851707836598,276.15293550821553,300.10106629042417,325.54629257461704,258.3734440263619,323.2678803412703,266.3304482982429,308.70753486351293,217.55363201649232,287.675268513344,294.6170784805455,293.491367597329
|
||||||
|
42105263.15789474,234.71857714785102,314.98354696236817,283.7284783920753,305.4871167595037,327.815720531749,342.18832026294064,347.4369622518482,359.1234421835712,321.55971752705665,296.5348788998464,328.4429737894159,320.2516846670008,323.5348077546857,268.7956971871225,305.5235315637245,316.0435993308176,319.21332096855394,211.6789066732095,343.86831283833516,292.32147242421917,354.51419196696827
|
||||||
|
44736842.10526316,197.38530471809517,334.59399147756875,341.7113641431127,325.66671816942767,310.07803450322547,327.6141084031385,331.2294814979601,414.69330027823304,375.55925973110584,289.038970218636,351.0568013585836,363.8722931092961,352.0182927139413,286.6342829280943,351.55434802859776,334.9405559625817,316.564209680478,194.9518083879657,334.9490503408737,338.65567082843626,398.9040526068277
|
||||||
|
47368421.05263158,273.97604445753996,355.3467957329552,358.03872707494406,338.0551271167819,360.6422533754497,398.6396637395478,368.90517846666216,321.8780714666414,387.91837769185406,400.57232862760486,339.23682612345823,374.71732094908685,346.9451931840495,288.66709963832864,350.67221955265694,337.94357375557075,374.24965236342183,219.90429991178235,365.6490119190427,295.2192700559743,430.01798900672935
|
||||||
|
50000000.0,281.7031864978765,311.4724302511466,355.67379710391947,346.4399628294142,322.5400290457826,406.9658508630175,337.0044558989374,410.9943090128271,380.6061951141608,320.96123184342133,373.0588185175469,321.95514380461293,389.21944143113336,300.4861226395557,379.84359742465773,346.62374156713486,362.50578028277346,193.65573316655662,406.5343745429265,325.86513288240684,355.2923764508021
|
||||||
|
21
results/AcrobotSwingup_ppo.csv
Normal file
21
results/AcrobotSwingup_ppo.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31,trial_32,trial_33,trial_34,trial_35,trial_36,trial_37,trial_38,trial_39,trial_40,trial_41,trial_42,trial_43,trial_44,trial_45,trial_46,trial_47
|
||||||
|
0.0,20.115737915039062,18.025856018066406,11.86644172668457,15.197259902954102,12.585914611816406,14.272492408752441,22.784841537475586,12.306120872497559,21.898910522460938,23.83329963684082,18.866573333740234,17.847206115722656,18.518878936767578,11.115408897399902,17.001426696777344,20.172229766845703,23.71015739440918,29.406579971313477,13.539291381835938,13.235689163208008,9.226959228515625,28.54927635192871,17.407058715820312,27.34342384338379,23.04488182067871,39.098270416259766,21.33762550354004,15.688055038452148,11.3455228805542,18.1380558013916,27.41034507751465,12.818075180053711,38.510257720947266,15.566980361938477,23.278188705444336,4.447911262512207,20.364103317260742,19.513654708862305,13.808625221252441,15.358587265014648,10.231523513793945,12.558725357055664,18.58182144165039,10.572517395019531,25.525001525878906,12.722334861755371,17.007261276245117,9.125801086425781
|
||||||
|
10526315.789473685,20.99003336489861,19.695332581450817,13.650125059936393,16.44222663773786,13.994926114285422,15.526145540980153,24.344568278676626,13.808757747776312,23.41892435648311,25.006169808192638,20.652298254020245,19.592732201477414,20.191323842450853,12.348489230661201,18.275268850275353,21.965644733428213,24.94986858646646,30.745484584374896,15.153443824417291,15.179515028537518,10.87049962257208,29.597357828340332,18.80037345636089,28.493466136512705,24.846022199297586,40.2290630431077,22.872938856859165,17.53301496731009,12.749183071619855,19.82421601453791,28.9575134141737,15.31352581225666,39.233430306842536,16.845108690968306,24.40671003022208,5.8899246726644305,21.59961258426536,21.441238509418852,14.984651757313914,16.51844825390778,11.679132875380136,14.182769498062886,20.10140677708668,12.74538051319461,27.32918151902484,14.16414267478196,18.750413315442916,10.568386891521412
|
||||||
|
21052631.57894737,36.50103998070691,48.59159159090711,45.24867933996827,38.93564758810971,38.96691085718098,38.30157363365232,50.733868247109605,40.54464261921076,50.08773664391272,45.59309800592486,51.92510816789235,50.117045375180545,49.58327570072584,34.28846075197997,41.19828154004264,53.50697724562438,47.01492937953518,55.01123516096468,42.70081296658087,49.515068548145884,40.79992686232702,48.41331993367451,42.85124744652381,50.58103277092578,56.10343209107167,60.647490753898,50.17434636988468,49.53187714591416,38.43725805453069,49.40967888911345,55.559622110958905,57.82486044066517,53.47086184738085,40.272871666626585,44.7027670638094,32.78702423170498,44.108258088209624,54.76228181128274,37.3025376836968,38.372731013579546,37.38045675720287,42.3322039080211,46.4966869555682,49.76637190268723,57.09628902780217,39.63602644892363,48.574570999849705,36.17749075414071
|
||||||
|
31578947.368421055,46.0092408909176,59.06723302288027,61.75543097333359,54.06830382067452,54.1453695506002,55.76161229071773,55.80993557464424,55.86212922269453,62.277380621550904,55.397391830093724,64.94820228066305,63.43323923434825,63.92296468014533,47.377278103031365,56.06188437200237,68.88896825408094,60.25719345096819,71.86782118096082,50.415891036638925,66.34984256197129,62.23151603073831,61.85701596101236,50.38871411986422,74.93790714135213,67.12088183792427,75.12919006255196,64.6519909787851,61.705406404691146,58.42517961904283,63.907506613932824,62.6379648724318,66.48832262732563,73.37214997660503,58.467918055250706,60.22513375056707,56.99836651269873,63.12220958281835,70.08047386622083,61.24544590806532,60.20770052682165,53.021089184172766,54.50503218504201,58.58654492207058,58.421589191992695,56.701568782174775,53.18032981471838,57.52358756825287,52.00068682149425
|
||||||
|
42105263.15789474,59.13485917382954,72.17071119858949,75.03923971897348,64.62930561403984,74.5333023693753,70.78636066297746,66.81600480140742,66.16120931291515,70.89693105402415,63.92526383223296,72.9717496605461,73.9231096617568,78.44358913802705,60.161263377877816,66.75150536487803,83.56791378223335,73.9409593853924,81.67364461765395,63.01517705379926,74.84088675888292,67.8074348117341,79.59179036116997,55.047547126080524,86.96195082783369,74.80049916746874,87.89306677791221,71.05857437849045,76.18306966039283,70.32247327470384,77.84671556049767,67.5417610318542,73.66021137587582,88.09372345983486,67.23367932042586,81.32396608797467,66.39543448665135,84.7797823220574,99.1923789553695,79.29654767036108,71.71791404791156,70.21733166974998,75.53018094545587,79.44781452912703,70.86087699469768,69.44502858252076,66.41357255039783,74.20543114019563,70.53072454757638
|
||||||
|
52631578.94736842,74.12287566691141,92.64629672880483,98.35459462101275,88.42794013766371,96.37462676194731,84.01929215036353,80.2651875079545,74.68868268136751,80.8093271000192,72.3610943320344,90.27917632181972,85.8752041417187,102.15085274796324,75.22330158195585,87.20856945148547,108.08583835836427,90.08596844217867,97.60818125385987,75.36059574764273,88.06487334977804,73.4886192490148,100.20308109401748,65.2900314744758,110.40513653584712,91.05952323318931,102.92840158103493,85.53425837933521,96.25011465700544,94.34933309777126,89.77800156518693,82.14671428522244,87.53234132480424,99.30892465901341,80.14131626309285,97.00569510594052,76.72960226142835,101.06439402280505,110.5411619077594,105.80331898488909,88.56557575759795,88.36190002539564,93.97652771625245,103.29841013285757,87.02789230644703,95.21211416321778,84.44847093526677,92.478146303022,86.37305143285656
|
||||||
|
63157894.73684211,92.73054026104408,115.21297844810996,116.61447448087368,119.51207331070611,117.07241269566347,103.58924159890564,89.34486535274091,85.97095979374532,95.73118289669466,84.2034599781862,104.42320336698661,97.53755253895993,132.9138573489031,96.8023282341102,119.82758588672017,132.77700927779284,101.01236790381476,114.8423849349207,107.29050684156843,108.45178906768149,95.26287550390428,112.98168701932372,88.4342502980929,127.90862654157789,106.39067165322416,115.4095493593457,101.79841552484895,111.7681924914967,138.4894709360484,111.48859915603727,100.6182267592802,104.87999526822007,117.74069062473868,94.47991004771968,112.1836414605049,89.69569966477701,121.403965453527,122.9033747895355,126.88489477856504,115.16950885906115,116.83623353447611,107.95046465057582,129.01982703078488,100.58142067397071,120.92605304412565,100.28352709824524,105.70647929257964,115.73200970867501
|
||||||
|
73684210.5263158,129.43767368155753,161.43375981588775,145.28661221261666,140.03078686258138,147.42051968758622,139.65465338953317,106.03460404712327,105.5526431297414,115.70200272967364,99.8605795783143,119.94506746751368,103.95190989659517,173.9106457938861,133.24997963442615,166.92250320034677,142.66956609945714,116.55016722408358,136.39818662987028,142.75602436503215,135.23853294625673,110.84562988213969,126.41170682597112,104.26585467921318,148.8353853734659,119.82041712371515,130.0881805175972,114.85789213424492,125.0251404775353,185.33347453643412,131.78167202125545,128.23394026671255,129.03586922488964,132.88233660198645,106.99317895036985,126.02903215345667,117.00723915088955,144.5281678812986,133.92475179007013,159.23491072770304,139.8584267906948,132.2037467723119,128.39707942643406,155.88648075893153,112.5340477782521,141.17353151734517,113.2994548214646,122.78248973258893,158.44374976556884
|
||||||
|
84210526.31578948,167.84364720791,197.72169350818254,173.0057276863801,151.83991206253665,176.96259821815173,166.11068757287023,129.32439358957586,123.10738529624041,134.20174325404076,117.60739936260636,135.4960705743602,120.89349277643616,220.5485154284665,162.94731330706472,192.5469118663809,147.56028175816311,133.76977655844675,152.95613575410977,168.39320200434022,163.15280230190615,130.81318299905746,143.23171177002862,117.75827022685239,160.66686938343946,137.35922040437399,149.51469685199189,137.7903189041608,135.27010727027778,212.40673679261988,147.95112377058436,145.02937692651457,145.20685848734055,150.42192583863425,122.89223735715544,136.64024815764122,161.56927404278204,163.52099033537993,148.13309601436362,195.5809310717596,169.01824574985665,153.59108119327936,141.8396488378583,188.92313787547505,124.75621543861823,163.1779825476068,131.64064526458856,142.79359726694483,194.8275684035716
|
||||||
|
94736842.10526316,210.42991110975558,198.45931177786514,187.79628941792365,171.31469841198248,194.83413372616027,180.90297067268568,144.23602922046118,142.41991722980035,144.13573494371946,137.22669442443802,153.5328669202955,148.71281150647974,238.0424058758486,182.07556616108337,211.91917187970267,150.74389963446396,149.74523880310005,162.77277383905863,192.87233389158658,182.4646193817299,147.23400476620304,147.70000912112872,142.7666377428496,169.2730219198231,147.91166343843344,157.28189860049048,165.90790930922836,150.55306552069834,213.78126109826928,160.69546859936372,161.18234937776325,163.71650310995835,161.46304166449073,139.97138275416603,144.067942897864,189.80113415812193,182.53297036939875,155.79055231826126,217.7112299536569,184.98418330749976,198.83440633312156,156.23077577608444,213.22766363496925,137.05515959631867,179.91230441778487,146.2535017409731,156.93443005884947,209.7809814709623
|
||||||
|
105263157.89473684,232.5801353111987,210.83904519561585,185.72850215682692,166.77719458904622,220.5477372796747,186.13173794531755,154.62417247726316,171.639151733833,156.0919977452121,152.20878486760435,166.50943799329266,164.6489291205961,230.354676880995,199.02276820026935,227.88404665041168,161.5140254497528,156.42884391166496,170.94303089933382,202.43936624355263,195.69182442396962,151.8656806564397,150.89317305026952,157.27811861170295,170.85700879161377,158.96333008690885,162.69854021972236,182.72345238635083,157.13774660477347,231.25600339724087,167.04195160210298,182.64008852591806,186.90907099141307,163.65269184962864,152.0107054392552,150.6032050322958,209.0794615457428,199.3099481070471,163.12400664203386,227.6026524288007,189.57571642402135,234.09679511701302,159.58689975259708,226.87821574942558,148.78214669962338,187.18196412986995,161.67987710476913,159.57432784424924,222.24016834684025
|
||||||
|
115789473.68421052,232.4767570370122,227.47387753493072,221.4324579790771,176.83288685890776,234.15810001680725,204.70220843469338,168.00981230790265,191.91092183504904,164.94040890029757,163.26578362290218,178.55722537492287,174.70932164920333,235.0951385550882,207.00482358327366,230.78202790302583,170.0275976103511,168.32032679033742,172.83196632646622,216.13220818018814,206.37882833042468,163.6619249382095,161.52165259297038,166.10232201532313,172.0077243709003,165.81229848416723,166.13864536637084,205.7490314993997,169.05401633283603,235.73331700430518,164.95262678264746,198.03975982770035,198.40192957020201,168.92522583303358,167.1581140057367,157.39720997433088,223.2544048594058,211.23558287665125,172.75166822569523,234.62923694787924,185.95312652260148,241.36291810828894,169.29496041798856,231.95262842586166,154.46332546350368,195.42994595432546,172.01909221923418,171.5661897031059,235.71806607517178
|
||||||
|
126315789.47368422,237.74994240986013,230.10673851649847,232.0332693685455,203.61070237843288,231.33603592922813,213.18060231572042,177.86646551240514,196.20678927759715,171.51279136133988,168.28731757600553,190.10289658899123,196.0843304899261,240.50558058178655,225.33390156582124,242.52187639326272,169.10735171620536,173.91888784256008,179.15063322878282,237.12494802904263,221.69332591021163,172.60797658422317,161.9300862458274,168.13524186396532,171.44152466015802,173.38684486121022,172.51443457966698,222.59329681482342,168.43196232365108,235.13038362773173,168.6263231489797,208.86748438148948,207.31945915367464,175.9803743030556,171.90525147161986,168.97818181290194,226.2786681936058,218.48649966980943,173.63634192662886,230.521205268243,198.45611639019526,238.67826682477775,178.15388675145496,235.43788943511956,164.73387055846135,202.34781084579114,176.35829869003507,176.69225537446727,229.37513830605636
|
||||||
|
136842105.2631579,233.96077022942973,233.57024914315204,236.953998254318,215.01258751741736,210.6179108208979,206.91990264829175,183.19354167671415,194.60672727813352,189.21258608877164,177.08455648349593,186.23701474376,204.11578502702085,227.1077566847329,230.73563459119308,244.78455359144058,176.12253371260834,178.5201465171295,191.41994825207792,242.48000501492513,216.2777831280743,183.38279897717558,151.465997012492,175.3276890827926,179.37439983662148,181.3717215355496,177.4659062167449,233.24426814777536,168.2825952687422,245.84654184589758,178.3651224168384,199.3093997997095,217.85347828442372,176.7597097006118,189.74696836820766,172.33770708236173,237.67759244511333,219.86177831619895,176.5656075977883,228.51614144630216,217.68078957493947,242.2092498444289,185.11801623233137,242.91128908828355,168.1520120859394,206.40260213563977,181.49276373124684,182.61431055447898,239.44464660305894
|
||||||
|
147368421.0526316,256.51880521804014,241.71404973200814,234.906671063474,209.11119381179438,228.09892030700095,215.71303982054428,177.96827881744034,195.77171165097784,196.96541973312807,190.28528231357603,195.9916115343736,207.09509632759148,260.1160818288036,243.36757541891612,258.0664409867119,172.79624328454776,189.19058571215152,191.8728628038039,256.5213067617443,217.49362493403402,189.09811883050318,166.03922176435384,174.54450867950422,173.7067621001081,193.08833490473083,172.38552387896667,237.30479419008518,174.81907766099783,244.16008792210815,169.24418460860477,206.07602961704012,230.3289359059195,187.0870441900066,190.30728083039915,177.13153561446146,238.642289617798,234.11118456944206,183.11726231852398,239.80736518434543,217.398306659962,249.2747647403185,180.47463434530096,241.80599107388974,171.72352066421442,216.21951583382827,180.4773247144559,186.8997847115069,234.19994371857007
|
||||||
|
157894736.84210527,272.9973498724834,243.1036166697328,261.6509548093391,213.13572285487382,230.28795180067297,219.1862002463387,183.6934405563198,205.36197634619194,199.28625227205475,185.94168791690856,186.36315266552722,217.57129649541386,256.69623324696045,238.36543072760105,265.8871036479843,169.3632280185117,192.37370787259616,193.00019719404196,256.69457285424016,220.6536148156238,191.82427747716864,184.12602674866153,182.59406876060442,177.43932466984953,190.99999609956944,183.9662900581502,249.08488995505502,176.15458430968988,255.65864079682947,175.84686073636084,210.93730536990714,248.55298014005797,195.45184930069294,199.28714069063645,178.15323248059795,242.20888770117818,238.77259016264014,180.63334881281094,249.43664083227392,215.1137567411004,251.17790990828476,188.20038219997429,261.4252625612176,175.55021674398571,206.84717373227974,181.15893837215168,183.7629431553825,252.19052241515587
|
||||||
|
168421052.63157895,259.1583409137673,255.5661911132263,257.94790812410474,242.10298211884964,243.11837709775594,218.0022984436014,199.01301015513096,220.67573835050632,210.96419598125024,176.52650947187746,187.1175006334141,225.03193622084535,257.1772800205157,256.822484069914,265.4721050222825,181.43566013637343,187.25192161147945,194.24676667025875,259.4355525600613,217.5936264093563,192.92899700677296,178.79031517763218,181.99175078188614,172.8004459733778,191.4733243061235,179.66103118938753,245.02469644586134,182.6064309948369,264.6468594595996,182.09044258085976,213.68353560294472,237.48761900111907,200.2271955244429,202.40501485240756,165.66363645128266,255.85006360069866,248.6541079358711,180.98582668846004,254.56865756267325,226.68538590745584,247.6424540403477,183.63210459627274,258.27513554089614,179.6110638185221,223.96018838882446,182.93256146425685,185.29863220833016,263.12861523998083
|
||||||
|
178947368.42105263,258.06713463510505,244.9406214075323,263.7968395977304,250.62336762666374,255.03041817033224,223.93611429545027,193.11754804337784,217.43485930219416,206.5314167065964,189.4498057149949,202.3198976505166,218.37617022789746,266.9521903900889,251.62246356004658,263.03261585983546,183.07188943948938,195.5042264837142,198.92969755200963,265.52481169730345,218.10426938913537,194.84532397758431,187.38965299956686,183.18688851574285,179.09966613005568,197.977317718175,172.01068189843375,240.85967522075302,193.39580420245755,264.09188218435423,185.41872074915267,209.17137035496347,261.1699341705631,199.00650791728927,205.38099097933136,181.95772232982097,254.6185787270604,259.1494016842581,177.24149271338104,253.6255093890957,231.1086173299442,265.8514873854754,175.05004589296774,258.93855079059125,180.03134696610746,232.49636560467968,200.62069254197243,191.10873224674995,256.4807274173997
|
||||||
|
189473684.21052632,270.3865470932461,263.0419140245115,272.65751714323363,263.33790659458685,249.44910713955966,221.13681178715422,196.35509520092168,218.32991277651445,219.14375945810136,188.4439731325304,190.02247040159486,227.96949156029072,274.48225778308273,261.7003117061718,258.26886217514897,191.30007911108208,199.55322449978368,205.34929706498858,260.18214532592623,234.30578638972338,191.1086660107582,185.96507701690507,178.2452777721545,168.45371006829586,199.1645234662245,174.9261985173516,246.687340644423,201.2480795907677,263.272754644423,189.08681122987554,223.08584084883952,256.167954095678,201.11285711762977,211.75162286309325,176.2175536471886,254.4283483848182,257.3701615817329,186.08323909040965,271.4703980806462,240.12019909955458,259.5929720286187,177.4243450528862,277.00845293968996,194.79623739830015,222.00446155923225,189.9446571152296,192.65399237658178,268.82115934727267
|
||||||
|
200000000.0,267.91880712854237,272.7931202320676,269.27073893421573,265.1939502734887,262.8962555229664,238.81900832449136,197.94460576145272,227.63914077454493,222.20485477698477,191.7891308155499,199.79672703460642,231.65580000375448,270.34225262309377,266.05554832125966,267.3007045215682,189.98927299207762,206.35346199572086,208.53994776386963,258.68447832998476,238.90278542512343,201.8495815561006,192.9409442669467,189.4685658028251,178.18097492503492,200.4002200655247,182.2914140883245,250.54050380697376,200.9682526831564,272.3693185354534,193.97698507810892,224.60126796835348,253.014526331111,201.97456855758242,214.89438538488588,178.21431248517413,255.79509671189282,277.8395784528632,191.38983692228794,274.0556806169058,236.4217993193551,277.13766993974383,187.90420866561564,265.36823686800506,191.60781301401164,238.6506689810439,203.23852259783368,198.87188944926388,262.5358409685524
|
||||||
|
21
results/AcrobotSwingup_ppo_3.csv
Normal file
21
results/AcrobotSwingup_ppo_3.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31,trial_32,trial_33,trial_34,trial_35,trial_36,trial_37,trial_38,trial_39
|
||||||
|
0.0,8.45626449584961,6.79600715637207,11.114153861999512,6.566476821899414,6.975578784942627,6.019253730773926,13.187922477722168,9.610929489135742,11.058207511901855,8.138980865478516,8.241827964782715,4.533395290374756,9.133914947509766,7.11447811126709,8.011873245239258,4.775976181030273,6.504958152770996,6.739677429199219,8.172922134399414,5.260425090789795,12.010283470153809,9.059353828430176,5.858153343200684,9.182978630065918,4.886148929595947,8.927388191223145,11.782123565673828,7.7661285400390625,4.323923587799072,6.703610420227051,5.1463422775268555,5.8211894035339355,6.207094192504883,9.287851333618164,4.452250957489014,7.079854965209961,11.736565589904785,13.737770080566406,8.21053695678711,4.320318222045898
|
||||||
|
10526315.789473685,10.009982198703819,7.293472492314071,11.877441951233656,7.262770368534443,8.175056939375771,6.525961906669812,15.0992217638099,11.253482130142007,12.444622852003661,10.052993200853509,9.659487795159531,4.875226873812882,10.655476437958985,7.828629460843626,8.98008916088976,5.087962464837053,7.872664695200601,7.204090728963677,10.022698724797188,5.604066545400418,13.609666500913903,9.635663715315019,6.117543101947951,10.573231855280946,5.345820382029758,9.715044028156662,13.370590102866583,9.290915342156165,4.673137092231476,7.865581077889868,6.138127724366608,6.715619771580091,6.9514799458993775,10.871857570412047,4.848607417775209,8.174443679482488,13.325478092246172,15.441990286988979,9.627932193072468,4.6575822993246145
|
||||||
|
21052631.57894737,38.18205327278525,19.23387150066051,27.439888810857596,21.833078328581976,30.819563517909383,17.949183065774452,48.52724730865282,40.91852479502352,38.39768967936409,44.74188149710111,36.17542252962031,11.96534862320798,38.02968546550029,23.339854307730693,28.29620426913873,12.897475992104974,33.08549053936041,16.578491293120912,42.640002246523004,12.87019920368212,42.09351143154246,22.3604550088067,12.348915678487673,36.006850027332185,15.366465796755211,25.753232260077286,42.08869685339961,36.92182561045208,13.977509982445358,30.24490268212689,25.195678819043156,24.949501106427814,22.797075661069552,38.86162909277176,13.576542959752341,29.2741754908125,41.7144245714816,45.663469470831004,35.18268819978858,12.325642516088372
|
||||||
|
31578947.368421055,58.013910893144455,50.67936909048099,52.318839514963436,48.60969466486962,54.174062730109846,44.707791336788866,63.013155839212466,61.40298973524381,63.01978642646213,69.40506853351056,60.11700600976965,25.726848224358548,56.3552013331111,54.772534936294996,55.98441710273563,36.384902458379976,54.828825582066486,32.82055692039327,58.67891034716096,29.794846534517465,58.0189348286704,49.43809348515073,31.231169633041052,56.670483489982644,36.94967785506533,51.05759884649243,62.64895246372948,56.07791274976904,44.89888386500592,56.481478592767864,47.505170420637754,53.405977329007065,52.64539229819278,54.378019095364955,34.875119445644266,52.9066852514743,58.851417333991826,62.362998697710665,53.04047671239049,31.59892114180649
|
||||||
|
42105263.15789474,67.16482135628729,66.14724005350115,65.99457742203636,65.88242422774888,68.29126735712683,64.48606450158144,74.34230360743742,73.98893932622556,74.48689122923193,82.14153906645207,66.12357135699067,40.220219802328096,69.01546774073981,71.62547107474296,69.84803427850771,53.96573019952325,69.30090123423248,52.09925923882429,69.07723416970047,56.2727919710475,65.64607713394219,64.51118986568623,52.50437225335358,70.869917626196,54.837937330192474,64.74198234696303,77.85259279897669,66.13422684326066,64.48479921327403,70.47406649861969,61.79015246921134,66.47448670236687,67.8938870813873,68.55993982167125,59.7832056796204,59.947047325547715,70.41620238756869,73.93636627548949,66.73692670167318,50.14127413821188
|
||||||
|
52631578.94736842,72.39728682882402,73.85714769179718,73.07820563909915,69.2525891040831,78.57301801383248,79.44638966436696,88.61256873840853,82.33217617165596,81.37845979053063,88.33625116806033,72.74142276889894,46.68622040593806,77.03961924779283,79.56834631786782,75.30002029632267,66.7609730666075,78.83269765351865,65.02206354806009,79.37277631721668,68.89787210047328,73.30938844172248,74.4511260131472,64.50051780402083,77.34975363889973,67.60310479676625,74.65888580021517,88.3820510723543,77.59928844297444,74.43479855170789,78.77381094419229,74.61289349243746,76.77708783921194,77.53904222583506,74.23028400833422,69.04162342464578,68.71021679328584,82.05520417446327,80.31058287550373,75.7045200116664,62.083673207465964
|
||||||
|
63157894.73684211,77.96186640771968,77.88192279314896,79.90939156508843,74.59772101593646,81.95450827012928,91.32023482420934,99.8219248004815,88.71310677616715,87.05812763003762,96.66909369991427,83.85186222373119,58.91816984942581,84.70048816729121,84.30572490179473,82.699403670521,79.1708958555952,85.94157966649433,73.81358258946285,86.92019426967134,78.1754470913364,82.84876727397423,82.02697752816526,77.95594730424254,84.60909789255781,76.16912031656653,85.08030606839301,98.90300199393586,84.50312371704717,84.08359830816202,82.87125030126928,82.58972449869,85.47248331339736,91.35298261479824,83.54609760419154,75.5701546082041,75.47427564335167,90.62633216851636,86.56694207903917,83.08580821203559,68.41902974780412
|
||||||
|
73684210.5263158,85.20590856124407,81.63994845240235,87.5536801860851,80.02211912042381,88.93895388714495,101.66903585119178,103.37438984995404,95.74795153291272,90.27874880733995,104.7106697629322,94.23625790080123,70.3899351770982,92.9502840445395,89.58890648277017,91.02809318582273,93.11215345229222,93.00044250232361,83.37840738091774,96.38188635392781,84.69166377052623,90.95690393782223,93.58000030876279,85.33535254897339,89.766755722071,85.47888010135566,94.94427861931665,109.41930468729245,94.07833133751419,94.37431791011977,91.07855648976697,89.65548677730098,92.03973163144245,100.83050526488687,91.16842168468435,86.46374962338533,81.56014853678747,96.1543835915233,97.22680180177217,89.64854993021059,76.84670810420015
|
||||||
|
84210526.31578948,94.58850524333045,90.83613501906065,95.3034353195135,87.10750257011266,99.6640167285861,110.56034452267961,110.83371086530077,105.54086266825404,95.57707602049835,116.70126111685735,103.4113903040701,84.48521418122374,100.21657412633341,99.15831680079907,96.7834836779539,106.77713621603816,101.31314210036454,99.64253775168655,108.07471593595278,90.08850997554299,99.46355560842974,101.27142288116868,91.54180394860185,94.90197676321147,95.64812839146796,103.65011057777748,115.76212588935017,100.13870888882397,108.95919690683608,100.59892966476504,97.21905508364998,103.31965357593552,107.68338891591392,96.90379751999953,93.71759573095723,92.03145708189116,106.78001212793045,110.79734578076493,98.82696552520974,87.42132459667582
|
||||||
|
94736842.10526316,101.88429206826433,99.8938707395191,99.14809043627037,100.76287327209421,108.37623480725982,121.46893862669984,116.83811802307655,114.04406868684985,104.81740577085527,127.59927797798387,108.38569930503326,100.743737881286,111.05183225240651,110.1534130271741,109.64443115089243,124.00553335309276,108.91868194906748,114.74200134024726,123.8274421608514,104.30303329357315,108.79121520411522,111.31312831736832,99.55750597928782,105.24583764570325,104.61805018256328,112.71411208948766,126.85815563199428,107.85342714006798,124.77597780818755,116.60481493026431,102.22960813126818,116.8053509888763,116.2450770734296,105.78968097283982,102.0391513479466,104.50903597449332,119.26716255988936,120.32318122658126,106.96064208208524,96.76329541963801
|
||||||
|
105263157.89473684,111.88603721228829,109.70143845173791,109.40784566178712,120.98175197472368,121.3114516633947,134.780379322262,131.72081355721667,128.20252870361725,118.03989737272428,143.36412465539334,113.22776764160709,109.64102217435341,121.19535848263062,116.03230664678888,131.2139862133938,145.4731281469073,118.53652354707513,131.00931832082878,132.2579038516306,121.75001301436873,120.51721003816729,122.21878272422958,110.75283410728308,113.11012512098719,116.92501035761966,114.78063356546154,134.47534718399564,117.2748983751291,146.00184947318317,129.52482397912115,109.84680408870909,128.9956928406148,125.41471709348158,114.95474209576598,111.84281066906254,115.01606822203732,127.06839895293818,128.95356210814452,115.09946180492062,109.78053243115669
|
||||||
|
115789473.68421052,122.60972126728115,122.95534448994329,122.96286662279981,137.06600378061594,131.99550881515415,142.2984627663594,149.21274357902047,139.15723138878386,134.29736175092964,156.59085089756675,120.22325258631868,115.90000440039958,130.83590036591417,123.52368846411851,150.31279879304842,166.98742200747915,125.53196502524399,143.90746519378183,144.42663199704273,132.56288756764496,133.69792656292958,131.17964799281137,121.72636962244468,121.33427487247704,130.81087104185713,117.4281842420471,138.12385699280907,128.90960083200164,162.3300031091285,139.35922574860732,122.14053971846678,148.77687064605738,132.8697192714733,122.45246475511888,119.64158058870284,123.56158002449452,136.25863906419178,137.66648146757788,126.75669043253168,125.90588279619524
|
||||||
|
126315789.47368422,126.85695797832389,134.28583930073682,134.9243479001225,151.37521788139424,140.21280420883213,144.62339296945245,163.84559288150388,144.1045494399903,150.9614755559166,167.67398843573733,130.45619186470054,124.95830020118619,135.2700953762617,136.77639995189256,167.81090206386642,174.77412622919374,129.76628504144517,157.2553264000079,162.37243566073872,147.02536539803583,145.03258706905837,142.08734372233423,135.1896556891563,140.27134785609235,133.7065102471209,128.25321889782217,149.95095576820614,141.6027406969229,172.91866216071756,148.55347630918192,137.87239560063858,159.6526409143226,140.00874730028275,132.2734392104552,131.14642235521137,132.78090331660084,142.92996688034395,143.44610237332262,135.26064672826732,140.25254454870304
|
||||||
|
136842105.2631579,128.25246481388973,147.71527992709028,145.00722832664064,167.41595134613755,143.0661991436313,149.30622178188156,167.95497013938063,157.74698866812972,168.7108649037304,168.8626981082055,144.16069731974866,144.00697861783146,141.85380575077357,147.8173775117731,186.94637737057877,178.88245540795398,134.71520058674497,160.6708919626359,157.9662813112924,167.13078010102885,152.3738460931669,148.7167743189761,150.05542535526766,161.2204795634565,139.43851340584808,138.57832816342238,162.37730958261153,144.69840190070488,185.15795489204558,157.26213668315697,140.91641295286428,165.11060669390778,148.6120298835462,141.3425099924826,146.77010574301195,139.72182381524604,152.89198275868088,146.2648896236086,139.31816132329507,156.75999156156573
|
||||||
|
147368421.0526316,134.77661006123736,163.22143277872632,154.96791705687292,174.7445341323056,142.4531365738681,164.0062487229416,179.1334203721909,168.04421263091123,177.22472287858952,169.5565973941309,156.76209875817446,166.15327856340897,151.50470790894408,151.55741512362647,217.21657504624278,191.6683819564756,140.26085027639556,172.76058940511,165.78387243604067,189.32316427052518,157.58407029996619,156.04412420238484,167.82283924796576,179.03455921239802,152.783394252577,145.01445177692787,168.14986296902072,155.1739957870869,191.07194103180868,167.6409816995387,143.29448019120832,183.1490840509823,157.3619156405536,149.21267782139316,160.39338348389003,139.87974457364334,161.35556538423674,155.4813307823402,145.56563454932453,165.64636754015476
|
||||||
|
157894736.84210527,136.23639003713706,163.8631174365817,158.57466104486312,186.48805579481197,157.06537044461083,165.02104080292986,174.38983147746143,176.514878952561,195.1106327815482,174.77758008406764,162.44861303546423,182.54323082369783,163.26382600988708,159.11155832379313,222.54600961968674,218.71034922205183,146.62281801747648,185.4798412069142,173.8299399800908,200.39737295604974,167.57263379971243,157.7886441031734,184.49155916145634,191.88277577264157,152.08469013423965,154.23577465372898,175.35115161922005,165.82630178419012,193.5049134458201,168.76205435952485,150.4369752606444,185.74660883517808,152.67771580134237,161.93311256384916,167.23404639410344,141.37671673277242,178.96127353303157,167.4840652854324,156.21128387885412,173.81255692685247
|
||||||
|
168421052.63157895,139.6767140788715,179.29714142912974,163.9306062951973,202.61394998175285,160.75181686250787,176.68706361672886,181.61770352366227,186.2762466203473,217.93236788223987,180.73991708081846,154.53878704530712,190.05474930060538,162.52324899345885,168.75508809816145,244.9614926282719,231.2320996479975,152.12870720747105,224.8866804504659,174.52597083411388,210.20824376169665,167.3110150438927,162.59094451272918,192.76136448310683,197.26422471881244,159.32390785679593,160.64695354313733,186.7008671133142,173.6902160287894,209.8485705872322,175.39101470210215,154.86256685415464,192.97556720480034,162.95017685256175,165.04581404326694,172.13883195821597,143.9112652087806,181.38266882011433,165.1702125488556,168.67916444464075,179.65576557307364
|
||||||
|
178947368.42105263,152.1773001259384,187.78848363975078,172.71605956950677,228.25923802313548,167.60074508706123,176.77090451740492,188.96848370110064,190.18331680783274,214.38406492241367,177.1231710149228,165.43429649301348,201.93324024909256,171.46795913209712,160.94297722192516,247.65322664264497,242.92557070959472,158.65025637928304,218.67045822657045,181.4624049275123,235.76302255780908,180.5963450416386,167.0881583495401,192.86693630282898,217.72460795464278,167.95777068602908,170.51874512293662,180.6173929217532,179.95675759958593,226.86208948687008,182.95824959977347,169.5371000146767,204.63095265465927,158.99093024312955,165.78888877627756,164.25471335954944,149.21484890316003,174.30290365846534,163.73526394705692,173.40510318636728,190.7973366211326
|
||||||
|
189473684.21052632,151.5611848667718,192.523295115566,172.48407511384204,240.28192562244607,168.5187837294099,177.28882083998494,192.08789655318552,194.6066968843052,216.42897035928644,187.1353163027367,171.6872317234565,204.17419042299989,174.06555168078876,180.1811453005779,237.52306122206915,247.26534311989337,175.63854791203363,232.8222674091767,185.56134612829402,243.0356346525644,177.98689555725562,165.72059787541545,207.72896230344628,219.45322499298322,173.64358805239695,169.4350698038812,188.87591009894567,180.77158077156113,242.5699783759269,186.196442212507,161.08386867313834,207.74302595465798,168.9096205596118,172.26485912439895,174.6773919341802,153.4163679908187,181.72981270718444,180.64700451295133,165.20710539751767,189.6273595542954
|
||||||
|
200000000.0,169.8640814752955,194.23091325242268,190.5104549174246,252.9276318848133,175.20456174408136,186.4562118947506,185.4500541239977,200.59314903381625,227.82707963883877,193.05572496433007,181.82379878273136,205.98679436351122,177.09725610206002,181.77721870337663,241.6123682872245,256.39578675125773,174.02710151907644,235.72361347392984,191.75352107615848,246.22038053681976,183.59360968439202,173.57362318980066,213.383493435226,221.69393600994036,183.60875355648366,177.18748693874008,184.21683073592814,184.46073462147461,232.9164292961359,198.74460786737893,168.4230472523915,217.64467298827674,168.59258995009097,170.57408494149385,186.11799173370787,159.83466629291834,190.49950953612202,193.4700266232616,168.05141248828485,201.34765830400744
|
||||||
|
21
results/AcrobotSwingup_ppo_4.csv
Normal file
21
results/AcrobotSwingup_ppo_4.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31,trial_32,trial_33,trial_34,trial_35,trial_36,trial_37,trial_38,trial_39
|
||||||
|
0.0,16.36858558654785,26.00448226928711,16.071184158325195,18.28192138671875,13.429677963256836,17.10512924194336,25.6998291015625,10.222268104553223,28.574752807617188,11.341191291809082,30.163766860961914,20.912904739379883,14.828292846679688,15.408313751220703,20.8553466796875,14.672196388244629,13.907516479492188,15.634739875793457,16.230104446411133,18.430156707763672,17.961626052856445,13.207015037536621,12.6388521194458,18.22677993774414,35.21895980834961,11.414321899414062,20.768640518188477,16.839088439941406,19.221025466918945,12.757458686828613,20.066017150878906,13.08200454711914,23.689924240112305,15.852598190307617,15.245919227600098,12.187602043151855,13.174694061279297,28.60873794555664,17.09139633178711,14.216440200805664
|
||||||
|
10526315.789473685,18.11368068621863,27.425026894472232,18.38610494032069,20.559494980381302,15.357595737471435,19.554993659495235,26.761648597376333,10.970705721973651,30.780688186410888,13.062509208275237,31.809527941909813,22.208616002811084,15.833878229993946,17.418129372144755,23.389531875251073,17.08550101876455,16.076059786835042,17.629000143648494,18.291182536348042,20.612136182021814,19.07975352904824,14.941778097541647,14.993891973799183,20.705034495576434,36.287913665020646,13.346746479426809,22.259029439503426,19.006784267496535,21.414071171314887,14.07391811784233,22.40518033229991,14.151989958437362,24.98784505076954,17.72551954455591,17.327674104739238,14.617211791113991,15.160476124466209,30.06212252066819,18.816248280065874,16.63664920676056
|
||||||
|
21052631.57894737,49.808253384466155,53.586484389952346,58.33789627190029,59.641477115844424,49.485490342111966,62.088607374362006,45.23082691309277,25.395185544209717,69.2804608328108,43.39822179718856,60.78165504336357,46.121952212335664,34.171111139316636,52.522181122545724,67.04657731302227,58.607984166705876,53.388078167085176,52.57056698946081,54.04979319342121,58.269837626047085,40.3598502209601,45.84551030737954,56.20714531717125,63.3390201349379,55.6848138251298,47.73393425372251,47.87616282530191,56.20832159549741,58.82206584021986,38.75625651626954,62.57557550233652,34.6567980071473,47.78055875756652,51.22342188313728,54.576655816276485,55.85619692370254,50.49176067109749,55.432243933534856,49.919930236347,58.87255255747948
|
||||||
|
31578947.368421055,71.2328965874425,76.72366700503058,71.72805332285134,72.38856534550477,68.78785730122365,77.75055297775778,54.84982752621463,45.98318301807894,84.81863231211901,59.27591451101645,75.08017226686273,67.39146763946954,49.83687174013736,67.73219686420505,83.50066093080923,71.85027076681648,65.2021016405271,67.83170830018798,67.88532795560988,71.00286569293598,62.64011187081605,63.95127428807009,74.29570469978444,75.51093908382833,70.50078880433973,68.12104493177334,55.40099380311874,67.08796066650476,68.0606789353234,64.09864922682046,74.35191240774628,58.24304416154478,60.30808506197093,70.66172751338529,76.4867134548084,63.85158081234038,70.85282582199392,66.78655990795168,72.24478695335233,76.44440740997028
|
||||||
|
42105263.15789474,77.7484224071298,96.2619023380022,86.77329450399922,92.2800974454576,87.50333584684084,89.44790268332343,75.83912219697419,79.6533371911815,95.03514255702991,74.54084875618322,88.62358675141745,83.9772437308964,70.56895016633243,80.37723935789678,109.88468526258363,88.13595094674181,78.81073993708618,77.67221483043356,82.76332022839966,83.84221494841773,72.35994575855804,77.1283459156504,89.5836734703208,84.03249544491398,88.31363126008135,86.18312306582433,64.09375353281352,79.1559498618019,80.94900068368278,85.278437561193,87.46399147631058,72.42653136628157,78.06582603477705,79.88587121339386,85.17665191551986,74.68685175177133,90.40675565476563,80.20776065507094,93.7273579443756,91.01352395261755
|
||||||
|
52631578.94736842,97.90770206716005,111.82454926939552,108.36010775632558,113.60709602313524,100.94954457148455,102.09600476088863,98.63393407428529,114.47409346051658,114.82926711629963,91.64805373960172,107.6202433730922,98.91310043760944,97.9633287821285,91.6397987453685,123.41990384670011,108.68151090450976,95.34269344441365,90.40612532687567,98.38066887498479,95.47824429171649,86.86412951463726,90.836374701865,109.58662343318278,97.78240419253501,114.7920796329006,103.26096448824428,83.33933468191907,91.29743300586773,95.64397068965633,109.01332511391666,103.03634391085262,85.95805003169995,91.38664265323709,98.16425843027078,96.35277655951865,91.66977146538258,125.90220136954926,96.41779016323615,109.98729364992921,108.88566012965344
|
||||||
|
63157894.73684211,125.37047808326349,132.5711681602982,126.87471587328535,145.0350278463225,114.64886130157271,116.68497737391836,126.1461284182904,146.35200817961444,140.7719584568881,113.85298913750292,121.29458211638922,121.52078314802489,132.52744425288364,107.74238071431745,135.83552353360648,129.40495361544583,119.79203189694817,104.5109738485057,111.8080031797909,108.76287958859737,102.42907586697892,102.36826832072391,126.47751222930623,111.86739099979236,127.14862887866775,115.09438136628626,109.29610119264709,107.71489048606803,113.00789435707301,159.81973507323428,117.95406236336503,102.72584452374821,108.89689731081933,118.5749325989191,120.51059561625249,111.65244195513283,158.31589792202385,114.68582999219196,123.05018297337266,122.84147329504636
|
||||||
|
73684210.5263158,144.71100891346416,147.196440286212,145.5022155543031,153.94363329607035,128.68522016495632,141.15200766760063,153.16975309466557,191.6228309517257,152.46652183006842,128.5352684340525,135.55142759376946,146.13146848563838,179.11930449521608,128.338865160261,152.4205584544637,143.08971354154835,145.0637107935226,121.55526656797801,128.53096226072378,122.18272159366231,123.49836732246952,111.95723559379248,146.287317784952,128.33978786837528,141.19485306091752,132.82848383967155,130.23896851291536,127.90642033754706,130.97265689103887,211.09225135067493,140.1532788498745,119.35181612364556,124.72892990404962,136.54365653005993,144.2544150934903,131.2036046145737,187.63218898749253,134.87938210376413,150.14396719002988,138.15857934600925
|
||||||
|
84210526.31578948,162.0370020480037,156.31898723018466,170.17524223215364,158.96548778264477,147.21586082483594,167.06266810821364,168.66515395515844,222.6042258329999,163.89596786789616,154.3754300787178,148.28315644977496,161.6299791445032,211.47772916598333,156.22074531254017,162.48690242483346,158.1717726279494,166.50903633856046,139.71684627420686,145.973302042055,136.40780240891712,144.3153340080108,128.3483490357769,162.1167041198699,155.554084555264,152.28493956317533,158.2822235135821,166.36073634730153,141.41227091250326,145.14944021853714,240.56001150211802,162.55430589686472,149.41755262702458,143.98615096645673,159.8707130866698,164.31333764438153,153.59446034768283,214.87280916375138,146.8226820620143,162.96231645626375,155.4091572279415
|
||||||
|
94736842.10526316,178.6098961453276,180.3776782914542,187.94196749604475,175.1137714721572,167.51043468783934,197.65437052623884,183.60058132156607,248.15268314458493,176.3391928693842,167.40053468439058,165.85219958891003,174.54120849799418,223.13057357894252,172.2440696009886,171.6474812578957,169.86788164364003,183.04048356186814,156.96150197497366,165.05764635947438,164.15211817560268,166.07471719747434,154.2224998240078,171.74908025558636,171.1352173864759,167.79463098319945,175.25931500257548,216.97324772170873,152.88438665970045,161.1512457246678,263.9758201241493,176.70337209918657,183.45234608348883,164.93694414834073,180.3543908214635,189.62062114246004,162.01816177231947,244.53163251388105,158.1373564490321,170.5537056993497,176.28618925512663
|
||||||
|
105263157.89473684,193.30205027020207,187.1914229847883,203.96110944148575,183.5644382188855,180.48377511458384,217.92043037320437,188.80162024019168,260.39796915196314,191.81029515740283,173.93596558334755,182.01333142606506,190.71595154285762,239.05674876516215,177.2939729300067,186.91102707402527,176.84491001362616,186.19677410883585,164.3617218653581,184.53437045001917,183.20509244670828,205.85819966030252,179.22450909059793,182.29957852700412,181.46296024347276,171.77988297391136,184.23072389874432,245.4784072789459,171.587254687442,176.30843450834877,265.0269311613653,188.4264004807558,217.97755798705728,183.66718326422315,187.72174528612655,220.2040809714893,170.94536307677006,267.03336479327027,172.14859114913398,185.58392228289324,190.33947336203173
|
||||||
|
115789473.68421052,208.79319411928964,188.5515183116673,214.68912284286728,189.80226731093967,200.99899175387503,233.08334664822945,201.32957168336225,257.29808450166206,197.17796779661296,192.7586928189131,190.24551466990707,194.74039885104528,247.5697243989257,182.7494370709083,197.44390861273143,192.5252884511637,199.58557758061343,173.26205946096422,189.7186690824844,193.52964714462573,257.2164222328534,208.11338950400537,188.8945580527723,197.64554594944224,181.77447867963122,193.0287127659509,257.6411816591701,184.85294553505418,187.84853991527638,274.019204659145,181.5535320074522,234.9126296684435,195.239675157123,192.50984263556322,237.40363958160137,181.16123714510587,281.62209652492214,182.80345032329043,199.7007162364566,191.43039679382812
|
||||||
|
126315789.47368422,215.17213121295967,201.6638237665565,217.10458251478929,201.11104144184876,202.3109144380218,235.62390698455377,210.5578905895476,271.0791172423191,197.943549037475,204.26847384758602,199.08479770052136,196.26545097590153,252.2816290018301,190.39628484176467,200.7925093481085,199.14967165379642,212.5621515621439,188.94016080318727,202.14251602125302,202.68086015682803,282.91423865896843,225.25598983223088,191.99741405843037,208.46533837311816,196.57091622785188,203.0233711468216,276.11628966978714,191.3800815208467,195.658297057132,285.8831648093512,190.9419632189162,248.15049343772873,202.1314541157593,200.4752658915982,242.11983166160346,187.66282315151844,270.4795668808707,188.63239177921142,200.02009251540386,197.85975267979578
|
||||||
|
136842105.2631579,217.98853788786977,199.50312690065343,224.58337993837296,208.9769738179082,201.75919132896408,237.96610568327588,214.3017990535481,269.23628770842777,203.11322987975342,214.43410531173453,203.61582658706115,211.0856882412183,261.94065957708375,200.47173371163927,205.69781590882596,203.31990171622206,208.64185875484984,214.56072741309364,213.57002062117294,198.83915595549294,289.17342066162183,233.29456474535024,195.0289080642432,206.31307422148885,202.4801899294609,210.67321051298086,280.72116640069805,208.26695025499674,208.2685022002855,287.9528258196865,206.90146983045457,247.507406615567,205.81075941488518,205.34690878047013,254.87218656343438,201.3848158490955,279.7956237606395,201.0043864201476,203.50824197127878,208.17130551171437
|
||||||
|
147368421.0526316,217.32419527509867,200.58219213952978,228.61363123080739,209.80447670553198,201.222953930621,237.39189137671133,223.35014302066818,272.90075761905337,216.9850845644844,214.35539995352647,208.8383002987859,209.1085995929393,273.30098894725546,213.81537856256534,212.79977858793043,201.63605250381036,217.41520763442458,221.49273532662036,223.80436662698057,218.16479455855085,302.49030731127203,235.43395312638165,200.28105959742024,215.59501777271487,208.17055271396677,217.70824264159162,286.52214212447325,210.8696490934846,208.64572556188892,289.8288936274864,218.72212303890086,253.16180579079486,206.12732411297733,206.6611341719317,261.24226328342576,204.50120856012333,273.27725328649507,200.5985948860315,201.80964809301156,213.60618488502965
|
||||||
|
157894736.84210527,226.49780440796943,207.24315012999685,235.65792930423387,216.60826363722043,210.51183623339662,246.26078926357042,224.9306315092829,283.4974892593487,216.7543588270771,216.01218530515058,209.7497131654843,213.69744228850277,267.45722396700666,220.66192130278023,206.0893927935996,217.75918784176216,212.0598593994851,229.30866086994843,222.2335027193346,223.07983601749604,296.1687280665309,232.0055078167747,199.23900616932607,215.60805316054754,211.86143905027586,221.95405283743656,281.6259924049523,212.99003534163464,215.70163428019785,281.1735813626787,217.48441232105205,263.0578509477119,219.5334513043763,213.5692440741115,265.75040038330405,200.06002417883715,270.0052004317828,215.17445203381234,203.44633494395958,216.50068448000997
|
||||||
|
168421052.63157895,223.93587985923745,212.07874561610976,240.97792695565897,220.25464743128114,201.85987193630672,257.9197292466573,231.85746126491938,284.3861946518071,224.42235904321117,213.27050948076962,215.80219595320008,222.14729864181245,288.94469209919345,223.15203127438343,205.0707641955558,215.25038784890953,209.04970478681315,227.22810439191696,227.75368749732127,220.06600590888152,299.0494781174488,245.81559792515975,208.89824395760937,216.78852146814404,209.43846228644458,220.00116854889572,288.96192830685436,217.41571646582057,218.8857263006332,279.92003510203057,208.23940556399378,275.61640013221887,223.18092598281078,219.0619147140894,279.9290393990493,213.08295235739521,273.8928890016931,221.08530729431195,208.0619827835844,226.94234414576164
|
||||||
|
178947368.42105263,220.84286781637786,222.79287762696393,233.476701383239,235.1781182960295,215.92675599996568,258.2066027607944,231.309206453717,297.90250441432,234.27120057524405,222.58885262502034,215.86338138093248,224.88938328099547,290.6832793721532,224.4027044708709,209.12194377076594,227.27354518809148,224.1860935392885,234.3348136155394,223.93597238975218,219.984420466745,296.871845642533,239.51992024720707,210.27114336064648,232.16097224402625,211.57943688960452,223.45880453474305,295.7942003047202,227.79182380499768,218.21277280552235,291.1328999962338,220.58823350864435,283.095428308374,224.1877421786663,216.26136203403289,275.3371057023302,212.07490917730695,281.80039588318635,215.6809128863412,212.09627136150556,227.36656261592526
|
||||||
|
189473684.21052632,228.9371860816538,215.13666229416458,246.25167450886684,233.39958843233842,217.31005046935624,265.44211602987014,234.30711191793557,293.95242045097405,232.69408592201995,220.64381109058363,219.52441634299683,229.41705037145732,285.4059974689893,223.51765812929318,206.89673686448558,226.15606300066383,227.87982908650778,239.24564065794536,242.57317224293536,217.44256504973876,306.3944620587489,245.83614297058443,215.22088968819858,234.96357834421696,211.93833487482942,222.2622428366352,300.1891358423761,229.86779704169885,223.61005206368966,296.5044463980231,224.47793868794997,287.04360817806213,229.73564116900317,218.09183779334097,285.510653839217,205.06777701194596,295.79533367507014,222.75202131692393,209.32378203056527,223.2976482120082
|
||||||
|
200000000.0,233.3627727023865,223.80396291692006,239.77410919650606,223.2427730474033,219.3700780766575,259.3557613209674,237.12765725113843,296.86745795607567,228.5117539614439,232.06042272323057,221.5428994470521,226.27285162084982,293.36829774160134,233.21333471097444,214.7782752443301,227.2409575769776,228.57191316234437,242.49128405279234,246.86727137393072,217.1853632675974,298.617594296995,249.63688149028704,223.22638544048132,233.73226100755366,217.80707008979823,229.38289530888983,298.8386672107797,234.02891624601264,230.70007269006027,302.4543564429409,220.78411024965737,292.27454152703285,233.72480676127108,225.3258987429895,292.12308772614125,214.41391163515416,291.9945796179144,223.05400930266632,211.99844315491225,225.65526270787967
|
||||||
|
21
results/AcrobotSwingup_ppo_brax.csv
Normal file
21
results/AcrobotSwingup_ppo_brax.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4
|
||||||
|
0.0,2.501397132873535,4.15701961517334,18.337873458862305,4.919705390930176,2.8577992916107178
|
||||||
|
2631578.947368421,3.2738337275294356,4.904298884312116,21.022863421344052,6.145146936096513,3.1349880342441203
|
||||||
|
5263157.894736842,4.046270322185336,5.651578153450891,23.7078533838258,7.37058848126285,3.412176776877523
|
||||||
|
7894736.842105264,4.818706916841237,6.398857422589667,26.392843346307544,8.596030026429187,3.6893655195109254
|
||||||
|
10526315.789473685,5.591143511497138,7.146136691728442,29.07783330878929,9.821471571595524,3.966554262144328
|
||||||
|
13157894.736842105,6.363580106153039,7.893415960867218,31.76282327127104,11.046913116761859,4.2437430047777305
|
||||||
|
15789473.684210528,7.1360167008089395,8.640695230005994,34.44781323375278,12.272354661928198,4.520931747411133
|
||||||
|
18421052.63157895,7.90845329546484,9.38797449914477,37.13280319623453,13.497796207094533,4.798120490044536
|
||||||
|
21052631.57894737,10.08382865989156,12.913037713421017,39.85648699235498,16.609374490550213,6.9925580420776425
|
||||||
|
23684210.52631579,13.508844910606154,18.91235886818092,42.61463658099896,21.400993012455537,10.89474831506806
|
||||||
|
26315789.47368421,16.93386116132075,24.91168002294082,45.37278616964294,26.19261153436086,14.79693858805848
|
||||||
|
28947368.42105263,20.35887741203534,30.911001177700726,48.130935758286924,30.984230056266185,18.699128861048898
|
||||||
|
31578947.368421055,23.78389366274994,36.91032233246063,50.88908534693091,35.77584857817152,22.60131913403932
|
||||||
|
34210526.315789476,27.208909913464534,42.909643487220535,53.64723493557489,40.56746710007684,26.503509407029735
|
||||||
|
36842105.2631579,30.63392616417913,48.90896464198044,56.405384524218874,45.35908562198216,30.405699680020156
|
||||||
|
39473684.21052632,34.11003416511966,55.13150480301365,59.32188230615698,50.270359410071066,34.29788977937087
|
||||||
|
42105263.15789474,38.419113092778026,64.9932810773975,64.81999924831223,57.13242481361356,38.02704271778726
|
||||||
|
44736842.10526316,42.72819202043639,74.85505735178135,70.31811619046749,63.994490217156056,41.75619565620365
|
||||||
|
47368421.05263158,47.03727094809476,84.7168336261652,75.81623313262274,70.85655562069856,45.48534859462004
|
||||||
|
50000000.0,51.34634987575312,94.57860990054905,81.31435007477799,77.71862102424105,49.21450153303643
|
||||||
|
21
results/AcrobotSwingup_ppo_brax_full.csv
Normal file
21
results/AcrobotSwingup_ppo_brax_full.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4
|
||||||
|
0.0,2.501397132873535,4.15701961517334,18.337873458862305,4.919705390930176,2.8577992916107178
|
||||||
|
26315789.47368421,16.93386116132075,24.91168002294082,45.37278616964294,26.19261153436086,14.79693858805848
|
||||||
|
52631578.94736842,55.655428803411496,104.44038617493291,86.81246701693325,84.58068642778355,52.94365447145282
|
||||||
|
78947368.42105263,139.32360786356423,205.00362737868966,130.39425794466547,115.773391299734,111.61672259631911
|
||||||
|
105263157.89473684,144.93495417791502,214.09120973474103,151.21790558727164,161.61130390041754,170.95907509326935
|
||||||
|
131578947.36842105,162.14931891388014,201.09987498113983,154.43530250693624,166.92701495425743,227.34414487031466
|
||||||
|
157894736.84210527,189.28393236586925,248.40014506954898,182.80273702740666,175.95200299118696,217.2686989432887
|
||||||
|
184210526.31578946,187.62173461914065,259.6754150390625,215.6973571777344,208.6976623535156,238.82362365722656
|
||||||
|
210526315.78947368,187.62173461914065,259.6754150390625,215.6973571777344,208.6976623535156,238.82362365722656
|
||||||
|
236842105.2631579,187.62173461914065,259.6754150390625,215.6973571777344,208.6976623535156,238.82362365722656
|
||||||
|
263157894.7368421,187.62173461914065,259.6754150390625,215.6973571777344,208.6976623535156,238.82362365722656
|
||||||
|
289473684.2105263,187.62173461914065,259.6754150390625,215.6973571777344,208.6976623535156,238.82362365722656
|
||||||
|
315789473.68421054,187.62173461914065,259.6754150390625,215.6973571777344,208.6976623535156,238.82362365722656
|
||||||
|
342105263.15789473,187.62173461914065,259.6754150390625,215.6973571777344,208.6976623535156,238.82362365722656
|
||||||
|
368421052.6315789,187.62173461914065,259.6754150390625,215.6973571777344,208.6976623535156,238.82362365722656
|
||||||
|
394736842.1052632,187.62173461914065,259.6754150390625,215.6973571777344,208.6976623535156,238.82362365722656
|
||||||
|
421052631.57894737,187.62173461914065,259.6754150390625,215.6973571777344,208.6976623535156,238.82362365722656
|
||||||
|
447368421.05263156,187.62173461914065,259.6754150390625,215.6973571777344,208.6976623535156,238.82362365722656
|
||||||
|
473684210.5263158,187.62173461914065,259.6754150390625,215.6973571777344,208.6976623535156,238.82362365722656
|
||||||
|
500000000.0,187.62173461914065,259.6754150390625,215.6973571777344,208.6976623535156,238.82362365722656
|
||||||
|
21
results/AcrobotSwingup_ppo_long.csv
Normal file
21
results/AcrobotSwingup_ppo_long.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20,trial_21,trial_22,trial_23,trial_24,trial_25,trial_26,trial_27,trial_28,trial_29,trial_30,trial_31,trial_32,trial_33,trial_34,trial_35,trial_36,trial_37,trial_38,trial_39,trial_40,trial_41,trial_42,trial_43,trial_44,trial_45,trial_46,trial_47,trial_48,trial_49,trial_50,trial_51,trial_52,trial_53,trial_54,trial_55,trial_56,trial_57,trial_58,trial_59,trial_60,trial_61,trial_62,trial_63,trial_64,trial_65,trial_66,trial_67,trial_68,trial_69,trial_70,trial_71,trial_72,trial_73,trial_74,trial_75,trial_76,trial_77,trial_78,trial_79
|
||||||
|
0.0,89.20394897460938,80.8363265991211,90.47622680664062,83.14705657958984,64.39078521728516,80.56387329101562,85.53324890136719,70.99172973632812,87.89069366455078,75.88204193115234,84.02256774902344,68.24169158935547,98.6713638305664,90.78836059570312,69.00027465820312,74.3541488647461,84.44625854492188,89.91865539550781,69.98278045654297,73.83918762207031,80.39730834960938,78.81390380859375,75.34542083740234,78.56448364257812,82.18145751953125,93.45423126220703,83.18026733398438,97.29779052734375,76.18155670166016,80.51220703125,96.50995635986328,86.4296875,81.37446594238281,83.57403564453125,75.21205139160156,76.11017608642578,93.61534118652344,84.02777862548828,79.27843475341797,97.04215240478516,78.34747314453125,69.44991302490234,77.99539947509766,80.59425354003906,82.23249053955078,77.73381042480469,75.1167221069336,78.68003845214844,81.82601165771484,87.6048812866211,74.93865203857422,81.61906433105469,74.98188781738281,79.41499328613281,86.32952880859375,81.18024444580078,83.0518569946289,71.0606689453125,86.39849090576172,73.83191680908203,80.37627410888672,74.10311126708984,79.9956283569336,74.34557342529297,74.59795379638672,76.42047119140625,80.4571762084961,87.75067138671875,71.04902648925781,73.02156066894531,79.36551666259766,79.2263412475586,94.48149871826172,84.76939392089844,92.80410766601562,90.59996795654297,77.6960678100586,80.04590606689453,72.02660369873047,78.71775817871094
|
||||||
|
10526315.789473685,89.20394897460938,80.8363265991211,90.47622680664062,83.14705657958984,64.39078521728516,80.56387329101562,85.53324890136719,70.99172973632812,87.89069366455078,75.88204193115234,84.02256774902344,68.24169158935547,98.6713638305664,90.78836059570312,69.00027465820312,74.3541488647461,84.44625854492188,89.91865539550781,69.98278045654297,73.83918762207031,80.39730834960938,78.81390380859375,75.34542083740234,78.56448364257812,82.18145751953125,93.45423126220703,83.18026733398438,97.29779052734375,76.18155670166016,80.51220703125,96.50995635986328,86.4296875,81.37446594238281,83.57403564453125,75.21205139160156,76.11017608642578,93.61534118652344,84.02777862548828,79.27843475341797,97.04215240478516,78.34747314453125,69.44991302490234,77.99539947509766,80.59425354003906,82.23249053955078,77.73381042480469,75.1167221069336,78.68003845214844,81.82601165771484,87.6048812866211,74.93865203857422,81.61906433105469,74.98188781738281,79.41499328613281,86.32952880859375,81.18024444580078,83.0518569946289,71.0606689453125,86.39849090576172,73.83191680908203,80.37627410888672,74.10311126708984,79.9956283569336,74.34557342529297,74.59795379638672,76.42047119140625,80.4571762084961,87.75067138671875,71.04902648925781,73.02156066894531,79.36551666259766,79.2263412475586,94.48149871826172,84.76939392089844,92.80410766601562,90.59996795654297,77.6960678100586,80.04590606689453,72.02660369873047,78.71775817871094
|
||||||
|
21052631.57894737,89.20394897460938,80.8363265991211,90.47622680664062,83.14705657958984,64.39078521728516,80.56387329101562,85.53324890136719,70.99172973632812,87.89069366455078,75.88204193115234,84.02256774902344,68.24169158935547,98.6713638305664,90.78836059570312,69.00027465820312,74.3541488647461,84.44625854492188,89.91865539550781,69.98278045654297,73.83918762207031,80.39730834960938,78.81390380859375,75.34542083740234,78.56448364257812,82.18145751953125,93.45423126220703,83.18026733398438,97.29779052734375,76.18155670166016,80.51220703125,96.50995635986328,86.4296875,81.37446594238281,83.57403564453125,75.21205139160156,76.11017608642578,93.61534118652344,84.02777862548828,79.27843475341797,97.04215240478516,78.34747314453125,69.44991302490234,77.99539947509766,80.59425354003906,82.23249053955078,77.73381042480469,75.1167221069336,78.68003845214844,81.82601165771484,87.6048812866211,74.93865203857422,81.61906433105469,74.98188781738281,79.41499328613281,86.32952880859375,81.18024444580078,83.0518569946289,71.0606689453125,86.39849090576172,73.83191680908203,80.37627410888672,74.10311126708984,79.9956283569336,74.34557342529297,74.59795379638672,76.42047119140625,80.4571762084961,87.75067138671875,71.04902648925781,73.02156066894531,79.36551666259766,79.2263412475586,94.48149871826172,84.76939392089844,92.80410766601562,90.59996795654297,77.6960678100586,80.04590606689453,72.02660369873047,78.71775817871094
|
||||||
|
31578947.368421055,89.20394897460938,80.8363265991211,90.47622680664062,83.14705657958984,64.39078521728516,80.56387329101562,85.53324890136719,70.99172973632812,87.89069366455078,75.88204193115234,84.02256774902344,68.24169158935547,98.6713638305664,90.78836059570312,69.00027465820312,74.3541488647461,84.44625854492188,89.91865539550781,69.98278045654297,73.83918762207031,80.39730834960938,78.81390380859375,75.34542083740234,78.56448364257812,82.18145751953125,93.45423126220703,83.18026733398438,97.29779052734375,76.18155670166016,80.51220703125,96.50995635986328,86.4296875,81.37446594238281,83.57403564453125,75.21205139160156,76.11017608642578,93.61534118652344,84.02777862548828,79.27843475341797,97.04215240478516,78.34747314453125,69.44991302490234,77.99539947509766,80.59425354003906,82.23249053955078,77.73381042480469,75.1167221069336,78.68003845214844,81.82601165771484,87.6048812866211,74.93865203857422,81.61906433105469,74.98188781738281,79.41499328613281,86.32952880859375,81.18024444580078,83.0518569946289,71.0606689453125,86.39849090576172,73.83191680908203,80.37627410888672,74.10311126708984,79.9956283569336,74.34557342529297,74.59795379638672,76.42047119140625,80.4571762084961,87.75067138671875,71.04902648925781,73.02156066894531,79.36551666259766,79.2263412475586,94.48149871826172,84.76939392089844,92.80410766601562,90.59996795654297,77.6960678100586,80.04590606689453,72.02660369873047,78.71775817871094
|
||||||
|
42105263.15789474,91.83890336714788,83.37862017327646,93.38319495671179,86.11032741058649,65.59998702883225,82.802158690762,90.36925149599601,72.92086281559308,89.91974067405238,78.65103180613627,85.27209212108828,70.35783520844504,100.39711517163674,93.48102061809595,72.1783904838595,75.62877706764395,87.38326875031655,92.81064837440394,72.77644977355351,75.63981741466598,82.29444355113107,80.66863387292689,76.674734631156,80.8799628138212,84.51592516975472,95.8623842300842,84.8222124846936,99.30070928940317,78.54292610308305,82.05789682066838,99.1743735451241,88.77804447440147,83.97995970524084,86.19764531529677,77.44173004791925,77.56665337874124,96.28442318778784,84.5284150889004,81.07855277031743,99.7750480334143,80.49402177736287,71.09090211112414,80.39085019615136,83.74238441467615,84.66842691447596,79.73682998461324,76.04671674904895,79.65702344751672,84.30187994780385,89.92895681869207,77.11726170997045,83.9215963991395,79.65131888701646,81.60676160617795,88.64996913257067,83.00430863354758,85.68049400720818,73.09730952945112,88.45758819014577,76.72754542868059,83.23132882250727,75.5819695531868,81.82479689078318,75.70032069745405,76.13718041682672,78.71135311621212,81.74163673432167,90.45945818375517,73.48493777289781,75.21757633792149,82.17086121417232,81.70204255081858,96.63803022173221,86.93000881472949,95.52408690668044,92.92724565573428,80.56230631514022,83.82137228212116,73.79306175654698,82.11033787105718
|
||||||
|
52631578.94736842,104.11502401484306,95.22303870536511,106.92661408773543,99.91605791435616,71.23359775393583,93.23021835246617,112.89994556506338,81.90859728780191,99.37296914700883,91.55161705869573,91.09356363768846,80.216842426694,108.43730430390139,106.02598890738557,86.98507306576188,81.56720607576587,101.06665231362811,106.2842989323604,85.7920153114243,84.02886150527095,91.13310151227809,89.30972842209124,82.86794107031707,91.66766439374133,95.39209312296,107.0818483906326,92.47195277017363,108.63220756512311,89.54442796881963,89.25918819502492,111.58775986963754,99.71892209385632,96.11882475362135,98.4209114720018,87.82969132271644,84.35230819376345,108.7195426566098,86.86085531742434,89.46521252356573,112.50747199855496,90.4946844330073,78.7361881400038,91.55113522443725,98.40936861038621,96.01733257803147,89.0687978720019,80.37951524478288,84.20874764740859,95.83682640570208,100.75670841350181,87.26729513978239,94.64897807338727,101.40596455725688,91.8181005215164,99.46078447479537,91.50253239837978,97.92718229782858,82.5859160866186,98.0508193899039,90.2181342547228,96.53288585113212,82.47189629817294,90.34680154865964,82.01202043968864,83.30836021036447,89.38445735595852,87.72587404290209,103.07956030903966,84.83372653741131,85.44870343936964,95.24082108558649,93.23621102543729,106.68520257208714,96.99620547641966,108.19633406670016,103.76991598690418,93.91596806848582,101.41108043443856,82.02290144495129,97.91619687495428
|
||||||
|
63157894.73684211,116.39114466253824,107.06745723745377,120.47003321875908,113.72178841812584,76.86720847903939,103.65827801417034,135.43063963413076,90.89633176001072,108.82619761996528,104.45220231125519,96.91503515428866,90.07584964494296,116.47749343616604,118.57095719667518,101.79175564766426,87.5056350838878,114.75003587693968,119.75794949031686,98.8075808492951,92.41790559587594,99.97175947342512,97.95082297125558,89.06114750947813,102.45536597366149,106.26826107616529,118.301312551181,100.12169305565368,117.96370584084305,100.54592983455622,96.46047956938146,124.00114619415099,110.6597997133112,108.25768980200189,110.64417762870687,98.21765259751365,91.13796300878568,121.15466212543177,89.19329554594826,97.85187227681403,125.23989596369566,100.49534708865173,86.38147416888347,102.71142025272313,113.07635280609627,107.366238241587,98.40076575939057,84.7123137405168,88.76047184730047,107.37177286360031,111.58446000831155,97.41732856959435,105.37635974763504,123.16061022749733,102.02943943685483,110.27159981702009,100.00075616321199,110.17387058844899,92.07452264378607,107.64405058966202,103.70872308076503,109.83444287975698,89.36182304315909,98.8688062065361,88.32372018192325,90.4795400039022,100.05756159570493,93.71011135148251,115.69966243432418,96.18251530192484,95.67983054081779,108.31078095700066,104.770379500056,116.73237492244206,107.06240213810986,120.86858122671988,114.6125863180741,107.26962982183143,119.00078858675596,90.25274113335561,113.72205587885139
|
||||||
|
73684210.5263158,128.6672653102334,118.9118757695424,134.01345234978274,127.5275189218955,82.50081920414297,114.08633767587452,157.9613337031981,99.88406623221955,118.27942609292174,117.35278756381464,102.73650667088886,99.93485686319192,124.5176825684307,131.1159254859648,116.59843822956663,93.44406409200973,128.4334194402512,133.23160004827332,111.82314638716589,100.8069496864809,108.81041743457214,106.59191752041993,95.2543539486392,113.24306755358162,117.14442902937057,129.52077671172938,107.77143334113372,127.29520411656299,111.5474317002928,103.661770943738,136.41453251866443,121.60067733276605,120.39655485038242,122.86744378541192,108.60561387231084,97.9236178238079,133.58978159425374,91.5257357744722,106.23853203006233,137.97231992883633,110.49600974429616,94.02676019776314,113.87170528100901,127.74333700180631,118.71514390514251,107.73273364677922,89.04511223625072,93.31219604719234,118.90671932149854,122.41221160312129,107.5673619994063,116.1037414218828,144.91525589773778,112.24077835219327,121.08241515924479,108.49897992804419,122.42055887906939,101.56312920095353,117.23728178942014,117.19931190680722,123.13599990838185,96.25174978814525,107.39081086441256,94.63541992415784,97.65071979743993,110.7306658354513,99.69434866006293,128.31976455960867,107.53130406643834,105.91095764226593,121.38074082841484,116.3045479746747,126.77954727279698,117.12859879980003,133.5408283867396,125.45525664924399,120.62329157517703,136.59049673907336,98.48258082175994,129.52791488274852
|
||||||
|
84210526.31578948,139.68291226813668,128.94681896578902,148.62833511490902,143.0844506842608,91.20545182362653,129.27292150440638,180.69630025108435,109.37763755300038,128.53894398070437,128.70190337919462,114.10592263854443,110.60160879080811,135.48891710194854,144.33773898021667,134.58994552765526,102.73288828555567,138.5170013085627,153.4985014977878,125.25831002640922,109.8377665566523,118.19698081217149,117.17608292516414,102.38698935314724,124.64108090230633,126.44978389494803,139.41901772017295,118.71161188825016,137.45320586558856,125.09871083229697,113.46550792241031,146.36297607735582,133.35785752027155,134.72326402832596,134.08462554505329,119.93194249152643,114.8597193193155,151.10453830647006,96.17101366756035,116.89370439476089,149.12408896603742,119.83257702872199,102.80520067886137,134.0615314108183,140.29797241900766,130.82674936463133,125.34865163539091,97.57429407148976,100.09496637844478,130.82673135061344,133.1458388654479,120.9796613688862,127.27521115706568,167.08111139314659,122.42681659989245,131.00404553689125,118.70512334973529,134.2743756131287,116.48327395022741,128.6186399674069,130.0710934009661,136.1661843050715,104.11886874790667,116.35198655654354,104.10131657552851,106.60930670888635,124.35127449431909,114.72998590565125,139.92858233527795,123.17997080013363,117.42721537989758,132.4576990519369,128.53139771607775,136.7404064673136,129.0214081451173,145.61363782182625,138.34311157976822,135.8183032487238,151.20700613101764,108.23916551404713,142.00782020864725
|
||||||
|
94736842.10526316,149.02279099586764,136.57610998092184,164.6677019707033,160.96956070021332,103.9929312065481,150.785844141798,203.70284194811933,119.54370630657368,139.87040346999166,137.98837939973848,132.85119192954411,122.3422370976059,150.3569031538767,158.45940123799767,156.8155975931239,116.47597904231947,143.81473689998947,182.7968600224136,139.25131884196622,119.72180427839449,128.31197073089263,130.34351241159823,110.7685720600714,136.85048847603213,133.66678206644787,147.56072560841233,134.02634576869062,148.71002330105762,142.03985503759142,126.72913121190592,153.0343399083144,146.2002915459378,151.9586572308413,143.96424327370738,132.50580701469818,145.29058568534077,175.3725456277378,103.89115136233352,130.5648075121602,158.17441682873547,128.28624613454613,113.09014164964434,166.25590649195573,150.04429299176937,143.95234511803494,153.97787399710236,111.68246255192255,109.84385671462977,143.25867842129557,143.7543301829795,138.7290616745795,139.03708485699673,189.7936600615526,132.57921824305527,139.743526753905,131.18190321852543,145.60587944343192,138.62451281436716,142.37726821925315,142.12018603159223,148.83558557806873,113.28514030526219,125.89702914948795,117.76063909119516,117.94420665790975,141.89051652400448,141.79923287737816,150.19292287315935,144.54521769946672,130.6520221409468,140.88501145479134,141.67914986894195,146.58651448092797,143.34265334660657,156.88950980114146,153.94998825917284,153.46134356570295,161.87072216839093,120.02551948577371,150.065953343859
|
||||||
|
105263157.89473684,158.36266972359857,144.20540099605464,180.7070688264976,178.85467071616583,116.78041058946968,172.29876677918963,226.70938364515428,129.70977506014697,151.2018629592789,147.27485542028234,151.5964612205438,134.0828654044037,165.22488920580483,172.58106349577864,179.0412496585925,130.21906979908326,149.11247249141624,212.0952185470394,153.24432765752323,129.60584200013665,138.42696064961376,143.5109418980323,119.15015476699557,149.05989604975792,140.88378023794772,155.70243349665174,149.34107964913105,159.9668407365267,158.98099924288587,139.99275450140155,159.7057037392729,159.04272557160408,169.19405043335667,153.8438610023615,145.07967153786993,175.72145205136604,199.64055294900555,111.61128905710669,144.2359106295595,167.2247446914335,136.73991524037027,123.37508262042729,198.45028157309315,159.79061356453107,157.07794087143859,182.6070963588138,125.79063103235535,119.59274705081476,155.69062549197773,154.36282150051105,156.47846198027278,150.7989585569278,212.5062087299586,142.73161988621808,148.48300797091875,143.65868308731558,156.93738327373512,160.76575167850692,156.13589647109944,154.16927866221837,161.50498685106595,122.45141186261772,135.44207174243235,131.41996160686182,129.27910660693314,159.42975855368985,168.86847984910506,160.45726341104077,165.91046459879985,143.876828901996,149.31232385764574,154.82690202180618,156.43262249454236,157.66389854809583,168.16538178045664,169.55686493857746,171.10438388268207,172.53443820576422,131.81187345750033,158.12408647907077
|
||||||
|
115789473.68421052,167.70254845132953,151.83469201118743,196.74643568229186,196.73978073211833,129.56788997239127,193.81168941658126,249.71592534218922,139.87584381372025,162.5333224485662,156.5613314408262,170.3417305115435,145.82349371120148,180.09287525773294,186.70272575355963,201.26690172406114,143.96216055584705,154.410208082843,241.39357707166522,167.23733647308026,139.48987972187885,148.54195056833493,156.6783713844664,127.53173747391973,161.2693036234837,148.10077840944757,163.84414138489112,164.6558135295715,171.22365817199577,175.9221434481803,153.25637779089718,166.37706757023147,171.88515959727033,186.42944363587202,163.72347873101563,157.6535360610417,206.1523184173913,223.90856027027328,119.33142675187986,157.9070137469588,176.27507255413153,145.1935843461944,133.66002359121026,230.64465665423063,169.53693413729275,170.2035366248422,211.23631872052528,139.89879951278814,129.34163738699976,168.12257256265988,164.97131281804263,174.22786228596607,162.56083225685887,235.21875739836463,152.88402152938093,157.2224891879325,156.13546295610573,168.2688871040383,182.90699054264667,169.8945247229457,166.2183712928445,174.17438812406317,131.61768341997322,144.98711433537676,145.07928412252846,140.61400655595654,176.96900058337525,195.93772682083198,170.7216039489222,187.27571149813295,157.10163566304522,157.73963626050016,167.97465417467038,166.27873050815677,171.98514374958512,179.44125375977183,185.16374161798205,188.74742419966122,183.19815424313754,143.5982274292269,166.18221961428253
|
||||||
|
126315789.47368422,175.8350267487467,161.8061472119717,208.03998840503252,207.5285752009487,143.1370844519452,206.5322182061682,262.9093908576754,147.4373956341245,170.21033228892534,164.85177601750044,186.61592005445027,165.94827330141354,191.33288729269253,194.05506209282004,216.3226515594612,164.73235903866075,163.1639558660109,254.12751875640282,176.57977594373298,150.19921869827937,156.79100413562685,175.32154168795353,137.01920083866887,167.96755922241059,154.34686713746214,171.63340669874506,183.2646974514231,178.38948431792682,189.8972654332745,163.70381406535734,173.8379736809767,178.73959604916976,199.60412640399056,171.7256522931138,165.35930926408466,223.84505093299947,233.63456976838884,125.53268858893591,163.4329808375842,184.71556278679344,156.55853860096755,151.22210737757408,245.9729120910498,175.13738877980498,178.04721135668808,228.51011787610867,161.45716021145978,143.55529653386233,182.66647041215464,172.4374555043484,194.01013558954415,169.34834222867053,247.16227268466824,161.06461024977824,164.863327717682,177.76237437347794,174.9926809654094,198.86354254623694,177.32570665150138,172.56719936479988,180.32495410467945,140.36866652049187,153.81399892229288,162.30112614426918,161.6359521892758,194.16625012634864,214.0882600325049,176.0203774171853,198.874505284668,165.01138273653396,164.57930624509783,174.14524410248134,173.87135472493324,180.6116648077387,185.9919314511595,190.8415762522049,200.59258663901994,190.2688025673258,154.66611321472726,173.33574270104108
|
||||||
|
136842105.2631579,183.29983274978161,173.0727801359674,216.70918508079367,214.39322555135328,157.13855433954222,214.39069987064707,270.67638894428507,153.55869303844932,175.8664924361307,172.59143182128628,201.52364371865576,190.70934868883603,200.56669021731145,197.66407412350426,227.41356506832255,189.38843086935645,173.82882073879696,257.70161425254184,183.35052814640085,161.36493550101682,164.00822602770708,196.99270531819965,147.11819775914387,171.6182397150659,160.05605913972136,179.22777714833123,203.69519139390823,183.29306052573583,202.2322266091105,172.59394589949844,181.735483501478,182.28276973319888,210.53330404174173,178.68963055020595,170.37310917549598,234.49380792399813,235.31909705662372,130.89403741421833,164.45482358072272,192.81882298321895,169.53338497770832,172.8083297453016,251.97448472135864,178.44524872328807,182.97007187542576,239.50455742730784,187.1353529243939,160.23789835276492,198.37824181124694,178.1659318175363,214.9165537490244,173.3851120400441,253.15069215725129,168.15481931722397,171.8966351686835,204.4491558967886,179.16848807873777,211.4000658874924,181.25791631630747,175.76387622633592,182.87071291434674,148.8900020321279,162.24375411202496,181.49298202372324,188.01467248890293,211.1743835292117,227.30689331483603,178.57327513041753,205.0726134215902,169.98199046742678,170.54103645709165,176.45757971128185,180.21783999416596,186.0890984127293,189.92965552594688,191.02881626947848,209.23161933094346,195.35254438486663,165.33669807856847,179.98903152150544
|
||||||
|
147368421.0526316,190.7646387508165,184.3394130599631,225.37838175655486,221.25787590175784,171.1400242271392,222.24918153512596,278.44338703089477,159.67999044277414,181.52265258333608,180.33108762507212,216.43136738286125,215.4704240762585,209.80049314193036,201.2730861541885,238.5044785771839,214.04450270005213,184.49368561158303,261.2757097486809,190.1212803490687,172.5306523037543,171.2254479197873,218.66386894844578,157.21719467961887,175.2689202077212,165.76525114198057,186.8221475979174,224.12568533639336,188.19663673354484,214.56718778494653,181.48407773363954,189.6329933219793,185.825943417228,221.4624816794929,185.65360880729813,175.38690908690734,245.14256491499677,237.0036243448586,136.25538623950075,165.47666632386125,200.92208317964443,182.5082313544491,194.39455211302914,257.97605735166746,181.75310866677117,187.89293239416344,250.498996978507,212.81354563732805,176.9205001716675,214.09001321033924,183.89440813072417,235.82297190850463,177.4218818514177,259.1391116298343,175.24502838466967,178.929942619685,231.13593742009925,183.34429519206617,223.9365892287478,185.1901259811136,178.960553087872,185.41647172401403,157.41133754376395,170.67350930175706,200.6848379031773,214.3933927885301,228.18251693207472,240.52552659716716,181.12617284364978,211.2707215585124,174.95259819831958,176.50276666908547,178.7699153200824,186.56432526339867,191.56653201771988,193.86737960073427,191.21605628675206,217.870652022867,200.43628620240744,176.00728294240966,186.6423203419698
|
||||||
|
157894736.84210527,198.22944475185136,195.60604598395878,234.04757843231602,228.12252625216243,185.1414941147362,230.10766319960481,286.2103851175044,165.80128784709896,187.17881273054144,188.07074342885795,231.33909104706674,240.23149946368096,219.03429606654927,204.8820981848727,249.59539208604525,238.70057453074782,195.1585504843691,264.84980524481995,196.89203255173655,183.69636910649177,178.44266981186752,240.33503257869188,167.31619160009387,178.91960070037652,171.47444314423979,194.4165180475036,244.55617927887846,193.10021294135385,226.90214896078257,190.37420956778064,197.53050314248063,189.3691171012571,232.39165931724403,192.61758706439028,180.4007089983187,255.79132190599543,238.68815163309347,141.61673506478317,166.4985090669998,209.02534337606994,195.4830777311899,215.98077448075662,263.9776299819763,185.06096861025426,192.81579291290112,261.4934365297062,238.4917383502622,193.60310199057008,229.80178460943154,189.62288444391208,256.72939006798487,181.45865166279128,265.1275311024173,182.3352374521154,185.96325007068648,257.8227189434099,187.52010230539454,236.47311257000322,189.1223356459197,182.15722994940805,187.96223053368135,165.9326730554,179.10326449148914,219.87669378263138,240.7721130881572,245.19065033493774,253.74415987949828,183.679070556882,217.46882969543464,179.92320592921237,182.46449688107933,181.0822509288829,192.9108105326314,197.04396562271046,197.80510367552165,191.40329630402564,226.50968471479052,205.52002801994826,186.67786780625087,193.29560916243415
|
||||||
|
168421052.63157895,205.9888778148926,197.95220443185346,242.27585374037645,229.92877350817756,201.3382438044799,233.7308674368832,290.61178702594833,171.2707621143135,187.79079740727707,191.07411632709557,232.29201785283075,251.40304134856302,220.8044729853601,207.40489982749617,256.0445701128228,248.04761227610368,201.3066829274566,268.9784077610666,201.21632215273348,187.78481378234986,182.18753470294695,248.0711262919566,176.90307019853196,183.9438369790933,176.20218866105887,198.10063838694563,254.7428497556504,196.45825596397273,240.3647540234793,194.78226263272134,203.0281613170275,194.22603240832066,236.4290188430419,196.70714229873673,182.0284070950466,267.3135290301077,239.57377240086527,158.20646169971562,173.81722267199092,212.26023321997096,205.24530170862033,229.74410170828537,270.6985091935234,192.47675646077894,199.6954621984688,268.1604115414818,247.00093365012773,200.40598084837447,238.12187796517424,196.37514748847386,266.69776600483715,187.5134183681903,269.63889756361203,188.6837065989291,191.20129164021432,265.1613543522325,192.69453564756796,244.54224160041176,195.22499685356823,190.28928300796125,194.21637746848558,180.18536685196648,181.5141567984116,225.63029411360827,249.55610435193927,258.91630720052035,262.8972055007216,190.00298770395344,220.6122901646054,186.18810867190032,186.1181991683149,183.69386154396713,196.6215860751197,199.46599709096049,201.1814751354281,197.09940585436254,232.83689637180842,209.42987146420492,191.95207515548802,196.68333470226986
|
||||||
|
178947368.42105263,213.79684700022278,198.8288263168155,250.43149276962117,230.90171214174532,217.896638375756,236.65636270698624,294.4587490721257,176.63285667486386,187.57181728459958,193.29724587384518,230.94606646924302,260.3358776120474,221.34511089960623,209.74876189862385,261.72908029321405,254.87267819228595,206.71074021308377,273.1983583712553,205.13758764860745,190.70736642139565,185.36037222191774,253.51159158266483,186.40558371069608,189.19434938855323,180.76825314860307,201.1405939431732,263.24197848030775,199.56169173100812,254.0131243082109,198.4519492972054,208.13047395824066,199.29937021112343,239.33103756071003,200.32317230294245,183.09828728244452,278.97962613562714,240.32778331477434,176.6459230968414,182.17326696923936,214.69311995808437,214.47828633994524,242.21870483629345,277.5378851648472,200.5692740383447,206.89749124609037,274.1144909127267,252.68175143319237,205.5812989568178,245.2242848430025,203.29606658809527,274.86423633497344,193.90062481405374,273.906938021079,194.909983373972,196.14358561693425,269.31262475974506,198.0334801668007,251.87542331115526,201.6852127976633,199.2343775468499,201.08143495003645,195.38223021905713,182.93351674937435,229.17010997343095,255.44158414053703,272.101217124735,271.3804964206886,196.94813312227913,223.25253564822088,192.66623032442874,189.39168273144688,186.35477397200805,199.89816141636254,201.3846892843286,204.46537077530104,203.70303273698383,238.78326443728275,213.1463298384936,196.33729682505708,199.53309954816368
|
||||||
|
189473684.21052632,221.60481618555298,199.70544820177753,258.58713179886587,231.87465077531306,234.45503294703207,239.5818579770893,298.3057111183032,181.99495123541422,187.35283716192205,195.5203754205948,229.6001150856553,269.26871387553183,221.8857488138524,212.09262396975154,267.4135904736053,261.6977441084682,212.1147974987109,277.41830898144406,209.05885314448142,193.62991906044144,188.53320974088854,258.95205687337307,195.90809722286016,194.4448617980132,185.33431763614726,204.18054949940077,271.7411072049651,202.66512749804355,267.6614945929425,202.12163596168944,213.2327865994538,204.3727080139262,242.23305627837817,203.93920230714815,184.16816746984244,290.6457232411466,241.08179422868338,195.08538449396718,190.5293112664878,217.1260066961978,223.71127097127015,254.6933079643015,284.3772611361709,208.6617916159105,214.09952029371195,280.06857028397167,258.362569216257,210.7566170652611,252.32669172083078,210.2169856877167,283.03070666510973,200.2878312599172,278.17497847854594,201.13626014901496,201.0858795936542,273.46389516725765,203.37242468603347,259.2086050218988,208.1454287417584,208.1794720857386,207.9464924315873,210.57909358614782,184.35287670033708,232.70992583325366,261.3270639291348,285.28612704894965,279.86378734065556,203.89327854060483,225.89278113183636,199.14435197695718,192.66516629457885,189.01568640004896,203.17473675760536,203.30338147769675,207.749266415174,210.3066596196051,244.7296325027571,216.8627882127823,200.72251849462617,202.3828643940575
|
||||||
|
200000000.0,228.76269212522004,200.69975860789418,265.94194428054124,232.76553176892432,250.08783490289198,242.55966837135585,301.83609067962357,187.0284614325746,187.82120883954983,198.2224617471036,228.9776226462502,277.8194700275597,222.56791405536626,214.33373812193932,272.7415870549647,268.52429546847156,217.10768222318669,281.6527971115552,212.85492263773554,196.56414485644353,192.03486473819143,264.21748235076666,204.84881375730038,199.4451436121997,190.19191891621603,207.10087341815233,279.9664264956587,205.88248565204833,280.25393356696554,205.77266044718655,218.07520352284374,209.03563604307803,245.0758623253358,207.556834312646,185.97427628385392,301.28782076663094,242.17428892380312,212.27580334265767,198.5906684735888,219.33945378190592,232.56141565446006,266.7438325207484,291.0878413970533,216.08499281755402,220.85581793263555,285.8567609312503,264.2189851081685,216.23558863173974,259.08652114201533,216.8688116522604,290.5007953843788,206.37006641061683,282.2129952750708,207.3651767294658,205.7469646550323,277.50303197612885,208.4057634833612,265.99263874872736,214.42054994639597,216.76998138780655,214.36116063183076,225.21096895654736,186.00843726804382,236.20122693263386,267.3553788493338,297.34719662407514,287.768639826853,210.49468800954912,228.8693565916466,205.0969310416595,196.0119051954856,191.50222176940818,206.3447930293256,205.0714834840282,210.94941536454778,216.44774750620127,250.4497663529688,220.29545422702245,205.35989174462463,205.2354086192423
|
||||||
|
21
results/AcrobotSwingup_sac_brax_full.csv
Normal file
21
results/AcrobotSwingup_sac_brax_full.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4
|
||||||
|
0.0,23.877418518066406,9.7648344039917,13.799413681030272,2.478842735290528,8.282493591308594
|
||||||
|
26315789.47368421,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
52631578.94736842,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
78947368.42105263,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
105263157.89473684,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
131578947.36842105,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
157894736.84210527,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
184210526.31578946,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
210526315.78947368,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
236842105.2631579,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
263157894.7368421,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
289473684.2105263,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
315789473.68421054,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
342105263.15789473,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
368421052.6315789,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
394736842.1052632,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
421052631.57894737,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
447368421.05263156,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
473684210.5263158,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
500000000.0,20.416343688964844,54.71392059326172,9.031767845153809,77.3991928100586,44.179649353027344
|
||||||
|
21
results/AcrobotSwingup_short.csv
Normal file
21
results/AcrobotSwingup_short.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9
|
||||||
|
0.0,5.442519187927246,7.250877380371094,5.316642761230469,3.012593984603882,26.74881362915039,4.914645195007324,7.996160507202148,5.320699691772461,3.261324882507324,5.343799591064453
|
||||||
|
10526315.789473685,5.644174656515988,7.24969633249566,5.625666272571089,3.7357252699192722,26.88449832283662,4.996019693746791,9.009333084180044,5.47804449147537,3.4805014495392155,5.569833892941929
|
||||||
|
21052631.57894737,8.953462466370532,7.598145867537944,11.347035630453904,18.652106352718008,28.807274577556143,6.890313509464718,25.63049072421406,9.384062045380226,7.426494656298361,9.482830919088006
|
||||||
|
31578947.368421055,8.93399750942834,12.13195872468838,16.55677825794491,39.66738266376082,27.894621940953996,12.606323327910456,24.811595367365765,21.5401859810354,10.717081848444248,10.664481935914285
|
||||||
|
42105263.15789474,12.02167098843325,20.569740292541372,19.206564270186952,32.33643460785583,35.83131872105136,22.078928839033495,34.11576524054905,31.88515200569111,21.565607800274844,11.563106448696592
|
||||||
|
52631578.94736842,20.660424032875458,24.93170792952056,18.346212369201503,34.67800492172096,36.95568672381857,30.6267119318639,51.32536940444828,35.11178915879568,41.08024024227608,13.877128529514591
|
||||||
|
63157894.73684211,30.760883469082973,32.76651876438358,25.829142973528683,40.84354469460794,43.76368614145841,37.274091985314016,56.419866168763164,42.219812522345634,38.25032584453802,12.0473862822036
|
||||||
|
73684210.5263158,31.90788460505347,36.59270049295185,36.1496786073036,43.6141872660275,44.875909703290944,51.95149808441008,59.59644322438873,44.32102376363986,41.89194138402795,15.68377083752026
|
||||||
|
84210526.31578948,33.9212043682459,41.15845937378849,45.22676409520932,52.531939097718855,46.6548523514885,67.35187538979457,61.9609546426921,44.89437379756132,48.45533976503687,22.591024504845493
|
||||||
|
94736842.10526316,42.18171439431298,44.12119291160369,53.476365922478095,59.74593852897925,49.680326182353944,78.91847413388233,55.83102060103185,44.20250636612651,59.607603587777746,28.704994040363342
|
||||||
|
105263157.89473684,41.435979964825584,47.373546658295346,51.70767327214377,58.71246009668815,59.26576795292775,94.51061926456039,67.09953715233262,55.84967740088536,64.1373477442773,37.469585520986705
|
||||||
|
115789473.68421052,37.20279779570007,50.80912768443494,51.544284165235766,65.1542723333654,76.23925808381671,97.01587872376402,80.90168928722348,61.09613903472587,72.55748604727171,59.20956043313951
|
||||||
|
126315789.47368422,45.41232763292718,56.429375389276125,58.93628879981194,72.3946982409155,85.2578649268256,89.08518029200403,80.16595243062008,55.9072218238069,84.27279865254657,65.22588341388015
|
||||||
|
136842105.2631579,50.56750783006405,70.06576031609504,63.97542938913046,98.98068797912379,86.21042420708902,103.46112701408258,83.73963840891946,74.41341511622446,111.75118494993518,68.69660209977891
|
||||||
|
147368421.0526316,51.51011738379246,77.7657078715903,70.50427223622304,105.10085728545764,95.67980647177909,140.1497882306576,98.64327240865317,75.11959188805062,133.60699487467224,84.40921568986123
|
||||||
|
157894736.84210527,63.674517002503634,80.46387188008617,77.15916907834793,103.84812606916533,125.64939506896812,163.39977499863284,105.63095698041269,80.24119955169198,130.4829300121092,80.44546442943266
|
||||||
|
168421052.63157895,52.84079470099504,92.87587839348495,90.82733714613559,110.50039066832483,151.5876588649697,192.35271416344472,118.3676575187831,89.61545830156004,121.37149931222118,94.50031158369335
|
||||||
|
178947368.42105263,53.86142701058135,105.78510179195213,90.88503478698618,124.75640345040782,176.03556388026294,168.90976162981292,127.29585662622449,100.54407359948614,117.63526607412133,166.20052614479928
|
||||||
|
189473684.21052632,76.84454399244112,107.3315844934155,142.78747741296021,130.7823175218297,191.63129986158037,190.09394032184107,128.6451477573519,106.44492553252924,142.629961631057,152.17465936452396
|
||||||
|
200000000.0,101.57951037781804,101.66482655115817,149.8108610455927,133.21095945490035,173.8824815420728,192.90091792219565,145.66472634751545,146.74551153653547,129.65049538957444,123.61702839089067
|
||||||
|
21
results/AcrobotSwingup_small_data.csv
Normal file
21
results/AcrobotSwingup_small_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9
|
||||||
|
0.0,21.143787384033203,24.950244903564453,25.915971755981445,5.609339714050293,15.463691711425781,8.226822853088379,27.15403938293457,15.241652488708496,12.177453994750977,30.06814193725586
|
||||||
|
10526315.789473685,21.649905536194794,25.03972107546605,26.6945992185582,5.651483880242441,16.190795193232823,9.355019883820246,27.068654280192014,16.348996282101112,12.628627423645923,29.79971093405296
|
||||||
|
21052631.57894737,31.669455244961785,27.967026213405866,43.35570763171215,6.707193571795552,29.360611235459757,30.258399746806422,29.484230133325607,33.82426144935995,20.943016148422565,27.47771813799924
|
||||||
|
31578947.368421055,48.57210915390186,42.19637191437,70.67126506990095,11.437445366019876,40.019626091071856,47.79413811310383,59.46388078912308,29.25864620535658,30.742051522497743,43.449991564052254
|
||||||
|
42105263.15789474,72.46305174981129,69.27248592154638,68.87495037112541,23.174289230577173,57.74981307269299,52.19825482744094,70.67808091524896,49.8877977173662,45.16419863333497,49.7860565035297
|
||||||
|
52631578.94736842,85.89172594496912,110.37495772907938,101.55738578676929,31.246036799588445,82.5718856348638,66.21550888782805,106.44960211665264,83.36450990902419,41.81255452897286,90.76513869763247
|
||||||
|
63157894.73684211,107.42673106446162,111.74988424823391,128.25394605343692,37.251463650625176,114.80390528463596,116.97158008904671,111.08776573250665,109.39052654650072,56.80315453718079,113.87136640843428
|
||||||
|
73684210.5263158,134.15535191258732,128.35023728110042,133.73955943276513,50.6117862775055,203.84148939222183,140.51565167609178,135.56639250204833,133.37613922460756,82.35233525273462,134.46289287361
|
||||||
|
84210526.31578948,141.52107128641282,148.25509043413516,148.76502678169769,64.04725177780082,233.1545941776846,148.75117956304155,150.40423626318534,183.0965331465914,137.91188694780223,146.39757210924355
|
||||||
|
94736842.10526316,149.70425090457925,169.66141951864282,173.02416326200535,89.86290360351514,208.84744904803273,183.46922188122187,152.41621971427568,200.54500967107322,180.4957869437593,152.53179365041512
|
||||||
|
105263157.89473684,174.8547318589324,188.7344498755694,179.4877930622847,122.09386962429308,242.8772793779578,203.94090047371355,176.9090458656612,217.3267324116917,186.3765743407021,171.9555685320389
|
||||||
|
115789473.68421052,175.02431768228473,204.05730722704257,168.40918219609605,136.4573945359345,283.79446858118115,216.3534090325773,197.1148780787833,218.76593329136227,192.03161275357917,182.67082356448978
|
||||||
|
126315789.47368422,190.6295792732873,195.54635549829936,181.375536526009,163.54471097428387,298.94175364105985,238.40326076432282,214.2638809023802,222.29419886818224,198.0300006110252,180.3398503314425
|
||||||
|
136842105.2631579,202.9228182745855,264.156798067268,198.41209203124544,202.50592252488286,335.3950682301436,268.3921438786627,226.49619762933816,242.32804109416182,223.18929271643515,206.17048960300863
|
||||||
|
147368421.0526316,210.0085232195927,300.5169910218907,245.74626547502683,221.50875702970907,370.9656401938679,289.2419595777823,269.252716678414,308.58199695644294,242.11765124692153,230.81278011954063
|
||||||
|
157894736.84210527,231.800024504229,283.51838286332475,267.3845028685731,223.56292527897537,389.66562890304755,328.1059084544552,283.6526422110951,339.47178655309693,251.04638799550295,239.46738760833762
|
||||||
|
168421052.63157895,273.6868914672873,325.50722620427774,263.9479132570388,163.13395302315496,315.46081193200104,327.9694239809242,312.229780736062,354.85282855813193,301.15937278739636,242.6716226336038
|
||||||
|
178947368.42105263,250.56473353579435,326.47424281444245,279.9382224721922,223.14015765583088,372.89498513648056,301.91581754968433,363.51116248288315,369.4901477426208,266.99353717213853,295.51992197685627
|
||||||
|
189473684.21052632,244.91513414107203,354.9988503475929,304.36651867429964,289.7885733717366,326.06483611241606,382.76876090850857,332.3314706288546,368.6001663201404,336.1819409267394,288.0432487126863
|
||||||
|
200000000.0,278.0767317351542,341.18878836223956,310.42929331723013,291.2432496986891,425.37240703011815,358.8474684938004,316.34138662407275,394.288765524563,331.3182063322318,312.505204068987
|
||||||
|
21
results/AcrobotSwingup_small_data_no_aux_data.csv
Normal file
21
results/AcrobotSwingup_small_data_no_aux_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19
|
||||||
|
0.0,18.75823974609375,14.461141586303711,16.01470184326172,18.090566635131836,23.036684036254883,20.02173614501953,14.201720237731934,25.413047790527344,16.639739990234375,18.806427001953125,21.615612030029297,33.70068359375,15.774639129638672,17.438735961914062,8.52690315246582,10.927852630615234,18.090906143188477,30.251937866210938,20.521072387695312,7.866001129150391
|
||||||
|
10526315.789473685,18.95663177940241,14.679991669209876,16.53096974473911,18.640571824437487,24.05478337360159,20.345497325542603,14.395769586041054,25.492619934664663,16.87929535841884,19.66069181928014,22.163282540139235,34.309999930705885,16.753577784384884,17.70605289042409,9.358309808984359,11.85973777128719,18.20400110955693,30.793135257350112,20.72564996356387,9.808205686478653
|
||||||
|
21052631.57894737,23.199730384228054,19.53319524454567,25.615079343380856,27.71541249714895,42.99219836350376,25.622667352341303,19.198424660371117,29.691785299185653,22.343447429837447,32.78933182287538,32.43747615304432,48.31500428056453,32.70503140518498,24.066384712187702,25.460601344838782,29.80376712022313,21.604313003492653,40.6083022053676,24.52623080000942,44.26785212887291
|
||||||
|
31578947.368421055,34.998622802181636,32.030469626353394,29.130659056811947,30.24346049997822,62.31972353193404,27.333932322344214,32.10714687695959,52.25762436037414,36.44752184673112,25.99852062503284,41.870729686295064,80.16823527829882,32.270658694646244,43.56984276298713,44.743702722460526,48.74965188942251,34.234126533311986,50.88227401342543,29.817243256211448,63.147237345122235
|
||||||
|
42105263.15789474,63.971590077530315,49.584025191468214,36.226841899207756,43.77628805131298,87.92872245968874,45.45685556142945,42.556413106310735,58.90250798713137,49.10309450051791,38.72170643830399,55.44670150096728,99.63538881517184,46.12985678487703,64.89894363399688,55.248616665188955,55.177855654477746,56.24970961867773,82.66420580170161,40.423423045890154,80.29794068407485
|
||||||
|
52631578.94736842,98.4728934545266,74.4852093608116,72.85803203509703,39.09799578452581,123.40330749147486,73.74027267434838,86.20468924722925,91.28302565702027,55.18855368286123,62.809217351904124,114.60861483057367,140.54132087462662,68.87266173010428,70.30622231064575,64.65796735517453,77.1295779962041,112.65181225990241,137.3020037412437,50.62587037222289,118.47202574957028
|
||||||
|
63157894.73684211,124.28229688698235,118.71174201402309,94.99075119222137,45.395117986008245,152.13793448992385,105.00765161534096,154.92206309797695,125.97943578119754,73.51715838240455,92.27466788797169,168.0818776134969,145.17245468043225,105.35746499375955,118.63894536001536,84.38898047123757,90.29509158003201,125.9182234314423,156.52006075809865,79.75445696257489,144.09728176322173
|
||||||
|
73684210.5263158,137.42366611974063,161.06180406046047,123.6773079627934,71.61936653519975,174.71624296532116,133.452921654833,213.9489148297964,147.60351528421333,90.53230239570637,126.98336865624978,176.80310207857153,155.04102794875564,153.4332998665953,172.9378955696098,153.63759448940655,135.2336313972802,185.18758691405657,174.21458064056833,99.40771791799166,144.4558823443227
|
||||||
|
84210526.31578948,164.21659833374446,217.7935296828727,158.27573495888643,96.7216238640352,203.85325980714813,183.4873572842236,231.54993988569424,165.13856179595325,131.48514264442255,170.13256577969918,205.78511936941968,167.33126487461155,211.3656506135523,169.3720560149803,207.77624364605902,157.29439442276626,193.0630929077101,243.5700014852751,131.65782407809493,166.0659417868321
|
||||||
|
94736842.10526316,181.51220510012556,243.43479424525165,178.01957255383113,155.32249732429054,220.5754297687324,239.31619700772941,247.4418307136218,172.3533365619975,184.82712201022872,189.62107595161552,262.1979055125627,194.12376751158376,241.1921834272856,176.93247149913594,247.3627117283209,158.42026331050738,225.97339862898778,261.32327998716414,177.44845740410432,177.0247334342584
|
||||||
|
105263157.89473684,198.0096928173485,259.4421345319114,210.90652565323745,221.03936561768734,267.1348665000824,274.8354723564145,268.21756830456513,198.5696794594589,204.07792483621026,208.82608657926733,283.7850764628262,201.9100604171238,276.2401415675961,211.49736673358075,253.04026660958817,188.50099362271973,269.7374382809919,282.8506965838641,240.48119359085763,232.1412788799595
|
||||||
|
115789473.68421052,250.5670167934201,280.7614241466958,218.51866923441847,262.125673587385,274.144658366729,278.13837262467996,289.1868119140741,214.83601623773575,227.15192683078246,225.4826004013543,305.3001219443667,217.80904485231622,276.8796306199645,213.41832626819445,307.9345027417192,255.6365129849422,282.12798685239954,331.247436826414,259.67865135605314,230.17924682841408
|
||||||
|
126315789.47368422,290.8054249451762,314.2190957016562,234.4402698453444,285.3937708449826,274.623438683243,302.32848584850086,297.5951310458936,247.0147946831923,256.25027671315996,213.07089983987674,313.6341825839225,211.92326660004346,304.5580219733749,214.15753124295182,355.8844682050874,275.9049194988452,278.5325409774965,354.2368935845235,274.48761060594524,225.68550052900395
|
||||||
|
136842105.2631579,275.98421592725606,359.43405992378825,246.13946147671697,287.74003632915645,338.85824161916565,305.3804693966691,319.95543257062457,258.40591138361896,271.84641782406953,225.58042039022553,342.5051712552266,258.0907046636387,331.24646046254117,248.34457736761618,329.5711626128972,349.9030665523294,292.38850783789917,359.5457573236686,319.3750871186277,235.74952257418403
|
||||||
|
147368421.0526316,297.6300829566417,376.8636279342247,314.0579665083635,276.5927273543588,294.2709540039548,313.0668772643953,285.88640042437737,295.1035076307128,285.6760377964815,255.12739657051347,412.4991186332175,307.0424257964309,353.06695179612353,270.2616121864715,351.5560347323933,344.394067421184,321.62609933551994,400.8925259379139,347.1145794170385,233.8801709478747
|
||||||
|
157894736.84210527,284.8795360293745,314.8992354463672,276.2453566665794,332.3453529534908,371.1090958719439,333.4673375104273,376.89931814467485,270.03079128166314,297.7468851825372,315.1431278465032,390.70731245348657,339.58155921811544,379.99751271278575,253.60312757432624,325.22254847373983,360.1086416668839,364.3346968085152,415.4709293346161,345.6949456402635,286.1496385148523
|
||||||
|
168421052.63157895,302.425370168818,382.22827861579833,285.4077107173254,382.00304093981714,333.20042203073683,373.36028317137107,305.9463193806254,318.54770647455786,360.5023484348921,303.53869090384063,378.19464547218047,297.8456407623608,362.2866318575893,325.5463147203018,389.3690568654491,329.6699895779512,380.39852529879755,364.3315282607673,371.7944422904144,260.7707438085878
|
||||||
|
178947368.42105263,332.7696301467366,360.6190439785451,323.008909593246,379.1684169060966,361.5371000274727,336.70157324186323,393.6129710676763,306.82438906333783,390.44150348845614,356.3475132582921,406.3477573060428,347.67516311639895,399.6350900875069,282.0385175462905,393.1432591255682,414.7364889134,388.82122873921475,363.00839847606966,363.1584802253425,213.13626362197616
|
||||||
|
189473684.21052632,386.0490943096021,371.90457319850077,287.36568286742533,321.95136646336135,358.77593825134215,374.2654761766133,384.5231781301406,322.61573018030447,378.9132727143507,351.7304975030495,473.9359682585394,373.9145102669327,426.17746199498214,351.1358055355146,408.0136561699191,290.3405796613059,361.6506055831249,434.41434981552186,420.73069737789706,303.774452436994
|
||||||
|
200000000.0,353.85820796458347,436.96923741698265,334.3423507056738,402.34367268336445,350.8376684345697,381.51825607764096,436.7213738827329,287.41446037355223,348.02008474814266,362.9231588526776,440.88951381884124,322.05720034555384,436.56543233206395,331.8007484031351,373.69643274106477,396.67642704279797,417.0949217598689,424.561028651501,453.88209460910997,306.87099325264757
|
||||||
|
21
results/AcrobotSwingup_small_net.csv
Normal file
21
results/AcrobotSwingup_small_net.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9
|
||||||
|
0.0,6.695439338684082,13.671712875366211,5.240187644958496,32.056556701660156,2.9780356884002686,5.632288932800293,6.35692834854126,2.8975038528442383,14.635035514831543,6.313169956207275
|
||||||
|
10526315.789473685,6.941475775018025,14.366268093437164,5.202905239728267,32.342013331537764,3.720010691121804,6.522928813387256,6.413608182847263,3.025298770353291,15.23447740362013,6.931947194771206
|
||||||
|
21052631.57894737,13.181898201457187,28.777713911251844,5.162218943707789,39.23374384478601,17.642845192237907,23.19534060604602,8.047544990127024,5.43459590216918,25.26702283324875,18.785140267544918
|
||||||
|
31578947.368421055,34.763486982382574,50.917206742447334,12.293249043029766,57.443427979378576,31.529724209458465,38.56197919368414,16.35472495240673,7.859167267518103,26.416229647602318,34.629113589803026
|
||||||
|
42105263.15789474,68.58792886682825,52.57280507147147,27.88167950182823,79.65005154251392,43.18257812821304,48.73154326604674,35.48218425130085,13.664725385276071,38.860148923796636,54.92927134532347
|
||||||
|
52631578.94736842,107.37400620255279,73.83052433723806,42.92524955500113,135.54704990363848,58.44888650585698,73.90886532108705,57.19258305948444,28.331929314086974,86.87509830048374,74.32034118560212
|
||||||
|
63157894.73684211,116.4453290236954,121.22916458650312,56.94732399755403,145.5236423884072,72.45480781399397,93.70869286139587,74.34528760042383,29.5933128681003,135.8480005266759,93.32111337723163
|
||||||
|
73684210.5263158,125.61816882723917,168.30034940972556,83.13201424749855,156.52252671721075,128.37953463923242,112.28112003450744,90.03068119749798,52.93947641044815,167.8442719285673,105.20006576399231
|
||||||
|
84210526.31578948,146.03371798397762,195.34827333011785,147.84093812727204,177.72929191094025,200.81911428020933,123.54545762796482,117.50251016161118,88.00433149578829,221.36114261982516,167.3809712755383
|
||||||
|
94736842.10526316,154.83013607400605,218.675491456964,190.7901464123227,204.51836185186193,224.80112380226893,141.44065970318636,142.86435193818195,112.72374934630876,235.9166721095669,183.1518594199931
|
||||||
|
105263157.89473684,173.42247361316245,243.6001876832871,203.38102937479428,246.15958553139853,239.7141078824482,175.1982059548106,170.823583594237,162.44651383008818,258.2533842300444,199.7372955296013
|
||||||
|
115789473.68421052,203.03306778712286,248.40889312686022,208.03820465406056,247.98445455866178,276.9956956999454,192.4846126570926,195.3785680099455,223.59463152083003,274.7882439728589,221.38974064060196
|
||||||
|
126315789.47368422,190.0511273492406,281.5629787748871,230.47287619279035,293.0201806203151,291.0314447136797,215.27738962246113,215.84348256502128,252.4664022499835,264.02355960927844,213.7485852915164
|
||||||
|
136842105.2631579,240.6597492020382,303.90627285334546,238.79270212793944,330.94297262580443,330.92045706238116,234.24302487979307,224.40164555852763,282.6101103632239,291.75382554448544,238.0903869497479
|
||||||
|
147368421.0526316,269.87854915378495,323.5505091696565,230.68242554023985,343.1863376729376,352.8681890730052,250.80420970140733,243.54819732524683,279.4537875652313,312.3960284078881,282.14780969335766
|
||||||
|
157894736.84210527,230.90003951500654,353.24528939613344,294.3499276595103,323.0739140869673,368.8929651233463,269.9043786836133,257.10587254439037,268.76824468349486,324.7708702895615,277.84771515342334
|
||||||
|
168421052.63157895,256.2792843596757,353.0683127083607,315.6034634001037,345.2827516949408,335.0769211143007,310.58473694159386,279.34989413396147,298.2569014557181,302.34034912473936,271.5042335689893
|
||||||
|
178947368.42105263,321.5378950376095,365.89717006584283,415.2563585311257,388.0074948864961,392.0681465642109,290.4856142425636,289.52990993345543,283.20656576249075,402.8194304429759,365.05681249408514
|
||||||
|
189473684.21052632,348.9452018315112,359.19486913043704,361.54894053110456,438.60524256622364,437.5917075287932,328.7063503432142,371.0287197998356,342.585880562209,392.1345618689159,355.87759496779324
|
||||||
|
200000000.0,356.2318041748122,359.8931556375403,397.162943482399,444.88651133211033,372.4624353835457,361.93330409181743,341.14009829885083,329.92227372683976,434.90980041654484,376.9363100481661
|
||||||
|
21
results/AcrobotSwingup_small_no_gauss_data.csv
Normal file
21
results/AcrobotSwingup_small_no_gauss_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11
|
||||||
|
0.0,7.036541938781738,8.972885131835938,9.178613662719727,13.057394027709961,20.935565948486328,3.738186836242676,16.270450592041016,11.112337112426758,9.143903732299805,15.158102035522461,13.173521041870117,8.069116592407227
|
||||||
|
10526315.789473685,7.215907369497803,9.241187055517823,9.167839875836906,13.311776398704323,21.537728781664843,3.9355120302843565,16.329040148937843,11.088037770925157,9.40808370940057,15.310171122133855,13.318510708886707,8.425649370735
|
||||||
|
21052631.57894737,10.592409572783351,14.956911815000167,9.01192895974706,18.56523179388773,33.28107177039964,7.291756598108611,17.798330840652092,10.581063712438842,14.074436097697372,20.035543768400963,16.218544683205042,14.219530749219855
|
||||||
|
31578947.368421055,14.063325709970334,25.282323430331008,9.52039750133721,25.54513647164272,44.54037490888521,8.226915177753726,22.350000371573458,10.27999373221992,16.715215160679126,36.575896050770695,20.566712171473952,16.224200203627806
|
||||||
|
42105263.15789474,18.34656747323242,29.089509322083558,12.467276938816848,24.825184412981667,36.97605146946504,9.433160418205974,25.81803375082168,12.41754551004835,21.193595239371476,33.17040234073542,25.17867946187215,29.949910223071264
|
||||||
|
52631578.94736842,32.020697502821804,33.986002416058426,20.35991486470496,34.59705371256412,49.31650784692193,10.296928980023988,28.313154635440114,15.33887879452939,31.736783649937514,35.54791069805746,31.27345040610829,29.094816251918466
|
||||||
|
63157894.73684211,43.52815986245127,41.881233053379944,28.39564776394795,24.724392373465687,51.094941011777046,16.706243691682154,31.458539126538913,26.257584598823517,37.216238371099124,42.38832273695442,31.733172776399396,37.3430446306259
|
||||||
|
73684210.5263158,43.66899180166692,48.694136310513244,23.871280860682106,27.98739899137158,52.73351259404727,16.390817073274434,28.64950056116089,29.436779262838563,28.613390124101098,37.201772456096066,37.72564058303008,35.50624589801578
|
||||||
|
84210526.31578948,49.92106935092947,48.08083232799726,23.502787706718223,32.105897068688414,60.80239459137508,16.923600884685886,31.256639556582616,34.42222876240012,30.21620967410443,33.04852811661949,43.1367643331887,38.48255201266083
|
||||||
|
94736842.10526316,46.47420726997205,51.44839063204226,29.92109089440194,38.6935820988898,64.6127330626147,25.192530730455456,40.236983463694436,38.524427103399816,34.20377650650585,35.16773167849516,48.19439941448601,49.8213740398803
|
||||||
|
105263157.89473684,50.21257188286725,53.634558237903335,33.86004211193265,33.451480138489664,68.93566209717636,27.950928788291616,39.733823403158844,44.71752654804417,37.748818752858945,41.38564315987261,46.704095466067585,52.327457077243984
|
||||||
|
115789473.68421052,55.81991646284047,48.363020469927314,31.36212024015279,35.23886922061175,72.16539797781248,33.33408810877321,34.34389020725465,46.055876391787606,46.847281174956095,46.01799565462855,55.54399822848407,48.98097906517603
|
||||||
|
126315789.47368422,61.79718457703115,45.574108651140065,31.655370813864096,34.73509823479315,82.80617212762134,24.660010304436106,47.03059691087528,59.00430963400991,46.76823886197029,51.40030662769096,83.85806302812955,48.08269762864899
|
||||||
|
136842105.2631579,59.88604153638029,40.36125620760002,33.93677319101267,31.00203024850864,100.04191914291594,32.43643586658501,68.26518329979024,69.71039359065139,49.77841519886355,44.87758626543253,77.99151494758527,72.2935736235283
|
||||||
|
147368421.0526316,77.80048581908287,47.797998232834075,40.14916849959838,33.63892320673552,86.84277946648504,46.42326783535389,55.40742590354749,59.515931518908346,51.91601159355482,54.790484925799106,82.3933804247353,70.39881617103256
|
||||||
|
157894736.84210527,93.28066287804756,63.164702514759234,38.7822109650191,38.33385589644519,97.66979808352085,52.989114820152274,58.69004093442845,69.62327660308569,52.84236831358348,76.55681243421218,91.50552462813266,71.4651162714344
|
||||||
|
168421052.63157895,117.93345735806179,75.07464471847398,34.36888893423318,35.31494313065695,112.62410892442983,54.40473152957134,86.95128916736456,82.44475145485264,47.126046129210835,80.84268295467726,99.57014869586914,53.988663404767195
|
||||||
|
178947368.42105263,132.81895365987046,86.45211965497842,47.013377541216954,33.65556947484489,112.48178938429524,84.81238428389763,95.00780731390057,88.7324321864178,57.255329771147956,106.9411708811231,99.83027172198008,58.750367225465105
|
||||||
|
189473684.21052632,144.5577440834772,76.31703876004325,42.24173354609274,34.24696775498978,101.40433938586646,94.2619216374413,114.43639502507168,117.1517905439779,61.62054878547581,127.65004437592222,115.40699528364098,46.094077137079594
|
||||||
|
200000000.0,174.30350200361326,97.29701978517205,60.68100835245691,37.6703331070511,101.96988257020712,72.06400879041145,91.03422615677118,95.1696273437456,71.02403727526728,148.64710405076804,96.5444134893386,61.41081830114126
|
||||||
|
21
results/AcrobotSwingup_small_no_norm_data.csv
Normal file
21
results/AcrobotSwingup_small_no_norm_data.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
steps,trial_0,trial_1,trial_2,trial_3,trial_4,trial_5,trial_6,trial_7,trial_8,trial_9,trial_10,trial_11,trial_12,trial_13,trial_14,trial_15,trial_16,trial_17,trial_18,trial_19,trial_20
|
||||||
|
0.0,32.4077033996582,36.155517578125,25.75421142578125,29.606937408447266,27.17302703857422,16.026126861572266,13.865445137023926,31.84101104736328,30.781850814819336,18.447200775146484,27.443321228027344,23.80484390258789,23.779891967773438,27.92617416381836,31.59305191040039,39.16869354248047,33.610191345214844,36.45005416870117,20.54007911682129,38.84449005126953,34.24555969238281
|
||||||
|
10526315.789473685,33.296037207277614,36.785427371096745,26.648522366880545,30.070928266142957,27.658375122354798,16.909636004397083,14.5849724026046,32.483172309241795,31.134407273356977,18.28971532364879,27.769625172369864,24.67473756232998,24.22445243704352,27.639669323871505,32.86372066523436,39.94947247461021,34.48854379632466,36.91144894359638,22.061570849171595,39.57076941414058,35.02109238286593
|
||||||
|
21052631.57894737,47.85268858083398,48.244665590267104,42.69550373105957,39.864313121606436,37.426982542794,33.6573874229911,27.179000907564674,43.72329865292829,38.284123305014624,15.910192525081357,36.48614875559497,40.41140686373879,32.42776290967111,24.889028077021532,53.30326504027084,53.15286421536408,50.76808675710845,44.85972475671207,46.95364156745147,51.82332628598009,48.673801405484326
|
||||||
|
31578947.368421055,46.289031177160645,55.042522301737456,54.09337787392067,56.42648777237724,53.144822111121094,50.22762564730694,33.501808243249016,49.271767463927205,49.36965923597443,19.09700184072422,59.83897444905089,50.45623192499673,38.46070101455762,41.67901350319427,49.6850867264819,54.63208278024007,63.855021779825314,46.707314440304636,44.729709547167836,54.444386954762436,54.15614254775759
|
||||||
|
42105263.15789474,52.056275916066525,54.81231460751259,74.05637095030656,63.5784007053296,66.31524625313249,60.829015134609314,41.481055804071666,58.02204051089584,54.31379683443714,27.48259941575187,52.55809385921817,51.009362222167596,39.46608662262683,51.2882578602458,57.61938410989135,53.74984419733864,66.73611811273976,45.4475195833438,51.68484967947006,61.35790378470335,53.40799793085068
|
||||||
|
52631578.94736842,65.33051521372349,64.54581476310449,101.39453396258922,82.1989485288756,69.99954431695951,92.37778505030761,53.489512749841836,65.90184293982477,58.01873857391756,37.75656481033547,64.60210800154387,51.54042098185156,51.49881111311409,69.99729087472045,61.28976902142786,59.26768115029729,81.32731635404178,50.51788533190033,61.01289577361124,66.07327938235656,56.682834893857176
|
||||||
|
63157894.73684211,62.288964994437485,74.67267434872753,117.30094707945051,85.57247418495427,70.64805491359445,99.1536963618363,76.71144264556695,70.18010282099578,77.18331596618546,46.548501034006684,66.02018492836042,62.126340740605414,68.03360012194292,73.19627934867654,79.54463746351219,77.18552978357451,99.40059083489666,64.95080359490625,85.92186557210552,73.2946830179305,74.03522284188597
|
||||||
|
73684210.5263158,74.93735160680689,107.81465900012412,139.2873089840207,96.71375098380769,86.87840321203927,113.47289029651239,93.44800781175867,97.10603002875715,96.94276465686075,51.13459859087733,84.31004567019167,85.57612551139911,87.22768531824332,79.04176998198281,80.70959805724033,93.1210169256394,116.47493457860234,79.01124874019474,122.10848017792293,73.69862018011696,113.93861476468165
|
||||||
|
84210526.31578948,97.10230527558156,93.96989148475457,96.52644659277475,116.53272218007461,113.20687297011347,174.57018046960278,153.29147939602757,141.83370591646417,106.18938247516876,56.92739289635767,88.59680833182506,98.30280876638487,116.02657619374611,102.99297806893028,95.20292315324589,94.353535294863,122.6360879416611,110.15239549921492,138.70916615166493,85.8180380409114,147.08891463263214
|
||||||
|
94736842.10526316,116.27735660887656,116.03497409178692,128.7576511145629,148.53811679045745,147.7007118017307,180.4411057644934,218.819252871658,178.99188914985868,144.46769188926493,102.97105440930812,107.9235355442911,124.23589051595356,148.52493570150597,141.86054870983654,114.07920847511193,115.40575793876708,159.42236704381384,135.99380406923075,180.29462874637417,116.24597945662829,188.09331493859807
|
||||||
|
105263157.89473684,131.7258633593113,170.21886141296898,192.91515870132275,187.39992579083034,202.23182970912833,207.65967708380268,238.20913633349198,208.33421997944734,208.33317792514686,153.72142048283297,151.02752303053467,181.4257410114987,205.62764731066048,184.45465234458612,191.9764393842121,164.1790215488616,241.4987096171465,155.8979871799741,238.7906318729604,140.21512545286123,200.8675222978037
|
||||||
|
115789473.68421052,158.33178354729574,202.37646896315744,227.33339871899574,216.93278682467184,211.331247086629,248.69978439911085,246.6390119787316,253.57222508443027,235.39637340138825,180.44286899309904,189.12914332953846,230.71455120359266,239.18017664613652,190.25308435701268,272.071464554755,170.13196379375592,248.429662619849,141.57710379225892,260.24952784708995,191.00687052491133,233.41085929751725
|
||||||
|
126315789.47368422,154.90293820891682,259.51886878713685,238.00155754465806,230.06350551450683,204.04818964120093,268.90613665128353,278.3264847906343,297.9188012779585,263.6566409047621,233.0199083050203,187.61077770929256,248.62723383299203,286.4701455133114,232.60310746163879,239.4052901964768,236.2464176310728,265.57598176194034,147.03692383069415,277.6510531974962,227.1836707213909,257.47360423760404
|
||||||
|
136842105.2631579,185.3483334431771,288.244503821445,200.98698775078117,262.38887776108334,234.9703059204728,300.94985060984084,285.87677064917756,294.33157443199457,279.39890948160865,259.3397508126547,236.45249789400123,271.87452206742074,303.82141308936383,244.9907631181036,278.33346704069606,268.4523852427911,303.5926700412732,189.8020199536452,273.70698676413116,265.58575844009334,302.29368292558894
|
||||||
|
147368421.0526316,217.15594078315593,313.3082447720697,242.89344419353233,264.4147583865393,269.04264408052796,311.566828934935,285.8760857961845,345.1251253437798,287.88174234070607,267.13349160510745,288.908934617637,254.7768222093582,299.069269646568,258.82332522543845,286.61811474733406,278.37361948642047,313.9588218418845,245.4122034481358,295.88553080961645,285.4122809714888,306.8542738398356
|
||||||
|
157894736.84210527,223.78152848932882,311.36378852218144,290.2786089949661,268.27760689707674,288.8315125849934,331.56831176228144,316.60071037895466,327.69570335721045,335.2528018445355,312.5851707836598,276.15293550821553,300.10106629042417,325.54629257461704,258.3734440263619,323.2678803412703,266.3304482982429,308.70753486351293,217.55363201649232,287.675268513344,294.6170784805455,293.491367597329
|
||||||
|
168421052.63157895,234.71857714785102,314.98354696236817,283.7284783920753,305.4871167595037,327.815720531749,342.18832026294064,347.4369622518482,359.1234421835712,321.55971752705665,296.5348788998464,328.4429737894159,320.2516846670008,323.5348077546857,268.7956971871225,305.5235315637245,316.0435993308176,319.21332096855394,211.6789066732095,343.86831283833516,292.32147242421917,354.51419196696827
|
||||||
|
178947368.42105263,197.38530471809517,334.59399147756875,341.7113641431127,325.66671816942767,310.07803450322547,327.6141084031385,331.2294814979601,414.69330027823304,375.55925973110584,289.038970218636,351.0568013585836,363.8722931092961,352.0182927139413,286.6342829280943,351.55434802859776,334.9405559625817,316.564209680478,194.9518083879657,334.9490503408737,338.65567082843626,398.9040526068277
|
||||||
|
189473684.21052632,273.97604445753996,355.3467957329552,358.03872707494406,338.0551271167819,360.6422533754497,398.6396637395478,368.90517846666216,321.8780714666414,387.91837769185406,400.57232862760486,339.23682612345823,374.71732094908685,346.9451931840495,288.66709963832864,350.67221955265694,337.94357375557075,374.24965236342183,219.90429991178235,365.6490119190427,295.2192700559743,430.01798900672935
|
||||||
|
200000000.0,281.7031864978765,311.4724302511466,355.67379710391947,346.4399628294142,322.5400290457826,406.9658508630175,337.0044558989374,410.9943090128271,380.6061951141608,320.96123184342133,373.0588185175469,321.95514380461293,389.21944143113336,300.4861226395557,379.84359742465773,346.62374156713486,362.50578028277346,193.65573316655662,406.5343745429265,325.86513288240684,355.2923764508021
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user