remove UncertaintyPenalty/Abort aliases; use Parameterized_Objectives/Terminators dicts
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
2c1bbc1a31
commit
845ca708a7
@ -196,7 +196,7 @@ env.close()
|
|||||||
HER works by relabelling past trajectories with the goal that was *actually achieved*, turning every episode into useful training signal even when the agent never reaches the intended target. This makes it much more sample-efficient than standard RL for goal-reaching tasks. This matters a lot given how slow the real game is.
|
HER works by relabelling past trajectories with the goal that was *actually achieved*, turning every episode into useful training signal even when the agent never reaches the intended target. This makes it much more sample-efficient than standard RL for goal-reaching tasks. This matters a lot given how slow the real game is.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from nucon.rl import NuconGoalEnv, UncertaintyPenalty, UncertaintyAbort
|
from nucon.rl import NuconGoalEnv, Parameterized_Objectives, Parameterized_Terminators
|
||||||
from stable_baselines3 import SAC
|
from stable_baselines3 import SAC
|
||||||
from stable_baselines3.common.buffers import HerReplayBuffer
|
from stable_baselines3.common.buffers import HerReplayBuffer
|
||||||
|
|
||||||
@ -214,8 +214,8 @@ env = NuconGoalEnv(
|
|||||||
# Keep policy within the simulator's known data distribution.
|
# Keep policy within the simulator's known data distribution.
|
||||||
# SIM_UNCERTAINTY (kNN-GP posterior std) is injected into obs when a simulator is active.
|
# SIM_UNCERTAINTY (kNN-GP posterior std) is injected into obs when a simulator is active.
|
||||||
# Tune start/scale/threshold to taste.
|
# Tune start/scale/threshold to taste.
|
||||||
additional_objectives=[UncertaintyPenalty(start=0.3, scale=1.0)], # L2 penalty above soft threshold
|
additional_objectives=[Parameterized_Objectives['uncertainty_penalty'](start=0.3, scale=1.0)],
|
||||||
terminators=[UncertaintyAbort(threshold=0.7)], # abort episode at hard threshold
|
terminators=[Parameterized_Terminators['uncertainty_abort'](threshold=0.7)],
|
||||||
)
|
)
|
||||||
# Or use a preset: env = gym.make('Nucon-goal_power-v0', simulator=simulator)
|
# Or use a preset: env = gym.make('Nucon-goal_power-v0', simulator=simulator)
|
||||||
|
|
||||||
|
|||||||
@ -43,9 +43,6 @@ Parameterized_Terminators = {
|
|||||||
"uncertainty_abort": _uncertainty_abort, # (threshold,) -> (obs) -> float
|
"uncertainty_abort": _uncertainty_abort, # (threshold,) -> (obs) -> float
|
||||||
}
|
}
|
||||||
|
|
||||||
# Convenience aliases
|
|
||||||
UncertaintyPenalty = _uncertainty_penalty
|
|
||||||
UncertaintyAbort = _uncertainty_abort
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user