diff --git a/README.md b/README.md index 9a17244..f955c52 100644 --- a/README.md +++ b/README.md @@ -196,7 +196,7 @@ env.close() HER works by relabelling past trajectories with the goal that was *actually achieved*, turning every episode into useful training signal even when the agent never reaches the intended target. This makes it much more sample-efficient than standard RL for goal-reaching tasks. This matters a lot given how slow the real game is. ```python -from nucon.rl import NuconGoalEnv, UncertaintyPenalty, UncertaintyAbort +from nucon.rl import NuconGoalEnv, Parameterized_Objectives, Parameterized_Terminators from stable_baselines3 import SAC from stable_baselines3.common.buffers import HerReplayBuffer @@ -214,8 +214,8 @@ env = NuconGoalEnv( # Keep policy within the simulator's known data distribution. # SIM_UNCERTAINTY (kNN-GP posterior std) is injected into obs when a simulator is active. # Tune start/scale/threshold to taste. - additional_objectives=[UncertaintyPenalty(start=0.3, scale=1.0)], # L2 penalty above soft threshold - terminators=[UncertaintyAbort(threshold=0.7)], # abort episode at hard threshold + additional_objectives=[Parameterized_Objectives['uncertainty_penalty'](start=0.3, scale=1.0)], + terminators=[Parameterized_Terminators['uncertainty_abort'](threshold=0.7)], ) # Or use a preset: env = gym.make('Nucon-goal_power-v0', simulator=simulator) diff --git a/nucon/rl.py b/nucon/rl.py index 93a4509..2fbf22d 100644 --- a/nucon/rl.py +++ b/nucon/rl.py @@ -43,9 +43,6 @@ Parameterized_Terminators = { "uncertainty_abort": _uncertainty_abort, # (threshold,) -> (obs) -> float } -# Convenience aliases -UncertaintyPenalty = _uncertainty_penalty -UncertaintyAbort = _uncertainty_abort # ---------------------------------------------------------------------------