rename: objectives -> additional_objectives in NuconGoalEnv
Clarifies that the goal reward is the primary built-in objective; additional_objectives are additive on top of it.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
36a33e74e5
commit
041e0ec1bd
@@ -214,7 +214,7 @@ env = NuconGoalEnv(
|
|||||||
# Keep policy within the simulator's known data distribution.
|
# Keep policy within the simulator's known data distribution.
|
||||||
# SIM_UNCERTAINTY (kNN-GP posterior std) is injected into obs when a simulator is active.
|
# SIM_UNCERTAINTY (kNN-GP posterior std) is injected into obs when a simulator is active.
|
||||||
# Tune start/scale/threshold to taste.
|
# Tune start/scale/threshold to taste.
|
||||||
objectives=[UncertaintyPenalty(start=0.3, scale=1.0)], # L2 penalty above soft threshold
|
additional_objectives=[UncertaintyPenalty(start=0.3, scale=1.0)], # L2 penalty above soft threshold
|
||||||
terminators=[UncertaintyAbort(threshold=0.7)], # abort episode at hard threshold
|
terminators=[UncertaintyAbort(threshold=0.7)], # abort episode at hard threshold
|
||||||
)
|
)
|
||||||
# Or use a preset: env = gym.make('Nucon-goal_power-v0', simulator=simulator)
|
# Or use a preset: env = gym.make('Nucon-goal_power-v0', simulator=simulator)
|
||||||
|
|||||||
@@ -280,8 +280,8 @@ class NuconGoalEnv(gym.Env):
|
|||||||
seconds_per_step=5,
|
seconds_per_step=5,
|
||||||
terminators=None,
|
terminators=None,
|
||||||
terminate_above=0,
|
terminate_above=0,
|
||||||
objectives=None,
|
additional_objectives=None,
|
||||||
objective_weights=None,
|
additional_objective_weights=None,
|
||||||
):
|
):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
@@ -351,9 +351,9 @@ class NuconGoalEnv(gym.Env):
|
|||||||
self.action_space = spaces.Dict(action_spaces)
|
self.action_space = spaces.Dict(action_spaces)
|
||||||
|
|
||||||
self._terminators = terminators or []
|
self._terminators = terminators or []
|
||||||
_objs = objectives or []
|
_objs = additional_objectives or []
|
||||||
self._objectives = [Objectives[o] if isinstance(o, str) else o for o in _objs]
|
self._objectives = [Objectives[o] if isinstance(o, str) else o for o in _objs]
|
||||||
self._objective_weights = objective_weights or [1.0] * len(self._objectives)
|
self._objective_weights = additional_objective_weights or [1.0] * len(self._objectives)
|
||||||
self._desired_goal = np.zeros(n_goals, dtype=np.float32)
|
self._desired_goal = np.zeros(n_goals, dtype=np.float32)
|
||||||
self._total_steps = 0
|
self._total_steps = 0
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user