More objectives and fixes

This commit is contained in:
Dominik Moritz Roth 2024-10-03 21:56:27 +02:00
parent 33b5db2f57
commit 4c3ad983fc

View File

@ -3,7 +3,7 @@ from gymnasium import spaces
import numpy as np import numpy as np
import time import time
from typing import Dict, Any from typing import Dict, Any
from .core import Nucon, BreakerStatus, PumpStatus, PumpDryStatus, PumpOverloadStatus from nucon import Nucon, BreakerStatus, PumpStatus, PumpDryStatus, PumpOverloadStatus
Objectives = { Objectives = {
"null": lambda obs: 0, "null": lambda obs: 0,
@ -13,12 +13,16 @@ Objectives = {
Parameterized_Objectives = { Parameterized_Objectives = {
"target_temperature": lambda goal_temp: lambda obs: -((obs["CORE_TEMP"] - goal_temp) ** 2), "target_temperature": lambda goal_temp: lambda obs: -((obs["CORE_TEMP"] - goal_temp) ** 2),
"target_gap": lambda goal_gap: lambda obs: -((obs["CORE_TEMP"] - obs["CORE_TEMP_MIN"] - goal_gap) ** 2),
"temp_below": lambda max_temp: lambda obs: -(np.clip(obs["CORE_TEMP"] - max_temp, 0, np.inf) ** 2),
"temp_above": lambda min_temp: lambda obs: -(np.clip(min_temp - obs["CORE_TEMP"], 0, np.inf) ** 2),
"constant": lambda constant: lambda obs: constant,
} }
class NuconEnv(gym.Env): class NuconEnv(gym.Env):
metadata = {'render_modes': ['human']} metadata = {'render_modes': ['human']}
def __init__(self, render_mode=None, seconds_per_step=5, objectives=['null'], terminators=['null'], objective_weights=None, terminate_above=0): def __init__(self, nucon=None, simulator=None, render_mode=None, seconds_per_step=5, objectives=['null'], terminators=['null'], objective_weights=None, terminate_above=0):
super().__init__() super().__init__()
self.render_mode = render_mode self.render_mode = render_mode
@ -26,22 +30,30 @@ class NuconEnv(gym.Env):
if objective_weights is None: if objective_weights is None:
objective_weights = [1.0 for objective in objectives] objective_weights = [1.0 for objective in objectives]
self.objective_weights = objective_weights self.objective_weights = objective_weights
self.terminate_at = terminate_at self.terminate_above = terminate_above
self.simulator = simulator
if nucon is None:
if simulator:
nucon = Nucon(port=simulator.port)
else:
nucon = Nucon()
self.nucon = nucon
# Define observation space # Define observation space
obs_spaces = {'EPISODE_TIME': spaces.Box(low=0, high=np.inf, shape=(1,), dtype=np.float32)} obs_spaces = {'EPISODE_TIME': spaces.Box(low=0, high=np.inf, shape=(1,), dtype=np.float32)}
for param in Nucon.get_all_readable(): for param_id, param in self.nucon.get_all_readable().items():
if param.param_type == float: if param.param_type == float:
obs_spaces[param.id] = spaces.Box(low=param.min_val or -np.inf, high=param.max_val or np.inf, shape=(1,), dtype=np.float32) obs_spaces[param_id] = spaces.Box(low=param.min_val or -np.inf, high=param.max_val or np.inf, shape=(1,), dtype=np.float32)
elif param.param_type == int: elif param.param_type == int:
if param.min_val is not None and param.max_val is not None: if param.min_val is not None and param.max_val is not None:
obs_spaces[param.id] = spaces.Box(low=param.min_val, high=param.max_val, shape=(1,), dtype=np.float32) obs_spaces[param_id] = spaces.Box(low=param.min_val, high=param.max_val, shape=(1,), dtype=np.float32)
else: else:
obs_spaces[param.id] = spaces.Box(low=-np.inf, high=np.inf, shape=(1,), dtype=np.float32) obs_spaces[param_id] = spaces.Box(low=-np.inf, high=np.inf, shape=(1,), dtype=np.float32)
elif param.param_type == bool: elif param.param_type == bool:
obs_spaces[param.id] = spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32) obs_spaces[param_id] = spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32)
elif issubclass(param.param_type, Enum): elif issubclass(param.param_type, Enum):
obs_spaces[param.id] = spaces.Box(low=0, high=1, shape=(len(param.param_type),), dtype=np.float32) obs_spaces[param_id] = spaces.Box(low=0, high=1, shape=(len(param.param_type),), dtype=np.float32)
else: else:
raise ValueError(f"Unsupported observation parameter type: {param.param_type}") raise ValueError(f"Unsupported observation parameter type: {param.param_type}")
@ -49,23 +61,26 @@ class NuconEnv(gym.Env):
# Define action space # Define action space
action_spaces = {} action_spaces = {}
for param in Nucon.get_all_writable(): for param_id, param in self.nucon.get_all_writable().items():
if param.param_type == float: if param.param_type == float:
action_spaces[param.id] = spaces.Box(low=param.min_val or -np.inf, high=param.max_val or np.inf, shape=(1,), dtype=np.float32) action_spaces[param_id] = spaces.Box(low=param.min_val or -np.inf, high=param.max_val or np.inf, shape=(1,), dtype=np.float32)
elif param.param_type == int: elif param.param_type == int:
if param.min_val is not None and param.max_val is not None: if param.min_val is not None and param.max_val is not None:
action_spaces[param.id] = spaces.Box(low=param.min_val, high=param.max_val, shape=(1,), dtype=np.float32) action_spaces[param_id] = spaces.Box(low=param.min_val, high=param.max_val, shape=(1,), dtype=np.float32)
else: else:
action_spaces[param.id] = spaces.Box(low=-np.inf, high=np.inf, shape=(1,), dtype=np.float32) action_spaces[param_id] = spaces.Box(low=-np.inf, high=np.inf, shape=(1,), dtype=np.float32)
elif param.param_type == bool: elif param.param_type == bool:
action_spaces[param.id] = spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32) action_spaces[param_id] = spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32)
elif issubclass(param.param_type, Enum): elif issubclass(param.param_type, Enum):
action_spaces[param.id] = spaces.Box(low=0, high=1, shape=(len(param.param_type),), dtype=np.float32) action_spaces[param_id] = spaces.Box(low=0, high=1, shape=(len(param.param_type),), dtype=np.float32)
else: else:
raise ValueError(f"Unsupported action parameter type: {param.param_type}") raise ValueError(f"Unsupported action parameter type: {param.param_type}")
self.action_space = spaces.Dict(action_spaces) self.action_space = spaces.Dict(action_spaces)
self.objectives = []
self.terminators = []
for objective in objectives: for objective in objectives:
if objective in Objectives: if objective in Objectives:
self.objectives.append(Objectives[objective]) self.objectives.append(Objectives[objective])
@ -84,11 +99,11 @@ class NuconEnv(gym.Env):
def _get_obs(self): def _get_obs(self):
obs = {} obs = {}
for param in Nucon.get_all_readable(): for param_id, param in self.nucon.get_all_readable().items():
value = Nucon.get(param) value = self.nucon.get(param_id)
if isinstance(value, Enum): if isinstance(value, Enum):
value = value.value value = value.value
obs[param.id] = value obs[param_id] = value
obs["EPISODE_TIME"] = self._total_steps * self.seconds_per_step obs["EPISODE_TIME"] = self._total_steps * self.seconds_per_step
return obs return obs
@ -112,12 +127,12 @@ class NuconEnv(gym.Env):
def step(self, action): def step(self, action):
# Apply the action to the Nucon system # Apply the action to the Nucon system
for param_id, value in action.items(): for param_id, value in action.items():
param = next(p for p in Nucon if p.id == param_id) param = next(p for p in self.nucon if p.id == param_id)
if issubclass(param.param_type, Enum): if issubclass(param.param_type, Enum):
value = param.param_type(value) value = param.param_type(value)
if param.min_val is not None and param.max_val is not None: if param.min_val is not None and param.max_val is not None:
value = np.clip(value, param.min_val, param.max_val) value = np.clip(value, param.min_val, param.max_val)
Nucon.set(param, value) self.nucon.set(param, value)
observation = self._get_obs() observation = self._get_obs()
terminated = np.sum([terminator(observation) for terminator in self.terminators]) > self.terminate_above terminated = np.sum([terminator(observation) for terminator in self.terminators]) > self.terminate_above
@ -126,7 +141,10 @@ class NuconEnv(gym.Env):
reward = sum(obj for obj in info['objectives_weighted'].values()) reward = sum(obj for obj in info['objectives_weighted'].values())
self._total_steps += 1 self._total_steps += 1
time.sleep(self.seconds_per_step) if self.simulator:
self.simulator.update(self.seconds_per_step)
else:
time.sleep(self.seconds_per_step)
return observation, reward, terminated, truncated, info return observation, reward, terminated, truncated, info
def render(self): def render(self):
@ -148,6 +166,7 @@ class NuconEnv(gym.Env):
def _unflatten_observation(self, flat_observation): def _unflatten_observation(self, flat_observation):
return {k: v.reshape(1, -1) for k, v in self.observation_space.items()} return {k: v.reshape(1, -1) for k, v in self.observation_space.items()}
def register_nucon_envs(): def register_nucon_envs():
gym.register( gym.register(
id='Nucon-max_power-v0', id='Nucon-max_power-v0',
@ -155,9 +174,14 @@ def register_nucon_envs():
kwargs={'seconds_per_step': 5, 'objectives': ['max_power']} kwargs={'seconds_per_step': 5, 'objectives': ['max_power']}
) )
gym.register( gym.register(
id='Nucon-target_temperature_600-v0', id='Nucon-target_temperature_350-v0',
entry_point='nucon.rl:NuconEnv', entry_point='nucon.rl:NuconEnv',
kwargs={'seconds_per_step': 5, 'objectives': [Parameterized_Objectives['target_temperature'](goal_temp=600)]} kwargs={'seconds_per_step': 5, 'objectives': [Parameterized_Objectives['target_temperature'](goal_temp=350)]}
)
gym.register(
id='Nucon-safe_max_power-v0',
entry_point='nucon.rl:NuconEnv',
kwargs={'seconds_per_step': 5, 'objectives': [Parameterized_Objectives['temp_above'](min_temp=310), Parameterized_Objectives['temp_below'](max_temp=365), 'max_power'], 'objective_weights': [1, 10, 1/100_000]}
) )
register_nucon_envs() register_nucon_envs()