Compare commits

...

2 Commits

Author SHA1 Message Date
dc8bafbbfe Updated README 2024-10-02 19:22:38 +02:00
dc59173fe7 Better parameterized objectives and gym bindings 2024-10-02 19:22:23 +02:00
2 changed files with 25 additions and 4 deletions

View File

@ -102,15 +102,17 @@ The `NuconEnv` class in `nucon/rl.py` provides a Gym-compatible environment for
- Observation space: Includes all readable parameters from the Nucon system.
- Action space: Encompasses all writable parameters in the Nucon system.
- Step function: Applies actions to the Nucon system and returns new observations.
- Objective function: Allows for custom objective functions to be defined for training.
- Objective function: Supports both predefined and custom objective functions for training.
### Usage
Here's a basic example of how to use the RL environment:
```python
from nucon.rl import NuconEnv
from nucon.rl import NuconEnv, Parameterized_Objectives
env = NuconEnv(objectives=['max_power'], seconds_per_step=5)
# env2 = gym.make('Nucon-max_power-v0')
# env3 = NuconEnv(objectives=[Parameterized_Objectives['target_temperature'](goal_temp=600)], seconds_per_step=5)
obs, info = env.reset()
for _ in range(1000):
@ -122,6 +124,8 @@ for _ in range(1000):
env.close()
```
The `objectives` argument accepts either the names of predefined objectives (as strings) or callables (e.g. lambda functions) that take an observation and return a scalar reward. The final reward is the sum over all objectives. `info['objectives']` contains all objectives and their values.
## Testing
NuCon includes a test suite to verify its functionality and compatibility with the Nucleares game.

View File

@ -10,10 +10,13 @@ Objectives = {
"coeff": lambda obj, coeff: lambda obs: obj(obs) * coeff,
"max_power": lambda obs: obs["GENERATOR_0_KW"] + obs["GENERATOR_1_KW"] + obs["GENERATOR_2_KW"],
"target_temperature": lambda goal_temp: lambda obs: (obs["CORE_TEMP"] - goal_temp) ** 2,
"episode_time": lambda obs: obs["EPISODE_TIME"],
}
def _target_temperature(goal_temp):
    """Build an objective that penalizes deviation of CORE_TEMP from goal_temp.

    The returned callable takes an observation mapping and returns the
    negated squared difference between ``obs["CORE_TEMP"]`` and ``goal_temp``,
    so the value is 0 at the goal and negative everywhere else.
    """
    def objective(obs):
        deviation = obs["CORE_TEMP"] - goal_temp
        return -(deviation ** 2)

    return objective


# Objective factories: each entry is called with its parameters and returns
# an objective function mapping an observation to a scalar.
Parameterized_Objectives = {
    "target_temperature": _target_temperature,
}
class NuconEnv(gym.Env):
metadata = {'render_modes': ['human']}
@ -141,3 +144,17 @@ class NuconEnv(gym.Env):
# Build a dict keyed like `self.observation_space`, with each value reshaped
# to (1, -1).
# NOTE(review): `flat_observation` is never read here; `.reshape(1, -1)` is
# called on the values of `self.observation_space` rather than on slices of
# the flat input. This looks unintended — confirm against the matching
# flatten routine before relying on this method.
def _unflatten_observation(self, flat_observation):
return {k: v.reshape(1, -1) for k, v in self.observation_space.items()}
def register_nucon_envs():
    """Register the predefined NuCon environments with gym.

    Two environment IDs are registered, both using the entry point
    ``nucon.rl:NuconEnv`` with ``seconds_per_step=5``:

    * ``Nucon-max_power-v0`` uses the ``'max_power'`` objective.
    * ``Nucon-target_temperature_600-v0`` uses the parameterized
      ``'target_temperature'`` objective built with ``goal_temp=600``.
    """
    def _register(env_id, objectives):
        # Everything except the ID and the objective list is shared.
        gym.register(
            id=env_id,
            entry_point='nucon.rl:NuconEnv',
            kwargs={'seconds_per_step': 5, 'objectives': objectives},
        )

    _register('Nucon-max_power-v0', ['max_power'])
    _register(
        'Nucon-target_temperature_600-v0',
        [Parameterized_Objectives['target_temperature'](goal_temp=600)],
    )
# Perform the registration when execution reaches this point, so the IDs
# set up by register_nucon_envs() are known to gym (e.g. for gym.make).
register_nucon_envs()