From be2afcac0b8d29fa3a5105e0265f451d67bb2a94 Mon Sep 17 00:00:00 2001 From: David Orme Date: Thu, 16 Oct 2025 15:48:59 +0100 Subject: [PATCH 1/7] New configuration builder - overrides not implemented yet --- .vscode/settings.json | 5 +- tests/conftest.py | 6 +- tests/core/test_configuration_builder.py | 730 +++++++++++++++++++++++ virtual_ecosystem/core/config_builder.py | 558 +++++++++++++++++ 4 files changed, 1297 insertions(+), 2 deletions(-) create mode 100644 tests/core/test_configuration_builder.py create mode 100644 virtual_ecosystem/core/config_builder.py diff --git a/.vscode/settings.json b/.vscode/settings.json index de1452cc3..1a647ffb5 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -9,7 +9,10 @@ "-s", "tests", // --no-cov needed so that breakpoints work: https://github.com/microsoft/vscode-python/issues/693 - "--no-cov" + "--no-cov", + // Do not use parallel workers in VSCode testing panel. + "-n", + "0" ], "workbench.editorAssociations": { "*.ipynb": "jupyter-notebook" diff --git a/tests/conftest.py b/tests/conftest.py index 5fd0b8cf4..da61f84ed 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -60,7 +60,11 @@ def record_found_in_log( try: # Iterate over the record tuples, ignoring the leading element # giving the logger name - _ = next(msg for msg in caplog.record_tuples if msg[1:] == find) + _ = next( + _ + for _, level, message in caplog.record_tuples + if level == find[0] and message.startswith(find[1]) + ) return True except StopIteration: return False diff --git a/tests/core/test_configuration_builder.py b/tests/core/test_configuration_builder.py new file mode 100644 index 000000000..d2a9e5b76 --- /dev/null +++ b/tests/core/test_configuration_builder.py @@ -0,0 +1,730 @@ +"""Testing the config_builder module.""" + +from contextlib import nullcontext as does_not_raise +from logging import CRITICAL, ERROR, INFO +from operator import attrgetter +from pathlib import Path + +import pytest +import tomli_w +from pydantic import BaseModel + +from tests.conftest import log_check, record_found_in_log +from virtual_ecosystem.core.exceptions import ConfigurationError + +# ------------------------------------------------ +# Testing configuration dictionary compilation and merging +# ------------------------------------------------ + + +@pytest.mark.parametrize( + "dest,source,exp_result, exp_conflicts", + [ + pytest.param( + {"d1": {"d2": 3}}, + {"d3": {"d2": 3}}, + {"d1": {"d2": 3}, "d3": {"d2": 3}}, + set(), + id="no_conflict", + ), + pytest.param( + {"d1": {"d2": 3}}, + {"d1": {"d3": 3}}, + {"d1": {"d2": 3, "d3": 3}}, + set(), + id="no_conflict2", + ), + pytest.param( + { + "a": {"aa": {"aaa": True, "aab": True}, "ab": {"abb": True}}, + "b": { + "ba": {"bab": {"baba": True}}, + "bb": { + "bba": {"bbab": {"bbaba": True}}, + "bbb": {"bbba": {"bbbaa": True}}, + }, + }, + }, + { + "a": {"ab": {"aba": False}}, + "b": { + "ba": {"baa": {"baaa": False}}, + "bb": { + "bba": {"bbaa": {"bbaaa": False}}, + "bbb": {"bbbb": {"bbbba": False}}, + }, + }, + }, + { + "a": { + "aa": {"aaa": True, "aab": True}, + "ab": {"aba": False, "abb": True}, + }, + "b": { + "ba": {"baa": {"baaa": False}, "bab": {"baba": True}}, + "bb": { + "bba": {"bbaa": {"bbaaa": False}, "bbab": {"bbaba": True}}, + "bbb": {"bbba": {"bbbaa": True}, "bbbb": {"bbbba": False}}, + }, + }, + }, + set(), + id="no_conflict_complex", + ), + pytest.param( + {"d1": 1}, + {"d1": 2}, + {"d1": 2}, # source value takes precedence + set(["d1"]), + id="conflict_root", + ), + pytest.param( + {"d1": 1}, + {"d1": {"d2": 1}}, + {"d1": {"d2": 1}}, + set(["d1"]), + id="conflict_root2", + ), + pytest.param( + {"d1": {"d2": 3, "d3": 12}}, + {"d1": {"d3": 7}}, + {"d1": {"d2": 3, "d3": 7}}, + set(["d1.d3"]), + id="conflict_nested1", + ), + pytest.param( + {"d1": {"d2": {"d3": 12, "d4": 5}}}, + {"d1": {"d2": {"d3": 5, "d4": 7}}}, + {"d1": {"d2": {"d3": 5, "d4": 7}}}, + set(["d1.d2.d3", "d1.d2.d4"]), + id="conflict_nested_multiple", + ), + pytest.param( + { + "a": {"aa": {"aaa": True, "aab": True}, "ab": {"abb": True}}, + "b": { + "ba": {"bab": {"baba": True}}, + "bb": { + "bba": {"bbab": {"bbaba": True}}, + "bbb": {"bbba": {"bbbaa": True}}, + }, + }, + }, + { + "a": {"ab": {"aba": False}}, + "b": { + "ba": {"baa": {"baaa": False}}, + "bb": { + "bba": {"bbaa": {"bbaaa": False}}, + "bbb": {"bbba": {"bbbaa": False}, "bbbb": {"bbbba": False}}, + }, + }, + }, + { + "a": { + "aa": {"aaa": True, "aab": True}, + "ab": {"aba": False, "abb": True}, + }, + "b": { + "ba": {"baa": {"baaa": False}, "bab": {"baba": True}}, + "bb": { + "bba": {"bbaa": {"bbaaa": False}, "bbab": {"bbaba": True}}, + "bbb": {"bbba": {"bbbaa": False}, "bbbb": {"bbbba": False}}, + }, + }, + }, + set(["b.bb.bbb.bbba.bbbaa"]), + id="conflict_complex", + ), + pytest.param( + {"d1": {"d2": [1, 2, 3]}}, + {"d1": {"d2": [4, 5]}}, + {"d1": {"d2": [1, 2, 3, 4, 5]}}, + set(), + id="no_conflict_list_merge", + ), + # The next example passes just fine, which is intentional, but the test is here + # to highlight the behaviour + pytest.param( + {"d1": {"d2": [1, 2, 3]}}, + {"d1": {"d2": [{"file": "a_path"}]}}, + {"d1": {"d2": [1, 2, 3, {"file": "a_path"}]}}, + set(), + id="no_conflict_list_merge_dubious_content", + ), + pytest.param( + {"d1": {"d2": [1, 2, 3]}}, + {"d1": {"d2": "a"}}, + {"d1": {"d2": "a"}}, + set(["d1.d2"]), + id="conflict_list_and_not_list", + ), + ], +) +def test_config_merge(dest, source, exp_result, exp_conflicts): + """Checks configuration merge and validation function.""" + from virtual_ecosystem.core.config_builder import merge_configuration_dicts + + result, conflicts = merge_configuration_dicts(dest, source) + + assert result == exp_result + assert conflicts == exp_conflicts + + +@pytest.mark.parametrize( + argnames="data, expected_data, expected_conflicts", + argvalues=( + pytest.param( + [{"a": 1}, {"b": 2}, {"c": 3}], + {"a": 1, "b": 2, "c": 3}, + set(), + id="simple", + ), + pytest.param( + [{"a": 1, "d": {"e": 4}}, {"b": 2}, {"c": 3, "f": {"g": 4}}], + {"a": 1, "b": 2, "c": 3, "d": {"e": 4}, "f": {"g": 4}}, + set(), + id="nested", + ), + pytest.param( + [{"a": 1, "d": {"e": 4}}, {"b": 2}, {"c": 3, "d": {"e": 5}}], + {"a": 1, "b": 2, "c": 3, "d": {"e": 5}}, + set(["d.e"]), + id="conflicts", + ), + ), +) +def test_compile_configuration_data(data, expected_data, expected_conflicts): + """Tests compile_configuration_data.""" + + from virtual_ecosystem.core.config_builder import compile_configuration_data + + result, conflicts = compile_configuration_data(data) + + assert result == expected_data + assert conflicts == expected_conflicts + + +# ------------------------------------------------ +# Testing ConfigurationLoader methods +# ------------------------------------------------ + + +@pytest.mark.parametrize( + "cfg_paths, cfg_strings, expected_cfg_paths, raises, err_msg", + [ + pytest.param( + "string1", + None, + [Path("string1")], + does_not_raise(), + None, + id="paths_as_str", + ), + pytest.param( + Path("string1"), + None, + [Path("string1")], + does_not_raise(), + None, + id="paths_as_path", + ), + pytest.param( + ["string1", "string2"], + None, + [Path("string1"), Path("string2")], + does_not_raise(), + None, + id="paths_as_str_list", + ), + pytest.param( + ["string1", Path("string2")], + None, + [Path("string1"), Path("string2")], + does_not_raise(), + None, + id="paths_as_mixed_list", + ), + pytest.param( + [Path("string1"), Path("string2")], + None, + [Path("string1"), Path("string2")], + does_not_raise(), + None, + id="paths_as_path_list", + ), + pytest.param( + None, + """[[core.data.variable]] + file_path = "cellid_coords.nc + var_name = "temp" + """, + [], + does_not_raise(), + None, + id="cfg_strings_as_str", + ), + pytest.param( + None, + [ + """[[core.data.variable]] + file_path = "cellid_coords.nc + var_name = "temp" + """, + """[[core.data.variable]] + file_path = "cellid_coords.nc + var_name = "patm" + """, + ], + [], + does_not_raise(), + None, + id="cfg_strings_as_list", + ), + pytest.param( + None, + None, + [], + pytest.raises(ValueError), + "Provide cfg_paths or cfg_strings.", + id="neither", + ), + pytest.param( + "string1", + """[[core.data.variable]] + file_path = "cellid_coords.nc + var_name = "temp" + """, + [], + pytest.raises(ValueError), + "Do not use both cfg_paths and cfg_strings.", + id="both", + ), + ], +) +def test_ConfigurationLoader_init( + cfg_paths, cfg_strings, expected_cfg_paths, raises, err_msg +): + """Tests the normalisation and startup of ConfigurationLoader instance init.""" + from virtual_ecosystem.core.config_builder import ConfigurationLoader + + # Just check normalisation and error conditions, no processing + with raises as err: + cfg = ConfigurationLoader(cfg_paths=cfg_paths, cfg_strings=cfg_strings) + + if not isinstance(raises, does_not_raise): + assert str(err) == err_msg + + assert cfg.cfg_paths == expected_cfg_paths + if cfg_strings is not None: + assert cfg.from_cfg_strings is True + + +@pytest.mark.parametrize( + "cfg_paths,expected_exception,expected_log_entries", + [ + pytest.param( + ["file_does_not_exist"], + pytest.raises(ConfigurationError), + ( + (ERROR, "Config file path does not exist"), + (CRITICAL, "Config paths not all valid: check log."), + ), + id="bad_path", + ), + pytest.param( + ["cfg_no_toml"], + pytest.raises(ConfigurationError), + ( + (ERROR, "Config directory path contains no TOML files"), + (CRITICAL, "Config paths not all valid: check log."), + ), + id="no_toml_dir", + ), + pytest.param( + ["bad_json_in_schema.json"], + pytest.raises(ConfigurationError), + ( + (ERROR, "Config file path with non-TOML suffix"), + (CRITICAL, "Config paths not all valid: check log."), + ), + id="not_toml", + ), + pytest.param( + [".", "all_config.toml"], + pytest.raises(ConfigurationError), + ( + (ERROR, "Repeated files in config paths:"), + (CRITICAL, "Config paths not all valid: check log."), + ), + id="dupes", + ), + pytest.param( + ["all_config.toml"], + does_not_raise(), + ((INFO, "Config paths resolve to 1 files"),), + id="valid", + ), + ], +) +def test_ConfigurationLoader_collect_config_paths( + caplog, + shared_datadir, + cfg_paths, + expected_exception, + expected_log_entries, +): + """Checks errors for missing config files.""" + from virtual_ecosystem.core.config_builder import ConfigurationLoader + + caplog.clear() + + # Init the class + cfg = ConfigurationLoader([shared_datadir / p for p in cfg_paths]) + + # Check that file resolution runs as expected + with expected_exception: + cfg.collect_config_paths() + + log_check(caplog, expected_log_entries) + + +@pytest.mark.parametrize( + "cfg_paths,expected_exception,expected_log_entries", + [ + pytest.param( + ["toml_errors.toml"], + pytest.raises(ConfigurationError), + ( + (ERROR, "Config TOML parsing error in"), + (CRITICAL, "Errors parsing config files:"), + ), + id="toml_errors", + ), + pytest.param( + ["all_config.toml"], + does_not_raise(), + ((INFO, "Config TOML loaded from "),), + id="toml_valid", + ), + ], +) +def test_ConfigurationLoader_load_config_toml( + caplog, shared_datadir, cfg_paths, expected_exception, expected_log_entries +): + """Check errors for incorrectly formatted config files.""" + from virtual_ecosystem.core.config_builder import ConfigurationLoader + + # Initialise the ConfigurationLoader instance and manually resolve the config paths + # to toml files + cfg = ConfigurationLoader([shared_datadir / p for p in cfg_paths]) + cfg.collect_config_paths() + caplog.clear() + + # Check that load_config_toml behaves as expected + with expected_exception: + cfg.load_config_toml() + + log_check(caplog, expected_log_entries) + + +@pytest.mark.parametrize( + "cfg_paths,expected_exception,expected_log_entries", + [ + pytest.param( + "toml_errors.toml", + pytest.raises(ConfigurationError), + ((CRITICAL, "TOML parsing error in cfg_strings:"),), + id="toml_errors", + ), + pytest.param( + "all_config.toml", + does_not_raise(), + ((INFO, "Config TOML loaded from config strings"),), + id="toml_valid", + ), + ], +) +def test_Config_load_config_toml_string( + caplog, shared_datadir, cfg_paths, expected_exception, expected_log_entries +): + """Check errors for incorrectly formatted cfg_strings.""" + from virtual_ecosystem.core.config_builder import ConfigurationLoader + + # Initialise the Config instance and manually run the load process + with open(Path(shared_datadir) / cfg_paths) as cfg_file: + cfg_strings = cfg_file.read() + + cfg = ConfigurationLoader(cfg_strings=cfg_strings) + caplog.clear() + + # Check that load_config_toml behaves as expected + with expected_exception: + cfg.load_config_toml_string() + + log_check(caplog, expected_log_entries) + + +@pytest.mark.parametrize( + "content,expected_exception,expected_path_log,expected_string_log", + [ + pytest.param( + {"filename1.toml": {"core": {"grid": {"cell_nx": 10, "cell_ny": 10}}}}, + does_not_raise(), + ( + (INFO, "Config paths resolve to 1 files"), + (INFO, "Config TOML loaded from "), + (INFO, "Configuration data compiled."), + ), + ( + (INFO, "Config TOML loaded from config strings"), + (INFO, "Configuration data compiled."), + ), + id="single_file_valid", + ), + pytest.param( + { + "filename1.toml": {"core": {"grid": {"cell_nx": 10, "cell_ny": 10}}}, + "filename2.toml": {"core": {"grid": {"cell_nx": 10, "cell_ny": 10}}}, + }, + pytest.raises(ConfigurationError), + ( + (INFO, "Config paths resolve to 2 files"), + (INFO, "Config TOML loaded from "), + (INFO, "Config TOML loaded from "), + ( + CRITICAL, + "Duplicated entries in config files: " + "core.grid.cell_nx, core.grid.cell_ny", + ), + ), + ( + (INFO, "Config TOML loaded from config strings"), + ( + CRITICAL, + "Duplicated entries in config files: " + "core.grid.cell_nx, core.grid.cell_ny", + ), + ), + id="two_files_conflict", + ), + pytest.param( + { + "filename1.toml": {"core": {"grid": {"cell_nx": 10}}}, + "filename2.toml": {"core": {"grid": {"cell_ny": 10}}}, + }, + does_not_raise(), + ( + (INFO, "Config paths resolve to 2 files"), + (INFO, "Config TOML loaded from "), + (INFO, "Config TOML loaded from "), + (INFO, "Configuration data compiled."), + ), + ( + (INFO, "Config TOML loaded from config strings"), + (INFO, "Configuration data compiled."), + ), + id="two_files_valid", + ), + pytest.param( + { + "filename1.toml": {"core": {"grid": {"cell_nx": 10}}}, + "filename2.toml": {"core": {}}, + "filename3.toml": {"core": {"grid": {"cell_ny": 10}}}, + }, + does_not_raise(), + ( + (INFO, "Config paths resolve to 3 files"), + (INFO, "Config TOML loaded from "), + (INFO, "Config TOML loaded from "), + (INFO, "Config TOML loaded from "), + (INFO, "Configuration data compiled."), + ), + ( + (INFO, "Config TOML loaded from config strings"), + (INFO, "Configuration data compiled."), + ), + id="three_files_valid", + ), + pytest.param( + { + "filename1.toml": {"core": {"grid": {"cell_nx": 10, "cell_ny": 10}}}, + "filename2.toml": {"core": {}}, + "filename3.toml": {"core": {"grid": {"cell_ny": 10}}}, + }, + pytest.raises(ConfigurationError), + ( + (INFO, "Config paths resolve to 3 files"), + (INFO, "Config TOML loaded from "), + (INFO, "Config TOML loaded from "), + (INFO, "Config TOML loaded from "), + ( + CRITICAL, + "Duplicated entries in config files: core.grid.cell_ny", + ), + ), + ( + (INFO, "Config TOML loaded from config strings"), + ( + CRITICAL, + "Duplicated entries in config files: core.grid.cell_ny", + ), + ), + id="three_files_conflict", + ), + ], +) +def test_ConfigurationLoader_load_configuration_data( + tmpdir, caplog, content, expected_exception, expected_path_log, expected_string_log +): + """This test checks the load_configuration_data method. + + This method wraps up the stages of collating the configuration files, loading their + data and resolving relative paths and the final compilation of the resulting data + into a single configuration data dictionary. + """ + from virtual_ecosystem.core.config_builder import ConfigurationLoader + + # ---------------------------- + # Test the cfg_paths option + # ---------------------------- + + cfg_paths = [] + for filename, filedata in content.items(): + filepath = tmpdir / filename + cfg_paths.append(filepath) + with open(filepath, "wb") as outfile: + tomli_w.dump(filedata, outfile) + + # Initialise the Config instance + config_builder = ConfigurationLoader(cfg_paths=cfg_paths) + caplog.clear() + + # Check that load_configuration_data behaves as expected + with expected_exception: + config_builder.load_configuration_data() + + # Test the values that were passed into valid configs. + + assert config_builder.data["core"]["grid"]["cell_nx"] == 10 + assert config_builder.data["core"]["grid"]["cell_nx"] == 10 + + log_check(caplog, expected_path_log) + + # Tidy up + [Path(p).unlink() for p in cfg_paths] + + # ---------------------------- + # Test the cfg_strings option + # ---------------------------- + + cfg_strings = [tomli_w.dumps(v) for k, v in content.items()] + + # Initialise the Config instance + config_builder = ConfigurationLoader(cfg_strings=cfg_strings) + caplog.clear() + + # Check that load_configuration_data behaves as expected + with expected_exception: + config_builder.load_configuration_data() + + # Test the values that were passed into valid configs. + + assert config_builder.data["core"]["grid"]["cell_nx"] == 10 + assert config_builder.data["core"]["grid"]["cell_nx"] == 10 + + log_check(caplog, expected_string_log) + + +# --------------------------------------------------------------- +# Testing functions to create and validate configuration models +# --------------------------------------------------------------- + + +@pytest.mark.parametrize( + argnames="requested, outcome, expected", + argvalues=( + pytest.param( + ["core", "plants"], + does_not_raise(), + ["core", "plants"], + id="core present", + ), + pytest.param( + ["plants"], + does_not_raise(), + ["core", "plants"], + id="core added", + ), + pytest.param( + ["planets"], + pytest.raises(ModuleNotFoundError), + None, + id="unknown model", + ), + ), +) +def test_build_configuration_model(requested, outcome, expected): + """Tests build_configuration_model.""" + from virtual_ecosystem.core.config_builder import build_configuration_model + + with outcome: + model = build_configuration_model(requested) + + assert issubclass(model, BaseModel) + assert set(expected) == set(model.model_fields.keys()) + + +@pytest.mark.parametrize( + argnames="data, outcome, expected_attr_values, expected_log", + argvalues=( + pytest.param( + {"core": {"grid": {"cell_nx": 10}}}, + does_not_raise(), + (("core.grid.cell_nx", 10),), + ((INFO, "Configuration validated."),), + id="core_only", + ), + pytest.param( + { + "core": {"grid": {"cell_nx": 10}}, + "plants": {"constants": {"dsr_to_ppfd": 0.2}}, + }, + does_not_raise(), + (("core.grid.cell_nx", 10), ("plants.constants.dsr_to_ppfd", 0.2)), + ((INFO, "Configuration validated."),), + id="core_plants", + ), + pytest.param( + { + "core": {"grid": {"cell_nx": 10}}, + "plants": {"constants": {"dsr_to_ppfd": "a"}}, + }, + pytest.raises(ConfigurationError), + None, + ( + (ERROR, "plants.constants.dsr_to_ppfd = a"), + (CRITICAL, "Configuration validation failed. See errors above."), + ), + id="core_plants_bad_type", + ), + ), +) +def test_get_configuration(caplog, data, outcome, expected_attr_values, expected_log): + """Tests the get_configuration function.""" + from virtual_ecosystem.core.config_builder import get_configuration + + with outcome: + # Run the function + config = get_configuration(data=data) + + # Do we get a model + assert isinstance(config, BaseModel) + + # Are the attributes as expected + for attr_path, expected_value in expected_attr_values: + assert expected_value == attrgetter(attr_path)(config) + + # Does the log contain the expected outputs + for message in expected_log: + assert record_found_in_log(caplog=caplog, find=message) diff --git a/virtual_ecosystem/core/config_builder.py b/virtual_ecosystem/core/config_builder.py new file mode 100644 index 000000000..e4afd8727 --- /dev/null +++ b/virtual_ecosystem/core/config_builder.py @@ -0,0 +1,558 @@ +"""The :mod:`~virtual_ecosystem.core.config` module is used to read in the various +configuration files, validate their contents, and then configure a ready to run instance +of the virtual ecosystem model. The basic details of how this system is used can be +found :doc:`here `. + +The validation of configuration documents is done using JSONSchema documents associated +with the different model components. See the :mod:`~virtual_ecosystem.core.schema` +module for details. +""" # noqa: D205 + +import tomllib +from collections.abc import Sequence +from copy import deepcopy +from pathlib import Path +from typing import Any + +from pydantic import ValidationError, create_model + +from virtual_ecosystem.core.configuration import ConfigRoot +from virtual_ecosystem.core.exceptions import ConfigurationError +from virtual_ecosystem.core.logger import LOGGER +from virtual_ecosystem.core.registry import MODULE_REGISTRY, register_module + + +def merge_configuration_dicts( + dest: dict, source: dict, **kwargs +) -> tuple[dict, set[str]]: + """Recursively merge two configuration dictionaries. + + This function returns a copy of the input ``dest`` dictionary that has been extended + recursively with the entries from the input ``source`` dictionary. + + The merging process looks for duplicated settings. In general, if two input + dictionaries share complete key paths (that is a set of nested dictionary keys + leading to a value) then that indicates a duplicated setting. The values might be + identical, but the configuration files should not duplicate settings. When + duplicated key paths are found, the value from the source dictionary is used and the + function extends the returned ``conflicts`` set with the duplicated key path. + + However an exception is where both entries are lists - for example, resulting from a + TOML array of tables (https://toml.io/en/v1.0.0#array-of-tables). In this case, it + is reasonable to append the source values to the destination values. The motivating + example here are `[[core.data.variable]]` entries, which can quite reasonably be + split across configuration sources. Note that no attempt is made to check that the + combined values are congruent - this is deferred to error handling when the + configuration data is loaded. + + Args: + dest: A dictionary to extend + source: A dictionary of key value pairs to extend ``dest`` + **kwargs: Additional arguments used in recursion + + Returns: + A copy of dest, extended recursively with values from source, and a tuple of + duplicate key paths. + """ + + # Copy inputs to avoid mangling inputs + dest = deepcopy(dest) + source = deepcopy(source) + + # Populate conflicts and path from defaults or kwargs + conflicts: set = kwargs.get("conflicts", set()) + path: str | None = kwargs.get("path", None) + + # Loop over the elements in the source dictionary + for src_key, src_val in source.items(): + # Get the source key from the dest dictionary and then check for three possible + # outcomes of comparing dest_val and src_val + dest_val = dest.get(src_key) + + if isinstance(dest_val, dict) and isinstance(src_val, dict): + # Both values for this key are dictionaries, so recurse, extending the path + next_path = src_key if path is None else f"{path}.{src_key}" + dest[src_key], conflicts = merge_configuration_dicts( + dest_val, src_val, conflicts=conflicts, path=next_path + ) + elif isinstance(dest_val, list) and isinstance(src_val, list): + # Both values for this key are lists, so merge the lists + dest[src_key] = [*dest_val, *src_val] + elif dest_val is None: + # The key is not currently in dest, so add the key value pair + dest[src_key] = src_val + else: + # The key is in _both_, so override destval with srcval to keep processing, + # but extend the conflicts set with the path to the conflicting key. + dest[src_key] = src_val + conflict_path = src_key if path is None else f"{path}.{src_key}" + conflicts.add(conflict_path) + + # NOTE: Could extend here to check for dest_val == src_val and then ignore + # duplicate matching definitions, but cleaner to just forbid overlap. + + return dest, conflicts + + +def compile_configuration_data(data: list[dict]) -> tuple[dict, set[str]]: + """Compile a combined configuration multiple configuration dictionaries. + + This method sequentially merges configuration dictionaries, such as those loaded + from multiple individual configuration files, into a single configuration + dictionary. It returns the merged dictionary and a set of keys that have duplicated + definitions in the input files. + """ + + # Handle empty lists + if len(data) == 0: + LOGGER.warning("No config files set") + return {}, set() + + # Just return the contents for a singleton list + if len(data) == 1: + return data[0], set() + + # Otherwise, merge other dicts into first + compiled = data[0] + + for src in data[1:]: + compiled, conflicts = merge_configuration_dicts(compiled, src) + + return compiled, conflicts + + +def _resolve_config_paths(config_dir: Path, config_dict: dict[str, Any]) -> None: + """Resolve paths in a configuration file. + + Configuration files may contain keys providing file paths for data and other + settings: these paths may be absolute but also could be relative to the specific + configuration file. This becomes a problem when configurations are compiled across + multiple configuration files, possibly in different locations, so this function + searches the configuration dictionary loaded from a single file and updates + configured relative paths to their absolute paths. + + At present, the configuration schema does not have an explicit mechanism to type a + configuration option as being a path, so we currently use the `_path` suffix to + indicate configuration options setting a path. So, this function recursively search + a configuration file payload for values stored under keys ending in `_path` and + resolves the paths. + + Args: + config_dir: A folder containing a configuration file. + config_dict: A dictionary of contents of the configuration file, which may + contain file paths to resolve. + + Raises: + ValueError: if a key ending in ``_path`` has a non-string value. + """ + + if not config_dir.is_absolute(): + config_dir = config_dir.absolute() + + for key, item in config_dict.items(): + if isinstance(item, dict): + _resolve_config_paths(config_dir=config_dir, config_dict=item) + elif isinstance(item, list): + for list_entry in item: + if isinstance(list_entry, dict): + _resolve_config_paths(config_dir=config_dir, config_dict=list_entry) + elif key.endswith("_path"): + if not isinstance(item, str): + raise ValueError( + f"The value for config key '{key}' is not a string: {item}" + ) + file_path = Path(item) + if not file_path.is_absolute(): + # The resolve method is used here because it is the only method to + # resolve ../ entries from relative file paths and then the path is made + # explicitly absolute + file_resolved = (config_dir / file_path).resolve().absolute() + + config_dict[key] = str(file_resolved) + + +class ConfigurationLoader: + """Configuration loading. + + The ``Config`` class is used to generate a validated configuration for a Virtual + Ecosystem simulation. The ``cfg_paths`` attribute is used to provide paths to TOML + configuration files or directories containing sets of files to be used. The provided + paths are then run through the follow steps to resolve and load the configuration + data. + + * The :meth:`~virtual_ecosystem.core.config.Config.collect_config_paths` method is + used to collect the actual TOML files to be used to build the configuration from + the provided paths. + + * The :meth:`~virtual_ecosystem.core.config.Config.load_config_toml` method is then + used to load the parsed contents of each resolved file into the + :attr:`~virtual_ecosystem.core.config.Config.toml_contents` attribute. + + Alternatively, configuration data may be passed as a string or list of strings using + the ``cfg_strings`` argument. These strings must contain TOML formatted data, which + is parsed and added to the + :attr:`~virtual_ecosystem.core.config.Config.toml_contents` attribute. + + Whichever approach is used, the next two steps are then applied to the provided TOML + data: + + * The :meth:`~virtual_ecosystem.core.config.Config.build_config` method is used to + merge the loaded configuration across files and check that configuration settings + are uniquely defined. + + * The :meth:`~virtual_ecosystem.core.config.Config.validate_config` method + validates the compiled configuration against the appropriate configuration schema + for the :mod:`~virtual_ecosystem.core` module and any models included in the + configuration. This validation will also fill in any missing configuration + settings with defined defaults. + + By default, creating a ``Config`` instance automatically runs these steps across the + provided ``cfg_paths``, but the ``auto`` argument can be used to turn off automatic + validation. + + The :meth:`~virtual_ecosystem.core.config.Config.export_config` method can be used + to export the compiled and validated configuration as a single TOML file. + + If the core.data_output_options.save_merged_config option is set to true a merged + config file will be automatically generated, unless ``auto`` is set to false. + + Args: + cfg_paths: A string, Path or list of strings or Paths giving configuration + file or directory paths. + cfg_strings: A string or list of strings containing TOML formatted configuration + data. + override_params: Extra parameters provided by the user. + auto: A boolean flag setting whether the configuration data is automatically + loaded and validated + """ + + def __init__( + self, + cfg_paths: str | Path | Sequence[str | Path] = [], + cfg_strings: str | list[str] = [], + ) -> None: + # Define attributes + self.cfg_paths: list[Path] = [] + """The configuration file paths, normalised from the cfg_paths argument.""" + self.toml_files: list[str | Path] = [] + """A list of TOML file paths resolved from the initial config paths.""" + self.cfg_strings: list[str] = [] + """A list of strings containing TOML content, provided by the ``cfg_strings`` + argument.""" + self.toml_contents: dict[str | Path, dict] = {} + """A dictionary of the parsed TOML contents of config files or strings, keyed by + file path or string index.""" + self.merge_conflicts: list = [] + """A list of configuration keys duplicated across configuration files.""" + self.config_errors: list[tuple[str, Any]] = [] + """Configuration errors, as a list of tuples of key path and error details.""" + self.from_cfg_strings: bool = False + """A boolean flag indicating whether paths or strings were used to create the + instance.""" + self.model_classes: dict[str, Any] = {} # FIXME: -> dict[str, Type[BaseModel]] + """A dictionary of the model classes specified in the configuration, keyed by + model name.""" + + # Prohibit using neither paths and string or both paths and strings. Note that + # these trap empty lists, so you have to provide _something_. + if not (cfg_paths or cfg_strings): + to_raise = ValueError("Provide cfg_paths or cfg_strings.") + LOGGER.critical(to_raise) + raise to_raise + + if cfg_paths and cfg_strings: + to_raise = ValueError("Do not use both cfg_paths and cfg_strings.") + LOGGER.critical(to_raise) + raise to_raise + + # Standardise inputs and set from_cfg_strings + if cfg_strings: + # Standardise to a list of strings + self.cfg_strings = ( + [cfg_strings] if isinstance(cfg_strings, str) else cfg_strings + ) + self.from_cfg_strings = True + + if cfg_paths: + # Standardise cfg_paths to list of Paths + self.cfg_paths = ( + [Path(cfg_paths)] + if isinstance(cfg_paths, str | Path) + else [Path(p) for p in cfg_paths] + ) + + def load_configuration_data(self): + """Loading configuration data. + + This method loads configuration data from the sources set when the class + instance was created. The method then attempts to compile the sources into a + single compiled dictionary of configuration data, ready for validation and + conversion to a configuration model. + """ + if self.from_cfg_strings: + # Load the TOML content + self.load_config_toml_string() + else: + # Load the TOML content from resolved paths and resolve file paths + # within configuration files. + self.collect_config_paths() + self.load_config_toml() + self.resolve_config_file_paths() + + data, conflicts = compile_configuration_data(list(self.toml_contents.values())) + + # Report on duplicated settings, sorting the conflicts to give stable ordering + # in log reports and errors. + if conflicts: + to_raise = ConfigurationError( + f"Duplicated entries in config files: {', '.join(sorted(conflicts))}", + ) + LOGGER.critical(to_raise) + raise to_raise + + self.data = data + self.conflicts = conflicts + + LOGGER.info("Configuration data compiled.") + + def collect_config_paths(self) -> None: + """Collect TOML config files from provided paths. + + The :class:`~virtual_ecosystem.core.config.Config` class is initialised with a + list of paths to either individual TOML config files or directories containing + possibly multiple config files. This method examines that list to collect all + the individual TOML config files in the provided locations and then populates + the :attr:`~virtual_ecosystem.core.config.Config.toml_files` attribute. + + Raises: + ConfigurationError: this is raised if any of the paths: do not exist, are + directories that do not contain TOML files, are not TOML files or if the + resolved files contain duplicate entries. + """ + + all_valid = True + + # Validate the paths + for path in self.cfg_paths: + if not path.exists(): + all_valid = False + LOGGER.error(f"Config file path does not exist: {path}") + elif path.is_dir(): + toml_in_dir = list(path.glob("*.toml")) + if toml_in_dir: + self.toml_files.extend(toml_in_dir) + else: + all_valid = False + LOGGER.error( + f"Config directory path contains no TOML files: {path}" + ) + elif path.is_file() and path.suffix != ".toml": + all_valid = False + LOGGER.error(f"Config file path with non-TOML suffix: {path}") + else: + self.toml_files.append(path) + + # Check that no files are resolved twice + dupl_files = { + str(md) for md in self.toml_files if self.toml_files.count(md) > 1 + } + if dupl_files: + all_valid = False + LOGGER.error(f"Repeated files in config paths: {','.join(dupl_files)}") + + # Raise if there are any path errors + if not all_valid: + to_raise = ConfigurationError("Config paths not all valid: check log.") + LOGGER.critical(to_raise) + raise to_raise + + LOGGER.info(f"Config paths resolve to {len(self.toml_files)} files") + + def load_config_toml(self) -> None: + """Load the contents of resolved configuration files. + + This method populates the + :attr:`~virtual_ecosystem.core.config.Config.toml_contents` dictionary with the + contents of the configuration files set in + :attr:`~virtual_ecosystem.core.config.Config.toml_files`. That attribute is + normally populated by providing a set of paths to + :class:`~virtual_ecosystem.core.config.Config` and running the + :meth:`~virtual_ecosystem.core.config.Config.collect_config_paths` method, but + it can also be set directly. + + Raises: + ConfigurationError: Invalid TOML content in config files. + """ + + failed_inputs = False + + # Load the contents into the instance + for this_file in self.toml_files: + try: + with open(this_file, "rb") as file_io: + self.toml_contents[this_file] = tomllib.load(file_io) + except tomllib.TOMLDecodeError as err: + failed_inputs = True + LOGGER.error(f"Config TOML parsing error in {this_file}: {err!s}") + else: + LOGGER.info(f"Config TOML loaded from {this_file}") + + if failed_inputs: + to_raise = ConfigurationError("Errors parsing config files: check log") + LOGGER.critical(to_raise) + raise to_raise + + def load_config_toml_string(self) -> None: + """Load the contents of a config provided as a string. + + This method populates the + :attr:`~virtual_ecosystem.core.config.Config.toml_contents` dictionary with the + contents of a provided TOML formatted string. + + Raises: + ConfigurationError: Invalid TOML string. + """ + + for index, cfg_string in enumerate(self.cfg_strings): + # Load the contents into the instance + try: + self.toml_contents[f"cfg_string_{index}"] = tomllib.loads(cfg_string) + except tomllib.TOMLDecodeError as err: + to_raise = ConfigurationError( + f"TOML parsing error in cfg_strings: {err!s}" + ) + LOGGER.critical(to_raise) + raise to_raise + + LOGGER.info("Config TOML loaded from config strings") + + def resolve_config_file_paths(self) -> None: + """Resolve the locations of configured file paths. + + Configuration files can contain paths to other resources, such as the paths to + files containing input data variables. These paths can be absolute, but may also + be relative to the location of the configuration file itself. This method is + used to resolve the location of files to the common root of the provided set of + configuration files, typically the path where a simulation is started. + """ + + # Safeguard against running this when the toml_contents is from a cfg_string + if self.from_cfg_strings: + # TODO - how to resolve relative paths in cfg_string - niche use case + LOGGER.warning("Config file paths not resolved with cfg_string") + return + + for config_file, contents in self.toml_contents.items(): + if isinstance(config_file, Path): + try: + _resolve_config_paths( + config_dir=config_file.parent, config_dict=contents + ) + except ValueError as excep: + LOGGER.critical(excep) + raise excep + + +def build_configuration_model(requested_modules: list[str]) -> type[ConfigRoot]: + """Build a schema to validate the model configuration. + + This method identifies the modules to be configured from the top-level + configuration keys, setting the requested modules to be used in the configured + simulation. The schemas for the requested modules are then loaded and combined + using the :meth:`~virtual_ecosystem.core.schema.merge_schemas` function to + generate a single validation schema for model configuration. + """ + + # The core module is mandatory + if "core" not in requested_modules: + requested_modules = ["core", *requested_modules] + + # Register the requested modules, which handles unknown module names. This step is + # required to populate the module registry with the details of the requested modules + for module in requested_modules: + module = ( + "virtual_ecosystem.core" + if module == "core" + else f"virtual_ecosystem.models.{module}" + ) + register_module(module) + + # Create a list of submodels in the configuration. + submodels = ( + (module, MODULE_REGISTRY[module].config) for module in requested_modules + ) + + # Use pydantic create_model to dynamically generate a model with a field for each + # requested module. Mypy does not like this, but it seems to be used as intended: + # https://docs.pydantic.dev/latest/concepts/models/#dynamic-model-creation + combined_model = create_model( + "Configuration", **{fname: (cname, cname()) for fname, cname in submodels} + ) # type: ignore[call-overload] + + return combined_model + + +def get_configuration( + data: dict[str, Any] = {}, # override_params: dict[str, Any] = {} +) -> ConfigRoot: + """Generate a configuration model from configuration data. + + This method takes a dictionary of configuration data - typically loaded and compiled + using the :class:`ConfigurationLoader` class - and tries to build a validated + configuration model. + + The first step is to take the root sections in the configuration data - indicating + the various science models requested for a simulation - and uses those to build a + composite configuration validator. + + The provided data is then passed into the validator. If validation is successful + then a validated configuration object is returned, otherwise the specific validation + errors are written to the log and the function raises a :class`ConfigurationError` + + Args: + data: A dictionary of unvalidated configuration data. + """ + + # Build the configuration model from the compiled configuration + try: + ConfigurationModel = build_configuration_model( + requested_modules=list(data.keys()) + ) + except (ModuleNotFoundError, RuntimeError) as err: + LOGGER.critical(str(err)) + raise + + LOGGER.info("Configuration model built.") + + try: + configuration = ConfigurationModel().model_validate(data) + except ValidationError as validation_errors: + for error in validation_errors.errors(): + LOGGER.error( + f"{'.'.join(str(x) for x in error['loc'])} = {error['input']}: " + f"{error['msg']}" + ) + LOGGER.critical("Configuration validation failed. See errors above.") + raise ConfigurationError("Validation errors in configuration data - check log.") + + # self.override_config(override_params) + LOGGER.info("Configuration validated.") + + return configuration + + +# def override_config(self, override_params: dict[str, Any]) -> None: +# """Override any parameters desired. + +# Args: +# override_params: Extra parameter settings +# """ +# updated, conflicts = config_merge(self, override_params, conflicts=tuple()) + +# # Conflicts are not errors as we want users to be able to override parameters +# if conflicts: +# LOGGER.info( +# "The following parameter values were overridden: " + ", ".join(conflicts) +# ) + +# self.update(updated) From 18025ed815b7b2245f9067e9daf00c4c43ba93a7 Mon Sep 17 00:00:00 2001 From: David Orme Date: Thu, 16 Oct 2025 16:41:46 +0100 Subject: [PATCH 2/7] Relocated pydantic tester and removed spammy file output --- .../test_configuration.py} | 6 ++- tests/core/test_configuration_builder.py | 38 +++++++++++++++---- virtual_ecosystem/core/config_builder.py | 17 +++++++-- virtual_ecosystem/main.py | 10 +++++ 4 files changed, 58 insertions(+), 13 deletions(-) rename tests/{test_pydantic_config.py => core/test_configuration.py} (95%) diff --git a/tests/test_pydantic_config.py b/tests/core/test_configuration.py similarity index 95% rename from tests/test_pydantic_config.py rename to tests/core/test_configuration.py index 212d0cc82..77672eeeb 100644 --- a/tests/test_pydantic_config.py +++ b/tests/core/test_configuration.py @@ -37,20 +37,22 @@ def test_pydantic(tmp_path): ) # Dump config to file - with open("config.toml", "wb") as tomlfile: + config_path = tmp_path / "config.toml" + with open(config_path, "wb") as tomlfile: tomli_w.dump(json.loads(combined().model_dump_json()), tomlfile) # Reload it - substituting path placeholders for a temporary real file. tmp_file = tmp_path / "temp_file.txt" tmp_file.touch() - with open("config.toml") as tomlfile: + with open(config_path) as tomlfile: content = tomlfile.read() content = content.replace('""', f"'{tmp_file!s}'") content_parsed = tomllib.loads(content) config = combined().model_validate_json(json.dumps(content_parsed)) tmp_file.unlink() + config_path.unlink() # Very basic check for submodels for submodel, _ in submodel_details: diff --git a/tests/core/test_configuration_builder.py b/tests/core/test_configuration_builder.py index d2a9e5b76..63b76a5e6 100644 --- a/tests/core/test_configuration_builder.py +++ b/tests/core/test_configuration_builder.py @@ -445,7 +445,7 @@ def test_ConfigurationLoader_load_config_toml( ), ], ) -def test_Config_load_config_toml_string( +def test_ConfigurationLoader_load_config_toml_string( caplog, shared_datadir, cfg_paths, expected_exception, expected_log_entries ): """Check errors for incorrectly formatted cfg_strings.""" @@ -574,17 +574,31 @@ def test_Config_load_config_toml_string( ), ], ) +@pytest.mark.parametrize("override", [True, False]) def test_ConfigurationLoader_load_configuration_data( - tmpdir, caplog, content, expected_exception, expected_path_log, expected_string_log + tmpdir, + caplog, + content, + expected_exception, + expected_path_log, + expected_string_log, + override, ): """This test checks the load_configuration_data method. This method wraps up the stages of collating the configuration files, loading their data and resolving relative paths and the final compilation of the resulting data - into a single configuration data dictionary. + into a single configuration data dictionary. It also checks that override_params is + handled correctly. """ from virtual_ecosystem.core.config_builder import ConfigurationLoader + # Set an override value or leave empty + if override: + override_params = {"core": {"grid": {"cell_ny": 20}}} + else: + override_params = None + # ---------------------------- # Test the cfg_paths option # ---------------------------- @@ -597,7 +611,9 @@ def test_ConfigurationLoader_load_configuration_data( tomli_w.dump(filedata, outfile) # Initialise the Config instance - config_builder = ConfigurationLoader(cfg_paths=cfg_paths) + config_builder = ConfigurationLoader( + cfg_paths=cfg_paths, override_params=override_params + ) caplog.clear() # Check that load_configuration_data behaves as expected @@ -607,7 +623,10 @@ def test_ConfigurationLoader_load_configuration_data( # Test the values that were passed into valid configs. assert config_builder.data["core"]["grid"]["cell_nx"] == 10 - assert config_builder.data["core"]["grid"]["cell_nx"] == 10 + if override: + assert config_builder.data["core"]["grid"]["cell_ny"] == 20 + else: + assert config_builder.data["core"]["grid"]["cell_ny"] == 10 log_check(caplog, expected_path_log) @@ -621,7 +640,9 @@ def test_ConfigurationLoader_load_configuration_data( cfg_strings = [tomli_w.dumps(v) for k, v in content.items()] # Initialise the Config instance - config_builder = ConfigurationLoader(cfg_strings=cfg_strings) + config_builder = ConfigurationLoader( + cfg_strings=cfg_strings, override_params=override_params + ) caplog.clear() # Check that load_configuration_data behaves as expected @@ -631,7 +652,10 @@ def test_ConfigurationLoader_load_configuration_data( # Test the values that were passed into valid configs. assert config_builder.data["core"]["grid"]["cell_nx"] == 10 - assert config_builder.data["core"]["grid"]["cell_nx"] == 10 + if override: + assert config_builder.data["core"]["grid"]["cell_ny"] == 20 + else: + assert config_builder.data["core"]["grid"]["cell_ny"] == 10 log_check(caplog, expected_string_log) diff --git a/virtual_ecosystem/core/config_builder.py b/virtual_ecosystem/core/config_builder.py index e4afd8727..b56bba3ad 100644 --- a/virtual_ecosystem/core/config_builder.py +++ b/virtual_ecosystem/core/config_builder.py @@ -59,7 +59,9 @@ def merge_configuration_dicts( dest = deepcopy(dest) source = deepcopy(source) - # Populate conflicts and path from defaults or kwargs + # Populate conflicts and path from defaults or kwargs. These are not provided as + # explicit arguments, because they would never really be used outside of recursion + # and so are not really part of the API. conflicts: set = kwargs.get("conflicts", set()) path: str | None = kwargs.get("path", None) @@ -222,14 +224,13 @@ class ConfigurationLoader: cfg_strings: A string or list of strings containing TOML formatted configuration data. override_params: Extra parameters provided by the user. - auto: A boolean flag setting whether the configuration data is automatically - loaded and validated """ def __init__( self, cfg_paths: str | Path | Sequence[str | Path] = [], cfg_strings: str | list[str] = [], + override_params: dict[str, Any] | None = None, ) -> None: # Define attributes self.cfg_paths: list[Path] = [] @@ -252,6 +253,9 @@ def __init__( self.model_classes: dict[str, Any] = {} # FIXME: -> dict[str, Type[BaseModel]] """A dictionary of the model classes specified in the configuration, keyed by model name.""" + self.override_params: dict[str, Any] | None = override_params + """An optional set of parameters that can be used to override configuration data + loaded from file.""" # Prohibit using neither paths and string or both paths and strings. Note that # these trap empty lists, so you have to provide _something_. @@ -310,8 +314,13 @@ def load_configuration_data(self): LOGGER.critical(to_raise) raise to_raise + # Override any existing parameters. Conflicts are allowed here - although this + # mechanism can also be used to set configuration options _not_ in the other + # sources - so do nothing about conflicting settings + if self.override_params is not None: + data, _ = merge_configuration_dicts(data, self.override_params) + self.data = data - self.conflicts = conflicts LOGGER.info("Configuration data compiled.") diff --git a/virtual_ecosystem/main.py b/virtual_ecosystem/main.py index a142941c7..3b303cd4f 100644 --- a/virtual_ecosystem/main.py +++ b/virtual_ecosystem/main.py @@ -15,6 +15,7 @@ from virtual_ecosystem.core import variables from virtual_ecosystem.core.config import Config +from virtual_ecosystem.core.config_builder import ConfigurationLoader, get_configuration from virtual_ecosystem.core.core_components import CoreComponents from virtual_ecosystem.core.data import Data, merge_continuous_data_files from virtual_ecosystem.core.exceptions import ConfigurationError, InitialisationError @@ -117,6 +118,15 @@ def ve_run( cfg_paths=cfg_paths, cfg_strings=cfg_strings, override_params=override_params ) + # NEW configuration system + config_loader = ConfigurationLoader( + cfg_paths=cfg_paths, + cfg_strings=cfg_strings, + override_params=override_params, + ) + config_loader.load_configuration_data() + new_config = get_configuration(config_loader.data) # noqa: F841 + # Save the merged config if requested data_opt = config["core"]["data_output_options"] if data_opt["save_merged_config"]: From c091a377e2f1f7f044fbcd97225f06766579ca1d Mon Sep 17 00:00:00 2001 From: David Orme Date: Sat, 18 Oct 2025 00:07:55 +0100 Subject: [PATCH 3/7] Revised usage pattern --- tests/core/test_configuration_builder.py | 93 ++++++++++++++++++++---- virtual_ecosystem/core/config_builder.py | 80 +++++++++++--------- virtual_ecosystem/main.py | 8 +- 3 files changed, 125 insertions(+), 56 deletions(-) diff --git a/tests/core/test_configuration_builder.py b/tests/core/test_configuration_builder.py index 63b76a5e6..495fdae94 100644 --- a/tests/core/test_configuration_builder.py +++ b/tests/core/test_configuration_builder.py @@ -310,7 +310,9 @@ def test_ConfigurationLoader_init( # Just check normalisation and error conditions, no processing with raises as err: - cfg = ConfigurationLoader(cfg_paths=cfg_paths, cfg_strings=cfg_strings) + cfg = ConfigurationLoader( + cfg_paths=cfg_paths, cfg_strings=cfg_strings, autoload=False + ) if not isinstance(raises, does_not_raise): assert str(err) == err_msg @@ -380,11 +382,11 @@ def test_ConfigurationLoader_collect_config_paths( caplog.clear() # Init the class - cfg = ConfigurationLoader([shared_datadir / p for p in cfg_paths]) + cfg = ConfigurationLoader([shared_datadir / p for p in cfg_paths], autoload=False) # Check that file resolution runs as expected with expected_exception: - cfg.collect_config_paths() + cfg._collect_config_paths() log_check(caplog, expected_log_entries) @@ -417,13 +419,13 @@ def test_ConfigurationLoader_load_config_toml( # Initialise the ConfigurationLoader instance and manually resolve the config paths # to toml files - cfg = ConfigurationLoader([shared_datadir / p for p in cfg_paths]) - cfg.collect_config_paths() + cfg = ConfigurationLoader([shared_datadir / p for p in cfg_paths], autoload=False) + cfg._collect_config_paths() caplog.clear() # Check that load_config_toml behaves as expected with expected_exception: - cfg.load_config_toml() + cfg._load_config_toml() log_check(caplog, expected_log_entries) @@ -455,12 +457,12 @@ def test_ConfigurationLoader_load_config_toml_string( with open(Path(shared_datadir) / cfg_paths) as cfg_file: cfg_strings = cfg_file.read() - cfg = ConfigurationLoader(cfg_strings=cfg_strings) + cfg = ConfigurationLoader(cfg_strings=cfg_strings, autoload=False) caplog.clear() # Check that load_config_toml behaves as expected with expected_exception: - cfg.load_config_toml_string() + cfg._load_config_toml_string() log_check(caplog, expected_log_entries) @@ -612,13 +614,14 @@ def test_ConfigurationLoader_load_configuration_data( # Initialise the Config instance config_builder = ConfigurationLoader( - cfg_paths=cfg_paths, override_params=override_params + cfg_paths=cfg_paths, override_params=override_params, autoload=False ) caplog.clear() # Check that load_configuration_data behaves as expected with expected_exception: - config_builder.load_configuration_data() + config_builder._load_data() + config_builder._compile_data() # Test the values that were passed into valid configs. @@ -641,13 +644,14 @@ def test_ConfigurationLoader_load_configuration_data( # Initialise the Config instance config_builder = ConfigurationLoader( - cfg_strings=cfg_strings, override_params=override_params + cfg_strings=cfg_strings, override_params=override_params, autoload=False ) caplog.clear() # Check that load_configuration_data behaves as expected with expected_exception: - config_builder.load_configuration_data() + config_builder._load_data() + config_builder._compile_data() # Test the values that were passed into valid configs. @@ -734,13 +738,72 @@ def test_build_configuration_model(requested, outcome, expected): ), ), ) -def test_get_configuration(caplog, data, outcome, expected_attr_values, expected_log): +def test_generate_configuration( + caplog, data, outcome, expected_attr_values, expected_log +): + """Tests the generate_configuration function.""" + from virtual_ecosystem.core.config_builder import generate_configuration + + with outcome: + # Run the function + config = generate_configuration(data=data) + + # Do we get a model + assert isinstance(config, BaseModel) + + # Are the attributes as expected + for attr_path, expected_value in expected_attr_values: + assert expected_value == attrgetter(attr_path)(config) + + # Does the log contain the expected outputs + for message in expected_log: + assert record_found_in_log(caplog=caplog, find=message) + + +@pytest.mark.parametrize( + argnames="cfg_strings, outcome, expected_attr_values, expected_log", + argvalues=( + pytest.param( + "[core]\n[core.grid]\ncell_nx = 10\n", + does_not_raise(), + (("core.grid.cell_nx", 10),), + ((INFO, "Configuration validated."),), + id="core_only", + ), + pytest.param( + [ + "[core]\n[core.grid]\ncell_nx = 10\n", + "[plants]\n[plants.constants]\ndsr_to_ppfd = 0.2", + ], + does_not_raise(), + (("core.grid.cell_nx", 10), ("plants.constants.dsr_to_ppfd", 0.2)), + ((INFO, "Configuration validated."),), + id="core_plants", + ), + pytest.param( + [ + "[core]\n[core.grid]\ncell_nx = 10\n", + "[plants]\n[plants.constants]\ndsr_to_ppfd = a", + ], + pytest.raises(ConfigurationError), + None, + ( + (ERROR, "plants.constants.dsr_to_ppfd = a"), + (CRITICAL, "Configuration validation failed. See errors above."), + ), + id="core_plants_bad_type", + ), + ), +) +def test_ConfigurationLoader_get_configuration( + caplog, cfg_strings, outcome, expected_attr_values, expected_log +): """Tests the get_configuration function.""" - from virtual_ecosystem.core.config_builder import get_configuration + from virtual_ecosystem.core.config_builder import ConfigurationLoader with outcome: # Run the function - config = get_configuration(data=data) + config = ConfigurationLoader(cfg_strings=cfg_strings).get_configuration() # Do we get a model assert isinstance(config, BaseModel) diff --git a/virtual_ecosystem/core/config_builder.py b/virtual_ecosystem/core/config_builder.py index ead0150fb..828e77505 100644 --- a/virtual_ecosystem/core/config_builder.py +++ b/virtual_ecosystem/core/config_builder.py @@ -1,13 +1,21 @@ """The :mod:`~virtual_ecosystem.core.config_builder` provides tools to load a set of TOML formatted configuration dictionaries, either from files or from strings. String inputs are primarily intended for use in configuring models for testing, where it is -more convenient to simply provide a string. The main class :class:`ConfigLoader` handles -the loading of configuration data and compiling multiple sources into a single -dictionary of configuration data. +more convenient to simply provide a string. -The :func:`get_configuration` function: +The main class :class:`ConfigLoader` handles the loading of configuration data and +compiling multiple sources into a single dictionary of configuration data. The public +:meth:`ConfigLoader.get_configuration` method then passes the compiled data in the model +instance through the :method:`generate_configuration` function to return the actual +configuration object. Canonical use looks like this: -* takes a compiled configuration document, +.. code-block:: python + + config_object = ConfigurationLoader(...).get_configuration() + +The :func:`generate_configuration` function: + +* takes a compiled dictionary of configuration settings, * assembles a pydantic validation model class using the configuration validators for each of the requested science modules, and * passes the data through the validator to return a validated configuration model for @@ -231,6 +239,8 @@ class ConfigurationLoader: cfg_strings: A string or list of strings containing TOML formatted configuration data. override_params: Extra parameters provided by the user. + autoload: A boolean flag that can be used to turn off automatic data loading and + compilation. """ def __init__( @@ -238,6 +248,7 @@ def __init__( cfg_paths: str | Path | Sequence[str | Path] = [], cfg_strings: str | list[str] = [], override_params: dict[str, Any] | None = None, + autoload: bool = True, ) -> None: # Define attributes self.cfg_paths: list[Path] = [] @@ -292,23 +303,32 @@ def __init__( else [Path(p) for p in cfg_paths] ) - def load_configuration_data(self): - """Loading configuration data. + if autoload: + self._load_data() + self._compile_data() + + def _load_data(self): + """Load configuration data. This method loads configuration data from the sources set when the class - instance was created. The method then attempts to compile the sources into a - single compiled dictionary of configuration data, ready for validation and - conversion to a configuration model. + instance was created. """ if self.from_cfg_strings: # Load the TOML content - self.load_config_toml_string() + self._load_config_toml_string() else: # Load the TOML content from resolved paths and resolve file paths # within configuration files. - self.collect_config_paths() - self.load_config_toml() - self.resolve_config_file_paths() + self._collect_config_paths() + self._load_config_toml() + self._resolve_config_file_paths() + + def _compile_data(self): + """Compile configuration data. + + This method compiles loaded configuration data into a single data dictionary, + warning of conflicting or repeated settings across the sources. + """ data, conflicts = compile_configuration_data(list(self.toml_contents.values())) @@ -331,7 +351,7 @@ def load_configuration_data(self): LOGGER.info("Configuration data compiled.") - def collect_config_paths(self) -> None: + def _collect_config_paths(self) -> None: """Collect TOML config files from provided paths. The :class:`~virtual_ecosystem.core.config.Config` class is initialised with a @@ -384,7 +404,7 @@ def collect_config_paths(self) -> None: LOGGER.info(f"Config paths resolve to {len(self.toml_files)} files") - def load_config_toml(self) -> None: + def _load_config_toml(self) -> None: """Load the contents of resolved configuration files. This method populates the @@ -418,7 +438,7 @@ def load_config_toml(self) -> None: LOGGER.critical(to_raise) raise to_raise - def load_config_toml_string(self) -> None: + def _load_config_toml_string(self) -> None: """Load the contents of a config provided as a string. This method populates the @@ -442,7 +462,7 @@ def load_config_toml_string(self) -> None: LOGGER.info("Config TOML loaded from config strings") - def resolve_config_file_paths(self) -> None: + def _resolve_config_file_paths(self) -> None: """Resolve the locations of configured file paths. Configuration files can contain paths to other resources, such as the paths to @@ -468,6 +488,11 @@ def resolve_config_file_paths(self) -> None: LOGGER.critical(excep) raise excep + def get_configuration(self) -> Configuration: + """Get the configuration instance for the loaded configuration data.""" + + return generate_configuration(self.data) + def build_configuration_model(requested_modules: list[str]) -> type[Configuration]: """Build a schema to validate the model configuration. @@ -508,7 +533,7 @@ def build_configuration_model(requested_modules: list[str]) -> type[Configuratio return combined_model -def get_configuration(data: dict[str, Any] = {}) -> Configuration: +def generate_configuration(data: dict[str, Any] = {}) -> Configuration: """Generate a configuration model from configuration data. This method takes a dictionary of configuration data - typically loaded and compiled @@ -553,20 +578,3 @@ def get_configuration(data: dict[str, Any] = {}) -> Configuration: LOGGER.info("Configuration validated.") return configuration - - -# def override_config(self, override_params: dict[str, Any]) -> None: -# """Override any parameters desired. - -# Args: -# override_params: Extra parameter settings -# """ -# updated, conflicts = config_merge(self, override_params, conflicts=tuple()) - -# # Conflicts are not errors as we want users to be able to override parameters -# if conflicts: -# LOGGER.info( -# "The following parameter values were overridden: " + ", ".join(conflicts) -# ) - -# self.update(updated) diff --git a/virtual_ecosystem/main.py b/virtual_ecosystem/main.py index 3b303cd4f..aa8b58daa 100644 --- a/virtual_ecosystem/main.py +++ b/virtual_ecosystem/main.py @@ -15,7 +15,7 @@ from virtual_ecosystem.core import variables from virtual_ecosystem.core.config import Config -from virtual_ecosystem.core.config_builder import ConfigurationLoader, get_configuration +from virtual_ecosystem.core.config_builder import ConfigurationLoader from virtual_ecosystem.core.core_components import CoreComponents from virtual_ecosystem.core.data import Data, merge_continuous_data_files from virtual_ecosystem.core.exceptions import ConfigurationError, InitialisationError @@ -119,13 +119,11 @@ def ve_run( ) # NEW configuration system - config_loader = ConfigurationLoader( + new_config = ConfigurationLoader( # noqa: F841 cfg_paths=cfg_paths, cfg_strings=cfg_strings, override_params=override_params, - ) - config_loader.load_configuration_data() - new_config = get_configuration(config_loader.data) # noqa: F841 + ).get_configuration() # Save the merged config if requested data_opt = config["core"]["data_output_options"] From b40cc26e2e2e8706e8eff363e502687d5c9b68cb Mon Sep 17 00:00:00 2001 From: David Orme Date: Mon, 20 Oct 2025 12:06:06 +0100 Subject: [PATCH 4/7] Making changes for proposed option 4 in design Q in #1093 --- tests/core/test_configuration.py | 20 ++++----- .../test_modules/one_model/model_config.py | 2 +- virtual_ecosystem/core/config_builder.py | 14 +++--- virtual_ecosystem/core/configuration.py | 29 ++++++++++++- virtual_ecosystem/core/model_config.py | 2 +- virtual_ecosystem/core/registry.py | 43 ++++++++++++++++--- .../models/abiotic/model_config.py | 2 +- .../models/abiotic_simple/model_config.py | 2 +- .../models/animal/model_config.py | 2 +- .../models/hydrology/model_config.py | 2 +- .../models/litter/model_config.py | 2 +- .../models/plants/model_config.py | 2 +- virtual_ecosystem/models/soil/model_config.py | 2 +- .../models/testing/model_config.py | 2 +- 14 files changed, 93 insertions(+), 33 deletions(-) diff --git a/tests/core/test_configuration.py b/tests/core/test_configuration.py index 354833997..646766b3b 100644 --- a/tests/core/test_configuration.py +++ b/tests/core/test_configuration.py @@ -14,20 +14,20 @@ def test_pydantic(tmp_path): """Builds a combined config model from all models and then dumps and reloads it.""" modules = ( - "virtual_ecosystem.core", - "virtual_ecosystem.models.abiotic", - "virtual_ecosystem.models.abiotic_simple", - "virtual_ecosystem.models.animal", - "virtual_ecosystem.models.hydrology", - "virtual_ecosystem.models.litter", - "virtual_ecosystem.models.plants", - "virtual_ecosystem.models.soil", + ("virtual_ecosystem.core", "CoreConfiguration"), + ("virtual_ecosystem.models.abiotic", "AbioticConfiguration"), + ("virtual_ecosystem.models.abiotic_simple", "AbioticSimpleConfiguration"), + ("virtual_ecosystem.models.animal", "AnimalConfiguration"), + ("virtual_ecosystem.models.hydrology", "HydrologyConfiguration"), + ("virtual_ecosystem.models.litter", "LitterConfiguration"), + ("virtual_ecosystem.models.plants", "PlantsConfiguration"), + ("virtual_ecosystem.models.soil", "SoilConfiguration"), ) submodel_details = {} - for module_name in modules: + for module_name, config_name in modules: module = import_module(f"{module_name}.model_config") - config_class = getattr(module, "ModelConfiguration") + config_class = getattr(module, config_name) submodel_details[module_name.split(".")[-1]] = (config_class, config_class()) # Combine diff --git a/tests/core/test_modules/one_model/model_config.py b/tests/core/test_modules/one_model/model_config.py index dfc77f06c..264887de1 100644 --- a/tests/core/test_modules/one_model/model_config.py +++ b/tests/core/test_modules/one_model/model_config.py @@ -3,7 +3,7 @@ from virtual_ecosystem.core.configuration import ModelConfigurationRoot -class ModelConfiguration(ModelConfigurationRoot): +class OneModelConfiguration(ModelConfigurationRoot): """Root configuration class for the testing model.""" numeric_value: float = 1.0 diff --git a/virtual_ecosystem/core/config_builder.py b/virtual_ecosystem/core/config_builder.py index 828e77505..b54943d28 100644 --- a/virtual_ecosystem/core/config_builder.py +++ b/virtual_ecosystem/core/config_builder.py @@ -31,7 +31,7 @@ from pydantic import ValidationError, create_model -from virtual_ecosystem.core.configuration import Configuration +from virtual_ecosystem.core.configuration import CompiledConfiguration from virtual_ecosystem.core.exceptions import ConfigurationError from virtual_ecosystem.core.logger import LOGGER from virtual_ecosystem.core.registry import MODULE_REGISTRY, register_module @@ -488,13 +488,15 @@ def _resolve_config_file_paths(self) -> None: LOGGER.critical(excep) raise excep - def get_configuration(self) -> Configuration: + def get_configuration(self) -> CompiledConfiguration: """Get the configuration instance for the loaded configuration data.""" return generate_configuration(self.data) -def build_configuration_model(requested_modules: list[str]) -> type[Configuration]: +def build_configuration_model( + requested_modules: list[str], +) -> type[CompiledConfiguration]: """Build a schema to validate the model configuration. This method identifies the modules to be configured from the top-level @@ -527,13 +529,15 @@ def build_configuration_model(requested_modules: list[str]) -> type[Configuratio # requested module. Mypy does not like this, but it seems to be used as intended: # https://docs.pydantic.dev/latest/concepts/models/#dynamic-model-creation combined_model = create_model( - "Configuration", **{fname: (cname, cname()) for fname, cname in submodels} + "CompiledConfiguration", + __base__=CompiledConfiguration, + **{fname: (cname, cname()) for fname, cname in submodels}, ) # type: ignore[call-overload] return combined_model -def generate_configuration(data: dict[str, Any] = {}) -> Configuration: +def generate_configuration(data: dict[str, Any] = {}) -> CompiledConfiguration: """Generate a configuration model from configuration data. This method takes a dictionary of configuration data - typically loaded and compiled diff --git a/virtual_ecosystem/core/configuration.py b/virtual_ecosystem/core/configuration.py index 0a57bb3ac..da02d0c48 100644 --- a/virtual_ecosystem/core/configuration.py +++ b/virtual_ecosystem/core/configuration.py @@ -41,8 +41,35 @@ class Configuration(BaseModel): model_config = ConfigDict(use_attribute_docstrings=True) +class CompiledConfiguration(Configuration): + """Compiled configuration class for Virtual Ecosystem models. + + This class is used as the base for dynamically compiled complete model returned by + the ``ConfigurationLoader(...).get_configuration()`` method. It provides a shared + method to extract specific model configurations by name. This is needed because the + dynamic creation means that model fields are not explicitly declared, so `mypy` gets + does not handle ``configuration.plants``, but we can use + `configuration.get_subconfiguration("plants")` instead. + """ + + def get_subconfiguration(self, name: str) -> Configuration: + """Get a named subconfiguration object from a compiled configuration. + + This method can be used to extract model configurations or the core + configuration from a compiled configuration instance. + + Args: + name: The required subconfiguration. + """ + + try: + return getattr(self, name) + except AttributeError: + raise AttributeError(f"Model configuration for {name} not loaded") + + class ModelConfigurationRoot(Configuration): - """Root configuration class for models. + """Root configuration class for individual Virtual Ecosystem models. This model provides a common Pydantic base class that must be used to define the root configuration class of a Virtual Ecosystem model. Each model must define an diff --git a/virtual_ecosystem/core/model_config.py b/virtual_ecosystem/core/model_config.py index f7eba6130..b16f4e77f 100644 --- a/virtual_ecosystem/core/model_config.py +++ b/virtual_ecosystem/core/model_config.py @@ -214,7 +214,7 @@ class Variables(Configuration): variable: tuple[DataSource, ...] = (DataSource(), DataSource()) -class ModelConfiguration(Configuration): +class CoreConfiguration(Configuration): """The core model configuration.""" constants: CoreConstants = CoreConstants() diff --git a/virtual_ecosystem/core/registry.py b/virtual_ecosystem/core/registry.py index 72ff2c1e6..0c69dab47 100644 --- a/virtual_ecosystem/core/registry.py +++ b/virtual_ecosystem/core/registry.py @@ -191,7 +191,9 @@ def register_module(module_name: str) -> None: ) # Find and register the model configuration - model_config_class = get_model_configuration_class(module_name=module_name) + model_config_class = get_model_configuration_class( + module_name=module_name, module_name_short=module_name_short + ) LOGGER.info("Configuration class registered for %s", module_name) @@ -204,22 +206,49 @@ def register_module(module_name: str) -> None: ) -def get_model_configuration_class(module_name): - """Get the root configuration class for a model.""" +def get_model_configuration_class(module_name: str, module_name_short: str): + """Get the root configuration class for a model. + + Discovery is name based, with the function attempting to retrieve a class based on + the model short name: + + * ``plants`` -> ``PlantsConfiguration``, + * ``abiotic_simple`` -> ``AbioticSimpleConfiguration`` + + .. TODO: + + It would probably be cleaner and more flexible to use explicit setting of the + configuration model name as a class attribute on the model definition, but that + requires more changes in the BaseModel and definitions, so use this string + pattern is being used to keep the rollout of the new config system simpler. + + Args: + module_name: The full module name (e.g. ``virtual_ecosystem.models.plants``) + module_name_short: The short module name (e.g ``plants``) + """ try: + # Raise ModuleNotFound if the configuration module is missing config_submodule = import_module(f"{module_name}.model_config") - model_config_class = getattr(config_submodule, "ModelConfiguration") - assert issubclass(model_config_class, Configuration) + # Raises Attribute error if the expected name is not found. + expected_config_class_name = ( + "".join(word.capitalize() for word in module_name_short.split("_")) + + "Configuration" + ) + model_config_class = getattr(config_submodule, expected_config_class_name) + # Raises a runtime error if the retrieved class is not a Configuration. + if not issubclass(model_config_class, Configuration): + raise RuntimeError except ModuleNotFoundError: raise RuntimeError( f"Model {module_name} does not provide a model_config submodule." ) except AttributeError: raise RuntimeError( - f"A ModelConfiguration object is not found in {module_name}.model_config." + f"The {module_name}.model_config module does " + f"not contain {expected_config_class_name}" ) - except AssertionError: + except RuntimeError: raise RuntimeError( f"Model {module_name} config class does does inherit from `ConfigRoot`." ) diff --git a/virtual_ecosystem/models/abiotic/model_config.py b/virtual_ecosystem/models/abiotic/model_config.py index 6155f49b5..e7423e04f 100644 --- a/virtual_ecosystem/models/abiotic/model_config.py +++ b/virtual_ecosystem/models/abiotic/model_config.py @@ -169,7 +169,7 @@ class AbioticConstants(Configuration): """Initial non-zero fill value for energy fluxes, [W m-2].""" -class ModelConfiguration(ModelConfigurationRoot): +class AbioticConfiguration(ModelConfigurationRoot): """The abiotic model configuration.""" constants: AbioticConstants = AbioticConstants() diff --git a/virtual_ecosystem/models/abiotic_simple/model_config.py b/virtual_ecosystem/models/abiotic_simple/model_config.py index c7fffe80c..a90ac4851 100644 --- a/virtual_ecosystem/models/abiotic_simple/model_config.py +++ b/virtual_ecosystem/models/abiotic_simple/model_config.py @@ -60,7 +60,7 @@ class AbioticSimpleBounds(Configuration): """Bounds for soil temperature, [C].""" -class ModelConfiguration(ModelConfigurationRoot): +class AbioticSimpleConfiguration(ModelConfigurationRoot): """Root configuration class for the abiotic simple model.""" constants: AbioticSimpleConstants = AbioticSimpleConstants() diff --git a/virtual_ecosystem/models/animal/model_config.py b/virtual_ecosystem/models/animal/model_config.py index 44272d203..3d6c9b5e7 100644 --- a/virtual_ecosystem/models/animal/model_config.py +++ b/virtual_ecosystem/models/animal/model_config.py @@ -357,7 +357,7 @@ def get_population_density_terms( """The probability a seasonal migration event occurs per time step (month).""" -class ModelConfiguration(ModelConfigurationRoot): +class AnimalConfiguration(ModelConfigurationRoot): """Root configuration class for the animal model.""" functional_group_definitions_path: str = "" diff --git a/virtual_ecosystem/models/hydrology/model_config.py b/virtual_ecosystem/models/hydrology/model_config.py index 3a59b3d2d..2e8579922 100644 --- a/virtual_ecosystem/models/hydrology/model_config.py +++ b/virtual_ecosystem/models/hydrology/model_config.py @@ -208,7 +208,7 @@ class HydrologyConstants(Configuration): """Factor to convert matric potential from m to kPa.""" -class ModelConfiguration(ModelConfigurationRoot): +class HydrologyConfiguration(ModelConfigurationRoot): """Root configuration clas for the hydrology model.""" initial_soil_moisture: float = Field(gt=0, default=0.5) diff --git a/virtual_ecosystem/models/litter/model_config.py b/virtual_ecosystem/models/litter/model_config.py index 4ed9f13d2..0205db9c6 100644 --- a/virtual_ecosystem/models/litter/model_config.py +++ b/virtual_ecosystem/models/litter/model_config.py @@ -161,7 +161,7 @@ class LitterConstants(Configuration): """ -class ModelConfiguration(ModelConfigurationRoot): +class LitterConfiguration(ModelConfigurationRoot): """Root configuration clas for the litter model.""" constants: LitterConstants = LitterConstants() diff --git a/virtual_ecosystem/models/plants/model_config.py b/virtual_ecosystem/models/plants/model_config.py index 64b2d552e..7f3981663 100644 --- a/virtual_ecosystem/models/plants/model_config.py +++ b/virtual_ecosystem/models/plants/model_config.py @@ -121,7 +121,7 @@ class PlantsExportConfig(Configuration): """A float format string to control data precision in export files.""" -class ModelConfiguration(ModelConfigurationRoot): +class PlantsConfiguration(ModelConfigurationRoot): """Root configuration class for the plants model.""" pft_definitions_path: FILEPATH_PLACEHOLDER diff --git a/virtual_ecosystem/models/soil/model_config.py b/virtual_ecosystem/models/soil/model_config.py index de1eff60b..b97bef96a 100644 --- a/virtual_ecosystem/models/soil/model_config.py +++ b/virtual_ecosystem/models/soil/model_config.py @@ -375,7 +375,7 @@ class SoilConstants(Configuration): """ -class ModelConfiguration(ModelConfigurationRoot): +class SoilConfiguration(ModelConfigurationRoot): """Root configuration class for the soil model.""" enzyme_class_definition_path: str = "" diff --git a/virtual_ecosystem/models/testing/model_config.py b/virtual_ecosystem/models/testing/model_config.py index dfc77f06c..3a4d7b080 100644 --- a/virtual_ecosystem/models/testing/model_config.py +++ b/virtual_ecosystem/models/testing/model_config.py @@ -3,7 +3,7 @@ from virtual_ecosystem.core.configuration import ModelConfigurationRoot -class ModelConfiguration(ModelConfigurationRoot): +class TestingConfiguration(ModelConfigurationRoot): """Root configuration class for the testing model.""" numeric_value: float = 1.0 From bc23c23a86e2e14de470e7380b70f5667ac44324 Mon Sep 17 00:00:00 2001 From: David Orme Date: Mon, 20 Oct 2025 15:26:14 +0100 Subject: [PATCH 5/7] @dalonsoa's cunning type hinting fix --- virtual_ecosystem/core/configuration.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/virtual_ecosystem/core/configuration.py b/virtual_ecosystem/core/configuration.py index da02d0c48..b638a63fe 100644 --- a/virtual_ecosystem/core/configuration.py +++ b/virtual_ecosystem/core/configuration.py @@ -14,13 +14,16 @@ found :doc:`here `. """ # noqa: D205 +from collections.abc import Callable from pathlib import Path -from typing import Annotated, TypeAlias +from typing import Annotated, TypeAlias, TypeVar from pydantic import BaseModel, BeforeValidator, ConfigDict, Field, FilePath from pydantic._internal._model_construction import ModelMetaclass from pydantic_core import PydanticUndefined +T = TypeVar("T") + RST_TO_MD = [ (":cite:t:", "{cite:t}"), (":cite:p:", "{cite:p}"), @@ -52,11 +55,21 @@ class CompiledConfiguration(Configuration): `configuration.get_subconfiguration("plants")` instead. """ - def get_subconfiguration(self, name: str) -> Configuration: + def get_subconfiguration(self, name: str, _: Callable[..., T]) -> T: """Get a named subconfiguration object from a compiled configuration. This method can be used to extract model configurations or the core - configuration from a compiled configuration instance. + configuration from a compiled configuration instance. The second argument is + used to provide support for static typing in `mypy` by explicitly providing the + type of the returned object. The method should be called as - for example: + + .. code-block:: Python + + subconfig: SubConfigConfiguration = ( + compiled_configuration_instance.get_subconfiguration( + "subconfig", SubConfigConfiguration + ) + ) Args: name: The required subconfiguration. From c6dd49d86b489316c3d8ce61e8b278016fcd8418 Mon Sep 17 00:00:00 2001 From: David Orme Date: Mon, 20 Oct 2025 17:33:03 +0100 Subject: [PATCH 6/7] Doc fix --- docs/source/using_the_ve/configuration/new_config.md | 5 +++-- virtual_ecosystem/core/configuration.py | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/source/using_the_ve/configuration/new_config.md b/docs/source/using_the_ve/configuration/new_config.md index 3fccb7f77..9c4b41c95 100644 --- a/docs/source/using_the_ve/configuration/new_config.md +++ b/docs/source/using_the_ve/configuration/new_config.md @@ -21,6 +21,7 @@ kernelspec: import tomli_w import json from virtual_ecosystem.core.configuration import model_config_to_html, Configuration +from virtual_ecosystem.core.registry import get_model_configuration_class from IPython.display import display, Markdown, HTML from importlib import import_module @@ -32,8 +33,8 @@ def get_config_class(module_name: str) -> Configuration: """ # Get the config class for the module - module = import_module(f"{module_name}.model_config") - config_class = getattr(module, "ModelConfiguration") + short_name = module_name.split(".")[-1] + config_class = get_model_configuration_class(module_name, short_name) return module_name.split(".")[-1], config_class diff --git a/virtual_ecosystem/core/configuration.py b/virtual_ecosystem/core/configuration.py index b638a63fe..e87eca200 100644 --- a/virtual_ecosystem/core/configuration.py +++ b/virtual_ecosystem/core/configuration.py @@ -23,6 +23,7 @@ from pydantic_core import PydanticUndefined T = TypeVar("T") +"""Generic type to support static typing of subconfigurations.""" RST_TO_MD = [ (":cite:t:", "{cite:t}"), @@ -73,6 +74,8 @@ def get_subconfiguration(self, name: str, _: Callable[..., T]) -> T: Args: name: The required subconfiguration. + _: The class of objected returned by the method. This is not used by the + method itself but is used to support static typing of the return value. """ try: From 7104dbaab2d335e469bf33b8ee39555f53dd9e69 Mon Sep 17 00:00:00 2001 From: David Orme Date: Tue, 21 Oct 2025 10:54:30 +0100 Subject: [PATCH 7/7] Remove micromethod from ConfigLoader --- tests/core/test_configuration_builder.py | 12 ++++++--- virtual_ecosystem/core/config_builder.py | 31 ++++++++++-------------- virtual_ecosystem/main.py | 10 +++++--- 3 files changed, 28 insertions(+), 25 deletions(-) diff --git a/tests/core/test_configuration_builder.py b/tests/core/test_configuration_builder.py index 495fdae94..e8f41d863 100644 --- a/tests/core/test_configuration_builder.py +++ b/tests/core/test_configuration_builder.py @@ -795,15 +795,19 @@ def test_generate_configuration( ), ), ) -def test_ConfigurationLoader_get_configuration( +def test_ConfigurationLoader_to_generate_configuration( caplog, cfg_strings, outcome, expected_attr_values, expected_log ): - """Tests the get_configuration function.""" - from virtual_ecosystem.core.config_builder import ConfigurationLoader + """Tests the flow through the loader to the generate_configuration function.""" + from virtual_ecosystem.core.config_builder import ( + ConfigurationLoader, + generate_configuration, + ) with outcome: # Run the function - config = ConfigurationLoader(cfg_strings=cfg_strings).get_configuration() + config_data = ConfigurationLoader(cfg_strings=cfg_strings) + config = generate_configuration(config_data.data) # Do we get a model assert isinstance(config, BaseModel) diff --git a/virtual_ecosystem/core/config_builder.py b/virtual_ecosystem/core/config_builder.py index b54943d28..63cccf23a 100644 --- a/virtual_ecosystem/core/config_builder.py +++ b/virtual_ecosystem/core/config_builder.py @@ -4,16 +4,9 @@ more convenient to simply provide a string. The main class :class:`ConfigLoader` handles the loading of configuration data and -compiling multiple sources into a single dictionary of configuration data. The public -:meth:`ConfigLoader.get_configuration` method then passes the compiled data in the model -instance through the :method:`generate_configuration` function to return the actual -configuration object. Canonical use looks like this: +compiling multiple sources into a single dictionary of configuration data. -.. code-block:: python - - config_object = ConfigurationLoader(...).get_configuration() - -The :func:`generate_configuration` function: +The :func:`generate_configuration` function then: * takes a compiled dictionary of configuration settings, * assembles a pydantic validation model class using the configuration validators for @@ -21,6 +14,13 @@ * passes the data through the validator to return a validated configuration model for the simulation. +Canonical usage patterns for the module would be: + +.. code-block:: python + + config_data = ConfigurationLoader(...) + config_object = generate_configuration(config_data.data) + """ # noqa: D205 import tomllib @@ -488,11 +488,6 @@ def _resolve_config_file_paths(self) -> None: LOGGER.critical(excep) raise excep - def get_configuration(self) -> CompiledConfiguration: - """Get the configuration instance for the loaded configuration data.""" - - return generate_configuration(self.data) - def build_configuration_model( requested_modules: list[str], @@ -540,13 +535,13 @@ def build_configuration_model( def generate_configuration(data: dict[str, Any] = {}) -> CompiledConfiguration: """Generate a configuration model from configuration data. - This method takes a dictionary of configuration data - typically loaded and compiled - using the :class:`ConfigurationLoader` class - and tries to build a validated - configuration model. + This method takes a dictionary of configuration data and tries to build a validated + configuration model. The input data is typically loaded and compiled using the + :class:`ConfigurationLoader` class. The first step is to take the root sections in the configuration data - indicating the various science models requested for a simulation - and uses those to build a - composite configuration validator. + composite configuration validator class. The provided data is then passed into the validator. If validation is successful then a validated configuration object is returned, otherwise the specific validation diff --git a/virtual_ecosystem/main.py b/virtual_ecosystem/main.py index aa8b58daa..d7f4925c4 100644 --- a/virtual_ecosystem/main.py +++ b/virtual_ecosystem/main.py @@ -15,7 +15,10 @@ from virtual_ecosystem.core import variables from virtual_ecosystem.core.config import Config -from virtual_ecosystem.core.config_builder import ConfigurationLoader +from virtual_ecosystem.core.config_builder import ( + ConfigurationLoader, + generate_configuration, +) from virtual_ecosystem.core.core_components import CoreComponents from virtual_ecosystem.core.data import Data, merge_continuous_data_files from virtual_ecosystem.core.exceptions import ConfigurationError, InitialisationError @@ -119,11 +122,12 @@ def ve_run( ) # NEW configuration system - new_config = ConfigurationLoader( # noqa: F841 + config_data = ConfigurationLoader( cfg_paths=cfg_paths, cfg_strings=cfg_strings, override_params=override_params, - ).get_configuration() + ) + configuration = generate_configuration(config_data.data) # noqa: F841 # Save the merged config if requested data_opt = config["core"]["data_output_options"]