From be2afcac0b8d29fa3a5105e0265f451d67bb2a94 Mon Sep 17 00:00:00 2001
From: David Orme <davidorme@users.noreply.github.com>
Date: Thu, 16 Oct 2025 15:48:59 +0100
Subject: [PATCH 1/7] New configuration builder - overrides not implemented yet

---
 .vscode/settings.json                    |   5 +-
 tests/conftest.py                        |   6 +-
 tests/core/test_configuration_builder.py | 730 +++++++++++++++++++++++
 virtual_ecosystem/core/config_builder.py | 558 +++++++++++++++++
 4 files changed, 1297 insertions(+), 2 deletions(-)
 create mode 100644 tests/core/test_configuration_builder.py
 create mode 100644 virtual_ecosystem/core/config_builder.py

diff --git a/.vscode/settings.json b/.vscode/settings.json
index de1452cc3..1a647ffb5 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -9,7 +9,10 @@
         "-s",
         "tests",
         // --no-cov needed so that breakpoints work: https://github.com/microsoft/vscode-python/issues/693
-        "--no-cov"
+        "--no-cov",
+        // Do not use parallel workers in VSCode testing panel.
+        "-n",
+        "0"
     ],
     "workbench.editorAssociations": {
         "*.ipynb": "jupyter-notebook"
diff --git a/tests/conftest.py b/tests/conftest.py
index 5fd0b8cf4..da61f84ed 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -60,7 +60,11 @@ def record_found_in_log(
     try:
         # Iterate over the record tuples, ignoring the leading element
         # giving the logger name
-        _ = next(msg for msg in caplog.record_tuples if msg[1:] == find)
+        _ = next(
+            _
+            for _, level, message in caplog.record_tuples
+            if level == find[0] and message.startswith(find[1])
+        )
         return True
     except StopIteration:
         return False
diff --git a/tests/core/test_configuration_builder.py b/tests/core/test_configuration_builder.py
new file mode 100644
index 000000000..d2a9e5b76
--- /dev/null
+++ b/tests/core/test_configuration_builder.py
@@ -0,0 +1,730 @@
+"""Testing the config_builder module."""
+
+from contextlib import nullcontext as does_not_raise
+from logging import CRITICAL, ERROR, INFO
+from operator import attrgetter
+from pathlib import Path
+
+import pytest
+import tomli_w
+from pydantic import BaseModel
+
+from tests.conftest import log_check, record_found_in_log
+from virtual_ecosystem.core.exceptions import ConfigurationError
+
+# ------------------------------------------------
+# Testing configuration dictionary compilation and merging
+# ------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "dest,source,exp_result, exp_conflicts",
+    [
+        pytest.param(
+            {"d1": {"d2": 3}},
+            {"d3": {"d2": 3}},
+            {"d1": {"d2": 3}, "d3": {"d2": 3}},
+            set(),
+            id="no_conflict",
+        ),
+        pytest.param(
+            {"d1": {"d2": 3}},
+            {"d1": {"d3": 3}},
+            {"d1": {"d2": 3, "d3": 3}},
+            set(),
+            id="no_conflict2",
+        ),
+        pytest.param(
+            {
+                "a": {"aa": {"aaa": True, "aab": True}, "ab": {"abb": True}},
+                "b": {
+                    "ba": {"bab": {"baba": True}},
+                    "bb": {
+                        "bba": {"bbab": {"bbaba": True}},
+                        "bbb": {"bbba": {"bbbaa": True}},
+                    },
+                },
+            },
+            {
+                "a": {"ab": {"aba": False}},
+                "b": {
+                    "ba": {"baa": {"baaa": False}},
+                    "bb": {
+                        "bba": {"bbaa": {"bbaaa": False}},
+                        "bbb": {"bbbb": {"bbbba": False}},
+                    },
+                },
+            },
+            {
+                "a": {
+                    "aa": {"aaa": True, "aab": True},
+                    "ab": {"aba": False, "abb": True},
+                },
+                "b": {
+                    "ba": {"baa": {"baaa": False}, "bab": {"baba": True}},
+                    "bb": {
+                        "bba": {"bbaa": {"bbaaa": False}, "bbab": {"bbaba": True}},
+                        "bbb": {"bbba": {"bbbaa": True}, "bbbb": {"bbbba": False}},
+                    },
+                },
+            },
+            set(),
+            id="no_conflict_complex",
+        ),
+        pytest.param(
+            {"d1": 1},
+            {"d1": 2},
+            {"d1": 2},  # source value takes precedence
+            set(["d1"]),
+            id="conflict_root",
+        ),
+        pytest.param(
+            {"d1": 1},
+            {"d1": {"d2": 1}},
+            {"d1": {"d2": 1}},
+            set(["d1"]),
+            id="conflict_root2",
+        ),
+        pytest.param(
+            {"d1": {"d2": 3, "d3": 12}},
+            {"d1": {"d3": 7}},
+            {"d1": {"d2": 3, "d3": 7}},
+            set(["d1.d3"]),
+            id="conflict_nested1",
+        ),
+        pytest.param(
+            {"d1": {"d2": {"d3": 12, "d4": 5}}},
+            {"d1": {"d2": {"d3": 5, "d4": 7}}},
+            {"d1": {"d2": {"d3": 5, "d4": 7}}},
+            set(["d1.d2.d3", "d1.d2.d4"]),
+            id="conflict_nested_multiple",
+        ),
+        pytest.param(
+            {
+                "a": {"aa": {"aaa": True, "aab": True}, "ab": {"abb": True}},
+                "b": {
+                    "ba": {"bab": {"baba": True}},
+                    "bb": {
+                        "bba": {"bbab": {"bbaba": True}},
+                        "bbb": {"bbba": {"bbbaa": True}},
+                    },
+                },
+            },
+            {
+                "a": {"ab": {"aba": False}},
+                "b": {
+                    "ba": {"baa": {"baaa": False}},
+                    "bb": {
+                        "bba": {"bbaa": {"bbaaa": False}},
+                        "bbb": {"bbba": {"bbbaa": False}, "bbbb": {"bbbba": False}},
+                    },
+                },
+            },
+            {
+                "a": {
+                    "aa": {"aaa": True, "aab": True},
+                    "ab": {"aba": False, "abb": True},
+                },
+                "b": {
+                    "ba": {"baa": {"baaa": False}, "bab": {"baba": True}},
+                    "bb": {
+                        "bba": {"bbaa": {"bbaaa": False}, "bbab": {"bbaba": True}},
+                        "bbb": {"bbba": {"bbbaa": False}, "bbbb": {"bbbba": False}},
+                    },
+                },
+            },
+            set(["b.bb.bbb.bbba.bbbaa"]),
+            id="conflict_complex",
+        ),
+        pytest.param(
+            {"d1": {"d2": [1, 2, 3]}},
+            {"d1": {"d2": [4, 5]}},
+            {"d1": {"d2": [1, 2, 3, 4, 5]}},
+            set(),
+            id="no_conflict_list_merge",
+        ),
+        # The next example passes just fine, which is intentional, but the test is here
+        # to highlight the behaviour
+        pytest.param(
+            {"d1": {"d2": [1, 2, 3]}},
+            {"d1": {"d2": [{"file": "a_path"}]}},
+            {"d1": {"d2": [1, 2, 3, {"file": "a_path"}]}},
+            set(),
+            id="no_conflict_list_merge_dubious_content",
+        ),
+        pytest.param(
+            {"d1": {"d2": [1, 2, 3]}},
+            {"d1": {"d2": "a"}},
+            {"d1": {"d2": "a"}},
+            set(["d1.d2"]),
+            id="conflict_list_and_not_list",
+        ),
+    ],
+)
+def test_config_merge(dest, source, exp_result, exp_conflicts):
+    """Checks configuration merge and validation function."""
+    from virtual_ecosystem.core.config_builder import merge_configuration_dicts
+
+    result, conflicts = merge_configuration_dicts(dest, source)
+
+    assert result == exp_result
+    assert conflicts == exp_conflicts
+
+
+@pytest.mark.parametrize(
+    argnames="data, expected_data, expected_conflicts",
+    argvalues=(
+        pytest.param(
+            [{"a": 1}, {"b": 2}, {"c": 3}],
+            {"a": 1, "b": 2, "c": 3},
+            set(),
+            id="simple",
+        ),
+        pytest.param(
+            [{"a": 1, "d": {"e": 4}}, {"b": 2}, {"c": 3, "f": {"g": 4}}],
+            {"a": 1, "b": 2, "c": 3, "d": {"e": 4}, "f": {"g": 4}},
+            set(),
+            id="nested",
+        ),
+        pytest.param(
+            [{"a": 1, "d": {"e": 4}}, {"b": 2}, {"c": 3, "d": {"e": 5}}],
+            {"a": 1, "b": 2, "c": 3, "d": {"e": 5}},
+            set(["d.e"]),
+            id="conflicts",
+        ),
+    ),
+)
+def test_compile_configuration_data(data, expected_data, expected_conflicts):
+    """Tests compile_configuration_data."""
+
+    from virtual_ecosystem.core.config_builder import compile_configuration_data
+
+    result, conflicts = compile_configuration_data(data)
+
+    assert result == expected_data
+    assert conflicts == expected_conflicts
+
+
+# ------------------------------------------------
+# Testing ConfigurationLoader methods
+# ------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "cfg_paths, cfg_strings, expected_cfg_paths, raises, err_msg",
+    [
+        pytest.param(
+            "string1",
+            None,
+            [Path("string1")],
+            does_not_raise(),
+            None,
+            id="paths_as_str",
+        ),
+        pytest.param(
+            Path("string1"),
+            None,
+            [Path("string1")],
+            does_not_raise(),
+            None,
+            id="paths_as_path",
+        ),
+        pytest.param(
+            ["string1", "string2"],
+            None,
+            [Path("string1"), Path("string2")],
+            does_not_raise(),
+            None,
+            id="paths_as_str_list",
+        ),
+        pytest.param(
+            ["string1", Path("string2")],
+            None,
+            [Path("string1"), Path("string2")],
+            does_not_raise(),
+            None,
+            id="paths_as_mixed_list",
+        ),
+        pytest.param(
+            [Path("string1"), Path("string2")],
+            None,
+            [Path("string1"), Path("string2")],
+            does_not_raise(),
+            None,
+            id="paths_as_path_list",
+        ),
+        pytest.param(
+            None,
+            """[[core.data.variable]]
+            file_path = "cellid_coords.nc
+            var_name = "temp"
+            """,
+            [],
+            does_not_raise(),
+            None,
+            id="cfg_strings_as_str",
+        ),
+        pytest.param(
+            None,
+            [
+                """[[core.data.variable]]
+                file_path = "cellid_coords.nc
+                var_name = "temp"
+                """,
+                """[[core.data.variable]]
+                file_path = "cellid_coords.nc
+                var_name = "patm"
+                """,
+            ],
+            [],
+            does_not_raise(),
+            None,
+            id="cfg_strings_as_list",
+        ),
+        pytest.param(
+            None,
+            None,
+            [],
+            pytest.raises(ValueError),
+            "Provide cfg_paths or cfg_strings.",
+            id="neither",
+        ),
+        pytest.param(
+            "string1",
+            """[[core.data.variable]]
+            file_path = "cellid_coords.nc
+            var_name = "temp"
+            """,
+            [],
+            pytest.raises(ValueError),
+            "Do not use both cfg_paths and cfg_strings.",
+            id="both",
+        ),
+    ],
+)
+def test_ConfigurationLoader_init(
+    cfg_paths, cfg_strings, expected_cfg_paths, raises, err_msg
+):
+    """Tests the normalisation and startup of ConfigurationLoader instance init."""
+    from virtual_ecosystem.core.config_builder import ConfigurationLoader
+
+    # Just check normalisation and error conditions, no processing
+    with raises as err:
+        cfg = ConfigurationLoader(cfg_paths=cfg_paths, cfg_strings=cfg_strings)
+
+        if not isinstance(raises, does_not_raise):
+            assert str(err) == err_msg
+
+        assert cfg.cfg_paths == expected_cfg_paths
+        if cfg_strings is not None:
+            assert cfg.from_cfg_strings is True
+
+
+@pytest.mark.parametrize(
+    "cfg_paths,expected_exception,expected_log_entries",
+    [
+        pytest.param(
+            ["file_does_not_exist"],
+            pytest.raises(ConfigurationError),
+            (
+                (ERROR, "Config file path does not exist"),
+                (CRITICAL, "Config paths not all valid: check log."),
+            ),
+            id="bad_path",
+        ),
+        pytest.param(
+            ["cfg_no_toml"],
+            pytest.raises(ConfigurationError),
+            (
+                (ERROR, "Config directory path contains no TOML files"),
+                (CRITICAL, "Config paths not all valid: check log."),
+            ),
+            id="no_toml_dir",
+        ),
+        pytest.param(
+            ["bad_json_in_schema.json"],
+            pytest.raises(ConfigurationError),
+            (
+                (ERROR, "Config file path with non-TOML suffix"),
+                (CRITICAL, "Config paths not all valid: check log."),
+            ),
+            id="not_toml",
+        ),
+        pytest.param(
+            [".", "all_config.toml"],
+            pytest.raises(ConfigurationError),
+            (
+                (ERROR, "Repeated files in config paths:"),
+                (CRITICAL, "Config paths not all valid: check log."),
+            ),
+            id="dupes",
+        ),
+        pytest.param(
+            ["all_config.toml"],
+            does_not_raise(),
+            ((INFO, "Config paths resolve to 1 files"),),
+            id="valid",
+        ),
+    ],
+)
+def test_ConfigurationLoader_collect_config_paths(
+    caplog,
+    shared_datadir,
+    cfg_paths,
+    expected_exception,
+    expected_log_entries,
+):
+    """Checks errors for missing config files."""
+    from virtual_ecosystem.core.config_builder import ConfigurationLoader
+
+    caplog.clear()
+
+    # Init the class
+    cfg = ConfigurationLoader([shared_datadir / p for p in cfg_paths])
+
+    # Check that file resolution runs as expected
+    with expected_exception:
+        cfg.collect_config_paths()
+
+    log_check(caplog, expected_log_entries)
+
+
+@pytest.mark.parametrize(
+    "cfg_paths,expected_exception,expected_log_entries",
+    [
+        pytest.param(
+            ["toml_errors.toml"],
+            pytest.raises(ConfigurationError),
+            (
+                (ERROR, "Config TOML parsing error in"),
+                (CRITICAL, "Errors parsing config files:"),
+            ),
+            id="toml_errors",
+        ),
+        pytest.param(
+            ["all_config.toml"],
+            does_not_raise(),
+            ((INFO, "Config TOML loaded from "),),
+            id="toml_valid",
+        ),
+    ],
+)
+def test_ConfigurationLoader_load_config_toml(
+    caplog, shared_datadir, cfg_paths, expected_exception, expected_log_entries
+):
+    """Check errors for incorrectly formatted config files."""
+    from virtual_ecosystem.core.config_builder import ConfigurationLoader
+
+    # Initialise the ConfigurationLoader instance and manually resolve the config paths
+    # to toml files
+    cfg = ConfigurationLoader([shared_datadir / p for p in cfg_paths])
+    cfg.collect_config_paths()
+    caplog.clear()
+
+    # Check that load_config_toml behaves as expected
+    with expected_exception:
+        cfg.load_config_toml()
+
+    log_check(caplog, expected_log_entries)
+
+
+@pytest.mark.parametrize(
+    "cfg_paths,expected_exception,expected_log_entries",
+    [
+        pytest.param(
+            "toml_errors.toml",
+            pytest.raises(ConfigurationError),
+            ((CRITICAL, "TOML parsing error in cfg_strings:"),),
+            id="toml_errors",
+        ),
+        pytest.param(
+            "all_config.toml",
+            does_not_raise(),
+            ((INFO, "Config TOML loaded from config strings"),),
+            id="toml_valid",
+        ),
+    ],
+)
+def test_Config_load_config_toml_string(
+    caplog, shared_datadir, cfg_paths, expected_exception, expected_log_entries
+):
+    """Check errors for incorrectly formatted cfg_strings."""
+    from virtual_ecosystem.core.config_builder import ConfigurationLoader
+
+    # Initialise the Config instance and manually run the load process
+    with open(Path(shared_datadir) / cfg_paths) as cfg_file:
+        cfg_strings = cfg_file.read()
+
+    cfg = ConfigurationLoader(cfg_strings=cfg_strings)
+    caplog.clear()
+
+    # Check that load_config_toml behaves as expected
+    with expected_exception:
+        cfg.load_config_toml_string()
+
+    log_check(caplog, expected_log_entries)
+
+
+@pytest.mark.parametrize(
+    "content,expected_exception,expected_path_log,expected_string_log",
+    [
+        pytest.param(
+            {"filename1.toml": {"core": {"grid": {"cell_nx": 10, "cell_ny": 10}}}},
+            does_not_raise(),
+            (
+                (INFO, "Config paths resolve to 1 files"),
+                (INFO, "Config TOML loaded from "),
+                (INFO, "Configuration data compiled."),
+            ),
+            (
+                (INFO, "Config TOML loaded from config strings"),
+                (INFO, "Configuration data compiled."),
+            ),
+            id="single_file_valid",
+        ),
+        pytest.param(
+            {
+                "filename1.toml": {"core": {"grid": {"cell_nx": 10, "cell_ny": 10}}},
+                "filename2.toml": {"core": {"grid": {"cell_nx": 10, "cell_ny": 10}}},
+            },
+            pytest.raises(ConfigurationError),
+            (
+                (INFO, "Config paths resolve to 2 files"),
+                (INFO, "Config TOML loaded from "),
+                (INFO, "Config TOML loaded from "),
+                (
+                    CRITICAL,
+                    "Duplicated entries in config files: "
+                    "core.grid.cell_nx, core.grid.cell_ny",
+                ),
+            ),
+            (
+                (INFO, "Config TOML loaded from config strings"),
+                (
+                    CRITICAL,
+                    "Duplicated entries in config files: "
+                    "core.grid.cell_nx, core.grid.cell_ny",
+                ),
+            ),
+            id="two_files_conflict",
+        ),
+        pytest.param(
+            {
+                "filename1.toml": {"core": {"grid": {"cell_nx": 10}}},
+                "filename2.toml": {"core": {"grid": {"cell_ny": 10}}},
+            },
+            does_not_raise(),
+            (
+                (INFO, "Config paths resolve to 2 files"),
+                (INFO, "Config TOML loaded from "),
+                (INFO, "Config TOML loaded from "),
+                (INFO, "Configuration data compiled."),
+            ),
+            (
+                (INFO, "Config TOML loaded from config strings"),
+                (INFO, "Configuration data compiled."),
+            ),
+            id="two_files_valid",
+        ),
+        pytest.param(
+            {
+                "filename1.toml": {"core": {"grid": {"cell_nx": 10}}},
+                "filename2.toml": {"core": {}},
+                "filename3.toml": {"core": {"grid": {"cell_ny": 10}}},
+            },
+            does_not_raise(),
+            (
+                (INFO, "Config paths resolve to 3 files"),
+                (INFO, "Config TOML loaded from "),
+                (INFO, "Config TOML loaded from "),
+                (INFO, "Config TOML loaded from "),
+                (INFO, "Configuration data compiled."),
+            ),
+            (
+                (INFO, "Config TOML loaded from config strings"),
+                (INFO, "Configuration data compiled."),
+            ),
+            id="three_files_valid",
+        ),
+        pytest.param(
+            {
+                "filename1.toml": {"core": {"grid": {"cell_nx": 10, "cell_ny": 10}}},
+                "filename2.toml": {"core": {}},
+                "filename3.toml": {"core": {"grid": {"cell_ny": 10}}},
+            },
+            pytest.raises(ConfigurationError),
+            (
+                (INFO, "Config paths resolve to 3 files"),
+                (INFO, "Config TOML loaded from "),
+                (INFO, "Config TOML loaded from "),
+                (INFO, "Config TOML loaded from "),
+                (
+                    CRITICAL,
+                    "Duplicated entries in config files: core.grid.cell_ny",
+                ),
+            ),
+            (
+                (INFO, "Config TOML loaded from config strings"),
+                (
+                    CRITICAL,
+                    "Duplicated entries in config files: core.grid.cell_ny",
+                ),
+            ),
+            id="three_files_conflict",
+        ),
+    ],
+)
+def test_ConfigurationLoader_load_configuration_data(
+    tmpdir, caplog, content, expected_exception, expected_path_log, expected_string_log
+):
+    """This test checks the load_configuration_data method.
+
+    This method wraps up the stages of collating the configuration files, loading their
+    data and resolving relative paths and the final compilation of the resulting data
+    into a single configuration data dictionary.
+    """
+    from virtual_ecosystem.core.config_builder import ConfigurationLoader
+
+    # ----------------------------
+    # Test the cfg_paths option
+    # ----------------------------
+
+    cfg_paths = []
+    for filename, filedata in content.items():
+        filepath = tmpdir / filename
+        cfg_paths.append(filepath)
+        with open(filepath, "wb") as outfile:
+            tomli_w.dump(filedata, outfile)
+
+    # Initialise the Config instance
+    config_builder = ConfigurationLoader(cfg_paths=cfg_paths)
+    caplog.clear()
+
+    # Check that load_configuration_data behaves as expected
+    with expected_exception:
+        config_builder.load_configuration_data()
+
+        # Test the values that were passed into valid configs.
+
+        assert config_builder.data["core"]["grid"]["cell_nx"] == 10
+        assert config_builder.data["core"]["grid"]["cell_nx"] == 10
+
+    log_check(caplog, expected_path_log)
+
+    # Tidy up
+    [Path(p).unlink() for p in cfg_paths]
+
+    # ----------------------------
+    # Test the cfg_strings option
+    # ----------------------------
+
+    cfg_strings = [tomli_w.dumps(v) for k, v in content.items()]
+
+    # Initialise the Config instance
+    config_builder = ConfigurationLoader(cfg_strings=cfg_strings)
+    caplog.clear()
+
+    # Check that load_configuration_data behaves as expected
+    with expected_exception:
+        config_builder.load_configuration_data()
+
+        # Test the values that were passed into valid configs.
+
+        assert config_builder.data["core"]["grid"]["cell_nx"] == 10
+        assert config_builder.data["core"]["grid"]["cell_nx"] == 10
+
+    log_check(caplog, expected_string_log)
+
+
+# ---------------------------------------------------------------
+# Testing functions to create and validate configuration models
+# ---------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    argnames="requested, outcome, expected",
+    argvalues=(
+        pytest.param(
+            ["core", "plants"],
+            does_not_raise(),
+            ["core", "plants"],
+            id="core present",
+        ),
+        pytest.param(
+            ["plants"],
+            does_not_raise(),
+            ["core", "plants"],
+            id="core added",
+        ),
+        pytest.param(
+            ["planets"],
+            pytest.raises(ModuleNotFoundError),
+            None,
+            id="unknown model",
+        ),
+    ),
+)
+def test_build_configuration_model(requested, outcome, expected):
+    """Tests build_configuration_model."""
+    from virtual_ecosystem.core.config_builder import build_configuration_model
+
+    with outcome:
+        model = build_configuration_model(requested)
+
+        assert issubclass(model, BaseModel)
+        assert set(expected) == set(model.model_fields.keys())
+
+
+@pytest.mark.parametrize(
+    argnames="data, outcome, expected_attr_values, expected_log",
+    argvalues=(
+        pytest.param(
+            {"core": {"grid": {"cell_nx": 10}}},
+            does_not_raise(),
+            (("core.grid.cell_nx", 10),),
+            ((INFO, "Configuration validated."),),
+            id="core_only",
+        ),
+        pytest.param(
+            {
+                "core": {"grid": {"cell_nx": 10}},
+                "plants": {"constants": {"dsr_to_ppfd": 0.2}},
+            },
+            does_not_raise(),
+            (("core.grid.cell_nx", 10), ("plants.constants.dsr_to_ppfd", 0.2)),
+            ((INFO, "Configuration validated."),),
+            id="core_plants",
+        ),
+        pytest.param(
+            {
+                "core": {"grid": {"cell_nx": 10}},
+                "plants": {"constants": {"dsr_to_ppfd": "a"}},
+            },
+            pytest.raises(ConfigurationError),
+            None,
+            (
+                (ERROR, "plants.constants.dsr_to_ppfd = a"),
+                (CRITICAL, "Configuration validation failed. See errors above."),
+            ),
+            id="core_plants_bad_type",
+        ),
+    ),
+)
+def test_get_configuration(caplog, data, outcome, expected_attr_values, expected_log):
+    """Tests the get_configuration function."""
+    from virtual_ecosystem.core.config_builder import get_configuration
+
+    with outcome:
+        # Run the function
+        config = get_configuration(data=data)
+
+        # Do we get a model
+        assert isinstance(config, BaseModel)
+
+        # Are the attributes as expected
+        for attr_path, expected_value in expected_attr_values:
+            assert expected_value == attrgetter(attr_path)(config)
+
+        # Does the log contain the expected outputs
+        for message in expected_log:
+            assert record_found_in_log(caplog=caplog, find=message)
diff --git a/virtual_ecosystem/core/config_builder.py b/virtual_ecosystem/core/config_builder.py
new file mode 100644
index 000000000..e4afd8727
--- /dev/null
+++ b/virtual_ecosystem/core/config_builder.py
@@ -0,0 +1,558 @@
+"""The :mod:`~virtual_ecosystem.core.config` module is used to read in the various
+configuration files, validate their contents, and then configure a ready to run instance
+of the virtual ecosystem model. The basic details of how this system is used can be
+found :doc:`here </using_the_ve/configuration/config>`.
+
+The validation of configuration documents is done using JSONSchema documents associated
+with the different model components. See the :mod:`~virtual_ecosystem.core.schema`
+module for details.
+"""  # noqa: D205
+
+import tomllib
+from collections.abc import Sequence
+from copy import deepcopy
+from pathlib import Path
+from typing import Any
+
+from pydantic import ValidationError, create_model
+
+from virtual_ecosystem.core.configuration import ConfigRoot
+from virtual_ecosystem.core.exceptions import ConfigurationError
+from virtual_ecosystem.core.logger import LOGGER
+from virtual_ecosystem.core.registry import MODULE_REGISTRY, register_module
+
+
+def merge_configuration_dicts(
+    dest: dict, source: dict, **kwargs
+) -> tuple[dict, set[str]]:
+    """Recursively merge two configuration dictionaries.
+
+    This function returns a copy of the input ``dest`` dictionary that has been extended
+    recursively with the entries from the input ``source`` dictionary.
+
+    The merging process looks for duplicated settings. In general, if two input
+    dictionaries share complete key paths (that is a set of nested dictionary keys
+    leading to a value) then that indicates a duplicated setting. The values might be
+    identical, but the configuration files should not duplicate settings. When
+    duplicated key paths are found, the value from the source dictionary is used and the
+    function extends the returned ``conflicts`` set with the duplicated key path.
+
+    However an exception is where both entries are lists - for example, resulting from a
+    TOML array of tables (https://toml.io/en/v1.0.0#array-of-tables). In this case, it
+    is reasonable to append the source values to the destination values. The motivating
+    example here are `[[core.data.variable]]` entries, which can quite reasonably be
+    split across configuration sources. Note that no attempt is made to check that the
+    combined values are congruent - this is deferred to error handling when the
+    configuration data is loaded.
+
+    Args:
+        dest: A dictionary to extend
+        source: A dictionary of key value pairs to extend ``dest``
+        **kwargs: Additional arguments used in recursion
+
+    Returns:
+        A copy of dest, extended recursively with values from source, and a tuple of
+        duplicate key paths.
+    """
+
+    # Copy inputs to avoid mangling inputs
+    dest = deepcopy(dest)
+    source = deepcopy(source)
+
+    # Populate conflicts and path from defaults or kwargs
+    conflicts: set = kwargs.get("conflicts", set())
+    path: str | None = kwargs.get("path", None)
+
+    # Loop over the elements in the source dictionary
+    for src_key, src_val in source.items():
+        # Get the source key from the dest dictionary and then check for three possible
+        # outcomes of comparing dest_val and src_val
+        dest_val = dest.get(src_key)
+
+        if isinstance(dest_val, dict) and isinstance(src_val, dict):
+            # Both values for this key are dictionaries, so recurse, extending the path
+            next_path = src_key if path is None else f"{path}.{src_key}"
+            dest[src_key], conflicts = merge_configuration_dicts(
+                dest_val, src_val, conflicts=conflicts, path=next_path
+            )
+        elif isinstance(dest_val, list) and isinstance(src_val, list):
+            # Both values for this key are lists, so merge the lists
+            dest[src_key] = [*dest_val, *src_val]
+        elif dest_val is None:
+            # The key is not currently in dest, so add the key value pair
+            dest[src_key] = src_val
+        else:
+            # The key is in _both_, so override destval with srcval to keep processing,
+            # but extend the conflicts set with the path to the conflicting key.
+            dest[src_key] = src_val
+            conflict_path = src_key if path is None else f"{path}.{src_key}"
+            conflicts.add(conflict_path)
+
+            # NOTE: Could extend here to check for dest_val == src_val and then ignore
+            #       duplicate matching definitions, but cleaner to just forbid overlap.
+
+    return dest, conflicts
+
+
+def compile_configuration_data(data: list[dict]) -> tuple[dict, set[str]]:
+    """Compile a combined configuration multiple configuration dictionaries.
+
+    This method sequentially merges configuration dictionaries, such as those loaded
+    from multiple individual configuration files, into a single configuration
+    dictionary. It returns the merged dictionary and a set of keys that have duplicated
+    definitions in the input files.
+    """
+
+    # Handle empty lists
+    if len(data) == 0:
+        LOGGER.warning("No config files set")
+        return {}, set()
+
+    # Just return the contents for a singleton list
+    if len(data) == 1:
+        return data[0], set()
+
+    # Otherwise, merge other dicts into first
+    compiled = data[0]
+
+    for src in data[1:]:
+        compiled, conflicts = merge_configuration_dicts(compiled, src)
+
+    return compiled, conflicts
+
+
+def _resolve_config_paths(config_dir: Path, config_dict: dict[str, Any]) -> None:
+    """Resolve paths in a configuration file.
+
+    Configuration files may contain keys providing file paths for data and other
+    settings: these paths may be absolute but also could be relative to the specific
+    configuration file. This becomes a problem when configurations are compiled across
+    multiple configuration files, possibly in different locations, so this function
+    searches the configuration dictionary loaded from a single file and updates
+    configured relative paths to their absolute paths.
+
+    At present, the configuration schema does not have an explicit mechanism to type a
+    configuration option as being a path, so we currently use the `_path` suffix to
+    indicate configuration options setting a path. So, this function recursively search
+    a configuration file payload for values stored under keys ending in `_path` and
+    resolves the paths.
+
+    Args:
+        config_dir: A folder containing a configuration file.
+        config_dict: A dictionary of contents of the configuration file, which may
+            contain file paths to resolve.
+
+    Raises:
+        ValueError: if a key ending in ``_path`` has a non-string value.
+    """
+
+    if not config_dir.is_absolute():
+        config_dir = config_dir.absolute()
+
+    for key, item in config_dict.items():
+        if isinstance(item, dict):
+            _resolve_config_paths(config_dir=config_dir, config_dict=item)
+        elif isinstance(item, list):
+            for list_entry in item:
+                if isinstance(list_entry, dict):
+                    _resolve_config_paths(config_dir=config_dir, config_dict=list_entry)
+        elif key.endswith("_path"):
+            if not isinstance(item, str):
+                raise ValueError(
+                    f"The value for config key '{key}' is not a string: {item}"
+                )
+            file_path = Path(item)
+            if not file_path.is_absolute():
+                # The resolve method is used here because it is the only method to
+                # resolve ../ entries from relative file paths and then the path is made
+                # explicitly absolute
+                file_resolved = (config_dir / file_path).resolve().absolute()
+
+                config_dict[key] = str(file_resolved)
+
+
+class ConfigurationLoader:
+    """Configuration loading.
+
+    The ``Config`` class is used to generate a validated configuration for a Virtual
+    Ecosystem simulation. The ``cfg_paths`` attribute is used to provide paths to TOML
+    configuration files or directories containing sets of files to be used. The provided
+    paths are then run through the follow steps to resolve and load the configuration
+    data.
+
+    * The :meth:`~virtual_ecosystem.core.config.Config.collect_config_paths` method is
+      used to collect the actual TOML files to be used to build the configuration from
+      the provided paths.
+
+    * The :meth:`~virtual_ecosystem.core.config.Config.load_config_toml` method is then
+      used to load the parsed contents of each resolved file into the
+      :attr:`~virtual_ecosystem.core.config.Config.toml_contents` attribute.
+
+    Alternatively, configuration data may be passed as a string or list of strings using
+    the ``cfg_strings`` argument. These strings must contain TOML formatted data, which
+    is parsed and added to the
+    :attr:`~virtual_ecosystem.core.config.Config.toml_contents` attribute.
+
+    Whichever approach is used, the next two steps are then applied to the provided TOML
+    data:
+
+    * The :meth:`~virtual_ecosystem.core.config.Config.build_config` method is used to
+      merge the loaded configuration across files and check that configuration settings
+      are uniquely defined.
+
+    * The :meth:`~virtual_ecosystem.core.config.Config.validate_config` method
+      validates the compiled configuration against the appropriate configuration schema
+      for the :mod:`~virtual_ecosystem.core` module and any models included in the
+      configuration. This validation will also fill in any missing configuration
+      settings with defined defaults.
+
+    By default, creating a ``Config`` instance automatically runs these steps across the
+    provided ``cfg_paths``, but the ``auto`` argument can be used to turn off automatic
+    validation.
+
+    The :meth:`~virtual_ecosystem.core.config.Config.export_config` method can be used
+    to export the compiled and validated configuration as a single TOML file.
+
+    If the core.data_output_options.save_merged_config option is set to true a merged
+    config file will be automatically generated, unless ``auto`` is set to false.
+
+    Args:
+        cfg_paths: A string, Path or list of strings or Paths giving configuration
+            file or directory paths.
+        cfg_strings: A string or list of strings containing TOML formatted configuration
+            data.
+        override_params: Extra parameters provided by the user.
+        auto: A boolean flag setting whether the configuration data is automatically
+            loaded and validated
+    """
+
+    def __init__(
+        self,
+        cfg_paths: str | Path | Sequence[str | Path] = [],
+        cfg_strings: str | list[str] = [],
+    ) -> None:
+        # Define attributes
+        self.cfg_paths: list[Path] = []
+        """The configuration file paths, normalised from the cfg_paths argument."""
+        self.toml_files: list[str | Path] = []
+        """A list of TOML file paths resolved from the initial config paths."""
+        self.cfg_strings: list[str] = []
+        """A list of strings containing TOML content, provided by the ``cfg_strings``
+        argument."""
+        self.toml_contents: dict[str | Path, dict] = {}
+        """A dictionary of the parsed TOML contents of config files or strings, keyed by
+        file path or string index."""
+        self.merge_conflicts: list = []
+        """A list of configuration keys duplicated across configuration files."""
+        self.config_errors: list[tuple[str, Any]] = []
+        """Configuration errors, as a list of tuples of key path and error details."""
+        self.from_cfg_strings: bool = False
+        """A boolean flag indicating whether paths or strings were used to create the
+        instance."""
+        self.model_classes: dict[str, Any] = {}  # FIXME: -> dict[str, Type[BaseModel]]
+        """A dictionary of the model classes specified in the configuration, keyed by
+        model name."""
+
+        # Prohibit using neither paths and string or both paths and strings. Note that
+        # these trap empty lists, so you have to provide _something_.
+        if not (cfg_paths or cfg_strings):
+            to_raise = ValueError("Provide cfg_paths or cfg_strings.")
+            LOGGER.critical(to_raise)
+            raise to_raise
+
+        if cfg_paths and cfg_strings:
+            to_raise = ValueError("Do not use both cfg_paths and cfg_strings.")
+            LOGGER.critical(to_raise)
+            raise to_raise
+
+        # Standardise inputs and set from_cfg_strings
+        if cfg_strings:
+            # Standardise to a list of strings
+            self.cfg_strings = (
+                [cfg_strings] if isinstance(cfg_strings, str) else cfg_strings
+            )
+            self.from_cfg_strings = True
+
+        if cfg_paths:
+            # Standardise cfg_paths to list of Paths
+            self.cfg_paths = (
+                [Path(cfg_paths)]
+                if isinstance(cfg_paths, str | Path)
+                else [Path(p) for p in cfg_paths]
+            )
+
+    def load_configuration_data(self):
+        """Loading configuration data.
+
+        This method loads configuration data from the sources set when the class
+        instance was created. The method then attempts to compile the sources into a
+        single compiled dictionary of configuration data, ready for validation and
+        conversion to a configuration model.
+        """
+        if self.from_cfg_strings:
+            # Load the TOML content
+            self.load_config_toml_string()
+        else:
+            # Load the TOML content from resolved paths and resolve file paths
+            # within configuration files.
+            self.collect_config_paths()
+            self.load_config_toml()
+            self.resolve_config_file_paths()
+
+        data, conflicts = compile_configuration_data(list(self.toml_contents.values()))
+
+        # Report on duplicated settings, sorting the conflicts to give stable ordering
+        # in log reports and errors.
+        if conflicts:
+            to_raise = ConfigurationError(
+                f"Duplicated entries in config files: {', '.join(sorted(conflicts))}",
+            )
+            LOGGER.critical(to_raise)
+            raise to_raise
+
+        self.data = data
+        self.conflicts = conflicts
+
+        LOGGER.info("Configuration data compiled.")
+
+    def collect_config_paths(self) -> None:
+        """Collect TOML config files from provided paths.
+
+        The :class:`~virtual_ecosystem.core.config.Config` class is initialised with a
+        list of paths to either individual TOML config files or directories containing
+        possibly multiple config files. This method examines that list to collect all
+        the individual TOML config files in the provided locations and then populates
+        the :attr:`~virtual_ecosystem.core.config.Config.toml_files` attribute.
+
+        Raises:
+            ConfigurationError: this is raised if any of the paths: do not exist, are
+                directories that do not contain TOML files, are not TOML files or if the
+                resolved files contain duplicate entries.
+        """
+
+        all_valid = True
+
+        # Validate the paths
+        for path in self.cfg_paths:
+            if not path.exists():
+                all_valid = False
+                LOGGER.error(f"Config file path does not exist: {path}")
+            elif path.is_dir():
+                toml_in_dir = list(path.glob("*.toml"))
+                if toml_in_dir:
+                    self.toml_files.extend(toml_in_dir)
+                else:
+                    all_valid = False
+                    LOGGER.error(
+                        f"Config directory path contains no TOML files: {path}"
+                    )
+            elif path.is_file() and path.suffix != ".toml":
+                all_valid = False
+                LOGGER.error(f"Config file path with non-TOML suffix: {path}")
+            else:
+                self.toml_files.append(path)
+
+        # Check that no files are resolved twice
+        dupl_files = {
+            str(md) for md in self.toml_files if self.toml_files.count(md) > 1
+        }
+        if dupl_files:
+            all_valid = False
+            LOGGER.error(f"Repeated files in config paths: {','.join(dupl_files)}")
+
+        # Raise if there are any path errors
+        if not all_valid:
+            to_raise = ConfigurationError("Config paths not all valid: check log.")
+            LOGGER.critical(to_raise)
+            raise to_raise
+
+        LOGGER.info(f"Config paths resolve to {len(self.toml_files)} files")
+
+    def load_config_toml(self) -> None:
+        """Load the contents of resolved configuration files.
+
+        This method populates the
+        :attr:`~virtual_ecosystem.core.config.Config.toml_contents` dictionary with the
+        contents of the configuration files set in
+        :attr:`~virtual_ecosystem.core.config.Config.toml_files`. That attribute is
+        normally populated by providing a set of paths to
+        :class:`~virtual_ecosystem.core.config.Config` and running the
+        :meth:`~virtual_ecosystem.core.config.Config.collect_config_paths` method, but
+        it can also be set directly.
+
+        Raises:
+            ConfigurationError: Invalid TOML content in config files.
+        """
+
+        failed_inputs = False
+
+        # Load the contents into the instance
+        for this_file in self.toml_files:
+            try:
+                with open(this_file, "rb") as file_io:
+                    self.toml_contents[this_file] = tomllib.load(file_io)
+            except tomllib.TOMLDecodeError as err:
+                failed_inputs = True
+                LOGGER.error(f"Config TOML parsing error in {this_file}: {err!s}")
+            else:
+                LOGGER.info(f"Config TOML loaded from {this_file}")
+
+        if failed_inputs:
+            to_raise = ConfigurationError("Errors parsing config files: check log")
+            LOGGER.critical(to_raise)
+            raise to_raise
+
+    def load_config_toml_string(self) -> None:
+        """Load the contents of a config provided as a string.
+
+        This method populates the
+        :attr:`~virtual_ecosystem.core.config.Config.toml_contents` dictionary with the
+        contents of a provided TOML formatted string.
+
+        Raises:
+            ConfigurationError: Invalid TOML string.
+        """
+
+        for index, cfg_string in enumerate(self.cfg_strings):
+            # Load the contents into the instance
+            try:
+                self.toml_contents[f"cfg_string_{index}"] = tomllib.loads(cfg_string)
+            except tomllib.TOMLDecodeError as err:
+                to_raise = ConfigurationError(
+                    f"TOML parsing error in cfg_strings: {err!s}"
+                )
+                LOGGER.critical(to_raise)
+                raise to_raise
+
+        LOGGER.info("Config TOML loaded from config strings")
+
+    def resolve_config_file_paths(self) -> None:
+        """Resolve the locations of configured file paths.
+
+        Configuration files can contain paths to other resources, such as the paths to
+        files containing input data variables. These paths can be absolute, but may also
+        be relative to the location of the configuration file itself. This method is
+        used to resolve the location of files to the common root of the provided set of
+        configuration files, typically the path where a simulation is started.
+        """
+
+        # Safeguard against running this when the toml_contents is from a cfg_string
+        if self.from_cfg_strings:
+            # TODO - how to resolve relative paths in cfg_string - niche use case
+            LOGGER.warning("Config file paths not resolved with cfg_string")
+            return
+
+        for config_file, contents in self.toml_contents.items():
+            if isinstance(config_file, Path):
+                try:
+                    _resolve_config_paths(
+                        config_dir=config_file.parent, config_dict=contents
+                    )
+                except ValueError as excep:
+                    LOGGER.critical(excep)
+                    raise excep
+
+
+def build_configuration_model(requested_modules: list[str]) -> type[ConfigRoot]:
+    """Build a schema to validate the model configuration.
+
+    This method identifies the modules to be configured from the top-level
+    configuration keys, setting the requested modules to be used in the configured
+    simulation. The schemas for the requested modules are then loaded and combined
+    using the :meth:`~virtual_ecosystem.core.schema.merge_schemas` function to
+    generate a single validation schema for model configuration.
+    """
+
+    # The core module is mandatory
+    if "core" not in requested_modules:
+        requested_modules = ["core", *requested_modules]
+
+    # Register the requested modules, which handles unknown module names. This step is
+    # required to populate the module registry with the details of the requested modules
+    for module in requested_modules:
+        module = (
+            "virtual_ecosystem.core"
+            if module == "core"
+            else f"virtual_ecosystem.models.{module}"
+        )
+        register_module(module)
+
+    # Create a list of submodels in the configuration.
+    submodels = (
+        (module, MODULE_REGISTRY[module].config) for module in requested_modules
+    )
+
+    # Use pydantic create_model to dynamically generate a model with a field for each
+    # requested module. Mypy does not like this, but it seems to be used as intended:
+    # https://docs.pydantic.dev/latest/concepts/models/#dynamic-model-creation
+    combined_model = create_model(
+        "Configuration", **{fname: (cname, cname()) for fname, cname in submodels}
+    )  # type: ignore[call-overload]
+
+    return combined_model
+
+
+def get_configuration(
+    data: dict[str, Any] = {},  # override_params: dict[str, Any] = {}
+) -> ConfigRoot:
+    """Generate a configuration model from configuration data.
+
+    This method takes a dictionary of configuration data - typically loaded and compiled
+    using the :class:`ConfigurationLoader` class - and tries to build a validated
+    configuration model.
+
+    The first step is to take the root sections in the configuration data - indicating
+    the various science models requested for a simulation - and uses those to build a
+    composite configuration validator.
+
+    The provided data is then passed into the validator. If validation is successful
+    then a validated configuration object is returned, otherwise the specific validation
+    errors are written to the log and the function raises a :class`ConfigurationError`
+
+    Args:
+        data: A dictionary of unvalidated configuration data.
+    """
+
+    # Build the configuration model from the compiled configuration
+    try:
+        ConfigurationModel = build_configuration_model(
+            requested_modules=list(data.keys())
+        )
+    except (ModuleNotFoundError, RuntimeError) as err:
+        LOGGER.critical(str(err))
+        raise
+
+    LOGGER.info("Configuration model built.")
+
+    try:
+        configuration = ConfigurationModel().model_validate(data)
+    except ValidationError as validation_errors:
+        for error in validation_errors.errors():
+            LOGGER.error(
+                f"{'.'.join(str(x) for x in error['loc'])} = {error['input']}: "
+                f"{error['msg']}"
+            )
+        LOGGER.critical("Configuration validation failed. See errors above.")
+        raise ConfigurationError("Validation errors in configuration data - check log.")
+
+    # self.override_config(override_params)
+    LOGGER.info("Configuration validated.")
+
+    return configuration
+
+
+# def override_config(self, override_params: dict[str, Any]) -> None:
+#     """Override any parameters desired.
+
+#     Args:
+#         override_params: Extra parameter settings
+#     """
+#     updated, conflicts = config_merge(self, override_params, conflicts=tuple())
+
+#     # Conflicts are not errors as we want users to be able to override parameters
+#     if conflicts:
+#         LOGGER.info(
+#             "The following parameter values were overridden: " + ", ".join(conflicts)
+#         )
+
+#     self.update(updated)

From 18025ed815b7b2245f9067e9daf00c4c43ba93a7 Mon Sep 17 00:00:00 2001
From: David Orme <davidorme@users.noreply.github.com>
Date: Thu, 16 Oct 2025 16:41:46 +0100
Subject: [PATCH 2/7] Relocated pydantic tester and removed spammy file output

---
 .../test_configuration.py}                    |  6 ++-
 tests/core/test_configuration_builder.py      | 38 +++++++++++++++----
 virtual_ecosystem/core/config_builder.py      | 17 +++++++--
 virtual_ecosystem/main.py                     | 10 +++++
 4 files changed, 58 insertions(+), 13 deletions(-)
 rename tests/{test_pydantic_config.py => core/test_configuration.py} (95%)

diff --git a/tests/test_pydantic_config.py b/tests/core/test_configuration.py
similarity index 95%
rename from tests/test_pydantic_config.py
rename to tests/core/test_configuration.py
index 212d0cc82..77672eeeb 100644
--- a/tests/test_pydantic_config.py
+++ b/tests/core/test_configuration.py
@@ -37,20 +37,22 @@ def test_pydantic(tmp_path):
     )
 
     # Dump config to file
-    with open("config.toml", "wb") as tomlfile:
+    config_path = tmp_path / "config.toml"
+    with open(config_path, "wb") as tomlfile:
         tomli_w.dump(json.loads(combined().model_dump_json()), tomlfile)
 
     # Reload it - substituting path placeholders for a temporary real file.
     tmp_file = tmp_path / "temp_file.txt"
     tmp_file.touch()
 
-    with open("config.toml") as tomlfile:
+    with open(config_path) as tomlfile:
         content = tomlfile.read()
         content = content.replace('"<PLACEHOLDER>"', f"'{tmp_file!s}'")
         content_parsed = tomllib.loads(content)
         config = combined().model_validate_json(json.dumps(content_parsed))
 
     tmp_file.unlink()
+    config_path.unlink()
 
     # Very basic check for submodels
     for submodel, _ in submodel_details:
diff --git a/tests/core/test_configuration_builder.py b/tests/core/test_configuration_builder.py
index d2a9e5b76..63b76a5e6 100644
--- a/tests/core/test_configuration_builder.py
+++ b/tests/core/test_configuration_builder.py
@@ -445,7 +445,7 @@ def test_ConfigurationLoader_load_config_toml(
         ),
     ],
 )
-def test_Config_load_config_toml_string(
+def test_ConfigurationLoader_load_config_toml_string(
     caplog, shared_datadir, cfg_paths, expected_exception, expected_log_entries
 ):
     """Check errors for incorrectly formatted cfg_strings."""
@@ -574,17 +574,31 @@ def test_Config_load_config_toml_string(
         ),
     ],
 )
+@pytest.mark.parametrize("override", [True, False])
 def test_ConfigurationLoader_load_configuration_data(
-    tmpdir, caplog, content, expected_exception, expected_path_log, expected_string_log
+    tmpdir,
+    caplog,
+    content,
+    expected_exception,
+    expected_path_log,
+    expected_string_log,
+    override,
 ):
     """This test checks the load_configuration_data method.
 
     This method wraps up the stages of collating the configuration files, loading their
     data and resolving relative paths and the final compilation of the resulting data
-    into a single configuration data dictionary.
+    into a single configuration data dictionary. It also checks that override_params is
+    handled correctly.
     """
     from virtual_ecosystem.core.config_builder import ConfigurationLoader
 
+    # Set an override value or leave empty
+    if override:
+        override_params = {"core": {"grid": {"cell_ny": 20}}}
+    else:
+        override_params = None
+
     # ----------------------------
     # Test the cfg_paths option
     # ----------------------------
@@ -597,7 +611,9 @@ def test_ConfigurationLoader_load_configuration_data(
             tomli_w.dump(filedata, outfile)
 
     # Initialise the Config instance
-    config_builder = ConfigurationLoader(cfg_paths=cfg_paths)
+    config_builder = ConfigurationLoader(
+        cfg_paths=cfg_paths, override_params=override_params
+    )
     caplog.clear()
 
     # Check that load_configuration_data behaves as expected
@@ -607,7 +623,10 @@ def test_ConfigurationLoader_load_configuration_data(
         # Test the values that were passed into valid configs.
 
         assert config_builder.data["core"]["grid"]["cell_nx"] == 10
-        assert config_builder.data["core"]["grid"]["cell_nx"] == 10
+        if override:
+            assert config_builder.data["core"]["grid"]["cell_ny"] == 20
+        else:
+            assert config_builder.data["core"]["grid"]["cell_ny"] == 10
 
     log_check(caplog, expected_path_log)
 
@@ -621,7 +640,9 @@ def test_ConfigurationLoader_load_configuration_data(
     cfg_strings = [tomli_w.dumps(v) for k, v in content.items()]
 
     # Initialise the Config instance
-    config_builder = ConfigurationLoader(cfg_strings=cfg_strings)
+    config_builder = ConfigurationLoader(
+        cfg_strings=cfg_strings, override_params=override_params
+    )
     caplog.clear()
 
     # Check that load_configuration_data behaves as expected
@@ -631,7 +652,10 @@ def test_ConfigurationLoader_load_configuration_data(
         # Test the values that were passed into valid configs.
 
         assert config_builder.data["core"]["grid"]["cell_nx"] == 10
-        assert config_builder.data["core"]["grid"]["cell_nx"] == 10
+        if override:
+            assert config_builder.data["core"]["grid"]["cell_ny"] == 20
+        else:
+            assert config_builder.data["core"]["grid"]["cell_ny"] == 10
 
     log_check(caplog, expected_string_log)
 
diff --git a/virtual_ecosystem/core/config_builder.py b/virtual_ecosystem/core/config_builder.py
index e4afd8727..b56bba3ad 100644
--- a/virtual_ecosystem/core/config_builder.py
+++ b/virtual_ecosystem/core/config_builder.py
@@ -59,7 +59,9 @@ def merge_configuration_dicts(
     dest = deepcopy(dest)
     source = deepcopy(source)
 
-    # Populate conflicts and path from defaults or kwargs
+    # Populate conflicts and path from defaults or kwargs. These are not provided as
+    # explicit arguments, because they would never really be used outside of recursion
+    # and so are not really part of the API.
     conflicts: set = kwargs.get("conflicts", set())
     path: str | None = kwargs.get("path", None)
 
@@ -222,14 +224,13 @@ class ConfigurationLoader:
         cfg_strings: A string or list of strings containing TOML formatted configuration
             data.
         override_params: Extra parameters provided by the user.
-        auto: A boolean flag setting whether the configuration data is automatically
-            loaded and validated
     """
 
     def __init__(
         self,
         cfg_paths: str | Path | Sequence[str | Path] = [],
         cfg_strings: str | list[str] = [],
+        override_params: dict[str, Any] | None = None,
     ) -> None:
         # Define attributes
         self.cfg_paths: list[Path] = []
@@ -252,6 +253,9 @@ def __init__(
         self.model_classes: dict[str, Any] = {}  # FIXME: -> dict[str, Type[BaseModel]]
         """A dictionary of the model classes specified in the configuration, keyed by
         model name."""
+        self.override_params: dict[str, Any] | None = override_params
+        """An optional set of parameters that can be used to override configuration data
+        loaded from file."""
 
         # Prohibit using neither paths and string or both paths and strings. Note that
         # these trap empty lists, so you have to provide _something_.
@@ -310,8 +314,13 @@ def load_configuration_data(self):
             LOGGER.critical(to_raise)
             raise to_raise
 
+        # Override any existing parameters. Conflicts are allowed here - although this
+        # mechanism can also be used to set configuration options _not_ in the other
+        # sources - so do nothing about conflicting settings
+        if self.override_params is not None:
+            data, _ = merge_configuration_dicts(data, self.override_params)
+
         self.data = data
-        self.conflicts = conflicts
 
         LOGGER.info("Configuration data compiled.")
 
diff --git a/virtual_ecosystem/main.py b/virtual_ecosystem/main.py
index a142941c7..3b303cd4f 100644
--- a/virtual_ecosystem/main.py
+++ b/virtual_ecosystem/main.py
@@ -15,6 +15,7 @@
 
 from virtual_ecosystem.core import variables
 from virtual_ecosystem.core.config import Config
+from virtual_ecosystem.core.config_builder import ConfigurationLoader, get_configuration
 from virtual_ecosystem.core.core_components import CoreComponents
 from virtual_ecosystem.core.data import Data, merge_continuous_data_files
 from virtual_ecosystem.core.exceptions import ConfigurationError, InitialisationError
@@ -117,6 +118,15 @@ def ve_run(
         cfg_paths=cfg_paths, cfg_strings=cfg_strings, override_params=override_params
     )
 
+    # NEW configuration system
+    config_loader = ConfigurationLoader(
+        cfg_paths=cfg_paths,
+        cfg_strings=cfg_strings,
+        override_params=override_params,
+    )
+    config_loader.load_configuration_data()
+    new_config = get_configuration(config_loader.data)  # noqa: F841
+
     # Save the merged config if requested
     data_opt = config["core"]["data_output_options"]
     if data_opt["save_merged_config"]:

From c091a377e2f1f7f044fbcd97225f06766579ca1d Mon Sep 17 00:00:00 2001
From: David Orme <davidorme@users.noreply.github.com>
Date: Sat, 18 Oct 2025 00:07:55 +0100
Subject: [PATCH 3/7] Revised usage pattern

---
 tests/core/test_configuration_builder.py | 93 ++++++++++++++++++++----
 virtual_ecosystem/core/config_builder.py | 80 +++++++++++---------
 virtual_ecosystem/main.py                |  8 +-
 3 files changed, 125 insertions(+), 56 deletions(-)

diff --git a/tests/core/test_configuration_builder.py b/tests/core/test_configuration_builder.py
index 63b76a5e6..495fdae94 100644
--- a/tests/core/test_configuration_builder.py
+++ b/tests/core/test_configuration_builder.py
@@ -310,7 +310,9 @@ def test_ConfigurationLoader_init(
 
     # Just check normalisation and error conditions, no processing
     with raises as err:
-        cfg = ConfigurationLoader(cfg_paths=cfg_paths, cfg_strings=cfg_strings)
+        cfg = ConfigurationLoader(
+            cfg_paths=cfg_paths, cfg_strings=cfg_strings, autoload=False
+        )
 
         if not isinstance(raises, does_not_raise):
             assert str(err) == err_msg
@@ -380,11 +382,11 @@ def test_ConfigurationLoader_collect_config_paths(
     caplog.clear()
 
     # Init the class
-    cfg = ConfigurationLoader([shared_datadir / p for p in cfg_paths])
+    cfg = ConfigurationLoader([shared_datadir / p for p in cfg_paths], autoload=False)
 
     # Check that file resolution runs as expected
     with expected_exception:
-        cfg.collect_config_paths()
+        cfg._collect_config_paths()
 
     log_check(caplog, expected_log_entries)
 
@@ -417,13 +419,13 @@ def test_ConfigurationLoader_load_config_toml(
 
     # Initialise the ConfigurationLoader instance and manually resolve the config paths
     # to toml files
-    cfg = ConfigurationLoader([shared_datadir / p for p in cfg_paths])
-    cfg.collect_config_paths()
+    cfg = ConfigurationLoader([shared_datadir / p for p in cfg_paths], autoload=False)
+    cfg._collect_config_paths()
     caplog.clear()
 
     # Check that load_config_toml behaves as expected
     with expected_exception:
-        cfg.load_config_toml()
+        cfg._load_config_toml()
 
     log_check(caplog, expected_log_entries)
 
@@ -455,12 +457,12 @@ def test_ConfigurationLoader_load_config_toml_string(
     with open(Path(shared_datadir) / cfg_paths) as cfg_file:
         cfg_strings = cfg_file.read()
 
-    cfg = ConfigurationLoader(cfg_strings=cfg_strings)
+    cfg = ConfigurationLoader(cfg_strings=cfg_strings, autoload=False)
     caplog.clear()
 
     # Check that load_config_toml behaves as expected
     with expected_exception:
-        cfg.load_config_toml_string()
+        cfg._load_config_toml_string()
 
     log_check(caplog, expected_log_entries)
 
@@ -612,13 +614,14 @@ def test_ConfigurationLoader_load_configuration_data(
 
     # Initialise the Config instance
     config_builder = ConfigurationLoader(
-        cfg_paths=cfg_paths, override_params=override_params
+        cfg_paths=cfg_paths, override_params=override_params, autoload=False
     )
     caplog.clear()
 
     # Check that load_configuration_data behaves as expected
     with expected_exception:
-        config_builder.load_configuration_data()
+        config_builder._load_data()
+        config_builder._compile_data()
 
         # Test the values that were passed into valid configs.
 
@@ -641,13 +644,14 @@ def test_ConfigurationLoader_load_configuration_data(
 
     # Initialise the Config instance
     config_builder = ConfigurationLoader(
-        cfg_strings=cfg_strings, override_params=override_params
+        cfg_strings=cfg_strings, override_params=override_params, autoload=False
     )
     caplog.clear()
 
     # Check that load_configuration_data behaves as expected
     with expected_exception:
-        config_builder.load_configuration_data()
+        config_builder._load_data()
+        config_builder._compile_data()
 
         # Test the values that were passed into valid configs.
 
@@ -734,13 +738,72 @@ def test_build_configuration_model(requested, outcome, expected):
         ),
     ),
 )
-def test_get_configuration(caplog, data, outcome, expected_attr_values, expected_log):
+def test_generate_configuration(
+    caplog, data, outcome, expected_attr_values, expected_log
+):
+    """Tests the generate_configuration function."""
+    from virtual_ecosystem.core.config_builder import generate_configuration
+
+    with outcome:
+        # Run the function
+        config = generate_configuration(data=data)
+
+        # Do we get a model
+        assert isinstance(config, BaseModel)
+
+        # Are the attributes as expected
+        for attr_path, expected_value in expected_attr_values:
+            assert expected_value == attrgetter(attr_path)(config)
+
+        # Does the log contain the expected outputs
+        for message in expected_log:
+            assert record_found_in_log(caplog=caplog, find=message)
+
+
+@pytest.mark.parametrize(
+    argnames="cfg_strings, outcome, expected_attr_values, expected_log",
+    argvalues=(
+        pytest.param(
+            "[core]\n[core.grid]\ncell_nx =  10\n",
+            does_not_raise(),
+            (("core.grid.cell_nx", 10),),
+            ((INFO, "Configuration validated."),),
+            id="core_only",
+        ),
+        pytest.param(
+            [
+                "[core]\n[core.grid]\ncell_nx =  10\n",
+                "[plants]\n[plants.constants]\ndsr_to_ppfd = 0.2",
+            ],
+            does_not_raise(),
+            (("core.grid.cell_nx", 10), ("plants.constants.dsr_to_ppfd", 0.2)),
+            ((INFO, "Configuration validated."),),
+            id="core_plants",
+        ),
+        pytest.param(
+            [
+                "[core]\n[core.grid]\ncell_nx =  10\n",
+                "[plants]\n[plants.constants]\ndsr_to_ppfd = a",
+            ],
+            pytest.raises(ConfigurationError),
+            None,
+            (
+                (ERROR, "plants.constants.dsr_to_ppfd = a"),
+                (CRITICAL, "Configuration validation failed. See errors above."),
+            ),
+            id="core_plants_bad_type",
+        ),
+    ),
+)
+def test_ConfigurationLoader_get_configuration(
+    caplog, cfg_strings, outcome, expected_attr_values, expected_log
+):
     """Tests the get_configuration function."""
-    from virtual_ecosystem.core.config_builder import get_configuration
+    from virtual_ecosystem.core.config_builder import ConfigurationLoader
 
     with outcome:
         # Run the function
-        config = get_configuration(data=data)
+        config = ConfigurationLoader(cfg_strings=cfg_strings).get_configuration()
 
         # Do we get a model
         assert isinstance(config, BaseModel)
diff --git a/virtual_ecosystem/core/config_builder.py b/virtual_ecosystem/core/config_builder.py
index ead0150fb..828e77505 100644
--- a/virtual_ecosystem/core/config_builder.py
+++ b/virtual_ecosystem/core/config_builder.py
@@ -1,13 +1,21 @@
 """The :mod:`~virtual_ecosystem.core.config_builder` provides tools to load a set of
 TOML formatted configuration dictionaries, either from files or from strings. String
 inputs are primarily intended for use in configuring models for testing, where it is
-more convenient to simply provide a string. The main class :class:`ConfigLoader` handles
-the loading of configuration data and compiling multiple sources into a single
-dictionary of configuration data.
+more convenient to simply provide a string.
 
-The :func:`get_configuration` function:
+The main class :class:`ConfigLoader` handles the loading of configuration data and
+compiling multiple sources into a single dictionary of configuration data. The public
+:meth:`ConfigLoader.get_configuration` method then passes the compiled data in the model
+instance through the :method:`generate_configuration` function to return the actual
+configuration object. Canonical use looks like this:
 
-* takes a compiled configuration document,
+.. code-block:: python
+
+    config_object = ConfigurationLoader(...).get_configuration()
+
+The :func:`generate_configuration` function:
+
+* takes a compiled dictionary of configuration settings,
 * assembles a pydantic validation model class using the configuration validators for
   each of the requested science modules, and
 * passes the data through the validator to return a validated configuration model for
@@ -231,6 +239,8 @@ class ConfigurationLoader:
         cfg_strings: A string or list of strings containing TOML formatted configuration
             data.
         override_params: Extra parameters provided by the user.
+        autoload: A boolean flag that can be used to turn off automatic data loading and
+            compilation.
     """
 
     def __init__(
@@ -238,6 +248,7 @@ def __init__(
         cfg_paths: str | Path | Sequence[str | Path] = [],
         cfg_strings: str | list[str] = [],
         override_params: dict[str, Any] | None = None,
+        autoload: bool = True,
     ) -> None:
         # Define attributes
         self.cfg_paths: list[Path] = []
@@ -292,23 +303,32 @@ def __init__(
                 else [Path(p) for p in cfg_paths]
             )
 
-    def load_configuration_data(self):
-        """Loading configuration data.
+        if autoload:
+            self._load_data()
+            self._compile_data()
+
+    def _load_data(self):
+        """Load configuration data.
 
         This method loads configuration data from the sources set when the class
-        instance was created. The method then attempts to compile the sources into a
-        single compiled dictionary of configuration data, ready for validation and
-        conversion to a configuration model.
+        instance was created.
         """
         if self.from_cfg_strings:
             # Load the TOML content
-            self.load_config_toml_string()
+            self._load_config_toml_string()
         else:
             # Load the TOML content from resolved paths and resolve file paths
             # within configuration files.
-            self.collect_config_paths()
-            self.load_config_toml()
-            self.resolve_config_file_paths()
+            self._collect_config_paths()
+            self._load_config_toml()
+            self._resolve_config_file_paths()
+
+    def _compile_data(self):
+        """Compile configuration data.
+
+        This method compiles loaded configuration data into a single data dictionary,
+        warning of conflicting or repeated settings across the sources.
+        """
 
         data, conflicts = compile_configuration_data(list(self.toml_contents.values()))
 
@@ -331,7 +351,7 @@ def load_configuration_data(self):
 
         LOGGER.info("Configuration data compiled.")
 
-    def collect_config_paths(self) -> None:
+    def _collect_config_paths(self) -> None:
         """Collect TOML config files from provided paths.
 
         The :class:`~virtual_ecosystem.core.config.Config` class is initialised with a
@@ -384,7 +404,7 @@ def collect_config_paths(self) -> None:
 
         LOGGER.info(f"Config paths resolve to {len(self.toml_files)} files")
 
-    def load_config_toml(self) -> None:
+    def _load_config_toml(self) -> None:
         """Load the contents of resolved configuration files.
 
         This method populates the
@@ -418,7 +438,7 @@ def load_config_toml(self) -> None:
             LOGGER.critical(to_raise)
             raise to_raise
 
-    def load_config_toml_string(self) -> None:
+    def _load_config_toml_string(self) -> None:
         """Load the contents of a config provided as a string.
 
         This method populates the
@@ -442,7 +462,7 @@ def load_config_toml_string(self) -> None:
 
         LOGGER.info("Config TOML loaded from config strings")
 
-    def resolve_config_file_paths(self) -> None:
+    def _resolve_config_file_paths(self) -> None:
         """Resolve the locations of configured file paths.
 
         Configuration files can contain paths to other resources, such as the paths to
@@ -468,6 +488,11 @@ def resolve_config_file_paths(self) -> None:
                     LOGGER.critical(excep)
                     raise excep
 
+    def get_configuration(self) -> Configuration:
+        """Get the configuration instance for the loaded configuration data."""
+
+        return generate_configuration(self.data)
+
 
 def build_configuration_model(requested_modules: list[str]) -> type[Configuration]:
     """Build a schema to validate the model configuration.
@@ -508,7 +533,7 @@ def build_configuration_model(requested_modules: list[str]) -> type[Configuratio
     return combined_model
 
 
-def get_configuration(data: dict[str, Any] = {}) -> Configuration:
+def generate_configuration(data: dict[str, Any] = {}) -> Configuration:
     """Generate a configuration model from configuration data.
 
     This method takes a dictionary of configuration data - typically loaded and compiled
@@ -553,20 +578,3 @@ def get_configuration(data: dict[str, Any] = {}) -> Configuration:
     LOGGER.info("Configuration validated.")
 
     return configuration
-
-
-# def override_config(self, override_params: dict[str, Any]) -> None:
-#     """Override any parameters desired.
-
-#     Args:
-#         override_params: Extra parameter settings
-#     """
-#     updated, conflicts = config_merge(self, override_params, conflicts=tuple())
-
-#     # Conflicts are not errors as we want users to be able to override parameters
-#     if conflicts:
-#         LOGGER.info(
-#             "The following parameter values were overridden: " + ", ".join(conflicts)
-#         )
-
-#     self.update(updated)
diff --git a/virtual_ecosystem/main.py b/virtual_ecosystem/main.py
index 3b303cd4f..aa8b58daa 100644
--- a/virtual_ecosystem/main.py
+++ b/virtual_ecosystem/main.py
@@ -15,7 +15,7 @@
 
 from virtual_ecosystem.core import variables
 from virtual_ecosystem.core.config import Config
-from virtual_ecosystem.core.config_builder import ConfigurationLoader, get_configuration
+from virtual_ecosystem.core.config_builder import ConfigurationLoader
 from virtual_ecosystem.core.core_components import CoreComponents
 from virtual_ecosystem.core.data import Data, merge_continuous_data_files
 from virtual_ecosystem.core.exceptions import ConfigurationError, InitialisationError
@@ -119,13 +119,11 @@ def ve_run(
     )
 
     # NEW configuration system
-    config_loader = ConfigurationLoader(
+    new_config = ConfigurationLoader(  # noqa: F841
         cfg_paths=cfg_paths,
         cfg_strings=cfg_strings,
         override_params=override_params,
-    )
-    config_loader.load_configuration_data()
-    new_config = get_configuration(config_loader.data)  # noqa: F841
+    ).get_configuration()
 
     # Save the merged config if requested
     data_opt = config["core"]["data_output_options"]

From b40cc26e2e2e8706e8eff363e502687d5c9b68cb Mon Sep 17 00:00:00 2001
From: David Orme <davidorme@users.noreply.github.com>
Date: Mon, 20 Oct 2025 12:06:06 +0100
Subject: [PATCH 4/7] Making changes for proposed option 4 in design Q in #1093

---
 tests/core/test_configuration.py              | 20 ++++-----
 .../test_modules/one_model/model_config.py    |  2 +-
 virtual_ecosystem/core/config_builder.py      | 14 +++---
 virtual_ecosystem/core/configuration.py       | 29 ++++++++++++-
 virtual_ecosystem/core/model_config.py        |  2 +-
 virtual_ecosystem/core/registry.py            | 43 ++++++++++++++++---
 .../models/abiotic/model_config.py            |  2 +-
 .../models/abiotic_simple/model_config.py     |  2 +-
 .../models/animal/model_config.py             |  2 +-
 .../models/hydrology/model_config.py          |  2 +-
 .../models/litter/model_config.py             |  2 +-
 .../models/plants/model_config.py             |  2 +-
 virtual_ecosystem/models/soil/model_config.py |  2 +-
 .../models/testing/model_config.py            |  2 +-
 14 files changed, 93 insertions(+), 33 deletions(-)

diff --git a/tests/core/test_configuration.py b/tests/core/test_configuration.py
index 354833997..646766b3b 100644
--- a/tests/core/test_configuration.py
+++ b/tests/core/test_configuration.py
@@ -14,20 +14,20 @@ def test_pydantic(tmp_path):
     """Builds a combined config model from all models and then dumps and reloads it."""
 
     modules = (
-        "virtual_ecosystem.core",
-        "virtual_ecosystem.models.abiotic",
-        "virtual_ecosystem.models.abiotic_simple",
-        "virtual_ecosystem.models.animal",
-        "virtual_ecosystem.models.hydrology",
-        "virtual_ecosystem.models.litter",
-        "virtual_ecosystem.models.plants",
-        "virtual_ecosystem.models.soil",
+        ("virtual_ecosystem.core", "CoreConfiguration"),
+        ("virtual_ecosystem.models.abiotic", "AbioticConfiguration"),
+        ("virtual_ecosystem.models.abiotic_simple", "AbioticSimpleConfiguration"),
+        ("virtual_ecosystem.models.animal", "AnimalConfiguration"),
+        ("virtual_ecosystem.models.hydrology", "HydrologyConfiguration"),
+        ("virtual_ecosystem.models.litter", "LitterConfiguration"),
+        ("virtual_ecosystem.models.plants", "PlantsConfiguration"),
+        ("virtual_ecosystem.models.soil", "SoilConfiguration"),
     )
 
     submodel_details = {}
-    for module_name in modules:
+    for module_name, config_name in modules:
         module = import_module(f"{module_name}.model_config")
-        config_class = getattr(module, "ModelConfiguration")
+        config_class = getattr(module, config_name)
         submodel_details[module_name.split(".")[-1]] = (config_class, config_class())
 
     # Combine
diff --git a/tests/core/test_modules/one_model/model_config.py b/tests/core/test_modules/one_model/model_config.py
index dfc77f06c..264887de1 100644
--- a/tests/core/test_modules/one_model/model_config.py
+++ b/tests/core/test_modules/one_model/model_config.py
@@ -3,7 +3,7 @@
 from virtual_ecosystem.core.configuration import ModelConfigurationRoot
 
 
-class ModelConfiguration(ModelConfigurationRoot):
+class OneModelConfiguration(ModelConfigurationRoot):
     """Root configuration class for the testing model."""
 
     numeric_value: float = 1.0
diff --git a/virtual_ecosystem/core/config_builder.py b/virtual_ecosystem/core/config_builder.py
index 828e77505..b54943d28 100644
--- a/virtual_ecosystem/core/config_builder.py
+++ b/virtual_ecosystem/core/config_builder.py
@@ -31,7 +31,7 @@
 
 from pydantic import ValidationError, create_model
 
-from virtual_ecosystem.core.configuration import Configuration
+from virtual_ecosystem.core.configuration import CompiledConfiguration
 from virtual_ecosystem.core.exceptions import ConfigurationError
 from virtual_ecosystem.core.logger import LOGGER
 from virtual_ecosystem.core.registry import MODULE_REGISTRY, register_module
@@ -488,13 +488,15 @@ def _resolve_config_file_paths(self) -> None:
                     LOGGER.critical(excep)
                     raise excep
 
-    def get_configuration(self) -> Configuration:
+    def get_configuration(self) -> CompiledConfiguration:
         """Get the configuration instance for the loaded configuration data."""
 
         return generate_configuration(self.data)
 
 
-def build_configuration_model(requested_modules: list[str]) -> type[Configuration]:
+def build_configuration_model(
+    requested_modules: list[str],
+) -> type[CompiledConfiguration]:
     """Build a schema to validate the model configuration.
 
     This method identifies the modules to be configured from the top-level
@@ -527,13 +529,15 @@ def build_configuration_model(requested_modules: list[str]) -> type[Configuratio
     # requested module. Mypy does not like this, but it seems to be used as intended:
     # https://docs.pydantic.dev/latest/concepts/models/#dynamic-model-creation
     combined_model = create_model(
-        "Configuration", **{fname: (cname, cname()) for fname, cname in submodels}
+        "CompiledConfiguration",
+        __base__=CompiledConfiguration,
+        **{fname: (cname, cname()) for fname, cname in submodels},
     )  # type: ignore[call-overload]
 
     return combined_model
 
 
-def generate_configuration(data: dict[str, Any] = {}) -> Configuration:
+def generate_configuration(data: dict[str, Any] = {}) -> CompiledConfiguration:
     """Generate a configuration model from configuration data.
 
     This method takes a dictionary of configuration data - typically loaded and compiled
diff --git a/virtual_ecosystem/core/configuration.py b/virtual_ecosystem/core/configuration.py
index 0a57bb3ac..da02d0c48 100644
--- a/virtual_ecosystem/core/configuration.py
+++ b/virtual_ecosystem/core/configuration.py
@@ -41,8 +41,35 @@ class Configuration(BaseModel):
     model_config = ConfigDict(use_attribute_docstrings=True)
 
 
+class CompiledConfiguration(Configuration):
+    """Compiled configuration class for Virtual Ecosystem models.
+
+    This class is used as the base for dynamically compiled complete model returned by
+    the ``ConfigurationLoader(...).get_configuration()`` method. It provides a shared
+    method to extract specific model configurations by name. This is needed because the
+    dynamic creation means that model fields are not explicitly declared, so `mypy` gets
+    does not handle ``configuration.plants``, but we can use
+    `configuration.get_subconfiguration("plants")` instead.
+    """
+
+    def get_subconfiguration(self, name: str) -> Configuration:
+        """Get a named subconfiguration object from a compiled configuration.
+
+        This method can be used to extract model configurations or the core
+        configuration from a compiled configuration instance.
+
+        Args:
+            name: The required subconfiguration.
+        """
+
+        try:
+            return getattr(self, name)
+        except AttributeError:
+            raise AttributeError(f"Model configuration for {name} not loaded")
+
+
 class ModelConfigurationRoot(Configuration):
-    """Root configuration class for models.
+    """Root configuration class for individual Virtual Ecosystem models.
 
     This model provides a common Pydantic base class that must be used to define
     the root configuration class of a Virtual Ecosystem model. Each model must define an
diff --git a/virtual_ecosystem/core/model_config.py b/virtual_ecosystem/core/model_config.py
index f7eba6130..b16f4e77f 100644
--- a/virtual_ecosystem/core/model_config.py
+++ b/virtual_ecosystem/core/model_config.py
@@ -214,7 +214,7 @@ class Variables(Configuration):
     variable: tuple[DataSource, ...] = (DataSource(), DataSource())
 
 
-class ModelConfiguration(Configuration):
+class CoreConfiguration(Configuration):
     """The core model configuration."""
 
     constants: CoreConstants = CoreConstants()
diff --git a/virtual_ecosystem/core/registry.py b/virtual_ecosystem/core/registry.py
index 72ff2c1e6..0c69dab47 100644
--- a/virtual_ecosystem/core/registry.py
+++ b/virtual_ecosystem/core/registry.py
@@ -191,7 +191,9 @@ def register_module(module_name: str) -> None:
             )
 
     # Find and register the model configuration
-    model_config_class = get_model_configuration_class(module_name=module_name)
+    model_config_class = get_model_configuration_class(
+        module_name=module_name, module_name_short=module_name_short
+    )
 
     LOGGER.info("Configuration class registered for %s", module_name)
 
@@ -204,22 +206,49 @@ def register_module(module_name: str) -> None:
     )
 
 
-def get_model_configuration_class(module_name):
-    """Get the root configuration class for a model."""
+def get_model_configuration_class(module_name: str, module_name_short: str):
+    """Get the root configuration class for a model.
+
+    Discovery is name based, with the function attempting to retrieve a class based on
+    the model short name:
+
+    * ``plants`` -> ``PlantsConfiguration``,
+    * ``abiotic_simple`` -> ``AbioticSimpleConfiguration``
+
+    .. TODO:
+
+        It would probably be cleaner and more flexible to use explicit setting of the
+        configuration model name as a class attribute on the model definition, but that
+        requires more changes in the BaseModel and definitions, so use this string
+        pattern is being used to keep the rollout of the new config system simpler.
+
+    Args:
+        module_name: The full module name (e.g. ``virtual_ecosystem.models.plants``)
+        module_name_short: The short module name (e.g ``plants``)
+    """
 
     try:
+        # Raise ModuleNotFound if the configuration module is missing
         config_submodule = import_module(f"{module_name}.model_config")
-        model_config_class = getattr(config_submodule, "ModelConfiguration")
-        assert issubclass(model_config_class, Configuration)
+        # Raises Attribute error if the expected name is not found.
+        expected_config_class_name = (
+            "".join(word.capitalize() for word in module_name_short.split("_"))
+            + "Configuration"
+        )
+        model_config_class = getattr(config_submodule, expected_config_class_name)
+        # Raises a runtime error if the retrieved class is not a Configuration.
+        if not issubclass(model_config_class, Configuration):
+            raise RuntimeError
     except ModuleNotFoundError:
         raise RuntimeError(
             f"Model {module_name} does not provide a model_config submodule."
         )
     except AttributeError:
         raise RuntimeError(
-            f"A ModelConfiguration object is not found in {module_name}.model_config."
+            f"The {module_name}.model_config module does "
+            f"not contain {expected_config_class_name}"
         )
-    except AssertionError:
+    except RuntimeError:
         raise RuntimeError(
             f"Model {module_name} config class does does inherit from `ConfigRoot`."
         )
diff --git a/virtual_ecosystem/models/abiotic/model_config.py b/virtual_ecosystem/models/abiotic/model_config.py
index 6155f49b5..e7423e04f 100644
--- a/virtual_ecosystem/models/abiotic/model_config.py
+++ b/virtual_ecosystem/models/abiotic/model_config.py
@@ -169,7 +169,7 @@ class AbioticConstants(Configuration):
     """Initial non-zero fill value for energy fluxes, [W m-2]."""
 
 
-class ModelConfiguration(ModelConfigurationRoot):
+class AbioticConfiguration(ModelConfigurationRoot):
     """The abiotic model configuration."""
 
     constants: AbioticConstants = AbioticConstants()
diff --git a/virtual_ecosystem/models/abiotic_simple/model_config.py b/virtual_ecosystem/models/abiotic_simple/model_config.py
index c7fffe80c..a90ac4851 100644
--- a/virtual_ecosystem/models/abiotic_simple/model_config.py
+++ b/virtual_ecosystem/models/abiotic_simple/model_config.py
@@ -60,7 +60,7 @@ class AbioticSimpleBounds(Configuration):
     """Bounds for soil temperature, [C]."""
 
 
-class ModelConfiguration(ModelConfigurationRoot):
+class AbioticSimpleConfiguration(ModelConfigurationRoot):
     """Root configuration class for the abiotic simple model."""
 
     constants: AbioticSimpleConstants = AbioticSimpleConstants()
diff --git a/virtual_ecosystem/models/animal/model_config.py b/virtual_ecosystem/models/animal/model_config.py
index 44272d203..3d6c9b5e7 100644
--- a/virtual_ecosystem/models/animal/model_config.py
+++ b/virtual_ecosystem/models/animal/model_config.py
@@ -357,7 +357,7 @@ def get_population_density_terms(
     """The probability a seasonal migration event occurs per time step (month)."""
 
 
-class ModelConfiguration(ModelConfigurationRoot):
+class AnimalConfiguration(ModelConfigurationRoot):
     """Root configuration class for the animal model."""
 
     functional_group_definitions_path: str = ""
diff --git a/virtual_ecosystem/models/hydrology/model_config.py b/virtual_ecosystem/models/hydrology/model_config.py
index 3a59b3d2d..2e8579922 100644
--- a/virtual_ecosystem/models/hydrology/model_config.py
+++ b/virtual_ecosystem/models/hydrology/model_config.py
@@ -208,7 +208,7 @@ class HydrologyConstants(Configuration):
     """Factor to convert matric potential from m to kPa."""
 
 
-class ModelConfiguration(ModelConfigurationRoot):
+class HydrologyConfiguration(ModelConfigurationRoot):
     """Root configuration clas for the hydrology model."""
 
     initial_soil_moisture: float = Field(gt=0, default=0.5)
diff --git a/virtual_ecosystem/models/litter/model_config.py b/virtual_ecosystem/models/litter/model_config.py
index 4ed9f13d2..0205db9c6 100644
--- a/virtual_ecosystem/models/litter/model_config.py
+++ b/virtual_ecosystem/models/litter/model_config.py
@@ -161,7 +161,7 @@ class LitterConstants(Configuration):
     """
 
 
-class ModelConfiguration(ModelConfigurationRoot):
+class LitterConfiguration(ModelConfigurationRoot):
     """Root configuration clas for the litter model."""
 
     constants: LitterConstants = LitterConstants()
diff --git a/virtual_ecosystem/models/plants/model_config.py b/virtual_ecosystem/models/plants/model_config.py
index 64b2d552e..7f3981663 100644
--- a/virtual_ecosystem/models/plants/model_config.py
+++ b/virtual_ecosystem/models/plants/model_config.py
@@ -121,7 +121,7 @@ class PlantsExportConfig(Configuration):
     """A float format string to control data precision in export files."""
 
 
-class ModelConfiguration(ModelConfigurationRoot):
+class PlantsConfiguration(ModelConfigurationRoot):
     """Root configuration class for the plants model."""
 
     pft_definitions_path: FILEPATH_PLACEHOLDER
diff --git a/virtual_ecosystem/models/soil/model_config.py b/virtual_ecosystem/models/soil/model_config.py
index de1eff60b..b97bef96a 100644
--- a/virtual_ecosystem/models/soil/model_config.py
+++ b/virtual_ecosystem/models/soil/model_config.py
@@ -375,7 +375,7 @@ class SoilConstants(Configuration):
     """
 
 
-class ModelConfiguration(ModelConfigurationRoot):
+class SoilConfiguration(ModelConfigurationRoot):
     """Root configuration class for the soil model."""
 
     enzyme_class_definition_path: str = ""
diff --git a/virtual_ecosystem/models/testing/model_config.py b/virtual_ecosystem/models/testing/model_config.py
index dfc77f06c..3a4d7b080 100644
--- a/virtual_ecosystem/models/testing/model_config.py
+++ b/virtual_ecosystem/models/testing/model_config.py
@@ -3,7 +3,7 @@
 from virtual_ecosystem.core.configuration import ModelConfigurationRoot
 
 
-class ModelConfiguration(ModelConfigurationRoot):
+class TestingConfiguration(ModelConfigurationRoot):
     """Root configuration class for the testing model."""
 
     numeric_value: float = 1.0

From bc23c23a86e2e14de470e7380b70f5667ac44324 Mon Sep 17 00:00:00 2001
From: David Orme <davidorme@users.noreply.github.com>
Date: Mon, 20 Oct 2025 15:26:14 +0100
Subject: [PATCH 5/7] @dalonsoa's cunning type hinting fix

---
 virtual_ecosystem/core/configuration.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/virtual_ecosystem/core/configuration.py b/virtual_ecosystem/core/configuration.py
index da02d0c48..b638a63fe 100644
--- a/virtual_ecosystem/core/configuration.py
+++ b/virtual_ecosystem/core/configuration.py
@@ -14,13 +14,16 @@
 found :doc:`here </using_the_ve/configuration/config>`.
 """  # noqa: D205
 
+from collections.abc import Callable
 from pathlib import Path
-from typing import Annotated, TypeAlias
+from typing import Annotated, TypeAlias, TypeVar
 
 from pydantic import BaseModel, BeforeValidator, ConfigDict, Field, FilePath
 from pydantic._internal._model_construction import ModelMetaclass
 from pydantic_core import PydanticUndefined
 
+T = TypeVar("T")
+
 RST_TO_MD = [
     (":cite:t:", "{cite:t}"),
     (":cite:p:", "{cite:p}"),
@@ -52,11 +55,21 @@ class CompiledConfiguration(Configuration):
     `configuration.get_subconfiguration("plants")` instead.
     """
 
-    def get_subconfiguration(self, name: str) -> Configuration:
+    def get_subconfiguration(self, name: str, _: Callable[..., T]) -> T:
         """Get a named subconfiguration object from a compiled configuration.
 
         This method can be used to extract model configurations or the core
-        configuration from a compiled configuration instance.
+        configuration from a compiled configuration instance. The second argument is
+        used to provide support for static typing in `mypy` by explicitly providing the
+        type of the returned object. The method should be called as - for example:
+
+        .. code-block:: Python
+
+            subconfig: SubConfigConfiguration = (
+                compiled_configuration_instance.get_subconfiguration(
+                    "subconfig", SubConfigConfiguration
+                )
+            )
 
         Args:
             name: The required subconfiguration.

From c6dd49d86b489316c3d8ce61e8b278016fcd8418 Mon Sep 17 00:00:00 2001
From: David Orme <davidorme@users.noreply.github.com>
Date: Mon, 20 Oct 2025 17:33:03 +0100
Subject: [PATCH 6/7] Doc fix

---
 docs/source/using_the_ve/configuration/new_config.md | 5 +++--
 virtual_ecosystem/core/configuration.py              | 3 +++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/docs/source/using_the_ve/configuration/new_config.md b/docs/source/using_the_ve/configuration/new_config.md
index 3fccb7f77..9c4b41c95 100644
--- a/docs/source/using_the_ve/configuration/new_config.md
+++ b/docs/source/using_the_ve/configuration/new_config.md
@@ -21,6 +21,7 @@ kernelspec:
 import tomli_w
 import json
 from virtual_ecosystem.core.configuration import model_config_to_html, Configuration
+from virtual_ecosystem.core.registry import get_model_configuration_class
 from IPython.display import display, Markdown, HTML
 from importlib import import_module
 
@@ -32,8 +33,8 @@ def get_config_class(module_name: str) -> Configuration:
     """
 
     # Get the config class for the module
-    module = import_module(f"{module_name}.model_config")
-    config_class = getattr(module, "ModelConfiguration")
+    short_name = module_name.split(".")[-1]
+    config_class = get_model_configuration_class(module_name, short_name)
 
     return module_name.split(".")[-1], config_class
 
diff --git a/virtual_ecosystem/core/configuration.py b/virtual_ecosystem/core/configuration.py
index b638a63fe..e87eca200 100644
--- a/virtual_ecosystem/core/configuration.py
+++ b/virtual_ecosystem/core/configuration.py
@@ -23,6 +23,7 @@
 from pydantic_core import PydanticUndefined
 
 T = TypeVar("T")
+"""Generic type to support static typing of subconfigurations."""
 
 RST_TO_MD = [
     (":cite:t:", "{cite:t}"),
@@ -73,6 +74,8 @@ def get_subconfiguration(self, name: str, _: Callable[..., T]) -> T:
 
         Args:
             name: The required subconfiguration.
+            _: The class of objected returned by the method. This is not used by the
+                method itself but is used to support static typing of the return value.
         """
 
         try:

From 7104dbaab2d335e469bf33b8ee39555f53dd9e69 Mon Sep 17 00:00:00 2001
From: David Orme <davidorme@users.noreply.github.com>
Date: Tue, 21 Oct 2025 10:54:30 +0100
Subject: [PATCH 7/7] Remove micromethod from ConfigLoader

---
 tests/core/test_configuration_builder.py | 12 ++++++---
 virtual_ecosystem/core/config_builder.py | 31 ++++++++++--------------
 virtual_ecosystem/main.py                | 10 +++++---
 3 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/tests/core/test_configuration_builder.py b/tests/core/test_configuration_builder.py
index 495fdae94..e8f41d863 100644
--- a/tests/core/test_configuration_builder.py
+++ b/tests/core/test_configuration_builder.py
@@ -795,15 +795,19 @@ def test_generate_configuration(
         ),
     ),
 )
-def test_ConfigurationLoader_get_configuration(
+def test_ConfigurationLoader_to_generate_configuration(
     caplog, cfg_strings, outcome, expected_attr_values, expected_log
 ):
-    """Tests the get_configuration function."""
-    from virtual_ecosystem.core.config_builder import ConfigurationLoader
+    """Tests the flow through the loader to the generate_configuration function."""
+    from virtual_ecosystem.core.config_builder import (
+        ConfigurationLoader,
+        generate_configuration,
+    )
 
     with outcome:
         # Run the function
-        config = ConfigurationLoader(cfg_strings=cfg_strings).get_configuration()
+        config_data = ConfigurationLoader(cfg_strings=cfg_strings)
+        config = generate_configuration(config_data.data)
 
         # Do we get a model
         assert isinstance(config, BaseModel)
diff --git a/virtual_ecosystem/core/config_builder.py b/virtual_ecosystem/core/config_builder.py
index b54943d28..63cccf23a 100644
--- a/virtual_ecosystem/core/config_builder.py
+++ b/virtual_ecosystem/core/config_builder.py
@@ -4,16 +4,9 @@
 more convenient to simply provide a string.
 
 The main class :class:`ConfigLoader` handles the loading of configuration data and
-compiling multiple sources into a single dictionary of configuration data. The public
-:meth:`ConfigLoader.get_configuration` method then passes the compiled data in the model
-instance through the :method:`generate_configuration` function to return the actual
-configuration object. Canonical use looks like this:
+compiling multiple sources into a single dictionary of configuration data.
 
-.. code-block:: python
-
-    config_object = ConfigurationLoader(...).get_configuration()
-
-The :func:`generate_configuration` function:
+The :func:`generate_configuration` function then:
 
 * takes a compiled dictionary of configuration settings,
 * assembles a pydantic validation model class using the configuration validators for
@@ -21,6 +14,13 @@
 * passes the data through the validator to return a validated configuration model for
   the simulation.
 
+Canonical usage patterns for the module would be:
+
+.. code-block:: python
+
+    config_data = ConfigurationLoader(...)
+    config_object = generate_configuration(config_data.data)
+
 """  # noqa: D205
 
 import tomllib
@@ -488,11 +488,6 @@ def _resolve_config_file_paths(self) -> None:
                     LOGGER.critical(excep)
                     raise excep
 
-    def get_configuration(self) -> CompiledConfiguration:
-        """Get the configuration instance for the loaded configuration data."""
-
-        return generate_configuration(self.data)
-
 
 def build_configuration_model(
     requested_modules: list[str],
@@ -540,13 +535,13 @@ def build_configuration_model(
 def generate_configuration(data: dict[str, Any] = {}) -> CompiledConfiguration:
     """Generate a configuration model from configuration data.
 
-    This method takes a dictionary of configuration data - typically loaded and compiled
-    using the :class:`ConfigurationLoader` class - and tries to build a validated
-    configuration model.
+    This method takes a dictionary of configuration data and tries to build a validated
+    configuration model. The input data is typically loaded and compiled using the
+    :class:`ConfigurationLoader` class.
 
     The first step is to take the root sections in the configuration data - indicating
     the various science models requested for a simulation - and uses those to build a
-    composite configuration validator.
+    composite configuration validator class.
 
     The provided data is then passed into the validator. If validation is successful
     then a validated configuration object is returned, otherwise the specific validation
diff --git a/virtual_ecosystem/main.py b/virtual_ecosystem/main.py
index aa8b58daa..d7f4925c4 100644
--- a/virtual_ecosystem/main.py
+++ b/virtual_ecosystem/main.py
@@ -15,7 +15,10 @@
 
 from virtual_ecosystem.core import variables
 from virtual_ecosystem.core.config import Config
-from virtual_ecosystem.core.config_builder import ConfigurationLoader
+from virtual_ecosystem.core.config_builder import (
+    ConfigurationLoader,
+    generate_configuration,
+)
 from virtual_ecosystem.core.core_components import CoreComponents
 from virtual_ecosystem.core.data import Data, merge_continuous_data_files
 from virtual_ecosystem.core.exceptions import ConfigurationError, InitialisationError
@@ -119,11 +122,12 @@ def ve_run(
     )
 
     # NEW configuration system
-    new_config = ConfigurationLoader(  # noqa: F841
+    config_data = ConfigurationLoader(
         cfg_paths=cfg_paths,
         cfg_strings=cfg_strings,
         override_params=override_params,
-    ).get_configuration()
+    )
+    configuration = generate_configuration(config_data.data)  # noqa: F841
 
     # Save the merged config if requested
     data_opt = config["core"]["data_output_options"]