diff --git a/docs/source/development/defining_new_models.md b/docs/source/development/defining_new_models.md index b577d925f..55a7658b6 100644 --- a/docs/source/development/defining_new_models.md +++ b/docs/source/development/defining_new_models.md @@ -193,47 +193,73 @@ def cleanup(self) -> None: """Placeholder function for freshwater model cleanup.""" ``` -## Including a configuration schema +## Writing a schema for the module configuration -A detailed description of the configuration system can be found -[here](../virtual_rainforest/core/config.md). The key thing to note is that a `JSON` -schema file should be saved within your model folder. This file should have a name of -the format "{MODEL_NAME}_schema.json". In order for this schema to be generally -accessible, it needs to be registered in the model's `__init__.py` (i.e. the -`__init__.py` in the model folder). This means that when the model is imported, it's -schema is automatically added to the schema registry. +The root module directory **must** also contain a [JSONSchema](https://json-schema.org/) +document that defines the configuration options for the model. A detailed description +of the configuration system works can be found +[here](../virtual_rainforest/core/config.md) but the schema definition is used to +validate configuration files for a Virtual Rainforest simulation that uses your model. -```python -from virtual_rainforest.core.config import register_schema +So the example model used here would need to provide the file: +`virtual_rainforest/models/freshwater/freshwater_schema.json` -@register_schema("freshwater") -def schema() -> dict: - """Defines the schema that the freshwater module configuration should conform to.""" +Writing JSONSchema documents can be very tedious. The following tools may be of use: - schema_file = Path(__file__).parent.resolve() / "freshwater_schema.json" +* [https://www.jsonschema.net/app](https://www.jsonschema.net/app): this is a web + application that takes a data document - which is what the configuration file - and + automatically generates a JSON schema to validate it. You will need to then edit it + but you'll be starting with a valid schema! +* [https://jsonschemalint.com/](https://jsonschemalint.com/) works the other way. It + takes a data document and a schema and checks whether the data is compliant. This can + be useful for checking errors. - with schema_file.open() as f: - config_schema = json.load(f) +Both of those tools take data documents formatted as JSON as inputs, where we use TOML +configuration files, but there are lots of web tools to convert TOML to JSON and back. - return config_schema -``` +## Setting up the model `__init__.py` file + +All model directories need to include an `__init__.py` file. The simple presence of the +`__init__.py` file tells Python that the directory content should be treated as module, +but then the file needs to contain code to do two things: + +1. The `__init__.py` file needs to register the JSONSchema file for the module. The + {meth}`~virtual_rainforest.core.config.register_schema` function takes a module name + and the path to the schema file and then, after checking the file can be loaded and is + valid, adds the schema to the schema registry + {data}`~virtual_rainforest.core.config.SCHEMA_REGISTRY`. -## Ensuring that schema and models are always added to the registry +1. It also needs to import the main BaseModel subclass. So for example, it should import + `FreshwaterModel` from the `virtual_rainforest.models.freshwater.freshwater_model` + module. This gives a shorter reference for a commonly used object + (`virtual_rainforest.models.freshwater.FreshwaterModel`) but it also means + that the BaseModel class is always imported when the model module + (`virtual_rainforest.models.freshwater`) is imported. This is used when the package + is loaded to automatically discover all the BaseModel classes and register them. -At the moment, a configuration schema only gets added to the schema registry when the -model it belongs to is imported, and a `Model` class only gets added to the registry -when the class itself is imported. This is a problem because the script that runs the -main Virtual Rainforest simulation does not import these things directly. To circumvent -this these imports needed to be placed in the top level `__init__.py` file (the one in -the same folder as `main.py`). This won't pass the `pre-commit` checks unless `flake8` -checks are turned off for the relevant lines. It's only strictly necessary to import the -`Model` class, as this implicitly entails importing the specific model as a whole. -However, for the sake of clarity we currently include both imports. + The class is not going to actually be used within the file, so needs `#noqa: 401` + to suppress a `flake8` error. + +The resulting `__init__.py` file should then look something like this: ```python -# Import all module schema here to ensure that they are added to the registry -from virtual_rainforest.models.freshwater import schema # noqa +"""This is the freshwater model module. The module level docstring should contain a +short description of the overall model design and purpose. +""" # noqa: D204, D415 + +from importlib import resources -# Import models here so that they also end up in the registry -from virtual_rainforest.models.freshwater.model import FreshWaterModel # noqa +from virtual_rainforest.core.config import register_schema +from virtual_rainforest.models.freshwater.freshwater_model import ( + FreshWaterModel +) # noqa: 401 + +with resources.path( + "virtual_rainforest.models.freshwater", "freshwater_schema.json" +) as schema_file_path: + register_schema(module_name="freshwater", schema_file_path=schema_file_path) ``` + +When the `virtual_rainforest` package is loaded, it will automatically import +`virtual_rainforest.models.freshwater`. That will cause both the model and the schema to +be loaded and registered. diff --git a/tests/core/test_config.py b/tests/core/test_config.py index dfe8b7a5e..a2ee2d7b4 100644 --- a/tests/core/test_config.py +++ b/tests/core/test_config.py @@ -6,10 +6,12 @@ to date. """ +import json from contextlib import nullcontext as does_not_raise from logging import CRITICAL, ERROR, INFO from pathlib import Path +import jsonschema import pytest import virtual_rainforest.core.config as config @@ -290,72 +292,64 @@ def test_final_validation_log(caplog, shared_datadir, file_path, expected_log_en [ ( "core", - {}, + "", ValueError, ( ( CRITICAL, - "The module schema core is used multiple times, this shouldn't" - " be the case!", + "The module schema for core is already registered", ), ), ), ( "test", "najsnjasnda", - OSError, - ((CRITICAL, "Module schema test not valid JSON!"),), + json.JSONDecodeError, + ( + (ERROR, "JSON error in schema file"), + (CRITICAL, "Schema registration for test failed: check log"), + ), ), ( "bad_module_1", - {"type": "object", "propertie": {"bad_module_1": {}}}, - KeyError, + '{"type": "hobbit", "properties": {"bad_module_1": {}}}', + jsonschema.SchemaError, ( - ( - CRITICAL, - "Schema for bad_module_1 module incorrectly structured, " - "'properties' key missing!", - ), + (ERROR, "Module schema invalid in: "), + (CRITICAL, "Schema registration for bad_module_1 failed: check log"), ), ), ( "bad_module_2", - {"type": "object", "properties": {"bad_module_1": {}}}, - KeyError, + '{"type": "object", "properties": {"bad_module_1": {}}}', + ValueError, ( - ( - CRITICAL, - "Schema for bad_module_2 module incorrectly structured, " - "'bad_module_2' key missing!", - ), + (ERROR, "Missing key in module schema bad_module_2:"), + (CRITICAL, "Schema registration for bad_module_2 failed: check log"), ), ), ( "bad_module_3", - {"type": "object", "properties": {"bad_module_3": {}}}, - KeyError, + '{"type": "object", "properties": {"bad_module_3": {}}}', + ValueError, ( - ( - CRITICAL, - "Schema for bad_module_3 module incorrectly structured, 'required'" - " key missing!", - ), + (ERROR, "Missing key in module schema bad_module_3"), + (CRITICAL, "Schema registration for bad_module_3 failed: check log"), ), ), ], ) def test_register_schema_errors( - caplog, schema_name, schema, expected_exception, expected_log_entries + caplog, mocker, schema_name, schema, expected_exception, expected_log_entries ): """Test that the schema registering decorator throws the correct errors.""" - # Check that construct_combined_schema fails as expected - with pytest.raises(expected_exception): - @register_schema(schema_name) - def to_be_decorated() -> dict: - return schema + data = mocker.mock_open(read_data=schema) + mocker.patch("builtins.open", data) - to_be_decorated() + # Check that construct_combined_schema fails as expected + with pytest.raises(expected_exception): + register_schema(schema_name, "file_path") # Then check that the correct (critical error) log messages are emitted log_check(caplog, expected_log_entries) diff --git a/virtual_rainforest/__init__.py b/virtual_rainforest/__init__.py index 526c139b3..c543e556d 100644 --- a/virtual_rainforest/__init__.py +++ b/virtual_rainforest/__init__.py @@ -1,13 +1,18 @@ import importlib.metadata +import pkgutil +from importlib import import_module -# Import all module schema here to ensure that they are added to the registry -from virtual_rainforest.core import schema # noqa -from virtual_rainforest.models.abiotic import schema # noqa -from virtual_rainforest.models.abiotic.abiotic_model import AbioticModel # noqa -from virtual_rainforest.models.plants import schema # noqa -from virtual_rainforest.models.soil import schema # noqa +import virtual_rainforest.models as vfm -# Import models here so that they also end up in the registry -from virtual_rainforest.models.soil.soil_model import SoilModel # noqa +# Import the core module schema to register the schema +import_module("virtual_rainforest.core") + +# Autodiscover models in the models module to add their schema and BaseModel subclass +# All modules within virtual_rainforest.model are expected to: +# - import their BaseModel subclass to the module root, for example: +# from virtual_rainforest.models.soil.model import SoilModel # noqa: F401 +# - register their configuration schema using core.config.register_schema +for module_info in pkgutil.iter_modules(vfm.__path__): + import_module(f"virtual_rainforest.models.{module_info.name}") __version__ = importlib.metadata.version("virtual_rainforest") diff --git a/virtual_rainforest/core/__init__.py b/virtual_rainforest/core/__init__.py index 28e8333aa..94fa633db 100644 --- a/virtual_rainforest/core/__init__.py +++ b/virtual_rainforest/core/__init__.py @@ -27,19 +27,9 @@ configuration schema for the core submodules. """ # noqa: D205, D415 -import json -from pathlib import Path +from importlib import resources from virtual_rainforest.core.config import register_schema - -@register_schema("core") -def schema() -> dict: - """Defines the schema that the core module configuration should conform to.""" - - schema_file = Path(__file__).parent.resolve() / "core_schema.json" - - with schema_file.open() as f: - config_schema = json.load(f) - - return config_schema +with resources.path("virtual_rainforest.core", "core_schema.json") as schema_file_path: + register_schema(module_name="core", schema_file_path=schema_file_path) diff --git a/virtual_rainforest/core/config.py b/virtual_rainforest/core/config.py index 37d8afcc7..3f9e1ebef 100644 --- a/virtual_rainforest/core/config.py +++ b/virtual_rainforest/core/config.py @@ -3,48 +3,36 @@ of the virtual rainforest model. The basic details of how this system is used can be found :doc:`here `. -When a new module is defined a ``JSON`` file should be written, which includes the +When a new module is defined a ``JSONSchema`` file should be written, which includes the expected configuration tags, their expected types, and any constraints on their values (e.g. the number of soil layers being strictly positive). Additionally, where sensible default values exist (e.g. 1 week for the model time step) they should also be included in the schema. This schema should be saved in the folder of the module that it relates -to. In order to make this schema generally accessible to the ``vr`` package, it should -then be added to the schema registry. The -:func:`~virtual_rainforest.core.config.register_schema` decorator is used for this -purpose. - -Schema registration for each module takes place in the module's ``__init__.py``. Here, a -function is written that reads in the ``JSON`` file describing the schema. This function -is then decorated using :func:`~virtual_rainforest.core.config.register_schema`, which -results in the schema being added to the registry. The user provides a schema name to -the decorator to register the schema under, this should be unique, and generally should -just be the module name. An example of decorator usage is shown below: +to. In order to make this schema generally accessible the module's ``__init__.py`` +should call the :func:`~virtual_rainforest.core.config.register_schema` function, which +will load and validate the schema before adding it to the registry. You will need to +provides a module name to register the schema under, which should be unique to that +model. The function also requires the path to the schema file, which should be located +using the :meth:`importlib.resources.path()` context manager: .. code-block:: python - @register_schema("example_module") - def schema() -> dict: - schema_file = Path(__file__).parent.resolve() / "example_module_schema.json" + with resources.path( + "virtual_rainforest.models.soil", "soil_schema.json" + ) as schema_file_path: + register_schema(module_name="soil", schema_file_path=schema_file_path) - with schema_file.open() as f: - config_schema = json.load(f) - - return config_schema - -It's important to note that the schema will only be added to the registry if the module -``__init__`` is run. This means that somewhere in your chain of imports the module must -be imported. Currently this is tackled by importing all active modules in the top level -``__init__``, i.e. ``virtual_rainforest.__init__.py``. This ensures that any script -that imports ``virtual_rainforest`` will have implicitly imported all modules which -define schema, in turn ensuring that -:attr:`~virtual_rainforest.core.config.SCHEMA_REGISTRY` contains all necessary schema. +The base :mod:`virtual_rainforest` module will automatically import modules when it is +imported, which ensures that all modules schemas are registered in +:attr:`~virtual_rainforest.core.config.SCHEMA_REGISTRY`. """ # noqa: D205, D415 +import json import sys from collections import ChainMap from copy import deepcopy from pathlib import Path -from typing import Any, Callable, Generator, Iterator, Optional, Union +from typing import Any, Generator, Iterator, Optional, Union import dpath.util # type: ignore import tomli_w @@ -100,98 +88,124 @@ def log_all_validation_errors( raise to_raise -def validate_and_add_defaults( - validator_class: type[Draft202012Validator], -) -> type[Draft202012Validator]: - """Extend validator so that it can populate default values from the schema. +# Set up a JSON Schema validator that fills in default values + + +def set_defaults( + validator: type[Draft202012Validator], + properties: dict[str, Any], + instance: dict[str, Any], + schema: dict[str, Any], +) -> Iterator: + """Generate an iterator to populate schema defaults. + + This function is used to extend the Draft202012Validator to include automatic + insertion of default values from a schema where values are not specified. The + function signature follows the required JSON schema pattern: + + https://python-jsonschema.readthedocs.io/en/latest/creating/ Args: - validator_class: Validator to be extended + validator: a validator instance, + properties: the value of the property being validated within the instance + instance: the instance + schema: the schema Returns: - An extended validator class + An iterator to be applied to JSON schema entries. """ + for property, subschema in properties.items(): + if "default" in subschema: + instance.setdefault(property, subschema["default"]) - validate_properties = validator_class.VALIDATORS["properties"] - - def set_defaults( - validator: type[Draft202012Validator], - properties: dict[str, Any], - instance: dict[str, Any], - schema: dict[str, Any], - ) -> Iterator: - """Generate an iterator to populate defaults.""" - for property, subschema in properties.items(): - if "default" in subschema: - instance.setdefault(property, subschema["default"]) - - for error in validate_properties( - validator, - properties, - instance, - schema, - ): - yield error - - return validators.extend( - validator_class, - {"properties": set_defaults}, - ) + for error in Draft202012Validator.VALIDATORS["properties"]( + validator, + properties, + instance, + schema, + ): + yield error -# Make a global validator using the above function to allow for the adding of defaults -ValidatorWithDefaults = validate_and_add_defaults(Draft202012Validator) +ValidatorWithDefaults = validators.extend( + Draft202012Validator, {"properties": set_defaults} +) -def register_schema(module_name: str) -> Callable: - """Decorator function to add configuration schema to the registry. +def register_schema(module_name: str, schema_file_path: Path) -> None: + """Simple function to add configuration schema to the registry. Args: module_name: The name to register the schema under + schema_file_path: The file path to the JSON Schema file for the model + Raises: - ValueError: If the schema name has already been used - OSError: If the module schema is not valid JSON - KeyError: If a module schema is missing one of the required keys + ValueError: If the module name has already been used to register a schema """ - def wrap(func: Callable) -> Callable: - # Type the exception raising with a general base class - to_raise: Exception + if module_name in SCHEMA_REGISTRY: + excep = ValueError(f"The module schema for {module_name} is already registered") + LOGGER.critical(excep) + raise excep - if module_name in SCHEMA_REGISTRY: - to_raise = ValueError( - f"The module schema {module_name} is used multiple times, this " - f"shouldn't be the case!", - ) - LOGGER.critical(to_raise) - raise to_raise - else: - # Check that this is a valid schema - try: - Draft202012Validator.check_schema(func()) - except exceptions.SchemaError: - to_raise = OSError(f"Module schema {module_name} not valid JSON!") - LOGGER.critical(to_raise) - raise to_raise + try: + SCHEMA_REGISTRY[module_name] = get_schema(module_name, schema_file_path) + except Exception as excep: + LOGGER.critical(f"Schema registration for {module_name} failed: check log") + raise excep - # Check that relevant keys are included - try: - func()["properties"][module_name] - func()["required"] - except KeyError as err: - to_raise = KeyError( - f"Schema for {module_name} module incorrectly structured, {err} key" - f" missing!" - ) - LOGGER.critical(to_raise) - raise to_raise + LOGGER.info("Schema registered for module %s: %s ", module_name, schema_file_path) + + +def get_schema(module_name: str, schema_file_path: Path) -> dict: + """Function to load the JSON schema for a module. + + This function tries to load a JSON schema file and then - if the JSON loaded + correctly - checks that the JSON provides a valid JSON Schema. + + Args: + module_name: The name to register the schema under + schema_file_path: The file path to the JSON Schema file + + Raises: + FileNotFoundError: the schema path does not exist + JSONDecodeError: the file at the schema path is not valid JSON + SchemaError: the file contents are not valid JSON Schema + ValueError: the JSON Schema is missing required keys + """ - # If it is valid then add it to the registry - SCHEMA_REGISTRY[module_name] = func() + try: + fobj = open(schema_file_path) + json_schema = json.load(fobj) + except FileNotFoundError as excep: + LOGGER.error(excep) + raise excep + except json.JSONDecodeError as excep: + LOGGER.error(f"JSON error in schema file {schema_file_path}") + raise excep + + # Check that this is a valid schema - deliberately log a separate message and let + # the schema traceback output rather than replacing it. + try: + ValidatorWithDefaults.check_schema(json_schema) + except exceptions.SchemaError as excep: + LOGGER.error(f"Module schema invalid in: {schema_file_path}") + raise excep + + # Check that relevant keys are included - deliberately re-raising as ValueError here + # as the KeyError has built in quoting and is expecting to provide simple missing + # key messages. + try: + json_schema["properties"][module_name] + json_schema["required"] + except KeyError as excep: + to_raise = ValueError(f"Missing key in module schema {module_name}: {excep}") + LOGGER.error(to_raise) + raise to_raise - return func + LOGGER.info("Module schema for %s loaded", module_name) - return wrap + return json_schema def check_dict_leaves( diff --git a/virtual_rainforest/models/__init__.py b/virtual_rainforest/models/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/virtual_rainforest/models/abiotic/__init__.py b/virtual_rainforest/models/abiotic/__init__.py index 0ebbb5de8..a5ed98e58 100644 --- a/virtual_rainforest/models/abiotic/__init__.py +++ b/virtual_rainforest/models/abiotic/__init__.py @@ -14,19 +14,12 @@ calculates the radiation balance of the Virtual Rainforest. """ # noqa: D205, D415 -import json -from pathlib import Path +from importlib import resources from virtual_rainforest.core.config import register_schema +from virtual_rainforest.models.abiotic.abiotic_model import AbioticModel # noqa: F401 - -@register_schema("abiotic") -def schema() -> dict: - """Defines the schema that the abiotic module configuration should conform to.""" - - schema_file = Path(__file__).parent.resolve() / "abiotic_schema.json" - - with schema_file.open() as f: - config_schema = json.load(f) - - return config_schema +with resources.path( + "virtual_rainforest.models.abiotic", "abiotic_schema.json" +) as schema_file_path: + register_schema(module_name="abiotic", schema_file_path=schema_file_path) diff --git a/virtual_rainforest/models/plants/__init__.py b/virtual_rainforest/models/plants/__init__.py index 9dc046d25..5a2b49198 100644 --- a/virtual_rainforest/models/plants/__init__.py +++ b/virtual_rainforest/models/plants/__init__.py @@ -1,16 +1,8 @@ -import json -from pathlib import Path +from importlib import resources from virtual_rainforest.core.config import register_schema - -@register_schema("plants") -def schema() -> dict: - """Defines the schema that the plant module configuration should conform to.""" - - schema_file = Path(__file__).parent.resolve() / "plants_schema.json" - - with schema_file.open() as f: - config_schema = json.load(f) - - return config_schema +with resources.path( + "virtual_rainforest.models.plants", "plants_schema.json" +) as schema_file_path: + register_schema(module_name="plants", schema_file_path=schema_file_path) diff --git a/virtual_rainforest/models/soil/__init__.py b/virtual_rainforest/models/soil/__init__.py index 2a6f958fd..5c3b95a5c 100644 --- a/virtual_rainforest/models/soil/__init__.py +++ b/virtual_rainforest/models/soil/__init__.py @@ -12,20 +12,12 @@ containing the constants required by the broader soil model. """ # noqa: D205, D415 - -import json -from pathlib import Path +from importlib import resources from virtual_rainforest.core.config import register_schema +from virtual_rainforest.models.soil.soil_model import SoilModel # noqa: F401 - -@register_schema("soil") -def schema() -> dict: - """Defines the schema that the soil module configuration should conform to.""" - - schema_file = Path(__file__).parent.resolve() / "soil_schema.json" - - with schema_file.open() as f: - config_schema = json.load(f) - - return config_schema +with resources.path( + "virtual_rainforest.models.soil", "soil_schema.json" +) as schema_file_path: + register_schema(module_name="soil", schema_file_path=schema_file_path)