diff --git a/modelskill/__init__.py b/modelskill/__init__.py
index 05091f6ca..a7c02e2bf 100644
--- a/modelskill/__init__.py
+++ b/modelskill/__init__.py
@@ -27,7 +27,13 @@
 from .quantity import Quantity
 from .model.factory import ModelResult
 from .model import model_result
-from .model import PointModelResult, TrackModelResult, GridModelResult, DfsuModelResult
+from .model import (
+    PointModelResult,
+    TrackModelResult,
+    GridModelResult,
+    DfsuModelResult,
+    DummyModelResult,
+)
 from .obs import observation, PointObservation, TrackObservation
 from .matching import compare, from_matched, match
 from .connection import Connector
@@ -67,6 +73,7 @@ def load(filename: Union[str, Path]) -> ComparerCollection:
     "TrackModelResult",
     "GridModelResult",
     "DfsuModelResult",
+    "DummyModelResult",
     "observation",
     "PointObservation",
     "TrackObservation",
diff --git a/modelskill/matching.py b/modelskill/matching.py
index 4a6a1712d..85df5afd0 100644
--- a/modelskill/matching.py
+++ b/modelskill/matching.py
@@ -22,6 +22,7 @@
 
 import mikeio
 
+
 from . import model_result, Quantity
 from .timeseries import TimeSeries
 from .types import GeometryType, Period
@@ -29,6 +30,7 @@
 from .model.dfsu import DfsuModelResult
 from .model.track import TrackModelResult
 from .model.point import PointModelResult
+from .model.dummy import DummyModelResult
 from .obs import Observation, PointObservation, TrackObservation
 from .comparison import Comparer, ComparerCollection
 from . import __version__
@@ -51,6 +53,7 @@
     GridModelResult,
     DfsuModelResult,
     TrackModelResult,
+    DummyModelResult,
 ]
 ObsInputType = Union[
     str,
@@ -514,18 +517,27 @@ def _parse_single_model(
     gtype: Optional[GeometryTypes] = None,
 ) -> Any:  # TODO
     if isinstance(
-        mod, (DfsuModelResult, GridModelResult, TrackModelResult, PointModelResult)
+        mod,
+        (
+            str,
+            Path,
+            pd.DataFrame,
+            xr.Dataset,
+            xr.DataArray,
+            mikeio.Dfs0,
+            mikeio.Dataset,
+            mikeio.DataArray,
+            mikeio.dfsu.Dfsu2DH,
+        ),
     ):
-        if item is not None:
+        try:
+            return model_result(mod, item=item, gtype=gtype)
+        except ValueError as e:
             raise ValueError(
-                "mod_item argument not allowed if mod is an modelskill.ModelResult"
-            )
+                f"Could not compare. Unknown model result type {type(mod)}. {str(e)}"
+            ) from e
+    else:
+        if item is not None:
+            raise ValueError("item argument not allowed if mod is a ModelResult type")
+        # assume it is already a model result
         return mod
-
-    try:
-        # return ModelResult(mod, item=item, gtype=gtype)
-        return model_result(mod, item=item, gtype=gtype)
-    except ValueError as e:
-        raise ValueError(
-            f"Could not compare. Unknown model result type {type(mod)}. {str(e)}"
-        )
diff --git a/modelskill/model/__init__.py b/modelskill/model/__init__.py
index b4956d849..f9576077b 100644
--- a/modelskill/model/__init__.py
+++ b/modelskill/model/__init__.py
@@ -5,6 +5,7 @@
 from .track import TrackModelResult
 from .dfsu import DfsuModelResult
 from .grid import GridModelResult
+from .dummy import DummyModelResult
 
 __all__ = [
     "PointModelResult",
@@ -12,4 +13,5 @@
     "DfsuModelResult",
     "GridModelResult",
     "model_result",
+    "DummyModelResult",
 ]
diff --git a/modelskill/model/dummy.py b/modelskill/model/dummy.py
new file mode 100644
index 000000000..573b23836
--- /dev/null
+++ b/modelskill/model/dummy.py
@@ -0,0 +1,103 @@
+from __future__ import annotations
+from typing import Literal
+
+import pandas as pd
+
+from modelskill.model.point import PointModelResult
+from modelskill.model.track import TrackModelResult
+from modelskill.obs import PointObservation, TrackObservation
+
+
+class DummyModelResult:
+    """Dummy model result that always returns the same value.
+
+    Similar in spirit to scikit-learn's ``DummyRegressor``.
+
+    Parameters
+    ----------
+    data : float, optional
+        The value to return if strategy is 'constant', by default None
+    name : str, optional
+        The name of the model result, by default 'dummy'
+    strategy : str, optional
+        The strategy to use, 'mean' uses the mean of the observation, 'constant' uses the value given in data, by default 'constant'
+
+    Raises
+    ------
+    ValueError
+        If strategy is not 'mean' or 'constant', or if strategy is 'constant' and data is None
+
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> import modelskill as ms
+    >>> obs = ms.PointObservation(pd.DataFrame([0.0, 1.0], index=pd.date_range("2000", freq="h", periods=2)), name="foo")
+    >>> mr = ms.DummyModelResult(strategy='mean')
+    >>> pmr = mr.extract(obs)
+    >>> pmr.to_dataframe()
+                         dummy
+    time
+    2000-01-01 00:00:00    0.5
+    2000-01-01 01:00:00    0.5
+    """
+
+    def __init__(
+        self,
+        data: float | None = None,
+        *,
+        name: str = "dummy",
+        strategy: Literal["mean", "constant"] = "constant",
+    ):
+        # Reject unknown strategies here; extract() would otherwise treat them as 'constant'.
+        if strategy not in ("mean", "constant"):
+            raise ValueError(f"strategy must be 'mean' or 'constant', got {strategy!r}")
+        if strategy == "constant" and data is None:
+            raise ValueError("data must be given when strategy is 'constant'")
+        self.data = data
+        self.name = name
+        self.strategy = strategy
+
+    def extract(
+        self, observation: PointObservation | TrackObservation
+    ) -> PointModelResult | TrackModelResult:
+        """Return a model result sharing the observation's time axis and position(s).
+
+        Parameters
+        ----------
+        observation : PointObservation or TrackObservation
+            Observation to mimic
+
+        Returns
+        -------
+        PointModelResult or TrackModelResult
+            Matches the observation type; every value is the observation mean ('mean') or data ('constant')
+
+        Raises
+        ------
+        TypeError
+            If observation is neither a PointObservation nor a TrackObservation
+        """
+        # Fail loudly on unsupported input instead of implicitly returning None.
+        if not isinstance(observation, (PointObservation, TrackObservation)):
+            raise TypeError(
+                f"Observation type {type(observation).__name__} is not supported"
+            )
+
+        # Fill a copy so the observation's own data is left untouched.
+        da = observation.data[observation.name].copy()
+        da[:] = da.mean() if self.strategy == "mean" else self.data
+
+        if isinstance(observation, PointObservation):
+            return PointModelResult(
+                data=da, x=observation.x, y=observation.y, name=self.name
+            )
+
+        data = pd.DataFrame(
+            {
+                "x": observation.x,
+                "y": observation.y,
+                "value": da.values,
+            },
+            index=da.time,
+        )
+        return TrackModelResult(data=data, name=self.name)
diff --git a/tests/test_match.py b/tests/test_match.py
index 33e4e4131..d088de66d 100644
--- a/tests/test_match.py
+++ b/tests/test_match.py
@@ -5,6 +5,7 @@
 import mikeio
 import modelskill as ms
 from modelskill.comparison._comparison import ItemSelection
+from modelskill.model.dfsu import DfsuModelResult
 
 
 @pytest.fixture
@@ -52,7 +53,7 @@ def mr12_gaps():
 
 
 @pytest.fixture
-def mr1():
+def mr1() -> DfsuModelResult:
     fn = "tests/testdata/SW/HKZN_local_2017_DutchCoast.dfsu"
     return ms.model_result(fn, item=0, name="SW_1")
 
@@ -509,3 +510,30 @@ def test_multiple_obs_not_allowed_with_non_spatial_modelresults():
     # but this is not allowed
     with pytest.raises(ValueError, match="SpatialField type"):
         ms.match(obs=[o1, o2], mod=[m1, m2, m3])
+
+
+def test_compare_model_vs_dummy(mr1, o1):
+    mean_obs = o1.trim(mr1.time[0], mr1.time[-1]).values.mean()
+
+    mr2 = ms.DummyModelResult(data=mean_obs, name="dummy")
+
+    cmp = ms.match(obs=o1, mod=[mr1, mr2])
+    assert cmp.score(metric="r2")["dummy"] == pytest.approx(0.0)
+
+
+def test_compare_model_vs_dummy_for_track(mr1, o3):
+    mr = ms.DummyModelResult(name="dummy", strategy="mean")
+
+    cmp = ms.match(obs=o3, mod=mr)
+    assert cmp.score(metric="r2")["dummy"] == pytest.approx(0.0)
+
+    assert cmp.score()["dummy"] == pytest.approx(1.140079520671913)
+
+    cmp2 = ms.match(obs=o3, mod=[mr1, mr])
+
+    assert cmp2.score()["dummy"] == pytest.approx(
+        1.225945
+    )  # not identical to above since it is evaluated on a subset of the data
+    assert cmp2.score()["SW_1"] == pytest.approx(
+        0.35179650395619716
+    )  # better than dummy 🙂