10 changes: 10 additions & 0 deletions docs/releases/unreleased.md
@@ -79,6 +79,16 @@ The `dummy` module is now fully type-annotated.
- Added a virtual function to the base engine class; new NN engines need to override the `refresh_targets` function
- The KNN classifier now calls this engine-specific function in `clean_up_classes()`

## build

- Added Python 3.14 wheel builds and updated PyO3 for 3.14 support.
- Replaced Poetry with uv for dependency management.

## evaluate

- Moved the forecasting evaluation utilities from `time_series.evaluate` to `evaluate` (`evaluate.evaluate` and `evaluate.iter_evaluate`) and deprecated `time_series.evaluate`/`time_series.iter_evaluate`; a migration sketch follows this list.
- Added `evaluate.ForecastingTrack` to benchmark and compare time series forecasting models.
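
A minimal migration sketch (the `HoltWinters` model and `AirlinePassengers` dataset are illustrative choices, borrowed from the docstrings updated in this diff):

```python
from river import datasets, evaluate, metrics, time_series

# Formerly: time_series.evaluate(...)
horizon_metric = evaluate.evaluate(
    dataset=datasets.AirlinePassengers(),
    model=time_series.HoltWinters(alpha=0.1),
    metric=metrics.MAE(),
    horizon=12,
)
print(horizon_metric)
```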

## utils

- The `utils` module is now fully type-checked.
3 changes: 2 additions & 1 deletion river/anomaly/filter.py
@@ -30,6 +30,7 @@ class ThresholdFilter(anomaly.base.AnomalyFilter):
of human interventions. We don't want our model to learn these values.

>>> from river import datasets
>>> from river import evaluate
>>> from river import metrics
>>> from river import time_series

@@ -53,7 +54,7 @@ class ThresholdFilter(anomaly.base.AnomalyFilter):
... )
... )

>>> time_series.evaluate(
>>> evaluate.evaluate(
... dataset,
... model,
... metric,
12 changes: 11 additions & 1 deletion river/evaluate/__init__.py
@@ -14,13 +14,23 @@

from __future__ import annotations

from .forecasting import evaluate, iter_evaluate
from .progressive_validation import iter_progressive_val_score, progressive_val_score
from .tracks import BinaryClassificationTrack, MultiClassClassificationTrack, RegressionTrack, Track
from .tracks import (
BinaryClassificationTrack,
ForecastingTrack,
MultiClassClassificationTrack,
RegressionTrack,
Track,
)

__all__ = [
"evaluate",
"iter_evaluate",
"iter_progressive_val_score",
"progressive_val_score",
"BinaryClassificationTrack",
"ForecastingTrack",
"MultiClassClassificationTrack",
"RegressionTrack",
"Track",
43 changes: 17 additions & 26 deletions river/time_series/evaluate.py → river/evaluate/forecasting.py
@@ -1,17 +1,19 @@
from __future__ import annotations

import collections
import numbers
import typing

from river import base, metrics, time_series
from river import base, metrics

__all__ = ["evaluate", "iter_evaluate"]


TimeSeries = typing.Iterator[
typing.Tuple[ # noqa: UP006
typing.Union[dict, None], # noqa: UP007
numbers.Number,
typing.Union[typing.List[dict], None], # noqa: UP006, UP007
typing.List[numbers.Number], # noqa: UP006
tuple[
dict | None,
float,
list[dict] | None,
list[float],
]
]

@@ -23,7 +25,7 @@ def _iter_with_horizon(dataset: base.typing.Dataset, horizon: int) -> TimeSeries
--------

>>> from river import datasets
>>> from river.time_series.evaluate import _iter_with_horizon
>>> from river.evaluate.forecasting import _iter_with_horizon

>>> dataset = datasets.AirlinePassengers()

@@ -75,11 +77,10 @@ def _iter_with_horizon(dataset: base.typing.Dataset, horizon: int) -> TimeSeries

def iter_evaluate(
dataset: base.typing.Dataset,
model: time_series.base.Forecaster,
model,
metric: metrics.base.RegressionMetric,
horizon: int,
agg_func: typing.Callable[[list[float]], float] | None = None,
grace_period: int | None = None,
):
"""Evaluates the performance of a forecaster on a time series dataset and yields results.

@@ -98,22 +99,18 @@
A regression metric.
horizon
agg_func
grace_period
Initial period during which the metric is not updated. This is to fairly evaluate models
which need a warming up period to start producing meaningful forecasts. The value of this
parameter is equal to the horizon by default.

"""

from river import time_series

horizon_metric = (
time_series.HorizonAggMetric(metric, agg_func)
if agg_func
else time_series.HorizonMetric(metric)
)
steps = _iter_with_horizon(dataset, horizon)

grace_period = horizon if grace_period is None else grace_period
for _ in range(grace_period):
for _ in range(horizon):
x, y, x_horizon, y_horizon = next(steps)
model.learn_one(y=y, x=x) # type: ignore

@@ -126,12 +123,11 @@

def evaluate(
dataset: base.typing.Dataset,
model: time_series.base.Forecaster,
model,
metric: metrics.base.RegressionMetric,
horizon: int,
agg_func: typing.Callable[[list[float]], float] | None = None,
grace_period: int | None = None,
) -> time_series.HorizonMetric:
):
"""Evaluates the performance of a forecaster on a time series dataset.

To understand why this method is useful, it's important to understand the difference between
@@ -154,15 +150,10 @@
A regression metric.
horizon
agg_func
grace_period
Initial period during which the metric is not updated. This is to fairly evaluate models
which need a warming up period to start producing meaningful forecasts. The value of this
parameter is equal to the horizon by default.

"""

horizon_metric = None
steps = iter_evaluate(dataset, model, metric, horizon, agg_func, grace_period)
steps = iter_evaluate(dataset, model, metric, horizon, agg_func)
for *_, horizon_metric in steps:
pass

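
A sketch of how `iter_evaluate` is meant to be consumed, following the unpacking pattern used by `evaluate` above and by the tests (the model and dataset choices are illustrative):

```python
from river import datasets, evaluate, metrics, time_series

steps = evaluate.iter_evaluate(
    dataset=datasets.AirlinePassengers(),
    model=time_series.HoltWinters(alpha=0.1),
    metric=metrics.MAE(),
    horizon=12,
)

# Each yielded tuple ends with the running HorizonMetric, so the
# last element of the final tuple is the overall result.
for *_, horizon_metric in steps:
    pass
print(horizon_metric)
```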
55 changes: 55 additions & 0 deletions river/evaluate/tracks.py
@@ -1,5 +1,9 @@
from __future__ import annotations

import datetime as dt
import statistics
import time

from river import datasets, evaluate, metrics


@@ -110,3 +114,54 @@ def __init__(self):
],
metric=metrics.MAE() + metrics.RMSE() + metrics.R2(),
)


class ForecastingTrack(Track):
"""This track evaluates a model's performance on forecasting tasks.

The objective is to compare time series forecasters with a common protocol over a fixed set of
datasets and horizons.

"""

def __init__(self):
super().__init__(
name="Forecasting",
datasets=[
datasets.AirlinePassengers(),
datasets.WaterFlow(),
],
metric=metrics.MAE(),
)
self.horizons = {
datasets.AirlinePassengers: 12,
datasets.WaterFlow: 24,
}

def run(self, model, dataset, n_checkpoints=10):
horizon = self.horizons.get(type(dataset))
if horizon is None:
raise ValueError(f"No horizon configured for {dataset.__class__.__name__}")

checkpoint = max(1, (dataset.n_samples - 2 * horizon) // n_checkpoints)

start = time.perf_counter()
model = model.clone()
steps = evaluate.iter_evaluate(
dataset=dataset,
model=model,
metric=self.metric.clone(),
horizon=horizon,
agg_func=statistics.mean,
)

for step, (*_, horizon_metric) in enumerate(steps, start=1):
if step % checkpoint != 0:
continue

yield {
horizon_metric.__class__.__name__: horizon_metric,
"Step": step,
"Time": dt.timedelta(seconds=time.perf_counter() - start),
"Memory": model._raw_memory_usage,
}
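
A hedged usage sketch for the new track. It assumes the base `Track` stores the configured datasets on `self.datasets` (an assumption; only the `__init__` arguments appear in this diff), and the model choice is illustrative:

```python
from river import evaluate, time_series

track = evaluate.ForecastingTrack()
model = time_series.HoltWinters(alpha=0.1)

# `track.datasets` is assumed to hold the datasets passed to __init__.
for dataset in track.datasets:
    # run() yields one dict per checkpoint with the horizon metric,
    # step count, wall-clock time, and memory usage.
    for checkpoint in track.run(model, dataset, n_checkpoints=5):
        print(dataset.__class__.__name__, checkpoint)
```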
28 changes: 27 additions & 1 deletion river/time_series/__init__.py
@@ -2,12 +2,38 @@

from __future__ import annotations

import warnings

from . import base
from .evaluate import evaluate, iter_evaluate
from .holt_winters import HoltWinters
from .metrics import ForecastingMetric, HorizonAggMetric, HorizonMetric
from .snarimax import SNARIMAX


def iter_evaluate(*args, **kwargs):
warnings.warn(
"`time_series.iter_evaluate` is deprecated and will be removed in a future release; "
"use `evaluate.iter_evaluate` instead.",
DeprecationWarning,
stacklevel=2,
)
from river import evaluate

return evaluate.iter_evaluate(*args, **kwargs)


def evaluate(*args, **kwargs):
warnings.warn(
"`time_series.evaluate` is deprecated and will be removed in a future release; "
"use `evaluate.evaluate` instead.",
DeprecationWarning,
stacklevel=2,
)
from river import evaluate as _evaluate

return _evaluate.evaluate(*args, **kwargs)


__all__ = [
"base",
"evaluate",
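
The shims keep old call sites working while signalling the move. A minimal check, mirroring the new test further down (the model and dataset choices are illustrative):

```python
import warnings

from river import datasets, metrics, time_series

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    time_series.evaluate(
        dataset=datasets.AirlinePassengers().take(20),
        model=time_series.HoltWinters(alpha=0.1),
        metric=metrics.MAE(),
        horizon=4,
    )

# The wrapper should have emitted a DeprecationWarning.
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```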
3 changes: 2 additions & 1 deletion river/time_series/holt_winters.py
@@ -109,6 +109,7 @@ class HoltWinters(time_series.base.Forecaster):
--------

>>> from river import datasets
>>> from river import evaluate
>>> from river import metrics
>>> from river import time_series

@@ -124,7 +125,7 @@ class HoltWinters(time_series.base.Forecaster):

>>> metric = metrics.MAE()

>>> time_series.evaluate(
>>> evaluate.evaluate(
... dataset,
... model,
... metric,
10 changes: 6 additions & 4 deletions river/time_series/metrics.py
@@ -48,13 +48,14 @@ class HorizonMetric(ForecastingMetric):
Examples
--------

This is used internally by the `time_series.evaluate` function.
This is used internally by the `evaluate.evaluate` function.

>>> from river import evaluate
>>> from river import datasets
>>> from river import metrics
>>> from river import time_series

>>> metric = time_series.evaluate(
>>> metric = evaluate.evaluate(
... dataset=datasets.AirlinePassengers(),
... model=time_series.HoltWinters(alpha=0.1),
... metric=metrics.MAE(),
@@ -111,14 +112,15 @@ class HorizonAggMetric(HorizonMetric):
Examples
--------

This is used internally by the `time_series.evaluate` function when you pass an `agg_func`.
This is used internally by the `evaluate.evaluate` function when you pass an `agg_func`.

>>> import statistics
>>> from river import evaluate
>>> from river import datasets
>>> from river import metrics
>>> from river import time_series

>>> metric = time_series.evaluate(
>>> metric = evaluate.evaluate(
... dataset=datasets.AirlinePassengers(),
... model=time_series.HoltWinters(alpha=0.1),
... metric=metrics.MAE(),
32 changes: 20 additions & 12 deletions river/time_series/test_evaluate.py
@@ -1,6 +1,8 @@
from __future__ import annotations

from river import datasets, metrics, stats, time_series
import pytest

from river import datasets, evaluate, metrics, stats, time_series


class MeanForecaster(time_series.base.Forecaster):
@@ -19,25 +21,31 @@ def test_forecasts_at_each_step():
model = MeanForecaster()
metric = metrics.MAE()
horizon = 12
grace_period = 1

steps = time_series.iter_evaluate(
dataset=dataset, model=model, metric=metric, horizon=horizon, grace_period=grace_period
)
steps = evaluate.iter_evaluate(dataset=dataset, model=model, metric=metric, horizon=horizon)

_, _, y_pred, _ = next(steps)
assert y_pred == [112] * horizon
assert y_pred == [pytest.approx(126.66666666666667)] * horizon
_, _, y_pred, _ = next(steps)
assert y_pred == [(112 + 118) / 2] * horizon
assert y_pred == [pytest.approx(125.76923076923077)] * horizon
_, _, y_pred, _ = next(steps)
assert y_pred == [(112 + 118 + 132) / 3] * horizon
assert y_pred == [pytest.approx(125.78571428571429)] * horizon
_, _, y_pred, _ = next(steps)
assert y_pred == [(112 + 118 + 132 + 129) / 4] * horizon
assert y_pred == [pytest.approx(126.8)] * horizon

n_steps = sum(
1
for _ in time_series.iter_evaluate(
dataset=dataset, model=model, metric=metric, horizon=horizon, grace_period=grace_period
for _ in evaluate.iter_evaluate(
dataset=dataset, model=model, metric=metric, horizon=horizon
)
)
assert n_steps == dataset.n_samples - horizon - grace_period
assert n_steps == dataset.n_samples - 2 * horizon


def test_time_series_evaluate_deprecated():
dataset = datasets.AirlinePassengers().take(20)
model = MeanForecaster()
metric = metrics.MAE()

with pytest.warns(DeprecationWarning, match="time_series.evaluate"):
time_series.evaluate(dataset=dataset, model=model, metric=metric, horizon=4)
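
The new expected values follow directly from dropping `grace_period`: the warm-up now lasts `horizon` steps, so `MeanForecaster`'s first prediction is the mean of the first twelve observations rather than of the first one. A quick sanity check (the listed values are the opening AirlinePassengers observations, the first four of which appeared in the old assertions):

```python
import math

first_12 = [112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118]

# First checkpoint: mean of the 12 warm-up observations.
assert math.isclose(sum(first_12) / 12, 126.66666666666667)
# Second checkpoint: 115 is the next observation in the dataset.
assert math.isclose(sum(first_12 + [115]) / 13, 125.76923076923077)
```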
4 changes: 2 additions & 2 deletions river/time_series/test_snarimax.py
Original file line number Diff line number Diff line change
@@ -7,7 +7,7 @@
import pytest
import sympy

from river import compose, datasets, metrics, time_series
from river import compose, datasets, evaluate, metrics, time_series
from river.time_series.snarimax import Differencer


@@ -270,6 +270,6 @@ def get_ordinal_date(x):

model = extract_features | snarimax

time_series.evaluate(
evaluate.evaluate(
dataset=datasets.AirlinePassengers(), model=model, metric=metrics.MAE(), horizon=12
)