Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
572fd47
Narwhalify_from_group_dataframe
cnhwl Apr 9, 2025
c48b7a2
fix some errors
cnhwl Apr 9, 2025
b0dd83c
fix some errors
cnhwl Apr 11, 2025
79e2ea3
set time_index to nw.Datetime(time_unit="ns")
cnhwl Apr 11, 2025
03f6c3c
Merge branch 'master' into Feat/Narwhalify_from_group_dataframe
cnhwl Apr 16, 2025
5f15cbb
Merge branch 'master' into Feat/Narwhalify_from_group_dataframe
cnhwl Apr 18, 2025
c2dc052
Merge branch 'master' into feat/Narwhalify_from_group_dataframe
dennisbader Apr 28, 2025
7e30d5b
update from_group_dataframe
dennisbader Apr 28, 2025
8dd0283
Merge branch 'master' into Feat/Narwhalify_from_group_dataframe
cnhwl Apr 29, 2025
dc205d6
add polars to from_group_dataframe_tests
dennisbader Apr 29, 2025
0610da2
update changelog
dennisbader Apr 29, 2025
198c321
Merge branch 'feeat/nw_from_group_dataframe' into Feat/Narwhalify_fro…
dennisbader Apr 29, 2025
74c8c05
Merge branch 'master' into Feat/Narwhalify_from_group_dataframe
dennisbader Aug 22, 2025
2fc7964
update changelog
dennisbader Aug 22, 2025
9dafc6f
Merge branch 'master' into Feat/Narwhalify_from_group_dataframe
dennisbader Aug 22, 2025
6e1d8f3
keep old ts
dennisbader Aug 23, 2025
df1a222
improve efficiency
dennisbader Aug 24, 2025
d75b08e
maintain order
dennisbader Aug 24, 2025
6f2b9ba
clean up tests after maintaining order
dennisbader Aug 24, 2025
408ac03
remove old from_group_dataframe
dennisbader Aug 24, 2025
5e7e60d
Merge branch 'master' into Feat/Narwhalify_from_group_dataframe
cnhwl Aug 31, 2025
21d1860
Update CHANGELOG.md
cnhwl Aug 31, 2025
d67bcb6
set index for pandas df for performance boost
dennisbader Sep 1, 2025
f022ebf
remove old from_group_df
dennisbader Sep 2, 2025
a8340cf
improve code coverage
dennisbader Sep 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
**Improved**

- 🔴 Added future and static covariates support to `BlockRNNModel`. This improvement required changes to the underlying model architecture which means that saved model instances from older Darts versions cannot be loaded any longer. [#2845](https://github.com/unit8co/darts/pull/2845) by [Gabriel Margaria](https://github.com/Jaco-Pastorius).
- `from_group_dataframe()` now supports creating `TimeSeries` from **additional DataFrame backends** (Polars, PyArrow, ...). We leverage `narwhals` as the compatibility layer between DataFrame libraries. See their [documentation](https://narwhals-dev.github.io/narwhals/) for all supported backends. [#2766](https://github.com/unit8co/darts/pull/2766) by [He Weilin](https://github.com/cnhwl).
- Added `add_regressor_configs` parameter to the `Prophet` model, enabling component-specific control over `prior_scale`, `mode`, and `standardize` for the future covariates. [#2882](https://github.com/unit8co/darts/issues/2882) by [Ramsay Davis](https://github.com/RamsayDavisWL).
- 🔴 Increased the decimal places for quantile component names from 2 to 3 for more precise quantiles. (e.g. `component_name_q0.500` for quantile 0.5). This affects quantile forecasts as well as quantiles computed with `TimeSeries.quantile()`. [#2887](https://github.com/unit8co/darts/pull/2786) by [He Weilin](https://github.com/cnhwl).

Expand Down
13 changes: 13 additions & 0 deletions darts/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,19 @@ def test_pandas_creation(self):
ts_pd_df = ts.to_dataframe(time_as_index=False)
assert ts_pd_df.equals(pd_df.reset_index())

def test_df_time_col_not_present(self):
    """`from_dataframe` must reject a `time_col` that is not a column of the DataFrame.

    The expected failure is an `AttributeError` whose message names the
    missing column.
    """
    frame = pd.DataFrame({"a": list(range(10))})
    expected_msg = "time_col='missing_col' is not present."

    with pytest.raises(AttributeError) as err:
        TimeSeries.from_dataframe(frame, time_col="missing_col")

    assert str(err.value) == expected_msg

def test_df_invalid_time_col_dtype(self):
    """`from_dataframe` must reject a `time_col` whose values are floats.

    The expected failure is an `AttributeError`; only the message prefix is
    checked.
    """
    n_rows = 10
    # the "times" column is float-valued, which is not a valid time column dtype
    frame = pd.DataFrame({
        "a": list(range(n_rows)),
        "times": [1.0] * n_rows,
    })

    with pytest.raises(AttributeError) as err:
        TimeSeries.from_dataframe(frame, time_col="times")

    assert str(err.value).startswith("Invalid type of `time_col`")

@pytest.mark.skipif(not POLARS_AVAILABLE, reason="requires polars")
def test_polars_creation(self, caplog):
expected_idx = pl.Series("time", range(10))
Expand Down
121 changes: 104 additions & 17 deletions darts/tests/test_timeseries_static_covariates.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import copy
import itertools
import logging
import os

import narwhals as nw
import numpy as np
import pandas as pd
import pytest

from darts import TimeSeries, concatenate
from darts.dataprocessing.transformers import BoxCox, Scaler
from darts.tests.conftest import POLARS_AVAILABLE
from darts.timeseries import (
DEFAULT_GLOBAL_STATIC_COV_NAME,
METADATA_TAG,
Expand All @@ -15,6 +19,15 @@
from darts.utils.timeseries_generation import linear_timeseries
from darts.utils.utils import generate_index

# DataFrame backends the parametrized tests run against; pandas is always tested.
TEST_BACKENDS = ["pandas"]

if not POLARS_AVAILABLE:
    # keep the name `pl` defined so module-level references do not fail
    pl = None
else:
    import polars as pl

    TEST_BACKENDS.append("polars")


def setup_test_case():
n_groups = 5
Expand Down Expand Up @@ -64,6 +77,15 @@ def setup_tag(tag, ts):
class TestTimeSeriesStaticCovariate:
n_groups, len_ts, df_long_uni, df_long_multi = setup_test_case()

@staticmethod
def pd_to_backend(df, backend, index=False):
if backend == "pandas":
return df
elif backend == "polars":
if index:
return pl.from_pandas(df.reset_index())
return pl.from_pandas(df)

@pytest.mark.parametrize("tag", [STATIC_COV_TAG, METADATA_TAG])
def test_ts_from_x(self, tag, tmpdir_module):
ts = linear_timeseries(length=10)
Expand Down Expand Up @@ -119,17 +141,20 @@ def test_invalid_metadata(self):
== "`metadata` must be of type `dict` mapping metadata attributes to their values."
)

@pytest.mark.parametrize("index_type", ["int", "dt", "str"])
def test_from_group_dataframe(self, index_type):
@pytest.mark.parametrize(
"config", itertools.product(["int", "dt", "str"], TEST_BACKENDS)
)
def test_from_group_dataframe(self, config):
"""Tests correct extract of TimeSeries groups from a long DataFrame with unsorted (time/integer) index"""
index_type, backend = config
group = ["a", "a", "a", "b", "b", "b"]
values = np.arange(len(group))

if index_type == "int":
index_expected = pd.RangeIndex(3)
index_expected = pd.RangeIndex(3, name="time")
time = [2, 1, 0, 0, 1, 2]
else:
index_expected = pd.date_range("2024-01-01", periods=3)
index_expected = pd.date_range("2024-01-01", periods=3, name="time")
time = index_expected[::-1].append(index_expected)
if index_type == "str":
time = time.astype(str)
Expand All @@ -140,7 +165,9 @@ def test_from_group_dataframe(self, index_type):
"time": time,
"x": values,
})
ts = TimeSeries.from_group_dataframe(df, group_cols="group", time_col="time")
ts = TimeSeries.from_group_dataframe(
self.pd_to_backend(df, backend), group_cols="group", time_col="time"
)

# check the time index
assert ts[0].time_index.equals(index_expected)
Expand All @@ -150,11 +177,16 @@ def test_from_group_dataframe(self, index_type):
assert (ts[0].values().flatten() == [values[2], values[1], values[0]]).all()
assert (ts[1].values().flatten() == [values[3], values[4], values[5]]).all()

def test_timeseries_from_longitudinal_df(self):
@pytest.mark.parametrize("backend", TEST_BACKENDS)
def test_timeseries_from_longitudinal_df(self, backend):
# univariate static covs: only group by "st1", keep static covs "st1"
value_cols = ["a", "b", "c"]

df_long_uni = self.pd_to_backend(self.df_long_uni, backend)
df_long_multi = self.pd_to_backend(self.df_long_multi, backend)

ts_groups1 = TimeSeries.from_group_dataframe(
df=self.df_long_uni,
df=df_long_uni,
group_cols="st1",
static_cols=None,
time_col="times",
Expand All @@ -173,7 +205,7 @@ def test_timeseries_from_longitudinal_df(self):

# multivariate static covs: only group by "st1", keep static covs "st1", "constant"
ts_groups2 = TimeSeries.from_group_dataframe(
df=self.df_long_multi,
df=df_long_multi,
group_cols=["st1"],
static_cols="constant",
time_col="times",
Expand All @@ -189,7 +221,7 @@ def test_timeseries_from_longitudinal_df(self):

# multivariate static covs: group by "st1" and "st2", keep static covs "st1", "st2", "constant"
ts_groups3 = TimeSeries.from_group_dataframe(
df=self.df_long_multi,
df=df_long_multi,
group_cols=["st1", "st2"],
static_cols=["constant"],
time_col="times",
Expand All @@ -210,7 +242,7 @@ def test_timeseries_from_longitudinal_df(self):
# drop group columns gives same time series with dropped static covariates
# drop first column
ts_groups4 = TimeSeries.from_group_dataframe(
df=self.df_long_multi,
df=df_long_multi,
group_cols=["st1", "st2"],
static_cols=["constant"],
time_col="times",
Expand All @@ -229,7 +261,7 @@ def test_timeseries_from_longitudinal_df(self):

# drop last column
ts_groups5 = TimeSeries.from_group_dataframe(
df=self.df_long_multi,
df=df_long_multi,
group_cols=["st1", "st2"],
static_cols=["constant"],
time_col="times",
Expand All @@ -248,7 +280,7 @@ def test_timeseries_from_longitudinal_df(self):

# drop all columns
ts_groups6 = TimeSeries.from_group_dataframe(
df=self.df_long_multi,
df=df_long_multi,
group_cols=["st1", "st2"],
static_cols=["constant"],
time_col="times",
Expand All @@ -265,7 +297,7 @@ def test_timeseries_from_longitudinal_df(self):

# drop all static covariates (no `static_cols`, all `group_cols` dropped) and no metadata cols
ts_groups7 = TimeSeries.from_group_dataframe(
df=self.df_long_multi,
df=df_long_multi,
group_cols=["st1", "st2"],
time_col="times",
value_cols=value_cols,
Expand All @@ -277,7 +309,7 @@ def test_timeseries_from_longitudinal_df(self):
assert ts.metadata is None

ts_groups7_parallel = TimeSeries.from_group_dataframe(
df=self.df_long_multi,
df=df_long_multi,
group_cols=["st1", "st2"],
time_col="times",
value_cols=value_cols,
Expand All @@ -286,22 +318,25 @@ def test_timeseries_from_longitudinal_df(self):
)
assert ts_groups7_parallel == ts_groups7

def test_from_group_dataframe_invalid_drop_cols(self):
@pytest.mark.parametrize("backend", TEST_BACKENDS)
def test_from_group_dataframe_invalid_drop_cols(self, backend):
# drop col is not part of `group_cols`
with pytest.raises(ValueError) as err:
_ = TimeSeries.from_group_dataframe(
df=self.df_long_multi,
df=self.pd_to_backend(self.df_long_multi, backend),
group_cols=["st1"],
time_col="times",
value_cols="a",
drop_group_cols=["invalid"],
)
assert str(err.value).endswith("received: {'invalid'}.")

def test_from_group_dataframe_groups_too_short(self):
@pytest.mark.parametrize("backend", TEST_BACKENDS)
def test_from_group_dataframe_groups_too_short(self, backend):
# groups that are too short for TimeSeries requirements should raise an error
df = copy.deepcopy(self.df_long_multi)
df.loc[:, "non_static"] = np.arange(len(df))
df = self.pd_to_backend(df, backend)
with pytest.raises(ValueError) as err:
_ = TimeSeries.from_group_dataframe(
df=df,
Expand Down Expand Up @@ -332,6 +367,58 @@ def test_from_group_dataframe_not_unique(self):
assert s_.static_covariates_values()[0, 0] == val
assert s_.metadata == {"st2": val}

@pytest.mark.parametrize("backend", TEST_BACKENDS)
def test_from_group_dataframe_non_pandas_requires_time_col(self, backend):
    """`time_col=None` is only accepted by `from_group_dataframe` for pandas input.

    For pandas, the "times" column is moved into the index and the resulting
    series are checked for the expected static covariates and metadata. For
    any other backend a `ValueError` with a fixed message is expected.
    """
    value_cols = ["a", "b", "c"]
    df = copy.deepcopy(self.pd_to_backend(self.df_long_multi, backend))

    if backend != "pandas":
        # non-pandas frames have no index to fall back on
        with pytest.raises(ValueError) as exc:
            TimeSeries.from_group_dataframe(
                df=df,
                group_cols="st1",
                time_col=None,
            )
        expected_msg = "`time_col` is required when `df` is not a `pandas.DataFrame`."
        assert str(exc.value) == expected_msg
        return

    # pandas: the time axis is taken from the DataFrame index
    df = df.set_index("times")
    series_list = TimeSeries.from_group_dataframe(
        df=df,
        group_cols="st1",
        static_cols=None,
        time_col=None,
        value_cols=value_cols,
        metadata_cols=["st1", "constant"],
    )
    assert len(series_list) == self.n_groups

    expected_index = pd.Index([DEFAULT_GLOBAL_STATIC_COV_NAME])
    expected_columns = pd.Index(["st1"])
    for group_id, series in enumerate(series_list):
        assert series.static_covariates.index.equals(expected_index)
        assert series.static_covariates.shape == (1, 1)
        assert series.static_covariates.columns.equals(expected_columns)
        assert (series.static_covariates_values(copy=False) == [[group_id]]).all()
        assert series.metadata == {"st1": group_id, "constant": 1}

@pytest.mark.parametrize("backend", TEST_BACKENDS)
def test_from_group_dataframe_warn_on_sorted_index(self, backend, caplog):
    """A warning must be logged when the frame passed in is sorted by its time column.

    The backend frame is wrapped with narwhals and sorted by "times" before
    being handed to `from_group_dataframe`; the captured log text is then
    checked for the expected warning.
    """
    native_df = copy.deepcopy(self.pd_to_backend(self.df_long_multi, backend))
    sorted_df = nw.from_native(native_df).sort(by="times")
    expected_warning = (
        "UserWarning: The (time) index from `df` is monotonically increasing."
    )

    with caplog.at_level(logging.WARNING):
        TimeSeries.from_group_dataframe(
            df=sorted_df,
            group_cols="st1",
            time_col="times",
        )

    assert expected_warning in caplog.text
    caplog.clear()

def test_with_static_covariates_univariate(self):
ts = linear_timeseries(length=10)
static_covs_series = pd.Series([0.0, 1.0], index=["st1", "st2"])
Expand Down
Loading