From d846808f7052734682b2c5598525f2cc719e1dcb Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sun, 1 Feb 2026 18:52:31 +0100 Subject: [PATCH 01/10] WIP --- narwhals/_arrow/series.py | 41 +++++++++++++++++++------------ narwhals/_compliant/series.py | 6 ++++- narwhals/_pandas_like/series.py | 13 +++++++--- narwhals/_polars/series.py | 8 ++++-- narwhals/series.py | 16 ++++++++---- tests/series_only/scatter_test.py | 10 ++++++++ 6 files changed, 67 insertions(+), 27 deletions(-) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 45a6d6b3d2..8b50765bf1 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -1,5 +1,6 @@ from __future__ import annotations +from collections.abc import Sequence from typing import TYPE_CHECKING, Any, Callable, Literal, cast, overload import pyarrow as pa @@ -37,7 +38,7 @@ from narwhals.exceptions import InvalidOperationError, ShapeError if TYPE_CHECKING: - from collections.abc import Iterable, Iterator, Sequence + from collections.abc import Iterable, Iterator from types import ModuleType import pandas as pd @@ -451,23 +452,31 @@ def _gather_slice(self, rows: _SliceIndex | range) -> Self: raise NotImplementedError(msg) return self._with_native(self.native.slice(start, stop - start)) - def scatter(self, indices: int | Sequence[int], values: Any) -> Self: + def scatter( + self, + indices: Self | int | Sequence[int], + values: Self | PythonLiteral | Sequence[PythonLiteral] | None, + ) -> Self: import numpy as np # ignore-banned-import - values_native: ArrayAny - if isinstance(indices, int): - indices_native = pa.array([indices]) - values_native = pa.array([values]) - else: - # TODO(unassigned): we may also want to let `indices` be a Series. 
- # https://github.com/narwhals-dev/narwhals/issues/2155 - indices_native = pa.array(indices) - if isinstance(values, self.__class__): - values_native = values.native.combine_chunks() - else: - # NOTE: Requires fixes in https://github.com/zen-xu/pyarrow-stubs/pull/209 - pa_array: Incomplete = pa.array - values_native = pa_array(values) + indices_native = ( + pa.array([indices]) + if isinstance(indices, int) + else indices.native.combine_chunks() + if isinstance(indices, self.__class__) + else pa.array(indices) + ) + + # NOTE: Requires fixes in https://github.com/zen-xu/pyarrow-stubs/pull/209 + pa_array: Incomplete = pa.array + + values_native = ( + values.native.combine_chunks() + if isinstance(values, self.__class__) + else pa_array(values) + if isinstance(values, Sequence) + else pa_array([values]) + ) sorting_indices = pc.sort_indices(indices_native) indices_native = indices_native.take(sorting_indices) diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index eb7f3ae6ef..4ed8ab5307 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -162,7 +162,11 @@ def sample( with_replacement: bool, seed: int | None, ) -> Self: ... - def scatter(self, indices: int | Sequence[int], values: Any) -> Self: ... + def scatter( + self, + indices: Self | int | Sequence[int], + values: Self | PythonLiteral | Sequence[PythonLiteral] | None, + ) -> Self: ... def shift(self, n: int) -> Self: ... def skew(self) -> float | None: ... def sort(self, *, descending: bool, nulls_last: bool) -> Self: ... 
diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 187a5c6c42..d52b103cca 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -278,15 +278,22 @@ def ewm_mean( result[mask_na] = None return self._with_native(result) - def scatter(self, indices: int | Sequence[int], values: Any) -> Self: + def scatter( + self, + indices: Self | int | Sequence[int], + values: Self | PythonLiteral | Sequence[PythonLiteral] | None, + ) -> Self: + indices_native = ( + indices.native if isinstance(indices, self.__class__) else indices + ) if isinstance(values, self.__class__): values = set_index( values.native, - self.native.index[indices], + self.native.index[indices_native], implementation=self._implementation, ) s = self.native.copy(deep=True) - s.iloc[indices] = values + s.iloc[indices_native] = values s.name = self.name return self._with_native(s) diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 2ff271fc66..589fbe5ff5 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -504,8 +504,12 @@ def sort(self, *, descending: bool, nulls_last: bool) -> Self: return self._with_native(result) - def scatter(self, indices: int | Sequence[int], values: Any) -> Self: - s = self.native.clone().scatter(indices, extract_native(values)) + def scatter( + self, + indices: Self | int | Sequence[int], + values: Self | PythonLiteral | Sequence[PythonLiteral] | None, + ) -> Self: + s = self.native.clone().scatter(extract_native(indices), extract_native(values)) return self._with_native(s) def value_counts( diff --git a/narwhals/series.py b/narwhals/series.py index c3290c6873..f2cc7df467 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -394,12 +394,16 @@ def to_native(self) -> IntoSeriesT: """ return self._compliant_series.native - def scatter(self, indices: int | Sequence[int], values: Any) -> Self: - """Set value(s) at given position(s). 
+ def scatter( + self, + indices: Self | Sequence[int] | int, + values: Self | Sequence[PythonLiteral] | PythonLiteral | None, + ) -> Self: + """Set value(s) at the given index location(s). Arguments: - indices: Position(s) to set items at. - values: Values to set. + indices: Integer(s) representing the index location(s). + values: Replacement values. Note: This method always returns a new Series, without modifying the original one. @@ -436,8 +440,10 @@ def scatter(self, indices: int | Sequence[int], values: Any) -> Self: a: [[999,888,3]] b: [[4,5,6]] """ + compliant_indices = self._extract_native(indices) + compliant_values = self._extract_native(values) return self._with_compliant( - self._compliant_series.scatter(indices, self._extract_native(values)) + self._compliant_series.scatter(compliant_indices, compliant_values) ) @property diff --git a/tests/series_only/scatter_test.py b/tests/series_only/scatter_test.py index 4b827276b9..49dc254457 100644 --- a/tests/series_only/scatter_test.py +++ b/tests/series_only/scatter_test.py @@ -80,3 +80,13 @@ def test_scatter_2862(constructor_eager: ConstructorEager) -> None: result = ser.scatter([2, 0], [999, 888]) expected = {"a": [888, 2, 999]} assert_equal_data({"a": result}, expected) + + +def test_scatter_series_indices(constructor_eager: ConstructorEager) -> None: + df = nw.from_native( + constructor_eager({"a": [1, 2, 3], "idx": [0, 2, 1]}), eager_only=True + ) + indices = df["idx"] + result = df["a"].scatter(indices, [999, 888, 777]) + expected = {"a": [999, 777, 888]} + assert_equal_data({"a": result}, expected) From e75cfe6591753100d682f34f8c038f22d0a2f7a8 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Mon, 2 Feb 2026 19:56:37 +0100 Subject: [PATCH 02/10] simplify via Dan's suggestion --- narwhals/_arrow/series.py | 47 +++++---------------------------- narwhals/_compliant/series.py | 6 +---- narwhals/_pandas_like/series.py | 21 +++++---------- narwhals/_polars/series.py | 6 +---- narwhals/series.py | 18 
+++++++++++-- 5 files changed, 31 insertions(+), 67 deletions(-) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 8b50765bf1..650217bd3d 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -1,6 +1,5 @@ from __future__ import annotations -from collections.abc import Sequence from typing import TYPE_CHECKING, Any, Callable, Literal, cast, overload import pyarrow as pa @@ -38,7 +37,7 @@ from narwhals.exceptions import InvalidOperationError, ShapeError if TYPE_CHECKING: - from collections.abc import Iterable, Iterator + from collections.abc import Iterable, Iterator, Sequence from types import ModuleType import pandas as pd @@ -452,45 +451,11 @@ def _gather_slice(self, rows: _SliceIndex | range) -> Self: raise NotImplementedError(msg) return self._with_native(self.native.slice(start, stop - start)) - def scatter( - self, - indices: Self | int | Sequence[int], - values: Self | PythonLiteral | Sequence[PythonLiteral] | None, - ) -> Self: - import numpy as np # ignore-banned-import - - indices_native = ( - pa.array([indices]) - if isinstance(indices, int) - else indices.native.combine_chunks() - if isinstance(indices, self.__class__) - else pa.array(indices) - ) - - # NOTE: Requires fixes in https://github.com/zen-xu/pyarrow-stubs/pull/209 - pa_array: Incomplete = pa.array - - values_native = ( - values.native.combine_chunks() - if isinstance(values, self.__class__) - else pa_array(values) - if isinstance(values, Sequence) - else pa_array([values]) - ) - - sorting_indices = pc.sort_indices(indices_native) - indices_native = indices_native.take(sorting_indices) - values_native = values_native.take(sorting_indices) - - mask: _1DArray = np.zeros(self.len(), dtype=bool) - mask[indices_native] = True - # NOTE: Multiple issues - # - Missing `values` type - # - `mask` accepts a `np.ndarray`, but not mentioned in stubs - # - Missing `replacements` type - # - Missing return type - pc_replace_with_mask: Incomplete = pc.replace_with_mask 
- return self._with_native(pc_replace_with_mask(self.native, mask, values_native)) + def scatter(self, indices: Self, values: Self) -> Self: + mask = pc.is_in(arange(start=0, end=len(self), step=1), indices.native) + sorted_indices = pc.sort_indices(indices.native) + replacements = values.native.take(sorted_indices).combine_chunks() + return self._with_native(pc.replace_with_mask(self.native, mask, replacements)) def to_list(self) -> list[Any]: return self.native.to_pylist() diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index 4ed8ab5307..077e6a86ad 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -162,11 +162,7 @@ def sample( with_replacement: bool, seed: int | None, ) -> Self: ... - def scatter( - self, - indices: Self | int | Sequence[int], - values: Self | PythonLiteral | Sequence[PythonLiteral] | None, - ) -> Self: ... + def scatter(self, indices: Self, values: Self) -> Self: ... def shift(self, n: int) -> Self: ... def skew(self) -> float | None: ... def sort(self, *, descending: bool, nulls_last: bool) -> Self: ... 
diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index d52b103cca..d3e2060bfb 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -278,22 +278,15 @@ def ewm_mean( result[mask_na] = None return self._with_native(result) - def scatter( - self, - indices: Self | int | Sequence[int], - values: Self | PythonLiteral | Sequence[PythonLiteral] | None, - ) -> Self: - indices_native = ( - indices.native if isinstance(indices, self.__class__) else indices + def scatter(self, indices: Self, values: Self) -> Self: + indices_native = indices.native + values_native = set_index( + values.native, + self.native.index[indices_native], + implementation=self._implementation, ) - if isinstance(values, self.__class__): - values = set_index( - values.native, - self.native.index[indices_native], - implementation=self._implementation, - ) s = self.native.copy(deep=True) - s.iloc[indices_native] = values + s.iloc[indices_native] = values_native s.name = self.name return self._with_native(s) diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 589fbe5ff5..1c815775dd 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -504,11 +504,7 @@ def sort(self, *, descending: bool, nulls_last: bool) -> Self: return self._with_native(result) - def scatter( - self, - indices: Self | int | Sequence[int], - values: Self | PythonLiteral | Sequence[PythonLiteral] | None, - ) -> Self: + def scatter(self, indices: Self, values: Self) -> Self: s = self.native.clone().scatter(extract_native(indices), extract_native(values)) return self._with_native(s) diff --git a/narwhals/series.py b/narwhals/series.py index f2cc7df467..4bb59ee4c8 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -396,8 +396,8 @@ def to_native(self) -> IntoSeriesT: def scatter( self, - indices: Self | Sequence[int] | int, - values: Self | Sequence[PythonLiteral] | PythonLiteral | None, + indices: Self | Iterable[int] | int, + 
values: Self | Iterable[PythonLiteral] | PythonLiteral | None, ) -> Self: """Set value(s) at the given index location(s). @@ -440,6 +440,20 @@ def scatter( a: [[999,888,3]] b: [[4,5,6]] """ + impl = self.implementation + if not isinstance(indices, Iterable): + indices = Series.from_iterable(name="", values=[indices], backend=impl) + elif not isinstance(indices, Series): + indices = Series.from_iterable(name="", values=indices, backend=impl) + + if indices.is_empty(): + return self + + if isinstance(values, str) or not isinstance(values, Iterable): + values = Series.from_iterable(name="", values=[values], backend=impl) + elif not isinstance(values, Series): + values = Series.from_iterable(name="", values=values, backend=impl) + compliant_indices = self._extract_native(indices) compliant_values = self._extract_native(values) return self._with_compliant( From 1a1d3235ac8f46e6a1fda3828ff3d958f4978641 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 3 Feb 2026 13:05:36 +0000 Subject: [PATCH 03/10] test: Port updated tests from (#2572) https://github.com/narwhals-dev/narwhals/pull/3444#issuecomment-3837127487 --- tests/series_only/scatter_test.py | 132 +++++++++++++----------------- 1 file changed, 55 insertions(+), 77 deletions(-) diff --git a/tests/series_only/scatter_test.py b/tests/series_only/scatter_test.py index 49dc254457..d608f78f1a 100644 --- a/tests/series_only/scatter_test.py +++ b/tests/series_only/scatter_test.py @@ -1,23 +1,59 @@ from __future__ import annotations +from functools import partial +from typing import TYPE_CHECKING, Any + import pytest import narwhals as nw -from tests.utils import ConstructorEager, assert_equal_data - - -def test_scatter(constructor_eager: ConstructorEager) -> None: - df = nw.from_native( - constructor_eager({"a": [1, 2, 3], "b": [142, 124, 132]}), eager_only=True - ) - result = df.with_columns( - df["a"].scatter([0, 1], [999, 888]), df["b"].scatter([0, 2, 1], df["b"]) - 
) - expected = {"a": [999, 888, 3], "b": [142, 132, 124]} - assert_equal_data(result, expected) - - -def test_scatter_indices() -> None: +from tests.utils import ConstructorEager, assert_equal_data, assert_equal_series + +if TYPE_CHECKING: + from collections.abc import Collection + + +def series(frame: ConstructorEager, name: str, values: Collection[Any]) -> nw.Series[Any]: + return nw.from_native(frame({name: values})).get_column(name) + + +@pytest.mark.filterwarnings( + "ignore:.*all arguments of to_dict except for the argument:FutureWarning" +) +@pytest.mark.parametrize( + ("data", "indices", "values", "expected"), + [ + ([142, 124, 13], [0, 2, 1], (142, 124, 13), [142, 13, 124]), + ([1, 2, 3], 0, 999, [999, 2, 3]), + ( + [16, 12, 10, 9, 6, 5, 2], + (6, 1, 0, 5, 3, 2, 4), + [16, 12, 10, 9, 6, 5, 2], + [10, 12, 5, 6, 2, 9, 16], + ), + ([5.5, 9.2, 1.0], (), (), [5.5, 9.2, 1.0]), + ], + ids=["single-series", "integer", "unordered-indices", "empty-indices"], +) +def test_scatter( + data: list[Any], + indices: int | Collection[int], + values: int | Collection[int], + expected: list[Any], + constructor_eager: ConstructorEager, +) -> None: + constructor = partial(series, constructor_eager) + s = constructor("s", data) + df = s.to_frame().with_row_index("dont change me") + unchanged_indexed = df.to_dict(as_series=False) + assert_equal_series(s.scatter(indices, values), expected, "s") + if not isinstance(indices, int): + assert_equal_series(s.scatter(constructor("i", indices), values), expected, "s") + if not isinstance(values, int): + assert_equal_series(s.scatter(indices, constructor("v", values)), expected, "s") + assert_equal_data(df, unchanged_indexed) + + +def test_scatter_pandas_index() -> None: pytest.importorskip("pandas") import pandas as pd @@ -27,66 +63,8 @@ def test_scatter_indices() -> None: pd.testing.assert_series_equal(result.to_native(), expected) -def test_scatter_unchanged(constructor_eager: ConstructorEager) -> None: - df = nw.from_native( - 
constructor_eager({"a": [1, 2, 3], "b": [142, 124, 132]}), eager_only=True - ) - df.with_columns( - df["a"].scatter([0, 1], [999, 888]), df["b"].scatter([0, 2, 1], [142, 124, 132]) - ) - expected = {"a": [1, 2, 3], "b": [142, 124, 132]} - assert_equal_data(df, expected) - - -def test_single_series(constructor_eager: ConstructorEager) -> None: - df = nw.from_native( - constructor_eager({"a": [1, 2, 3], "b": [142, 124, 132]}), eager_only=True - ) - s = df["a"] - s.scatter([0, 1], [999, 888]) - expected = {"a": [1, 2, 3]} - assert_equal_data({"a": s}, expected) - - -def test_scatter_integer(constructor_eager: ConstructorEager) -> None: - df = nw.from_native( - constructor_eager({"a": [1, 2, 3], "b": [142, 124, 132]}), eager_only=True - ) - s = df["a"] - result = s.scatter(0, 999) - expected = {"a": [999, 2, 3]} - assert_equal_data({"a": result}, expected) - - -def test_scatter_unordered_indices(constructor_eager: ConstructorEager) -> None: - data = {"a": [16, 12, 10, 9, 6, 5, 2]} - indices = [6, 1, 0, 5, 3, 2, 4] - df = nw.from_native(constructor_eager(data)) - result = df["a"].scatter(indices, df["a"]) - assert_equal_data({"a": result}, {"a": [10, 12, 5, 6, 2, 9, 16]}) - - def test_scatter_2862(constructor_eager: ConstructorEager) -> None: - df = nw.from_native( - constructor_eager({"a": [1, 2, 3], "b": [142, 124, 132]}), eager_only=True - ) - ser = df["a"] - result = ser.scatter(1, 999) - expected = {"a": [1, 999, 3]} - assert_equal_data({"a": result}, expected) - result = ser.scatter([0, 2], [999, 888]) - expected = {"a": [999, 2, 888]} - assert_equal_data({"a": result}, expected) - result = ser.scatter([2, 0], [999, 888]) - expected = {"a": [888, 2, 999]} - assert_equal_data({"a": result}, expected) - - -def test_scatter_series_indices(constructor_eager: ConstructorEager) -> None: - df = nw.from_native( - constructor_eager({"a": [1, 2, 3], "idx": [0, 2, 1]}), eager_only=True - ) - indices = df["idx"] - result = df["a"].scatter(indices, [999, 888, 777]) - expected 
= {"a": [999, 777, 888]} - assert_equal_data({"a": result}, expected) + s = series(constructor_eager, "a", [1, 2, 3]) + assert_equal_series(s.scatter(1, 999), [1, 999, 3], "a") + assert_equal_series(s.scatter([0, 2], [999, 888]), [999, 2, 888], "a") + assert_equal_series(s.scatter([2, 0], [999, 888]), [888, 2, 999], "a") From 8ff3927dd926744a16179c7ac28ff8d90c9252b5 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Tue, 3 Feb 2026 15:36:40 +0100 Subject: [PATCH 04/10] apply Dan's suggestions --- narwhals/_polars/series.py | 2 +- narwhals/series.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 1c815775dd..b77bba4eb9 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -505,7 +505,7 @@ def sort(self, *, descending: bool, nulls_last: bool) -> Self: return self._with_native(result) def scatter(self, indices: Self, values: Self) -> Self: - s = self.native.clone().scatter(extract_native(indices), extract_native(values)) + s = self.native.clone().scatter(indices.native, values.native) return self._with_native(s) def value_counts( diff --git a/narwhals/series.py b/narwhals/series.py index 4bb59ee4c8..7d192b4798 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -397,7 +397,7 @@ def to_native(self) -> IntoSeriesT: def scatter( self, indices: Self | Iterable[int] | int, - values: Self | Iterable[PythonLiteral] | PythonLiteral | None, + values: Self | Iterable[PythonLiteral] | PythonLiteral, ) -> Self: """Set value(s) at the given index location(s). 
From 6b23fdce867dbfae5c46b227180c029306d83284 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 5 Feb 2026 10:40:58 +0100 Subject: [PATCH 05/10] try fix old pandas --- narwhals/_pandas_like/series.py | 11 +++++++---- narwhals/series.py | 9 +++++++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index ade789942d..69b0f418ad 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -281,14 +281,17 @@ def ewm_mean( return self._with_native(result) def scatter(self, indices: Self, values: Self) -> Self: + impl = self._implementation indices_native = indices.native values_native = set_index( - values.native, - self.native.index[indices_native], - implementation=self._implementation, + values.native, self.native.index[indices_native], implementation=impl ) s = self.native.copy(deep=True) - s.iloc[indices_native] = values_native + min_pd_version = (1, 2) + if impl.is_pandas() and self._backend_version < min_pd_version: + s.iloc[indices_native.values] = values_native # noqa: PD011 + else: + s.iloc[indices_native] = values_native s.name = self.name return self._with_native(s) diff --git a/narwhals/series.py b/narwhals/series.py index 0e5d8be00d..48ab019835 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -441,10 +441,15 @@ def scatter( b: [[4,5,6]] """ impl = self.implementation + Int64 = self._version.dtypes.Int64() # noqa: N806 if not isinstance(indices, Iterable): - indices = Series.from_iterable(name="", values=[indices], backend=impl) + indices = Series.from_iterable( + name="", values=[indices], dtype=Int64, backend=impl + ) elif not isinstance(indices, Series): - indices = Series.from_iterable(name="", values=indices, backend=impl) + indices = Series.from_iterable( + name="", values=indices, dtype=Int64, backend=impl + ) if indices.is_empty(): return self From b714afba92859780b53983285441cc691aacdaf9 Mon Sep 17 00:00:00 2001 From: FBruzzesi 
Date: Thu, 5 Feb 2026 12:42:54 +0100 Subject: [PATCH 06/10] refactor pandas-like scatter and _scatter_in_place --- narwhals/_pandas_like/series.py | 57 ++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 69b0f418ad..0085965007 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -2,7 +2,7 @@ import operator import warnings -from typing import TYPE_CHECKING, Any, Callable, Literal +from typing import TYPE_CHECKING, Any, Callable, Literal, overload import numpy as np @@ -280,37 +280,44 @@ def ewm_mean( result[mask_na] = None return self._with_native(result) - def scatter(self, indices: Self, values: Self) -> Self: - impl = self._implementation - indices_native = indices.native + @overload + def _scatter( + self, indices: Self, values: Self, *, in_place: Literal[True] + ) -> None: ... + @overload + def _scatter( + self, indices: Self, values: Self, *, in_place: Literal[False] + ) -> Self: ... 
+ + def _scatter(self, indices: Self, values: Self, *, in_place: bool) -> Self | None: + impl, backend_version = self._implementation, self._backend_version + native_series, indices_native = self.native, indices.native values_native = set_index( - values.native, self.native.index[indices_native], implementation=impl + values.native, native_series.index[indices_native], implementation=impl ) - s = self.native.copy(deep=True) + series = native_series if in_place else native_series.copy(deep=True) + + if (is_pandas := impl.is_pandas()) and in_place and parse_version(np) < (2,): + values_native = values_native.copy() # pragma: no cover + min_pd_version = (1, 2) - if impl.is_pandas() and self._backend_version < min_pd_version: - s.iloc[indices_native.values] = values_native # noqa: PD011 + if is_pandas and backend_version < min_pd_version: + series.iloc[indices_native.values] = values_native # noqa: PD011 else: - s.iloc[indices_native] = values_native - s.name = self.name - return self._with_native(s) + series.iloc[indices_native] = values_native + + if in_place: + return None + + series.name = self.name + return self._with_native(series) + + def scatter(self, indices: Self, values: Self) -> Self: + return self._scatter(indices=indices, values=values, in_place=False) def _scatter_in_place(self, indices: Self, values: Self) -> None: # Scatter, modifying original Series. Use with care! 
- implementation = self._implementation - backend_version = self._backend_version - values_native = set_index( - values.native, - self.native.index[indices.native], - implementation=implementation, - ) - if implementation is Implementation.PANDAS and parse_version(np) < (2,): - values_native = values_native.copy() # pragma: no cover - min_pd_version = (1, 2) - if implementation is Implementation.PANDAS and backend_version < min_pd_version: - self.native.iloc[indices.native.values] = values_native # noqa: PD011 - else: - self.native.iloc[indices.native] = values_native + return self._scatter(indices=indices, values=values, in_place=True) def cast(self, dtype: IntoDType) -> Self: if self.dtype == dtype and self.native.dtype != "object": From 509ff1c9c6ddeee9a128b80905b432af1036aa9c Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 5 Feb 2026 12:45:45 +0100 Subject: [PATCH 07/10] rm _scatter_in_place in favor of _scatter(..., in_place=True) --- narwhals/_pandas_like/expr.py | 4 ++-- narwhals/_pandas_like/series.py | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 40f7560552..f2e162227e 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -238,7 +238,7 @@ def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]: sorting_indices = df.get_column(token) for s in results: - s._scatter_in_place(sorting_indices, s) + s._scatter(sorting_indices, s, in_place=True) return results return self.__class__( @@ -384,7 +384,7 @@ def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]: # noqa: C901, # Ignore settingwithcopy warnings/errors, they're false-positives here. 
warnings.filterwarnings("ignore", message="\n.*copy of a slice") for s in results: - s._scatter_in_place(sorting_indices, s) + s._scatter(sorting_indices, s, in_place=True) return results if reverse: return [s._gather_slice(slice(None, None, -1)) for s in results] diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 0085965007..28d40a8f99 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -315,10 +315,6 @@ def _scatter(self, indices: Self, values: Self, *, in_place: bool) -> Self | Non def scatter(self, indices: Self, values: Self) -> Self: return self._scatter(indices=indices, values=values, in_place=False) - def _scatter_in_place(self, indices: Self, values: Self) -> None: - # Scatter, modifying original Series. Use with care! - return self._scatter(indices=indices, values=values, in_place=True) - def cast(self, dtype: IntoDType) -> Self: if self.dtype == dtype and self.native.dtype != "object": # Avoid dealing with pandas' type-system if we can. 
Note that it's only From 2d19bff25a0ccff4a89ae5e5c1ee467fbae57ac4 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 7 Feb 2026 15:58:39 +0100 Subject: [PATCH 08/10] simplify top level and pandas implementations --- narwhals/_pandas_like/series.py | 24 +++++++++------------- narwhals/_pandas_like/utils.py | 9 +++++++++ narwhals/series.py | 35 +++++++++++++++------------------ 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 28d40a8f99..0a8ea48b0b 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -4,8 +4,6 @@ import warnings from typing import TYPE_CHECKING, Any, Callable, Literal, overload -import numpy as np - from narwhals._compliant import EagerSeries, EagerSeriesHist from narwhals._pandas_like.series_cat import PandasLikeSeriesCatNamespace from narwhals._pandas_like.series_dt import PandasLikeSeriesDateTimeNamespace @@ -13,6 +11,7 @@ from narwhals._pandas_like.series_str import PandasLikeSeriesStringNamespace from narwhals._pandas_like.series_struct import PandasLikeSeriesStructNamespace from narwhals._pandas_like.utils import ( + NUMPY_VERSION, align_and_extract_native, broadcast_series_to_index, get_dtype_backend, @@ -25,7 +24,7 @@ set_index, ) from narwhals._typing_compat import assert_never -from narwhals._utils import Implementation, is_list_of, no_default, parse_version +from narwhals._utils import Implementation, is_list_of, no_default from narwhals.dependencies import is_numpy_array_1d, is_pandas_like_series from narwhals.exceptions import InvalidOperationError @@ -297,20 +296,15 @@ def _scatter(self, indices: Self, values: Self, *, in_place: bool) -> Self | Non ) series = native_series if in_place else native_series.copy(deep=True) - if (is_pandas := impl.is_pandas()) and in_place and parse_version(np) < (2,): - values_native = values_native.copy() # pragma: no cover - - min_pd_version = (1, 2) - if is_pandas and backend_version < 
min_pd_version: - series.iloc[indices_native.values] = values_native # noqa: PD011 - else: - series.iloc[indices_native] = values_native + if impl.is_pandas(): + if in_place and NUMPY_VERSION < (2,): # pragma: no cover + values_native = values_native.copy() + if backend_version < (1, 2): + indices_native = indices_native.array - if in_place: - return None + series.iloc[indices_native] = values_native - series.name = self.name - return self._with_native(series) + return None if in_place else self._with_native(series) def scatter(self, indices: Self, values: Self) -> Self: return self._scatter(indices=indices, values=values, in_place=False) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index eee0833763..8d8476704a 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -5,6 +5,7 @@ import re from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar, cast +import numpy as np import pandas as pd from narwhals._compliant import EagerSeriesNamespace @@ -23,6 +24,7 @@ _DeferredIterable, check_columns_exist, isinstance_or_issubclass, + parse_version, requires, ) from narwhals.exceptions import ShapeError @@ -114,6 +116,13 @@ Always available if we reached here, due to a module-level import. """ +NUMPY_VERSION = parse_version(np) +"""Static version for `numpy`. + +Always available if we reached here, as imported in both _pandas_like/dataframe.py and +_pandas_like/series.py. 
+""" + def is_pandas_or_modin(implementation: Implementation) -> bool: return implementation in {Implementation.PANDAS, Implementation.MODIN} diff --git a/narwhals/series.py b/narwhals/series.py index 48ab019835..48cd69f10a 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -2,6 +2,7 @@ import math from collections.abc import Iterable, Iterator, Mapping, Sequence +from functools import partial from typing import ( TYPE_CHECKING, Any, @@ -440,30 +441,26 @@ def scatter( a: [[999,888,3]] b: [[4,5,6]] """ - impl = self.implementation - Int64 = self._version.dtypes.Int64() # noqa: N806 - if not isinstance(indices, Iterable): - indices = Series.from_iterable( - name="", values=[indices], dtype=Int64, backend=impl - ) - elif not isinstance(indices, Series): - indices = Series.from_iterable( - name="", values=indices, dtype=Int64, backend=impl - ) + into_series = partial( + type(self).from_iterable, name="", backend=self.implementation + ) + + if not isinstance(indices, Series): + if not isinstance(indices, Iterable): + indices = [indices] + dtypes = self._version.dtypes + indices = into_series(values=indices, dtype=dtypes.Int64) if indices.is_empty(): return self - if isinstance(values, str) or not isinstance(values, Iterable): - values = Series.from_iterable(name="", values=[values], backend=impl) - elif not isinstance(values, Series): - values = Series.from_iterable(name="", values=values, backend=impl) + if not isinstance(values, Series): + if not isinstance(values, Iterable) or isinstance(values, str): + values = [values] + values = into_series(values=values) - compliant_indices = self._extract_native(indices) - compliant_values = self._extract_native(values) - return self._with_compliant( - self._compliant_series.scatter(compliant_indices, compliant_values) - ) + result = self._compliant.scatter(indices._compliant, values._compliant) + return self._with_compliant(result) @property def shape(self) -> tuple[int]: From 5914403669eb78aac74c1a6d663d534681ab3bb0 
Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 7 Feb 2026 16:02:27 +0100 Subject: [PATCH 09/10] use to_numpy instead --- narwhals/_pandas_like/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 0a8ea48b0b..fe7defa907 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -300,7 +300,7 @@ def _scatter(self, indices: Self, values: Self, *, in_place: bool) -> Self | Non if in_place and NUMPY_VERSION < (2,): # pragma: no cover values_native = values_native.copy() if backend_version < (1, 2): - indices_native = indices_native.array + indices_native = indices_native.to_numpy() series.iloc[indices_native] = values_native From 4c6dcd377b44aca3aa08073a3081a6b9194c4f78 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 19 Feb 2026 11:55:49 +0100 Subject: [PATCH 10/10] apply Dan's suggestion for pandas --- narwhals/_pandas_like/expr.py | 4 ++-- narwhals/_pandas_like/series.py | 19 ++++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index f2e162227e..944a136b11 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -238,7 +238,7 @@ def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]: sorting_indices = df.get_column(token) for s in results: - s._scatter(sorting_indices, s, in_place=True) + s.scatter(sorting_indices, s, in_place=True) return results return self.__class__( @@ -384,7 +384,7 @@ def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]: # noqa: C901, # Ignore settingwithcopy warnings/errors, they're false-positives here. 
warnings.filterwarnings("ignore", message="\n.*copy of a slice") for s in results: - s._scatter(sorting_indices, s, in_place=True) + s.scatter(sorting_indices, s, in_place=True) return results if reverse: return [s._gather_slice(slice(None, None, -1)) for s in results] diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index fe7defa907..f0dc6c4e48 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -280,16 +280,20 @@ def ewm_mean( return self._with_native(result) @overload - def _scatter( + def scatter( self, indices: Self, values: Self, *, in_place: Literal[True] ) -> None: ... @overload - def _scatter( - self, indices: Self, values: Self, *, in_place: Literal[False] + def scatter( + self, indices: Self, values: Self, *, in_place: Literal[False] = False ) -> Self: ... - def _scatter(self, indices: Self, values: Self, *, in_place: bool) -> Self | None: - impl, backend_version = self._implementation, self._backend_version + def scatter( + self, indices: Self, values: Self, *, in_place: bool = False + ) -> Self | None: + # !NOTE: See conversation at https://github.com/narwhals-dev/narwhals/pull/3444#discussion_r2787546529 + # to understand why signature differs from `CompliantSeries` + impl = self._implementation native_series, indices_native = self.native, indices.native values_native = set_index( values.native, native_series.index[indices_native], implementation=impl @@ -299,16 +303,13 @@ def _scatter(self, indices: Self, values: Self, *, in_place: bool) -> Self | Non if impl.is_pandas(): if in_place and NUMPY_VERSION < (2,): # pragma: no cover values_native = values_native.copy() - if backend_version < (1, 2): + if self._backend_version < (1, 2): indices_native = indices_native.to_numpy() series.iloc[indices_native] = values_native return None if in_place else self._with_native(series) - def scatter(self, indices: Self, values: Self) -> Self: - return self._scatter(indices=indices, values=values, 
in_place=False) - def cast(self, dtype: IntoDType) -> Self: if self.dtype == dtype and self.native.dtype != "object": # Avoid dealing with pandas' type-system if we can. Note that it's only