Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 40 additions & 41 deletions .github/workflows/downstream_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -151,48 +151,47 @@ jobs:
cd scikit-lego
pytest -n auto --disable-warnings --cov=sklego -m "not cvxpy and not formulaic and not umap"

# temporarily un-enabled as it's been failing for some time due to unrelated reasons
# shiny:
# strategy:
# matrix:
# python-version: ["3.12"]
# os: [ubuntu-latest]
shiny:
strategy:
matrix:
python-version: ["3.12"]
os: [ubuntu-latest]

# runs-on: ${{ matrix.os }}
# steps:
# - uses: actions/checkout@v4
# - uses: actions/setup-python@v5
# with:
# python-version: ${{ matrix.python-version }}
# - name: Install uv
# uses: astral-sh/setup-uv@v5
# with:
# enable-cache: "true"
# cache-suffix: ${{ matrix.python-version }}
# cache-dependency-glob: "pyproject.toml"
# - name: clone-shiny
# run: |
# git clone https://github.com/posit-dev/py-shiny.git
# cd py-shiny
# git log
# - name: install-basics
# run: uv pip install --upgrade tox virtualenv setuptools --system
# - name: install-shiny-dev
# env:
# UV_SYSTEM_PYTHON: 1
# run: |
# cd py-shiny
# make narwhals-install-shiny
# - name: install-narwhals-dev
# run: |
# uv pip uninstall narwhals --system
# uv pip install -e . --system
# - name: show-deps
# run: uv pip freeze
# - name: Run `make narwhals-test-integration`
# run: |
# cd py-shiny
# make narwhals-test-integration
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: "true"
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "pyproject.toml"
- name: clone-shiny
run: |
git clone https://github.com/posit-dev/py-shiny.git
cd py-shiny
git log
- name: install-basics
run: uv pip install --upgrade tox virtualenv setuptools --system
- name: install-shiny-dev
env:
UV_SYSTEM_PYTHON: 1
run: |
cd py-shiny
make narwhals-install-shiny
- name: install-narwhals-dev
run: |
uv pip uninstall narwhals --system
uv pip install -e . --system
- name: show-deps
run: uv pip freeze
- name: Run `make narwhals-test-integration`
run: |
cd py-shiny
make narwhals-test-integration

tea-tasting:
strategy:
Expand Down
3 changes: 3 additions & 0 deletions docs/backcompat.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ before making any change.

The following are differences between the main Narwhals namespace and `narwhals.stable.v1`:

- Since Narwhals 1.24.1, an empty or all-null object-dtype pandas Series is inferred to
be of dtype `String`. Previously, it would have been inferred as `Object`.

- Since Narwhals 1.23:

- Passing an `ibis.Table` to `from_native` returns a `LazyFrame`. In
Expand Down
3 changes: 2 additions & 1 deletion narwhals/_dask/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,10 @@ def drop_nulls(self: Self, subset: list[str] | None) -> Self:

@property
def schema(self: Self) -> dict[str, DType]:
native_dtypes = self._native_frame.dtypes
return {
col: native_to_narwhals_dtype(
self._native_frame[col], self._version, self._implementation
native_dtypes[col], self._version, self._implementation
)
for col in self._native_frame.columns
}
Expand Down
6 changes: 4 additions & 2 deletions narwhals/_dask/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def median(self: Self) -> Self:
from narwhals.exceptions import InvalidOperationError

def func(s: dx.Series) -> dx.Series:
dtype = native_to_narwhals_dtype(s, self._version, Implementation.DASK)
dtype = native_to_narwhals_dtype(s.dtype, self._version, Implementation.DASK)
if not dtype.is_numeric():
msg = "`median` operation not supported for non-numeric input type."
raise InvalidOperationError(msg)
Expand Down Expand Up @@ -553,7 +553,9 @@ def is_null(self: Self) -> Self:

def is_nan(self: Self) -> Self:
def func(_input: dx.Series) -> dx.Series:
dtype = native_to_narwhals_dtype(_input, self._version, self._implementation)
dtype = native_to_narwhals_dtype(
_input.dtype, self._version, self._implementation
)
if dtype.is_numeric():
return _input != _input # noqa: PLR0124
msg = f"`.is_nan` only supported for numeric dtypes and not {dtype}, did you mean `.is_null`?"
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_dask/expr_dt.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def replace_time_zone(self: Self, time_zone: str | None) -> DaskExpr:
def convert_time_zone(self: Self, time_zone: str) -> DaskExpr:
def func(s: dx.Series, time_zone: str) -> dx.Series:
dtype = native_to_narwhals_dtype(
s, self._compliant_expr._version, Implementation.DASK
s.dtype, self._compliant_expr._version, Implementation.DASK
)
if dtype.time_zone is None: # type: ignore[attr-defined]
return s.dt.tz_localize("UTC").dt.tz_convert(time_zone)
Expand All @@ -148,7 +148,7 @@ def func(s: dx.Series, time_zone: str) -> dx.Series:
def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"]) -> DaskExpr:
def func(s: dx.Series, time_unit: Literal["ns", "us", "ms"]) -> dx.Series:
dtype = native_to_narwhals_dtype(
s, self._compliant_expr._version, Implementation.DASK
s.dtype, self._compliant_expr._version, Implementation.DASK
)
is_pyarrow_dtype = "pyarrow" in str(dtype)
mask_na = s.isna()
Expand Down
58 changes: 42 additions & 16 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,19 @@
from typing import Sequence
from typing import overload

import numpy as np

from narwhals._expression_parsing import evaluate_into_exprs
from narwhals._pandas_like.series import PANDAS_TO_NUMPY_DTYPE_MISSING
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals._pandas_like.utils import broadcast_and_extract_dataframe_comparand
from narwhals._pandas_like.utils import broadcast_series
from narwhals._pandas_like.utils import check_column_names_are_unique
from narwhals._pandas_like.utils import convert_str_slice_to_int_slice
from narwhals._pandas_like.utils import create_compliant_series
from narwhals._pandas_like.utils import horizontal_concat
from narwhals._pandas_like.utils import native_to_narwhals_dtype
from narwhals._pandas_like.utils import object_native_to_narwhals_dtype
from narwhals._pandas_like.utils import pivot_table
from narwhals._pandas_like.utils import rename
from narwhals._pandas_like.utils import select_columns_by_name
Expand All @@ -36,14 +41,12 @@
from pathlib import Path
from types import ModuleType

import numpy as np
import pandas as pd
import polars as pl
from typing_extensions import Self

from narwhals._pandas_like.group_by import PandasLikeGroupBy
from narwhals._pandas_like.namespace import PandasLikeNamespace
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals._pandas_like.typing import IntoPandasLikeExpr
from narwhals.dtypes import DType
from narwhals.typing import SizeUnit
Expand All @@ -52,6 +55,31 @@
from narwhals.typing import CompliantDataFrame
from narwhals.typing import CompliantLazyFrame

# Plain NumPy dtypes. `to_numpy` checks whether every column dtype of the
# native frame belongs to this set and, if so, takes its fast path and calls
# the native `to_numpy` directly without any per-column conversion.
CLASSICAL_NUMPY_DTYPES = frozenset(
    np.dtype(dtype_name)
    for dtype_name in (
        # floating point
        "float64",
        "float32",
        # signed integers
        "int64",
        "int32",
        "int16",
        "int8",
        # unsigned integers
        "uint64",
        "uint32",
        "uint16",
        "uint8",
        # boolean
        "bool",
        # datetimes, one entry per supported resolution
        "datetime64[s]",
        "datetime64[ms]",
        "datetime64[us]",
        "datetime64[ns]",
        # timedeltas, one entry per supported resolution
        "timedelta64[s]",
        "timedelta64[ms]",
        "timedelta64[us]",
        "timedelta64[ns]",
        # generic Python objects
        "object",
    )
)


class PandasLikeDataFrame(CompliantDataFrame, CompliantLazyFrame):
# --- not in the spec ---
Expand Down Expand Up @@ -120,8 +148,6 @@ def _from_native_frame(
)

def get_column(self: Self, name: str) -> PandasLikeSeries:
from narwhals._pandas_like.series import PandasLikeSeries

return PandasLikeSeries(
self._native_frame[name],
implementation=self._implementation,
Expand Down Expand Up @@ -179,8 +205,6 @@ def __getitem__(
item = tuple(list(i) if is_sequence_but_not_str(i) else i for i in item) # type: ignore[assignment]

if isinstance(item, str):
from narwhals._pandas_like.series import PandasLikeSeries

return PandasLikeSeries(
self._native_frame[item],
implementation=self._implementation,
Expand Down Expand Up @@ -238,8 +262,6 @@ def __getitem__(
raise TypeError(msg) # pragma: no cover

elif isinstance(item, tuple) and len(item) == 2:
from narwhals._pandas_like.series import PandasLikeSeries

if isinstance(item[1], str):
item = (item[0], self._native_frame.columns.get_loc(item[1])) # type: ignore[assignment]
native_series = self._native_frame.iloc[item]
Expand Down Expand Up @@ -344,8 +366,13 @@ def iter_rows(

@property
def schema(self: Self) -> dict[str, DType]:
native_dtypes = self._native_frame.dtypes
return {
col: native_to_narwhals_dtype(
native_dtypes[col], self._version, self._implementation
)
if native_dtypes[col] != "object"
else object_native_to_narwhals_dtype(
self._native_frame[col], self._version, self._implementation
)
for col in self._native_frame.columns
Expand Down Expand Up @@ -820,8 +847,6 @@ def shape(self: Self) -> tuple[int, int]:
return self._native_frame.shape # type: ignore[no-any-return]

def to_dict(self: Self, *, as_series: bool) -> dict[str, Any]:
from narwhals._pandas_like.series import PandasLikeSeries

if as_series:
return {
col: PandasLikeSeries(
Expand All @@ -835,7 +860,12 @@ def to_dict(self: Self, *, as_series: bool) -> dict[str, Any]:
return self._native_frame.to_dict(orient="list") # type: ignore[no-any-return]

def to_numpy(self: Self, dtype: Any = None, copy: bool | None = None) -> np.ndarray:
from narwhals._pandas_like.series import PANDAS_TO_NUMPY_DTYPE_MISSING
native_dtypes = self._native_frame.dtypes
if native_dtypes.isin(CLASSICAL_NUMPY_DTYPES).all():
# Fast path, no conversions necessary.
if dtype is not None:
return self._native_frame.to_numpy(dtype=dtype, copy=copy)
return self._native_frame.to_numpy(copy=copy)

if copy is None:
# pandas default differs from Polars, but cuDF default is True
Expand Down Expand Up @@ -865,7 +895,7 @@ def to_numpy(self: Self, dtype: Any = None, copy: bool | None = None) -> np.ndar
# so we cast each Series to numpy and let numpy find a common dtype.
# If there aren't any dtypes where `to_numpy()` is "broken" (i.e. it
# returns Object) then we just call `to_numpy()` on the DataFrame.
for col_dtype in df.dtypes:
for col_dtype in native_dtypes:
if str(col_dtype) in PANDAS_TO_NUMPY_DTYPE_MISSING:
import numpy as np

Expand Down Expand Up @@ -913,8 +943,6 @@ def write_csv(self: Self, file: str | Path | BytesIO | None) -> str | None:

# --- descriptive ---
def is_duplicated(self: Self) -> PandasLikeSeries:
from narwhals._pandas_like.series import PandasLikeSeries

return PandasLikeSeries(
self._native_frame.duplicated(keep=False),
implementation=self._implementation,
Expand All @@ -926,8 +954,6 @@ def is_empty(self: Self) -> bool:
return self._native_frame.empty # type: ignore[no-any-return]

def is_unique(self: Self) -> PandasLikeSeries:
from narwhals._pandas_like.series import PandasLikeSeries

return PandasLikeSeries(
~self._native_frame.duplicated(keep=False),
implementation=self._implementation,
Expand Down
10 changes: 8 additions & 2 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from narwhals._pandas_like.utils import narwhals_to_native_dtype
from narwhals._pandas_like.utils import native_series_from_iterable
from narwhals._pandas_like.utils import native_to_narwhals_dtype
from narwhals._pandas_like.utils import object_native_to_narwhals_dtype
from narwhals._pandas_like.utils import rename
from narwhals._pandas_like.utils import select_columns_by_name
from narwhals._pandas_like.utils import set_index
Expand Down Expand Up @@ -179,8 +180,13 @@ def shape(self: Self) -> tuple[int]:

@property
def dtype(self: Self) -> DType:
return native_to_narwhals_dtype(
self._native_series, self._version, self._implementation
native_dtype = self._native_series.dtype
return (
native_to_narwhals_dtype(native_dtype, self._version, self._implementation)
if native_dtype != "object"
else object_native_to_narwhals_dtype(
self._native_series, self._version, self._implementation
)
)

def ewm_mean(
Expand Down
Loading