diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py index 1cd8023a32..485e5f4e6a 100644 --- a/narwhals/_compliant/dataframe.py +++ b/narwhals/_compliant/dataframe.py @@ -11,7 +11,9 @@ CompliantSeriesT, EagerExprT, EagerSeriesT, + NativeDataFrameT, NativeFrameT, + NativeLazyFrameT, NativeSeriesT, ) from narwhals._translate import ( @@ -261,12 +263,12 @@ def write_parquet(self, file: str | Path | BytesIO) -> None: ... class CompliantLazyFrame( - _StoresNative[NativeFrameT], - FromNative[NativeFrameT], + _StoresNative[NativeLazyFrameT], + FromNative[NativeLazyFrameT], ToNarwhals[ToNarwhalsT_co], - Protocol[CompliantExprT_contra, NativeFrameT, ToNarwhalsT_co], + Protocol[CompliantExprT_contra, NativeLazyFrameT, ToNarwhalsT_co], ): - _native_frame: NativeFrameT + _native_frame: NativeLazyFrameT _implementation: Implementation _version: Version @@ -274,7 +276,9 @@ def __narwhals_lazyframe__(self) -> Self: ... def __narwhals_namespace__(self) -> Any: ... @classmethod - def from_native(cls, data: NativeFrameT, /, *, context: _LimitedContext) -> Self: ... + def from_native( + cls, data: NativeLazyFrameT, /, *, context: _LimitedContext + ) -> Self: ... def simple_select(self, *column_names: str) -> Self: """`select` where all args are column names.""" @@ -290,7 +294,7 @@ def aggregate(self, *exprs: CompliantExprT_contra) -> Self: def _with_version(self, version: Version) -> Self: ... @property - def native(self) -> NativeFrameT: + def native(self) -> NativeLazyFrameT: return self._native_frame @property @@ -354,10 +358,12 @@ def with_row_index(self, name: str, order_by: Sequence[str]) -> Self: ... class EagerDataFrame( - CompliantDataFrame[EagerSeriesT, EagerExprT, NativeFrameT, "DataFrame[NativeFrameT]"], - CompliantLazyFrame[EagerExprT, NativeFrameT, "DataFrame[NativeFrameT]"], + CompliantDataFrame[ + EagerSeriesT, EagerExprT, NativeDataFrameT, "DataFrame[NativeDataFrameT]" + ], + CompliantLazyFrame[EagerExprT, "Incomplete", "DataFrame[NativeDataFrameT]"], ValidateBackendVersion, - Protocol[EagerSeriesT, EagerExprT, NativeFrameT, NativeSeriesT], + Protocol[EagerSeriesT, EagerExprT, NativeDataFrameT, NativeSeriesT], ): @property def _backend_version(self) -> tuple[int, ...]: @@ -365,13 +371,15 @@ def _backend_version(self) -> tuple[int, ...]: def __narwhals_namespace__( self, - ) -> EagerNamespace[Self, EagerSeriesT, EagerExprT, NativeFrameT, NativeSeriesT]: ... + ) -> EagerNamespace[ + Self, EagerSeriesT, EagerExprT, NativeDataFrameT, NativeSeriesT + ]: ... - def to_narwhals(self) -> DataFrame[NativeFrameT]: + def to_narwhals(self) -> DataFrame[NativeDataFrameT]: return self._version.dataframe(self, level="full") def _with_native( - self, df: NativeFrameT, *, validate_column_names: bool = True + self, df: NativeDataFrameT, *, validate_column_names: bool = True ) -> Self: ... def _check_columns_exist(self, subset: Sequence[str]) -> ColumnNotFoundError | None: diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py index 0fb3c301e9..dbd6f9f7a8 100644 --- a/narwhals/_compliant/typing.py +++ b/narwhals/_compliant/typing.py @@ -23,6 +23,8 @@ from narwhals._compliant.window import WindowInputs from narwhals.typing import ( FillNullStrategy, + IntoLazyFrame, + NativeDataFrame, NativeFrame, NativeSeries, RankMethod, @@ -90,6 +92,8 @@ class ScalarKwargs(TypedDict, total=False): NativeSeriesT_contra = TypeVar( "NativeSeriesT_contra", bound="NativeSeries", contravariant=True ) +NativeDataFrameT = TypeVar("NativeDataFrameT", bound="NativeDataFrame") +NativeLazyFrameT = TypeVar("NativeLazyFrameT", bound="IntoLazyFrame") NativeFrameT = TypeVar("NativeFrameT", bound="NativeFrame") NativeFrameT_co = TypeVar("NativeFrameT_co", bound="NativeFrame", covariant=True) NativeFrameT_contra = TypeVar( diff --git a/narwhals/_interchange/dataframe.py b/narwhals/_interchange/dataframe.py index a222c9094e..a07f1fad4c 100644 --- a/narwhals/_interchange/dataframe.py +++ b/narwhals/_interchange/dataframe.py @@ -8,11 +8,11 @@ if TYPE_CHECKING: import pandas as pd import pyarrow as pa - from typing_extensions import Self + from typing_extensions import Self, TypeIs from narwhals._interchange.series import InterchangeSeries from narwhals.dtypes import DType - from narwhals.typing import DataFrameLike + from narwhals.stable.v1.typing import DataFrameLike class DtypeKind(enum.IntEnum): @@ -153,3 +153,7 @@ def select(self, *exprs: str) -> Self: # pragma: no cover "at https://github.com/narwhals-dev/narwhals/issues." ) raise NotImplementedError(msg) + + +def supports_dataframe_interchange(obj: Any) -> TypeIs[DataFrameLike]: + return hasattr(obj, "__dataframe__") diff --git a/narwhals/_namespace.py b/narwhals/_namespace.py index 4f937eb7ef..efe2a2bcdf 100644 --- a/narwhals/_namespace.py +++ b/narwhals/_namespace.py @@ -61,7 +61,7 @@ Polars, SparkLike, ) - from narwhals.typing import DataFrameLike, NativeFrame, NativeLazyFrame, NativeSeries + from narwhals.typing import NativeDataFrame, NativeLazyFrame, NativeSeries T = TypeVar("T") @@ -86,7 +86,7 @@ def rename(self, *args: Any, inplace: Literal[False], **kwds: Any) -> Self: """`inplace=False` is required to avoid (incorrect?) default overloads.""" ... - class _BasePandasLikeFrame(NativeFrame, _BasePandasLike, Protocol): ... + class _BasePandasLikeFrame(NativeDataFrame, _BasePandasLike, Protocol): ... class _BasePandasLikeSeries(NativeSeries, _BasePandasLike, Protocol): def where(self, cond: Any, other: Any = ..., **kwds: Any) -> Any: ... @@ -131,9 +131,7 @@ class _ModinSeries(_BasePandasLikeSeries, Protocol): ) NativeKnown: TypeAlias = "_NativePolars | _NativeArrow | _NativePandasLike | _NativeSparkLike | _NativeDuckDB | _NativeDask | _NativeIbis" - NativeUnknown: TypeAlias = ( - "NativeFrame | NativeSeries | NativeLazyFrame | DataFrameLike" - ) + NativeUnknown: TypeAlias = "NativeDataFrame | NativeSeries | NativeLazyFrame" NativeAny: TypeAlias = "NativeKnown | NativeUnknown" __all__ = ["Namespace"] diff --git a/narwhals/_sql/dataframe.py b/narwhals/_sql/dataframe.py index 540ed7d0d8..356a77373f 100644 --- a/narwhals/_sql/dataframe.py +++ b/narwhals/_sql/dataframe.py @@ -3,7 +3,11 @@ from typing import TYPE_CHECKING, Any, Protocol from narwhals._compliant.dataframe import CompliantLazyFrame -from narwhals._compliant.typing import CompliantExprT_contra, NativeExprT, NativeFrameT +from narwhals._compliant.typing import ( + CompliantExprT_contra, + NativeExprT, + NativeLazyFrameT, +) from narwhals._translate import ToNarwhalsT_co from narwhals._utils import check_columns_exist @@ -20,8 +24,8 @@ class SQLLazyFrame( - CompliantLazyFrame[CompliantExprT_contra, NativeFrameT, ToNarwhalsT_co], - Protocol[CompliantExprT_contra, NativeFrameT, ToNarwhalsT_co], + CompliantLazyFrame[CompliantExprT_contra, NativeLazyFrameT, ToNarwhalsT_co], + Protocol[CompliantExprT_contra, NativeLazyFrameT, ToNarwhalsT_co], ): def _evaluate_window_expr( self, diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 8073f6a886..0d7aa2d13d 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -73,7 +73,7 @@ NativeFrameT_co, NativeSeriesT_co, ) - from narwhals._compliant.typing import EvalNames + from narwhals._compliant.typing import EvalNames, NativeLazyFrameT from narwhals._namespace import Namespace from narwhals._translate import ArrowStreamExportable, IntoArrowTable, ToNarwhalsT_co from narwhals._typing import ( @@ -90,7 +90,6 @@ CompliantDataFrame, CompliantLazyFrame, CompliantSeries, - DataFrameLike, DTypes, IntoSeriesT, MultiIndexSelector, @@ -1623,8 +1622,8 @@ def is_compliant_dataframe( def is_compliant_lazyframe( - obj: CompliantLazyFrame[CompliantExprT, NativeFrameT_co, ToNarwhalsT_co] | Any, -) -> TypeIs[CompliantLazyFrame[CompliantExprT, NativeFrameT_co, ToNarwhalsT_co]]: + obj: CompliantLazyFrame[CompliantExprT, NativeLazyFrameT, ToNarwhalsT_co] | Any, +) -> TypeIs[CompliantLazyFrame[CompliantExprT, NativeLazyFrameT, ToNarwhalsT_co]]: return _hasattr_static(obj, "__narwhals_lazyframe__") @@ -1676,10 +1675,6 @@ def has_native_namespace(obj: Any) -> TypeIs[SupportsNativeNamespace]: return _hasattr_static(obj, "__native_namespace__") -def _supports_dataframe_interchange(obj: Any) -> TypeIs[DataFrameLike]: - return hasattr(obj, "__dataframe__") - - def supports_arrow_c_stream(obj: Any) -> TypeIs[ArrowStreamExportable]: return _hasattr_static(obj, "__arrow_c_stream__") diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 19def52e03..11f215de3f 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -79,6 +79,7 @@ IntoDataFrame, IntoExpr, IntoFrame, + IntoLazyFrame, IntoSchema, JoinStrategy, LazyUniqueKeepStrategy, @@ -95,7 +96,7 @@ PS = ParamSpec("PS") _FrameT = TypeVar("_FrameT", bound="IntoFrame") -FrameT = TypeVar("FrameT", bound="IntoFrame") +LazyFrameT = TypeVar("LazyFrameT", bound="IntoLazyFrame") DataFrameT = TypeVar("DataFrameT", bound="IntoDataFrame") R = TypeVar("R") @@ -469,8 +470,7 @@ def _lazyframe(self) -> type[LazyFrame[Any]]: def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None: self._level: Literal["full", "lazy", "interchange"] = level - # NOTE: Interchange support (`DataFrameLike`) is the source of the error - self._compliant_frame: CompliantDataFrame[Any, Any, DataFrameT, Self] # type: ignore[type-var] + self._compliant_frame: CompliantDataFrame[Any, Any, DataFrameT, Self] if is_compliant_dataframe(df): self._compliant_frame = df.__narwhals_dataframe__() else: # pragma: no cover @@ -2219,7 +2219,7 @@ def explode(self, columns: str | Sequence[str], *more_columns: str) -> Self: return super().explode(columns, *more_columns) -class LazyFrame(BaseFrame[FrameT]): +class LazyFrame(BaseFrame[LazyFrameT]): """Narwhals LazyFrame, backed by a native lazyframe. Warning: @@ -2285,7 +2285,7 @@ def _dataframe(self) -> type[DataFrame[Any]]: def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None: self._level = level - self._compliant_frame: CompliantLazyFrame[Any, FrameT, Self] # type: ignore[type-var] + self._compliant_frame: CompliantLazyFrame[Any, LazyFrameT, Self] if is_compliant_lazyframe(df): self._compliant_frame = df.__narwhals_lazyframe__() else: # pragma: no cover @@ -2384,7 +2384,7 @@ def collect( msg = f"Unsupported `backend` value.\nExpected one of {get_args(_LazyFrameCollectImpl)} or None, got: {eager_backend}." raise ValueError(msg) - def to_native(self) -> FrameT: + def to_native(self) -> LazyFrameT: """Convert Narwhals LazyFrame to native one. Examples: diff --git a/narwhals/functions.py b/narwhals/functions.py index 0a1076b6ac..43fce8c536 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -51,7 +51,7 @@ IntoDType, IntoExpr, IntoSchema, - NativeFrame, + NativeDataFrame, NativeLazyFrame, NativeSeries, NonNestedLiteral, @@ -301,7 +301,9 @@ def from_dict( try: # implementation is UNKNOWN, Narwhals extension using this feature should # implement `from_dict` function in the top-level namespace. - native_frame: NativeFrame = _native_namespace.from_dict(data, schema=schema) + native_frame: NativeDataFrame = _native_namespace.from_dict( + data, schema=schema + ) except AttributeError as e: msg = "Unknown namespace is expected to implement `from_dict` function." raise AttributeError(msg) from e @@ -397,7 +399,9 @@ def from_numpy( try: # implementation is UNKNOWN, Narwhals extension using this feature should # implement `from_numpy` function in the top-level namespace. - native_frame: NativeFrame = _native_namespace.from_numpy(data, schema=schema) + native_frame: NativeDataFrame = _native_namespace.from_numpy( + data, schema=schema + ) except AttributeError as e: msg = "Unknown namespace is expected to implement `from_numpy` function." raise AttributeError(msg) from e @@ -470,7 +474,7 @@ def from_arrow( try: # implementation is UNKNOWN, Narwhals extension using this feature should # implement PyCapsule support - native: NativeFrame = _native_namespace.DataFrame(native_frame) + native: NativeDataFrame = _native_namespace.DataFrame(native_frame) except AttributeError as e: msg = "Unknown namespace is expected to implement `DataFrame` class which accepts object which supports PyCapsule Interface." raise AttributeError(msg) from e @@ -594,7 +598,7 @@ def read_csv( """ impl = Implementation.from_backend(backend) native_namespace = impl.to_native_namespace() - native_frame: NativeFrame + native_frame: NativeDataFrame if impl in { Implementation.POLARS, Implementation.PANDAS, @@ -670,7 +674,7 @@ def scan_csv( """ implementation = Implementation.from_backend(backend) native_namespace = implementation.to_native_namespace() - native_frame: NativeFrame | NativeLazyFrame + native_frame: NativeDataFrame | NativeLazyFrame if implementation is Implementation.POLARS: native_frame = native_namespace.scan_csv(source, **kwargs) elif implementation in { @@ -750,7 +754,7 @@ def read_parquet( """ impl = Implementation.from_backend(backend) native_namespace = impl.to_native_namespace() - native_frame: NativeFrame + native_frame: NativeDataFrame if impl in { Implementation.POLARS, Implementation.PANDAS, @@ -853,7 +857,7 @@ def scan_parquet( """ implementation = Implementation.from_backend(backend) native_namespace = implementation.to_native_namespace() - native_frame: NativeFrame | NativeLazyFrame + native_frame: NativeDataFrame | NativeLazyFrame if implementation is Implementation.POLARS: native_frame = native_namespace.scan_parquet(source, **kwargs) elif implementation in { diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 0e70fe2f9b..6d3fe30367 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -59,8 +59,8 @@ UInt128, Unknown, ) +from narwhals.stable.v1.typing import IntoDataFrameT, IntoLazyFrameT from narwhals.translate import _from_native_impl, get_native_namespace, to_py_scalar -from narwhals.typing import IntoDataFrameT, IntoFrameT if TYPE_CHECKING: from collections.abc import Iterable, Mapping, Sequence @@ -86,7 +86,6 @@ IntoDType, IntoExpr, IntoFrame, - IntoLazyFrameT, IntoSeries, NonNestedLiteral, SingleColSelector, @@ -105,7 +104,8 @@ IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries", default=Any) -class DataFrame(NwDataFrame[IntoDataFrameT]): +# NOTE legit +class DataFrame(NwDataFrame[IntoDataFrameT]): # type: ignore[type-var] _version = Version.V1 @inherit_doc(NwDataFrame) @@ -223,7 +223,7 @@ def _l1_norm(self) -> Self: return self.select(all()._l1_norm()) -class LazyFrame(NwLazyFrame[IntoFrameT]): +class LazyFrame(NwLazyFrame[IntoLazyFrameT]): @inherit_doc(NwLazyFrame) def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None: assert df._version is Version.V1 # noqa: S101 @@ -474,9 +474,9 @@ def __init__( @overload -def _stableify(obj: NwDataFrame[IntoFrameT]) -> DataFrame[IntoFrameT]: ... +def _stableify(obj: NwDataFrame[IntoDataFrameT]) -> DataFrame[IntoDataFrameT]: ... # type: ignore[type-var] @overload -def _stableify(obj: NwLazyFrame[IntoFrameT]) -> LazyFrame[IntoFrameT]: ... +def _stableify(obj: NwLazyFrame[IntoLazyFrameT]) -> LazyFrame[IntoLazyFrameT]: ... @overload def _stableify(obj: NwSeries[IntoSeriesT]) -> Series[IntoSeriesT]: ... @overload @@ -484,11 +484,11 @@ def _stableify(obj: NwExpr) -> Expr: ... def _stableify( - obj: NwDataFrame[IntoFrameT] - | NwLazyFrame[IntoFrameT] + obj: NwDataFrame[IntoDataFrameT] # type: ignore[type-var] + | NwLazyFrame[IntoLazyFrameT] | NwSeries[IntoSeriesT] | NwExpr, -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series[IntoSeriesT] | Expr: +) -> DataFrame[IntoDataFrameT] | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT] | Expr: if isinstance(obj, NwDataFrame): return DataFrame(obj._compliant_frame._with_version(Version.V1), level=obj._level) if isinstance(obj, NwLazyFrame): @@ -592,14 +592,14 @@ def from_native( @overload def from_native( - native_object: IntoFrameT | IntoSeriesT, + native_object: IntoDataFrameT | IntoLazyFrameT | IntoSeriesT, *, strict: Literal[False], eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[False] = ..., series_only: Literal[False] = ..., allow_series: Literal[True], -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series[IntoSeriesT]: ... +) -> DataFrame[IntoDataFrameT] | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT]: ... @overload @@ -616,14 +616,26 @@ def from_native( @overload def from_native( - native_object: IntoFrameT, + native_object: IntoDataFrameT, + *, + strict: Literal[False], + eager_only: Literal[False] = ..., + eager_or_interchange_only: Literal[False] = ..., + series_only: Literal[False] = ..., + allow_series: None = ..., +) -> DataFrame[IntoDataFrameT]: ... + + +@overload +def from_native( + native_object: IntoLazyFrameT, *, strict: Literal[False], eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[False] = ..., series_only: Literal[False] = ..., allow_series: None = ..., -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ... +) -> LazyFrame[IntoLazyFrameT]: ... @overload @@ -642,7 +654,7 @@ def from_native( def from_native( native_object: IntoDataFrameT, *, - strict: Literal[True] = ..., + strict: Literal[True] | None = ..., eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[True], series_only: Literal[False] = ..., @@ -650,11 +662,23 @@ def from_native( ) -> DataFrame[IntoDataFrameT]: ... +@overload +def from_native( + native_object: IntoLazyFrameT, + *, + strict: Literal[True] | None = ..., + eager_only: Literal[False] = ..., + eager_or_interchange_only: Literal[False] = ..., + series_only: Literal[False] = ..., + allow_series: None = ..., +) -> LazyFrame[IntoLazyFrameT]: ... + + @overload def from_native( native_object: IntoDataFrameT, *, - strict: Literal[True] = ..., + strict: Literal[True] | None = ..., eager_only: Literal[True], eager_or_interchange_only: Literal[False] = ..., series_only: Literal[False] = ..., @@ -666,7 +690,7 @@ def from_native( def from_native( native_object: IntoFrame | IntoSeries, *, - strict: Literal[True] = ..., + strict: Literal[True] | None = ..., eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[False] = ..., series_only: Literal[False] = ..., @@ -678,7 +702,7 @@ def from_native( def from_native( native_object: IntoSeriesT, *, - strict: Literal[True] = ..., + strict: Literal[True] | None = ..., eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[False] = ..., series_only: Literal[True], @@ -688,27 +712,14 @@ def from_native( @overload def from_native( - native_object: IntoLazyFrameT, - *, - strict: Literal[True] = ..., - eager_only: Literal[False] = ..., - eager_or_interchange_only: Literal[False] = ..., - series_only: Literal[False] = ..., - allow_series: None = ..., -) -> LazyFrame[IntoLazyFrameT]: ... - - -# NOTE: `pl.LazyFrame` originally matched here -@overload -def from_native( - native_object: IntoFrameT, + native_object: IntoDataFrameT, *, - strict: Literal[True] = ..., + strict: Literal[True] | None = ..., eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[False] = ..., series_only: Literal[False] = ..., allow_series: None = ..., -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ... +) -> DataFrame[IntoDataFrameT]: ... @overload @@ -785,14 +796,14 @@ def from_native( @overload def from_native( - native_object: IntoFrameT | IntoSeriesT, + native_object: IntoDataFrameT | IntoLazyFrameT | IntoSeriesT, *, pass_through: Literal[True], eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[False] = ..., series_only: Literal[False] = ..., allow_series: Literal[True], -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series[IntoSeriesT]: ... +) -> DataFrame[IntoDataFrameT] | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT]: ... @overload @@ -809,14 +820,14 @@ def from_native( @overload def from_native( - native_object: IntoFrameT, + native_object: IntoDataFrameT | IntoLazyFrameT, *, pass_through: Literal[True], eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[False] = ..., series_only: Literal[False] = ..., allow_series: None = ..., -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ... +) -> DataFrame[IntoDataFrameT] | LazyFrame[IntoLazyFrameT]: ... @overload @@ -881,14 +892,26 @@ def from_native( @overload def from_native( - native_object: IntoFrameT, + native_object: IntoDataFrameT, + *, + pass_through: Literal[False] = ..., + eager_only: Literal[False] = ..., + eager_or_interchange_only: Literal[False] = ..., + series_only: Literal[False] = ..., + allow_series: None = ..., +) -> DataFrame[IntoDataFrameT]: ... + + +@overload +def from_native( + native_object: IntoLazyFrameT, *, pass_through: Literal[False] = ..., eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[False] = ..., series_only: Literal[False] = ..., allow_series: None = ..., -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ... +) -> LazyFrame[IntoLazyFrameT]: ... # All params passed in as variables @@ -905,7 +928,12 @@ def from_native( def from_native( - native_object: IntoFrameT | IntoFrame | IntoSeriesT | IntoSeries | T, + native_object: IntoDataFrameT + | IntoLazyFrameT + | IntoFrame + | IntoSeriesT + | IntoSeries + | T, *, strict: bool | None = None, pass_through: bool | None = None, @@ -914,7 +942,7 @@ def from_native( series_only: bool = False, allow_series: bool | None = None, **kwds: Any, -) -> LazyFrame[IntoFrameT] | DataFrame[IntoFrameT] | Series[IntoSeriesT] | T: +) -> LazyFrame[IntoLazyFrameT] | DataFrame[IntoDataFrameT] | Series[IntoSeriesT] | T: """Convert `native_object` to Narwhals Dataframe, Lazyframe, or Series. See `narwhals.from_native` for full docstring. Note that `native_namespace` is @@ -951,8 +979,8 @@ def to_native( ) -> IntoDataFrameT: ... @overload def to_native( - narwhals_object: LazyFrame[IntoFrameT], *, strict: Literal[True] = ... -) -> IntoFrameT: ... + narwhals_object: LazyFrame[IntoLazyFrameT], *, strict: Literal[True] = ... +) -> IntoLazyFrameT: ... @overload def to_native( narwhals_object: Series[IntoSeriesT], *, strict: Literal[True] = ... @@ -965,8 +993,8 @@ def to_native( ) -> IntoDataFrameT: ... @overload def to_native( - narwhals_object: LazyFrame[IntoFrameT], *, pass_through: Literal[False] = ... -) -> IntoFrameT: ... + narwhals_object: LazyFrame[IntoLazyFrameT], *, pass_through: Literal[False] = ... +) -> IntoLazyFrameT: ... @overload def to_native( narwhals_object: Series[IntoSeriesT], *, pass_through: Literal[False] = ... @@ -977,12 +1005,12 @@ def to_native(narwhals_object: Any, *, pass_through: bool) -> Any: ... def to_native( narwhals_object: DataFrame[IntoDataFrameT] - | LazyFrame[IntoFrameT] + | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT], *, strict: bool | None = None, pass_through: bool | None = None, -) -> IntoFrameT | IntoSeriesT | Any: +) -> IntoLazyFrameT | IntoDataFrameT | IntoSeriesT | Any: """Convert Narwhals object to native one. See `narwhals.to_native` for full docstring. Note that `native_namespace` is diff --git a/narwhals/stable/v1/typing.py b/narwhals/stable/v1/typing.py index c3a0b421bc..7581910607 100644 --- a/narwhals/stable/v1/typing.py +++ b/narwhals/stable/v1/typing.py @@ -4,6 +4,7 @@ if TYPE_CHECKING: import sys + from collections.abc import Iterable, Sized from narwhals.stable.v1 import DataFrame, LazyFrame @@ -23,8 +24,13 @@ def columns(self) -> Any: ... def join(self, *args: Any, **kwargs: Any) -> Any: ... - class NativeSeries(Protocol): - def __len__(self) -> int: ... + class NativeDataFrame(Sized, NativeFrame, Protocol): ... + + class NativeLazyFrame(NativeFrame, Protocol): + def explain(self, *args: Any, **kwargs: Any) -> Any: ... + + class NativeSeries(Sized, Iterable[Any], Protocol): + def filter(self, *args: Any, **kwargs: Any) -> Any: ... class DataFrameLike(Protocol): def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... @@ -40,7 +46,8 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrame[Any]", "DataFrameLike"] + +IntoDataFrame: TypeAlias = Union["NativeDataFrame", "DataFrameLike"] """Anything which can be converted to a Narwhals DataFrame. Use this if your function accepts a narwhalifiable object but doesn't care about its backend. @@ -53,9 +60,9 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... ... return df.shape """ -IntoFrame: TypeAlias = Union[ - "NativeFrame", "DataFrame[Any]", "LazyFrame[Any]", "DataFrameLike" -] +IntoLazyFrame: TypeAlias = "NativeLazyFrame" + +IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"] """Anything which can be converted to a Narwhals DataFrame or LazyFrame. Use this if your function can accept an object which can be converted to either @@ -83,7 +90,7 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... ... return df.columns """ -IntoSeries: TypeAlias = Union["Series[Any]", "NativeSeries"] +IntoSeries: TypeAlias = "NativeSeries" """Anything which can be converted to a Narwhals Series. Use this if your function can accept an object which can be converted to `nw.Series` @@ -126,6 +133,8 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... ... return df.with_columns(c=df["a"] + 1).to_native() """ +IntoLazyFrameT = TypeVar("IntoLazyFrameT", bound="IntoLazyFrame") + FrameT = TypeVar("FrameT", "DataFrame[Any]", "LazyFrame[Any]") """TypeVar bound to Narwhals DataFrame or Narwhals LazyFrame. diff --git a/narwhals/stable/v2/__init__.py b/narwhals/stable/v2/__init__.py index ad5effd0b7..fe8f7a71b0 100644 --- a/narwhals/stable/v2/__init__.py +++ b/narwhals/stable/v2/__init__.py @@ -56,7 +56,7 @@ from narwhals.series import Series as NwSeries from narwhals.stable.v2 import dependencies, dtypes, selectors from narwhals.translate import _from_native_impl, get_native_namespace, to_py_scalar -from narwhals.typing import IntoDataFrameT, IntoFrameT +from narwhals.typing import IntoDataFrameT, IntoLazyFrameT if TYPE_CHECKING: from collections.abc import Iterable, Mapping, Sequence @@ -213,7 +213,7 @@ def is_unique(self) -> Series[Any]: return _stableify(super().is_unique()) -class LazyFrame(NwLazyFrame[IntoFrameT]): +class LazyFrame(NwLazyFrame[IntoLazyFrameT]): @inherit_doc(NwLazyFrame) def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None: assert df._version is Version.V2 # noqa: S101 @@ -305,9 +305,9 @@ def __init__( @overload -def _stableify(obj: NwDataFrame[IntoFrameT]) -> DataFrame[IntoFrameT]: ... +def _stableify(obj: NwDataFrame[IntoDataFrameT]) -> DataFrame[IntoDataFrameT]: ... @overload -def _stableify(obj: NwLazyFrame[IntoFrameT]) -> LazyFrame[IntoFrameT]: ... +def _stableify(obj: NwLazyFrame[IntoLazyFrameT]) -> LazyFrame[IntoLazyFrameT]: ... @overload def _stableify(obj: NwSeries[IntoSeriesT]) -> Series[IntoSeriesT]: ... @overload @@ -315,11 +315,11 @@ def _stableify(obj: NwExpr) -> Expr: ... def _stableify( - obj: NwDataFrame[IntoFrameT] - | NwLazyFrame[IntoFrameT] + obj: NwDataFrame[IntoDataFrameT] + | NwLazyFrame[IntoLazyFrameT] | NwSeries[IntoSeriesT] | NwExpr, -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series[IntoSeriesT] | Expr: +) -> DataFrame[IntoDataFrameT] | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT] | Expr: if isinstance(obj, NwDataFrame): return DataFrame(obj._compliant_frame._with_version(Version.V2), level=obj._level) if isinstance(obj, NwLazyFrame): @@ -437,6 +437,17 @@ def from_native( ) -> DataFrame[IntoDataFrameT]: ... +@overload +def from_native( + native_object: IntoLazyFrameT, + *, + pass_through: Literal[False] = ..., + eager_only: Literal[False] = ..., + series_only: Literal[False] = ..., + allow_series: None = ..., +) -> LazyFrame[IntoLazyFrameT]: ... + + @overload def from_native( native_object: IntoDataFrameT, @@ -483,14 +494,19 @@ def from_native( def from_native( # noqa: D417 - native_object: IntoFrameT | IntoFrame | IntoSeriesT | IntoSeries | T, + native_object: IntoDataFrameT + | IntoLazyFrameT + | IntoFrame + | IntoSeriesT + | IntoSeries + | T, *, pass_through: bool = False, eager_only: bool = False, series_only: bool = False, allow_series: bool | None = None, **kwds: Any, -) -> LazyFrame[IntoFrameT] | DataFrame[IntoFrameT] | Series[IntoSeriesT] | T: +) -> LazyFrame[IntoLazyFrameT] | DataFrame[IntoDataFrameT] | Series[IntoSeriesT] | T: """Convert `native_object` to Narwhals Dataframe, Lazyframe, or Series. Arguments: @@ -547,8 +563,8 @@ def to_native( ) -> IntoDataFrameT: ... @overload def to_native( - narwhals_object: LazyFrame[IntoFrameT], *, pass_through: Literal[False] = ... -) -> IntoFrameT: ... + narwhals_object: LazyFrame[IntoLazyFrameT], *, pass_through: Literal[False] = ... +) -> IntoLazyFrameT: ... @overload def to_native( narwhals_object: Series[IntoSeriesT], *, pass_through: Literal[False] = ... @@ -559,11 +575,11 @@ def to_native(narwhals_object: Any, *, pass_through: bool) -> Any: ... def to_native( narwhals_object: DataFrame[IntoDataFrameT] - | LazyFrame[IntoFrameT] + | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT], *, pass_through: bool = False, -) -> IntoFrameT | IntoSeriesT | Any: +) -> IntoDataFrameT | IntoLazyFrameT | IntoSeriesT | Any: """Convert Narwhals object to native one. Arguments: diff --git a/narwhals/stable/v2/typing.py b/narwhals/stable/v2/typing.py index 16d42804c2..ac45701fdd 100644 --- a/narwhals/stable/v2/typing.py +++ b/narwhals/stable/v2/typing.py @@ -4,6 +4,7 @@ if TYPE_CHECKING: import sys + from collections.abc import Iterable, Sized from narwhals.stable.v2 import DataFrame, LazyFrame @@ -23,8 +24,13 @@ def columns(self) -> Any: ... def join(self, *args: Any, **kwargs: Any) -> Any: ... - class NativeSeries(Protocol): - def __len__(self) -> int: ... + class NativeDataFrame(Sized, NativeFrame, Protocol): ... + + class NativeLazyFrame(NativeFrame, Protocol): + def explain(self, *args: Any, **kwargs: Any) -> Any: ... + + class NativeSeries(Sized, Iterable[Any], Protocol): + def filter(self, *args: Any, **kwargs: Any) -> Any: ... IntoExpr: TypeAlias = Union["Expr", str, "Series[Any]"] @@ -37,7 +43,7 @@ def __len__(self) -> int: ... `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrame[Any]"] +IntoDataFrame: TypeAlias = "NativeDataFrame" """Anything which can be converted to a Narwhals DataFrame. Use this if your function accepts a narwhalifiable object but doesn't care about its backend. @@ -50,7 +56,9 @@ def __len__(self) -> int: ... ... return df.shape """ -IntoFrame: TypeAlias = Union["NativeFrame", "DataFrame[Any]", "LazyFrame[Any]"] +IntoLazyFrame: TypeAlias = "NativeLazyFrame" + +IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"] """Anything which can be converted to a Narwhals DataFrame or LazyFrame. Use this if your function can accept an object which can be converted to either @@ -78,7 +86,7 @@ def __len__(self) -> int: ... ... return df.columns """ -IntoSeries: TypeAlias = Union["Series[Any]", "NativeSeries"] +IntoSeries: TypeAlias = "NativeSeries" """Anything which can be converted to a Narwhals Series. Use this if your function can accept an object which can be converted to `nw.Series` diff --git a/narwhals/translate.py b/narwhals/translate.py index 41aa01750d..fdace19827 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -34,9 +34,9 @@ from narwhals.series import Series from narwhals.typing import ( DataFrameT, + Frame, IntoDataFrameT, IntoFrame, - IntoFrameT, IntoLazyFrameT, IntoSeries, IntoSeriesT, @@ -56,8 +56,8 @@ def to_native( ) -> IntoDataFrameT: ... @overload def to_native( - narwhals_object: LazyFrame[IntoFrameT], *, pass_through: Literal[False] = ... -) -> IntoFrameT: ... + narwhals_object: LazyFrame[IntoLazyFrameT], *, pass_through: Literal[False] = ... +) -> IntoLazyFrameT: ... @overload def to_native( narwhals_object: Series[IntoSeriesT], *, pass_through: Literal[False] = ... @@ -68,11 +68,11 @@ def to_native(narwhals_object: Any, *, pass_through: bool) -> Any: ... def to_native( narwhals_object: DataFrame[IntoDataFrameT] - | LazyFrame[IntoFrameT] + | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT], *, pass_through: bool = False, -) -> IntoDataFrameT | IntoFrameT | IntoSeriesT | Any: +) -> IntoDataFrameT | IntoLazyFrameT | IntoSeriesT | Any: """Convert Narwhals object to native one. Arguments: @@ -168,13 +168,13 @@ def from_native( @overload def from_native( - native_object: IntoFrameT | IntoLazyFrameT | IntoSeriesT, + native_object: IntoDataFrameT | IntoLazyFrameT | IntoSeriesT, *, pass_through: Literal[True], eager_only: Literal[False] = ..., series_only: Literal[False] = ..., allow_series: Literal[True], -) -> DataFrame[IntoFrameT] | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT]: ... +) -> DataFrame[IntoDataFrameT] | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT]: ... @overload @@ -188,11 +188,8 @@ def from_native( ) -> Series[IntoSeriesT]: ... -# NOTE: Seems like `mypy` is giving a false positive -# Following this advice will introduce overlapping overloads? -# > note: Flipping the order of overloads will fix this error @overload -def from_native( # type: ignore[overload-overlap] +def from_native( native_object: IntoLazyFrameT, *, pass_through: Literal[False] = ..., @@ -259,14 +256,19 @@ def from_native( def from_native( # noqa: D417 - native_object: IntoLazyFrameT | IntoFrameT | IntoSeriesT | IntoFrame | IntoSeries | T, + native_object: IntoLazyFrameT + | IntoDataFrameT + | IntoSeriesT + | IntoFrame + | IntoSeries + | T, *, pass_through: bool = False, eager_only: bool = False, series_only: bool = False, allow_series: bool | None = None, **kwds: Any, -) -> LazyFrame[IntoLazyFrameT] | DataFrame[IntoFrameT] | Series[IntoSeriesT] | T: +) -> LazyFrame[IntoLazyFrameT] | DataFrame[IntoDataFrameT] | Series[IntoSeriesT] | T: """Convert `native_object` to Narwhals Dataframe, Lazyframe, or Series. Arguments: @@ -323,8 +325,8 @@ def _from_native_impl( # noqa: C901, PLR0911, PLR0912, PLR0915 allow_series: bool | None = None, version: Version, ) -> Any: + from narwhals._interchange.dataframe import supports_dataframe_interchange from narwhals._utils import ( - _supports_dataframe_interchange, is_compliant_dataframe, is_compliant_lazyframe, is_compliant_series, @@ -508,7 +510,7 @@ def _from_native_impl( # noqa: C901, PLR0911, PLR0912, PLR0915 return ns_spark.compliant.from_native(native_object).to_narwhals() # Interchange protocol - if _supports_dataframe_interchange(native_object): + if supports_dataframe_interchange(native_object): from narwhals._interchange.dataframe import InterchangeFrame if eager_only or series_only: @@ -538,9 +540,7 @@ def _from_native_impl( # noqa: C901, PLR0911, PLR0912, PLR0915 return native_object -def get_native_namespace( - *obj: DataFrame[Any] | LazyFrame[Any] | Series[Any] | IntoFrame | IntoSeries, -) -> Any: +def get_native_namespace(*obj: Frame | Series[Any] | IntoFrame | IntoSeries) -> Any: """Get native namespace from object. Arguments: @@ -573,7 +573,7 @@ def get_native_namespace( def _get_native_namespace_single_obj( - obj: DataFrame[Any] | LazyFrame[Any] | Series[Any] | IntoFrame | IntoSeries, + obj: Frame | Series[Any] | IntoFrame | IntoSeries, ) -> Any: if has_native_namespace(obj): return obj.__native_namespace__() diff --git a/narwhals/typing.py b/narwhals/typing.py index 337f8aea42..72c0ab94b9 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -15,6 +15,7 @@ from typing_extensions import TypeAlias from narwhals import dtypes + from narwhals._namespace import _NativeIbis from narwhals.dataframe import DataFrame, LazyFrame from narwhals.expr import Expr from narwhals.schema import Schema @@ -29,15 +30,14 @@ def columns(self) -> Any: ... def join(self, *args: Any, **kwargs: Any) -> Any: ... + class NativeDataFrame(Sized, NativeFrame, Protocol): ... + class NativeLazyFrame(NativeFrame, Protocol): def explain(self, *args: Any, **kwargs: Any) -> Any: ... class NativeSeries(Sized, Iterable[Any], Protocol): def filter(self, *args: Any, **kwargs: Any) -> Any: ... - class DataFrameLike(Protocol): - def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... - class SupportsNativeNamespace(Protocol): def __native_namespace__(self) -> ModuleType: ... @@ -110,7 +110,7 @@ def Binary(self) -> type[dtypes.Binary]: ... which will be interpreted as a `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrameLike"] +IntoDataFrame: TypeAlias = "NativeDataFrame" """Anything which can be converted to a Narwhals DataFrame. Use this if your function accepts a narwhalifiable object but doesn't care about its backend. @@ -123,7 +123,7 @@ def Binary(self) -> type[dtypes.Binary]: ... ... return df.shape """ -IntoLazyFrame: TypeAlias = "NativeLazyFrame" +IntoLazyFrame: TypeAlias = Union["NativeLazyFrame", "_NativeIbis"] IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"] """Anything which can be converted to a Narwhals DataFrame or LazyFrame. diff --git a/tests/conftest.py b/tests/conftest.py index 5f33b47eac..b390a3d65d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,7 +26,7 @@ from narwhals._spark_like.dataframe import SQLFrameDataFrame from narwhals._typing import EagerAllowed - from narwhals.typing import NativeFrame, NativeLazyFrame + from narwhals.typing import NativeDataFrame, NativeLazyFrame from tests.utils import Constructor, ConstructorEager, ConstructorLazy Data: TypeAlias = "dict[str, list[Any]]" @@ -100,27 +100,27 @@ def pandas_pyarrow_constructor(obj: Data) -> pd.DataFrame: return pd.DataFrame(obj).convert_dtypes(dtype_backend="pyarrow") -def modin_constructor(obj: Data) -> NativeFrame: # pragma: no cover +def modin_constructor(obj: Data) -> NativeDataFrame: # pragma: no cover import modin.pandas as mpd import pandas as pd df = mpd.DataFrame(pd.DataFrame(obj)) - return cast("NativeFrame", df) + return cast("NativeDataFrame", df) -def modin_pyarrow_constructor(obj: Data) -> NativeFrame: # pragma: no cover +def modin_pyarrow_constructor(obj: Data) -> NativeDataFrame: # pragma: no cover import modin.pandas as mpd import pandas as pd df = mpd.DataFrame(pd.DataFrame(obj)).convert_dtypes(dtype_backend="pyarrow") - return cast("NativeFrame", df) + return cast("NativeDataFrame", df) -def cudf_constructor(obj: Data) -> NativeFrame: # pragma: no cover +def cudf_constructor(obj: Data) -> NativeDataFrame: # pragma: no cover import cudf df = cudf.DataFrame(obj) - return cast("NativeFrame", df) + return cast("NativeDataFrame", df) def polars_eager_constructor(obj: Data) -> pl.DataFrame: diff --git a/tests/expr_and_series/dt/datetime_attributes_test.py b/tests/expr_and_series/dt/datetime_attributes_test.py index 2fbe13149c..c7bf55e7c0 100644 --- a/tests/expr_and_series/dt/datetime_attributes_test.py +++ b/tests/expr_and_series/dt/datetime_attributes_test.py @@ -1,12 +1,16 @@ from __future__ import annotations from datetime import date, datetime +from typing import TYPE_CHECKING, cast import pytest import narwhals as nw from tests.utils import Constructor, ConstructorEager, assert_equal_data +if TYPE_CHECKING: + import dask.dataframe as dd + data = { "a": [datetime(2021, 3, 1, 12, 34, 56, 49000), datetime(2020, 1, 2, 2, 4, 14, 715000)] } @@ -119,7 +123,9 @@ def test_to_date(request: pytest.FixtureRequest, constructor: Constructor) -> No request.applymarker(pytest.mark.xfail) dates = {"a": [datetime(2001, 1, 1), None, datetime(2001, 1, 3)]} if "dask" in str(constructor): - df = nw.from_native(constructor(dates).astype({"a": "timestamp[ns][pyarrow]"})) # type: ignore[union-attr] + df_dask = cast("dd.DataFrame", constructor(dates)) + df_dask = cast("dd.DataFrame", df_dask.astype({"a": "timestamp[ns][pyarrow]"})) + df = nw.from_native(df_dask) else: df = nw.from_native(constructor(dates)) result = df.select(nw.col("a").dt.date()) diff --git a/tests/expr_and_series/struct_/field_test.py b/tests/expr_and_series/struct_/field_test.py index 0945ebe8ce..39d83247ee 100644 --- a/tests/expr_and_series/struct_/field_test.py +++ b/tests/expr_and_series/struct_/field_test.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import cast + import pandas as pd import pyarrow as pa import pytest @@ -18,7 +20,7 @@ def test_get_field_expr(request: pytest.FixtureRequest, constructor: Constructor df_native = constructor(data) if "pandas" in str(constructor): - df_native = df_native.assign( # type: ignore[union-attr] + df_native = cast("pd.DataFrame", df_native).assign( user=pd.Series( data["user"], dtype=pd.ArrowDtype( @@ -53,7 +55,7 @@ def test_get_field_series( df_native = constructor_eager(data) if "pandas" in str(constructor_eager): - df_native = df_native.assign( # type: ignore[union-attr] + df_native = cast("pd.DataFrame", df_native).assign( user=pd.Series( data["user"], dtype=pd.ArrowDtype( diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index e2c575b7b2..5b22c76df5 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -2,7 +2,7 @@ import re from datetime import datetime -from typing import Any, Literal +from typing import TYPE_CHECKING, Any, Literal import pandas as pd import pytest @@ -16,6 +16,21 @@ assert_equal_data, ) +if TYPE_CHECKING: + from narwhals.typing import IntoLazyFrameT, JoinStrategy, NativeDataFrame + + +def from_native_lazy( + native: IntoLazyFrameT | NativeDataFrame, +) -> nw.LazyFrame[IntoLazyFrameT] | nw.LazyFrame[Any]: + """Every join test [needs to use `.lazy()` for typing]*. + + *Unless both left/right frames are of the same concrete type. + + [needs to use `.lazy()` for typing]: https://github.com/narwhals-dev/narwhals/pull/2944#discussion_r2286264815 + """ + return nw.from_native(native).lazy() + @pytest.mark.parametrize( ("df1", "df2", "expected", "on", "left_on", "right_on"), @@ -93,8 +108,8 @@ def test_full_join( right_on: None | str | list[str], constructor: Constructor, ) -> None: - df_left = nw.from_native(constructor(df1)) - df_right = nw.from_native(constructor(df2)) + df_left = from_native_lazy(constructor(df1)) + df_right = from_native_lazy(constructor(df2)) result = df_left.join( df_right, on=on, left_on=left_on, right_on=right_on, how="full" ).sort("id", nulls_last=True) @@ -109,8 +124,8 @@ def test_full_join_duplicate( df1 = {"foo": [1, 2, 3], "val1": [1, 2, 3]} df2 = {"foo": [1, 2, 3], "foo_right": [1, 2, 3]} - df_left = nw.from_native(constructor(df1)).lazy() - df_right = nw.from_native(constructor(df2)).lazy() + df_left = from_native_lazy(constructor(df1)) + df_right = from_native_lazy(constructor(df2)) exceptions: list[type[Exception]] = [nw.exceptions.NarwhalsError] if "pyspark" in str(constructor) and "sqlframe" not in str(constructor): @@ -132,7 +147,7 @@ def test_inner_join_two_keys(constructor: Constructor) -> None: "zor ro": [7.0, 8.0, 9.0], "idx": [0, 1, 2], } - df = nw.from_native(constructor(data)) + df = from_native_lazy(constructor(data)) df_right = df result = df.join( df_right, @@ -161,7 +176,7 @@ def test_inner_join_single_key(constructor: Constructor) -> None: "zor ro": [7.0, 8.0, 9.0], "idx": [0, 1, 2], } - df = nw.from_native(constructor(data)) + df = from_native_lazy(constructor(data)) df_right = df result = df.join( df_right, left_on="antananarivo", right_on="antananarivo", how="inner" @@ -185,7 +200,7 @@ def test_cross_join(constructor: Constructor) -> None: if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 1, 4): pytest.skip() data = {"antananarivo": [1, 3, 2]} - df = nw.from_native(constructor(data)) + df = from_native_lazy(constructor(data)) result = df.join(df, how="cross").sort("antananarivo", "antananarivo_right") expected = { "antananarivo": [1, 1, 1, 2, 2, 2, 3, 3, 3], @@ -205,7 +220,7 @@ def test_suffix( constructor: Constructor, how: Literal["inner", "left"], suffix: str ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) + df = from_native_lazy(constructor(data)) df_right = df result = df.join( df_right, @@ -223,7 +238,7 @@ def test_cross_join_suffix(constructor: Constructor, suffix: str) -> None: if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 1, 4): pytest.skip() data = {"antananarivo": [1, 3, 2]} - df = nw.from_native(constructor(data)) + df = from_native_lazy(constructor(data)) result = df.join(df, how="cross", suffix=suffix).sort( "antananarivo", f"antananarivo{suffix}" ) @@ -273,7 +288,7 @@ def test_anti_join( expected: dict[str, list[Any]], ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) + df = from_native_lazy(constructor(data)) other = df.filter(filter_expr) result = df.join(other, how="anti", left_on=join_key, right_on=join_key) assert_equal_data(result, expected) @@ -311,7 +326,7 @@ def test_semi_join( expected: dict[str, list[Any]], ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) + df = from_native_lazy(constructor(data)) other = df.filter(filter_expr) result = df.join(other, how="semi", left_on=join_key, right_on=join_key).sort( "antananarivo" @@ -322,7 +337,7 @@ def test_semi_join( @pytest.mark.parametrize("how", ["right"]) def test_join_not_implemented(constructor: Constructor, how: str) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) + df = from_native_lazy(constructor(data)) with pytest.raises( NotImplementedError, @@ -330,7 +345,12 @@ def test_join_not_implemented(constructor: Constructor, how: str) -> None: f"Only the following join strategies are supported: ('inner', 'left', 'full', 'cross', 'anti', 'semi'); found '{how}'." ), ): - df.join(df, left_on="antananarivo", right_on="antananarivo", how=how) # type: ignore[arg-type] + df.join( + df, + left_on="antananarivo", + right_on="antananarivo", + how=how, # type: ignore[arg-type] + ) def test_left_join(constructor: Constructor) -> None: @@ -344,8 +364,8 @@ def test_left_join(constructor: Constructor) -> None: "co": [4.0, 5.0, 7.0], "idx": [0.0, 1.0, 2.0], } - df_left = nw.from_native(constructor(data_left)) - df_right = nw.from_native(constructor(data_right)) + df_left = from_native_lazy(constructor(data_left)) + df_right = from_native_lazy(constructor(data_right)) result = df_left.join(df_right, left_on="bob", right_on="co", how="left") result = result.sort("idx") result = result.drop("idx_right") @@ -370,8 +390,8 @@ def test_left_join(constructor: Constructor) -> None: def test_left_join_multiple_column(constructor: Constructor) -> None: data_left = {"antananarivo": [1, 2, 3], "bob": [4, 5, 6], "idx": [0, 1, 2]} data_right = {"antananarivo": [1, 2, 3], "c": [4, 5, 6], "idx": [0, 1, 2]} - df_left = nw.from_native(constructor(data_left)) - df_right = nw.from_native(constructor(data_right)) + df_left = from_native_lazy(constructor(data_left)) + df_right = from_native_lazy(constructor(data_right)) result = df_left.join( df_right, left_on=["antananarivo", "bob"], @@ -397,8 +417,8 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None: "d": [1.0, 4.0, 2.0], "idx": [0.0, 1.0, 2.0], } - df_left = nw.from_native(constructor(data_left)) - df_right = nw.from_native(constructor(data_right)) + df_left = from_native_lazy(constructor(data_left)) + df_right = from_native_lazy(constructor(data_right)) result = df_left.join(df_right, left_on="bob", right_on="c", how="left").sort("idx") result = result.drop("idx_right") expected: dict[str, list[Any]] = { @@ -425,35 +445,35 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None: @pytest.mark.parametrize("how", ["inner", "left", "semi", "anti"]) -def test_join_keys_exceptions(constructor: Constructor, how: str) -> None: +def test_join_keys_exceptions(constructor: Constructor, how: JoinStrategy) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) + df = from_native_lazy(constructor(data)) with pytest.raises( ValueError, match=rf"Either \(`left_on` and `right_on`\) or `on` keys should be specified for {how}.", ): - df.join(df, how=how) # type: ignore[arg-type] + df.join(df, how=how) with pytest.raises( ValueError, match=rf"Either \(`left_on` and `right_on`\) or `on` keys should be specified for {how}.", ): - df.join(df, how=how, left_on="antananarivo") # type: ignore[arg-type] + df.join(df, how=how, left_on="antananarivo") with pytest.raises( ValueError, match=rf"Either \(`left_on` and `right_on`\) or `on` keys should be specified for {how}.", ): - df.join(df, how=how, right_on="antananarivo") # type: ignore[arg-type] + df.join(df, how=how, right_on="antananarivo") with pytest.raises( ValueError, match=f"If `on` is specified, `left_on` and `right_on` should be None for {how}.", ): - df.join(df, how=how, on="antananarivo", right_on="antananarivo") # type: ignore[arg-type] + df.join(df, how=how, on="antananarivo", right_on="antananarivo") with pytest.raises( ValueError, match="`left_on` and `right_on` must have the same length." ): - df.join(df, how=how, left_on=["antananarivo", "bob"], right_on="antananarivo") # type: ignore[arg-type] + df.join(df, how=how, left_on=["antananarivo", "bob"], right_on="antananarivo") @pytest.mark.parametrize( @@ -493,10 +513,10 @@ def test_joinasof_numeric( ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = nw.from_native( + df = from_native_lazy( constructor({"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]}) ).sort("antananarivo") - df_right = nw.from_native( + df_right = from_native_lazy( constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) ).sort("antananarivo") result = df.join_asof( @@ -562,7 +582,7 @@ def test_joinasof_time( request.applymarker(pytest.mark.xfail) if PANDAS_VERSION < (2, 1) and ("pandas_pyarrow" in str(constructor)): request.applymarker(pytest.mark.xfail) - df = nw.from_native( + df = from_native_lazy( constructor( { "datetime": [ @@ -574,7 +594,7 @@ def test_joinasof_time( } ) ).sort("datetime") - df_right = nw.from_native( + df_right = from_native_lazy( constructor( { "datetime": [ @@ -603,7 +623,7 @@ def test_joinasof_by(constructor: Constructor, request: pytest.FixtureRequest) - ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = nw.from_native( + df = from_native_lazy( constructor( { "antananarivo": [1, 5, 7, 10], @@ -612,7 +632,7 @@ def test_joinasof_by(constructor: Constructor, request: pytest.FixtureRequest) - } ) ).sort("antananarivo") - df_right = nw.from_native( + df_right = from_native_lazy( constructor( {"antananarivo": [1, 4, 5, 8], "bob": ["D", "D", "A", "F"], "d": [1, 3, 4, 1]} ) @@ -638,10 +658,10 @@ def test_joinasof_suffix( ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = nw.from_native( + df = from_native_lazy( constructor({"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]}) ).sort("antananarivo") - df_right = nw.from_native( + df_right = from_native_lazy( constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) ).sort("antananarivo") result = df.join_asof( @@ -656,7 +676,7 @@ def test_joinasof_not_implemented( constructor: Constructor, strategy: Literal["backward", "forward"] ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) + df = from_native_lazy(constructor(data)) with pytest.raises( NotImplementedError, @@ -669,7 +689,7 @@ def test_joinasof_not_implemented( def test_joinasof_keys_exceptions(constructor: Constructor) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) + df = from_native_lazy(constructor(data)) with pytest.raises( ValueError, @@ -702,45 +722,37 @@ def test_joinasof_keys_exceptions(constructor: Constructor) -> None: df.join_asof(df, right_on="antananarivo", on="antananarivo") -def test_joinasof_by_exceptions(constructor: Constructor) -> None: - data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) - with pytest.raises( - ValueError, match="If `by` is specified, `by_left` and `by_right` should be None." - ): - df.join_asof(df, on="antananarivo", by_left="bob", by_right="bob", by="bob") - - with pytest.raises( - ValueError, - match="Can not specify only `by_left` or `by_right`, you need to specify both.", - ): - df.join_asof(df, on="antananarivo", by_left="bob") - - with pytest.raises( - ValueError, - match="Can not specify only `by_left` or `by_right`, you need to specify both.", - ): - df.join_asof(df, on="antananarivo", by_right="bob") - - with pytest.raises( - ValueError, match="If `by` is specified, `by_left` and `by_right` should be None." - ): - df.join_asof(df, on="antananarivo", by_left="bob", by="bob") +ON = "antananarivo" +BY = "bob" - with pytest.raises( - ValueError, match="If `by` is specified, `by_left` and `by_right` should be None." - ): - df.join_asof(df, on="antananarivo", by_right="bob", by="bob") - with pytest.raises( - ValueError, match="`by_left` and `by_right` must have the same length." - ): - df.join_asof( - df, - on="antananarivo", - by_left=["antananarivo", "bob"], - by_right=["antananarivo"], - ) +@pytest.mark.parametrize( + ("on", "by_left", "by_right", "by", "message"), + [ + (ON, BY, BY, BY, r"If.+by.+by_left.+by_right.+should be None"), + (ON, BY, None, None, r"not.+by_left.+or.+by_right.+need.+both"), + (ON, None, BY, None, r"not.+by_left.+or.+by_right.+need.+both"), + (ON, BY, None, BY, r"If.+by.+by_left.+by_right.+should be None"), + (ON, None, BY, BY, r"If.+by.+by_left.+by_right.+should be None"), + (ON, [ON, BY], [ON], None, r"by_left.+by_right.+same.+length"), + ], +) +def test_joinasof_by_exceptions( + constructor: Constructor, + on: str | None, + by_left: str | list[str] | None, + by_right: str | list[str] | None, + by: str | list[str] | None, + message: str, +) -> None: + data = {ON: [1, 3, 2], BY: [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} + df = nw.from_native(constructor(data)) + if isinstance(df, nw.LazyFrame): + with pytest.raises(ValueError, match=message): + df.join_asof(df, on=on, by_left=by_left, by_right=by_right, by=by) + else: + with pytest.raises(ValueError, match=message): + df.join_asof(df, on=on, by_left=by_left, by_right=by_right, by=by) def test_join_duplicate_column_names( @@ -778,7 +790,11 @@ def test_join_duplicate_column_names( request.applymarker(pytest.mark.xfail) else: exception = nw.exceptions.DuplicateError - df = constructor({"a": [1, 2, 3, 4, 5], "b": [6, 6, 6, 6, 6]}) - dfn = nw.from_native(df) - with pytest.raises(exception): - dfn.join(dfn, on=["a"]).join(dfn, on=["a"]).lazy().collect() + data = {"a": [1, 2, 3, 4, 5], "b": [6, 6, 6, 6, 6]} + df = nw.from_native(constructor(data)) + if isinstance(df, nw.LazyFrame): + with pytest.raises(exception): + df.join(df, on=["a"]).join(df, on=["a"]).collect() + else: + with pytest.raises(exception): + df.join(df, on=["a"]).join(df, on=["a"]) diff --git a/tests/frame/sample_test.py b/tests/frame/sample_test.py index 46196b13ec..b86ddaee1d 100644 --- a/tests/frame/sample_test.py +++ b/tests/frame/sample_test.py @@ -36,5 +36,5 @@ def test_sample_with_seed(constructor_eager: ConstructorEager) -> None: r2 = nw.to_native(df.sample(n=n, seed=123)) r3 = nw.to_native(df.sample(n=n, seed=42)) - assert r1.equals(r2) # type: ignore[union-attr] - assert not r1.equals(r3) # type: ignore[union-attr] + assert r1.equals(r2) # type: ignore[attr-defined] + assert not r1.equals(r3) # type: ignore[attr-defined] diff --git a/tests/frame/with_row_index_test.py b/tests/frame/with_row_index_test.py index eead211910..c9d5f59fe0 100644 --- a/tests/frame/with_row_index_test.py +++ b/tests/frame/with_row_index_test.py @@ -1,6 +1,5 @@ from __future__ import annotations -from contextlib import nullcontext as does_not_raise from typing import TYPE_CHECKING import pytest @@ -49,16 +48,10 @@ def test_with_row_index_lazy( def test_with_row_index_lazy_exception(constructor: Constructor) -> None: frame = nw.from_native(constructor(data)) - msg = r"(LazyFrame\.)?with_row_index\(\) missing 1 required keyword-only argument: 'order_by'$" - context = ( - pytest.raises(TypeError, match=msg) - if isinstance(frame, nw.LazyFrame) - else does_not_raise() - ) - - with context: + if isinstance(frame, nw.LazyFrame): + with pytest.raises(TypeError, match=msg): + frame.with_row_index() # type: ignore[call-arg] + else: result = frame.with_row_index() - - expected = {"index": [0, 1], **data} - assert_equal_data(result, expected) + assert_equal_data(result, {"index": [0, 1], **data}) diff --git a/tests/preserve_pandas_like_columns_name_attr_test.py b/tests/preserve_pandas_like_columns_name_attr_test.py index cc7195e688..3127040bee 100644 --- a/tests/preserve_pandas_like_columns_name_attr_test.py +++ b/tests/preserve_pandas_like_columns_name_attr_test.py @@ -1,17 +1,17 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Callable import pytest import narwhals as nw if TYPE_CHECKING: - from tests.utils import Constructor + import pandas as pd def test_ops_preserve_column_index_name( - constructor: Constructor, request: pytest.FixtureRequest + constructor: Callable[..., pd.DataFrame], request: pytest.FixtureRequest ) -> None: if not any(x in str(constructor) for x in ("pandas", "modin", "cudf", "dask")): pytest.skip( @@ -23,11 +23,11 @@ def test_ops_preserve_column_index_name( data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} df_native = constructor(data) - df_native.columns.name = "foo" # type: ignore[union-attr] + df_native.columns.name = "foo" df = nw.from_native(df_native) result = df.with_columns(b=nw.col("a") + 1, c=nw.col("a") * 2).select("c", "b") - assert result.to_native().columns.name == "foo" # type: ignore[union-attr] + assert result.to_native().columns.name == "foo" assert result.lazy().collect(backend="pandas").to_native().columns.name == "foo" diff --git a/tests/tpch_q1_test.py b/tests/tpch_q1_test.py index d8b9882a61..97a3fa0dcc 100644 --- a/tests/tpch_q1_test.py +++ b/tests/tpch_q1_test.py @@ -105,7 +105,7 @@ def test_q1_w_generic_funcs(library: str) -> None: if library == "pandas" and PANDAS_VERSION < (1, 5): pytest.skip() elif library == "pandas": - df_raw: IntoFrame = pd.read_csv("tests/data/lineitem.csv") + df_raw: pd.DataFrame | pl.DataFrame = pd.read_csv("tests/data/lineitem.csv") else: pytest.importorskip("polars") import polars as pl diff --git a/tests/translate/from_native_test.py b/tests/translate/from_native_test.py index 8a5c568190..c0676b6872 100644 --- a/tests/translate/from_native_test.py +++ b/tests/translate/from_native_test.py @@ -301,7 +301,7 @@ def __dataframe__(self) -> None: # pragma: no cover result = nw.from_native(mockdf, pass_through=True) assert result is mockdf with pytest.raises(TypeError): - nw.from_native(mockdf) + nw.from_native(mockdf) # type: ignore[call-overload] def test_from_native_strict_native_series() -> None: diff --git a/tests/utils.py b/tests/utils.py index 57ff53cd0b..cb0fba5634 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -20,7 +20,7 @@ from typing_extensions import TypeAlias - from narwhals.typing import DataFrameLike, Frame, NativeFrame, NativeLazyFrame + from narwhals.typing import Frame, NativeDataFrame, NativeLazyFrame def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]: @@ -40,8 +40,8 @@ def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]: PYSPARK_VERSION: tuple[int, ...] = get_module_version_as_tuple("pyspark") CUDF_VERSION: tuple[int, ...] = get_module_version_as_tuple("cudf") -Constructor: TypeAlias = Callable[[Any], "NativeLazyFrame | NativeFrame | DataFrameLike"] -ConstructorEager: TypeAlias = Callable[[Any], "NativeFrame | DataFrameLike"] +Constructor: TypeAlias = Callable[[Any], "NativeLazyFrame | NativeDataFrame"] +ConstructorEager: TypeAlias = Callable[[Any], "NativeDataFrame"] ConstructorLazy: TypeAlias = Callable[[Any], "NativeLazyFrame"] diff --git a/tests/v1_test.py b/tests/v1_test.py index 04ec3a9e20..661fca8ae1 100644 --- a/tests/v1_test.py +++ b/tests/v1_test.py @@ -475,8 +475,8 @@ def test_renamed_taxicab_norm_dataframe() -> None: result = nw_v1.from_native(pa.table({"a": [1, 2, 3, -4, 5]}))._l1_norm() expected = {"a": [15]} assert_equal_data(result, expected) - result = nw_v1.from_native(pa.table({"a": [1, 2, 3, -4, 5]})).lazy()._l1_norm() - assert_equal_data(result, expected) + result_lazy = nw_v1.from_native(pa.table({"a": [1, 2, 3, -4, 5]})).lazy()._l1_norm() + assert_equal_data(result_lazy, expected) def test_renamed_taxicab_norm_dataframe_narwhalify() -> None: @@ -579,16 +579,11 @@ def test_dataframe_recursive_v1() -> None: if TYPE_CHECKING: assert_type(pl_frame, pl.DataFrame) - assert_type( - nw_frame, "nw_v1.DataFrame[pl.DataFrame] | nw_v1.LazyFrame[pl.DataFrame]" - ) + assert_type(nw_frame, "nw_v1.DataFrame[pl.DataFrame]") nw_frame_depth_2 = nw_v1.DataFrame(nw_frame, level="full") # type: ignore[var-annotated] assert_type(nw_frame_depth_2, nw_v1.DataFrame[Any]) # NOTE: Checking that the type is `DataFrame[Unknown]` - assert_type( - nw_frame_early_return, - "nw_v1.DataFrame[pl.DataFrame] | nw_v1.LazyFrame[pl.DataFrame]", - ) + assert_type(nw_frame_early_return, "nw_v1.DataFrame[pl.DataFrame]") def test_lazyframe_recursive_v1() -> None: