Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
0a5c5e7
docs: Try removing one inherited doc
dangotbanned Apr 23, 2025
64f6ef9
test: Fix broken test
dangotbanned Apr 23, 2025
931bd33
docs: Remove the other `v1.DataFrame` docs
dangotbanned Apr 23, 2025
e1caa89
test: Fix test on class docstring
dangotbanned Apr 23, 2025
72988f4
cov
dangotbanned Apr 23, 2025
04eaf9b
test: Use `getdoc` on both sides?
dangotbanned Apr 23, 2025
2e0c363
test: Rewrite `test_dataframe_docstrings`
dangotbanned Apr 23, 2025
48a327a
test: Update `Series`, `LazyFrame` tests
dangotbanned Apr 23, 2025
fe0497c
docs: Remove non-deprecated `LazyFrame` docs
dangotbanned Apr 23, 2025
b1a884b
docs: Remove duplicate `Series` docs
dangotbanned Apr 23, 2025
e705fe6
docs: Remove warning from `v1.Expr.sample`
dangotbanned Apr 23, 2025
c9d0cd3
docs: Remove worse `Schema` doc
dangotbanned Apr 23, 2025
a33eb01
docs: Remove duplicate `DType` docs
dangotbanned Apr 24, 2025
a571160
Merge branch 'main' into remove-inherited-docs
dangotbanned Apr 24, 2025
04d360f
Merge branch 'main' into remove-inherited-docs
dangotbanned Apr 24, 2025
19a3e9e
Merge branch 'main' into remove-inherited-docs
dangotbanned Apr 26, 2025
d5d8963
Merge branch 'main' into remove-inherited-docs
dangotbanned Apr 26, 2025
6b5c51f
chore: Add `@inherit_doc`
dangotbanned Apr 27, 2025
9f40f19
Merge branch 'remove-inherited-docs' of https://github.com/narwhals-d…
dangotbanned Apr 27, 2025
433cc0b
test: remove dead code
dangotbanned Apr 27, 2025
a78975a
docs: great
dangotbanned Apr 27, 2025
6165a21
Merge branch 'main' into remove-inherited-docs
dangotbanned Apr 27, 2025
c4c004a
test: cov for `v1.Schema`
dangotbanned Apr 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 23 additions & 184 deletions narwhals/stable/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
from narwhals.utils import deprecate_native_namespace
from narwhals.utils import find_stacklevel
from narwhals.utils import generate_temporary_column_name
from narwhals.utils import inherit_doc
from narwhals.utils import is_ordered_categorical
from narwhals.utils import maybe_align_index
from narwhals.utils import maybe_convert_dtypes
Expand All @@ -87,6 +88,7 @@
from types import ModuleType
from typing import Mapping

from typing_extensions import ParamSpec
from typing_extensions import Self
from typing_extensions import TypeVar

Expand All @@ -111,6 +113,8 @@
SeriesT = TypeVar("SeriesT", bound="Series[Any]")
IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries", default=Any)
T = TypeVar("T", default=Any)
P = ParamSpec("P")
R = TypeVar("R")
else:
from typing import TypeVar

Expand All @@ -119,40 +123,20 @@


class DataFrame(NwDataFrame[IntoDataFrameT]):
"""Narwhals DataFrame, backed by a native eager dataframe.

!!! warning
This class is not meant to be instantiated directly - instead:

- If the native object is an eager dataframe from one of the supported
backends (e.g. pandas.DataFrame, polars.DataFrame, pyarrow.Table),
you can use [`narwhals.from_native`][]:
```py
narwhals.from_native(native_dataframe)
narwhals.from_native(native_dataframe, eager_only=True)
```

- If the object is a mapping of column names to generic sequences
(e.g. `dict[str, list]`), you can create a DataFrame via
[`narwhals.from_dict`][]:
```py
narwhals.from_dict(
data={"a": [1, 2, 3]},
backend=narwhals.get_native_namespace(another_object),
)
```
"""
@inherit_doc(NwDataFrame)
def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None:
super().__init__(df, level=level)

# We need to override any method which don't return Self so that type
# annotations are correct.

@property
def _series(self) -> type[Series[Any]]:
return Series
return cast("type[Series[Any]]", Series)

@property
def _lazyframe(self) -> type[LazyFrame[Any]]:
return LazyFrame
return cast("type[LazyFrame[Any]]", LazyFrame)

@overload
def __getitem__(self, item: tuple[SingleIndexSelector, SingleColSelector]) -> Any: ...
Expand Down Expand Up @@ -192,32 +176,6 @@ def lazy(
self,
backend: ModuleType | Implementation | str | None = None,
) -> LazyFrame[Any]:
"""Restrict available API methods to lazy-only ones.

If `backend` is specified, then a conversion between different backends
might be triggered.

If a library does not support lazy execution and `backend` is not specified,
then this will only restrict the API to lazy-only operations. This is useful
if you want to ensure that you write dataframe-agnostic code which all has
the possibility of running entirely lazily.

Arguments:
backend: Which lazy backend to collect to. This will be the underlying
backend for the resulting Narwhals LazyFrame. If not specified, and the
given library does not support lazy execution, then this will restrict
the API to lazy-only operations.

`backend` can be specified in various ways:

- As `Implementation.<BACKEND>` with `BACKEND` being `DASK`, `DUCKDB`
or `POLARS`.
- As a string: `"dask"`, `"duckdb"` or `"polars"`
- Directly as a module `dask.dataframe`, `duckdb` or `polars`.

Returns:
A new LazyFrame.
"""
return super().lazy(backend=backend) # type: ignore[return-value]

# Not sure what mypy is complaining about, probably some fancy
Expand All @@ -233,31 +191,12 @@ def to_dict(
def to_dict(
self, *, as_series: bool = True
) -> dict[str, Series[Any]] | dict[str, list[Any]]:
"""Convert DataFrame to a dictionary mapping column name to values.

Arguments:
as_series: If set to ``True``, then the values are Narwhals Series,
otherwise the values are Any.

Returns:
A mapping from column name to values / Series.
"""
return super().to_dict(as_series=as_series) # type: ignore[return-value]

def is_duplicated(self) -> Series[Any]:
r"""Get a mask of all duplicated rows in this DataFrame.

Returns:
A new Series.
"""
return super().is_duplicated() # type: ignore[return-value]

def is_unique(self) -> Series[Any]:
r"""Get a mask of all unique rows in this DataFrame.

Returns:
A new Series.
"""
return super().is_unique() # type: ignore[return-value]

def _l1_norm(self) -> Self:
Expand All @@ -270,17 +209,9 @@ def _l1_norm(self) -> Self:


class LazyFrame(NwLazyFrame[IntoFrameT]):
"""Narwhals LazyFrame, backed by a native lazyframe.

!!! warning
This class is not meant to be instantiated directly - instead use
[`narwhals.from_native`][] with a native
object that is a lazy dataframe from one of the supported
backends (e.g. polars.LazyFrame, dask_expr._collection.DataFrame):
```py
narwhals.from_native(native_lazyframe)
```
"""
@inherit_doc(NwLazyFrame)
def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None:
super().__init__(df, level=level)

@property
def _dataframe(self) -> type[DataFrame[Any]]:
Expand Down Expand Up @@ -319,37 +250,6 @@ def collect(
backend: ModuleType | Implementation | str | None = None,
**kwargs: Any,
) -> DataFrame[Any]:
r"""Materialize this LazyFrame into a DataFrame.

As each underlying lazyframe has different arguments to set when materializing
the lazyframe into a dataframe, we allow to pass them as kwargs (see examples
below for how to generalize the specification).

Arguments:
backend: specifies which eager backend to collect to. This will be the underlying
backend for the resulting Narwhals DataFrame. If None, then the following
default conversions will be applied:

- `polars.LazyFrame` -> `polars.DataFrame`
- `dask.DataFrame` -> `pandas.DataFrame`
- `duckdb.PyRelation` -> `pyarrow.Table`
- `pyspark.DataFrame` -> `pyarrow.Table`

`backend` can be specified in various ways:

- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`
or `POLARS`.
- As a string: `"pandas"`, `"pyarrow"` or `"polars"`
- Directly as a module `pandas`, `pyarrow` or `polars`.
kwargs: backend specific kwargs to pass along. To know more please check the
backend specific documentation:

- [polars.LazyFrame.collect](https://docs.pola.rs/api/python/dev/reference/lazyframe/api/polars.LazyFrame.collect.html)
- [dask.dataframe.DataFrame.compute](https://docs.dask.org/en/stable/generated/dask.dataframe.DataFrame.compute.html)

Returns:
DataFrame
"""
return super().collect(backend=backend, **kwargs) # type: ignore[return-value]

def _l1_norm(self) -> Self:
Expand Down Expand Up @@ -387,29 +287,11 @@ def gather_every(self, n: int, offset: int = 0) -> Self:


class Series(NwSeries[IntoSeriesT]):
"""Narwhals Series, backed by a native series.

!!! warning
This class is not meant to be instantiated directly - instead:

- If the native object is a series from one of the supported backends (e.g.
pandas.Series, polars.Series, pyarrow.ChunkedArray), you can use
[`narwhals.from_native`][]:
```py
narwhals.from_native(native_series, allow_series=True)
narwhals.from_native(native_series, series_only=True)
```

- If the object is a generic sequence (e.g. a list or a tuple of values), you can
create a series via [`narwhals.new_series`][]:
```py
narwhals.new_series(
name=name,
values=values,
backend=narwhals.get_native_namespace(another_object),
)
```
"""
@inherit_doc(NwSeries)
def __init__(
self, series: Any, *, level: Literal["full", "lazy", "interchange"]
) -> None:
super().__init__(series, level=level)

# We need to override any method which don't return Self so that type
# annotations are correct.
Expand All @@ -419,11 +301,6 @@ def _dataframe(self) -> type[DataFrame[Any]]:
return DataFrame

def to_frame(self) -> DataFrame[Any]:
"""Convert to dataframe.

Returns:
A DataFrame containing this Series as a single column.
"""
return super().to_frame() # type: ignore[return-value]

def value_counts(
Expand All @@ -434,21 +311,6 @@ def value_counts(
name: str | None = None,
normalize: bool = False,
) -> DataFrame[Any]:
r"""Count the occurrences of unique values.

Arguments:
sort: Sort the output by count in descending order. If set to False (default),
the order of the output is random.
parallel: Execute the computation in parallel. Used for Polars only.
name: Give the resulting count column a specific name; if `normalize` is True
defaults to "proportion", otherwise defaults to "count".
normalize: If ``True``, gives relative frequencies of the unique values.

Returns:
A DataFrame with two columns:
- The original values as first column
- Either count or proportion as second column, depending on normalize parameter.
"""
return super().value_counts( # type: ignore[return-value]
sort=sort, parallel=parallel, name=name, normalize=normalize
)
Expand All @@ -460,20 +322,6 @@ def hist(
bin_count: int | None = None,
include_breakpoint: bool = True,
) -> DataFrame[Any]:
"""Bin values into buckets and count their occurrences.

!!! warning
This functionality is considered **unstable**. It may be changed at any point
without it being considered a breaking change.

Arguments:
bins: A monotonically increasing sequence of values.
bin_count: If no bins provided, this will be used to determine the distance of the bins.
include_breakpoint: Include a column that shows the intervals as categories.

Returns:
A new DataFrame containing the counts of values that occur within each passed bin.
"""
from narwhals.exceptions import NarwhalsUnstableWarning
from narwhals.utils import find_stacklevel

Expand Down Expand Up @@ -596,13 +444,6 @@ def sample(
) -> Self:
"""Sample randomly from this expression.

!!! warning
`Expr.sample` is deprecated and will be removed in a future version.
Hint: instead of `df.select(nw.col('a').sample())`, use
`df.select(nw.col('a')).sample()` instead.
Note: this will remain available in `narwhals.stable.v1`.
See [stable api](../backcompat.md/) for more information.

Arguments:
n: Number of items to return. Cannot be used with fraction.
fraction: Fraction of items to return. Cannot be used with n.
Expand All @@ -622,16 +463,14 @@ def sample(


class Schema(NwSchema):
"""Ordered mapping of column names to their data type.

Arguments:
schema: Mapping[str, DType] | Iterable[tuple[str, DType]] | None
The schema definition given by column names and their associated
*instantiated* Narwhals data type. Accepts a mapping or an iterable of tuples.
"""

_version = Version.V1

@inherit_doc(NwSchema)
def __init__(
self, schema: Mapping[str, DType] | Iterable[tuple[str, DType]] | None = None
) -> None:
super().__init__(schema)


@overload
def _stableify(obj: NwDataFrame[IntoFrameT]) -> DataFrame[IntoFrameT]: ...
Expand Down
32 changes: 15 additions & 17 deletions narwhals/stable/v1/_dtypes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from narwhals.dtypes import Array
from narwhals.dtypes import Binary
from narwhals.dtypes import Boolean
Expand Down Expand Up @@ -35,33 +37,29 @@
from narwhals.dtypes import UInt128
from narwhals.dtypes import Unknown
from narwhals.dtypes import UnsignedIntegerType
from narwhals.utils import inherit_doc

if TYPE_CHECKING:
from datetime import timezone

class Datetime(NwDatetime):
"""Data type representing a calendar date and time of day.
from narwhals.typing import TimeUnit

Arguments:
time_unit: Unit of time. Defaults to `'us'` (microseconds).
time_zone: Time zone string, as defined in zoneinfo (to see valid strings run
`import zoneinfo; zoneinfo.available_timezones()` for a full list).

Notes:
Adapted from [Polars implementation](https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L398-L457)
"""
class Datetime(NwDatetime):
@inherit_doc(NwDatetime)
def __init__(
self, time_unit: TimeUnit = "us", time_zone: str | timezone | None = None
) -> None:
super().__init__(time_unit, time_zone)

def __hash__(self) -> int:
return hash(self.__class__)


class Duration(NwDuration):
"""Data type representing a time duration.

Arguments:
time_unit: Unit of time. Defaults to `'us'` (microseconds).

Notes:
Adapted from [Polars implementation](https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L460-L502)
"""
@inherit_doc(NwDuration)
def __init__(self, time_unit: TimeUnit = "us") -> None:
super().__init__(time_unit)

def __hash__(self) -> int:
return hash(self.__class__)
Expand Down
Loading
Loading