Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
54d39ee
feat: Add `Schema.from_arrow`
dangotbanned Aug 2, 2025
8b3773a
feat: Add `Schema.from_polars`
dangotbanned Aug 2, 2025
30932b9
feat: Add `Schema.from_pandas`
dangotbanned Aug 2, 2025
c6d41d4
test: Add `test_schema_from_polars`
dangotbanned Aug 2, 2025
0188904
test: add `test_schema_from_arrow`
dangotbanned Aug 6, 2025
0c5b7d5
test: wip `test_schema_from_pandas`
dangotbanned Aug 6, 2025
451ba84
Merge remote-tracking branch 'upstream/main' into schema-from
dangotbanned Aug 6, 2025
99bc1d6
be quiet mypy
dangotbanned Aug 6, 2025
a9e28bf
Merge remote-tracking branch 'upstream/main' into schema-from
dangotbanned Aug 7, 2025
ac376ac
test: fill out pandas tests
dangotbanned Aug 7, 2025
f694136
test: Branch time unit for pandas nightly
dangotbanned Aug 7, 2025
0b4c05d
feat: Add dtype/schema guards
dangotbanned Aug 7, 2025
eff9049
feat: Add `Schema.from_native`
dangotbanned Aug 8, 2025
9cb6ae2
Merge branch 'main' into schema-from
dangotbanned Aug 8, 2025
536566c
test: `from_native` polars coverage
dangotbanned Aug 8, 2025
33bf1b6
Merge branch 'main' into schema-from
dangotbanned Aug 8, 2025
f546a23
test: `from_native` pyarrow coverage
dangotbanned Aug 8, 2025
a2d615f
test: error coverage
dangotbanned Aug 9, 2025
6bd8085
feat(DRAFT): Re-do pandas-like
dangotbanned Aug 9, 2025
ed12a1c
test: Move some of pandas-like to conftest
dangotbanned Aug 9, 2025
6760edf
cov
dangotbanned Aug 9, 2025
89df6dd
migrate some tests, remove old constructor
dangotbanned Aug 9, 2025
c5869be
more pandas-like, fix missing `np.dtype`
dangotbanned Aug 9, 2025
0a19c61
reorder methods, add to api ref
dangotbanned Aug 9, 2025
a0d0c32
ci: add `Schema` to `check_api_reference`
dangotbanned Aug 9, 2025
228f42a
test: do a fancy py->pl->nw->pa->nw->pl->nw->pl->py
dangotbanned Aug 9, 2025
6b1a03f
Merge branch 'main' into schema-from
dangotbanned Aug 9, 2025
96813ea
refactor: organize imports
dangotbanned Aug 10, 2025
b204f45
docs: Add non-example docs
dangotbanned Aug 10, 2025
9f07071
feat: Allow empty mapping
dangotbanned Aug 10, 2025
12e6389
chore(typing): ignore mypy for now
dangotbanned Aug 10, 2025
cf613b7
docs: Add examples (excl cudf)
dangotbanned Aug 10, 2025
af6baf1
Merge branch 'main' into schema-from
dangotbanned Aug 11, 2025
51f34ee
Merge branch 'main' into schema-from
dangotbanned Aug 12, 2025
d5bb7b9
Merge branch 'main' into schema-from
dangotbanned Aug 13, 2025
ec71058
Merge branch 'main' into schema-from
dangotbanned Aug 13, 2025
ef107aa
revert: remove `from_{cudf,modin,pandas}`
dangotbanned Aug 13, 2025
87894ef
docs: remove returns sections 🥳
dangotbanned Aug 13, 2025
24c740c
Merge remote-tracking branch 'upstream/main' into schema-from
dangotbanned Aug 13, 2025
99e4ffd
Merge branch 'main' into schema-from
dangotbanned Aug 14, 2025
e227bda
Merge branch 'main' into schema-from
dangotbanned Aug 15, 2025
4966868
Merge branch 'main' into schema-from
dangotbanned Aug 15, 2025
c3ee75b
Merge branch 'main' into schema-from
dangotbanned Aug 17, 2025
41353bc
Merge branch 'main' into schema-from
dangotbanned Aug 18, 2025
9d89527
Merge remote-tracking branch 'upstream/main' into schema-from
dangotbanned Aug 19, 2025
5059f5d
Merge branch 'main' into schema-from
dangotbanned Aug 19, 2025
ff8b24c
Merge branch 'main' into schema-from
dangotbanned Aug 20, 2025
20b5c2d
Merge branch 'main' into schema-from
dangotbanned Aug 20, 2025
fc23471
Merge remote-tracking branch 'upstream/main' into schema-from
dangotbanned Aug 21, 2025
f61c750
Merge branch 'main' into schema-from
dangotbanned Aug 23, 2025
06a4ef9
refactor: Pass `()` to `infer` to be more explicit
dangotbanned Aug 23, 2025
8559d43
Merge remote-tracking branch 'upstream/main' into schema-from
dangotbanned Aug 23, 2025
fe8372f
Merge remote-tracking branch 'upstream/main' into schema-from
dangotbanned Aug 25, 2025
33f67dd
revert: changes to `_pandas_like.utils`
dangotbanned Aug 25, 2025
8ab552e
Merge branch 'main' into schema-from
dangotbanned Aug 25, 2025
50b074b
Merge branch 'main' into schema-from
dangotbanned Aug 27, 2025
a5ac573
Merge remote-tracking branch 'upstream/main' into schema-from
dangotbanned Aug 28, 2025
a6264d1
refactor: Update for (#3038)
dangotbanned Aug 28, 2025
5377373
refactor: why do 2 lines when 1 does the trick?
dangotbanned Aug 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/api-reference/schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
members:
- names
- dtypes
- from_arrow
- from_native
- from_pandas_like
- from_polars
- len
- to_arrow
- to_pandas
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ def native_to_narwhals_dtype(
# Per conversations with their maintainers, they don't support arbitrary
# objects, so we can just return String.
return version.dtypes.String()
if allow_object: # pragma: no cover
if allow_object:
return object_native_to_narwhals_dtype(None, version, implementation)
msg = (
"Unreachable code, object dtype should be handled separately" # pragma: no cover
Expand Down
36 changes: 36 additions & 0 deletions narwhals/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
IntoDataFrameT,
IntoLazyFrameT,
IntoSeriesT,
PandasLikeDType,
_1DArray,
_1DArrayInt,
_2DArray,
Expand Down Expand Up @@ -306,6 +307,17 @@ def is_polars_series(ser: Any) -> TypeIs[pl.Series]:
return (pl := get_polars()) is not None and isinstance(ser, pl.Series)


def is_polars_schema(obj: Any) -> TypeIs[pl.Schema]:
    """Check whether `obj` is a Polars Schema without importing Polars.

    The module returned by `get_polars()` is probed for a `Schema` attribute
    first, so a Polars module that does not expose one simply yields `False`.
    """
    polars = get_polars()
    if not polars or not hasattr(polars, "Schema"):
        return False
    return isinstance(obj, polars.Schema)


# NOTE: For `pl.Schema` only instantiated dtypes are expected
def is_polars_data_type(obj: Any) -> TypeIs[pl.DataType]:
    """Check whether `obj` is a Polars data type instance without importing Polars.

    This is an `isinstance` check against `pl.DataType`, so only instantiated
    data types are accepted.
    """
    polars = get_polars()
    return isinstance(obj, polars.DataType) if polars else False


def is_pyarrow_chunked_array(ser: Any) -> TypeIs[pa.ChunkedArray[Any]]:
"""Check whether `ser` is a PyArrow ChunkedArray without importing PyArrow.

Expand All @@ -330,6 +342,14 @@ def is_pyarrow_scalar(obj: Any) -> TypeIs[pa.Scalar[Any]]:
return (pa := get_pyarrow()) is not None and isinstance(obj, pa.Scalar)


def is_pyarrow_schema(obj: Any) -> TypeIs[pa.Schema]:
    """Check whether `obj` is a PyArrow Schema without importing PyArrow."""
    pyarrow = get_pyarrow()
    if not pyarrow:
        return False
    return isinstance(obj, pyarrow.Schema)


def is_pyarrow_data_type(obj: Any) -> TypeIs[pa.DataType]:
    """Check whether `obj` is a PyArrow data type without importing PyArrow."""
    pyarrow = get_pyarrow()
    if not pyarrow:
        return False
    return isinstance(obj, pyarrow.DataType)


def is_pyspark_dataframe(df: Any) -> TypeIs[pyspark_sql.DataFrame]:
"""Check whether `df` is a PySpark DataFrame without importing PySpark.

Expand Down Expand Up @@ -438,6 +458,22 @@ def is_pandas_like_index(index: Any) -> bool:
) # pragma: no cover


def is_pandas_like_dtype(obj: Any) -> TypeIs[PandasLikeDType]:
    """Check whether `obj` is a pandas extension dtype or a NumPy dtype.

    Yields `False` whenever `get_pandas()` does not return a pandas module.
    """
    pandas = get_pandas()
    if not pandas:
        return False
    # NumPy is only looked up once pandas is known to be available.
    accepted = (pandas.api.extensions.ExtensionDtype, get_numpy().dtype)
    return isinstance(obj, accepted)


def is_cudf_dtype(
    obj: Any,
) -> TypeIs[pd.api.extensions.ExtensionDtype]:  # pragma: no cover
    """Check whether `obj` looks like a cuDF data type.

    No cuDF class is consulted: a dtype qualifies when it is a pandas
    `ExtensionDtype` that also exposes a `to_arrow` attribute.
    """
    pandas = get_pandas()
    if not pandas:
        return False
    if not isinstance(obj, pandas.api.extensions.ExtensionDtype):
        return False
    return hasattr(obj, "to_arrow")


def is_into_series(native_series: Any | IntoSeriesT) -> TypeIs[IntoSeriesT]:
"""Check whether `native_series` can be converted to a Narwhals Series.

Expand Down
198 changes: 195 additions & 3 deletions narwhals/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,36 @@
from __future__ import annotations

from collections import OrderedDict
from collections.abc import Mapping
from functools import partial
from typing import TYPE_CHECKING, cast

from narwhals._utils import Implementation, Version, zip_strict
from narwhals._utils import Implementation, Version, qualified_type_name, zip_strict
from narwhals.dependencies import (
get_cudf,
is_cudf_dtype,
is_pandas_like_dtype,
is_polars_data_type,
is_polars_schema,
is_pyarrow_data_type,
is_pyarrow_schema,
)

if TYPE_CHECKING:
from collections.abc import Iterable, Mapping
from collections.abc import Iterable
from typing import Any, ClassVar

import polars as pl
import pyarrow as pa
from typing_extensions import Self

from narwhals.dtypes import DType
from narwhals.typing import DTypeBackend
from narwhals.typing import (
DTypeBackend,
IntoArrowSchema,
IntoPandasSchema,
IntoPolarsSchema,
)


__all__ = ["Schema"]
Expand Down Expand Up @@ -76,6 +92,150 @@ def len(self) -> int:
"""Get the number of columns in the schema."""
return len(self)

@classmethod
def from_arrow(cls, schema: IntoArrowSchema, /) -> Self:
Comment on lines +95 to +96
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've made this parameter positional-only for flexibility on our end.

I don't feel strongly about keeping it that way, but we should probably rename it to `native_schema` for consistency with naming elsewhere.
E.g. `nw.from_native` vs `nw.Schema.__init__` distinguish native-level from narwhals-level inputs.

"""Construct a Schema from a pyarrow Schema.

Arguments:
schema: A pyarrow Schema or mapping of column names to pyarrow data types.

Examples:
>>> import pyarrow as pa
>>> import narwhals as nw
>>>
>>> mapping = {
... "a": pa.timestamp("us", "UTC"),
... "b": pa.date32(),
... "c": pa.string(),
... "d": pa.uint8(),
... }
>>> native = pa.schema(mapping)
>>>
>>> nw.Schema.from_arrow(native)
Schema({'a': Datetime(time_unit='us', time_zone='UTC'), 'b': Date, 'c': String, 'd': UInt8})

>>> nw.Schema.from_arrow(mapping) == nw.Schema.from_arrow(native)
True
"""
if isinstance(schema, Mapping):
if not schema:
return cls()
import pyarrow as pa # ignore-banned-import

schema = pa.schema(schema)
from narwhals._arrow.utils import native_to_narwhals_dtype

return cls(
(field.name, native_to_narwhals_dtype(field.type, cls._version))
for field in schema
)

@classmethod
def from_pandas_like(cls, schema: IntoPandasSchema, /) -> Self:
    """Build a Schema from a mapping of pandas-like data types.

    An empty mapping produces an empty Schema.

    Arguments:
        schema: A mapping of column names to pandas-like data types.

    Examples:
        >>> import numpy as np
        >>> import pandas as pd
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>>
        >>> data = {"a": [1], "b": ["a"], "c": [False], "d": [9.2]}
        >>> native = pd.DataFrame(data).convert_dtypes().dtypes.to_dict()
        >>>
        >>> nw.Schema.from_pandas_like(native)
        Schema({'a': Int64, 'b': String, 'c': Boolean, 'd': Float64})
        >>>
        >>> mapping = {
        ...     "a": pd.DatetimeTZDtype("us", "UTC"),
        ...     "b": pd.ArrowDtype(pa.date32()),
        ...     "c": pd.StringDtype("python"),
        ...     "d": np.dtype("uint8"),
        ... }
        >>>
        >>> nw.Schema.from_pandas_like(mapping)
        Schema({'a': Datetime(time_unit='us', time_zone='UTC'), 'b': Date, 'c': String, 'd': UInt8})
    """
    if not schema:
        return cls()
    # cuDF is selected only when `get_cudf()` is truthy *and* at least one
    # data type passes `is_cudf_dtype`; everything else is treated as pandas.
    uses_cudf = bool(get_cudf()) and any(map(is_cudf_dtype, schema.values()))
    backend = Implementation.CUDF if uses_cudf else Implementation.PANDAS
    return cls._from_pandas_like(schema, backend)

@classmethod
def from_native(
    cls, schema: IntoArrowSchema | IntoPolarsSchema | IntoPandasSchema, /
) -> Self:
    """Build a Schema from any supported native schema representation.

    A pyarrow Schema is forwarded to `from_arrow` and a polars Schema to
    `from_polars`. Any other mapping is handed to `_from_native_mapping`,
    except for an empty one, which produces an empty Schema.

    Arguments:
        schema: A native schema object, or mapping of column names to
            *instantiated* native data types.

    Raises:
        TypeError: If `schema` is neither a pyarrow Schema, a polars Schema,
            nor a mapping.

    Examples:
        >>> import datetime as dt
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>>
        >>> data = {"a": [1], "b": ["a"], "c": [dt.time(1, 2, 3)], "d": [[2]]}
        >>> native = pa.table(data).schema
        >>>
        >>> nw.Schema.from_native(native)
        Schema({'a': Int64, 'b': String, 'c': Time, 'd': List(Int64)})
    """
    # Dedicated schema objects are recognised before the generic mapping path.
    if is_pyarrow_schema(schema):
        return cls.from_arrow(schema)
    if is_polars_schema(schema):
        return cls.from_polars(schema)
    if not isinstance(schema, Mapping):
        msg = (
            f"Expected an arrow, polars, or pandas schema, but got "
            f"{qualified_type_name(schema)!r}\n\n{schema!r}"
        )
        raise TypeError(msg)
    if not schema:
        return cls()
    return cls._from_native_mapping(schema)

@classmethod
def from_polars(cls, schema: IntoPolarsSchema, /) -> Self:
    """Build a Schema from a polars Schema or an equivalent mapping.

    An empty input produces an empty Schema.

    Arguments:
        schema: A polars Schema or mapping of column names to *instantiated*
            polars data types.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>>
        >>> mapping = {
        ...     "a": pl.Datetime(time_zone="UTC"),
        ...     "b": pl.Date(),
        ...     "c": pl.String(),
        ...     "d": pl.UInt8(),
        ... }
        >>> native = pl.Schema(mapping)
        >>>
        >>> nw.Schema.from_polars(native)
        Schema({'a': Datetime(time_unit='us', time_zone='UTC'), 'b': Date, 'c': String, 'd': UInt8})

        >>> nw.Schema.from_polars(mapping) == nw.Schema.from_polars(native)
        True
    """
    if not schema:
        return cls()
    # Imported only once there is something to convert.
    from narwhals._polars.utils import native_to_narwhals_dtype

    version = cls._version
    converted = (
        (column, native_to_narwhals_dtype(native_dtype, version))
        for column, native_dtype in schema.items()
    )
    return cls(converted)

def to_arrow(self) -> pa.Schema:
"""Convert Schema to a pyarrow Schema.

Expand Down Expand Up @@ -171,3 +331,35 @@ def to_polars(self) -> pl.Schema:
if pl_version >= (1, 0, 0)
else cast("pl.Schema", dict(schema))
)

@classmethod
def _from_native_mapping(
    cls,
    native: Mapping[str, pa.DataType] | Mapping[str, pl.DataType] | IntoPandasSchema,
    /,
) -> Self:
    """Route a non-empty mapping to the constructor matching its data types.

    Only the first entry of `native` is inspected to pick the constructor.

    Raises:
        TypeError: If the first data type is not a polars, pandas-like, or
            pyarrow data type.
    """
    column, native_dtype = next(iter(native.items()))
    # Guards are tried in a fixed order: polars, then pandas-like, then pyarrow.
    if is_polars_data_type(native_dtype):
        result = cls.from_polars(cast("IntoPolarsSchema", native))
    elif is_pandas_like_dtype(native_dtype):
        result = cls.from_pandas_like(cast("IntoPandasSchema", native))
    elif is_pyarrow_data_type(native_dtype):
        result = cls.from_arrow(cast("IntoArrowSchema", native))
    else:
        msg = (
            f"Expected an arrow, polars, or pandas dtype, but found "
            f"`{column}: {qualified_type_name(native_dtype)}`\n\n{native!r}"
        )
        raise TypeError(msg)
    return result

@classmethod
def _from_pandas_like(
    cls, schema: IntoPandasSchema, implementation: Implementation, /
) -> Self:
    """Convert a pandas-like dtype mapping using the given implementation.

    Each data type is converted with `allow_object=True`.
    """
    from narwhals._pandas_like.utils import native_to_narwhals_dtype

    version = cls._version
    converted = (
        (
            column,
            native_to_narwhals_dtype(
                native_dtype, version, implementation, allow_object=True
            ),
        )
        for column, native_dtype in schema.items()
    )
    return cls(converted)
12 changes: 11 additions & 1 deletion narwhals/typing.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
from __future__ import annotations

from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar, Union

from narwhals._compliant import CompliantDataFrame, CompliantLazyFrame, CompliantSeries
from narwhals._typing import Backend, EagerAllowed, IntoBackend, LazyAllowed

if TYPE_CHECKING:
import datetime as dt
from collections.abc import Iterable, Mapping, Sequence, Sized
from collections.abc import Iterable, Sequence, Sized
from decimal import Decimal
from types import ModuleType

import numpy as np
import pandas as pd
import polars as pl
import pyarrow as pa
from typing_extensions import TypeAlias

from narwhals import dtypes
Expand Down Expand Up @@ -359,6 +363,8 @@ def Binary(self) -> type[dtypes.Binary]: ...
Into1DArray: TypeAlias = "_1DArray | _NumpyScalar"
"""A 1-dimensional `numpy.ndarray` or scalar that can be converted into one."""

PandasLikeDType: TypeAlias = "pd.api.extensions.ExtensionDtype | np.dtype[Any]"


NumericLiteral: TypeAlias = "int | float | Decimal"
TemporalLiteral: TypeAlias = "dt.date | dt.datetime | dt.time | dt.timedelta"
Expand Down Expand Up @@ -429,6 +435,10 @@ def Binary(self) -> type[dtypes.Binary]: ...
└────────────────────────┘
"""

IntoArrowSchema: TypeAlias = "pa.Schema | Mapping[str, pa.DataType]"
IntoPolarsSchema: TypeAlias = "pl.Schema | Mapping[str, pl.DataType]"
IntoPandasSchema: TypeAlias = Mapping[str, PandasLikeDType]


# Annotations for `__getitem__` methods
_T = TypeVar("_T")
Expand Down
16 changes: 16 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,10 @@ def ibis_lazy_constructor(obj: Data) -> ibis.Table: # pragma: no cover
}
GPU_CONSTRUCTORS: dict[str, ConstructorEager] = {"cudf": cudf_constructor}

ID_PANDAS_LIKE = frozenset(
("pandas", "pandas[nullable]", "pandas[pyarrow]", "modin", "modin[pyarrow]", "cudf")
)


def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
if metafunc.config.getoption("all_cpu_constructors"): # pragma: no cover
Expand Down Expand Up @@ -286,6 +290,18 @@ def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
)
elif "constructor" in metafunc.fixturenames:
metafunc.parametrize("constructor", constructors, ids=constructors_ids)
elif "constructor_pandas_like" in metafunc.fixturenames:
pandas_like_constructors = []
pandas_like_constructors_ids = []
for fn, name in zip(eager_constructors, eager_constructors_ids):
if name in ID_PANDAS_LIKE:
pandas_like_constructors.append(fn)
pandas_like_constructors_ids.append(name)
metafunc.parametrize(
"constructor_pandas_like",
pandas_like_constructors,
ids=pandas_like_constructors_ids,
)


TEST_EAGER_BACKENDS: list[EagerAllowed] = []
Expand Down
Loading
Loading