diff --git a/docs/api-reference/schema.md b/docs/api-reference/schema.md
index e69d403cb5..74c0319ae0 100644
--- a/docs/api-reference/schema.md
+++ b/docs/api-reference/schema.md
@@ -6,6 +6,10 @@
       members:
         - names
         - dtypes
+        - from_arrow
+        - from_native
+        - from_pandas_like
+        - from_polars
         - len
         - to_arrow
         - to_pandas
diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py
index 6dc19e29f2..9e0102b624 100644
--- a/narwhals/_pandas_like/utils.py
+++ b/narwhals/_pandas_like/utils.py
@@ -362,7 +362,7 @@ def native_to_narwhals_dtype(
         # Per conversations with their maintainers, they don't support arbitrary
         # objects, so we can just return String.
         return version.dtypes.String()
-    if allow_object:  # pragma: no cover
+    if allow_object:
         return object_native_to_narwhals_dtype(None, version, implementation)
     msg = (
         "Unreachable code, object dtype should be handled separately"  # pragma: no cover
diff --git a/narwhals/dependencies.py b/narwhals/dependencies.py
index 9384c91446..caef82fc53 100644
--- a/narwhals/dependencies.py
+++ b/narwhals/dependencies.py
@@ -28,6 +28,7 @@
         IntoDataFrameT,
         IntoLazyFrameT,
         IntoSeriesT,
+        PandasLikeDType,
         _1DArray,
         _1DArrayInt,
         _2DArray,
@@ -306,6 +307,22 @@ def is_polars_series(ser: Any) -> TypeIs[pl.Series]:
     return (pl := get_polars()) is not None and isinstance(ser, pl.Series)
 
 
+def is_polars_schema(obj: Any) -> TypeIs[pl.Schema]:
+    """Check whether `obj` is a Polars Schema without importing Polars."""
+    # `pl.Schema` is looked up defensively: older Polars releases do not define it.
+    return (
+        (pl := get_polars()) is not None
+        and hasattr(pl, "Schema")
+        and isinstance(obj, pl.Schema)
+    )
+
+
+# NOTE: For `pl.Schema` only instantiated dtypes are expected
+def is_polars_data_type(obj: Any) -> TypeIs[pl.DataType]:
+    """Check whether `obj` is a Polars DataType instance without importing Polars."""
+    return (pl := get_polars()) is not None and isinstance(obj, pl.DataType)
+
+
 def is_pyarrow_chunked_array(ser: Any) -> TypeIs[pa.ChunkedArray[Any]]:
     """Check whether `ser` is a PyArrow ChunkedArray without importing PyArrow.
 
@@ -330,6 +347,16 @@ def is_pyarrow_scalar(obj: Any) -> TypeIs[pa.Scalar[Any]]:
     return (pa := get_pyarrow()) is not None and isinstance(obj, pa.Scalar)
 
 
+def is_pyarrow_schema(obj: Any) -> TypeIs[pa.Schema]:
+    """Check whether `obj` is a PyArrow Schema without importing PyArrow."""
+    return (pa := get_pyarrow()) is not None and isinstance(obj, pa.Schema)
+
+
+def is_pyarrow_data_type(obj: Any) -> TypeIs[pa.DataType]:
+    """Check whether `obj` is a PyArrow DataType without importing PyArrow."""
+    return (pa := get_pyarrow()) is not None and isinstance(obj, pa.DataType)
+
+
 def is_pyspark_dataframe(df: Any) -> TypeIs[pyspark_sql.DataFrame]:
     """Check whether `df` is a PySpark DataFrame without importing PySpark.
 
@@ -438,6 +465,24 @@ def is_pandas_like_index(index: Any) -> bool:
     )  # pragma: no cover
 
 
+def is_pandas_like_dtype(obj: Any) -> TypeIs[PandasLikeDType]:
+    """Check whether `obj` is a pandas `ExtensionDtype` or a NumPy dtype."""
+    return (pd := get_pandas()) is not None and isinstance(
+        obj, (pd.api.extensions.ExtensionDtype, get_numpy().dtype)
+    )
+
+
+def is_cudf_dtype(
+    obj: Any,
+) -> TypeIs[pd.api.extensions.ExtensionDtype]:  # pragma: no cover
+    """Check whether `obj` is an `ExtensionDtype` that also exposes `to_arrow`."""
+    return (
+        (pd := get_pandas()) is not None
+        and isinstance(obj, pd.api.extensions.ExtensionDtype)
+        and hasattr(obj, "to_arrow")
+    )
+
+
 def is_into_series(native_series: Any | IntoSeriesT) -> TypeIs[IntoSeriesT]:
     """Check whether `native_series` can be converted to a Narwhals Series.
 
diff --git a/narwhals/schema.py b/narwhals/schema.py
index a34379983d..b459be68fa 100644
--- a/narwhals/schema.py
+++ b/narwhals/schema.py
@@ -7,20 +7,36 @@
 from __future__ import annotations
 
 from collections import OrderedDict
+from collections.abc import Mapping
 from functools import partial
 from typing import TYPE_CHECKING, cast
 
-from narwhals._utils import Implementation, Version, zip_strict
+from narwhals._utils import Implementation, Version, qualified_type_name, zip_strict
+from narwhals.dependencies import (
+    get_cudf,
+    is_cudf_dtype,
+    is_pandas_like_dtype,
+    is_polars_data_type,
+    is_polars_schema,
+    is_pyarrow_data_type,
+    is_pyarrow_schema,
+)
 
 if TYPE_CHECKING:
-    from collections.abc import Iterable, Mapping
+    from collections.abc import Iterable
     from typing import Any, ClassVar
 
     import polars as pl
     import pyarrow as pa
+    from typing_extensions import Self
 
     from narwhals.dtypes import DType
-    from narwhals.typing import DTypeBackend
+    from narwhals.typing import (
+        DTypeBackend,
+        IntoArrowSchema,
+        IntoPandasSchema,
+        IntoPolarsSchema,
+    )
 
 __all__ = ["Schema"]
 
@@ -76,6 +92,150 @@ def len(self) -> int:
         """Get the number of columns in the schema."""
         return len(self)
 
+    @classmethod
+    def from_arrow(cls, schema: IntoArrowSchema, /) -> Self:
+        """Construct a Schema from a pyarrow Schema.
+
+        Arguments:
+            schema: A pyarrow Schema or mapping of column names to pyarrow data types.
+
+        Examples:
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>>
+            >>> mapping = {
+            ...     "a": pa.timestamp("us", "UTC"),
+            ...     "b": pa.date32(),
+            ...     "c": pa.string(),
+            ...     "d": pa.uint8(),
+            ... }
+            >>> native = pa.schema(mapping)
+            >>>
+            >>> nw.Schema.from_arrow(native)
+            Schema({'a': Datetime(time_unit='us', time_zone='UTC'), 'b': Date, 'c': String, 'd': UInt8})
+
+            >>> nw.Schema.from_arrow(mapping) == nw.Schema.from_arrow(native)
+            True
+        """
+        if isinstance(schema, Mapping):
+            if not schema:  # empty mapping: return before importing pyarrow
+                return cls()
+            import pyarrow as pa  # ignore-banned-import
+
+            schema = pa.schema(schema)
+        from narwhals._arrow.utils import native_to_narwhals_dtype
+
+        return cls(
+            (field.name, native_to_narwhals_dtype(field.type, cls._version))
+            for field in schema
+        )
+
+    @classmethod
+    def from_pandas_like(cls, schema: IntoPandasSchema, /) -> Self:
+        """Construct a Schema from a pandas-like schema representation.
+
+        Arguments:
+            schema: A mapping of column names to pandas-like data types.
+
+        Examples:
+            >>> import numpy as np
+            >>> import pandas as pd
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>>
+            >>> data = {"a": [1], "b": ["a"], "c": [False], "d": [9.2]}
+            >>> native = pd.DataFrame(data).convert_dtypes().dtypes.to_dict()
+            >>>
+            >>> nw.Schema.from_pandas_like(native)
+            Schema({'a': Int64, 'b': String, 'c': Boolean, 'd': Float64})
+            >>>
+            >>> mapping = {
+            ...     "a": pd.DatetimeTZDtype("us", "UTC"),
+            ...     "b": pd.ArrowDtype(pa.date32()),
+            ...     "c": pd.StringDtype("python"),
+            ...     "d": np.dtype("uint8"),
+            ... }
+            >>>
+            >>> nw.Schema.from_pandas_like(mapping)
+            Schema({'a': Datetime(time_unit='us', time_zone='UTC'), 'b': Date, 'c': String, 'd': UInt8})
+        """
+        if not schema:  # empty mapping: nothing to infer an implementation from
+            return cls()
+        impl = (  # any cuDF dtype in the mapping selects the cuDF implementation
+            Implementation.CUDF
+            if get_cudf() and any(is_cudf_dtype(dtype) for dtype in schema.values())
+            else Implementation.PANDAS
+        )
+        return cls._from_pandas_like(schema, impl)
+
+    @classmethod
+    def from_native(
+        cls, schema: IntoArrowSchema | IntoPolarsSchema | IntoPandasSchema, /
+    ) -> Self:
+        """Construct a Schema from a native schema representation.
+
+        Arguments:
+            schema: A native schema object, or mapping of column names to
+                *instantiated* native data types.
+
+        Examples:
+            >>> import datetime as dt
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>>
+            >>> data = {"a": [1], "b": ["a"], "c": [dt.time(1, 2, 3)], "d": [[2]]}
+            >>> native = pa.table(data).schema
+            >>>
+            >>> nw.Schema.from_native(native)
+            Schema({'a': Int64, 'b': String, 'c': Time, 'd': List(Int64)})
+        """
+        if is_pyarrow_schema(schema):
+            return cls.from_arrow(schema)
+        if is_polars_schema(schema):
+            return cls.from_polars(schema)
+        if isinstance(schema, Mapping):  # plain mapping: dispatch on its dtypes
+            return cls._from_native_mapping(schema) if schema else cls()
+        msg = (
+            f"Expected an arrow, polars, or pandas schema, but got "
+            f"{qualified_type_name(schema)!r}\n\n{schema!r}"
+        )
+        raise TypeError(msg)
+
+    @classmethod
+    def from_polars(cls, schema: IntoPolarsSchema, /) -> Self:
+        """Construct a Schema from a polars Schema.
+
+        Arguments:
+            schema: A polars Schema or mapping of column names to *instantiated*
+                polars data types.
+
+        Examples:
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>>
+            >>> mapping = {
+            ...     "a": pl.Datetime(time_zone="UTC"),
+            ...     "b": pl.Date(),
+            ...     "c": pl.String(),
+            ...     "d": pl.UInt8(),
+            ... }
+            >>> native = pl.Schema(mapping)
+            >>>
+            >>> nw.Schema.from_polars(native)
+            Schema({'a': Datetime(time_unit='us', time_zone='UTC'), 'b': Date, 'c': String, 'd': UInt8})
+
+            >>> nw.Schema.from_polars(mapping) == nw.Schema.from_polars(native)
+            True
+        """
+        if not schema:  # empty schema or mapping: return before the backend import
+            return cls()
+        from narwhals._polars.utils import native_to_narwhals_dtype
+
+        return cls(
+            (name, native_to_narwhals_dtype(dtype, cls._version))
+            for name, dtype in schema.items()
+        )
+
     def to_arrow(self) -> pa.Schema:
         """Convert Schema to a pyarrow Schema.
 
@@ -171,3 +331,45 @@ def to_polars(self) -> pl.Schema:
             if pl_version >= (1, 0, 0)
             else cast("pl.Schema", dict(schema))
         )
+
+    @classmethod
+    def _from_native_mapping(
+        cls,
+        native: Mapping[str, pa.DataType] | Mapping[str, pl.DataType] | IntoPandasSchema,
+        /,
+    ) -> Self:
+        """Build a Schema from a plain mapping of column names to native dtypes.
+
+        The backend is chosen by inspecting the first dtype only; an empty mapping
+        yields an empty Schema.
+
+        Raises:
+            TypeError: If the first dtype belongs to no supported backend.
+        """
+        if not native:
+            return cls()
+        first_key, first_dtype = next(iter(native.items()))
+        if is_polars_data_type(first_dtype):
+            return cls.from_polars(cast("IntoPolarsSchema", native))
+        if is_pandas_like_dtype(first_dtype):
+            return cls.from_pandas_like(cast("IntoPandasSchema", native))
+        if is_pyarrow_data_type(first_dtype):
+            return cls.from_arrow(cast("IntoArrowSchema", native))
+        msg = (
+            f"Expected an arrow, polars, or pandas dtype, but found "
+            f"`{first_key}: {qualified_type_name(first_dtype)}`\n\n{native!r}"
+        )
+        raise TypeError(msg)
+
+    @classmethod
+    def _from_pandas_like(
+        cls, schema: IntoPandasSchema, implementation: Implementation, /
+    ) -> Self:
+        """Convert every pandas-like dtype in `schema` using `implementation`."""
+        from narwhals._pandas_like.utils import native_to_narwhals_dtype
+
+        impl = implementation
+        return cls(
+            (name, native_to_narwhals_dtype(dtype, cls._version, impl, allow_object=True))
+            for name, dtype in schema.items()
+        )
diff --git a/narwhals/typing.py b/narwhals/typing.py
index de0315cc65..7b3cf43778 100644
--- a/narwhals/typing.py
+++ b/narwhals/typing.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from collections.abc import Mapping
 from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar, Union
 
 from narwhals._compliant import CompliantDataFrame, CompliantLazyFrame, CompliantSeries
@@ -7,11 +8,14 @@
 
 if TYPE_CHECKING:
     import datetime as dt
-    from collections.abc import Iterable, Mapping, Sequence, Sized
+    from collections.abc import Iterable, Sequence, Sized
     from decimal import Decimal
     from types import ModuleType
 
     import numpy as np
+    import pandas as pd
+    import polars as pl
+    import pyarrow as pa
     from typing_extensions import TypeAlias
 
     from narwhals import dtypes
@@ -359,6 +363,8 @@ def Binary(self) -> type[dtypes.Binary]: ...
 Into1DArray: TypeAlias = "_1DArray | _NumpyScalar"
 """A 1-dimensional `numpy.ndarray` or scalar that can be converted into one."""
 
+PandasLikeDType: TypeAlias = "pd.api.extensions.ExtensionDtype | np.dtype[Any]"
+
 NumericLiteral: TypeAlias = "int | float | Decimal"
 TemporalLiteral: TypeAlias = "dt.date | dt.datetime | dt.time | dt.timedelta"
 
@@ -429,6 +435,10 @@ def Binary(self) -> type[dtypes.Binary]: ...
 └────────────────────────┘
 """
 
+IntoArrowSchema: TypeAlias = "pa.Schema | Mapping[str, pa.DataType]"
+IntoPolarsSchema: TypeAlias = "pl.Schema | Mapping[str, pl.DataType]"
+IntoPandasSchema: TypeAlias = Mapping[str, PandasLikeDType]
+
 # Annotations for `__getitem__` methods
 _T = TypeVar("_T")
 
diff --git a/tests/conftest.py b/tests/conftest.py
index cbc20a658d..c33099bb76 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -234,6 +234,10 @@ def ibis_lazy_constructor(obj: Data) -> ibis.Table:  # pragma: no cover
 }
 GPU_CONSTRUCTORS: dict[str, ConstructorEager] = {"cudf": cudf_constructor}
 
+ID_PANDAS_LIKE = frozenset(
+    ("pandas", "pandas[nullable]", "pandas[pyarrow]", "modin", "modin[pyarrow]", "cudf")
+)
+
 
 def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
     if metafunc.config.getoption("all_cpu_constructors"):  # pragma: no cover
@@ -286,6 +290,18 @@ def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
         )
     elif "constructor" in metafunc.fixturenames:
         metafunc.parametrize("constructor", constructors, ids=constructors_ids)
+    elif "constructor_pandas_like" in metafunc.fixturenames:
+        pandas_like_constructors = []
+        pandas_like_constructors_ids = []
+        for fn, name in zip(eager_constructors, eager_constructors_ids):
+            if name in ID_PANDAS_LIKE:
+                pandas_like_constructors.append(fn)
+                pandas_like_constructors_ids.append(name)
+        metafunc.parametrize(
+            "constructor_pandas_like",
+            pandas_like_constructors,
+            ids=pandas_like_constructors_ids,
+        )
 
 
 TEST_EAGER_BACKENDS: list[EagerAllowed] = []
diff --git
a/tests/frame/schema_test.py b/tests/frame/schema_test.py
index e749d27482..8259c050b1 100644
--- a/tests/frame/schema_test.py
+++ b/tests/frame/schema_test.py
@@ -1,22 +1,33 @@
 from __future__ import annotations
 
 import re
-from datetime import date, datetime, timedelta, timezone
-from typing import TYPE_CHECKING, Any
+from datetime import date, datetime, time, timedelta, timezone
+from importlib.util import find_spec
+from typing import TYPE_CHECKING, Any, Literal
 
 import pandas as pd
 import pytest
 
 import narwhals as nw
 from narwhals.exceptions import PerformanceWarning
-from tests.utils import PANDAS_VERSION
+from tests.utils import PANDAS_VERSION, POLARS_VERSION, ConstructorPandasLike
 
 if TYPE_CHECKING:
-    from collections.abc import Sequence
+    from collections.abc import Callable, Sequence
 
-    from narwhals.typing import DTypeBackend
+    import polars as pl
+    from typing_extensions import TypeAlias
+
+    from narwhals.typing import (
+        DTypeBackend,
+        IntoArrowSchema,
+        IntoPandasSchema,
+        IntoPolarsSchema,
+    )
     from tests.utils import Constructor, ConstructorEager
 
+    TimeUnit: TypeAlias = Literal["ns", "us"]
+
 
 data = {"a": [datetime(2020, 1, 1)], "b": [datetime(2020, 1, 1, tzinfo=timezone.utc)]}
 
@@ -416,3 +427,251 @@ def test_schema_to_pandas_invalid() -> None:
     msg = "Expected one of {None, 'pyarrow', 'numpy_nullable'}, got: 'cabbage'"
     with pytest.raises(ValueError, match=msg):
         schema.to_pandas("cabbage")  # type: ignore[arg-type]
+
+
+@pytest.fixture(scope="session")
+def time_unit() -> TimeUnit:
+    """Datetime unit to expect from pandas: `"us"` for `pandas>=3`, else `"ns"`.
+
+    https://pandas.pydata.org/docs/dev/whatsnew/v3.0.0.html#datetime-resolution-inference
+    """
+    return "us" if PANDAS_VERSION >= (3,) else "ns"
+
+
+def _polars_schema() -> Sequence[type[pl.Schema | dict[str, pl.DataType]]]:
+    # Helper so we can parametrize over `pl.Schema` *and* `dict` when both are available
+    if find_spec("polars"):
+        import polars as pl
+
+        if POLARS_VERSION >= (1,):
+            return (pl.Schema, dict)
+    return (dict,)  # pragma: no cover
+
+
+def _arrow_schema() -> Sequence[Callable[..., IntoArrowSchema]]:
+    if find_spec("pyarrow"):
+        import pyarrow as pa
+
+        return (pa.schema, dict)
+    return (dict,)  # pragma: no cover
+
+
+@pytest.fixture(scope="session", params=_polars_schema())
+def polars_schema_constructor(
+    request: pytest.FixtureRequest,
+) -> type[pl.Schema | dict[str, pl.DataType]]:
+    pytest.importorskip("polars")
+    return request.param  # type: ignore[no-any-return]
+
+
+@pytest.fixture(scope="session", params=_arrow_schema())
+def arrow_schema_constructor(
+    request: pytest.FixtureRequest,
+) -> Callable[..., IntoArrowSchema]:
+    pytest.importorskip("pyarrow")
+    return request.param  # type: ignore[no-any-return]
+
+
+@pytest.fixture
+def target_narwhals(time_unit: TimeUnit) -> nw.Schema:
+    return nw.Schema(
+        {
+            "a": nw.Int64(),
+            "b": nw.String(),
+            "c": nw.Boolean(),
+            "d": nw.Float64(),
+            "e": nw.Datetime(time_unit),
+            "f": nw.Date(),
+            "g": nw.Time(),
+        }
+    )
+
+
+@pytest.fixture
+def target_narwhals_pandas(time_unit: TimeUnit) -> nw.Schema:
+    return nw.Schema(
+        {
+            "a": nw.Int64(),
+            "b": nw.String(),
+            "c": nw.Boolean(),
+            "d": nw.Float64(),
+            "e": nw.Datetime(time_unit),
+        }
+    )
+
+
+@pytest.fixture
+def origin_polars(
+    polars_schema_constructor: type[pl.Schema | dict[str, pl.DataType]],
+    time_unit: TimeUnit,
+) -> IntoPolarsSchema:
+    pytest.importorskip("polars")
+    import polars as pl
+
+    return polars_schema_constructor(
+        {
+            "a": pl.Int64(),
+            "b": pl.String(),
+            "c": pl.Boolean(),
+            "d": pl.Float64(),
+            "e": pl.Datetime(time_unit),
+            "f": pl.Date(),
+            "g": pl.Time(),
+        }
+    )
+
+
+@pytest.fixture
+def origin_arrow(
+    arrow_schema_constructor: Callable[..., IntoArrowSchema], time_unit: TimeUnit
+) -> IntoArrowSchema:
+    pytest.importorskip("pyarrow")
+    import pyarrow as pa
+
+    return arrow_schema_constructor(
+        {
+            "a": pa.int64(),
+            "b": pa.string(),
+            "c": pa.bool_(),
+            "d": pa.float64(),
+            "e": pa.timestamp(time_unit),
+            "f": pa.date32(),
+            "g": pa.time64("ns"),
+        }
+    )
+
+
+@pytest.fixture
+def origin_pandas_like(
+    constructor_pandas_like: ConstructorPandasLike,
+) -> IntoPandasSchema:
+    data: dict[str, Any] = {
+        "a": [2, 1],
+        "b": ["hello", "hi"],
+        "c": [False, True],
+        "d": [5.3, 4.99],
+        "e": [datetime(2006, 1, 1), datetime(2001, 9, 3)],
+    }
+    return constructor_pandas_like(data).dtypes.to_dict()
+
+
+@pytest.fixture
+def origin_pandas_like_pyarrow(
+    constructor_pandas_like: ConstructorPandasLike,
+) -> IntoPandasSchema:
+    if PANDAS_VERSION < (1, 5):
+        pytest.skip(reason="pandas too old for `pyarrow`")
+    name_pandas_like = {"pandas_pyarrow_constructor", "modin_pyarrow_constructor"}
+    if constructor_pandas_like.__name__ not in name_pandas_like:
+        pytest.skip(f"{constructor_pandas_like.__name__!r} is not pandas_like_pyarrow")
+    data = {
+        "a": [2, 1],
+        "b": ["hello", "hi"],
+        "c": [False, True],
+        "d": [1.2, 3.4],
+        "e": [datetime(2003, 1, 1), datetime(2004, 1, 1)],
+        "f": [date(2003, 1, 1), date(2004, 1, 1)],
+        "g": [time(10, 1, 1), time(14, 1, 1)],
+    }
+    df_pd = constructor_pandas_like(data)
+    df_nw = nw.from_native(df_pd).with_columns(
+        nw.col("f").cast(nw.Date()), nw.col("g").cast(nw.Time())
+    )
+    return df_nw.to_native().dtypes.to_dict()
+
+
+def test_schema_from_polars(
+    origin_polars: IntoPolarsSchema, target_narwhals: nw.Schema
+) -> None:
+    from_polars = nw.Schema.from_polars(origin_polars)
+    from_native = nw.Schema.from_native(origin_polars)
+    assert from_polars == target_narwhals
+    assert from_native == target_narwhals
+    assert from_native == from_polars
+
+
+def test_schema_from_arrow(
+    origin_arrow: IntoArrowSchema, target_narwhals: nw.Schema
+) -> None:
+    from_arrow = nw.Schema.from_arrow(origin_arrow)
+    from_native = nw.Schema.from_native(origin_arrow)
+    assert from_arrow == target_narwhals
+    assert from_native == target_narwhals
+    assert from_native == from_arrow
+
+
+def test_schema_from_pandas_like(
+    origin_pandas_like: IntoPandasSchema, target_narwhals_pandas: nw.Schema
+) -> None:
+    from_pandas = nw.Schema.from_pandas_like(origin_pandas_like)
+    from_native = nw.Schema.from_native(origin_pandas_like)
+    assert from_pandas == target_narwhals_pandas
+    assert from_native == target_narwhals_pandas
+    assert from_native == from_pandas
+
+
+def test_schema_from_pandas_like_pyarrow(
+    origin_pandas_like_pyarrow: IntoPandasSchema, target_narwhals: nw.Schema
+) -> None:
+    from_pandas = nw.Schema.from_pandas_like(origin_pandas_like_pyarrow)
+    from_native = nw.Schema.from_native(origin_pandas_like_pyarrow)
+    assert from_pandas == target_narwhals
+    assert from_native == target_narwhals
+    assert from_native == from_pandas
+
+
+def test_schema_from_invalid() -> None:
+    flags = re.DOTALL | re.IGNORECASE
+
+    with pytest.raises(
+        TypeError, match=re.compile(r"expected.+schema.+got.+list.+a.+string", flags)
+    ):
+        nw.Schema.from_native([("a", nw.String())])  # type: ignore[arg-type]
+    with pytest.raises(
+        TypeError, match=re.compile(r"expected.+dtype.+found.+`a: str`", flags)
+    ):
+        nw.Schema.from_native({"a": str})  # type: ignore[arg-type]
+    with pytest.raises(
+        TypeError,
+        match=re.compile(r"expected.+dtype.+found.+`a: narwhals.+Int64`.+Schema", flags),
+    ):
+        nw.Schema.from_native(nw.Schema({"a": nw.Int64()}))  # type: ignore[arg-type]
+
+
+def test_schema_from_empty_mapping() -> None:
+    # NOTE: Should never require importing a native package
+    expected = nw.Schema()
+    assert nw.Schema.from_native({}) == expected
+    assert nw.Schema.from_arrow({}) == expected
+    assert nw.Schema.from_pandas_like({}) == expected
+    assert nw.Schema.from_polars({}) == expected
+
+
+@pytest.mark.skipif(
+    POLARS_VERSION < (1, 6, 0), reason="https://github.com/pola-rs/polars/pull/18308"
+)
+def test_schema_from_to_roundtrip() -> None:
+    pytest.importorskip("polars")
+    pytest.importorskip("pyarrow")
+    import polars as pl
+
+    py_schema_1 = {
+        "a": int,
+        "b": str,
+        "c": bool,
+        "d": float,
+        "e": datetime,
+        "f": date,
+        "g": time,
+    }
+    pl_schema_1 = pl.Schema(py_schema_1)
+    nw_schema_1 = nw.Schema.from_native(pl_schema_1)
+    pa_schema_1 = nw_schema_1.to_arrow()
+    nw_schema_2 = nw.Schema.from_native(pa_schema_1)
+    pl_schema_2 = nw_schema_2.to_polars()
+    nw_schema_3 = nw.Schema.from_native(pl_schema_2)
+    py_schema_2 = nw_schema_3.to_polars().to_python()
+    assert pl_schema_1 == pl_schema_2
+    assert nw_schema_1 == nw_schema_2
+    assert nw_schema_2 == nw_schema_3
+    assert py_schema_1 == py_schema_2
diff --git a/tests/utils.py b/tests/utils.py
index 65e00142cc..1af14f98e0 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -45,6 +45,7 @@ def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]:
 Constructor: TypeAlias = Callable[[Any], "NativeLazyFrame | NativeDataFrame"]
 ConstructorEager: TypeAlias = Callable[[Any], "NativeDataFrame"]
 ConstructorLazy: TypeAlias = Callable[[Any], "NativeLazyFrame"]
+ConstructorPandasLike: TypeAlias = Callable[[Any], "pd.DataFrame"]
 
 
 def _to_comparable_list(column_values: Any) -> Any:
diff --git a/utils/check_api_reference.py b/utils/check_api_reference.py
index efabd53f8f..01c5ae889e 100644
--- a/utils/check_api_reference.py
+++ b/utils/check_api_reference.py
@@ -219,6 +219,18 @@ def read_documented_members(source: str | Path) -> list[str]:
     print(extra)  # noqa: T201
     ret = 1
 
+# Schema
+schema_methods = list(iter_api_reference_names(nw.Schema))
+documented = read_documented_members(DIR_API_REF / "schema.md")
+if missing := set(schema_methods).difference(documented):
+    print("Schema: not documented")  # noqa: T201
+    print(missing)  # noqa: T201
+    ret = 1
+if extra := set(documented).difference(schema_methods):
+    print("Schema: outdated")  # noqa: T201
+    print(extra)  # noqa: T201
+    ret = 1
+
 # Check Expr vs Series
 if missing := set(expr_methods).difference(series_methods).difference(EXPR_ONLY_METHODS):
     print("In Expr but not in Series")  # noqa: T201