Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
592d6fc
add placeholders for from_dicts function
felixgwilliams Sep 20, 2025
9ea6a97
add implementation and unit tests
felixgwilliams Sep 20, 2025
0e49901
add examples to docstrings copied from `from_dict`
felixgwilliams Sep 20, 2025
a411c33
address review comments
felixgwilliams Sep 21, 2025
a734b9b
remove test for deprecated call in from_dicts_test
felixgwilliams Sep 21, 2025
01bb262
add v1 and v2 tests for from_dicts
felixgwilliams Sep 21, 2025
77b3921
refactor: Skip unreachable `Unknown`
dangotbanned Sep 21, 2025
076a821
change data type on from_dicts to Sequence[Mapping[str, Any]] and add…
felixgwilliams Sep 21, 2025
1dde847
use `types.MappingProxyType` instead of custom type to test non-dict …
felixgwilliams Sep 21, 2025
130427d
Merge branch 'main' into feat/from_dicts
FBruzzesi Sep 21, 2025
697bb66
handle mappings as dicts in from_dicts for polars <1.30
felixgwilliams Sep 23, 2025
cbbdc5d
docs: Move compat info to `FROM_DICTS_ACCEPTS_MAPPINGS` flag
dangotbanned Sep 24, 2025
3b5ff0c
refactor(polars): Simplify empty cases
dangotbanned Sep 24, 2025
06b0482
Update narwhals/_polars/dataframe.py
felixgwilliams Sep 24, 2025
5e1164d
revert: Avoid introducing deprecated `native_namespace`
dangotbanned Sep 24, 2025
3a2d4a3
refactor(typing): Use `IntoSchema` everywhere
dangotbanned Sep 24, 2025
c3e5936
docs: Remove *Returns* section
dangotbanned Sep 24, 2025
1b37317
refactor(suggestion): Make stable functions, aliases
dangotbanned Sep 24, 2025
e0cf86d
test: Merge `dict` vs `Mapping` cases
dangotbanned Sep 24, 2025
4154c81
test: Use `assert_equal_data`
dangotbanned Sep 24, 2025
9b12199
add a more distinct example to from_dicts docstring
felixgwilliams Sep 24, 2025
6603bf5
Apply suggestions from code review
felixgwilliams Sep 25, 2025
868b488
tests: test to highlight how eager backends deal with inconsistent di…
felixgwilliams Sep 25, 2025
8de7904
Merge branch 'main' into feat/from_dicts
dangotbanned Sep 29, 2025
f2b45b6
Apply suggestions from code review
felixgwilliams Sep 29, 2025
a2e3d80
add tip on non-uniform keys to nw.from_dicts
felixgwilliams Sep 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api-reference/dataframe.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
- filter
- from_arrow
- from_dict
- from_dicts
- from_numpy
- gather_every
- get_column
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/narwhals.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Here are the top-level functions available in Narwhals.
- exclude
- from_arrow
- from_dict
- from_dicts
- from_native
- from_numpy
- generate_temporary_column_name
Expand Down
2 changes: 2 additions & 0 deletions narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
exclude,
from_arrow,
from_dict,
from_dicts,
from_numpy,
len_ as len,
lit,
Expand Down Expand Up @@ -137,6 +138,7 @@
"exclude",
"from_arrow",
"from_dict",
"from_dicts",
"from_native",
"from_numpy",
"generate_temporary_column_name",
Expand Down
18 changes: 18 additions & 0 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,24 @@ def from_dict(
native = pa.Table.from_pydict(data, schema=pa_schema)
return cls.from_native(native, context=context)

@classmethod
def from_dicts(
    cls,
    data: Sequence[Mapping[str, Any]],
    /,
    *,
    context: _LimitedContext,
    schema: IntoSchema | None,
) -> Self:
    """Build a PyArrow-backed frame from a sequence of row mappings.

    Arguments:
        data: Rows, each mapping column name to value.
        context: Forwarded to `from_native`.
        schema: Optional narwhals schema. When given it is converted with
            `Schema.to_arrow` and passed on to PyArrow.
    """
    from narwhals.schema import Schema

    arrow_schema = None if schema is None else Schema(schema).to_arrow()
    if not arrow_schema or data:
        table = pa.Table.from_pylist(data, schema=arrow_schema)
    else:
        # No rows but a usable schema: build the empty table from the schema.
        table = arrow_schema.empty_table()
    return cls.from_native(table, context=context)

@staticmethod
def _is_native(obj: pa.Table | Any) -> TypeIs[pa.Table]:
    """Return whether `obj` is a `pa.Table` instance."""
    return isinstance(obj, pa.Table)
Expand Down
9 changes: 9 additions & 0 deletions narwhals/_compliant/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,15 @@ def from_dict(
schema: IntoSchema | None,
) -> Self: ...
# Alternate constructor: build a frame from a sequence of row mappings
# (column name -> value). `schema` must be passed by keyword and may be `None`.
@classmethod
def from_dicts(
    cls,
    data: Sequence[Mapping[str, Any]],
    /,
    *,
    context: _LimitedContext,
    schema: IntoSchema | None,
) -> Self: ...
@classmethod
def from_numpy(
cls,
data: _2DArray,
Expand Down
25 changes: 25 additions & 0 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,31 @@ def from_dict(
native = native.astype(Schema(schema).to_pandas(backend))
return cls.from_native(native, context=context)

@classmethod
def from_dicts(
    cls,
    data: Sequence[Mapping[str, Any]],
    /,
    *,
    context: _LimitedContext,
    schema: IntoSchema | None,
) -> Self:
    """Build a pandas-like frame from a sequence of row mappings.

    The native frame is created first; when `schema` is truthy, the frame is
    then cast to the dtypes produced by `Schema.to_pandas`.

    Arguments:
        data: Rows, each mapping column name to value.
        context: Supplies the implementation (and thereby the native
            namespace) and is forwarded to `from_native`.
        schema: Optional narwhals schema.
    """
    from narwhals.schema import Schema

    impl = context._implementation
    frame_cls = cast("type[pd.DataFrame]", impl.to_native_namespace().DataFrame)

    if schema and not data:
        # No rows: create one empty column per schema entry.
        frame = frame_cls.from_dict({name: [] for name in schema})
    else:
        frame = frame_cls.from_records(data)

    if not schema:
        return cls.from_native(frame, context=context)

    # Dtype backends are only read off the frame when it was built from rows.
    dtype_backends: Iterable[DTypeBackend] | None = (
        iter_dtype_backends(frame.dtypes, impl) if data else None
    )
    frame = frame.astype(Schema(schema).to_pandas(dtype_backends))
    return cls.from_native(frame, context=context)

@staticmethod
def _is_native(obj: Any) -> TypeIs[Any]:
    """Return the result of `is_pandas_like_dataframe(obj)`."""
    return is_pandas_like_dataframe(obj)  # pragma: no cover
Expand Down
25 changes: 25 additions & 0 deletions narwhals/_polars/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from narwhals._polars.namespace import PolarsNamespace
from narwhals._polars.series import PolarsSeries
from narwhals._polars.utils import (
FROM_DICTS_ACCEPTS_MAPPINGS,
catch_polars_exception,
extract_args_kwargs,
native_to_narwhals_dtype,
Expand Down Expand Up @@ -322,6 +323,30 @@ def from_dict(
pl_schema = Schema(schema).to_polars() if schema is not None else schema
return cls.from_native(pl.from_dict(data, pl_schema), context=context)

@classmethod
def from_dicts(
    cls,
    data: Sequence[Mapping[str, Any]],
    /,
    *,
    context: _LimitedContext,
    schema: IntoSchema | None,
) -> Self:
    """Build a Polars-backed frame from a sequence of row mappings.

    Arguments:
        data: Rows, each mapping column name to value.
        context: Forwarded to `from_native`.
        schema: Optional narwhals schema. When given it is converted with
            `Schema.to_polars` and passed on to Polars.
    """
    from narwhals.schema import Schema

    pl_schema = Schema(schema).to_polars() if schema is not None else schema
    if not data:
        native = pl.DataFrame(schema=pl_schema)
    elif FROM_DICTS_ACCEPTS_MAPPINGS:
        native = pl.from_dicts(data, pl_schema)  # type: ignore[arg-type]
    else:  # pragma: no cover
        # This Polars version only accepts real `dict` rows. Copy every other
        # mapping into a `dict` and still go through `pl.from_dicts`, so values
        # are matched to columns by key. Feeding `row.values()` positionally
        # would misalign values whenever a row's key order differs from the
        # schema (or from the first row), or when a key is missing.
        rows = [row if isinstance(row, dict) else dict(row) for row in data]
        native = pl.from_dicts(rows, pl_schema)

    return cls.from_native(native, context=context)

@staticmethod
def _is_native(obj: pl.DataFrame | Any) -> TypeIs[pl.DataFrame]:
    """Return whether `obj` is a `pl.DataFrame` instance."""
    return isinstance(obj, pl.DataFrame)
Expand Down
6 changes: 6 additions & 0 deletions narwhals/_polars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@
SERIES_ACCEPTS_PD_INDEX: Final[bool] = BACKEND_VERSION >= (0, 20, 7)
"""`pl.Series(values: pd.Index)` fixed in https://github.com/pola-rs/polars/pull/14087"""

# Checked by `from_dicts` in `narwhals/_polars/dataframe.py` to decide whether
# non-`dict` mappings can be handed to `pl.from_dicts` unchanged.
FROM_DICTS_ACCEPTS_MAPPINGS: Final[bool] = BACKEND_VERSION >= (1, 30, 0)
"""`pl.from_dicts(data: Iterable[Mapping[str, Any]])` since https://github.com/pola-rs/polars/pull/22638

Typing fix in https://github.com/pola-rs/polars/pull/24584
"""


@overload
def extract_native(obj: _StoresNative[NativeT]) -> NativeT: ...
Expand Down
70 changes: 70 additions & 0 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,76 @@ def from_dict(
)
raise ValueError(msg)

@classmethod
def from_dicts(
    cls,
    data: Sequence[Mapping[str, Any]],
    schema: IntoSchema | None = None,
    *,
    backend: IntoBackend[EagerAllowed],
) -> DataFrame[Any]:
    """Build a DataFrame from row-oriented data (one mapping per row).

    Notes:
        With pandas-like backends the frame is created first and `schema` is
        applied to it afterwards.

    Arguments:
        data: Sequence of mappings, each taking column names to that row's
            values.
        schema: Schema of the result, given as a `Schema` or a dict of
            {name: type}. When omitted, the native library infers it.
        backend: The eager backend to instantiate. It may be given

            - as `Implementation.<BACKEND>`, where `BACKEND` is one of `PANDAS`,
                `PYARROW`, `POLARS`, `MODIN` or `CUDF`;
            - as a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or
                `"cudf"`;
            - directly as a module: `pandas`, `pyarrow`, `polars`, `modin` or
                `cudf`.

    Tip:
        If the rows in `data` may not all share the same keys, pass `schema`
        to get consistent results, because **inference varies between
        backends**:

        - pandas uses all rows
        - polars uses the first 100 rows
        - pyarrow uses only the first row

    Raises:
        ValueError: If `backend` resolves to an implementation that is not
            eager-allowed.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>> data = [
        ...     {"item": "apple", "weight": 80, "price": 0.60},
        ...     {"item": "egg", "weight": 55, "price": 0.40},
        ... ]
        >>> nw.DataFrame.from_dicts(data, backend="polars")
        ┌──────────────────────────┐
        |    Narwhals DataFrame    |
        |--------------------------|
        |shape: (2, 3)             |
        |┌───────┬────────┬───────┐|
        |│ item  ┆ weight ┆ price │|
        |│ ---   ┆ ---    ┆ ---   │|
        |│ str   ┆ i64    ┆ f64   │|
        |╞═══════╪════════╪═══════╡|
        |│ apple ┆ 80     ┆ 0.6   │|
        |│ egg   ┆ 55     ┆ 0.4   │|
        |└───────┴────────┴───────┘|
        └──────────────────────────┘
    """
    implementation = Implementation.from_backend(backend)
    if not is_eager_allowed(implementation):
        # NOTE: (#2786) needs resolving for extensions
        msg = (
            f"{implementation} support in Narwhals is lazy-only, but `DataFrame.from_dicts` is an eager-only function.\n\n"
            "Hint: you may want to use an eager backend and then call `.lazy`, e.g.:\n\n"
            f"    nw.DataFrame.from_dicts([{{'a': 1}}, {{'a': 2}}], backend='pyarrow').lazy('{implementation}')"
        )
        raise ValueError(msg)

    # The compliant namespace is both the source of the dataframe class and
    # the `context` handed to its constructor.
    plx = cls._version.namespace.from_backend(implementation).compliant
    frame = plx._dataframe.from_dicts(data, schema=schema, context=plx)
    return cls(frame, level="full")

@classmethod
def from_numpy(
cls,
Expand Down
58 changes: 58 additions & 0 deletions narwhals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,64 @@ def _from_dict_no_backend(
return data, native_namespace


def from_dicts(
    data: Sequence[Mapping[str, Any]],
    schema: IntoSchema | None = None,
    *,
    backend: IntoBackend[EagerAllowed],
) -> DataFrame[Any]:
    """Instantiate DataFrame from a sequence of dictionaries representing rows.

    Notes:
        For pandas-like dataframes, conversion to schema is applied after dataframe
        creation.

    Arguments:
        data: Sequence with dictionaries mapping column name to value.
        schema: The DataFrame schema as Schema or dict of {name: type}. If not
            specified, the schema will be inferred by the native library.
        backend: Specifies which eager backend to instantiate.

            `backend` can be specified in various ways

            - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
                `POLARS`, `MODIN` or `CUDF`.
            - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
            - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.

    Tip:
        If you expect non-uniform keys in `data`, consider passing `schema` for
        more consistent results, as **inference varies between backends**:

        - pandas uses all rows
        - polars uses the first 100 rows
        - pyarrow uses only the first row

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>> data = [
        ...     {"item": "apple", "weight": 80, "price": 0.60},
        ...     {"item": "egg", "weight": 55, "price": 0.40},
        ... ]
        >>> nw.from_dicts(data, backend="polars")
        ┌──────────────────────────┐
        |    Narwhals DataFrame    |
        |--------------------------|
        |shape: (2, 3)             |
        |┌───────┬────────┬───────┐|
        |│ item  ┆ weight ┆ price │|
        |│ ---   ┆ ---    ┆ ---   │|
        |│ str   ┆ i64    ┆ f64   │|
        |╞═══════╪════════╪═══════╡|
        |│ apple ┆ 80     ┆ 0.6   │|
        |│ egg   ┆ 55     ┆ 0.4   │|
        |└───────┴────────┴───────┘|
        └──────────────────────────┘
    """
    # Thin wrapper: the work is done by the `DataFrame.from_dicts` classmethod
    # reached through `Version.MAIN`.
    return Version.MAIN.dataframe.from_dicts(data, schema, backend=backend)


def from_numpy(
data: _2DArray,
schema: IntoSchema | Sequence[str] | None = None,
Expand Down
18 changes: 17 additions & 1 deletion narwhals/stable/v1/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from functools import wraps
from typing import TYPE_CHECKING, Any, Callable, Literal, cast, overload
from typing import TYPE_CHECKING, Any, Callable, Final, Literal, cast, overload

import narwhals as nw
from narwhals import exceptions, functions as nw_f
Expand Down Expand Up @@ -84,6 +84,7 @@
IntoDType,
IntoExpr,
IntoFrame,
IntoSchema,
IntoSeries,
NonNestedLiteral,
SingleColSelector,
Expand Down Expand Up @@ -132,6 +133,17 @@ def from_dict(
result = super().from_dict(data, schema, backend=backend)
return cast("DataFrame[Any]", result)

@classmethod
def from_dicts(
    cls,
    data: Sequence[Any],
    schema: IntoSchema | None = None,
    *,
    backend: IntoBackend[EagerAllowed],
) -> DataFrame[Any]:
    # Deliberately no docstring here, so documentation lookups fall through to
    # the parent method. The work is delegated to the parent implementation;
    # `cast` is a no-op at runtime and only tells type checkers that the
    # result is this module's `DataFrame`.
    return cast(
        "DataFrame[Any]", super().from_dicts(data, schema, backend=backend)
    )

@classmethod
def from_numpy(
cls,
Expand Down Expand Up @@ -1261,6 +1273,9 @@ def from_dict(
return _stableify(nw_f.from_dict(data, schema, backend=backend))


# Module-level alias for the `DataFrame.from_dicts` classmethod.
from_dicts: Final = DataFrame.from_dicts


@deprecate_native_namespace(required=True)
def from_numpy(
data: _2DArray,
Expand Down Expand Up @@ -1399,6 +1414,7 @@ def scan_parquet(
"exclude",
"from_arrow",
"from_dict",
"from_dicts",
"from_native",
"from_numpy",
"generate_temporary_column_name",
Expand Down
Loading
Loading