Skip to content

Commit bdcf661

Browse files
authored
feat: Add DataFrame.from_dict (#2885)
1 parent 79373f9 commit bdcf661

7 files changed

Lines changed: 218 additions & 9 deletions

File tree

docs/api-reference/dataframe.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
- estimated_size
1515
- explode
1616
- filter
17+
- from_dict
1718
- gather_every
1819
- get_column
1920
- group_by

narwhals/dataframe.py

Lines changed: 69 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
TYPE_CHECKING,
77
Any,
88
Callable,
9+
ClassVar,
910
Generic,
1011
Literal,
1112
NoReturn,
@@ -28,6 +29,7 @@
2829
generate_repr,
2930
is_compliant_dataframe,
3031
is_compliant_lazyframe,
32+
is_eager_allowed,
3133
is_index_selector,
3234
is_list_of,
3335
is_sequence_like,
@@ -38,12 +40,13 @@
3840
)
3941
from narwhals.dependencies import get_polars, is_numpy_array
4042
from narwhals.exceptions import InvalidIntoExprError, InvalidOperationError
43+
from narwhals.functions import _from_dict_no_backend
4144
from narwhals.schema import Schema
4245
from narwhals.series import Series
4346
from narwhals.translate import to_native
4447

4548
if TYPE_CHECKING:
46-
from collections.abc import Iterable, Iterator, Sequence
49+
from collections.abc import Iterable, Iterator, Mapping, Sequence
4750
from io import BytesIO
4851
from pathlib import Path
4952
from types import ModuleType
@@ -55,6 +58,7 @@
5558

5659
from narwhals._compliant import CompliantDataFrame, CompliantLazyFrame
5760
from narwhals._compliant.typing import CompliantExprAny, EagerNamespaceAny
61+
from narwhals.dtypes import DType
5862
from narwhals.group_by import GroupBy, LazyGroupBy
5963
from narwhals.typing import (
6064
AsofJoinStrategy,
@@ -409,6 +413,8 @@ class DataFrame(BaseFrame[DataFrameT]):
409413
```
410414
"""
411415

416+
_version: ClassVar[Version] = Version.MAIN
417+
412418
def _extract_compliant(self, arg: Any) -> Any:
413419
from narwhals.expr import Expr
414420
from narwhals.series import Series
@@ -452,6 +458,68 @@ def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) ->
452458
msg = f"Expected an object which implements `__narwhals_dataframe__`, got: {type(df)}"
453459
raise AssertionError(msg)
454460

461+
@classmethod
462+
def from_dict(
463+
cls,
464+
data: Mapping[str, Any],
465+
schema: Mapping[str, DType] | Schema | None = None,
466+
*,
467+
backend: ModuleType | Implementation | str | None = None,
468+
) -> DataFrame[Any]:
469+
"""Instantiate DataFrame from dictionary.
470+
471+
Indexes (if present, for pandas-like backends) are aligned following
472+
the [left-hand-rule](../concepts/pandas_index.md/).
473+
474+
Notes:
475+
For pandas-like dataframes, conversion to schema is applied after dataframe
476+
creation.
477+
478+
Arguments:
479+
data: Dictionary to create DataFrame from.
480+
schema: The DataFrame schema as Schema or dict of {name: type}. If not
481+
specified, the schema will be inferred by the native library.
482+
backend: specifies which eager backend to instantiate. Only
483+
necessary if inputs are not Narwhals Series.
484+
485+
`backend` can be specified in various ways:
486+
487+
- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
488+
`POLARS`, `MODIN` or `CUDF`.
489+
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
490+
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
491+
492+
Returns:
493+
A new DataFrame.
494+
495+
Examples:
496+
>>> import pandas as pd
497+
>>> import narwhals as nw
498+
>>> data = {"c": [5, 2], "d": [1, 4]}
499+
>>> nw.DataFrame.from_dict(data, backend="pandas")
500+
┌──────────────────┐
501+
|Narwhals DataFrame|
502+
|------------------|
503+
| c d |
504+
| 0 5 1 |
505+
| 1 2 4 |
506+
└──────────────────┘
507+
"""
508+
if backend is None:
509+
data, backend = _from_dict_no_backend(data)
510+
implementation = Implementation.from_backend(backend)
511+
if is_eager_allowed(implementation):
512+
ns = cls._version.namespace.from_backend(implementation).compliant
513+
compliant = ns._dataframe.from_dict(data, schema=schema, context=ns)
514+
return cls(compliant, level="full")
515+
# NOTE: (#2786) needs resolving for extensions
516+
msg = (
517+
f"{implementation} support in Narwhals is lazy-only, but `DataFrame.from_dict` is an eager-only function.\n\n"
518+
"Hint: you may want to use an eager backend and then call `.lazy`, e.g.:\n\n"
519+
f" nw.DataFrame.from_dict({{'a': [1, 2]}}, backend='pyarrow').lazy('{implementation}')"
520+
)
521+
raise ValueError(msg)
522+
455523
@property
456524
def implementation(self) -> Implementation:
457525
"""Return implementation of native frame.

narwhals/stable/v1/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -96,6 +96,8 @@
9696

9797

9898
class DataFrame(NwDataFrame[IntoDataFrameT]):
99+
_version = Version.V1
100+
99101
@inherit_doc(NwDataFrame)
100102
def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None:
101103
assert df._version is Version.V1 # noqa: S101
@@ -104,6 +106,17 @@ def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) ->
104106
# We need to override any methods which don't return Self so that type
105107
# annotations are correct.
106108

109+
@classmethod
110+
def from_dict(
111+
cls,
112+
data: Mapping[str, Any],
113+
schema: Mapping[str, DType] | Schema | None = None,
114+
*,
115+
backend: ModuleType | Implementation | str | None = None,
116+
) -> DataFrame[Any]:
117+
result = super().from_dict(data, schema, backend=backend)
118+
return cast("DataFrame[Any]", result)
119+
107120
@property
108121
def _series(self) -> type[Series[Any]]:
109122
return cast("type[Series[Any]]", Series)

tests/conftest.py

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,12 @@
44
import uuid
55
from copy import deepcopy
66
from functools import lru_cache
7+
from importlib.util import find_spec
78
from typing import TYPE_CHECKING, Any, Callable, cast
89

910
import pytest
1011

11-
from narwhals._utils import generate_temporary_column_name
12+
from narwhals._utils import Implementation, generate_temporary_column_name
1213
from tests.utils import PANDAS_VERSION
1314

1415
if TYPE_CHECKING:
@@ -23,6 +24,7 @@
2324
from pyspark.sql import DataFrame as PySparkDataFrame
2425
from typing_extensions import TypeAlias
2526

27+
from narwhals._namespace import EagerAllowed
2628
from narwhals._spark_like.dataframe import SQLFrameDataFrame
2729
from narwhals.typing import NativeFrame, NativeLazyFrame
2830
from tests.utils import Constructor, ConstructorEager, ConstructorLazy
@@ -308,3 +310,20 @@ def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
308310
)
309311
elif "constructor" in metafunc.fixturenames:
310312
metafunc.parametrize("constructor", constructors, ids=constructors_ids)
313+
314+
315+
TEST_EAGER_BACKENDS: list[EagerAllowed] = []
316+
TEST_EAGER_BACKENDS.extend(
317+
(Implementation.POLARS, "polars") if find_spec("polars") is not None else ()
318+
)
319+
TEST_EAGER_BACKENDS.extend(
320+
(Implementation.PANDAS, "pandas") if find_spec("pandas") is not None else ()
321+
)
322+
TEST_EAGER_BACKENDS.extend(
323+
(Implementation.PYARROW, "pyarrow") if find_spec("pyarrow") is not None else ()
324+
)
325+
326+
327+
@pytest.fixture(params=TEST_EAGER_BACKENDS)
328+
def eager_backend(request: pytest.FixtureRequest) -> EagerAllowed:
329+
return request.param # type: ignore[no-any-return]

tests/frame/from_dict_test.py

Lines changed: 94 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,94 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING
4+
5+
import pytest
6+
7+
import narwhals as nw
8+
from narwhals._utils import Implementation
9+
from tests.utils import Constructor, assert_equal_data
10+
11+
if TYPE_CHECKING:
12+
from narwhals._namespace import EagerAllowed
13+
14+
15+
def test_from_dict(eager_backend: EagerAllowed) -> None:
16+
result = nw.DataFrame.from_dict({"c": [1, 2], "d": [5, 6]}, backend=eager_backend)
17+
expected = {"c": [1, 2], "d": [5, 6]}
18+
assert_equal_data(result, expected)
19+
assert isinstance(result, nw.DataFrame)
20+
21+
22+
def test_from_dict_schema(eager_backend: EagerAllowed) -> None:
23+
schema = {"c": nw.Int16(), "d": nw.Float32()}
24+
result = nw.DataFrame.from_dict(
25+
{"c": [1, 2], "d": [5, 6]}, backend=eager_backend, schema=schema
26+
)
27+
assert result.collect_schema() == schema
28+
29+
30+
@pytest.mark.parametrize("backend", [Implementation.POLARS, "polars"])
31+
def test_from_dict_without_backend(
32+
constructor: Constructor, backend: EagerAllowed
33+
) -> None:
34+
pytest.importorskip("polars")
35+
36+
df = (
37+
nw.from_native(constructor({"a": [1, 2, 3], "b": [4, 5, 6]}))
38+
.lazy()
39+
.collect(backend=backend)
40+
)
41+
result = nw.DataFrame.from_dict({"c": df["a"], "d": df["b"]})
42+
assert_equal_data(result, {"c": [1, 2, 3], "d": [4, 5, 6]})
43+
44+
45+
def test_from_dict_without_backend_invalid(constructor: Constructor) -> None:
46+
df = nw.from_native(constructor({"a": [1, 2, 3], "b": [4, 5, 6]})).lazy().collect()
47+
with pytest.raises(TypeError, match="backend"):
48+
nw.DataFrame.from_dict({"c": nw.to_native(df["a"]), "d": nw.to_native(df["b"])})
49+
50+
51+
def test_from_dict_with_backend_invalid() -> None:
52+
pytest.importorskip("duckdb")
53+
with pytest.raises(ValueError, match="lazy-only"):
54+
nw.DataFrame.from_dict({"c": [1, 2], "d": [5, 6]}, backend="duckdb")
55+
56+
57+
@pytest.mark.parametrize("backend", [Implementation.POLARS, "polars"])
58+
def test_from_dict_one_native_one_narwhals(
59+
constructor: Constructor, backend: EagerAllowed
60+
) -> None:
61+
pytest.importorskip("polars")
62+
63+
df = (
64+
nw.from_native(constructor({"a": [1, 2, 3], "b": [4, 5, 6]}))
65+
.lazy()
66+
.collect(backend=backend)
67+
)
68+
result = nw.DataFrame.from_dict({"c": nw.to_native(df["a"]), "d": df["b"]})
69+
expected = {"c": [1, 2, 3], "d": [4, 5, 6]}
70+
assert_equal_data(result, expected)
71+
72+
73+
def test_from_dict_empty(eager_backend: EagerAllowed) -> None:
74+
result = nw.DataFrame.from_dict({}, backend=eager_backend)
75+
assert result.shape == (0, 0)
76+
77+
78+
def test_from_dict_empty_with_schema(eager_backend: EagerAllowed) -> None:
79+
schema = nw.Schema({"a": nw.String(), "b": nw.Int8()})
80+
result = nw.DataFrame.from_dict({}, schema, backend=eager_backend)
81+
assert result.schema == schema
82+
83+
84+
def test_alignment() -> None:
85+
pytest.importorskip("pandas")
86+
import pandas as pd
87+
88+
# https://github.com/narwhals-dev/narwhals/issues/1474
89+
df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
90+
result = nw.DataFrame.from_dict(
91+
{"a": df["a"], "b": df["a"].sort_values(ascending=False)}, backend=pd
92+
).to_native()
93+
expected = pd.DataFrame({"a": [1, 2, 3], "b": [3, 2, 1]})
94+
pd.testing.assert_frame_equal(result, expected)

tests/v1_test.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
if TYPE_CHECKING:
3636
from typing_extensions import assert_type
3737

38+
from narwhals._namespace import EagerAllowed
3839
from narwhals.typing import IntoDataFrameT
3940
from tests.utils import Constructor, ConstructorEager
4041

@@ -570,7 +571,8 @@ def test_dataframe_recursive_v1() -> None:
570571

571572
pl_frame = pl.DataFrame({"a": [1, 2, 3]})
572573
nw_frame = nw_v1.from_native(pl_frame)
573-
with pytest.raises(AttributeError):
574+
# NOTE: (#2629) combined with passing in `nw_v1.DataFrame` (w/ a `_version`) into itself changes the error
575+
with pytest.raises(AssertionError):
574576
nw_v1.DataFrame(nw_frame, level="full")
575577

576578
nw_frame_early_return = nw_v1.from_native(nw_frame)
@@ -988,3 +990,13 @@ def minimal_function(data: nw_v1.Series[Any]) -> None:
988990
# check this doesn't raise type-checking errors
989991
minimal_function(col)
990992
assert isinstance(col, nw_v1.Series)
993+
994+
995+
def test_dataframe_from_dict(eager_backend: EagerAllowed) -> None:
996+
schema = {"c": nw_v1.Int16(), "d": nw_v1.Float32()}
997+
result = nw_v1.DataFrame.from_dict(
998+
{"c": [1, 2], "d": [5, 6]}, backend=eager_backend, schema=schema
999+
)
1000+
assert result.collect_schema() == schema
1001+
assert result._version is Version.V1
1002+
assert isinstance(result, nw_v1.DataFrame)

utils/check_api_reference.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
from collections import deque
99
from inspect import isfunction
1010
from pathlib import Path
11+
from types import MethodType
1112
from typing import TYPE_CHECKING, Any
1213

1314
import polars as pl
@@ -22,15 +23,16 @@
2223
if sys.version_info >= (3, 13):
2324

2425
def _is_public_method_or_property(obj: Any) -> bool:
25-
return (isfunction(obj) or isinstance(obj, property)) and obj.__name__.startswith(
26-
LOWERCASE
27-
)
26+
return (
27+
isfunction(obj) or isinstance(obj, (MethodType, property))
28+
) and obj.__name__.startswith(LOWERCASE)
2829
else:
2930

3031
def _is_public_method_or_property(obj: Any) -> bool:
31-
return (isfunction(obj) and obj.__name__.startswith(LOWERCASE)) or (
32-
isinstance(obj, property) and obj.fget.__name__.startswith(LOWERCASE)
33-
)
32+
return (
33+
(isfunction(obj) or isinstance(obj, MethodType))
34+
and obj.__name__.startswith(LOWERCASE)
35+
) or (isinstance(obj, property) and obj.fget.__name__.startswith(LOWERCASE))
3436

3537

3638
def iter_api_reference_names(tp: type[Any]) -> Iterator[str]:

0 commit comments

Comments
 (0)