Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
e6a8d72
refactor: Add `expressions` subpackage
dangotbanned Sep 11, 2025
8ad4aed
refactor: Move `aggregation.py`
dangotbanned Sep 11, 2025
1c62a3b
refactor: Move `name.py`
dangotbanned Sep 11, 2025
36ca088
refactor: Move `expr.py`
dangotbanned Sep 11, 2025
1241580
refactor: Move `boolean`, `functions`
dangotbanned Sep 11, 2025
46de12b
refactor: Move remaining `Function`-based modules
dangotbanned Sep 11, 2025
7368936
refactor: Mostly move everything else
dangotbanned Sep 11, 2025
4ddf7e4
refactor: Rename `_plan.demo` -> `_plan.functions`
dangotbanned Sep 11, 2025
ebef1d5
refactor: Split out `series.py`
dangotbanned Sep 11, 2025
77e8cba
refactor: Split out `dataframe.py`
dangotbanned Sep 11, 2025
3c37d69
refactor: Rename `_plan.dummy` -> `_plan.expr`
dangotbanned Sep 11, 2025
1c12bd1
refactor: Rename `expr_parsing` -> `_parse`
dangotbanned Sep 11, 2025
57dec59
ci: Update `pre-commit` exclude
dangotbanned Sep 11, 2025
abaa2cc
chore(ruff): partial bump fixes
dangotbanned Sep 13, 2025
978cd82
feat: export to `expressions`
dangotbanned Sep 13, 2025
a8673f6
refactor: Update imports
dangotbanned Sep 13, 2025
da83726
refactor: Use more `from narwhals._plan import expressions as ir`
dangotbanned Sep 13, 2025
9b18b13
refactor: Even more import updates
dangotbanned Sep 13, 2025
ba70130
refactor: Rename `expr_rewrites` -> `_rewrites`
dangotbanned Sep 13, 2025
2a7079d
refactor: Rename `expr_expansion` -> `_expansion`
dangotbanned Sep 13, 2025
cc3fdf6
chore: export some common modules
dangotbanned Sep 13, 2025
800e772
refactor: Make `meta` depend on `expressions`
dangotbanned Sep 13, 2025
a6a130d
refactor: Move `common.map_ir` -> `_rewrites`
dangotbanned Sep 13, 2025
aab0f1d
refactor: remove `collect`
dangotbanned Sep 13, 2025
3913111
tweak `into_dtype`
dangotbanned Sep 13, 2025
4e39a6d
refactor: more `common` removal prep
dangotbanned Sep 13, 2025
9615a6c
rename `IRMetaNamespace` -> `MetaNamespace`
dangotbanned Sep 13, 2025
9121d78
refactor: Split out `_function.py`
dangotbanned Sep 13, 2025
11f5612
refactor: Split out `expressions.namespace.py`
dangotbanned Sep 13, 2025
3d90dc7
export to `_plan`, update doctests
dangotbanned Sep 13, 2025
a1f20ca
test: Update imports
dangotbanned Sep 13, 2025
a6617d5
refactor: remove `nwd` alias
dangotbanned Sep 14, 2025
5ad77ee
test: Use `nwp` for `narwhals._plan`
dangotbanned Sep 14, 2025
23704fa
refactor: Reuse `ir.boolean` export more
dangotbanned Sep 14, 2025
69268ce
refactor: Split out `_expr_ir.py`
dangotbanned Sep 14, 2025
d63e4b7
refactor: Redo `window` builders
dangotbanned Sep 14, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ repos:
narwhals/stable/v./_?dtypes.py|
narwhals/.*__init__.py|
narwhals/.*typing\.py|
narwhals/_plan/demo\.py|
narwhals/_plan/ranges\.py|
narwhals/_plan/functions\.py|
narwhals/_plan/expressions/ranges\.py|
narwhals/_plan/schema\.py
)
- id: pull-request-target
Expand Down
55 changes: 55 additions & 0 deletions narwhals/_plan/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,56 @@
from __future__ import annotations

from narwhals._plan.dataframe import DataFrame
from narwhals._plan.expr import Expr, Selector
from narwhals._plan.expressions import selectors
from narwhals._plan.functions import (
all,
all_horizontal,
any_horizontal,
col,
concat_str,
exclude,
int_range,
len,
lit,
max,
max_horizontal,
mean,
mean_horizontal,
median,
min,
min_horizontal,
nth,
sum,
sum_horizontal,
when,
)
from narwhals._plan.series import Series

# Names re-exported by this package; each entry is imported at the top of this module.
# NOTE: `all`, `len`, `max`, `min` and `sum` shadow the builtins of the same name
# inside this module's namespace.
__all__ = [
"DataFrame",
"Expr",
"Selector",
"Series",
"all",
"all_horizontal",
"any_horizontal",
"col",
"concat_str",
"exclude",
"int_range",
"len",
"lit",
"max",
"max_horizontal",
"mean",
"mean_horizontal",
"median",
"min",
"min_horizontal",
"nth",
"selectors",
"sum",
"sum_horizontal",
"when",
]
Original file line number Diff line number Diff line change
Expand Up @@ -45,21 +45,23 @@
from narwhals._plan import common, meta
from narwhals._plan._guards import is_horizontal_reduction
from narwhals._plan._immutable import Immutable
from narwhals._plan.common import ExprIR, NamedIR, SelectorIR
from narwhals._plan.exceptions import (
column_index_error,
column_not_found_error,
duplicate_error,
)
from narwhals._plan.expr import (
from narwhals._plan.expressions import (
Alias,
All,
Columns,
Exclude,
ExprIR,
IndexColumns,
KeepName,
NamedIR,
Nth,
RenameAlias,
SelectorIR,
_ColumnSelection,
col,
cols,
Expand Down
292 changes: 292 additions & 0 deletions narwhals/_plan/_expr_ir.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,292 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Generic, cast

from narwhals._plan._guards import is_function_expr, is_literal
from narwhals._plan._immutable import Immutable
from narwhals._plan.common import dispatch_getter, replace
from narwhals._plan.options import ExprIROptions
from narwhals._plan.typing import ExprIRT
from narwhals.utils import Version

if TYPE_CHECKING:
from collections.abc import Callable, Iterator
from typing import Any, ClassVar

from typing_extensions import Self, TypeAlias

from narwhals._plan.expr import Expr, Selector
from narwhals._plan.expressions.expr import Alias, Cast, Column
from narwhals._plan.meta import MetaNamespace
from narwhals._plan.protocols import Ctx, FrameT_contra, R_co
from narwhals._plan.typing import ExprIRT2, MapIR, Seq
from narwhals.dtypes import DType

Incomplete: TypeAlias = "Any"


def _dispatch_generate(
    tp: type[ExprIRT], /
) -> Callable[[Incomplete, ExprIRT, Incomplete, str], Incomplete]:
    """Build the callable stored as `tp.__expr_ir_dispatch__`.

    The returned callable is invoked as `(ctx, node, frame, name)`.

    - When `tp.__expr_ir_config__.allow_dispatch` is truthy, the call is forwarded to
      whatever `dispatch_getter(tp)` resolves on `ctx`.
    - Otherwise, calling it always raises `TypeError`, since such a node is not
      supposed to reach the compliant-level.
    """
    options = tp.__expr_ir_config__
    if options.allow_dispatch:
        # Resolve the getter once, at class-creation time, rather than on every call.
        getter = dispatch_getter(tp)

        def _(ctx: Any, /, node: ExprIRT, frame: Any, name: str) -> Any:
            method = getter(ctx)
            return method(node, frame, name)

        return _

    def _(ctx: Any, /, node: ExprIRT, _: Any, name: str) -> Any:
        msg = (
            f"{tp.__name__!r} should not appear at the compliant-level.\n\n"
            f"Make sure to expand all expressions first, got:\n{ctx!r}\n{node!r}\n{name!r}"
        )
        raise TypeError(msg)

    return _


class ExprIR(Immutable):
    """Base class for anything that can be a node on a graph of expressions.

    Subclasses may pass two class keywords:

    - `child`: names of the attributes holding nested node(s), stored on `_child`.
    - `config`: an `ExprIROptions`, stored on `__expr_ir_config__`.

    Every subclass gets its own `__expr_ir_dispatch__`, generated when the class is created.
    """

    _child: ClassVar[Seq[str]] = ()
    """Nested node names, in iteration order."""

    __expr_ir_config__: ClassVar[ExprIROptions] = ExprIROptions.default()
    __expr_ir_dispatch__: ClassVar[
        staticmethod[[Incomplete, Self, Incomplete, str], Incomplete]
    ]

    def __init_subclass__(
        cls: type[Self],
        *args: Any,
        child: Seq[str] = (),
        config: ExprIROptions | None = None,
        **kwds: Any,
    ) -> None:
        """Record `child`/`config` (when truthy) and generate the dispatch function."""
        super().__init_subclass__(*args, **kwds)
        # Falsy values leave the inherited class attributes untouched.
        if child:
            cls._child = child
        if config:
            cls.__expr_ir_config__ = config
        # Must come last: the generator reads `cls.__expr_ir_config__`.
        cls.__expr_ir_dispatch__ = staticmethod(_dispatch_generate(cls))

    def dispatch(
        self, ctx: Ctx[FrameT_contra, R_co], frame: FrameT_contra, name: str, /
    ) -> R_co:
        """Evaluate expression in `frame`, using `ctx` for implementation(s)."""
        handler = self.__expr_ir_dispatch__
        return handler(ctx, cast("Self", self), frame, name)  # type: ignore[no-any-return]

    def to_narwhals(self, version: Version = Version.MAIN) -> Expr:
        """Wrap this node in `Expr` (for `Version.MAIN`) or `ExprV1` (any other version)."""
        from narwhals._plan import expr

        if version is Version.MAIN:
            return expr.Expr._from_ir(self)
        return expr.ExprV1._from_ir(self)

    @property
    def is_scalar(self) -> bool:
        """Always `False` here; subclasses may override."""
        return False

    def map_ir(self, function: MapIR, /) -> ExprIR:
        """Apply `function` to each child node, returning a new `ExprIR`.

        Children named in `_child` are mapped first, then `function` is applied to a
        copy of this node holding the mapped children. A node without children is
        passed to `function` directly.

        See [`polars_plan::plans::iterator::Expr.map_expr`] and [`polars_plan::plans::visitor::visitors`].

        [`polars_plan::plans::iterator::Expr.map_expr`]: https://github.com/pola-rs/polars/blob/0fa7141ce718c6f0a4d6ae46865c867b177a59ed/crates/polars-plan/src/plans/iterator.rs#L152-L159
        [`polars_plan::plans::visitor::visitors`]: https://github.com/pola-rs/polars/blob/0fa7141ce718c6f0a4d6ae46865c867b177a59ed/crates/polars-plan/src/plans/visitor/visitors.rs
        """
        names = self._child
        if not names:
            return function(self)
        changed = {name: _map_ir_child(getattr(self, name), function) for name in names}
        return function(replace(self, **changed))

    def iter_left(self) -> Iterator[ExprIR]:
        """Yield nodes root->leaf.

        Children are visited in `_child` order, and this node is yielded after them.

        Examples:
            >>> from narwhals import _plan as nw
            >>>
            >>> a = nw.col("a")
            >>> b = a.alias("b")
            >>> c = b.min().alias("c")
            >>> d = c.over(nw.col("e"), nw.col("f"))
            >>>
            >>> list(a._ir.iter_left())
            [col('a')]
            >>>
            >>> list(b._ir.iter_left())
            [col('a'), col('a').alias('b')]
            >>>
            >>> list(c._ir.iter_left())
            [col('a'), col('a').alias('b'), col('a').alias('b').min(), col('a').alias('b').min().alias('c')]
            >>>
            >>> list(d._ir.iter_left())
            [col('a'), col('a').alias('b'), col('a').alias('b').min(), col('a').alias('b').min().alias('c'), col('e'), col('f'), col('a').alias('b').min().alias('c').over([col('e'), col('f')])]
        """
        for name in self._child:
            child: ExprIR | Seq[ExprIR] = getattr(self, name)
            # Treat a single node as a one-element sequence, so both shapes share a loop.
            nodes = (child,) if isinstance(child, ExprIR) else child
            for node in nodes:
                yield from node.iter_left()
        yield self

    def iter_right(self) -> Iterator[ExprIR]:
        """Yield nodes leaf->root.

        This node is yielded first, then children are visited in reversed `_child` order.

        Note:
            Identical to `iter_left` for root nodes.

        Examples:
            >>> from narwhals import _plan as nw
            >>>
            >>> a = nw.col("a")
            >>> b = a.alias("b")
            >>> c = b.min().alias("c")
            >>> d = c.over(nw.col("e"), nw.col("f"))
            >>>
            >>> list(a._ir.iter_right())
            [col('a')]
            >>>
            >>> list(b._ir.iter_right())
            [col('a').alias('b'), col('a')]
            >>>
            >>> list(c._ir.iter_right())
            [col('a').alias('b').min().alias('c'), col('a').alias('b').min(), col('a').alias('b'), col('a')]
            >>>
            >>> list(d._ir.iter_right())
            [col('a').alias('b').min().alias('c').over([col('e'), col('f')]), col('f'), col('e'), col('a').alias('b').min().alias('c'), col('a').alias('b').min(), col('a').alias('b'), col('a')]
        """
        yield self
        for name in reversed(self._child):
            child: ExprIR | Seq[ExprIR] = getattr(self, name)
            # A sequence of nodes is walked back-to-front; a single node is wrapped as-is.
            nodes = (child,) if isinstance(child, ExprIR) else reversed(child)
            for node in nodes:
                yield from node.iter_right()

    def iter_root_names(self) -> Iterator[ExprIR]:
        """Override for different iteration behavior in `ExprIR.meta.root_names`.

        Note:
            Identical to `iter_left` by default.
        """
        yield from self.iter_left()

    def iter_output_name(self) -> Iterator[ExprIR]:
        """Override for different iteration behavior in `ExprIR.meta.output_name`.

        Note:
            Identical to `iter_right` by default.
        """
        yield from self.iter_right()

    @property
    def meta(self) -> MetaNamespace:
        """A new `MetaNamespace` wrapping this node."""
        from narwhals._plan.meta import MetaNamespace

        return MetaNamespace(_ir=self)

    def cast(self, dtype: DType) -> Cast:
        """Return a `Cast` node with this node as `expr`."""
        from narwhals._plan.expressions.expr import Cast

        return Cast(expr=self, dtype=dtype)

    def alias(self, name: str) -> Alias:
        """Return an `Alias` node with this node as `expr`."""
        from narwhals._plan.expressions.expr import Alias

        return Alias(expr=self, name=name)

    def _repr_html_(self) -> str:
        """Same text as `repr(self)`; no HTML markup or escaping is applied."""
        return repr(self)


def _map_ir_child(obj: ExprIR | Seq[ExprIR], fn: MapIR, /) -> ExprIR | Seq[ExprIR]:
    """Call `map_ir(fn)` on a single node, or on every node of a sequence.

    A sequence input always comes back as a `tuple`.
    """
    if isinstance(obj, ExprIR):
        return obj.map_ir(fn)
    return tuple(node.map_ir(fn) for node in obj)


class SelectorIR(ExprIR, config=ExprIROptions.no_dispatch()):
    """`ExprIR` subclass for selectors, configured with `ExprIROptions.no_dispatch()`.

    NOTE(review): presumably `no_dispatch()` disables `allow_dispatch`, which would make
    `dispatch` raise `TypeError` for selectors - confirm against `ExprIROptions`.
    """

    def to_narwhals(self, version: Version = Version.MAIN) -> Selector:
        """Wrap this node in `Selector` (for `Version.MAIN`) or `SelectorV1` (any other version)."""
        from narwhals._plan import expr

        tp = expr.Selector if version is Version.MAIN else expr.SelectorV1
        return tp._from_ir(self)

    def matches_column(self, name: str, dtype: DType) -> bool:
        """Return True if we can select this column.

        The base implementation always raises `NotImplementedError`.

        Design notes:
        - Working per-column might give more cache hits.
        - It may also be more efficient than iterating over the schema for every selector.
        - Instead, do one pass, evaluating every selector against a single column at a time.
        """
        raise NotImplementedError(type(self))


class NamedIR(Immutable, Generic[ExprIRT]):
    """Post-projection expansion wrapper for `ExprIR`.

    Pairs an expression (`expr`) with its output `name`.

    - Somewhat similar to [`polars_plan::plans::expr_ir::ExprIR`].
    - The [`polars_plan::plans::aexpr::AExpr`] stage has been skipped (*for now*)
        - Parts of that will probably be in here too
        - `AExpr` seems like too much duplication when we won't get the memory allocation benefits in python

    [`polars_plan::plans::expr_ir::ExprIR`]: https://github.com/pola-rs/polars/blob/2c7a3e77f0faa37c86a3745db4ef7707ae50c72e/crates/polars-plan/src/plans/expr_ir.rs#L63-L74
    [`polars_plan::plans::aexpr::AExpr`]: https://github.com/pola-rs/polars/blob/2c7a3e77f0faa37c86a3745db4ef7707ae50c72e/crates/polars-plan/src/plans/aexpr/mod.rs#L145-L231
    """

    __slots__ = ("expr", "name")
    expr: ExprIRT
    name: str

    @staticmethod
    def from_name(name: str, /) -> NamedIR[Column]:
        """Construct as a simple, unaliased `col(name)` expression.

        Intended to be used in `with_columns` from a `FrozenSchema`'s keys.
        """
        from narwhals._plan.expressions.expr import col

        column = col(name)
        return NamedIR(expr=column, name=name)

    @staticmethod
    def from_ir(expr: ExprIRT2, /) -> NamedIR[ExprIRT2]:
        """Construct from an already expanded `ExprIR`.

        Should be cheap to get the output name from cache, but will raise if used
        without care (the name is requested with `raise_if_undetermined=True`).
        """
        output_name = expr.meta.output_name(raise_if_undetermined=True)
        return NamedIR(expr=expr, name=output_name)

    def map_ir(self, function: MapIR, /) -> Self:
        """**WARNING**: don't use renaming ops here, or `self.name` is invalid.

        Returns a copy with `expr` replaced; `name` is carried over unchanged.
        """
        # NOTE(review): `ExprIR.map_ir` already applies `function` to the root node, so the
        # root appears to be visited twice here - confirm whether that is intended.
        mapped = self.expr.map_ir(function)
        return replace(self, expr=function(mapped))

    def __repr__(self) -> str:
        return f"{self.name}={self.expr!r}"

    def _repr_html_(self) -> str:
        """Bold `name`, followed by the expression's own `_repr_html_()`."""
        inner = self.expr._repr_html_()
        return f"<b>{self.name}</b>={inner}"

    def is_elementwise_top_level(self) -> bool:
        """Return True if the outermost node is elementwise.

        Based on [`polars_plan::plans::aexpr::properties::AExpr.is_elementwise_top_level`]

        This check:
        - Is not recursive
        - Is not valid on `ExprIR` *prior* to being expanded

        [`polars_plan::plans::aexpr::properties::AExpr.is_elementwise_top_level`]: https://github.com/pola-rs/polars/blob/2c7a3e77f0faa37c86a3745db4ef7707ae50c72e/crates/polars-plan/src/plans/aexpr/properties.rs#L16-L44
        """
        from narwhals._plan.expressions import expr

        node = self.expr
        # Function expressions answer via their own options.
        if is_function_expr(node):
            return node.options.is_elementwise()
        # A literal counts as elementwise only when it is scalar.
        if is_literal(node):
            return node.is_scalar
        elementwise = (expr.BinaryExpr, expr.Column, expr.TernaryExpr, expr.Cast)
        return isinstance(node, elementwise)
Loading
Loading