Skip to content
10 changes: 7 additions & 3 deletions narwhals/_duration.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@

__all__ = ["IntervalUnit"]

IntervalUnit: TypeAlias = Literal["ns", "us", "ms", "s", "m", "h", "d", "mo", "q", "y"]
IntervalUnit: TypeAlias = Literal[
"ns", "us", "ms", "s", "m", "h", "d", "w", "mo", "q", "y"
]
"""A Polars duration string interval unit.

- 'ns': nanosecond.
Expand All @@ -23,20 +25,22 @@
- 'm': minute.
- 'h': hour.
- 'd': day.
- 'w': week.
- 'mo': month.
- 'q': quarter.
- 'y': year.
"""
TimedeltaKwd: TypeAlias = Literal[
"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"
"weeks", "days", "hours", "minutes", "seconds", "milliseconds", "microseconds"
]

PATTERN_INTERVAL: re.Pattern[str] = re.compile(
r"^(?P<multiple>-?\d+)(?P<unit>ns|us|ms|mo|m|s|h|d|q|y)\Z"
r"^(?P<multiple>-?\d+)(?P<unit>ns|us|ms|mo|m|s|h|d|w|q|y)\Z"
)
MONTH_MULTIPLES = frozenset([1, 2, 3, 4, 6, 12])
QUARTER_MULTIPLES = frozenset([1, 2, 4])
UNIT_TO_TIMEDELTA: Mapping[IntervalUnit, TimedeltaKwd] = {
"w": "weeks",
"d": "days",
"h": "hours",
"m": "minutes",
Expand Down
2 changes: 2 additions & 0 deletions narwhals/_plan/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
any_horizontal,
col,
concat_str,
date_range,
exclude,
int_range,
len,
Expand Down Expand Up @@ -38,6 +39,7 @@
"any_horizontal",
"col",
"concat_str",
"date_range",
"exclude",
"int_range",
"len",
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_plan/_dispatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from typing_extensions import Never, TypeAlias

from narwhals._plan.compliant.typing import Ctx
from narwhals._plan.expressions import ExprIR, FunctionExpr
from narwhals._plan.expressions import ExprIR, Function, FunctionExpr
from narwhals._plan.typing import ExprIRT, FunctionT

__all__ = ["Dispatcher", "get_dispatch_name"]
Expand Down Expand Up @@ -181,7 +181,7 @@ def _method_name(tp: type[ExprIRT | FunctionT]) -> str:
return f"{ns}.{name}" if (ns := getattr(config, "accessor_name", "")) else name


def get_dispatch_name(expr: ExprIR, /) -> str:
def get_dispatch_name(expr: ExprIR | type[Function], /) -> str:
"""Return the synthesized method name for `expr`."""
return (
repr(expr.function) if is_function_expr(expr) else expr.__expr_ir_dispatch__.name
Expand Down
8 changes: 8 additions & 0 deletions narwhals/_plan/arrow/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from __future__ import annotations

from narwhals._plan.arrow.dataframe import ArrowDataFrame as DataFrame
from narwhals._plan.arrow.expr import ArrowExpr as Expr, ArrowScalar as Scalar
from narwhals._plan.arrow.namespace import ArrowNamespace as Namespace
from narwhals._plan.arrow.series import ArrowSeries as Series

__all__ = ["DataFrame", "Expr", "Namespace", "Scalar", "Series"]
34 changes: 31 additions & 3 deletions narwhals/_plan/arrow/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from narwhals._utils import Implementation

if TYPE_CHECKING:
import datetime as dt
from collections.abc import Iterable, Mapping

from typing_extensions import TypeAlias, TypeIs
Expand All @@ -43,6 +44,7 @@
DataType,
DataTypeRemap,
DataTypeT,
DateScalar,
IntegerScalar,
IntegerType,
LargeStringType,
Expand Down Expand Up @@ -333,12 +335,38 @@ def int_range(
*,
dtype: IntegerType = pa.int64(), # noqa: B008
) -> ChunkedArray[IntegerScalar]:
import numpy as np # ignore-banned-import

if end is None:
end = start
start = 0
return pa.chunked_array([pa.array(np.arange(start, end, step), dtype)])
if BACKEND_VERSION < (21, 0, 0): # pragma: no cover
import numpy as np # ignore-banned-import

arr = pa.array(np.arange(start=start, stop=end, step=step), type=dtype)
else:
int_range_: Incomplete = t.cast("Incomplete", pa.arange) # type: ignore[attr-defined]
arr = t.cast("ArrayAny", int_range_(start=start, stop=end, step=step)).cast(dtype)
return pa.chunked_array([arr])


def date_range(
    start: dt.date,
    end: dt.date,
    interval: int,  # (* assuming the `Interval` part is solved)
    *,
    closed: ClosedInterval = "both",
) -> ChunkedArray[DateScalar]:
    """Return the dates between `start` and `end`, spaced `interval` apart.

    Values are generated on the grid `start + k * interval` (`k >= 0`).

    Arguments:
        start: First point of the grid.
        end: Upper bound of the range.
        interval: Step between consecutive values, expressed in the unit of the
            int32 representation of the dates (days for `date32`).
        closed: Which bounds may appear in the result: `"both"`, `"left"`
            (keep `start`, drop `end`), `"right"` (drop `start`, keep `end`)
            or `"none"` (drop both).

    Returns:
        A chunked array of the selected grid points, cast to `date32`.
    """
    # Do the arithmetic on the integer representation of the dates.
    start_i = pa.scalar(start).cast(pa.int32()).as_py()
    end_i = pa.scalar(end).cast(pa.int32()).as_py()
    # Apply `closed` to the *bounds* rather than slicing the generated values:
    # the last generated value only equals `end` when `interval` divides the
    # span, so unconditionally dropping it would discard a valid date, and an
    # empty range would produce a negative slice length.
    lower = start_i if closed in {"both", "left"} else start_i + interval
    upper = end_i + 1 if closed in {"both", "right"} else end_i
    return int_range(lower, upper, interval, dtype=pa.int32()).cast(pa.date32())


def nulls_like(n: int, native: ArrowAny) -> ArrayAny:
Expand Down
82 changes: 67 additions & 15 deletions narwhals/_plan/arrow/namespace.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import datetime as dt
from functools import reduce
from typing import TYPE_CHECKING, Any, Literal, cast, overload

Expand All @@ -11,7 +12,7 @@
from narwhals._plan.arrow import functions as fn
from narwhals._plan.compliant.namespace import EagerNamespace
from narwhals._plan.expressions.literal import is_literal_scalar
from narwhals._utils import Version
from narwhals._utils import Implementation, Version
from narwhals.exceptions import InvalidOperationError

if TYPE_CHECKING:
Expand All @@ -21,16 +22,29 @@
from narwhals._plan.arrow.dataframe import ArrowDataFrame as Frame
from narwhals._plan.arrow.expr import ArrowExpr as Expr, ArrowScalar as Scalar
from narwhals._plan.arrow.series import ArrowSeries as Series
from narwhals._plan.arrow.typing import ChunkedArray, IntegerScalar
from narwhals._plan.expressions import expr, functions as F
from narwhals._plan.expressions.boolean import AllHorizontal, AnyHorizontal
from narwhals._plan.expressions.expr import FunctionExpr, RangeExpr
from narwhals._plan.expressions.ranges import IntRange
from narwhals._plan.expressions.ranges import DateRange, IntRange
from narwhals._plan.expressions.strings import ConcatStr
from narwhals._plan.series import Series as NwSeries
from narwhals.typing import ConcatMethod, NonNestedLiteral, PythonLiteral
from narwhals._plan.typing import NonNestedLiteralT
from narwhals.dtypes import IntegerType
from narwhals.typing import (
ClosedInterval,
ConcatMethod,
NonNestedLiteral,
PythonLiteral,
)


Int64 = Version.MAIN.dtypes.Int64()


class ArrowNamespace(EagerNamespace["Frame", "Series", "Expr", "Scalar"]):
implementation = Implementation.PYARROW

def __init__(self, version: Version = Version.MAIN) -> None:
self._version = version

Expand Down Expand Up @@ -155,12 +169,12 @@ def concat_str(
return self._scalar.from_native(result, name, self.version)
return self._expr.from_native(result, name, self.version)

def int_range(self, node: RangeExpr[IntRange], frame: Frame, name: str) -> Expr:
def _range_function_inputs(
self, node: RangeExpr, frame: Frame, valid_type: type[NonNestedLiteralT]
) -> tuple[NonNestedLiteralT, NonNestedLiteralT]:
start_: PythonLiteral
end_: PythonLiteral
start, end = node.function.unwrap_input(node)
step = node.function.step
dtype = node.function.dtype
if is_literal_scalar(start) and is_literal_scalar(end):
start_, end_ = start.unwrap(), end.unwrap()
else:
Expand All @@ -172,21 +186,59 @@ def int_range(self, node: RangeExpr[IntRange], frame: Frame, name: str) -> Expr:
start_, end_ = scalar_start.to_python(), scalar_end.to_python()
else:
msg = (
f"All inputs for `int_range()` must be scalar or aggregations, but got \n"
f"All inputs for `{node.function}()` must be scalar or aggregations, but got \n"
f"{scalar_start.native!r}\n{scalar_end.native!r}"
)
raise InvalidOperationError(msg)
if isinstance(start_, int) and isinstance(end_, int):
if isinstance(start_, valid_type) and isinstance(end_, valid_type):
return start_, end_
msg = f"All inputs for `{node.function}()` must resolve to {valid_type.__name__}, but got \n{start_!r}\n{end_!r}"
raise InvalidOperationError(msg)

def _int_range(
self, start: int, end: int, step: int, dtype: IntegerType, /
) -> ChunkedArray[IntegerScalar]:
if dtype is not Int64:
pa_dtype = narwhals_to_native_dtype(dtype, self.version)
if not pa.types.is_integer(pa_dtype):
raise TypeError(pa_dtype)
native = fn.int_range(start_, end_, step, dtype=pa_dtype)
return self._expr.from_native(native, name, self.version)
raise TypeError(dtype)
return fn.int_range(start, end, step, dtype=pa_dtype)
return fn.int_range(start, end, step)

msg = (
f"All inputs for `int_range()` resolve to int, but got \n{start_!r}\n{end_!r}"
)
raise InvalidOperationError(msg)
def int_range(self, node: RangeExpr[IntRange], frame: Frame, name: str) -> Expr:
    """Evaluate an `int_range` node against `frame` into an expression called `name`.

    The node's start/end inputs must resolve to Python `int`s; the step and
    dtype are read from the node's function.
    """
    lower, upper = self._range_function_inputs(node, frame, int)
    values = self._int_range(lower, upper, node.function.step, node.function.dtype)
    return self._expr.from_native(values, name, self.version)

def int_range_eager(
    self,
    start: int,
    end: int,
    step: int = 1,
    *,
    dtype: IntegerType = Int64,
    name: str = "literal",
) -> Series:
    """Build an integer range directly as a series called `name`.

    Arguments:
        start: First bound handed to the native integer range.
        end: Second bound handed to the native integer range.
        step: Distance between consecutive values.
        dtype: Narwhals integer dtype of the result.
        name: Name given to the resulting series.
    """
    values = self._int_range(start, end, step, dtype)
    return self._series.from_native(values, name, version=self.version)

def date_range(self, node: RangeExpr[DateRange], frame: Frame, name: str) -> Expr:
    """Evaluate a `date_range` node against `frame` into an expression called `name`.

    The node's start/end inputs must resolve to `datetime.date` values; the
    interval and `closed` setting are read from the node's function.
    """
    lower, upper = self._range_function_inputs(node, frame, dt.date)
    spec = node.function
    dates = fn.date_range(lower, upper, spec.interval, closed=spec.closed)
    return self._expr.from_native(dates, name, self.version)

def date_range_eager(
    self,
    start: dt.date,
    end: dt.date,
    interval: int = 1,
    *,
    closed: ClosedInterval = "both",
    name: str = "literal",
) -> Series:
    """Build a date range directly as a series called `name`.

    Arguments:
        start: Lower bound of the range.
        end: Upper bound of the range.
        interval: Step between consecutive dates, forwarded to `fn.date_range`.
        closed: Which bounds may appear in the result.
        name: Name given to the resulting series.
    """
    dates = fn.date_range(start, end, interval, closed=closed)
    return self._series.from_native(dates, name, version=self.version)

@overload
def concat(self, items: Iterable[Frame], *, how: ConcatMethod) -> Frame: ...
Expand Down
2 changes: 2 additions & 0 deletions narwhals/_plan/arrow/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import pyarrow as pa
import pyarrow.compute as pc
from pyarrow.lib import (
Date32Type,
Int8Type,
Int16Type,
Int32Type,
Expand All @@ -28,6 +29,7 @@
StringScalar: TypeAlias = "Scalar[StringType | LargeStringType]"
IntegerType: TypeAlias = "Int8Type | Int16Type | Int32Type | Int64Type | Uint8Type | Uint16Type | Uint32Type | Uint64Type"
IntegerScalar: TypeAlias = "Scalar[IntegerType]"
DateScalar: TypeAlias = "Scalar[Date32Type]"

class NativeArrowSeries(NativeSeries, Protocol):
@property
Expand Down
34 changes: 31 additions & 3 deletions narwhals/_plan/compliant/namespace.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Literal, Protocol, overload
from typing import TYPE_CHECKING, Any, ClassVar, Literal, Protocol, overload

from narwhals._plan.compliant.typing import (
ConcatT1,
Expand All @@ -16,21 +16,28 @@
ScalarT_co,
SeriesT,
)
from narwhals._utils import Implementation, Version

if TYPE_CHECKING:
import datetime as dt
from collections.abc import Iterable

from typing_extensions import TypeIs

from narwhals._plan import expressions as ir
from narwhals._plan.expressions import FunctionExpr, boolean, functions as F
from narwhals._plan.expressions.ranges import IntRange
from narwhals._plan.expressions.ranges import DateRange, IntRange
from narwhals._plan.expressions.strings import ConcatStr
from narwhals._plan.series import Series
from narwhals.typing import ConcatMethod, NonNestedLiteral
from narwhals.dtypes import IntegerType
from narwhals.typing import ClosedInterval, ConcatMethod, NonNestedLiteral

Int64 = Version.MAIN.dtypes.Int64()


class CompliantNamespace(HasVersion, Protocol[FrameT, ExprT_co, ScalarT_co]):
implementation: ClassVar[Implementation]

@property
def _expr(self) -> type[ExprT_co]: ...
@property
Expand All @@ -47,6 +54,9 @@ def col(self, node: ir.Column, frame: FrameT, name: str) -> ExprT_co: ...
def concat_str(
self, node: FunctionExpr[ConcatStr], frame: FrameT, name: str
) -> ExprT_co | ScalarT_co: ...
def date_range(
self, node: ir.RangeExpr[DateRange], frame: FrameT, name: str
) -> ExprT_co: ...
def int_range(
self, node: ir.RangeExpr[IntRange], frame: FrameT, name: str
) -> ExprT_co: ...
Expand Down Expand Up @@ -128,6 +138,24 @@ def lit(
def lit(
self, node: ir.Literal[Any], frame: EagerDataFrameT, name: str
) -> EagerExprT_co | EagerScalarT_co: ...
def date_range_eager(
    self,
    start: dt.date,
    end: dt.date,
    interval: int = 1,
    *,
    closed: ClosedInterval = "both",
    name: str = "literal",
) -> SeriesT:
    """Build a date range between `start` and `end` as an eager series.

    Arguments:
        start: Lower bound of the range.
        end: Upper bound of the range.
        interval: Step between consecutive dates.
        closed: Which bounds may appear in the result.
        name: Name given to the resulting series.
    """
    ...
def int_range_eager(
    self,
    start: int,
    end: int,
    step: int = 1,
    *,
    dtype: IntegerType = Int64,
    name: str = "literal",
) -> SeriesT:
    """Build an integer range as an eager series.

    Arguments:
        start: First bound of the range.
        end: Second bound of the range.
        step: Distance between consecutive values.
        dtype: Integer dtype of the result (defaults to `Int64`).
        name: Name given to the resulting series.
    """
    ...


class LazyNamespace(
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_plan/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,9 @@ def from_native(
cls: type[DataFrame[Any, Any]], native: NativeDataFrameT, /
) -> DataFrame[Any, Any]:
if is_pyarrow_table(native):
from narwhals._plan.arrow.dataframe import ArrowDataFrame
from narwhals._plan import arrow as _arrow

return cls(ArrowDataFrame.from_native(native, cls._version))
return cls(_arrow.DataFrame.from_native(native, cls._version))

raise NotImplementedError(type(native))

Expand Down
Loading
Loading