diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index b4d65c9e76..58849c6ddb 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -367,7 +367,7 @@ def select(self: ArrowDataFrame, *exprs: ArrowExpr) -> ArrowDataFrame: ) names = [s.name for s in new_series] reshaped = align_series_full_broadcast(*new_series) - df = pa.Table.from_arrays([s._native_series for s in reshaped], names=names) + df = pa.Table.from_arrays([s.native for s in reshaped], names=names) return self._from_native_frame(df, validate_column_names=True) def _extract_comparand(self, other: ArrowSeries) -> ArrowChunkedArray: @@ -438,21 +438,21 @@ def join( self.with_columns( plx.lit(0, None).alias(key_token).broadcast(ExprKind.LITERAL) ) - ._native_frame.join( + .native.join( other.with_columns( plx.lit(0, None).alias(key_token).broadcast(ExprKind.LITERAL) - )._native_frame, + ).native, keys=key_token, right_keys=key_token, join_type="inner", right_suffix=suffix, ) - .drop([key_token]), + .drop([key_token]) ) return self._from_native_frame( self.native.join( - other._native_frame, + other.native, keys=left_on or [], # type: ignore[arg-type] right_keys=right_on, # type: ignore[arg-type] join_type=how_to_join_map[how], diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index af6431b496..23c850b667 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -67,7 +67,7 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: try: return [ ArrowSeries( - df._native_frame[column_name], + df.native[column_name], name=column_name, backend_version=df._backend_version, version=df._version, @@ -101,8 +101,8 @@ def from_column_indices( def func(df: ArrowDataFrame) -> list[ArrowSeries]: return [ ArrowSeries( - df._native_frame[column_index], - name=df._native_frame.column_names[column_index], + df.native[column_index], + name=df.native.column_names[column_index], backend_version=df._backend_version, version=df._version, ) @@ -163,9 +163,9 @@ def func(df: ArrowDataFrame) -> Sequence[ArrowSeries]: # TODO(marco): is there a way to do this efficiently without # doing 2 sorts? Here we're sorting the dataframe and then # again calling `sort_indices`. `ArrowSeries.scatter` would also sort. - sorting_indices = pc.sort_indices(df[token]._native_series) # type: ignore[call-overload] + sorting_indices = pc.sort_indices(df[token].native) # type: ignore[call-overload] return [ - ser._from_native_series(pc.take(ser._native_series, sorting_indices)) + ser._from_native_series(pc.take(ser.native, sorting_indices)) for ser in result ] else: diff --git a/narwhals/_arrow/group_by.py b/narwhals/_arrow/group_by.py index c178a940b3..8b76c6d37d 100644 --- a/narwhals/_arrow/group_by.py +++ b/narwhals/_arrow/group_by.py @@ -131,7 +131,6 @@ def __iter__(self: Self) -> Iterator[tuple[Any, ArrowDataFrame]]: null_token: str = "__null_token_value__" # noqa: S105 table = self.compliant.native - # NOTE: stubs fail in multiple places for `ChunkedArray` it, separator_scalar = cast_to_comparable_string_types( *(table[key] for key in self._keys), separator="" ) diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index a3edbcb132..39c7fe5583 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -58,9 +58,7 @@ def len(self: Self) -> ArrowExpr: # coverage bug? this is definitely hit return self._expr( # pragma: no cover lambda df: [ - ArrowSeries.from_iterable( - [len(df._native_frame)], name="len", context=self - ) + ArrowSeries.from_iterable([len(df.native)], name="len", context=self) ], depth=0, function_name="len", @@ -158,12 +156,8 @@ def min_horizontal(self: Self, *exprs: ArrowExpr) -> ArrowExpr: def func(df: ArrowDataFrame) -> list[ArrowSeries]: init_series, *series = list(chain.from_iterable(expr(df) for expr in exprs)) init_series, *series = align_series_full_broadcast(init_series, *series) - # NOTE: Stubs copy the wrong signature https://github.com/zen-xu/pyarrow-stubs/blob/d97063876720e6a5edda7eb15f4efe07c31b8296/pyarrow-stubs/compute.pyi#L963 - min_element_wise: Incomplete = pc.min_element_wise native_series = reduce( - min_element_wise, - [s._native_series for s in series], - init_series._native_series, + pc.min_element_wise, [s.native for s in series], init_series.native ) return [ ArrowSeries( @@ -187,13 +181,8 @@ def max_horizontal(self: Self, *exprs: ArrowExpr) -> ArrowExpr: def func(df: ArrowDataFrame) -> list[ArrowSeries]: init_series, *series = list(chain.from_iterable(expr(df) for expr in exprs)) init_series, *series = align_series_full_broadcast(init_series, *series) - # NOTE: stubs are missing `ChunkedArray` support - # https://github.com/zen-xu/pyarrow-stubs/blob/d97063876720e6a5edda7eb15f4efe07c31b8296/pyarrow-stubs/compute.pyi#L948-L954 - max_element_wise: Incomplete = pc.max_element_wise native_series = reduce( - max_element_wise, - [s._native_series for s in series], - init_series._native_series, + pc.max_element_wise, [s.native for s in series], init_series.native ) return [ ArrowSeries( @@ -219,7 +208,7 @@ def concat( *, how: Literal["horizontal", "vertical", "diagonal"], ) -> ArrowDataFrame: - dfs = [item._native_frame for item in items] + dfs = [item.native for item in items] if not dfs: msg = "No dataframes to concatenate" # pragma: no cover diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index f8c5e085dc..08ec2e514a 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -48,7 +48,6 @@ from narwhals._arrow.typing import ArrowArray from narwhals._arrow.typing import ArrowChunkedArray from narwhals._arrow.typing import Incomplete - from narwhals._arrow.typing import Indices # type: ignore[attr-defined] from narwhals._arrow.typing import NullPlacement from narwhals._arrow.typing import Order # type: ignore[attr-defined] from narwhals._arrow.typing import TieBreaker @@ -261,13 +260,13 @@ def __rtruediv__(self: Self, other: Any) -> Self: return self._from_native_series(pc.divide(*cast_for_truediv(other, ser))) # pyright: ignore[reportArgumentType] def __mod__(self: Self, other: Any) -> Self: - floor_div = (self // other)._native_series + floor_div = (self // other).native ser, other = extract_native(self, other) res = pc.subtract(ser, pc.multiply(floor_div, other)) return self._from_native_series(res) def __rmod__(self: Self, other: Any) -> Self: - floor_div = (other // self)._native_series + floor_div = (other // self).native ser, other = extract_native(self, other) res = pc.subtract(other, pc.multiply(floor_div, ser)) return self._from_native_series(res) @@ -292,10 +291,7 @@ def filter(self: Self, predicate: ArrowSeries | list[bool | None]) -> Self: return self._from_native_series(self.native.filter(other_native)) # pyright: ignore[reportArgumentType] def mean(self: Self, *, _return_py_scalar: bool = True) -> float: - # NOTE: stub overly strict https://github.com/zen-xu/pyarrow-stubs/blob/d97063876720e6a5edda7eb15f4efe07c31b8296/pyarrow-stubs/compute.pyi#L274-L307 - # docs say numeric https://arrow.apache.org/docs/python/generated/pyarrow.compute.mean.html - mean: Incomplete = pc.mean - return maybe_extract_py_scalar(mean(self.native), _return_py_scalar) + return maybe_extract_py_scalar(pc.mean(self.native), _return_py_scalar) def median(self: Self, *, _return_py_scalar: bool = True) -> float: from narwhals.exceptions import InvalidOperationError @@ -350,8 +346,7 @@ def var(self: Self, ddof: int, *, _return_py_scalar: bool = True) -> float: ) def skew(self: Self, *, _return_py_scalar: bool = True) -> float | None: - # NOTE: stub issue with `pc.subtract`, `pc.mean` and `pa.ChunkedArray` - ser_not_null: Incomplete = self.native.drop_null() + ser_not_null = self.native.drop_null() if len(ser_not_null) == 0: return None elif len(ser_not_null) == 1: @@ -359,9 +354,7 @@ def skew(self: Self, *, _return_py_scalar: bool = True) -> float | None: elif len(ser_not_null) == 2: return 0.0 else: - m = cast( - "pc.NumericArray[Any]", pc.subtract(ser_not_null, pc.mean(ser_not_null)) - ) + m = pc.subtract(ser_not_null, pc.mean(ser_not_null)) m2 = pc.mean(pc.power(m, lit(2))) m3 = pc.mean(pc.power(m, lit(3))) biased_population_skewness = pc.divide(m3, pc.power(m2, lit(1.5))) @@ -400,7 +393,7 @@ def __getitem__( if isinstance(idx, int): return maybe_extract_py_scalar(self.native[idx], return_py_scalar=True) if isinstance(idx, (Sequence, pa.ChunkedArray)): - return self._from_native_series(self.native.take(cast("Indices", idx))) + return self._from_native_series(self.native.take(idx)) return self._from_native_series(self.native[idx]) def scatter(self: Self, indices: int | Sequence[int], values: Any) -> Self: @@ -427,7 +420,7 @@ def scatter(self: Self, indices: int | Sequence[int], values: Any) -> Self: result = pc.replace_with_mask( self.native, cast("list[bool]", mask), - values_native.take(cast("Indices", indices_native)), + values_native.take(indices_native), ) return self._from_native_series(result) @@ -456,9 +449,7 @@ def abs(self: Self) -> Self: return self._from_native_series(pc.abs(self.native)) def cum_sum(self: Self, *, reverse: bool) -> Self: - # NOTE: stub only permits `NumericArray` - # https://github.com/zen-xu/pyarrow-stubs/blob/d97063876720e6a5edda7eb15f4efe07c31b8296/pyarrow-stubs/compute.pyi#L140 - cum_sum: Incomplete = pc.cumulative_sum + cum_sum = pc.cumulative_sum result = ( cum_sum(self.native, skip_nulls=True) if not reverse @@ -467,32 +458,21 @@ def cum_sum(self: Self, *, reverse: bool) -> Self: return self._from_native_series(result) def round(self: Self, decimals: int) -> Self: - # NOTE: stub only permits `NumericArray` - # https://github.com/zen-xu/pyarrow-stubs/blob/d97063876720e6a5edda7eb15f4efe07c31b8296/pyarrow-stubs/compute.pyi#L140 - pc_round: Incomplete = pc.round return self._from_native_series( - pc_round(self.native, decimals, round_mode="half_towards_infinity") + pc.round(self.native, decimals, round_mode="half_towards_infinity") ) def diff(self: Self) -> Self: - # NOTE: stub only permits `ChunkedArray[TemporalScalar]` - # (https://github.com/zen-xu/pyarrow-stubs/blob/d97063876720e6a5edda7eb15f4efe07c31b8296/pyarrow-stubs/compute.pyi#L145-L148) - diff: Incomplete = pc.pairwise_diff - return self._from_native_series(diff(self.native.combine_chunks())) + return self._from_native_series(pc.pairwise_diff(self.native.combine_chunks())) def any(self: Self, *, _return_py_scalar: bool = True) -> bool: - # NOTE: stub restricts to `BooleanArray`, should be based on truthiness - # Copies `pc.all` - pc_any: Incomplete = pc.any return maybe_extract_py_scalar( - pc_any(self.native, min_count=0), _return_py_scalar + pc.any(self.native, min_count=0), _return_py_scalar ) def all(self: Self, *, _return_py_scalar: bool = True) -> bool: - # NOTE: stub restricts to `BooleanArray`, should be based on truthiness - pc_all: Incomplete = pc.all return maybe_extract_py_scalar( - pc_all(self.native, min_count=0), _return_py_scalar + pc.all(self.native, min_count=0), _return_py_scalar ) def is_between( @@ -630,7 +610,7 @@ def sample( rng = np.random.default_rng(seed=seed) idx = np.arange(0, num_rows) mask = rng.choice(idx, size=n, replace=with_replacement) - return self._from_native_series(self.native.take(mask)) # pyright: ignore[reportArgumentType] + return self._from_native_series(self.native.take(mask)) def fill_null( self: Self, @@ -690,15 +670,15 @@ def to_frame(self: Self) -> ArrowDataFrame: def to_pandas(self: Self) -> pd.Series[Any]: import pandas as pd # ignore-banned-import() - return pd.Series(self.native, name=self.name) # pyright: ignore[reportArgumentType, reportCallIssue] + return pd.Series(self.native, name=self.name) def to_polars(self: Self) -> pl.Series: import polars as pl # ignore-banned-import - return pl.from_arrow(self.native) # type: ignore[return-value] + return cast("pl.Series", pl.from_arrow(self.native)) - def is_unique(self: Self) -> Self: - return self.to_frame().is_unique().alias(self.name) # type: ignore[return-value] + def is_unique(self: Self) -> ArrowSeries: + return self.to_frame().is_unique().alias(self.name) def is_first_distinct(self: Self) -> Self: import numpy as np # ignore-banned-import @@ -828,25 +808,17 @@ def clip( ) -> Self: _, lower_bound = extract_native(self, lower_bound) _, upper_bound = extract_native(self, upper_bound) - # NOTE: stubs are missing `ChunkedArray` support - # https://github.com/zen-xu/pyarrow-stubs/blob/d97063876720e6a5edda7eb15f4efe07c31b8296/pyarrow-stubs/compute.pyi#L948-L954 - max_element_wise: Incomplete = pc.max_element_wise - arr = max_element_wise(self.native, lower_bound) - arr = cast("ArrowChunkedArray", pc.min_element_wise(arr, upper_bound)) - - return self._from_native_series(arr) + arr = pc.max_element_wise(self.native, lower_bound) + return self._from_native_series(pc.min_element_wise(arr, upper_bound)) def to_arrow(self: Self) -> ArrowArray: return self.native.combine_chunks() - def mode(self: Self) -> Self: + def mode(self: Self) -> ArrowSeries: plx = self.__narwhals_namespace__() col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name]) - return self.value_counts( # type: ignore[return-value] - name=col_token, - normalize=False, - sort=False, - parallel=False, # parallel is unused + return self.value_counts( + name=col_token, normalize=False, sort=False, parallel=False ).filter(plx.col(col_token) == plx.col(col_token).max())[self.name] def is_finite(self: Self) -> Self: @@ -860,13 +832,10 @@ def cum_min(self: Self, *, reverse: bool) -> Self: if self._backend_version < (13, 0, 0): msg = "cum_min method is not supported for pyarrow < 13.0.0" raise NotImplementedError(msg) - - native_series = cast("Any", self.native) - result = ( - pc.cumulative_min(native_series, skip_nulls=True) + pc.cumulative_min(self.native, skip_nulls=True) if not reverse - else pc.cumulative_min(native_series[::-1], skip_nulls=True)[::-1] + else pc.cumulative_min(self.native[::-1], skip_nulls=True)[::-1] ) return self._from_native_series(result) @@ -874,13 +843,10 @@ def cum_max(self: Self, *, reverse: bool) -> Self: if self._backend_version < (13, 0, 0): msg = "cum_max method is not supported for pyarrow < 13.0.0" raise NotImplementedError(msg) - - native_series = cast("Any", self.native) - result = ( - pc.cumulative_max(native_series, skip_nulls=True) + pc.cumulative_max(self.native, skip_nulls=True) if not reverse - else pc.cumulative_max(native_series[::-1], skip_nulls=True)[::-1] + else pc.cumulative_max(self.native[::-1], skip_nulls=True)[::-1] ) return self._from_native_series(result) @@ -888,13 +854,10 @@ def cum_prod(self: Self, *, reverse: bool) -> Self: if self._backend_version < (13, 0, 0): msg = "cum_max method is not supported for pyarrow < 13.0.0" raise NotImplementedError(msg) - - native_series = cast("Any", self.native) - result = ( - pc.cumulative_prod(native_series, skip_nulls=True) + pc.cumulative_prod(self.native, skip_nulls=True) if not reverse - else pc.cumulative_prod(native_series[::-1], skip_nulls=True)[::-1] + else pc.cumulative_prod(self.native[::-1], skip_nulls=True)[::-1] ) return self._from_native_series(result) @@ -924,11 +887,7 @@ def rolling_sum( ) result = self._from_native_series( - pc.if_else( - (count_in_window >= min_samples)._native_series, - rolling_sum._native_series, - None, - ) + pc.if_else((count_in_window >= min_samples).native, rolling_sum.native, None) ) return result[offset:] @@ -960,9 +919,7 @@ def rolling_mean( result = ( self._from_native_series( pc.if_else( - (count_in_window >= min_samples)._native_series, - rolling_sum._native_series, - None, + (count_in_window >= min_samples).native, rolling_sum.native, None ) ) / count_in_window @@ -1006,18 +963,15 @@ def rolling_var( count_in_window = valid_count - valid_count.shift(window_size).fill_null( value=0, strategy=None, limit=None ) - # NOTE: stubs are missing `ChunkedArray` support - # https://github.com/zen-xu/pyarrow-stubs/blob/d97063876720e6a5edda7eb15f4efe07c31b8296/pyarrow-stubs/compute.pyi#L948-L954 - max_element_wise: Incomplete = pc.max_element_wise result = self._from_native_series( pc.if_else( - (count_in_window >= min_samples)._native_series, - (rolling_sum_sq - (rolling_sum**2 / count_in_window))._native_series, + (count_in_window >= min_samples).native, + (rolling_sum_sq - (rolling_sum**2 / count_in_window)).native, None, ) ) / self._from_native_series( - max_element_wise((count_in_window - ddof)._native_series, 0) + pc.max_element_wise((count_in_window - ddof).native, 0) ) return result[offset:] @@ -1125,8 +1079,7 @@ def _hist_from_bin_count(bin_count: int): # type: ignore[no-untyped-def] # noqa ) # empty bin intervals should have a 0 count counts_coalesce = cast( - "ArrowArray", - pc.coalesce(cast("ArrowArray", counts.column("counts")), lit(0)), + "ArrowArray", pc.coalesce(counts.column("counts"), lit(0)) ) counts = counts.set_column(0, "counts", counts_coalesce) diff --git a/narwhals/_arrow/series_dt.py b/narwhals/_arrow/series_dt.py index 885e64a86f..9252147d2e 100644 --- a/narwhals/_arrow/series_dt.py +++ b/narwhals/_arrow/series_dt.py @@ -49,7 +49,7 @@ def convert_time_zone(self: Self, time_zone: str) -> ArrowSeries: return self.from_native(ser.native.cast(pa.timestamp(self.unit, time_zone))) def timestamp(self: Self, time_unit: TimeUnit) -> ArrowSeries: - ser: ArrowSeries = self.compliant + ser = self.compliant dtypes = import_dtypes_module(ser._version) if isinstance(ser.dtype, dtypes.Datetime): unit = ser.dtype.time_unit diff --git a/narwhals/_arrow/series_str.py b/narwhals/_arrow/series_str.py index c3591376a8..62c7ef3ac7 100644 --- a/narwhals/_arrow/series_str.py +++ b/narwhals/_arrow/series_str.py @@ -2,7 +2,6 @@ import string from typing import TYPE_CHECKING -from typing import Any import pyarrow.compute as pc @@ -11,11 +10,9 @@ from narwhals._arrow.utils import parse_datetime_format if TYPE_CHECKING: - import pyarrow as pa from typing_extensions import Self from narwhals._arrow.series import ArrowSeries - from narwhals._arrow.typing import Incomplete class ArrowSeriesStringNamespace(ArrowSeriesNamespace): @@ -63,10 +60,7 @@ def split(self: Self, by: str) -> ArrowSeries: def to_datetime(self: Self, format: str | None) -> ArrowSeries: format = parse_datetime_format(self.native) if format is None else format - strptime: Incomplete = pc.strptime - timestamp_array: pa.Array[pa.TimestampScalar[Any, Any]] = strptime( - self.native, format=format, unit="us" - ) + timestamp_array = pc.strptime(self.native, format=format, unit="us") return self.from_native(timestamp_array) def to_uppercase(self: Self) -> ArrowSeries: diff --git a/narwhals/_arrow/series_struct.py b/narwhals/_arrow/series_struct.py index e79147d066..198ff30f06 100644 --- a/narwhals/_arrow/series_struct.py +++ b/narwhals/_arrow/series_struct.py @@ -4,17 +4,14 @@ import pyarrow.compute as pc +from narwhals._arrow.utils import ArrowSeriesNamespace + if TYPE_CHECKING: from typing_extensions import Self from narwhals._arrow.series import ArrowSeries -class ArrowSeriesStructNamespace: - def __init__(self: Self, series: ArrowSeries) -> None: - self._compliant_series: ArrowSeries = series - +class ArrowSeriesStructNamespace(ArrowSeriesNamespace): def field(self: Self, name: str) -> ArrowSeries: - return self._compliant_series._from_native_series( - pc.struct_field(self._compliant_series._native_series, name), - ).alias(name) + return self.from_native(pc.struct_field(self.native, name)).alias(name) diff --git a/narwhals/_arrow/typing.py b/narwhals/_arrow/typing.py index 103dbf2dc3..2382be617d 100644 --- a/narwhals/_arrow/typing.py +++ b/narwhals/_arrow/typing.py @@ -15,7 +15,6 @@ from typing_extensions import TypeAlias import pyarrow as pa - import pyarrow.compute as pc from pyarrow._stubs_typing import ( # pyright: ignore[reportMissingModuleSource] Indices, # noqa: F401 ) @@ -33,7 +32,6 @@ TieBreaker: TypeAlias = Literal["min", "max", "first", "dense"] NullPlacement: TypeAlias = Literal["at_start", "at_end"] - StringArray: TypeAlias = pc.StringArray ArrowChunkedArray: TypeAlias = pa.ChunkedArray[Any] ArrowArray: TypeAlias = pa.Array[Any] _AsPyType = TypeVar("_AsPyType") diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 7964492041..ce10365f5c 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -27,8 +27,6 @@ from narwhals._arrow.series import ArrowSeries from narwhals._arrow.typing import ArrowArray from narwhals._arrow.typing import ArrowChunkedArray - from narwhals._arrow.typing import Incomplete - from narwhals._arrow.typing import StringArray from narwhals.dtypes import DType from narwhals.typing import _AnyDArray from narwhals.utils import Version @@ -90,8 +88,7 @@ def nulls_like(n: int, series: ArrowSeries) -> ArrowArray: Uses the type of `series`, without upseting `mypy`. """ - nulls: Incomplete = pa.nulls - return nulls(n, series._type) + return pa.nulls(n, series.native.type) @lru_cache(maxsize=16) @@ -344,7 +341,6 @@ def floordiv_compat(left: Any, right: Any) -> Any: ) result = pc.if_else( pc.and_(has_remainder, has_one_negative_operand), - # GH: 55561 ruff: ignore pc.subtract(divided, lit(1, type=divided.type)), divided, ) @@ -474,8 +470,8 @@ def parse_datetime_format(arr: ArrowChunkedArray) -> str: msg = "Found multiple timezone values while inferring datetime format." raise ValueError(msg) - date_value = _parse_date_format(cast("StringArray", matches.field("date"))) - time_value = _parse_time_format(cast("StringArray", matches.field("time"))) + date_value = _parse_date_format(cast("pc.StringArray", matches.field("date"))) + time_value = _parse_time_format(cast("pc.StringArray", matches.field("time"))) sep_value = separators[0].as_py() tz_value = "%z" if tz[0].as_py() else "" @@ -483,7 +479,7 @@ def parse_datetime_format(arr: ArrowChunkedArray) -> str: return f"{date_value}{sep_value}{time_value}{tz_value}" -def _parse_date_format(arr: StringArray) -> str: +def _parse_date_format(arr: pc.StringArray) -> str: for date_rgx, date_fmt in DATE_FORMATS: matches = pc.extract_regex(arr, pattern=date_rgx) if date_fmt == "%Y%m%d" and pc.all(matches.is_valid()).as_py(): @@ -503,7 +499,7 @@ def _parse_date_format(arr: StringArray) -> str: raise ValueError(msg) -def _parse_time_format(arr: StringArray) -> str: +def _parse_time_format(arr: pc.StringArray) -> str: for time_rgx, time_fmt in TIME_FORMATS: matches = pc.extract_regex(arr, pattern=time_rgx) if pc.all(matches.is_valid()).as_py():