Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions narwhals/_arrow/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,13 @@ def ends_with(self, suffix: str) -> ArrowSeries:
pc.equal(self.slice(-len(suffix), None).native, lit(suffix))
)

def contains(self, pattern: str, *, literal: bool) -> ArrowSeries:
check_func = pc.match_substring if literal else pc.match_substring_regex
return self.with_native(check_func(self.native, pattern))
def contains(self, pattern: ArrowSeries, *, literal: bool) -> ArrowSeries:
    """Check whether each string contains `pattern` (pyarrow backend).

    Arguments:
        pattern: Compliant series holding the pattern; its native value must
            be a `pa.StringScalar`.
        literal: If True, match `pattern` as a plain substring; otherwise
            match it as a regular expression.

    Raises:
        TypeError: If the extracted native pattern is not a `pa.StringScalar`.
    """
    _, native_pattern = extract_native(self.compliant, pattern)
    if isinstance(native_pattern, pa.StringScalar):
        # The pyarrow kernels are given a plain Python `str`, hence `as_py()`.
        matcher = pc.match_substring if literal else pc.match_substring_regex
        return self.with_native(matcher(self.native, native_pattern.as_py()))
    msg = "`.str.contains` only supports str pattern values for pyarrow backend"
    raise TypeError(msg)

def slice(self, offset: int, length: int | None) -> ArrowSeries:
stop = offset + length if length is not None else None
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_compliant/any_namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def replace_all(self, value: T, pattern: str, *, literal: bool) -> T: ...
def strip_chars(self, characters: str | None) -> T: ...
def starts_with(self, prefix: str) -> T: ...
def ends_with(self, suffix: str) -> T: ...
def contains(self, pattern: str, *, literal: bool) -> T: ...
def contains(self, pattern: T, *, literal: bool) -> T: ...
def slice(self, offset: int, length: int | None) -> T: ...
def split(self, by: str) -> T: ...
def to_datetime(self, format: str | None) -> T: ...
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_compliant/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1135,7 +1135,7 @@ def starts_with(self, prefix: str) -> EagerExprT:
def ends_with(self, suffix: str) -> EagerExprT:
    """Delegate `str.ends_with` to the series namespace of the compliant expr."""
    reuse = self.compliant._reuse_series_namespace
    return reuse("str", "ends_with", suffix=suffix)

def contains(self, pattern: str, *, literal: bool) -> EagerExprT:
def contains(self, pattern: EagerExprT, *, literal: bool) -> EagerExprT:
    """Delegate `str.contains` to the series namespace of the compliant expr.

    Both `pattern` and `literal` are forwarded unchanged as keyword arguments.
    """
    reuse = self.compliant._reuse_series_namespace
    return reuse("str", "contains", pattern=pattern, literal=literal)
Expand Down
16 changes: 12 additions & 4 deletions narwhals/_dask/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,18 @@ def starts_with(self, prefix: str) -> DaskExpr:
def ends_with(self, suffix: str) -> DaskExpr:
    """Build an expression testing whether each string ends with `suffix`."""
    with_callable = self.compliant._with_callable
    return with_callable(lambda expr: expr.str.endswith(suffix))

def contains(self, pattern: str, *, literal: bool) -> DaskExpr:
return self.compliant._with_callable(
lambda expr: expr.str.contains(pat=pattern, regex=not literal)
)
def contains(self, pattern: DaskExpr, *, literal: bool) -> DaskExpr:
    """Check whether each string contains `pattern` (dask backend).

    Arguments:
        pattern: Compliant expression holding the pattern. It must be a
            literal expression (`pattern._metadata.is_literal`).
        literal: If True, match `pattern` as a plain substring; otherwise
            match it as a regular expression.

    Raises:
        TypeError: If `pattern` is not a literal expression.
    """
    if not pattern._metadata.is_literal:
        # This method validates a *pattern*; the message previously said
        # "replacement values", which described a different method.
        msg = "dask backed `Expr.str.contains` only supports str pattern values"
        raise TypeError(msg)

    def _contains(expr: dx.Series, pattern: dx.Series) -> dx.Series:
        # OK to call `compute` here as `pattern` is just a literal expression.
        return expr.str.contains(  # pyright: ignore[reportAttributeAccessIssue]
            pat=pattern.compute(), regex=not literal
        )

    return self.compliant._with_callable(_contains, pattern=pattern)

def slice(self, offset: int, length: int | None) -> DaskExpr:
return self.compliant._with_callable(
Expand Down
10 changes: 8 additions & 2 deletions narwhals/_pandas_like/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,14 @@ def starts_with(self, prefix: str) -> PandasLikeSeries:
def ends_with(self, suffix: str) -> PandasLikeSeries:
    """Return a series flagging the strings that end with `suffix`."""
    wrap = self.with_native
    mask = self.native.str.endswith(suffix)
    return wrap(mask)

def contains(self, pattern: str, *, literal: bool) -> PandasLikeSeries:
return self.with_native(self.native.str.contains(pat=pattern, regex=not literal))
def contains(self, pattern: PandasLikeSeries, *, literal: bool) -> PandasLikeSeries:
    """Check whether each string contains `pattern` (pandas-like backends).

    Arguments:
        pattern: Compliant series holding the pattern; its native value must
            be a plain `str`.
        literal: If True, match `pattern` as a plain substring; otherwise
            match it as a regular expression.

    Raises:
        TypeError: If the extracted native pattern is not a `str`.
    """
    _, native_pattern = align_and_extract_native(self.compliant, pattern)
    if isinstance(native_pattern, str):
        # The native call takes `regex`, the inverse of our `literal` flag.
        use_regex = not literal
        return self.with_native(
            self.native.str.contains(pat=native_pattern, regex=use_regex)
        )
    msg = f"`.str.contains` only supports str pattern values for {self.compliant._implementation} backend"
    raise TypeError(msg)

def slice(self, offset: int, length: int | None) -> PandasLikeSeries:
stop = offset + length if length else None
Expand Down
6 changes: 6 additions & 0 deletions narwhals/_polars/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,12 @@ def replace_all(
self.native.str.replace_all(pattern, value_native, literal=literal)
)

def contains(self, pattern: PolarsExpr, *, literal: bool) -> PolarsExpr:
    """Check whether each string contains `pattern` (polars expression).

    The native form of `pattern` is passed to the native `str.contains`
    together with `literal`, and the result is re-wrapped as a compliant
    expression.
    """
    native_pattern = extract_native(pattern)
    wrap = self.compliant._with_native
    matched = self.native.str.contains(native_pattern, literal=literal)
    return wrap(matched)


class PolarsExprCatNamespace(
PolarsExprNamespace, PolarsCatNamespace[PolarsExpr, pl.Expr]
Expand Down
6 changes: 6 additions & 0 deletions narwhals/_polars/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,12 @@ def replace_all(
self.native.str.replace_all(pattern, value_native, literal=literal) # type: ignore[arg-type]
)

def contains(self, pattern: PolarsSeries, *, literal: bool) -> PolarsSeries:
    """Check whether each string contains `pattern` (polars series).

    The native form of `pattern` is passed to the native `str.contains`
    together with `literal`, and the result is re-wrapped as a compliant
    series.
    """
    native_pattern = extract_native(pattern)
    wrap = self.compliant._with_native
    matched = self.native.str.contains(native_pattern, literal=literal)  # type: ignore[arg-type]
    return wrap(matched)


class PolarsSeriesCatNamespace(
PolarsSeriesNamespace, PolarsCatNamespace[PolarsSeries, pl.Series]
Expand Down
17 changes: 11 additions & 6 deletions narwhals/_sql/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,18 @@ def _when(
) -> NativeExpr:
return self.compliant._when(condition, value, otherwise) # type: ignore[no-any-return]

def contains(self, pattern: str, *, literal: bool) -> SQLExprT:
def func(expr: NativeExpr) -> NativeExpr:
if literal:
return self._function("contains", expr, self._lit(pattern))
return self._function("regexp_matches", expr, self._lit(pattern))
def contains(self, pattern: SQLExprT, *, literal: bool) -> SQLExprT:

return self.compliant._with_elementwise(func)
def func(expr: NativeExpr, pattern: NativeExpr) -> NativeExpr:
func_name = "contains" if literal else "regexp_matches"
return self._function(func_name, expr, pattern)

compliant_pattern = (
self.compliant.__narwhals_namespace__().lit(pattern, dtype=None)
if isinstance(pattern, str)
else pattern
)
return self.compliant._with_elementwise(func, pattern=compliant_pattern)

def ends_with(self, suffix: str) -> SQLExprT:
return self.compliant._with_elementwise(
Expand Down
18 changes: 14 additions & 4 deletions narwhals/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,13 +186,19 @@ def ends_with(self, suffix: str) -> ExprT:
ExprNode(ExprKind.ELEMENTWISE, "str.ends_with", suffix=suffix)
)

def contains(self, pattern: str, *, literal: bool = False) -> ExprT:
def contains(self, pattern: str | IntoExpr, *, literal: bool = False) -> ExprT:
r"""Check if string contains a substring that matches a pattern.

Arguments:
pattern: A Character sequence or valid regular expression pattern.
pattern: A Character sequence, valid regular expression pattern, or another
Expr.
literal: If True, treats the pattern as a literal string.
If False, assumes the pattern is a regular expression.
If False, assumes the pattern is a regular expression.

Warning:
Passing an expression as `pattern` is only supported by DuckDB, Ibis, Polars,
PySpark and SQLFrame. Other backends, such as pandas and PyArrow, will raise
a `TypeError`.

Examples:
>>> import pyarrow as pa
Expand All @@ -214,7 +220,11 @@ def contains(self, pattern: str, *, literal: bool = False) -> ExprT:
"""
return self._expr._append_node(
ExprNode(
ExprKind.ELEMENTWISE, "str.contains", pattern=pattern, literal=literal
ExprKind.ELEMENTWISE,
"str.contains",
exprs=(pattern,),
literal=literal,
str_as_lit=True,
)
)

Expand Down
15 changes: 11 additions & 4 deletions narwhals/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 +152,18 @@ def ends_with(self, suffix: str) -> SeriesT:
self._narwhals_series._compliant_series.str.ends_with(suffix)
)

def contains(self, pattern: str, *, literal: bool = False) -> SeriesT:
def contains(self, pattern: str | SeriesT, *, literal: bool = False) -> SeriesT:
r"""Check if string contains a substring that matches a pattern.

Arguments:
pattern: A Character sequence or valid regular expression pattern.
pattern: A Character sequence, valid regular expression pattern, or another
Series.
literal: If True, treats the pattern as a literal string.
If False, assumes the pattern is a regular expression.
If False, assumes the pattern is a regular expression.

Warning:
Passing a Series as `pattern` is only supported by Polars. Other backends
will raise a `TypeError`.

Examples:
>>> import pyarrow as pa
Expand All @@ -176,7 +181,9 @@ def contains(self, pattern: str, *, literal: bool = False) -> SeriesT:
]
"""
return self._narwhals_series._with_compliant(
self._narwhals_series._compliant_series.str.contains(pattern, literal=literal)
self._narwhals_series._compliant_series.str.contains(
self._extract_compliant(pattern), literal=literal
)
)

def slice(self, offset: int, length: int | None = None) -> SeriesT:
Expand Down
Loading
Loading