Skip to content

Commit 8260a4d

Browse files
encukou authored and akx committed
Add f-string parsing for Python 3.12 (PEP 701)
Since Python 3.12, f-strings are tokenized and parsed like the rest of Python's grammar, using the new tokens FSTRING_START, FSTRING_MIDDLE and FSTRING_END. Make the babel message extractor concatenate these three if they're adjacent to each other. If they're not, that means there are dynamic substitutions, so the f-string is ignored.
1 parent 6fc07a2 commit 8260a4d

File tree

1 file changed

+34
-0
lines changed

1 file changed

+34
-0
lines changed

babel/messages/extract.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import io
2222
import os
2323
import sys
24+
import tokenize
2425
from collections.abc import (
2526
Callable,
2627
Collection,
@@ -90,6 +91,11 @@ def tell(self) -> int: ...
9091

9192
DEFAULT_MAPPING: list[tuple[str, str]] = [('**.py', 'python')]
9293

94+
# New tokens in Python 3.12, or None on older versions
95+
FSTRING_START = getattr(tokenize, "FSTRING_START", None)
96+
FSTRING_MIDDLE = getattr(tokenize, "FSTRING_MIDDLE", None)
97+
FSTRING_END = getattr(tokenize, "FSTRING_END", None)
98+
9399

94100
def _strip_comment_tags(comments: MutableSequence[str], tags: Iterable[str]):
95101
"""Helper function for `extract` that strips comment tags from strings
@@ -513,6 +519,11 @@ def extract_python(
513519
next_line = lambda: fileobj.readline().decode(encoding)
514520

515521
tokens = generate_tokens(next_line)
522+
523+
# Current prefix of a Python 3.12 (PEP 701) f-string, or None if we're not
524+
# currently parsing one.
525+
current_fstring_start = None
526+
516527
for tok, value, (lineno, _), _, _ in tokens:
517528
if call_stack == -1 and tok == NAME and value in ('def', 'class'):
518529
in_def = True
@@ -574,6 +585,20 @@ def extract_python(
574585
val = _parse_python_string(value, encoding, future_flags)
575586
if val is not None:
576587
buf.append(val)
588+
589+
# Python 3.12+, see https://peps.python.org/pep-0701/#new-tokens
590+
elif tok == FSTRING_START:
591+
current_fstring_start = value
592+
elif tok == FSTRING_MIDDLE:
593+
if current_fstring_start is not None:
594+
current_fstring_start += value
595+
elif tok == FSTRING_END:
596+
if current_fstring_start is not None:
597+
fstring = current_fstring_start + value
598+
val = _parse_python_string(fstring, encoding, future_flags)
599+
if val is not None:
600+
buf.append(val)
601+
577602
elif tok == OP and value == ',':
578603
if buf:
579604
messages.append(''.join(buf))
@@ -594,6 +619,15 @@ def extract_python(
594619
elif tok == NAME and value in keywords:
595620
funcname = value
596621

622+
if (current_fstring_start is not None
623+
and tok not in {FSTRING_START, FSTRING_MIDDLE}
624+
):
625+
# In Python 3.12, tokens other than FSTRING_* mean the
626+
# f-string is dynamic, so we don't want to extract it.
627+
# And if it's FSTRING_END, we've already handled it above.
628+
# Let's forget that we're in an f-string.
629+
current_fstring_start = None
630+
597631

598632
def _parse_python_string(value: str, encoding: str, future_flags: int) -> str | None:
599633
# Unwrap quotes in a safe manner, maintaining the string's encoding

0 commit comments

Comments
 (0)