Skip to content

Commit 6fc07a2

Browse files
authored
Support 't' specifier in keywords (#1015)
* Support 't' specifier in keywords * Document --keyword format * Document parse_keywords output format and make logic more readable Fixes #1014
1 parent 9ef53c6 commit 6fc07a2

File tree

4 files changed

+179
-64
lines changed

4 files changed

+179
-64
lines changed

babel/messages/extract.py

Lines changed: 59 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ class _FileObj(SupportsRead[bytes], SupportsReadline[bytes], Protocol):
5555
def seek(self, __offset: int, __whence: int = ...) -> int: ...
5656
def tell(self) -> int: ...
5757

58-
_Keyword: TypeAlias = tuple[int | tuple[int, int] | tuple[int, str], ...] | None
58+
_SimpleKeyword: TypeAlias = tuple[int | tuple[int, int] | tuple[int, str], ...] | None
59+
_Keyword: TypeAlias = dict[int | None, _SimpleKeyword] | _SimpleKeyword
5960

6061
# 5-tuple of (filename, lineno, messages, comments, context)
6162
_FileExtractionResult: TypeAlias = tuple[str, int, str | tuple[str, ...], list[str], str | None]
@@ -315,6 +316,47 @@ def extract_from_file(
315316
options, strip_comment_tags))
316317

317318

319+
def _match_messages_against_spec(lineno: int, messages: list[str|None], comments: list[str],
                                 fileobj: _FileObj, spec: tuple[int|tuple[int, str], ...]):
    """Validate the arguments of one call against a single keyword spec.

    :param lineno: line number of the call, passed through to the result and
                   used in the empty-msgid warning
    :param messages: the call's arguments in positional order; ``None``
                     entries are arguments that cannot be used as a message
    :param comments: translator comments, passed through to the result
    :param fileobj: the file being extracted; only its optional ``name``
                    attribute is read, for the warning message
    :param spec: keyword specification; each element is either a 1-based
                 argument index ``n`` (a message) or a tuple ``(n, 'c')``
                 (the context)
    :return: ``(lineno, messages, comments, context)`` where ``messages`` is a
             single string if the spec selects one message and a tuple of
             strings otherwise, or ``None`` if the call does not match the
             spec
    """
    translatable = []
    context = None

    # last_index is 1 based like the keyword spec
    last_index = len(messages)
    for index in spec:
        if isinstance(index, tuple):  # (n, 'c')
            if last_index < index[0]:
                # Not enough arguments to supply the context
                return None
            context = messages[index[0] - 1]
            continue
        if last_index < index:
            # Not enough arguments
            return None
        message = messages[index - 1]
        if message is None:
            return None
        translatable.append(message)

    if not translatable:
        # The spec selects no message at all (it is empty or names only a
        # context argument), so there is nothing to extract.
        return None

    # The first collected message is the msgid. An empty string msgid isn't
    # valid, emit a warning
    if not translatable[0]:
        filename = (getattr(fileobj, "name", None) or "(unknown)")
        sys.stderr.write(
            f"{filename}:{lineno}: warning: Empty msgid.  It is reserved by GNU gettext: gettext(\"\") "
            f"returns the header entry with meta information, not the empty string.\n"
        )
        return None

    translatable = tuple(translatable)
    if len(translatable) == 1:
        translatable = translatable[0]

    return lineno, translatable, comments, context
358+
359+
318360
def extract(
319361
method: _ExtractionMethod,
320362
fileobj: _FileObj,
@@ -400,56 +442,30 @@ def extract(
400442
options=options or {})
401443

402444
for lineno, funcname, messages, comments in results:
403-
spec = keywords[funcname] or (1,) if funcname else (1,)
404445
if not isinstance(messages, (list, tuple)):
405446
messages = [messages]
406447
if not messages:
407448
continue
408449

409-
# Validate the messages against the keyword's specification
410-
context = None
411-
msgs = []
412-
invalid = False
413-
# last_index is 1 based like the keyword spec
414-
last_index = len(messages)
415-
for index in spec:
416-
if isinstance(index, tuple):
417-
context = messages[index[0] - 1]
418-
continue
419-
if last_index < index:
420-
# Not enough arguments
421-
invalid = True
422-
break
423-
message = messages[index - 1]
424-
if message is None:
425-
invalid = True
426-
break
427-
msgs.append(message)
428-
if invalid:
429-
continue
430-
431-
# keyword spec indexes are 1 based, therefore '-1'
432-
if isinstance(spec[0], tuple):
433-
# context-aware *gettext method
434-
first_msg_index = spec[1] - 1
435-
else:
436-
first_msg_index = spec[0] - 1
437-
if not messages[first_msg_index]:
438-
# An empty string msgid isn't valid, emit a warning
439-
filename = (getattr(fileobj, "name", None) or "(unknown)")
440-
sys.stderr.write(
441-
f"{filename}:{lineno}: warning: Empty msgid. It is reserved by GNU gettext: gettext(\"\") "
442-
f"returns the header entry with meta information, not the empty string.\n"
443-
)
444-
continue
445-
446-
messages = tuple(msgs)
447-
if len(messages) == 1:
448-
messages = messages[0]
450+
specs = keywords[funcname] or None if funcname else None
451+
# {None: x} may be collapsed into x for backwards compatibility.
452+
if not isinstance(specs, dict):
453+
specs = {None: specs}
449454

450455
if strip_comment_tags:
451456
_strip_comment_tags(comments, comment_tags)
452-
yield lineno, messages, comments, context
457+
458+
# None matches all arities.
459+
for arity in (None, len(messages)):
460+
try:
461+
spec = specs[arity]
462+
except KeyError:
463+
continue
464+
if spec is None:
465+
spec = (1,)
466+
result = _match_messages_against_spec(lineno, messages, comments, fileobj, spec)
467+
if result is not None:
468+
yield result
453469

454470

455471
def extract_nothing(

babel/messages/frontend.py

Lines changed: 50 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
:license: BSD, see LICENSE for more details.
99
"""
1010

11+
from __future__ import annotations
12+
1113
import datetime
1214
import fnmatch
1315
import logging
@@ -1111,34 +1113,63 @@ def parse_mapping(fileobj, filename=None):
11111113

11121114
return method_map, options_map
11131115

1116+
def _parse_spec(s: str) -> tuple[int | None, tuple[int|tuple[int, str], ...]]:
    """Parse the part of a keyword specification that follows the colon.

    The string is a comma-separated list in which each entry is one of:

    * ``n`` -- the nth argument is a message,
    * ``nc`` -- the nth argument is the context,
    * ``nt`` -- the keyword only applies to calls with exactly n arguments.

    Whitespace around an entry is ignored.

    :param s: the specification string, e.g. ``"1c,2,2t"``
    :return: a tuple ``(number, indices)`` where ``number`` is the value of
             the ``t`` entry (``None`` if there is none) and ``indices`` is a
             tuple of ``n`` and ``(n, 'c')`` elements in the order given
    :raises ValueError: if an entry is empty or is not a valid integer with an
                        optional ``c``/``t`` suffix
    """
    inds = []
    number = None
    for raw in s.split(','):
        x = raw.strip()
        if not x:
            # Without this check, x[-1] below would fail with an IndexError
            # that says nothing about the offending keyword.
            raise ValueError(f"Empty argument specifier in keyword specification {s!r}")
        if x.endswith('t'):
            number = int(x[:-1])
        elif x.endswith('c'):
            inds.append((int(x[:-1]), 'c'))
        else:
            inds.append(int(x))
    return number, tuple(inds)
11141127

11151128
def parse_keywords(strings: Iterable[str] = ()):
    """Parse keywords specifications from the given list of strings.

    >>> import pprint
    >>> keywords = ['_', 'dgettext:2', 'dngettext:2,3', 'pgettext:1c,2',
    ...             'polymorphic:1', 'polymorphic:2,2t', 'polymorphic:3c,3t']
    >>> pprint.pprint(parse_keywords(keywords))
    {'_': None,
     'dgettext': (2,),
     'dngettext': (2, 3),
     'pgettext': ((1, 'c'), 2),
     'polymorphic': {None: (1,), 2: (2,), 3: ((3, 'c'),)}}

    The input keywords are in GNU Gettext style; see :doc:`cmdline` for details.
    A keyword with nothing after the colon (``'foo:'``) is treated like the
    bare keyword ``'foo'``.

    The output is a dictionary mapping each keyword name to a dictionary of specifications.
    Keys in this inner dictionary are numbers of arguments, where ``None`` means that all
    numbers of arguments are matched, and a number means only calls with that number of
    arguments are matched (which happens when using the "t" specifier). However, as a
    special case for backwards compatibility, if the dictionary of specifications would
    be ``{None: x}``, i.e., there is only one specification and it matches all argument
    counts, then it is collapsed into just ``x``.

    A specification is either a tuple or None. If a tuple, each element can be either a number
    ``n``, meaning that the nth argument should be extracted as a message, or the tuple
    ``(n, 'c')``, meaning that the nth argument should be extracted as context for the
    messages. A ``None`` specification is equivalent to ``(1,)``, extracting the first
    argument.

    If the same keyword is given several times for the same number of
    arguments, the last specification wins.
    """
    keywords = {}
    for string in strings:
        # partition() never fails to unpack: a second colon ends up in
        # spec_str, where _parse_spec rejects it.
        funcname, _, spec_str = string.partition(':')
        if spec_str.strip():
            number, spec = _parse_spec(spec_str)
        else:
            # Bare keyword, or nothing after the colon: default specification
            # that applies to every argument count.
            number, spec = None, None
        keywords.setdefault(funcname, {})[number] = spec

    # For best backwards compatibility, collapse {None: x} into x.
    return {
        funcname: specs[None] if set(specs) == {None} else specs
        for funcname, specs in keywords.items()
    }
11431174

11441175

docs/cmdline.rst

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,45 @@ a collection of source files::
133133
header comment for the catalog
134134

135135

136+
The meaning of ``--keyword`` values is as follows:
137+
138+
- Pass a simple identifier like ``_`` to extract the first (and only the first)
139+
argument of all function calls to ``_``.
140+
141+
- To extract other arguments than the first, add a colon and the argument
142+
indices separated by commas. For example, the ``dngettext`` function
143+
typically expects translatable strings as second and third arguments,
144+
so you could pass ``dngettext:2,3``.
145+
146+
- Some arguments should not be interpreted as translatable strings, but as
147+
context strings. For that, append "c" to the argument index. For example:
148+
``pgettext:1c,2``.
149+
150+
- In C++ and Python, you may have functions that behave differently
151+
depending on how many arguments they take. For this use case, you can
152+
add an integer followed by "t" to the list after the colon. In this case, the
153+
keyword will only match a function invocation if it has the specified
154+
total number of arguments. For example, if you have a function
155+
``foo`` that behaves as ``gettext`` (argument is a message) or
156+
``pgettext`` (arguments are a context and a message) depending on
157+
whether it takes one or two arguments, you can pass
158+
``--keyword=foo:1,1t --keyword=foo:1c,2,2t``.
159+
160+
The default keywords are equivalent to passing ::
161+
162+
--keyword=_
163+
--keyword=gettext
164+
--keyword=ngettext:1,2
165+
--keyword=ugettext
166+
--keyword=ungettext:1,2
167+
--keyword=dgettext:2
168+
--keyword=dngettext:2,3
169+
--keyword=N_
170+
--keyword=pgettext:1c,2
171+
--keyword=npgettext:1c,2,3
172+
173+
174+
136175
init
137176
====
138177

tests/messages/test_frontend.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,15 @@
1717
import time
1818
import unittest
1919
from datetime import datetime, timedelta
20-
from io import StringIO
20+
from io import BytesIO, StringIO
2121

2222
import pytest
2323
from freezegun import freeze_time
2424
from setuptools import Distribution
2525

2626
from babel import __version__ as VERSION
2727
from babel.dates import format_datetime
28-
from babel.messages import Catalog, frontend
28+
from babel.messages import Catalog, extract, frontend
2929
from babel.messages.frontend import (
3030
BaseError,
3131
CommandLineInterface,
@@ -1422,6 +1422,35 @@ def test_parse_keywords():
14221422
}
14231423

14241424

1425+
def test_parse_keywords_with_t():
    """Specs carrying a 't' entry are grouped by argument count per keyword."""
    expected = {
        '_': {
            None: (1,),
            2: (2,),
            3: ((2, 'c'), 3),
        },
    }
    parsed = frontend.parse_keywords(['_:1', '_:2,2t', '_:2c,3,3t'])
    assert parsed == expected
1435+
1436+
def test_extract_messages_with_t():
    """Extraction applies both the catch-all spec and the per-arity spec."""
    source = rb"""
_("1 arg, arg 1")
_("2 args, arg 1", "2 args, arg 2")
_("3 args, arg 1", "3 args, arg 2", "3 args, arg 3")
_("4 args, arg 1", "4 args, arg 2", "4 args, arg 3", "4 args, arg 4")
"""
    keywords = frontend.parse_keywords(['_:1', '_:2,2t', '_:2c,3,3t'])
    extracted = list(extract.extract("python", BytesIO(source), keywords))
    # Every call matches the catch-all spec (first argument); calls with two
    # or three arguments additionally match their arity-specific spec.
    assert extracted == [
        (2, '1 arg, arg 1', [], None),
        (3, '2 args, arg 1', [], None),
        (3, '2 args, arg 2', [], None),
        (4, '3 args, arg 1', [], None),
        (4, '3 args, arg 3', [], '3 args, arg 2'),
        (5, '4 args, arg 1', [], None),
    ]
1452+
1453+
14251454
def configure_cli_command(cmdline):
14261455
"""
14271456
Helper to configure a command class, but not run it just yet.

0 commit comments

Comments
 (0)