-
Notifications
You must be signed in to change notification settings - Fork 460
Closed
Description
Overview Description
The test suite fails when run with Python 3.12.0b1:
FAILED tests/messages/test_extract.py::ExtractPythonTestCase::test_utf8_message_with_utf8_bom - File "<string>", line 1
FAILED tests/messages/test_extract.py::ExtractPythonTestCase::test_utf8_message_with_utf8_bom_and_magic_comment - File "<string>", line 1
FAILED tests/messages/test_extract.py::ExtractPythonTestCase::test_utf8_raw_strings_match_unicode_strings - File "<string>", line 1
FAILED tests/messages/test_extract.py::ExtractTestCase::test_f_strings - AssertionError: assert 3 == 4
FAILED tests/messages/test_extract.py::ExtractTestCase::test_f_strings_non_utf8 - assert 0 == 1
Furthermore, tox -e py312 fails by default because of the missing distutils module (installing setuptools can work around that, but the use of distutils should be removed altogether).
Steps to Reproduce
tox -e py312
Actual Results
________________________________________ ExtractPythonTestCase.test_utf8_message_with_utf8_bom ________________________________________
self = <tests.messages.test_extract.ExtractPythonTestCase testMethod=test_utf8_message_with_utf8_bom>
def test_utf8_message_with_utf8_bom(self):
buf = BytesIO(codecs.BOM_UTF8 + """
# NOTE: hello
msg = _('Bonjour à tous')
""".encode('utf-8'))
> messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
tests/messages/test_extract.py:367:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
babel/messages/extract.py:500: in extract_python
for tok, value, (lineno, _), _, _ in tokens:
/usr/lib/python3.12/tokenize.py:451: in _tokenize
for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
source = "\ufeff\n# NOTE: hello\nmsg = _('Bonjour à tous')\n", extra_tokens = True
def _generate_tokens_from_c_tokenizer(source, extra_tokens=False):
"""Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
import _tokenize as c_tokenizer
> for info in c_tokenizer.TokenizerIter(source, extra_tokens=extra_tokens):
E File "<string>", line 1
E
E ^
E SyntaxError: invalid non-printable character U+FEFF
/usr/lib/python3.12/tokenize.py:542: SyntaxError
_______________________________ ExtractPythonTestCase.test_utf8_message_with_utf8_bom_and_magic_comment _______________________________
self = <tests.messages.test_extract.ExtractPythonTestCase testMethod=test_utf8_message_with_utf8_bom_and_magic_comment>
def test_utf8_message_with_utf8_bom_and_magic_comment(self):
buf = BytesIO(codecs.BOM_UTF8 + """# -*- coding: utf-8 -*-
# NOTE: hello
msg = _('Bonjour à tous')
""".encode('utf-8'))
> messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
tests/messages/test_extract.py:376:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
babel/messages/extract.py:500: in extract_python
for tok, value, (lineno, _), _, _ in tokens:
/usr/lib/python3.12/tokenize.py:451: in _tokenize
for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
source = "\ufeff# -*- coding: utf-8 -*-\n# NOTE: hello\nmsg = _('Bonjour à tous')\n", extra_tokens = True
def _generate_tokens_from_c_tokenizer(source, extra_tokens=False):
"""Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
import _tokenize as c_tokenizer
> for info in c_tokenizer.TokenizerIter(source, extra_tokens=extra_tokens):
E File "<string>", line 1
E # -*- coding: utf-8 -*-
E ^
E SyntaxError: invalid non-printable character U+FEFF
/usr/lib/python3.12/tokenize.py:542: SyntaxError
__________________________________ ExtractPythonTestCase.test_utf8_raw_strings_match_unicode_strings __________________________________
self = <tests.messages.test_extract.ExtractPythonTestCase testMethod=test_utf8_raw_strings_match_unicode_strings>
def test_utf8_raw_strings_match_unicode_strings(self):
buf = BytesIO(codecs.BOM_UTF8 + """
msg = _('Bonjour à tous')
msgu = _(u'Bonjour à tous')
""".encode('utf-8'))
> messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
tests/messages/test_extract.py:393:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
babel/messages/extract.py:500: in extract_python
for tok, value, (lineno, _), _, _ in tokens:
/usr/lib/python3.12/tokenize.py:451: in _tokenize
for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
source = "\ufeff\nmsg = _('Bonjour à tous')\nmsgu = _(u'Bonjour à tous')\n", extra_tokens = True
def _generate_tokens_from_c_tokenizer(source, extra_tokens=False):
"""Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
import _tokenize as c_tokenizer
> for info in c_tokenizer.TokenizerIter(source, extra_tokens=extra_tokens):
E File "<string>", line 1
E
E ^
E SyntaxError: invalid non-printable character U+FEFF
/usr/lib/python3.12/tokenize.py:542: SyntaxError
___________________________________________________ ExtractTestCase.test_f_strings ____________________________________________________
self = <tests.messages.test_extract.ExtractTestCase testMethod=test_f_strings>
def test_f_strings(self):
buf = BytesIO(br"""
t1 = _('foobar')
t2 = _(f'spameggs' f'feast') # should be extracted; constant parts only
t2 = _(f'spameggs' 'kerroshampurilainen') # should be extracted (mixing f with no f)
t3 = _(f'''whoa! a ''' # should be extracted (continues on following lines)
f'flying shark'
'... hello'
)
t4 = _(f'spameggs {t1}') # should not be extracted
""")
messages = list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {}))
> assert len(messages) == 4
E AssertionError: assert 3 == 4
E + where 3 = len([(2, 'foobar', [], None), (4, 'kerroshampurilainen', [], None), (5, '... hello', [], None)])
tests/messages/test_extract.py:544: AssertionError
_______________________________________________ ExtractTestCase.test_f_strings_non_utf8 _______________________________________________
self = <tests.messages.test_extract.ExtractTestCase testMethod=test_f_strings_non_utf8>
def test_f_strings_non_utf8(self):
buf = BytesIO(b"""
# -- coding: latin-1 --
t2 = _(f'\xe5\xe4\xf6' f'\xc5\xc4\xd6')
""")
messages = list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {}))
> assert len(messages) == 1
E assert 0 == 1
E + where 0 = len([])
tests/messages/test_extract.py:556: AssertionError
Expected Results
Passing tests (or at least passing as well as py3.11 did).
Reproducibility
Always.
Additional Information
Confirmed with git 8b152db.
Metadata
Metadata
Assignees
Labels
No labels