Skip to content

Test failures with Python 3.12.0b1 #1005

@mgorny

Description

@mgorny

Overview Description

The test suite fails when run with Python 3.12.0b1:

FAILED tests/messages/test_extract.py::ExtractPythonTestCase::test_utf8_message_with_utf8_bom -   File "<string>", line 1
FAILED tests/messages/test_extract.py::ExtractPythonTestCase::test_utf8_message_with_utf8_bom_and_magic_comment -   File "<string>", line 1
FAILED tests/messages/test_extract.py::ExtractPythonTestCase::test_utf8_raw_strings_match_unicode_strings -   File "<string>", line 1
FAILED tests/messages/test_extract.py::ExtractTestCase::test_f_strings - AssertionError: assert 3 == 4
FAILED tests/messages/test_extract.py::ExtractTestCase::test_f_strings_non_utf8 - assert 0 == 1

Furthermore, tox -e py312 fails by default because of the missing distutils module (installing setuptools can work around that, but the use of distutils should be removed altogether).

Steps to Reproduce

  1. tox -e py312

Actual Results

________________________________________ ExtractPythonTestCase.test_utf8_message_with_utf8_bom ________________________________________

self = <tests.messages.test_extract.ExtractPythonTestCase testMethod=test_utf8_message_with_utf8_bom>

        def test_utf8_message_with_utf8_bom(self):
            buf = BytesIO(codecs.BOM_UTF8 + """
    # NOTE: hello
    msg = _('Bonjour à tous')
    """.encode('utf-8'))
>           messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))

tests/messages/test_extract.py:367: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
babel/messages/extract.py:500: in extract_python
    for tok, value, (lineno, _), _, _ in tokens:
/usr/lib/python3.12/tokenize.py:451: in _tokenize
    for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

source = "\ufeff\n# NOTE: hello\nmsg = _('Bonjour à tous')\n", extra_tokens = True

    def _generate_tokens_from_c_tokenizer(source, extra_tokens=False):
        """Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
        import _tokenize as c_tokenizer
>       for info in c_tokenizer.TokenizerIter(source, extra_tokens=extra_tokens):
E         File "<string>", line 1
E           
E           ^
E       SyntaxError: invalid non-printable character U+FEFF

/usr/lib/python3.12/tokenize.py:542: SyntaxError
_______________________________ ExtractPythonTestCase.test_utf8_message_with_utf8_bom_and_magic_comment _______________________________

self = <tests.messages.test_extract.ExtractPythonTestCase testMethod=test_utf8_message_with_utf8_bom_and_magic_comment>

        def test_utf8_message_with_utf8_bom_and_magic_comment(self):
            buf = BytesIO(codecs.BOM_UTF8 + """# -*- coding: utf-8 -*-
    # NOTE: hello
    msg = _('Bonjour à tous')
    """.encode('utf-8'))
>           messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))

tests/messages/test_extract.py:376: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
babel/messages/extract.py:500: in extract_python
    for tok, value, (lineno, _), _, _ in tokens:
/usr/lib/python3.12/tokenize.py:451: in _tokenize
    for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

source = "\ufeff# -*- coding: utf-8 -*-\n# NOTE: hello\nmsg = _('Bonjour à tous')\n", extra_tokens = True

    def _generate_tokens_from_c_tokenizer(source, extra_tokens=False):
        """Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
        import _tokenize as c_tokenizer
>       for info in c_tokenizer.TokenizerIter(source, extra_tokens=extra_tokens):
E         File "<string>", line 1
E           # -*- coding: utf-8 -*-
E           ^
E       SyntaxError: invalid non-printable character U+FEFF

/usr/lib/python3.12/tokenize.py:542: SyntaxError
__________________________________ ExtractPythonTestCase.test_utf8_raw_strings_match_unicode_strings __________________________________

self = <tests.messages.test_extract.ExtractPythonTestCase testMethod=test_utf8_raw_strings_match_unicode_strings>

        def test_utf8_raw_strings_match_unicode_strings(self):
            buf = BytesIO(codecs.BOM_UTF8 + """
    msg = _('Bonjour à tous')
    msgu = _(u'Bonjour à tous')
    """.encode('utf-8'))
>           messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))

tests/messages/test_extract.py:393: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
babel/messages/extract.py:500: in extract_python
    for tok, value, (lineno, _), _, _ in tokens:
/usr/lib/python3.12/tokenize.py:451: in _tokenize
    for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

source = "\ufeff\nmsg = _('Bonjour à tous')\nmsgu = _(u'Bonjour à tous')\n", extra_tokens = True

    def _generate_tokens_from_c_tokenizer(source, extra_tokens=False):
        """Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
        import _tokenize as c_tokenizer
>       for info in c_tokenizer.TokenizerIter(source, extra_tokens=extra_tokens):
E         File "<string>", line 1
E           
E           ^
E       SyntaxError: invalid non-printable character U+FEFF

/usr/lib/python3.12/tokenize.py:542: SyntaxError
___________________________________________________ ExtractTestCase.test_f_strings ____________________________________________________

self = <tests.messages.test_extract.ExtractTestCase testMethod=test_f_strings>

        def test_f_strings(self):
            buf = BytesIO(br"""
    t1 = _('foobar')
    t2 = _(f'spameggs' f'feast')  # should be extracted; constant parts only
    t2 = _(f'spameggs' 'kerroshampurilainen')  # should be extracted (mixing f with no f)
    t3 = _(f'''whoa! a '''  # should be extracted (continues on following lines)
    f'flying shark'
        '... hello'
    )
    t4 = _(f'spameggs {t1}')  # should not be extracted
    """)
            messages = list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {}))
>           assert len(messages) == 4
E           AssertionError: assert 3 == 4
E            +  where 3 = len([(2, 'foobar', [], None), (4, 'kerroshampurilainen', [], None), (5, '... hello', [], None)])

tests/messages/test_extract.py:544: AssertionError
_______________________________________________ ExtractTestCase.test_f_strings_non_utf8 _______________________________________________

self = <tests.messages.test_extract.ExtractTestCase testMethod=test_f_strings_non_utf8>

        def test_f_strings_non_utf8(self):
            buf = BytesIO(b"""
    # -- coding: latin-1 --
    t2 = _(f'\xe5\xe4\xf6' f'\xc5\xc4\xd6')
    """)
            messages = list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {}))
>           assert len(messages) == 1
E           assert 0 == 1
E            +  where 0 = len([])

tests/messages/test_extract.py:556: AssertionError

Expected Results

All tests passing (or at least passing to the same extent they did on Python 3.11).

Reproducibility

Always.

Additional Information

Confirmed with git commit 8b152db.

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions