-
Notifications
You must be signed in to change notification settings - Fork 467
Closed
Description
Overview Description
The test suite fails when run with Python 3.12.0b1:
FAILED tests/messages/test_extract.py::ExtractPythonTestCase::test_utf8_message_with_utf8_bom - File "<string>", line 1
FAILED tests/messages/test_extract.py::ExtractPythonTestCase::test_utf8_message_with_utf8_bom_and_magic_comment - File "<string>", line 1
FAILED tests/messages/test_extract.py::ExtractPythonTestCase::test_utf8_raw_strings_match_unicode_strings - File "<string>", line 1
FAILED tests/messages/test_extract.py::ExtractTestCase::test_f_strings - AssertionError: assert 3 == 4
FAILED tests/messages/test_extract.py::ExtractTestCase::test_f_strings_non_utf8 - assert 0 == 1
Furthermore, tox -e py312 fails by default because of the missing distutils module (installing setuptools can work around that, but the use of distutils should be removed altogether).
Steps to Reproduce
tox -e py312
Actual Results
________________________________________ ExtractPythonTestCase.test_utf8_message_with_utf8_bom ________________________________________
self = <tests.messages.test_extract.ExtractPythonTestCase testMethod=test_utf8_message_with_utf8_bom>
def test_utf8_message_with_utf8_bom(self):
buf = BytesIO(codecs.BOM_UTF8 + """
# NOTE: hello
msg = _('Bonjour à tous')
""".encode('utf-8'))
> messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
tests/messages/test_extract.py:367:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
babel/messages/extract.py:500: in extract_python
for tok, value, (lineno, _), _, _ in tokens:
/usr/lib/python3.12/tokenize.py:451: in _tokenize
for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
source = "\ufeff\n# NOTE: hello\nmsg = _('Bonjour à tous')\n", extra_tokens = True
def _generate_tokens_from_c_tokenizer(source, extra_tokens=False):
"""Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
import _tokenize as c_tokenizer
> for info in c_tokenizer.TokenizerIter(source, extra_tokens=extra_tokens):
E File "<string>", line 1
E
E ^
E SyntaxError: invalid non-printable character U+FEFF
/usr/lib/python3.12/tokenize.py:542: SyntaxError
_______________________________ ExtractPythonTestCase.test_utf8_message_with_utf8_bom_and_magic_comment _______________________________
self = <tests.messages.test_extract.ExtractPythonTestCase testMethod=test_utf8_message_with_utf8_bom_and_magic_comment>
def test_utf8_message_with_utf8_bom_and_magic_comment(self):
buf = BytesIO(codecs.BOM_UTF8 + """# -*- coding: utf-8 -*-
# NOTE: hello
msg = _('Bonjour à tous')
""".encode('utf-8'))
> messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
tests/messages/test_extract.py:376:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
babel/messages/extract.py:500: in extract_python
for tok, value, (lineno, _), _, _ in tokens:
/usr/lib/python3.12/tokenize.py:451: in _tokenize
for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
source = "\ufeff# -*- coding: utf-8 -*-\n# NOTE: hello\nmsg = _('Bonjour à tous')\n", extra_tokens = True
def _generate_tokens_from_c_tokenizer(source, extra_tokens=False):
"""Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
import _tokenize as c_tokenizer
> for info in c_tokenizer.TokenizerIter(source, extra_tokens=extra_tokens):
E File "<string>", line 1
E # -*- coding: utf-8 -*-
E ^
E SyntaxError: invalid non-printable character U+FEFF
/usr/lib/python3.12/tokenize.py:542: SyntaxError
__________________________________ ExtractPythonTestCase.test_utf8_raw_strings_match_unicode_strings __________________________________
self = <tests.messages.test_extract.ExtractPythonTestCase testMethod=test_utf8_raw_strings_match_unicode_strings>
def test_utf8_raw_strings_match_unicode_strings(self):
buf = BytesIO(codecs.BOM_UTF8 + """
msg = _('Bonjour à tous')
msgu = _(u'Bonjour à tous')
""".encode('utf-8'))
> messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
tests/messages/test_extract.py:393:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
babel/messages/extract.py:500: in extract_python
for tok, value, (lineno, _), _, _ in tokens:
/usr/lib/python3.12/tokenize.py:451: in _tokenize
for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
source = "\ufeff\nmsg = _('Bonjour à tous')\nmsgu = _(u'Bonjour à tous')\n", extra_tokens = True
def _generate_tokens_from_c_tokenizer(source, extra_tokens=False):
"""Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
import _tokenize as c_tokenizer
> for info in c_tokenizer.TokenizerIter(source, extra_tokens=extra_tokens):
E File "<string>", line 1
E
E ^
E SyntaxError: invalid non-printable character U+FEFF
/usr/lib/python3.12/tokenize.py:542: SyntaxError
___________________________________________________ ExtractTestCase.test_f_strings ____________________________________________________
self = <tests.messages.test_extract.ExtractTestCase testMethod=test_f_strings>
def test_f_strings(self):
buf = BytesIO(br"""
t1 = _('foobar')
t2 = _(f'spameggs' f'feast') # should be extracted; constant parts only
t2 = _(f'spameggs' 'kerroshampurilainen') # should be extracted (mixing f with no f)
t3 = _(f'''whoa! a ''' # should be extracted (continues on following lines)
f'flying shark'
'... hello'
)
t4 = _(f'spameggs {t1}') # should not be extracted
""")
messages = list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {}))
> assert len(messages) == 4
E AssertionError: assert 3 == 4
E + where 3 = len([(2, 'foobar', [], None), (4, 'kerroshampurilainen', [], None), (5, '... hello', [], None)])
tests/messages/test_extract.py:544: AssertionError
_______________________________________________ ExtractTestCase.test_f_strings_non_utf8 _______________________________________________
self = <tests.messages.test_extract.ExtractTestCase testMethod=test_f_strings_non_utf8>
def test_f_strings_non_utf8(self):
buf = BytesIO(b"""
# -- coding: latin-1 --
t2 = _(f'\xe5\xe4\xf6' f'\xc5\xc4\xd6')
""")
messages = list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {}))
> assert len(messages) == 1
E assert 0 == 1
E + where 0 = len([])
tests/messages/test_extract.py:556: AssertionError
Expected Results
Passing tests (or at least passing as well as py3.11 did).
Reproducibility
Always.
Additional Information
Confirmed with git 8b152db.
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels