Skip to content

Commit 3fc8b74

Browse files
authored
[3.10] bpo-46091: Correctly calculate indentation levels for whitespace lines with continuation characters (GH-30130). (GH-30898)
(cherry picked from commit a0efc0c) Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
1 parent 4a57fa2 commit 3fc8b74

File tree

5 files changed

+67
-16
lines changed

5 files changed

+67
-16
lines changed

Lib/test/test_ast.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,8 +1045,7 @@ def test_literal_eval_malformed_lineno(self):
10451045
ast.literal_eval(node)
10461046

10471047
def test_literal_eval_syntax_errors(self):
1048-
msg = "unexpected character after line continuation character"
1049-
with self.assertRaisesRegex(SyntaxError, msg):
1048+
with self.assertRaisesRegex(SyntaxError, "unexpected indent"):
10501049
ast.literal_eval(r'''
10511050
\
10521051
(\

Lib/test/test_syntax.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1463,6 +1463,36 @@ def test_empty_line_after_linecont(self):
14631463
except SyntaxError:
14641464
self.fail("Empty line after a line continuation character is valid.")
14651465

1466+
# See issue-46091
1467+
s1 = r"""\
1468+
def fib(n):
1469+
\
1470+
'''Print a Fibonacci series up to n.'''
1471+
\
1472+
a, b = 0, 1
1473+
"""
1474+
s2 = r"""\
1475+
def fib(n):
1476+
'''Print a Fibonacci series up to n.'''
1477+
a, b = 0, 1
1478+
"""
1479+
try:
1480+
self.assertEqual(compile(s1, '<string>', 'exec'), compile(s2, '<string>', 'exec'))
1481+
except SyntaxError:
1482+
self.fail("Indented statement over multiple lines is valid")
1483+
1484+
def test_continuation_bad_indentation(self):
1485+
# Check that code that breaks indentation across multiple lines raises a syntax error
1486+
1487+
code = r"""\
1488+
if x:
1489+
y = 1
1490+
\
1491+
foo = 1
1492+
"""
1493+
1494+
self.assertRaises(IndentationError, exec, code)
1495+
14661496
@support.cpython_only
14671497
def test_nested_named_except_blocks(self):
14681498
code = ""

Lib/test/test_tokenize.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
NEWLINE)
77
from io import BytesIO, StringIO
88
import unittest
9+
from textwrap import dedent
910
from unittest import TestCase, mock
1011
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
1112
INVALID_UNDERSCORE_LITERALS)
@@ -45,7 +46,6 @@ def check_tokenize(self, s, expected):
4546
# The ENDMARKER and final NEWLINE are omitted.
4647
f = BytesIO(s.encode('utf-8'))
4748
result = stringify_tokens_from_source(tokenize(f.readline), s)
48-
4949
self.assertEqual(result,
5050
[" ENCODING 'utf-8' (0, 0) (0, 0)"] +
5151
expected.rstrip().splitlines())
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Correctly calculate indentation levels for lines with whitespace characters
2+
that end with line continuation characters. Patch by Pablo Galindo.

Parser/tokenizer.c

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,6 +1346,24 @@ tok_decimal_tail(struct tok_state *tok)
13461346

13471347
/* Get next token, after space stripping etc. */
13481348

1349+
static inline int
1350+
tok_continuation_line(struct tok_state *tok) {
1351+
int c = tok_nextc(tok);
1352+
if (c != '\n') {
1353+
tok->done = E_LINECONT;
1354+
return -1;
1355+
}
1356+
c = tok_nextc(tok);
1357+
if (c == EOF) {
1358+
tok->done = E_EOF;
1359+
tok->cur = tok->inp;
1360+
return -1;
1361+
} else {
1362+
tok_backup(tok, c);
1363+
}
1364+
return c;
1365+
}
1366+
13491367
static int
13501368
tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
13511369
{
@@ -1362,6 +1380,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
13621380
int col = 0;
13631381
int altcol = 0;
13641382
tok->atbol = 0;
1383+
int cont_line_col = 0;
13651384
for (;;) {
13661385
c = tok_nextc(tok);
13671386
if (c == ' ') {
@@ -1374,14 +1393,23 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
13741393
else if (c == '\014') {/* Control-L (formfeed) */
13751394
col = altcol = 0; /* For Emacs users */
13761395
}
1396+
else if (c == '\\') {
1397+
// Indentation cannot be split over multiple physical lines
1398+
// using backslashes. This means that if we found a backslash
1399+
// preceded by whitespace, **the first one we find** determines
1400+
// the level of indentation of whatever comes next.
1401+
cont_line_col = cont_line_col ? cont_line_col : col;
1402+
if ((c = tok_continuation_line(tok)) == -1) {
1403+
return ERRORTOKEN;
1404+
}
1405+
}
13771406
else {
13781407
break;
13791408
}
13801409
}
13811410
tok_backup(tok, c);
1382-
if (c == '#' || c == '\n' || c == '\\') {
1411+
if (c == '#' || c == '\n') {
13831412
/* Lines with only whitespace and/or comments
1384-
and/or a line continuation character
13851413
shouldn't affect the indentation and are
13861414
not passed to the parser as NEWLINE tokens,
13871415
except *totally* empty lines in interactive
@@ -1402,6 +1430,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
14021430
may need to skip to the end of a comment */
14031431
}
14041432
if (!blankline && tok->level == 0) {
1433+
col = cont_line_col ? cont_line_col : col;
1434+
altcol = cont_line_col ? cont_line_col : altcol;
14051435
if (col == tok->indstack[tok->indent]) {
14061436
/* No change */
14071437
if (altcol != tok->altindstack[tok->indent]) {
@@ -1963,19 +1993,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
19631993

19641994
/* Line continuation */
19651995
if (c == '\\') {
1966-
c = tok_nextc(tok);
1967-
if (c != '\n') {
1968-
tok->done = E_LINECONT;
1996+
if ((c = tok_continuation_line(tok)) == -1) {
19691997
return ERRORTOKEN;
19701998
}
1971-
c = tok_nextc(tok);
1972-
if (c == EOF) {
1973-
tok->done = E_EOF;
1974-
tok->cur = tok->inp;
1975-
return ERRORTOKEN;
1976-
} else {
1977-
tok_backup(tok, c);
1978-
}
19791999
tok->cont_line = 1;
19802000
goto again; /* Read next line */
19812001
}

0 commit comments

Comments
 (0)