Skip to content

Commit 89a3102

Browse files
[3.6] bpo-29104: Fixed parsing backslashes in f-strings. (GH-490) (#1812)
(cherry picked from commit 0cd7a3f)
1 parent a2a9984 commit 89a3102

3 files changed

Lines changed: 48 additions & 21 deletions

File tree

Lib/test/test_fstring.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,20 @@ def test_backslashes_in_string_part(self):
361361
self.assertEqual(f'2\x203', '2 3')
362362
self.assertEqual(f'\x203', ' 3')
363363

364+
with self.assertWarns(DeprecationWarning): # invalid escape sequence
365+
value = eval(r"f'\{6*7}'")
366+
self.assertEqual(value, '\\42')
367+
self.assertEqual(f'\\{6*7}', '\\42')
368+
self.assertEqual(fr'\{6*7}', '\\42')
369+
370+
AMPERSAND = 'spam'
371+
# Get the right unicode character (&), or pick up local variable
372+
# depending on the number of backslashes.
373+
self.assertEqual(f'\N{AMPERSAND}', '&')
374+
self.assertEqual(f'\\N{AMPERSAND}', '\\Nspam')
375+
self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam')
376+
self.assertEqual(f'\\\N{AMPERSAND}', '\\&')
377+
364378
def test_misformed_unicode_character_name(self):
365379
# These tests are needed because unicode names are parsed
366380
# differently inside f-strings.

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ What's New in Python 3.6.2 release candidate 1?
1010
Core and Builtins
1111
-----------------
1212

13+
- bpo-29104: Fixed parsing backslashes in f-strings.
14+
1315
- bpo-27945: Fixed various segfaults with dict when input collections are
1416
mutated during searching, inserting or comparing. Based on patches by
1517
Duane Griffin and Tim Mitchell.

Python/ast.c

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4170,9 +4170,11 @@ decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
41704170
while (s < end) {
41714171
if (*s == '\\') {
41724172
*p++ = *s++;
4173-
if (*s & 0x80) {
4173+
if (s >= end || *s & 0x80) {
41744174
strcpy(p, "u005c");
41754175
p += 5;
4176+
if (s >= end)
4177+
break;
41764178
}
41774179
}
41784180
if (*s & 0x80) { /* XXX inefficient */
@@ -4325,59 +4327,68 @@ fstring_find_literal(const char **str, const char *end, int raw,
43254327
brace (which isn't part of a unicode name escape such as
43264328
"\N{EULER CONSTANT}"), or the end of the string. */
43274329

4328-
const char *literal_start = *str;
4329-
const char *literal_end;
4330-
int in_named_escape = 0;
4330+
const char *s = *str;
4331+
const char *literal_start = s;
43314332
int result = 0;
43324333

43334334
assert(*literal == NULL);
4334-
for (; *str < end; (*str)++) {
4335-
char ch = **str;
4336-
if (!in_named_escape && ch == '{' && (*str)-literal_start >= 2 &&
4337-
*(*str-2) == '\\' && *(*str-1) == 'N') {
4338-
in_named_escape = 1;
4339-
} else if (in_named_escape && ch == '}') {
4340-
in_named_escape = 0;
4341-
} else if (ch == '{' || ch == '}') {
4335+
while (s < end) {
4336+
char ch = *s++;
4337+
if (!raw && ch == '\\' && s < end) {
4338+
ch = *s++;
4339+
if (ch == 'N') {
4340+
if (s < end && *s++ == '{') {
4341+
while (s < end && *s++ != '}') {
4342+
}
4343+
continue;
4344+
}
4345+
break;
4346+
}
4347+
if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
4348+
return -1;
4349+
}
4350+
}
4351+
if (ch == '{' || ch == '}') {
43424352
/* Check for doubled braces, but only at the top level. If
43434353
we checked at every level, then f'{0:{3}}' would fail
43444354
with the two closing braces. */
43454355
if (recurse_lvl == 0) {
4346-
if (*str+1 < end && *(*str+1) == ch) {
4356+
if (s < end && *s == ch) {
43474357
/* We're going to tell the caller that the literal ends
43484358
here, but that they should continue scanning. But also
43494359
skip over the second brace when we resume scanning. */
4350-
literal_end = *str+1;
4351-
*str += 2;
4360+
*str = s + 1;
43524361
result = 1;
43534362
goto done;
43544363
}
43554364

43564365
/* Where a single '{' is the start of a new expression, a
43574366
single '}' is not allowed. */
43584367
if (ch == '}') {
4368+
*str = s - 1;
43594369
ast_error(c, n, "f-string: single '}' is not allowed");
43604370
return -1;
43614371
}
43624372
}
43634373
/* We're either at a '{', which means we're starting another
43644374
expression; or a '}', which means we're at the end of this
43654375
f-string (for a nested format_spec). */
4376+
s--;
43664377
break;
43674378
}
43684379
}
4369-
literal_end = *str;
4370-
assert(*str <= end);
4371-
assert(*str == end || **str == '{' || **str == '}');
4380+
*str = s;
4381+
assert(s <= end);
4382+
assert(s == end || *s == '{' || *s == '}');
43724383
done:
4373-
if (literal_start != literal_end) {
4384+
if (literal_start != s) {
43744385
if (raw)
43754386
*literal = PyUnicode_DecodeUTF8Stateful(literal_start,
4376-
literal_end-literal_start,
4387+
s - literal_start,
43774388
NULL, NULL);
43784389
else
43794390
*literal = decode_unicode_with_escapes(c, n, literal_start,
4380-
literal_end-literal_start);
4391+
s - literal_start);
43814392
if (!*literal)
43824393
return -1;
43834394
}

0 commit comments

Comments
 (0)