changeset: 74354:bbed36370b08 user: Antoine Pitrou date: Thu Jan 12 22:46:19 2012 +0100 files: Doc/reference/lexical_analysis.rst Lib/test/test_strlit.py Lib/test/tokenize_tests.txt Misc/NEWS Parser/tokenizer.c Python/ast.c description: Issue #13748: Raw bytes literals can now be written with the `rb` prefix as well as `br`. diff -r 96525742d188 -r bbed36370b08 Doc/reference/lexical_analysis.rst --- a/Doc/reference/lexical_analysis.rst Thu Jan 12 22:38:13 2012 +0100 +++ b/Doc/reference/lexical_analysis.rst Thu Jan 12 22:46:19 2012 +0100 @@ -412,7 +412,7 @@ .. productionlist:: bytesliteral: `bytesprefix`(`shortbytes` | `longbytes`) - bytesprefix: "b" | "B" | "br" | "Br" | "bR" | "BR" + bytesprefix: "b" | "B" | "br" | "Br" | "bR" | "BR" | "rb" | "rB" | "Rb" | "RB" shortbytes: "'" `shortbytesitem`* "'" | '"' `shortbytesitem`* '"' longbytes: "'''" `longbytesitem`* "'''" | '"""' `longbytesitem`* '"""' shortbytesitem: `shortbyteschar` | `bytesescapeseq` @@ -446,6 +446,10 @@ literal characters. As a result, in string literals, ``'\U'`` and ``'\u'`` escapes in raw strings are not treated specially. + .. versionadded:: 3.3 + The ``'rb'`` prefix of raw bytes literals has been added as a synonym + of ``'br'``. + In triple-quoted strings, unescaped newlines and quotes are allowed (and are retained), except that three unescaped quotes in a row terminate the string. (A "quote" is the character used to open the string, i.e. either ``'`` or ``"``.) diff -r 96525742d188 -r bbed36370b08 Lib/test/test_strlit.py --- a/Lib/test/test_strlit.py Thu Jan 12 22:38:13 2012 +0100 +++ b/Lib/test/test_strlit.py Thu Jan 12 22:46:19 2012 +0100 @@ -2,10 +2,10 @@ There are four types of string literals: - 'abc' -- normal str - r'abc' -- raw str - b'xyz' -- normal bytes - br'xyz' -- raw bytes + 'abc' -- normal str + r'abc' -- raw str + b'xyz' -- normal bytes + br'xyz' | rb'xyz' -- raw bytes The difference between normal and raw strings is of course that in a raw string, \ escapes (while still used to determine the end of the @@ -103,12 +103,25 @@ def test_eval_bytes_raw(self): self.assertEqual(eval(""" br'x' """), b'x') + self.assertEqual(eval(""" rb'x' """), b'x') self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01') + self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01') self.assertEqual(eval(""" br'\x01' """), byte(1)) + self.assertEqual(eval(""" rb'\x01' """), byte(1)) self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81") + self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81") self.assertRaises(SyntaxError, eval, """ br'\x81' """) + self.assertRaises(SyntaxError, eval, """ rb'\x81' """) self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881") + self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881") self.assertRaises(SyntaxError, eval, """ br'\u1881' """) + self.assertRaises(SyntaxError, eval, """ rb'\u1881' """) + self.assertRaises(SyntaxError, eval, """ bb'' """) + self.assertRaises(SyntaxError, eval, """ rr'' """) + self.assertRaises(SyntaxError, eval, """ brr'' """) + self.assertRaises(SyntaxError, eval, """ bbr'' """) + self.assertRaises(SyntaxError, eval, """ rrb'' """) + self.assertRaises(SyntaxError, eval, """ rbb'' """) def check_encoding(self, encoding, extra=""): modname = "xx_" + encoding.replace("-", "_") diff -r 96525742d188 -r bbed36370b08 Lib/test/tokenize_tests.txt --- a/Lib/test/tokenize_tests.txt Thu Jan 12 22:38:13 2012 +0100 +++ b/Lib/test/tokenize_tests.txt Thu Jan 12 22:46:19 2012 +0100 @@ -114,8 +114,12 @@ y = b"abc" + B"ABC" x = br'abc' + Br'ABC' + bR'ABC' + BR'ABC' y = br"abc" + Br"ABC" + bR"ABC" + BR"ABC" +x = rb'abc' + rB'ABC' + Rb'ABC' + RB'ABC' +y = rb"abc" + rB"ABC" + Rb"ABC" + RB"ABC" x = br'\\' + BR'\\' +x = rb'\\' + RB'\\' x = br'\'' + '' +x = rb'\'' + '' y = br''' foo bar \\ baz''' + BR''' @@ -124,6 +128,10 @@ bar \\ baz """ + bR'''spam ''' +y = rB"""foo +bar \\ baz +""" + Rb'''spam +''' # Indentation if 1: diff -r 96525742d188 -r bbed36370b08 Misc/NEWS --- a/Misc/NEWS Thu Jan 12 22:38:13 2012 +0100 +++ b/Misc/NEWS Thu Jan 12 22:46:19 2012 +0100 @@ -10,6 +10,9 @@ Core and Builtins ----------------- +- Issue #13748: Raw bytes literals can now be written with the ``rb`` prefix + as well as ``br``. + - Issue #12736: Use full unicode case mappings for upper, lower, and title case. - Issue #12760: Add a create mode to open(). Patch by David Townshend. diff -r 96525742d188 -r bbed36370b08 Parser/tokenizer.c --- a/Parser/tokenizer.c Thu Jan 12 22:38:13 2012 +0100 +++ b/Parser/tokenizer.c Thu Jan 12 22:46:19 2012 +0100 @@ -1412,13 +1412,15 @@ /* Identifier (most frequent token!) */ nonascii = 0; if (is_potential_identifier_start(c)) { - /* Process b"", r"" and br"" */ - if (c == 'b' || c == 'B') { - c = tok_nextc(tok); - if (c == '"' || c == '\'') - goto letter_quote; - } - if (c == 'r' || c == 'R') { + /* Process b"", r"", br"" and rb"" */ + int saw_b = 0, saw_r = 0; + while (1) { + if (!saw_b && (c == 'b' || c == 'B')) + saw_b = 1; + else if (!saw_r && (c == 'r' || c == 'R')) + saw_r = 1; + else + break; c = tok_nextc(tok); if (c == '"' || c == '\'') goto letter_quote; diff -r 96525742d188 -r bbed36370b08 Python/ast.c --- a/Python/ast.c Thu Jan 12 22:38:13 2012 +0100 +++ b/Python/ast.c Thu Jan 12 22:46:19 2012 +0100 @@ -3744,13 +3744,18 @@ int rawmode = 0; int need_encoding; if (isalpha(quote)) { - if (quote == 'b' || quote == 'B') { - quote = *++s; - *bytesmode = 1; - } - if (quote == 'r' || quote == 'R') { - quote = *++s; - rawmode = 1; + while (!*bytesmode || !rawmode) { + if (quote == 'b' || quote == 'B') { + quote = *++s; + *bytesmode = 1; + } + else if (quote == 'r' || quote == 'R') { + quote = *++s; + rawmode = 1; + } + else { + break; + } } } if (quote != '\'' && quote != '\"') {