diff -r c3fa46d85857 Lib/lib2to3/pgen2/tokenize.py --- a/Lib/lib2to3/pgen2/tokenize.py Tue Jul 21 00:54:19 2015 -0700 +++ b/Lib/lib2to3/pgen2/tokenize.py Tue Jul 21 16:23:40 2015 +0300 @@ -369,6 +369,7 @@ # 'stashed' and 'ctx' are used for async/await parsing stashed = None ctx = [('sync', 0)] + in_async = 0 while 1: # loop over lines in stream try: @@ -436,6 +437,14 @@ "unindent does not match any outer indentation level", ("", lnum, pos, line)) indents = indents[:-1] + + cur_indent = indents[-1] + while len(ctx) > 1 and ctx[-1][1] >= cur_indent: + if ctx[-1][0] == 'async': + in_async -= 1 + assert in_async >= 0 + ctx.pop() + yield (DEDENT, '', (lnum, pos), (lnum, pos), line) else: # continued statement @@ -499,7 +508,7 @@ yield (STRING, token, spos, epos, line) elif initial in namechars: # ordinary name if token in ('async', 'await'): - if ctx[-1][0] == 'async' and ctx[-1][1] < indents[-1]: + if in_async: yield (ASYNC if token == 'async' else AWAIT, token, spos, epos, line) continue @@ -515,6 +524,7 @@ and stashed[1] == 'async'): ctx.append(('async', indents[-1])) + in_async += 1 yield (ASYNC, stashed[1], stashed[2], stashed[3], diff -r c3fa46d85857 Lib/test/badsyntax_async1.py --- a/Lib/test/badsyntax_async1.py Tue Jul 21 00:54:19 2015 -0700 +++ b/Lib/test/badsyntax_async1.py Tue Jul 21 16:23:40 2015 +0300 @@ -1,3 +1,2 @@ -async def foo(): - def foo(a=await something()): - pass +async def foo(a=await something()): + pass diff -r c3fa46d85857 Lib/test/badsyntax_async2.py --- a/Lib/test/badsyntax_async2.py Tue Jul 21 00:54:19 2015 -0700 +++ b/Lib/test/badsyntax_async2.py Tue Jul 21 16:23:40 2015 +0300 @@ -1,3 +1,2 @@ -async def foo(): - def foo(a:await something()): - pass +async def foo(a:await something()): + pass diff -r c3fa46d85857 Lib/test/badsyntax_async4.py --- a/Lib/test/badsyntax_async4.py Tue Jul 21 00:54:19 2015 -0700 +++ b/Lib/test/badsyntax_async4.py Tue Jul 21 16:23:40 2015 +0300 @@ -1,2 +1,2 @@ async def foo(): - async def foo(): await something() + await diff -r c3fa46d85857 Lib/test/badsyntax_async9.py --- a/Lib/test/badsyntax_async9.py Tue Jul 21 00:54:19 2015 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -async def foo(): - await diff -r c3fa46d85857 Lib/test/test_coroutines.py --- a/Lib/test/test_coroutines.py Tue Jul 21 00:54:19 2015 -0700 +++ b/Lib/test/test_coroutines.py Tue Jul 21 16:23:40 2015 +0300 @@ -67,11 +67,11 @@ class AsyncBadSyntaxTest(unittest.TestCase): def test_badsyntax_1(self): - with self.assertRaisesRegex(SyntaxError, 'invalid syntax'): + with self.assertRaisesRegex(SyntaxError, "'await' outside"): import test.badsyntax_async1 def test_badsyntax_2(self): - with self.assertRaisesRegex(SyntaxError, 'invalid syntax'): + with self.assertRaisesRegex(SyntaxError, "'await' outside"): import test.badsyntax_async2 def test_badsyntax_3(self): @@ -103,10 +103,6 @@ import test.badsyntax_async8 def test_badsyntax_9(self): - with self.assertRaisesRegex(SyntaxError, 'invalid syntax'): - import test.badsyntax_async9 - - def test_badsyntax_10(self): ns = {} for comp in {'(await a for a in b)', '[await a for a in b]', @@ -116,6 +112,94 @@ with self.assertRaisesRegex(SyntaxError, 'await.*in comprehen'): exec('async def f():\n\t{}'.format(comp), ns, ns) + def test_badsyntax_10(self): + # Tests for issue 24619 + + samples = [ + """async def foo(): + def bar(): pass + await = 1 + """, + + """async def foo(): + + def bar(): pass + await = 1 + """, + + """async def foo(): + def bar(): pass + if 1: + await = 1 + """, + + """def foo(): + async def bar(): pass + if 1: + await a + """, + + """def foo(): + async def bar(): pass + await a + """, + + """def foo(): + def baz(): pass + async def bar(): pass + await a + """, + + """async def foo(): + def async(): pass + """, + + """async def foo(): + def await(): pass + """, + + """async def foo(): + def bar(): + await + """, + + """async def foo(): + return lambda async: await + """, + + """async def foo(): + return lambda a: await + """, + + """async def foo(a: await b): + pass + """, + + """def baz(): + async def foo(a: await b): + pass + """, + + """async def foo(async): + pass + """, + + """async def foo(await): + pass + """, + + """def foo(): + + async def bar(): pass + + await a + """] + + ns = {} + for code in samples: + with self.subTest(code=code), self.assertRaises(SyntaxError): + exec(code, ns, ns) + class TokenizerRegrTest(unittest.TestCase): @@ -461,8 +545,7 @@ class Awaitable: pass - async def foo(): - return (await Awaitable()) + async def foo(): return await Awaitable() with self.assertRaisesRegex( TypeError, "object Awaitable can't be used in 'await' expression"): diff -r c3fa46d85857 Lib/test/test_grammar.py --- a/Lib/test/test_grammar.py Tue Jul 21 00:54:19 2015 -0700 +++ b/Lib/test/test_grammar.py Tue Jul 21 16:23:40 2015 +0300 @@ -1051,10 +1051,7 @@ async def test(): def sum(): - async = 1 - await = 41 - return async + await - + pass if 1: await someobj() diff -r c3fa46d85857 Lib/test/test_tokenize.py --- a/Lib/test/test_tokenize.py Tue Jul 21 00:54:19 2015 -0700 +++ b/Lib/test/test_tokenize.py Tue Jul 21 16:23:40 2015 +0300 @@ -786,12 +786,12 @@ NAME 'def' (2, 2) (2, 5) NAME 'foo' (2, 6) (2, 9) OP '(' (2, 9) (2, 10) - NAME 'await' (2, 10) (2, 15) + AWAIT 'await' (2, 10) (2, 15) OP ')' (2, 15) (2, 16) OP ':' (2, 16) (2, 17) NEWLINE '\\n' (2, 17) (2, 18) INDENT ' ' (3, 0) (3, 4) - NAME 'await' (3, 4) (3, 9) + AWAIT 'await' (3, 4) (3, 9) OP '=' (3, 10) (3, 11) NUMBER '1' (3, 12) (3, 13) NEWLINE '\\n' (3, 13) (3, 14) @@ -829,6 +829,17 @@ OP ':' (2, 18) (2, 19) NAME 'pass' (2, 20) (2, 24) DEDENT '' (3, 0) (3, 0) + + >>> dump_tokens('''async def foo(async): await''') + ENCODING 'utf-8' (0, 0) (0, 0) + ASYNC 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'foo' (1, 10) (1, 13) + OP '(' (1, 13) (1, 14) + ASYNC 'async' (1, 14) (1, 19) + OP ')' (1, 19) (1, 20) + OP ':' (1, 20) (1, 21) + AWAIT 'await' (1, 22) (1, 27) """ from test import support diff -r c3fa46d85857 Lib/tokenize.py --- a/Lib/tokenize.py Tue Jul 21 00:54:19 2015 -0700 +++ b/Lib/tokenize.py Tue Jul 21 16:23:40 2015 +0300 @@ -501,6 +501,7 @@ # 'stashed' and 'ctx' are used for async/await parsing stashed = None ctx = [('sync', 0)] + in_async = 0 if encoding is not None: if encoding == "utf-8-sig": @@ -580,6 +581,9 @@ cur_indent = indents[-1] while len(ctx) > 1 and ctx[-1][1] >= cur_indent: + if ctx[-1][0] == 'async': + in_async -= 1 + assert in_async >= 0 ctx.pop() yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line) @@ -640,7 +644,7 @@ yield TokenInfo(STRING, token, spos, epos, line) elif initial.isidentifier(): # ordinary name if token in ('async', 'await'): - if ctx[-1][0] == 'async' and ctx[-1][1] < indents[-1]: + if in_async: yield TokenInfo( ASYNC if token == 'async' else AWAIT, token, spos, epos, line) @@ -657,6 +661,7 @@ and stashed.string == 'async'): ctx.append(('async', indents[-1])) + in_async += 1 yield TokenInfo(ASYNC, stashed.string, stashed.start, stashed.end, diff -r c3fa46d85857 Parser/tokenizer.c --- a/Parser/tokenizer.c Tue Jul 21 00:54:19 2015 -0700 +++ b/Parser/tokenizer.c Tue Jul 21 16:23:40 2015 +0300 @@ -31,6 +31,9 @@ || c == '_'\ || (c >= 128)) +#define DEFTYPE_REGULAR 0 +#define DEFTYPE_ASYNC 1 + extern char *PyOS_Readline(FILE *, FILE *, const char *); /* Return malloc'ed string including trailing \n; empty malloc'ed string for EOF; @@ -129,7 +132,9 @@ tok->def = 0; tok->defstack[0] = 0; - tok->deftypestack[0] = 0; + tok->deftypestack[0] = DEFTYPE_REGULAR; + tok->def_async_behind = 0; + tok->def_in_async = 0; tok->atbol = 1; tok->pendin = 0; @@ -1436,6 +1441,10 @@ tok->pendin++; while (tok->def && tok->defstack[tok->def] >= tok->indent) { + if (tok->deftypestack[tok->def] == DEFTYPE_ASYNC) { + tok->def_in_async--; + assert(tok->def_in_async >= 0); + } tok->def--; } @@ -1501,59 +1510,62 @@ tok_len = tok->cur - tok->start; if (tok_len == 3 && memcmp(tok->start, "def", 3) == 0) { - if (tok->def && tok->deftypestack[tok->def] == 3) { - tok->deftypestack[tok->def] = 2; + /* The current token is 'def'. */ + if (tok->def + 1 >= MAXINDENT) { + tok->done = E_TOODEEP; + tok->cur = tok->inp; + return ERRORTOKEN; } - else if (tok->defstack[tok->def] < tok->indent) { + + if (tok->defstack[tok->def] < tok->indent) { /* We advance defs stack only when we see "def" *and* the indentation level was increased relative to the previous "def". */ - - if (tok->def + 1 >= MAXINDENT) { - tok->done = E_TOODEEP; - tok->cur = tok->inp; - return ERRORTOKEN; - } - tok->def++; tok->defstack[tok->def] = tok->indent; - tok->deftypestack[tok->def] = 1; + } + + if (tok->def_async_behind) { + /* The previous token was 'async'. */ + tok->def_async_behind = 0; + tok->deftypestack[tok->def] = DEFTYPE_ASYNC; + tok->def_in_async++; + } + else { + /* This is a regular function (not async def). */ + tok->deftypestack[tok->def] = DEFTYPE_REGULAR; } } else if (tok_len == 5) { if (memcmp(tok->start, "async", 5) == 0) { + /* The current token is 'async'. */ memcpy(&ahead_tok, tok, sizeof(ahead_tok)); + /* Try to look ahead one token. */ ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start, &ahead_top_end); if (ahead_tok_kind == NAME && ahead_tok.cur - ahead_tok.start == 3 && - memcmp(ahead_tok.start, "def", 3) == 0) { - - if (tok->def + 1 >= MAXINDENT) { - tok->done = E_TOODEEP; - tok->cur = tok->inp; - return ERRORTOKEN; - } - - tok->def++; - tok->defstack[tok->def] = tok->indent; - tok->deftypestack[tok->def] = 3; - + memcmp(ahead_tok.start, "def", 3) == 0) + { + /* The next token is going to be 'def', so instead of + returning 'async' NAME token, we return ASYNC. */ + tok->def_async_behind = 1; return ASYNC; } - else if (tok->def && tok->deftypestack[tok->def] == 2 - && tok->defstack[tok->def] < tok->indent) { - + else if (tok->def_in_async) + { + /* We're inside an 'async def' function, so we treat + 'async' token as ASYNC, instead of NAME. */ return ASYNC; } } - else if (memcmp(tok->start, "await", 5) == 0 - && tok->def && tok->deftypestack[tok->def] == 2 - && tok->defstack[tok->def] < tok->indent) { - + else if (memcmp(tok->start, "await", 5) == 0 && tok->def_in_async) + { + /* We're inside an 'async def' function, so we treat + 'await' token as AWAIT, instead of NAME. */ return AWAIT; } } diff -r c3fa46d85857 Parser/tokenizer.h --- a/Parser/tokenizer.h Tue Jul 21 00:54:19 2015 -0700 +++ b/Parser/tokenizer.h Tue Jul 21 16:23:40 2015 +0300 @@ -68,10 +68,11 @@ int defstack[MAXINDENT]; /* stack if funcs & indents where they were defined */ - int deftypestack[MAXINDENT]; /* stack of func types - (0 not func; 1: "def name"; - 2: "async def name") */ + int deftypestack[MAXINDENT]; /* stack of func flags */ int def; /* Length of stack of func types */ + int def_async_behind; /* was there an 'async' token before + a 'def' token */ + int def_in_async; }; extern struct tok_state *PyTokenizer_FromString(const char *, int);