changeset: 89345:8d6dd02a973f branch: 3.3 parent: 89342:38a06e411698 user: Terry Jan Reedy date: Sun Feb 23 18:00:31 2014 -0500 files: Lib/test/test_tokenize.py description: Issue #20750, Enable roundtrip tests for new 5-tuple untokenize. The constructed examples and all but 7 of the test/test_*.py files (run with -ucpu) pass. Remove those that fail the new test from the selection list. Patch partly based on patches by G. Brandl (#8478) and G. Rees (#12691). diff -r 38a06e411698 -r 8d6dd02a973f Lib/test/test_tokenize.py --- a/Lib/test/test_tokenize.py Sun Feb 23 19:39:06 2014 +0100 +++ b/Lib/test/test_tokenize.py Sun Feb 23 18:00:31 2014 -0500 @@ -578,9 +578,15 @@ >>> tempdir = os.path.dirname(f) or os.curdir >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py")) -tokenize is broken on test_pep3131.py because regular expressions are broken on -the obscure unicode identifiers in it. *sigh* +Tokenize is broken on test_pep3131.py because regular expressions are +broken on the obscure unicode identifiers in it. *sigh* +With roundtrip extended to test the 5-tuple mode of untokenize, +7 more testfiles fail. Remove them also until the failure is diagnosed. + >>> testfiles.remove(os.path.join(tempdir, "test_pep3131.py")) + >>> for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'): + ... testfiles.remove(os.path.join(tempdir, "test_%s.py") % f) + ... >>> if not support.is_resource_enabled("cpu"): ... testfiles = random.sample(testfiles, 10) ... @@ -659,21 +665,39 @@ def roundtrip(f): """ Test roundtrip for `untokenize`. `f` is an open file or a string. - The source code in f is tokenized, converted back to source code via - tokenize.untokenize(), and tokenized again from the latter. The test - fails if the second tokenization doesn't match the first. + The source code in f is tokenized to both 5- and 2-tuples. + Both sequences are converted back to source code via + tokenize.untokenize(), and the latter tokenized again to 2-tuples. + The test fails if the 3 pair tokenizations do not match. + + When untokenize bugs are fixed, untokenize with 5-tuples should + reproduce code that does not contain a backslash continuation + following spaces. A proper test should test this. + + This function would be more useful for correcting bugs if it reported + the first point of failure, like assertEqual, rather than just + returning False -- or if it were only used in unittests and not + doctest and actually used assertEqual. """ + # Get source code and original tokenizations if isinstance(f, str): - f = BytesIO(f.encode('utf-8')) - try: - token_list = list(tokenize(f.readline)) - finally: + code = f.encode('utf-8') + else: + code = f.read() f.close() - tokens1 = [tok[:2] for tok in token_list] - new_bytes = untokenize(tokens1) - readline = (line for line in new_bytes.splitlines(keepends=True)).__next__ - tokens2 = [tok[:2] for tok in tokenize(readline)] - return tokens1 == tokens2 + readline = iter(code.splitlines(keepends=True)).__next__ + tokens5 = list(tokenize(readline)) + tokens2 = [tok[:2] for tok in tokens5] + # Reproduce tokens2 from pairs + bytes_from2 = untokenize(tokens2) + readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__ + tokens2_from2 = [tok[:2] for tok in tokenize(readline2)] + # Reproduce tokens2 from 5-tuples + bytes_from5 = untokenize(tokens5) + readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__ + tokens2_from5 = [tok[:2] for tok in tokenize(readline5)] + # Compare 3 versions + return tokens2 == tokens2_from2 == tokens2_from5 # This is an example from the docs, set up as a doctest. def decistmt(s):