Skip to content

Commit c977624

Browse files
authored
Refactor PythonConsoleLexer as a DelegatingLexer (#2412)
This is simpler and more reliable than hand-coding the state machine. Fixes #2411
1 parent 50dd4d8 commit c977624

6 files changed

Lines changed: 125 additions & 96 deletions

File tree

pygments/lexers/python.py

Lines changed: 55 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
import re
1212
import keyword
1313

14-
from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \
15-
default, words, combined, do_insertions, this, line_re
14+
from pygments.lexer import DelegatingLexer, Lexer, RegexLexer, include, \
15+
bygroups, using, default, words, combined, do_insertions, this, line_re
1616
from pygments.util import get_bool_opt, shebang_matches
1717
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
1818
Number, Punctuation, Generic, Other, Error, Whitespace
@@ -635,8 +635,43 @@ def innerstring_rules(ttype):
635635
def analyse_text(text):
636636
return shebang_matches(text, r'pythonw?2(\.\d)?')
637637

638+
class _PythonConsoleLexerBase(RegexLexer):
639+
name = 'Python console session'
640+
aliases = ['pycon']
641+
mimetypes = ['text/x-python-doctest']
638642

639-
class PythonConsoleLexer(Lexer):
643+
"""Auxiliary lexer for `PythonConsoleLexer`.
644+
645+
Code tokens are output as ``Token.Other.Code``, traceback tokens as
646+
``Token.Other.Traceback``.
647+
"""
648+
tokens = {
649+
'root': [
650+
(r'(>>> )(.*\n)', bygroups(Generic.Prompt, Other.Code), 'continuations'),
651+
# This happens, e.g., when tracebacks are embedded in documentation;
652+
# trailing whitespace is often stripped in such contexts.
653+
(r'(>>>)(\n)', bygroups(Generic.Prompt, Whitespace)),
654+
(r'(\^C)?Traceback \(most recent call last\):\n', Other.Traceback, 'traceback'),
655+
# SyntaxError starts with this
656+
(r' File "[^"]+", line \d+', Other.Traceback, 'traceback'),
657+
(r'.*\n', Generic.Output),
658+
],
659+
'continuations': [
660+
(r'(\.\.\. )(.*\n)', bygroups(Generic.Prompt, Other.Code)),
661+
# See above.
662+
(r'(\.\.\.)(\n)', bygroups(Generic.Prompt, Whitespace)),
663+
default('#pop'),
664+
],
665+
'traceback': [
666+
# As soon as we see a traceback, consume everything until the next
667+
# >>> prompt.
668+
(r'(?=>>>( |$))', Text, '#pop'),
669+
(r'(KeyboardInterrupt)(\n)', bygroups(Name.Class, Whitespace)),
670+
(r'.*\n', Other.Traceback),
671+
],
672+
}
673+
674+
class PythonConsoleLexer(DelegatingLexer):
640675
"""
641676
For Python console output or doctests, such as:
642677
@@ -659,70 +694,28 @@ class PythonConsoleLexer(Lexer):
659694
.. versionchanged:: 2.5
660695
Now defaults to ``True``.
661696
"""
697+
662698
name = 'Python console session'
663699
aliases = ['pycon']
664700
mimetypes = ['text/x-python-doctest']
665701

666702
def __init__(self, **options):
667-
self.python3 = get_bool_opt(options, 'python3', True)
668-
Lexer.__init__(self, **options)
669-
670-
def get_tokens_unprocessed(self, text):
671-
if self.python3:
672-
pylexer = PythonLexer(**self.options)
673-
tblexer = PythonTracebackLexer(**self.options)
703+
python3 = get_bool_opt(options, 'python3', True)
704+
if python3:
705+
pylexer = PythonLexer
706+
tblexer = PythonTracebackLexer
674707
else:
675-
pylexer = Python2Lexer(**self.options)
676-
tblexer = Python2TracebackLexer(**self.options)
677-
678-
curcode = ''
679-
insertions = []
680-
curtb = ''
681-
tbindex = 0
682-
in_tb = False
683-
for match in line_re.finditer(text):
684-
line = match.group()
685-
if line.startswith('>>> ') or line.startswith('... '):
686-
in_tb = False
687-
insertions.append((len(curcode),
688-
[(0, Generic.Prompt, line[:4])]))
689-
curcode += line[4:]
690-
elif line.rstrip() == '...' and not in_tb:
691-
# only a new >>> prompt can end an exception block
692-
# otherwise an ellipsis in place of the traceback frames
693-
# will be mishandled
694-
insertions.append((len(curcode),
695-
[(0, Generic.Prompt, '...')]))
696-
curcode += line[3:]
697-
else:
698-
if curcode:
699-
yield from do_insertions(
700-
insertions, pylexer.get_tokens_unprocessed(curcode))
701-
curcode = ''
702-
insertions = []
703-
if in_tb:
704-
curtb += line
705-
if not (line.startswith(' ') or line.strip() == '...'):
706-
in_tb = False
707-
for i, t, v in tblexer.get_tokens_unprocessed(curtb):
708-
yield tbindex+i, t, v
709-
curtb = ''
710-
elif (line.startswith('Traceback (most recent call last):') or
711-
re.match(' File "[^"]+", line \\d+\\n$', line)):
712-
in_tb = True
713-
curtb = line
714-
tbindex = match.start()
715-
elif line == 'KeyboardInterrupt\n':
716-
yield match.start(), Name.Class, line
717-
else:
718-
yield match.start(), Generic.Output, line
719-
if curcode:
720-
yield from do_insertions(insertions,
721-
pylexer.get_tokens_unprocessed(curcode))
722-
if curtb:
723-
for i, t, v in tblexer.get_tokens_unprocessed(curtb):
724-
yield tbindex+i, t, v
725-
708+
pylexer = Python2Lexer
709+
tblexer = Python2TracebackLexer
710+
# We have two auxiliary lexers. Use DelegatingLexer twice with
711+
# different tokens. TODO: DelegatingLexer should support this
712+
# directly, by accepting a tuple of auxiliary lexers and a tuple of
713+
# distinguishing tokens. Then we wouldn't need this intermediary
714+
# class.
715+
class _ReplaceInnerCode(DelegatingLexer):
716+
def __init__(self, **options):
717+
super().__init__(pylexer, _PythonConsoleLexerBase, Other.Code, **options)
718+
super().__init__(tblexer, _ReplaceInnerCode, Other.Traceback, **options)
726719

727720
class PythonTracebackLexer(RegexLexer):
728721
"""
@@ -743,7 +736,7 @@ class PythonTracebackLexer(RegexLexer):
743736
tokens = {
744737
'root': [
745738
(r'\n', Whitespace),
746-
(r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
739+
(r'^(\^C)?Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
747740
(r'^During handling of the above exception, another '
748741
r'exception occurred:\n\n', Generic.Traceback),
749742
(r'^The above exception was the direct cause of the '

tests/examplefiles/pycon/pycon_ctrlc_traceback.output

Lines changed: 43 additions & 27 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/examplefiles/pycon/pycon_test.pycon.output

Lines changed: 6 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/snippets/pycon/broken_tb.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,9 @@ SyntaxError: EOL while scanning string literal
5454
'EOL while scanning string literal' Name
5555
'\n' Text.Whitespace
5656

57-
'\n' Generic.Output
57+
'\n' Text.Whitespace
5858

59+
'' Text
5960
'>>> ' Generic.Prompt
6061
'exec' Name
6162
'(' Punctuation

tests/snippets/pycon/multiple_tb.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,6 @@ AttributeError: Can't get attribute 'f' on <module '__main__' (<class '_frozen_i
103103
"Can't get attribute 'f' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>" Name
104104
'\n' Text.Whitespace
105105

106-
"AttributeError: Can't get attribute 'f' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>\n" Generic.Output
106+
"AttributeError: Can't get attribute 'f' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>\n" Other
107107

108-
"AttributeError: Can't get attribute 'f' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>\n" Generic.Output
108+
"AttributeError: Can't get attribute 'f' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>\n" Other
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---input---
2+
>>> unterminated_traceback()
3+
Traceback (most recent call last):
4+
>>>
5+
6+
---tokens---
7+
'>>> ' Generic.Prompt
8+
'unterminated_traceback' Name
9+
'(' Punctuation
10+
')' Punctuation
11+
'\n' Text.Whitespace
12+
13+
'Traceback (most recent call last):\n' Generic.Traceback
14+
15+
'' Text
16+
'>>>' Generic.Prompt
17+
'\n' Text.Whitespace

0 commit comments

Comments
 (0)