1111import re
1212import keyword
1313
14- from pygments .lexer import Lexer , RegexLexer , include , bygroups , using , \
15- default , words , combined , do_insertions , this , line_re
14+ from pygments .lexer import DelegatingLexer , Lexer , RegexLexer , include , \
15+ bygroups , using , default , words , combined , do_insertions , this , line_re
1616from pygments .util import get_bool_opt , shebang_matches
1717from pygments .token import Text , Comment , Operator , Keyword , Name , String , \
1818 Number , Punctuation , Generic , Other , Error , Whitespace
@@ -635,8 +635,43 @@ def innerstring_rules(ttype):
635635 def analyse_text (text ):
636636 return shebang_matches (text , r'pythonw?2(\.\d)?' )
637637
class _PythonConsoleLexerBase(RegexLexer):
    """Auxiliary lexer for `PythonConsoleLexer`.

    Code tokens are output as ``Token.Other.Code``, traceback tokens as
    ``Token.Other.Traceback``.
    """
    # NOTE: the docstring must come directly under the class header; placed
    # after the attribute assignments it would be a dead statement and the
    # class would have ``__doc__ = None``.

    name = 'Python console session'
    aliases = ['pycon']
    mimetypes = ['text/x-python-doctest']

    tokens = {
        'root': [
            # A ">>> " prompt introduces a statement; the rest of the line is
            # code and may be followed by "... " continuation lines.
            (r'(>>> )(.*\n)', bygroups(Generic.Prompt, Other.Code),
             'continuations'),
            # This happens, e.g., when tracebacks are embedded in documentation;
            # trailing whitespaces are often stripped in such contexts.
            (r'(>>>)(\n)', bygroups(Generic.Prompt, Whitespace)),
            # A traceback header (possibly preceded by the echoed ^C of a
            # KeyboardInterrupt) switches to traceback mode.
            (r'(\^C)?Traceback \(most recent call last\):\n',
             Other.Traceback, 'traceback'),
            # SyntaxError starts with this
            (r' File "[^"]+", line \d+', Other.Traceback, 'traceback'),
            # Anything else is ordinary program output.
            (r'.*\n', Generic.Output),
        ],
        'continuations': [
            (r'(\.\.\. )(.*\n)', bygroups(Generic.Prompt, Other.Code)),
            # See above.
            (r'(\.\.\.)(\n)', bygroups(Generic.Prompt, Whitespace)),
            # A line without a continuation prompt ends the statement.
            default('#pop'),
        ],
        'traceback': [
            # As soon as we see a traceback, consume everything until the next
            # >>> prompt.
            (r'(?=>>>( |$))', Text, '#pop'),
            (r'(KeyboardInterrupt)(\n)', bygroups(Name.Class, Whitespace)),
            (r'.*\n', Other.Traceback),
        ],
    }
class PythonConsoleLexer(DelegatingLexer):
    """
    For Python console output or doctests, such as:

    .. sourcecode:: pycon

        >>> a = 'foo'
        >>> print(a)
        foo

    Additional options:

    `python3`
        Use Python 3 lexer for code.  Default is ``True``.

        .. versionadded:: 1.0
        .. versionchanged:: 2.5
           Now defaults to ``True``.
    """

    name = 'Python console session'
    aliases = ['pycon']
    mimetypes = ['text/x-python-doctest']

    def __init__(self, **options):
        # `python3` selects which pair of code/traceback lexers the session
        # delegates to; it defaults to Python 3.
        python3 = get_bool_opt(options, 'python3', True)
        if python3:
            pylexer = PythonLexer
            tblexer = PythonTracebackLexer
        else:
            pylexer = Python2Lexer
            tblexer = Python2TracebackLexer
        # We have two auxiliary lexers. Use DelegatingLexer twice with
        # different tokens.  TODO: DelegatingLexer should support this
        # directly, by accepting a tuple of auxiliary lexers and a tuple of
        # distinguishing tokens.  Then we wouldn't need this intermediary
        # class.
        class _ReplaceInnerCode(DelegatingLexer):
            def __init__(self, **options):
                # First pass: re-lex Other.Code spans with the Python lexer.
                super().__init__(pylexer, _PythonConsoleLexerBase,
                                 Other.Code, **options)
        # Second pass: re-lex Other.Traceback spans with the traceback lexer.
        super().__init__(tblexer, _ReplaceInnerCode, Other.Traceback,
                         **options)
727720class PythonTracebackLexer (RegexLexer ):
728721 """
@@ -743,7 +736,7 @@ class PythonTracebackLexer(RegexLexer):
743736 tokens = {
744737 'root' : [
745738 (r'\n' , Whitespace ),
746- (r'^Traceback \(most recent call last\):\n' , Generic .Traceback , 'intb' ),
739+ (r'^(\^C)? Traceback \(most recent call last\):\n' , Generic .Traceback , 'intb' ),
747740 (r'^During handling of the above exception, another '
748741 r'exception occurred:\n\n' , Generic .Traceback ),
749742 (r'^The above exception was the direct cause of the '
0 commit comments