"Fossies" - the Free Open Source Software Archive  

Member "highlight-4.19/src/core/codegenerator.cpp" (18 Feb 2026, 71414 Bytes) of package /linux/www/highlight-4.19.tar.bz2:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "codegenerator.cpp", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 4.18_vs_4.19.

    1 /***************************************************************************
    2                           codegenerator.cpp  -  description
    3                              -------------------
    4     begin                : Die Jul 9 2002
    5     copyright            : (C) 2002-2026 by Andre Simon
    6     email                : a.simon@mailbox.org
    7  ***************************************************************************/
    8 
    9 
   10 /*
   11 This file is part of Highlight.
   12 
   13 Highlight is free software: you can redistribute it and/or modify
   14 it under the terms of the GNU General Public License as published by
   15 the Free Software Foundation, either version 3 of the License, or
   16 (at your option) any later version.
   17 
   18 Highlight is distributed in the hope that it will be useful,
   19 but WITHOUT ANY WARRANTY; without even the implied warranty of
   20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   21 GNU General Public License for more details.
   22 
   23 You should have received a copy of the GNU General Public License
   24 along with Highlight.  If not, see <http://www.gnu.org/licenses/>.
   25 */
   26 
   27 
   28 #include <climits>
   29 #include <memory>
   30 
   31 #include <chrono>
   32 #include <thread>
   33 
   34 
   35 #include <boost/xpressive/xpressive_dynamic.hpp>
   36 
   37 #include "codegenerator.h"
   38 
   39 #include "htmlgenerator.h"
   40 #include "xhtmlgenerator.h"
   41 #include "rtfgenerator.h"
   42 #include "latexgenerator.h"
   43 #include "texgenerator.h"
   44 #include "svggenerator.h"
   45 #include "bbcodegenerator.h"
   46 #include "pangogenerator.h"
   47 #include "odtgenerator.h"
   48 #include "astyle/astyle.h"
   49 
   50 //#if !defined (QT)
   51 #include "ansigenerator.h"
   52 #include "xterm256generator.h"
   53 //#endif
   54 
   55 using std::cin;
   56 using std::cout;
   57 using std::ifstream;
   58 using std::ios;
   59 using std::istringstream;
   60 using std::iterator;
   61 using std::flush;
   62 using std::make_pair;
   63 using std::map;
   64 using std::ofstream;
   65 using std::ostream;
   66 using std::ostringstream;
   67 using std::right;
   68 using std::setw;
   69 using std::string;
   70 using std::vector;
   71 
   72 namespace highlight
   73 {
   74 const unsigned int CodeGenerator::NUMBER_BUILTIN_STATES = highlight::KEYWORD;
   75 
   76 // must not start with kw, st, sm prefixes
   77 const string CodeGenerator::STY_NAME_STD="def";
   78 const string CodeGenerator::STY_NAME_STR="sng";
   79 const string CodeGenerator::STY_NAME_NUM="num";
   80 const string CodeGenerator::STY_NAME_SLC="slc";
   81 const string CodeGenerator::STY_NAME_COM="com";
   82 const string CodeGenerator::STY_NAME_ESC="esc";
   83 const string CodeGenerator::STY_NAME_DIR="ppc"; //preprocessor
   84 const string CodeGenerator::STY_NAME_DST="pps"; //preprocessor string
   85 const string CodeGenerator::STY_NAME_LIN="lin";
   86 const string CodeGenerator::STY_NAME_SYM="opt"; //operator
   87 const string CodeGenerator::STY_NAME_IPL="ipl"; //interpolation
   88 
   89 const string CodeGenerator::STY_NAME_HVR="hvr";
   90 const string CodeGenerator::STY_NAME_ERR="err";
   91 const string CodeGenerator::STY_NAME_ERM="erm";
   92 
   93 vector<Diluculum::LuaFunction*> CodeGenerator::pluginChunks;
   94 
   95 
   96 CodeGenerator * CodeGenerator::getInstance ( OutputType type )
   97 {
   98     CodeGenerator* generator=nullptr;
   99     switch ( type ) {
  100     case HTML:
  101         generator = new HtmlGenerator();
  102         break;
  103     case XHTML:
  104         generator = new XHtmlGenerator();
  105         break;
  106     case TEX:
  107         generator = new TexGenerator ();
  108         break;
  109     case LATEX:
  110         generator = new LatexGenerator();
  111         break;
  112     case RTF:
  113         generator = new RtfGenerator ();
  114         break;
  115     case SVG:
  116         generator = new SVGGenerator();
  117         break;
  118     case BBCODE:
  119         generator = new BBCodeGenerator();
  120         break;
  121     case PANGO:
  122         generator = new PangoGenerator();
  123         break;
  124     case ODTFLAT:
  125         generator = new ODTGenerator();
  126         break;
  127     case ESC_ANSI:
  128         generator = new AnsiGenerator();
  129         break;
  130     case ESC_XTERM256:
  131     case ESC_TRUECOLOR:
  132         generator = new Xterm256Generator();
  133         generator->setESCTrueColor(type==ESC_TRUECOLOR);
  134         break;
  135     default:
  136         break;
  137     }
  138     return generator;
  139 }
  140 
  141 
/** Constructs a generator for the given output type.
 *  All members are set to their defaults here; no syntax, theme, input or
 *  output stream is attached yet (the corresponding pointers are nullptr).
 *  \param type output format, stored in outputType
 */
CodeGenerator::CodeGenerator ( highlight::OutputType type )
    :currentSyntax(nullptr),
     in ( nullptr ),
     out ( nullptr ),
     encoding ( "none" ),
     docTitle ( "Source file" ),
     maskWs ( false ),
     excludeWs ( false ),
     fragmentOutput ( false ),
     keepInjections( false ),
     showLineNumbers ( false ),
     lineNumberFillZeroes ( false ),
     printNewLines(true),
     omitVersionComment(false),
     isolateTags(false),
     disableStyleCache(false),
     baseFontSize("10"),
     lineNumber ( 0 ),
     lineNumberOffset ( 0 ),
     currentState ( _UNKNOWN ),
     currentKeywordClass ( 0 ),
     includeStyleDef ( false ),
     numberCurrentLine ( false ),
     lineIndex ( 0 ),
     lastLineLength( 0 ),
     // UINT_MAX: no pending syntax change (see getCurrentState)
     syntaxChangeIndex(UINT_MAX),
     syntaxChangeLineNo(UINT_MAX),
     lineNumberWidth ( 5 ),
     startLineCnt( 1 ),
     startLineCntCurFile( 1 ),
     // 255: AtEnd() then only checks the end of the input stream
     extraEOFChar( 255 ),
     maxLineCnt ( UINT_MAX ),
     inputFilesCnt (0),
     processedFilesCnt (0),
     kwOffset(0),
     noTrailingNewLine(0),

     terminatingChar ( '\0' ),
     formatter ( nullptr ),
     streamIterator ( nullptr ),
     formattingEnabled ( false ),
     formattingPossible ( false ),
     validateInput ( false ),
     numberWrappedLines ( true ),
     resultOfHook(false),
     lineContainedTestCase(false),
     lineContainedStmt(false),
     applySyntaxTestCase(false),
     toggleDynRawString(false),
     lsEnableHoverRequests(false),
     lsCheckSemanticTokens(false),
     lsCheckSyntaxErrors(false),

     keywordCase ( StringTools::CASE_UNCHANGED ),
     eolDelimiter ('\n'),
     outputType ( type )
{
}
  200 
  201 
  202 CodeGenerator::~CodeGenerator()
  203 {
  204     delete formatter;
  205     delete streamIterator;
  206 
  207     resetSyntaxReaders();
  208 
  209     for (unsigned int i=0; i<pluginChunks.size(); i++) {
  210         delete pluginChunks[i];
  211     }
  212     pluginChunks.clear();
  213 }
  214 
  215 
  216 bool CodeGenerator::initTheme ( const string& themePath, bool loadSemanticStyles)
  217 {
  218     this->themePath=themePath;
  219     bool loadOK = docStyle.load ( themePath, outputType, loadSemanticStyles );
  220     initOutputTags();
  221     return loadOK;
  222 }
  223 
  224 LSResult CodeGenerator::initLanguageServer ( const string& executable, const vector<string> &options,
  225                                              const string& workspace, const string& syntax,
  226                                              int delay, int logLevel, bool legacy )
  227 {
  228     if (LSPClient.isInitialized()) {
  229         return LSResult::INIT_OK;
  230     }
  231 
  232     LSPClient.setLogging(logLevel>1);
  233 
  234     LSPClient.setExecutable(executable);
  235     LSPClient.setWorkspace(workspace);
  236     LSPClient.setOptions(options);
  237     LSPClient.setSyntax(syntax);
  238     LSPClient.setInitializeDelay(delay);
  239     LSPClient.setLegacyProtocol(legacy);
  240     if (!LSPClient.connect()){
  241         return LSResult::INIT_BAD_PIPE;
  242     }
  243 
  244     if (!LSPClient.runInitialize()){
  245         return LSResult::INIT_BAD_REQUEST;
  246     }
  247     for (int i=0; i<docStyle.getSemanticTokenStyleCount();i++) {
  248         currentSyntax->generateNewKWClass(i+1, "st");
  249     }
  250     LSPClient.runInitialized();
  251     updateKeywordClasses();
  252     return LSResult::INIT_OK;
  253 }
  254 
  255 bool CodeGenerator::lsOpenDocument(const string& fileName, const string & suffix){
  256     lsDocumentPath = fileName;
  257     return LSPClient.runDidOpen(fileName, suffix);
  258 }
  259 
  260 bool CodeGenerator::lsCloseDocument(const string& fileName, const string & suffix){
  261     lsDocumentPath.clear();
  262     return LSPClient.runDidClose(fileName, suffix);
  263 }
  264 
  265 bool CodeGenerator::lsAddSemanticInfo(const string& fileName, const string & suffix){
  266     lsCheckSemanticTokens = LSPClient.runSemanticTokensFull(fileName);
  267     return lsCheckSemanticTokens;
  268 }
  269 
  270 bool CodeGenerator::isHoverProvider(){
  271     return LSPClient.isHoverProvider();
  272 }
  273 
  274 bool CodeGenerator::isSemanticTokensProvider(){
  275     return LSPClient.isSemanticTokensProvider();
  276 }
  277 
  278 void CodeGenerator::lsAddHoverInfo(bool hover){
  279     lsEnableHoverRequests = hover;
  280 }
  281 
  282 void CodeGenerator::lsAddSyntaxErrorInfo(bool error) {
  283     lsCheckSyntaxErrors = error;
  284 }
  285 
  286 
  287 void CodeGenerator::exitLanguageServer () {
  288     LSPClient.runShutdown();
  289     LSPClient.runExit();
  290 }
  291 
  292 const string& CodeGenerator::getStyleName()
  293 {
  294     return themePath;
  295 }
  296 
  297 void CodeGenerator::setLineNumberWidth ( int w )
  298 {
  299     lineNumberWidth=w;
  300 }
  301 
  302 int CodeGenerator::getLineNumberWidth()
  303 {
  304     return lineNumberWidth;
  305 }
  306 
  307 void CodeGenerator::setPrintLineNumbers ( bool flag, unsigned int startCnt )
  308 {
  309     showLineNumbers=flag;
  310     lineNumberOffset = startCnt-1;
  311 }
  312 
  313 bool CodeGenerator::getPrintLineNumbers()
  314 {
  315     return showLineNumbers;
  316 }
  317 
  318 void CodeGenerator::setPrintZeroes ( bool flag )
  319 {
  320     lineNumberFillZeroes=flag;
  321 }
  322 
  323 bool CodeGenerator::getPrintZeroes()
  324 {
  325     return lineNumberFillZeroes;
  326 }
  327 
  328 void CodeGenerator::setIncludeStyle ( bool flag )
  329 {
  330     includeStyleDef = flag;
  331 }
  332 
  333 void CodeGenerator::disableTrailingNL ( int flag )
  334 {
  335     noTrailingNewLine = flag;
  336 }
  337 
  338 void CodeGenerator::setStyleInputPath ( const string& path )
  339 {
  340     styleInputPath = path;
  341 }
  342 
  343 void CodeGenerator::setStyleOutputPath ( const string& path )
  344 {
  345     styleOutputPath = path;
  346 }
  347 
  348 void CodeGenerator::setPluginParameter ( const string& param )
  349 {
  350     pluginParameter = param;
  351 }
  352 
  353 const string&  CodeGenerator::getStyleInputPath()
  354 {
  355     return styleInputPath;
  356 }
  357 
  358 const string&  CodeGenerator::getStyleOutputPath()
  359 {
  360     return styleOutputPath;
  361 }
  362 
  363 void CodeGenerator::setFragmentCode ( bool flag )
  364 {
  365     fragmentOutput=flag;
  366 }
  367 
  368 bool CodeGenerator::getFragmentCode()
  369 {
  370     return fragmentOutput;
  371 }
  372 void CodeGenerator::setKeepInjections ( bool flag )
  373 {
  374     keepInjections=flag;
  375 }
  376 
  377 bool CodeGenerator::getKeepInjections()
  378 {
  379     return keepInjections;
  380 }
  381 void CodeGenerator::setValidateInput ( bool flag )
  382 {
  383     validateInput=flag;
  384 }
  385 
  386 bool CodeGenerator::getValidateInput()
  387 {
  388     return validateInput;
  389 }
  390 
  391 void CodeGenerator::setNumberWrappedLines ( bool flag )
  392 {
  393     numberWrappedLines=flag;
  394 }
  395 
  396 bool CodeGenerator::getNumberWrappedLines()
  397 {
  398     return numberWrappedLines;
  399 }
  400 
  401 void CodeGenerator::setOmitVersionComment ( bool flag )
  402 {
  403     omitVersionComment=flag;
  404 }
  405 
  406 bool CodeGenerator::getOmitVersionComment ()
  407 {
  408     return omitVersionComment;
  409 }
  410 
  411 void CodeGenerator::setIsolateTags ( bool flag )
  412 {
  413     isolateTags=flag;
  414 }
  415 
  416 bool CodeGenerator::getIsolateTags ()
  417 {
  418     return isolateTags;
  419 }
  420 
  421 void CodeGenerator::setBaseFont ( const string& fontName )
  422 {
  423     baseFont = fontName;
  424 }
  425 
  426 void CodeGenerator::setBaseFontSize ( const string& fontSize)
  427 {
  428     baseFontSize = fontSize;
  429 }
  430 
  431 void CodeGenerator::setStyleCaching ( bool flag )
  432 {
  433     disableStyleCache=!flag;
  434 }
  435 
  436 const string CodeGenerator::getBaseFont() const
  437 {
  438     if ( !baseFont.empty() ) return baseFont;
  439     switch ( outputType ) {
  440     case HTML:
  441     case XHTML:
  442     case SVG:
  443         return "'Courier New',monospace";
  444         break;
  445     case LATEX:
  446         return "ttfamily";
  447         break;
  448     case TEX:
  449         return "tt";
  450         break;
  451     default:
  452         return "Courier New";
  453     }
  454 }
  455 
  456 const string CodeGenerator::getBaseFontSize()
  457 {
  458     return baseFontSize;
  459 }
  460 
  461 void CodeGenerator::setTitle ( const string & title )
  462 {
  463     if ( !title.empty() ) docTitle= title;
  464 }
  465 
  466 string CodeGenerator::getTitle()
  467 {
  468     return docTitle;
  469 }
  470 
  471 void CodeGenerator::setEncoding ( const string& encodingName )
  472 {
  473     encoding = encodingName;
  474 }
  475 
  476 bool CodeGenerator::formattingDisabled()
  477 {
  478     return !formattingEnabled;
  479 }
  480 
  481 void CodeGenerator::setStartingInputLine ( unsigned int begin )
  482 {
  483     startLineCnt = startLineCntCurFile = begin;
  484 }
  485 
  486 void CodeGenerator::setMaxInputLineCnt ( unsigned int cnt )
  487 {
  488     maxLineCnt = cnt;
  489 }
  490 
  491 void CodeGenerator::setFilesCnt ( unsigned int cnt )
  492 {
  493     inputFilesCnt = cnt;
  494     processedFilesCnt = 0;
  495 }
  496 
  497 bool CodeGenerator::formattingIsPossible()
  498 {
  499     return formattingPossible;
  500 }
  501 unsigned char CodeGenerator::getAdditionalEOFChar()
  502 {
  503     return extraEOFChar;
  504 }
  505 void CodeGenerator::setAdditionalEOFChar ( unsigned char eofChar )
  506 {
  507     extraEOFChar = eofChar;
  508 }
  509 void CodeGenerator::setPreformatting ( WrapMode lineWrappingStyle,
  510                                        unsigned int lineLength,
  511                                        int numberSpaces )
  512 {
  513     bool enableWrap = lineWrappingStyle!=WRAP_DISABLED;
  514     bool replaceTabs = numberSpaces > 0;
  515 
  516     if ( enableWrap || replaceTabs ) {
  517         preFormatter.setWrap ( enableWrap );
  518         preFormatter.setWrapIndentBraces ( lineWrappingStyle==WRAP_DEFAULT );
  519         preFormatter.setWrapLineLength ( lineLength );
  520         preFormatter.setReplaceTabs ( replaceTabs );
  521         preFormatter.setNumberSpaces ( numberSpaces );
  522     }
  523 }
  524 
  525 void CodeGenerator::setKeyWordCase ( StringTools::KeywordCase keyCase )
  526 {
  527     keywordCase = keyCase;
  528 }
  529 
  530 void CodeGenerator::setEOLDelimiter(char delim)
  531 {
  532     eolDelimiter = delim;
  533 }
  534 
  535 void CodeGenerator::reset()
  536 {
  537     lineIndex = 0;
  538     lineNumber = 0;
  539     line.clear();
  540     preFormatter.reset();
  541     inFile.clear();
  542     outFile.clear();
  543     embedLangDefPath.clear();
  544     printNewLines=true;
  545     syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
  546     startLineCntCurFile = startLineCnt;
  547     applySyntaxTestCase=lineContainedTestCase=false;
  548     if (currentSyntax){
  549         vector<int> overrideStyleAttrs=currentSyntax->getOverrideStyleAttributes();
  550         docStyle.overrideAttributes(overrideStyleAttrs);
  551         if (overrideStyleAttrs.size())
  552             disableStyleCache = true;
  553     }
  554 }
  555 
  556 string CodeGenerator::getThemeInitError()
  557 {
  558     return docStyle.getErrorMessage();
  559 }
  560 
  561 string CodeGenerator::getPluginScriptError()
  562 {
  563     return userScriptError;
  564 }
  565 
  566 string CodeGenerator::getSyntaxRegexError()
  567 {
  568     return (currentSyntax)? currentSyntax->getFailedRegex(): "syntax undef";
  569 }
  570 string CodeGenerator::getSyntaxLuaError()
  571 {
  572     return (currentSyntax)? currentSyntax->getLuaErrorText(): "syntax undef";
  573 
  574 }
  575 string CodeGenerator::getSyntaxDescription()
  576 {
  577     return (currentSyntax)? currentSyntax->getDescription(): "syntax undef";
  578 
  579 }
  580 string CodeGenerator::getSyntaxEncodingHint()
  581 {
  582     return (currentSyntax)? currentSyntax->getEncodingHint(): "";
  583 
  584 }
  585 string CodeGenerator::getThemeDescription()
  586 {
  587     return docStyle.getDescription();
  588 }
  589 
  590 string CodeGenerator::getSyntaxCatDescription(){
  591     return (currentSyntax)? currentSyntax->getCategoryDescription(): "";
  592 }
  593 
  594 string CodeGenerator::getThemeCatDescription()
  595 {
  596     return docStyle.getCategoryDescription();
  597 }
  598 
  599 float CodeGenerator::getThemeContrast()
  600 {
  601     return docStyle.getContrast();
  602 }
  603 
  604 unsigned int CodeGenerator::getLineNumber()
  605 {
  606     return lineNumber;
  607 }
  608 bool CodeGenerator::AtEnd(char c) const {
  609     bool instream_eof = in->eof();
  610     if (extraEOFChar == 255)
  611         return instream_eof;
  612 
  613     bool c_null = c == extraEOFChar;
  614     bool instream_peek_null = false;
  615     if (instream_eof == false && c_null == false)
  616         instream_peek_null = in->peek() == extraEOFChar;
  617     bool ret = instream_eof || c_null || instream_peek_null;
  618     return ret;
  619 }
  620 bool CodeGenerator::readNewLine ( string &newLine )
  621 {
  622     bool eof=false;
  623 
  624     if ( lineIndex ) terminatingChar=newLine[lineIndex-1];
  625 
  626     while (!eof && startLineCntCurFile>0) {
  627         if ( formattingPossible && formattingEnabled ) {
  628             eof=!formatter->hasMoreLines();
  629             if ( !eof ) {
  630                 newLine = formatter->nextLine();
  631             }
  632         } else {
  633             eof = AtEnd() || ! getline ( *in, newLine, eolDelimiter );
  634         }
  635         --startLineCntCurFile;
  636     }
  637 
  638     startLineCntCurFile=1;
  639 #ifndef _WIN32
  640     // drop CR of CRLF files
  641     if (!newLine.empty() && newLine[newLine.size() - 1] == '\r')
  642         newLine.erase(newLine.size() - 1);
  643 #endif
  644 
  645     return eof || ( lineNumber == maxLineCnt );
  646 }
  647 
  648 void CodeGenerator::matchRegex ( const string &line, State skipState)
  649 {
  650     regexGroups.clear();
  651     int matchBegin=0;
  652     int groupID=0;
  653 
  654     // cycle through all regex, save the start and ending indices of matches to report them later
  655     for ( unsigned int i=0; i<currentSyntax->getRegexElements().size(); i++ ) {
  656         RegexElement *regexElem = currentSyntax->getRegexElements() [i];
  657 
  658         if (regexElem->open == skipState) continue;
  659 
  660         if (regexElem->constraintLineNum && regexElem->constraintLineNum != lineNumber) {
  661             continue;
  662         }
  663 
  664         if (regexElem->constraintFilename.size() && regexElem->constraintFilename != inFile) {
  665             continue;
  666         }
  667 
  668         boost::xpressive::sregex_iterator cur( line.begin(), line.end(), regexElem->rex );
  669         boost::xpressive::sregex_iterator end;
  670 
  671         for( ; cur != end; ++cur )  {
  672             groupID = ( regexElem->capturingGroup<0 ) ? cur->size()-1 : regexElem->capturingGroup;
  673             matchBegin = cur->position(groupID);
  674 
  675             regexGroups.insert (
  676                 make_pair ( matchBegin + 1, RegexToken ( regexElem->open, cur->length(groupID), regexElem->kwClass, regexElem->langName ) ) );
  677 
  678             // priority regex (match required)
  679             if (regexElem->priority) {
  680                 return;
  681             }
  682         }
  683     }
  684 }
  685 
  686 unsigned char CodeGenerator::getInputChar()
  687 {
  688     // end of line?
  689     if ( lineIndex == line.length() ) {
  690 
  691         //more testing required:
  692         if (outputType==ESC_TRUECOLOR || outputType==ESC_XTERM256)
  693             lastLineLength=StringTools::utf8_strlen(line + lsSyntaxErrorDesc);
  694 
  695         bool eof=false;
  696         if ( preFormatter.isEnabled() ) {
  697             if ( !preFormatter.hasMoreLines() ) {
  698                 eof=readNewLine ( line );
  699                 preFormatter.setLine ( line );
  700                 ++lineNumber;
  701                 numberCurrentLine = true;
  702             } else {
  703                 if (numberWrappedLines)
  704                     ++lineNumber;
  705                 numberCurrentLine = numberWrappedLines;
  706             }
  707 
  708             line = preFormatter.getNextLine();
  709         } else {
  710             eof=readNewLine ( line );
  711             ++lineNumber;
  712 
  713             numberCurrentLine = true;
  714         }
  715         lineIndex=0;
  716 
  717         if (!lineContainedTestCase && applySyntaxTestCase){
  718             stateTraceTest = stateTraceCurrent;
  719             stateTraceCurrent.clear();
  720         }
  721 
  722         lineContainedTestCase=false;
  723         lineContainedStmt=false;
  724         matchRegex ( line );
  725 
  726         return ( eof ) ?'\0':'\n';
  727     }
  728 
  729     return line[lineIndex++];
  730 }
  731 
/** Determines the state of the next token and stores the token text in the
 *  member "token".
 *
 *  The checks are applied in this order: end of line / end of file, white
 *  space, syntax test marker, pending switch to an embedded syntax, language
 *  server syntax errors, language server semantic tokens, regex matches of
 *  the current line. If none applies, the single character is returned as
 *  STANDARD.
 *
 *  Changing this method requires regression testing with nested syntax files
 *  (HTML+PHP+JS+CSS, Coffeescript with block regex, Pas + ASM), especially
 *  nested syntax in one line.
 *
 *  \param oldState state that was active before this call
 *  \return state of the recognized token
 */
State CodeGenerator::getCurrentState (State oldState)
{
    unsigned char c='\0';

    // no pending token: read the next character; otherwise re-evaluate the
    // pending token starting at its first character
    if ( token.length() ==0 ) {
        c=getInputChar();
    } else {
        lineIndex-= ( token.length()-1 );
        c=token[0];
    }
    if ( c=='\n' ) {
        return _EOL;   // End of line
    }

    if ( c=='\0' ) {
        return _EOF;   // End of file
    }

    if ( c==' ' || c=='\t' ) {
        token= c;
        return _WS;    // White space
    }

    // syntax test markers are only recognized within comments
    if ( applySyntaxTestCase && ( c=='^' || c=='<') && (oldState == ML_COMMENT || oldState==SL_COMMENT)  ) {
        token= c;
        return _TESTPOS;
    }

    // at this position the syntax change takes place
    if (lineIndex >= syntaxChangeIndex-1 || syntaxChangeLineNo < lineNumber){
        loadEmbeddedLang(embedLangDefPath);  // load new syntax
        matchRegex(line);                    // recognize new patterns in the (remaining) line
        syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
    }

    // re-entry point after an embedded code section start has been registered
SKIP_EMBEDDED:

    // a syntax error reported by the language server at this position
    if (lsCheckSyntaxErrors && LSPClient.errorExists(lineNumber, lineIndex)) {
        highlight::SemanticToken errorToken = LSPClient.getError(lineNumber, lineIndex);
        token = line.substr ( lineIndex-1, errorToken.length);
        lineIndex += errorToken.length-1;
        lsSyntaxErrorDesc = errorToken.id;

        //std::cerr <<"error num "<<lineNumber<< " idx "<<lineIndex<< " error "<<errorToken.id<< "\n";
        return SYNTAX_ERROR;
    }

    // a semantic token reported by the language server at this position; it is
    // only used if the theme defines a style for it
    if (lsCheckSemanticTokens && LSPClient.tokenExists(lineNumber, lineIndex)) {
        highlight::SemanticToken semToken = LSPClient.getToken(lineNumber, lineIndex);
        int semStyleKwId = docStyle.getSemanticStyle(semToken.id);
        if (semStyleKwId) {
            token = line.substr ( lineIndex-1, semToken.length);
            lineIndex += semToken.length-1;

            currentKeywordClass = semStyleKwId + kwOffset;  // +offset of missing kw groups in the theme
            //std::cerr <<"l "<<lineNumber<<  "t "<<token<< " semStyleKwId "<< semStyleKwId << "  off "<<kwOffset<<" -> "  << semToken.id <<"\n";
            return KEYWORD;
        }
    }

    // Test if a regular expression was found at the current position
    if ( !regexGroups.empty() ) {
        if ( regexGroups.count ( lineIndex ) ) {
            token = line.substr ( lineIndex-1, regexGroups[lineIndex].length );

            // advance lineIndex to the end of the match; oldIndex keeps the map key
            unsigned int oldIndex= lineIndex;
            if ( regexGroups[oldIndex].length>1 ) lineIndex+= regexGroups[oldIndex].length-1;

            if ( regexGroups[oldIndex].state==EMBEDDED_CODE_BEGIN ) {
                //do not handle a nested section if the syntax is marked as "sealed"
                if (embedLangDefPath.length()==0 || currentSyntax->allowsInnerSection(embedLangDefPath) ) {
                    embedLangDefPath = currentSyntax->getNewPath(regexGroups[oldIndex].name);
                    //remember position
                    syntaxChangeIndex = lineIndex+2;
                    syntaxChangeLineNo = lineNumber;
                }

                // repeat parsing of this line without nested state recognition to highlight opening delimiter in the host syntax
                matchRegex(line, EMBEDDED_CODE_BEGIN);
                lineIndex = oldIndex;
                goto SKIP_EMBEDDED; // this is how it should be done
            }

            if ( regexGroups[oldIndex].state==IDENTIFIER_BEGIN || regexGroups[oldIndex].state==KEYWORD ) {
                string reservedWord= ( currentSyntax->isIgnoreCase() ) ? StringTools::change_case ( token ) :token;
                currentKeywordClass=currentSyntax->getKeywordListGroup ( reservedWord ); //check in lists (no regex)

                // not found in the keyword lists: use the class of the keyword regex
                if ( !currentKeywordClass && regexGroups[oldIndex].state==KEYWORD ){
                    currentKeywordClass = regexGroups[oldIndex].kwClass;
                }
                return validateState(( currentKeywordClass ) ? KEYWORD : STANDARD, oldState );
            } else {
                return validateState(regexGroups[oldIndex].state, oldState);
            }
        }
    }

    // Character not referring to any state
    token = c;
    return STANDARD;
}
  837 
  838 State CodeGenerator::validateState(State newState, State oldState)
  839 {
  840 
  841     if (currentSyntax->getValidateStateChangeFct()) {
  842         Diluculum::LuaValueList params;
  843         params.emplace_back(oldState);
  844         params.emplace_back(newState);
  845         params.emplace_back(token);
  846         params.emplace_back(getCurrentKeywordClassId() );
  847         params.emplace_back(lineNumber );
  848         params.emplace_back(lineIndex-(unsigned int)token.length() );
  849 
  850         Diluculum::LuaValueList res=
  851             currentSyntax->getLuaState()->call ( *currentSyntax->getValidateStateChangeFct(),
  852                     params,"getValidateStateChangeFct call")  ;
  853 
  854         resultOfHook = res.size()>=1;
  855         if (resultOfHook) {
  856 
  857             setOverrideParams();
  858 
  859             auto validatedState = (State)res[0].asInteger();
  860             if ( validatedState== _REJECT) {
  861 
  862                 // proceed using only the first character of the token
  863                 if (res.size()==1) {
  864                     lineIndex -= (token.length() -1);
  865                     token=token.substr(0, 1);
  866                 }
  867 
  868                 //experimental for slim.lang: evaluate second return arg after _REJECT
  869                 if (res.size()>=2) {
  870                     lineIndex -= (token.length() );
  871                     token.clear();
  872                     return (State)res[1].asInteger();
  873                 }
  874                 return oldState;
  875             }
  876 
  877             return validatedState;
  878         }
  879     }
  880     resultOfHook  = false;
  881 
  882     return newState;
  883 }
  884 
  885 unsigned int CodeGenerator::getCurrentKeywordClassId(){
  886     unsigned int kwClassId=0;
  887 
  888     // this vector contains the defined keyword classes, and currentKeywordClass is its index:
  889     vector<string> kwClasses=currentSyntax->getKeywordClasses();
  890 
  891     if (currentKeywordClass && currentKeywordClass<=kwClasses.size()) {
  892         string kwClassName=kwClasses[currentKeywordClass-1];
  893         if (kwClassName.size()==3)
  894             kwClassId = kwClassName[2] - 'a' + 1;
  895     }
  896     return kwClassId;
  897 }
  898 
  899 //it is faster to pass ostream reference
  900 void CodeGenerator::maskString ( ostream& ss, const string & s )
  901 {
  902     string escHoverText;
  903 
  904     if (lsEnableHoverRequests && (currentState==STANDARD || currentState==NUMBER || currentState==KEYWORD)) {
  905 
  906         string hoverText = LSPClient.runHover(lsDocumentPath, lineIndex - s.size(), lineNumber-1);
  907 
  908         for(const auto &c : hoverText)
  909         {
  910             if (isascii(c))
  911                 escHoverText.append(maskCharacter(c));
  912         }
  913     }
  914 
  915     if (escHoverText.size()) {
  916         ss << getHoverTagOpen(escHoverText);
  917     }
  918 
  919     for (const auto &c : s)
  920     {
  921         ss << maskCharacter ( c );
  922     }
  923 
  924     if (escHoverText.size()) {
  925         ss << getHoverTagClose();
  926     }
  927 
  928     // The test markers position should also be deternmined by calculating the code points
  929     if ( applySyntaxTestCase ) {
  930 
  931         PositionState ps(currentState, getCurrentKeywordClassId(), false);
  932 
  933         //TODO avoid repeated string comparison:
  934         int slen = encoding=="utf-8" ? StringTools::utf8_strlen(s) : s.length();
  935         for (int i=0; i< slen; i++ ) {
  936             stateTraceCurrent.push_back(ps);
  937         }
  938         if (stateTraceCurrent.size()>200)
  939             stateTraceCurrent.erase(stateTraceCurrent.begin(), stateTraceCurrent.begin() + 100 );
  940     }
  941 }
  942 
  943 void CodeGenerator::printSyntaxError ( ostream& ss ) {
  944     if ( !lsSyntaxErrorDesc.empty()) {
  945         ss << openTags[ highlight::SYNTAX_ERROR_MSG ];
  946 
  947         for(const auto &c : lsSyntaxErrorDesc)
  948         {
  949             ss << maskCharacter ( c );
  950         }
  951 
  952         ss << closeTags[ highlight::SYNTAX_ERROR_MSG ];
  953         lsSyntaxErrorDesc.clear();
  954     }
  955 }
  956 
  957 Diluculum::LuaValueList CodeGenerator::callDecorateFct(const string& token)
  958 {
  959 
  960     Diluculum::LuaValueList params;
  961     params.emplace_back(token);
  962     params.emplace_back(currentState);
  963     params.emplace_back(currentKeywordClass);
  964     params.emplace_back(lineContainedStmt);
  965     params.emplace_back(lineNumber );
  966     params.emplace_back(lineIndex-(unsigned int)token.length() );
  967 
  968     return currentSyntax->getLuaState()->call ( *currentSyntax->getDecorateFct(),
  969             params,"getDecorateFct call")  ;
  970 }
  971 
  972 void CodeGenerator::printMaskedToken (bool flushWhiteSpace, StringTools::KeywordCase tcase )
  973 {
  974     if ( flushWhiteSpace )
  975         flushWs(1);
  976     string caseToken = StringTools::change_case ( token, tcase );
  977     if (currentSyntax->getDecorateFct()) {
  978 
  979         Diluculum::LuaValueList res=callDecorateFct(caseToken);
  980         if (res.size()==1) {
  981             *out<<res[0].asString();
  982         } else {
  983             maskString ( *out, caseToken );
  984         }
  985     } else {
  986         maskString ( *out, caseToken );
  987     }
  988 
  989     // check this *after* the decorate call
  990     if (   currentState == STANDARD || currentState == KEYWORD || currentState == NUMBER
  991         || currentState == STRING || currentState == IDENTIFIER_BEGIN) {
  992         lineContainedStmt = true;
  993     }
  994     token.clear();
  995 }
  996 
  997 bool CodeGenerator::styleFound()
  998 {
  999     return docStyle.found();
 1000 }
 1001 
 1002 bool CodeGenerator::printIndexFile ( const vector<string> &fileList, const string &outPath )
 1003 {
 1004     return true;
 1005 }
 1006 
 1007 bool CodeGenerator::initIndentationScheme ( const string &indentScheme )
 1008 {
 1009 
 1010     if ( formatter!=nullptr ) {
 1011         return true;
 1012     }
 1013 
 1014     if ( !indentScheme.size() ) return false;
 1015 
 1016     formatter=new astyle::ASFormatter();
 1017 
 1018     if ( indentScheme=="allman" || indentScheme=="bsd" || indentScheme=="ansi" ) {
 1019         formatter->setFormattingStyle ( astyle::STYLE_ALLMAN );
 1020     } else if ( indentScheme=="kr"||indentScheme=="k&r"||indentScheme=="k/r" ) {
 1021         formatter->setFormattingStyle ( astyle::STYLE_KR );
 1022     } else if ( indentScheme=="java" ) {
 1023         formatter->setFormattingStyle ( astyle::STYLE_JAVA );
 1024     } else if ( indentScheme=="stroustrup" ) {
 1025         formatter->setFormattingStyle ( astyle::STYLE_STROUSTRUP );
 1026     } else if ( indentScheme=="whitesmith" ) {
 1027         formatter->setFormattingStyle ( astyle::STYLE_WHITESMITH );
 1028     } else if ( indentScheme=="banner" || indentScheme=="ratliff") {
 1029         formatter->setFormattingStyle ( astyle::STYLE_RATLIFF );
 1030     } else if ( indentScheme=="gnu" ) {
 1031         formatter->setFormattingStyle ( astyle::STYLE_GNU );
 1032     } else if ( indentScheme=="linux" ) {
 1033         formatter->setFormattingStyle ( astyle::STYLE_LINUX );
 1034     } else if ( indentScheme=="horstmann" ) {
 1035         formatter->setFormattingStyle ( astyle::STYLE_HORSTMANN );
 1036     } else if ( indentScheme=="otbs" ||  indentScheme=="1tbs") {
 1037         formatter->setFormattingStyle ( astyle::STYLE_1TBS );
 1038     } else if ( indentScheme=="google") {
 1039         formatter->setFormattingStyle ( astyle::STYLE_GOOGLE );
 1040     } else if ( indentScheme=="pico" ||  indentScheme=="a11") {
 1041         formatter->setFormattingStyle ( astyle::STYLE_PICO );
 1042     } else if ( indentScheme=="lisp" ||  indentScheme=="python"||  indentScheme=="a12") {
 1043         formatter->setFormattingStyle ( astyle::STYLE_LISP );
 1044     } else if ( indentScheme=="vtk") {
 1045         formatter->setFormattingStyle ( astyle::STYLE_VTK );
 1046     } else if ( indentScheme=="mozilla") {
 1047         formatter->setFormattingStyle ( astyle::STYLE_MOZILLA );
 1048     } else if ( indentScheme=="webkit") {
 1049         formatter->setFormattingStyle ( astyle::STYLE_WEBKIT );
 1050     } else if ( indentScheme!="user" ){
 1051         return false;
 1052     }
 1053     return formattingEnabled=true;
 1054 }
 1055 
 1056 LoadResult CodeGenerator::loadLanguage ( const string& langDefPath, bool embedded )
 1057 {
 1058 
 1059     if (!embedded) {
 1060         while (!nestedLangs.empty()) {
 1061             nestedLangs.pop();
 1062         }
 1063     }
 1064 
 1065     bool reloadNecessary= currentSyntax ? currentSyntax->needsReload ( langDefPath ): true;
 1066     LoadResult result=LOAD_OK;
 1067     if ( reloadNecessary ) {
 1068         if (syntaxReaders.count(langDefPath)) {
 1069             currentSyntax=syntaxReaders[langDefPath];
 1070             result=LOAD_OK;
 1071         } else {
 1072 
 1073             currentSyntax=new SyntaxReader();
 1074             result=currentSyntax->load(langDefPath, pluginParameter, outputType);
 1075             syntaxReaders[langDefPath]=currentSyntax;
 1076         }
 1077 
 1078         if ( result==LOAD_OK ) {
 1079             formattingPossible=currentSyntax->enableReformatting();
 1080             updateKeywordClasses();
 1081         }
 1082     }
 1083 
 1084     kwOffset=currentSyntax->getKeywordCount() - docStyle.getKeywordStyleCount();
 1085 
 1086     return result;
 1087 }
 1088 
 1089 bool CodeGenerator::validateInputStream()
 1090 {
 1091     if ( !in ) return false;
 1092 
 1093     // it is not possible to move stream pointer back with stdin
 1094     if ( ( int ) in->tellg() == -1 ) // -1 : stdin
 1095         return true;
 1096 
 1097     // Sources: http://en.wikipedia.org/wiki/Magic_number_(programming)
 1098     // Magic configuration of "file"
 1099     // This is intended for web plugins - only check filetypes often found in the net
 1100     char magic_gif[]    = {'G','I','F','8', 0};
 1101     char magic_png[]    = {'\x89','P','N','G', 0};
 1102     char magic_java[]   = {'\xCA','\xFE','\xBA','\xBE', 0};
 1103     char magic_jpeg[]   = {'\xFF','\xD8','\xFF', 0};
 1104     char magic_bmp[]    = {'B','M', 0};
 1105     char magic_pdf[]    = {'%','P','D','F', 0};
 1106     char magic_utf8[]   = {'\xEF','\xBB','\xBF',0};
 1107     char magic_rar[]    = {'R','a','r','!', 0};
 1108     char magic_zip[]    = {'P','K','\x03','\x04', 0};
 1109     char magic_ace[]    = {'*','*','A','C','E','*','*', 0};
 1110     char magic_tgz[]    = {'\x8b','\x1f', '\x00', '\x08', 0};
 1111     char magic_bzip[]   = {'B','Z', 0};
 1112 
 1113     char* magic_table[] = {magic_utf8,
 1114                            magic_gif, magic_png, magic_jpeg, magic_bmp, magic_pdf,
 1115                            magic_java,
 1116                            magic_rar, magic_zip, magic_ace, magic_tgz, magic_bzip,
 1117                            nullptr
 1118                           };
 1119 
 1120     char buffer [10]= {0};
 1121     in->read ( buffer,8 );  //only read the first 8 bytes of input stream
 1122 
 1123     int magic_index=0;
 1124     while ( magic_table[magic_index] ) {
 1125         if ( !strncmp ( buffer, magic_table[magic_index], strlen ( magic_table[magic_index] ) ) ) {
 1126             break;
 1127         }
 1128         magic_index++;
 1129     }
 1130     int streamReadPos=0;
 1131     if ( magic_table[magic_index] == magic_utf8 ) {
 1132         //setEncoding("utf-8");
 1133         streamReadPos=3; // remove UTF-8 magic number from output
 1134     }
 1135 
 1136     in -> seekg ( streamReadPos, ios::beg );
 1137     in-> clear();  // clear fail bit to continue reading
 1138 
 1139     return !magic_table[magic_index] // points to 0 if no pattern was found
 1140            || magic_table[magic_index] == magic_utf8;
 1141 }
 1142 
 1143 void CodeGenerator::applyPluginChunk(const string& fctName, string *result, bool *keepDefault) {
 1144 
 1145     if (currentSyntax && pluginChunks.size()) {
 1146 
 1147         Diluculum::LuaState luaState;
 1148 
 1149         Diluculum::LuaValueList chunkParams;
 1150         chunkParams.emplace_back(currentSyntax->getDescription());
 1151         for (unsigned int i=0; i<pluginChunks.size(); i++) {
 1152             luaState.call(*pluginChunks[i], chunkParams, "format user function");
 1153         }
 1154 
 1155         if (luaState.globals().count(fctName)) {
 1156             auto* documentFct=new Diluculum::LuaFunction(luaState[fctName].value().asFunction());
 1157 
 1158             luaState["HL_PLUGIN_PARAM"] = pluginParameter;
 1159             luaState["HL_OUTPUT"] = outputType;
 1160             luaState["HL_FORMAT_HTML"]=HTML;
 1161             luaState["HL_FORMAT_XHTML"]=XHTML;
 1162             luaState["HL_FORMAT_TEX"]=TEX;
 1163             luaState["HL_FORMAT_LATEX"]=LATEX;
 1164             luaState["HL_FORMAT_RTF"]=RTF;
 1165             luaState["HL_FORMAT_ANSI"]=ESC_ANSI;
 1166             luaState["HL_FORMAT_XTERM256"]=ESC_XTERM256;
 1167             luaState["HL_FORMAT_TRUECOLOR"]=ESC_TRUECOLOR;
 1168             luaState["HL_FORMAT_SVG"]=SVG;
 1169             luaState["HL_FORMAT_BBCODE"]=BBCODE;
 1170             luaState["HL_FORMAT_PANGO"]=PANGO;
 1171             luaState["HL_FORMAT_ODT"]=ODTFLAT;
 1172 
 1173             Diluculum::LuaValueList params;
 1174             Diluculum::LuaValueMap options;
 1175             options[Diluculum::LuaValue("title")] =  Diluculum::LuaValue( docTitle );
 1176             options[Diluculum::LuaValue("encoding")] =  Diluculum::LuaValue(encoding);
 1177             options[Diluculum::LuaValue("fragment")] =  Diluculum::LuaValue(fragmentOutput);
 1178             options[Diluculum::LuaValue("font")] =  Diluculum::LuaValue(getBaseFont());
 1179             options[Diluculum::LuaValue("fontsize")] =  Diluculum::LuaValue(getBaseFontSize());
 1180 
 1181             params.emplace_back(inputFilesCnt);
 1182             params.emplace_back(processedFilesCnt);
 1183             params.emplace_back(options);
 1184 
 1185             Diluculum::LuaValueList res=luaState.call ( *documentFct, params, fctName+" call");
 1186             if (res.size()>=1) {
 1187                 *keepDefault=false;
 1188                 *result = res[0].asString();
 1189                 if (res.size()==2)
 1190                     *keepDefault = res[1].asBoolean();
 1191             }
 1192             delete documentFct;
 1193         }
 1194     }
 1195 }
 1196 
 1197 void CodeGenerator::printHeader()
 1198 {
 1199     bool keepDefaultHeader=true;
 1200     string pluginHeader;
 1201 
 1202     processedFilesCnt++;
 1203 
 1204     applyPluginChunk("DocumentHeader", &pluginHeader, &keepDefaultHeader);
 1205 
 1206     if ( ! fragmentOutput && keepDefaultHeader)
 1207         *out << getHeader();
 1208 
 1209     *out << pluginHeader;
 1210 
 1211     if ( !fragmentOutput || keepInjections)
 1212         *out << currentSyntax->getHeaderInjection();
 1213 }
 1214 
 1215 void CodeGenerator::printFooter()
 1216 {
 1217 
 1218     bool keepDefaultFooter=true;
 1219     string pluginFooter;
 1220 
 1221     applyPluginChunk("DocumentFooter", &pluginFooter, &keepDefaultFooter);
 1222 
 1223     if ( !fragmentOutput || keepInjections)
 1224         *out << currentSyntax->getFooterInjection();
 1225 
 1226     *out << pluginFooter;
 1227 
 1228     if ( ! fragmentOutput && keepDefaultFooter )
 1229         *out << getFooter();
 1230 }
 1231 
 1232 ParseError CodeGenerator::generateFile ( const string &inFileName,
 1233         const string &outFileName )
 1234 {
 1235     if ( !docStyle.found() ) {
 1236         return BAD_STYLE;
 1237     }
 1238 
 1239     reset();
 1240 
 1241     ParseError error=PARSE_OK;
 1242 
 1243     inFile=inFileName;
 1244     outFile=outFileName;
 1245 
 1246     in = ( inFileName.empty() ? &cin :new ifstream ( inFileName.c_str() ) );
 1247 
 1248     if ( validateInput )
 1249         if ( !validateInputStream() ) error= BAD_INPUT;
 1250 
 1251     if ( !in->fail() && error==PARSE_OK ) {
 1252         out = ( outFileName.empty() ? &cout :new ofstream ( outFileName.c_str() ) );
 1253         if ( out->fail() ) {
 1254             error=BAD_OUTPUT;
 1255         }
 1256     }
 1257 
 1258     if ( in->fail() ) {
 1259         error=BAD_INPUT;
 1260     }
 1261 
 1262     if ( error==PARSE_OK ) {
 1263         initASStream();
 1264         currentSyntax->setInputFileName(inFile);
 1265         printHeader();
 1266         printBody();
 1267         printFooter();
 1268     }
 1269 
 1270     if ( !outFileName.empty() ) {
 1271         delete out;
 1272         out=nullptr;
 1273     }
 1274     if ( !inFileName.empty() ) {
 1275         delete in;
 1276         in=nullptr;
 1277     }
 1278     return error;
 1279 }
 1280 
 1281 string CodeGenerator::generateString ( const string &input )
 1282 {
 1283 
 1284     if ( !docStyle.found() ) {
 1285         return "";
 1286     }
 1287 
 1288     reset();
 1289 
 1290     in = new istringstream ( input );
 1291     out = new ostringstream ();
 1292 
 1293     if ( in->fail() || out->fail() ) {
 1294         return "";
 1295     }
 1296 
 1297     initASStream();
 1298 
 1299     printHeader();
 1300     printBody();
 1301     printFooter();
 1302 
 1303     string result = static_cast<ostringstream*> ( out )->str();
 1304 
 1305     delete out;
 1306     out=nullptr;
 1307     delete in;
 1308     in=nullptr;
 1309 
 1310     return result;
 1311 }
 1312 
 1313 void CodeGenerator::initASStream() {
 1314     if ( formatter != nullptr ) {
 1315         if (streamIterator) delete streamIterator;
 1316         streamIterator =  new astyle::ASStreamIterator ( in, extraEOFChar );
 1317         formatter->init ( streamIterator );
 1318         string desc = currentSyntax->getDescription();
 1319         if (desc=="C#") {
 1320             formatter->setSharpStyle();
 1321         } else if (desc=="Java") {
 1322             formatter->setJavaStyle();
 1323         } else if (desc=="Javascript") {
 1324             formatter->setJSStyle();
 1325         } else if (desc=="Objective C") {
 1326             formatter->setObjCStyle();
 1327         } else {
 1328             formatter->setCStyle();
 1329         }
 1330 
 1331     }
 1332 }
 1333 
 1334 string CodeGenerator::generateStringFromFile ( const string &inFileName )
 1335 {
 1336 
 1337     if ( !docStyle.found() ) {
 1338         return "";
 1339     }
 1340 
 1341     reset();
 1342 
 1343     inFile = inFileName;
 1344 
 1345     in = new ifstream ( inFileName.c_str() );
 1346     out = new ostringstream ();
 1347 
 1348     if ( in->fail() || out->fail() ) {
 1349         return "";
 1350     }
 1351 
 1352     if ( validateInput && !validateInputStream() ) {
 1353         return "ERROR: detected binary input";
 1354     }
 1355 
 1356     initASStream();
 1357 
 1358     currentSyntax->setInputFileName(inFile);
 1359 
 1360     printHeader();
 1361     printBody();
 1362     printFooter();
 1363 
 1364     string result = static_cast<ostringstream*> ( out )->str();
 1365 
 1366     delete out;
 1367     out=nullptr;
 1368     delete in;
 1369     in=nullptr;
 1370 
 1371     return result;
 1372 }
 1373 
 1374 unsigned int CodeGenerator::getStyleID ( State s, unsigned int kwClassID )
 1375 {
 1376     if ( s==KEYWORD && kwClassID ) {
 1377         return NUMBER_BUILTIN_STATES + kwClassID-1;
 1378     }
 1379     return ( unsigned int ) s ;
 1380 }
 1381 
 1382 void CodeGenerator::openTag ( State s )
 1383 {
 1384     *out << openTags[ ( unsigned int ) s];
 1385     currentState=s;
 1386 }
 1387 
 1388 void CodeGenerator::closeTag ( State s )
 1389 {
 1390     *out << closeTags[ ( unsigned int ) s];
 1391     flushWs(2);
 1392     currentState=_UNKNOWN;
 1393 }
 1394 
 1395 void CodeGenerator::openKWTag ( unsigned int kwClassID )
 1396 {
 1397     *out << openTags.at(getStyleID ( KEYWORD, kwClassID ) );
 1398     currentState=KEYWORD;
 1399 }
 1400 
 1401 void CodeGenerator::closeKWTag ( unsigned int kwClassID )
 1402 {
 1403     *out << closeTags.at(getStyleID ( KEYWORD, kwClassID ) );
 1404     flushWs(3);
 1405     currentState=_UNKNOWN;
 1406 }
 1407 
 1408 bool CodeGenerator::loadEmbeddedLang(const string&embedLangDefPath)
 1409 {
 1410     if (nestedLangs.empty()) {
 1411         nestedLangs.push(currentSyntax->getCurrentPath() );
 1412     }
 1413     if (nestedLangs.top() != embedLangDefPath) {
 1414         nestedLangs.push(embedLangDefPath);
 1415     }
 1416     LoadResult res = loadLanguage(embedLangDefPath, true);
 1417     //pass end delimiter regex to syntax description
 1418     currentSyntax->restoreLangEndDelim(embedLangDefPath);
 1419     return res == LOAD_OK;
 1420 }
 1421 
 1422 ///////////////////////////////////////////////////////////////////////////////
 1423 
// Top-level loop of the highlighter: reads states until end of input and
// dispatches each one to its process*State() handler. The STANDARD tag is
// closed before a handler runs and reopened afterwards.
// If highlighting is disabled for the current syntax, the input is copied
// line by line through maskString() instead.
void CodeGenerator::processRootState()
{
    bool eof=false,
         firstLine=true; // avoid newline before printing the first output line

    // syntax test cases are applied to input files whose name contains "syntax_test_"
    applySyntaxTestCase = inFile.find("syntax_test_")!=string::npos;

    if ( currentSyntax->highlightingDisabled() ) {
        // plain output: no state machine, only line numbers and masked text
        string line;
        while ( getline ( *in, line ) && lineNumber < maxLineCnt ) {
            ++lineNumber;
            insertLineNumber ( !firstLine );
            flushWs(4);
            firstLine=false;
            // only lines inside the requested line range are printed
            if (lineNumber>=startLineCntCurFile && lineNumber <=maxLineCnt)
                maskString ( *out, line );
        }
        *out << flush;
        return;
    }

    State state=STANDARD;
    openTag ( STANDARD );

    do {
        // determine next state
        state= getCurrentState(STANDARD);

        // handle current state
        switch ( state ) {
        case KEYWORD:
            closeTag ( STANDARD );
            eof=processKeywordState ( state );
            openTag ( STANDARD );
            break;
        case NUMBER:
            closeTag ( STANDARD );
            eof=processNumberState();
            openTag ( STANDARD );
            break;
        case ML_COMMENT:
            closeTag ( STANDARD );
            eof=processMultiLineCommentState();
            openTag ( STANDARD );
            break;
        case SL_COMMENT:
            closeTag ( STANDARD );
            eof=processSingleLineCommentState();
            openTag ( STANDARD );
            break;
        case STRING:
            closeTag ( STANDARD );
            eof=processStringState ( STANDARD );
            openTag ( STANDARD );
            break;
        case DIRECTIVE:
            closeTag ( STANDARD );
            eof=processDirectiveState();
            openTag ( STANDARD );
            break;
        case ESC_CHAR:
            closeTag ( STANDARD );
            eof=processEscapeCharState();
            openTag ( STANDARD );
            break;
        case SYMBOL:
            closeTag ( STANDARD );
            eof=processSymbolState();
            openTag ( STANDARD );
            break;
        case EMBEDDED_CODE_END:
            closeTag ( STANDARD );
            eof=processSyntaxChangeState(state);
            openTag ( STANDARD );
            break;
        case SYNTAX_ERROR:
            closeTag ( STANDARD );
            eof=processSyntaxErrorState();
            openTag ( STANDARD );
            break;

        case _EOL:
            // XTERM256 fix (issue with less cmd)
            // on the first line the STANDARD tag is only closed and reopened
            // when line numbers are shown
            if  (!firstLine || showLineNumbers) {
                closeTag ( STANDARD );
            }
            insertLineNumber(!firstLine);
            if (!firstLine || showLineNumbers) {
                flushWs(5);
                // the state trace is kept per line
                stateTraceCurrent.clear();
                openTag ( STANDARD );
            }
            firstLine=false;
            break;
        case _EOF:
            eof=true;
            break;
        case _WS:
            processWsState();
            break;
        default:
            // every other state: print the token inside the STANDARD tag
            printMaskedToken();
            break;
        }
    } while ( !eof );

    // for the two escape sequence formats the final close tag is only written
    // if there is a pending token or more than one line was read
    if (token.size() || lineNumber>1 || (outputType!=ESC_TRUECOLOR && outputType!=ESC_XTERM256))
        closeTag ( STANDARD );

    // give the line end decoration function of the syntax a last chance to add output
    if (currentSyntax->getDecorateLineEndFct()) {
        Diluculum::LuaValueList res=callDecorateLineFct(false);
        if (res.size()==1) {
            *out << res[0].asString();
        }
    }

    // noTrailingNewLine: 0 always allows the final newline, 2 only if there
    // is a pending token or more than one line was read
    printNewLines = noTrailingNewLine==0 || ( noTrailingNewLine==2 && ( token.size() || lineNumber>1) );
    *out << getNewLine();
    *out << flush;
}
 1544 
 1545 bool CodeGenerator::processSyntaxChangeState(State myState)
 1546 {
 1547     State newState=STANDARD;
 1548     bool eof=false,
 1549          exitState=false;
 1550 
 1551     openTag ( KEYWORD );
 1552     do {
 1553 
 1554         if (myState==EMBEDDED_CODE_END) {
 1555             if (!nestedLangs.empty()) {
 1556                 nestedLangs.pop();
 1557             }
 1558             // load host language syntax
 1559             if (!nestedLangs.empty()) {
 1560                 loadLanguage(nestedLangs.top(), true);
 1561             }
 1562             matchRegex(line, EMBEDDED_CODE_BEGIN); // match remaining line using the host syntax
 1563         }
 1564 
 1565         printMaskedToken ( newState!=_WS );
 1566 
 1567         newState= getCurrentState(myState);
 1568 
 1569         switch ( newState ) {
 1570         case _WS:
 1571             processWsState();
 1572             break;
 1573         case _EOL:
 1574             insertLineNumber();
 1575             exitState=true;
 1576             break;
 1577         case _EOF:
 1578             eof = true;
 1579             break;
 1580         default:
 1581             exitState=true;
 1582             break;
 1583         }
 1584     } while (  !exitState  &&  !eof );
 1585     closeTag ( KEYWORD );
 1586 
 1587     return eof;
 1588 }
 1589 
 1590 
 1591 bool CodeGenerator::processKeywordState ( State myState )
 1592 {
 1593     State newState=STANDARD;
 1594     unsigned int myClassID=currentKeywordClass;
 1595     bool eof=false,
 1596          exitState=false;
 1597 
 1598     openKWTag ( myClassID );
 1599     do {
 1600         printMaskedToken ( newState!=_WS,
 1601                            ( currentSyntax->isIgnoreCase() ) ? keywordCase : StringTools::CASE_UNCHANGED );
 1602         newState= getCurrentState(myState);
 1603         switch ( newState ) {
 1604         case _WS:
 1605             processWsState();
 1606             exitState=isolateTags;
 1607             break;
 1608         case _EOL:
 1609             insertLineNumber();
 1610             exitState=true;
 1611 
 1612             break;
 1613         case _EOF:
 1614             eof = true;
 1615             break;
 1616         case KEYWORD_END:
 1617             exitState=true;
 1618             break;
 1619         default:
 1620             exitState= ( myClassID!=currentKeywordClass ) || ( myState!=newState );
 1621             break;
 1622         }
 1623     } while ( !exitState  &&  !eof );
 1624 
 1625     closeKWTag ( myClassID );
 1626 
 1627     currentKeywordClass=0;
 1628     return eof;
 1629 }
 1630 
 1631 bool CodeGenerator::processNumberState()
 1632 {
 1633     State newState=STANDARD;
 1634     bool eof=false,
 1635          exitState=false;
 1636     openTag ( NUMBER );
 1637     do {
 1638         printMaskedToken ( newState!=_WS );
 1639         newState= getCurrentState(NUMBER);
 1640         switch ( newState ) {
 1641         case _WS:
 1642             processWsState();
 1643             exitState=isolateTags;
 1644             break;
 1645         case _EOL:
 1646             insertLineNumber();
 1647             exitState=true;
 1648             break;
 1649         case _EOF:
 1650             eof = true;
 1651             break;
 1652         default:
 1653             exitState=newState!=NUMBER;
 1654             break;
 1655         }
 1656     } while ( !exitState && !eof );
 1657 
 1658     closeTag ( NUMBER );
 1659     return eof;
 1660 }
 1661 
 1662 
// Prints a multi line comment inside the ML_COMMENT tag, including nested
// comments if the syntax allows them. The state ends when the closing
// delimiter that belongs to the opening delimiter has brought the nesting
// counter back to zero. Returns true if end of input was hit.
bool CodeGenerator::processMultiLineCommentState()
{
    int commentCount=1; // nesting depth, the comment ends when it reaches 0
    int openDelimID=currentSyntax->getOpenDelimiterID ( token, ML_COMMENT);
    State newState=STANDARD;
    bool eof=false, exitState=false, containedTestCase=false;
    // column at which the comment delimiter starts (reset to 0 on each new line)
    unsigned int startColumn=lineIndex - token.size() ;
    openTag ( ML_COMMENT );
    do {
        printMaskedToken (newState!=_WS );
        newState= getCurrentState(ML_COMMENT);

        switch ( newState ) {
        case _WS:
            processWsState();
            break;
        case _EOL:
            // close the tag at the end of the line and reopen it on the next
            // one; the tags go into the whitespace buffer around the line number
            wsBuffer += closeTags[ML_COMMENT];
            insertLineNumber();
            wsBuffer += openTags[ML_COMMENT];
            startColumn=0;
            break;
        case _EOF:
            eof = true;
            break;
        case _TESTPOS:
            // the "<" marker refers to the comment start column, any other
            // marker to the column before the current line index
            runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
            printMaskedToken();
            containedTestCase=true;
            break;
        case ML_COMMENT:

            if ( currentSyntax->allowNestedMLComments() ) {
                ++commentCount;
            }
            // if delimiters are equal, close the comment by continuing to
            // ML_COMMENT_END section
            if (currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, ML_COMMENT  ))) break;
            // intentional fall through

        case ML_COMMENT_END:

            // ignore closing delimiters which do not belong to the opening one
            if (!currentSyntax->matchesOpenDelimiter (token,  ML_COMMENT_END, openDelimID)) {
                break;
            }
            commentCount--;
            if ( !commentCount ) {
                printMaskedToken();
                exitState=true;
            }
            break;
        default:
            break;
        }
    } while ( !exitState  &&  !eof );

    closeTag ( ML_COMMENT );

    // discard the state trace once test cases of this comment were run
    if (containedTestCase){
        stateTraceCurrent.clear();
    }
    return eof;
}
 1725 
 1726 
 1727 bool CodeGenerator::processSingleLineCommentState()
 1728 {
 1729     State newState=STANDARD;
 1730     bool eof=false, exitState=false, containedTestCase=false;
 1731     unsigned int startColumn = lineIndex - token.size() ;
 1732 
 1733     openTag ( SL_COMMENT );
 1734     do {
 1735         printMaskedToken ( newState!=_WS );
 1736         newState= getCurrentState(SL_COMMENT);
 1737 
 1738         switch ( newState ) {
 1739         case _WS:
 1740             processWsState();
 1741             break;
 1742         case _EOL:
 1743             printMaskedToken();
 1744             if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
 1745                 exitState=false;
 1746             } else {
 1747                 exitState=true;
 1748             }
 1749             if ( !exitState ) wsBuffer += closeTags[SL_COMMENT];
 1750             insertLineNumber();
 1751             if ( !exitState ) wsBuffer += openTags[SL_COMMENT];
 1752 
 1753             break;
 1754         case _EOF:
 1755             eof = true;
 1756             break;
 1757         case _TESTPOS:
 1758             runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
 1759             printMaskedToken();
 1760             containedTestCase=true;
 1761             break;
 1762 
 1763         default:
 1764             break;
 1765         }
 1766     } while ( !exitState  &&  !eof );
 1767 
 1768     closeTag ( SL_COMMENT );
 1769 
 1770     if (containedTestCase) {
 1771         stateTraceCurrent.clear();
 1772     }
 1773 
 1774     return eof;
 1775 }
 1776 
 1777 bool CodeGenerator::processDirectiveState()
 1778 {
 1779     State  newState=STANDARD;
 1780     bool eof=false, exitState=false;
 1781 
 1782     openTag ( DIRECTIVE );
 1783     do {
 1784         printMaskedToken ( newState!=_WS );
 1785         newState= getCurrentState(DIRECTIVE);
 1786         switch ( newState ) {
 1787         case _WS:
 1788             processWsState();
 1789             break;
 1790         case DIRECTIVE_END:
 1791             printMaskedToken();
 1792             exitState=true;
 1793             break;
 1794         case _EOL:
 1795             printMaskedToken();
 1796 
 1797             if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
 1798                 exitState=false;
 1799             } else {
 1800                 if (currentSyntax->getContinuationChar()!=0x13){
 1801                     exitState= ( terminatingChar!=currentSyntax->getContinuationChar() );
 1802                 }
 1803             }
 1804             if ( !exitState ) wsBuffer += closeTags[DIRECTIVE];
 1805             insertLineNumber();
 1806             if ( !exitState ) wsBuffer += openTags[DIRECTIVE];
 1807             break;
 1808         case ML_COMMENT:
 1809             closeTag ( DIRECTIVE );
 1810             eof= processMultiLineCommentState();
 1811             openTag ( DIRECTIVE );
 1812             break;
 1813         case SL_COMMENT:
 1814             closeTag ( DIRECTIVE );
 1815             eof= processSingleLineCommentState();
 1816             openTag ( DIRECTIVE );
 1817             exitState=true;
 1818             break;
 1819         case STRING:
 1820             closeTag ( DIRECTIVE );
 1821             eof=processStringState ( DIRECTIVE );
 1822             openTag ( DIRECTIVE );
 1823             break;
 1824         case _EOF:
 1825             eof = true;
 1826             break;
 1827         default:
 1828             break;
 1829         }
 1830     } while ( !exitState && !eof );
 1831 
 1832     closeTag ( DIRECTIVE );
 1833     return eof;
 1834 }
 1835 
 1836 bool CodeGenerator::processStringState ( State oldState )
 1837 {
 1838     State newState=STANDARD;
 1839     bool eof=false, exitState=false;
 1840     bool returnedFromOtherState=false;
 1841 
 1842     State myState= ( oldState==DIRECTIVE ) ? DIRECTIVE_STRING : STRING;
 1843 
 1844     int openDelimID=currentSyntax->getOpenDelimiterID ( token, myState);
 1845     string openDelim=token;
 1846 
 1847     //Raw String by definition:
 1848     bool isRawString=currentSyntax->delimiterIsRawString(openDelimID) || toggleDynRawString;
 1849 
 1850     // Test if character before string open delimiter token equals to the
 1851     // raw string prefix (Example: r" ", r""" """ in Python)
 1852 
 1853     //Raw String Prefix:
 1854     if ( lineIndex>token.length() &&line[lineIndex-token.length()-1]==currentSyntax->getRawStringPrefix() ) {
 1855         isRawString=true;
 1856     }
 1857 
 1858     openTag ( myState );
 1859     do {
 1860         // true if last token was an escape char
 1861         if ( !returnedFromOtherState ) {
 1862             printMaskedToken (newState!=_WS );
 1863         }
 1864         returnedFromOtherState=false;
 1865         newState= getCurrentState(myState);
 1866 
 1867         switch ( newState ) {
 1868         case _WS:
 1869             processWsState();
 1870             break;
 1871         case _EOL:
 1872             wsBuffer += closeTags[myState];
 1873             insertLineNumber();
 1874             wsBuffer += openTags[myState];
 1875             break;
 1876         case STRING_END:
 1877             if (resultOfHook || currentSyntax->matchesOpenDelimiter (token,  STRING_END, openDelimID)) {
 1878                 if (currentSyntax->assertDelimEqualLength()) {
 1879                     exitState= openDelim.length()==token.length();
 1880                 } else {
 1881                     exitState= true;
 1882                 }
 1883                 printMaskedToken();
 1884             }
 1885             break;
 1886         case STRING:
 1887             // if there exist multiple string delimiters, close string if
 1888             // current delimiter is equal to the opening delimiter
 1889             exitState=currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, STRING  )) && token==openDelim;
 1890             printMaskedToken();
 1891             break;
 1892         case ESC_CHAR:
 1893             if ( !isRawString ) {
 1894                 closeTag ( myState );
 1895                 eof=processEscapeCharState();
 1896                 openTag ( myState );
 1897                 returnedFromOtherState=true;
 1898             } else {
 1899                 // FIXME not a fix for Python r"""\"""
 1900                 exitState=token.size()>1 && token[1] == openDelim[0];
 1901                 printMaskedToken();
 1902             }
 1903             break;
 1904         case STRING_INTERPOLATION:
 1905             closeTag ( myState );
 1906             eof=processInterpolationState();
 1907             openTag ( myState );
 1908             returnedFromOtherState=true;
 1909             break;
 1910 
 1911         case _EOF:
 1912             eof = true;
 1913             break;
 1914         default:
 1915             printMaskedToken();
 1916             break;
 1917         }
 1918     } while ( !exitState && !eof );
 1919 
 1920     closeTag ( myState );
 1921 
 1922     toggleDynRawString = false;
 1923 
 1924     return eof;
 1925 }
 1926 
 1927 bool CodeGenerator::processSymbolState()
 1928 {
 1929     State newState=STANDARD;
 1930     bool eof=false,
 1931          exitState=false;
 1932 
 1933     openTag ( SYMBOL );
 1934     do {
 1935         printMaskedToken ( newState!=_WS );
 1936         newState= getCurrentState(SYMBOL);
 1937         switch ( newState ) {
 1938         case _WS:
 1939             processWsState();
 1940             exitState=isolateTags;
 1941             break;
 1942         case _EOL:
 1943             insertLineNumber();
 1944             exitState=true;
 1945             break;
 1946         case _EOF:
 1947             eof = true;
 1948             break;
 1949         default:
 1950             exitState=newState!=SYMBOL;
 1951             break;
 1952         }
 1953     } while ( !exitState && !eof );
 1954 
 1955     closeTag ( SYMBOL );
 1956     return eof;
 1957 }
 1958 
 1959 bool CodeGenerator::processSyntaxErrorState()
 1960 {
 1961     State newState=STANDARD;
 1962     bool eof=false,
 1963     exitState=false;
 1964 
 1965     openTag ( SYNTAX_ERROR );
 1966     do {
 1967         printMaskedToken ( newState!=_WS );
 1968         newState= getCurrentState(SYNTAX_ERROR);
 1969         switch ( newState ) {
 1970             case _WS:
 1971                 processWsState();
 1972                 exitState=isolateTags;
 1973                 break;
 1974             case _EOL:
 1975                 insertLineNumber();
 1976                 exitState=true;
 1977                 break;
 1978             case _EOF:
 1979                 eof = true;
 1980                 break;
 1981             default:
 1982                 exitState=newState!=SYMBOL;
 1983                 break;
 1984         }
 1985     } while ( !exitState && !eof );
 1986 
 1987     closeTag ( SYNTAX_ERROR );
 1988     return eof;
 1989 }
 1990 
 1991 bool CodeGenerator::processEscapeCharState()
 1992 {
 1993     State newState=STANDARD;
 1994     bool eof=false, exitState=false;
 1995     openTag ( ESC_CHAR );
 1996     do {
 1997         printMaskedToken (newState!=_WS );
 1998         newState= getCurrentState(ESC_CHAR);
 1999         switch ( newState ) {
 2000         case _EOL:
 2001             insertLineNumber();
 2002             exitState=true;
 2003             break;
 2004         case _WS:
 2005             processWsState();
 2006             exitState=isolateTags;
 2007             break;
 2008         case _EOF:
 2009             eof = true;
 2010             break;
 2011         default:
 2012             exitState=newState!=ESC_CHAR;
 2013             break;
 2014         }
 2015     } while ( !exitState && !eof );
 2016 
 2017     closeTag ( ESC_CHAR );
 2018     return eof;
 2019 }
 2020 
 2021 bool CodeGenerator::processInterpolationState()
 2022 {
 2023     State newState=STANDARD;
 2024     bool eof=false, exitState=false;
 2025     openTag ( STRING_INTERPOLATION );
 2026     do {
 2027         printMaskedToken (newState!=_WS );
 2028         newState= getCurrentState(STRING_INTERPOLATION);
 2029         switch ( newState ) {
 2030         case _EOL:
 2031             insertLineNumber();
 2032             exitState=true;
 2033             break;
 2034         case _WS:
 2035             processWsState();
 2036             exitState=isolateTags;
 2037             break;
 2038         case _EOF:
 2039             eof = true;
 2040             break;
 2041         default:
 2042             exitState=newState!=STRING_INTERPOLATION;
 2043             break;
 2044         }
 2045     } while ( !exitState && !eof );
 2046 
 2047     closeTag ( STRING_INTERPOLATION );
 2048     return eof;
 2049 }
 2050 
 2051 void CodeGenerator::processWsState()
 2052 {
 2053 
 2054     if ( !maskWs ) {
 2055         wsBuffer += token;
 2056         token.clear();
 2057         return;
 2058     }
 2059 
 2060     flushWs(6);
 2061 
 2062     int cntWs=0;
 2063     lineIndex--;
 2064     PositionState ps(currentState, 0, true);
 2065 
 2066     while ( line[lineIndex]==' ' || line[lineIndex]=='\t' ) {
 2067         ++cntWs;
 2068         ++lineIndex;
 2069     }
 2070 
 2071     if ( cntWs>1 ) {
 2072 
 2073         unsigned int styleID=getStyleID ( currentState, currentKeywordClass );
 2074         if ( excludeWs && styleID!=_UNKNOWN ) {
 2075             *out << closeTags[styleID];
 2076         }
 2077 
 2078         *out << maskWsBegin;
 2079         for ( int i=0; i<cntWs; i++ ) {
 2080             *out << spacer;
 2081             if (applySyntaxTestCase){
 2082                 stateTraceCurrent.push_back(ps);
 2083             }
 2084         }
 2085         *out << maskWsEnd;
 2086         if ( excludeWs && styleID!=_UNKNOWN ) {
 2087             *out << openTags[styleID];
 2088         }
 2089     } else {
 2090 
 2091         *out << spacer; //Bugfix fehlender Space nach Strings
 2092         if (applySyntaxTestCase){
 2093             stateTraceCurrent.push_back(ps);
 2094         }
 2095     }
 2096 
 2097     spacer = initialSpacer;
 2098 
 2099     token.clear();
 2100 }
 2101 
 2102 void CodeGenerator::flushWs(int arg)
 2103 {
 2104     PositionState ps(currentState, 0, true);
 2105     //workaround condition
 2106     for ( size_t i=0; i<wsBuffer.size() && ((arg > 3) || ( (arg<4) && lineIndex>1)) && applySyntaxTestCase ; i++ ) {
 2107         stateTraceCurrent.push_back(ps);
 2108         //std::cerr <<"\nflush >"<<wsBuffer<<"< arg:"<<arg;
 2109     }
 2110 
 2111     //fix canvas whitespace
 2112     if (wsBuffer.length() && (outputType==ESC_XTERM256 || outputType==ESC_TRUECOLOR) ){
 2113         *out<<maskWsBegin;
 2114     }
 2115 
 2116     *out << wsBuffer;
 2117     wsBuffer.clear();
 2118 }
 2119 
 2120 string CodeGenerator::getTestcaseName(State s, unsigned int kwClass) {
 2121     switch (s) {
 2122 
 2123         case STANDARD:
 2124             return STY_NAME_STD;
 2125         case STRING:
 2126             return STY_NAME_STR;
 2127         case NUMBER:
 2128             return STY_NAME_NUM;
 2129         case SL_COMMENT:
 2130             return STY_NAME_SLC;
 2131         case ML_COMMENT:
 2132             return STY_NAME_COM;
 2133         case ESC_CHAR:
 2134             return STY_NAME_ESC;
 2135         case DIRECTIVE:
 2136             return STY_NAME_DIR;
 2137         case DIRECTIVE_STRING:
 2138             return STY_NAME_DST;
 2139         case SYMBOL:
 2140             return STY_NAME_SYM;
 2141         case STRING_INTERPOLATION:
 2142             return STY_NAME_IPL;
 2143         case SYNTAX_ERROR:
 2144             return STY_NAME_ERR;
 2145         case _WS:
 2146             return "ws";
 2147         case KEYWORD: {
 2148 
 2149             if (!kwClass)
 2150                 return "ws";
 2151 
 2152             char kwName[20] = {0};
 2153             snprintf(kwName, sizeof(kwName), "keyword %c", ('a'+kwClass-1));
 2154 
 2155             return string(kwName);
 2156         }
 2157         default:
 2158             return "unknown_test";
 2159     }
 2160 }
 2161 
 2162 void CodeGenerator::printTrace(const string &s){
 2163     std::cout<<"\n curr "<<lineNumber<<" "<<s<<": ";
 2164     for (unsigned int i=0; i< stateTraceCurrent.size(); i++) {
 2165         std::cout<<" "<<stateTraceCurrent[i].state;
 2166     }
 2167     std::cout<<"\n test "<<lineNumber<<" "<<s<<": ";
 2168     for (unsigned int i=0; i< stateTraceTest.size(); i++) {
 2169         std::cout<<" "<<stateTraceTest[i].state;
 2170     }
 2171     std::cout<<"\n";
 2172 }
 2173 
 2174 //column: lineIndex (not a UTF-8 validated string position)
 2175 void CodeGenerator::runSyntaxTestcases(unsigned int column){
 2176 
 2177     if (encoding=="utf-8")
 2178         column = StringTools::utf8_strlen(line.substr(0, column));
 2179 
 2180     unsigned int assertGroup=0;
 2181     size_t typeDescPos=line.find_first_not_of("\t ^", lineIndex);
 2182     State assertState=_UNKNOWN;
 2183     bool negation=false;
 2184     bool testFailed=false;
 2185 
 2186     ostringstream errMsg;
 2187     string prefix;
 2188     //printTrace("trace 2");
 2189 
 2190     if (typeDescPos!=string::npos) {
 2191 
 2192         if (line[typeDescPos]=='~') {
 2193 
 2194             negation=true;
 2195             prefix="~";
 2196             ++typeDescPos;
 2197         }
 2198 
 2199         if (line.find(STY_NAME_NUM, typeDescPos)==typeDescPos)
 2200             assertState=NUMBER;
 2201         //TODO temp. fix to allow old and new string classes
 2202         else if (line.find(STY_NAME_STR, typeDescPos)==typeDescPos || line.find("str", typeDescPos)==typeDescPos)
 2203             assertState=STRING;
 2204         else if (line.find(STY_NAME_ESC, typeDescPos)==typeDescPos)
 2205             assertState=ESC_CHAR;
 2206         else if (line.find(STY_NAME_IPL, typeDescPos)==typeDescPos)
 2207             assertState=STRING_INTERPOLATION;
 2208         else if (line.find(STY_NAME_SYM, typeDescPos)==typeDescPos)
 2209             assertState=SYMBOL;
 2210         else if (line.find(STY_NAME_DIR, typeDescPos)==typeDescPos)
 2211             assertState=DIRECTIVE;
 2212         else if (line.find(STY_NAME_SLC, typeDescPos)==typeDescPos)
 2213             assertState=SL_COMMENT;
 2214         else if (line.find(STY_NAME_COM, typeDescPos)==typeDescPos)
 2215             assertState=ML_COMMENT;
 2216         else if (line.find("ws", typeDescPos)==typeDescPos)
 2217             assertState=_WS;
 2218         //TODO temp. fix to allow old and new default classes
 2219         else if (line.find(STY_NAME_STD, typeDescPos)==typeDescPos  || line.find("std", typeDescPos)==typeDescPos)
 2220             assertState=STANDARD;
 2221         else if (line.find(STY_NAME_DST, typeDescPos)==typeDescPos)
 2222             assertState=DIRECTIVE_STRING;
 2223 
 2224         else if (line.find("kw", typeDescPos)==typeDescPos || line.find("st", typeDescPos)==typeDescPos) {
 2225             assertState=KEYWORD;
 2226             if (isalpha(line[typeDescPos+2]))
 2227                 assertGroup=line[typeDescPos+2] - 'a' +1;
 2228         }
 2229 
 2230        if (   (assertState!=_WS && stateTraceTest[column].state != assertState && !stateTraceTest[column].isWhiteSpace )
 2231             || (assertState==_WS && !stateTraceTest[column].isWhiteSpace)
 2232             || assertGroup != stateTraceTest[column].kwClass) {
 2233 
 2234             testFailed=!negation;
 2235 
 2236         } else if (negation ) {
 2237 
 2238             //TODO Fix ~ws
 2239             if (assertState!=_WS  && !stateTraceTest[column].isWhiteSpace )
 2240                 testFailed=true;
 2241         }
 2242 
 2243         if (testFailed) {
 2244             errMsg << inFile << " line " << lineNumber << ", column "<< column
 2245                     << ": got " << getTestcaseName(stateTraceTest[column].state, stateTraceTest[column].kwClass)
 2246                     << " instead of " << prefix << getTestcaseName(assertState, assertGroup);
 2247 
 2248             failedPosTests.push_back(errMsg.str());
 2249         }
 2250 
 2251     }
 2252 
 2253     lineContainedTestCase=true;
 2254 }
 2255 
 2256 string CodeGenerator::getNewLine()
 2257 {
 2258     ostringstream ss;
 2259     printSyntaxError(ss);
 2260     if (printNewLines)
 2261         ss << newLineTag;
 2262     return ss.str();
 2263 }
 2264 
 2265 Diluculum::LuaValueList CodeGenerator::callDecorateLineFct(bool isLineStart)
 2266 {
 2267 
 2268     Diluculum::LuaValueList params;
 2269     params.emplace_back(lineNumber);
 2270 
 2271     return currentSyntax->getLuaState()->call ( isLineStart ?
 2272             *currentSyntax->getDecorateLineBeginFct(): *currentSyntax->getDecorateLineEndFct(),
 2273             params,"getDecorateLineFct call");
 2274 }
 2275 
 2276 void CodeGenerator::setOverrideParams() {
 2277     if (currentSyntax->requiresParamUpdate()) {
 2278         if ( currentSyntax->getOverrideConfigVal("state.string.raw")=="true"){
 2279             toggleDynRawString=true; // reset to false in string state fct
 2280         }
 2281         if ( currentSyntax->getOverrideConfigVal("format.maskws")=="true") {
 2282             maskWs=true;
 2283         }
 2284         if ( currentSyntax->getOverrideConfigVal("format.spacer").size()) {
 2285             spacer=currentSyntax->getOverrideConfigVal("format.spacer");
 2286         }
 2287     }
 2288 }
 2289 
 2290 void CodeGenerator::insertLineNumber ( bool insertNewLine )
 2291 {
 2292     if ( insertNewLine ) {
 2293         if (currentSyntax->getDecorateLineEndFct()) {
 2294             Diluculum::LuaValueList res=callDecorateLineFct(false);
 2295             if (res.size()==1) {
 2296                 setOverrideParams();
 2297                 wsBuffer +=res[0].asString();
 2298             }
 2299         }
 2300         wsBuffer += getNewLine();
 2301     }
 2302 
 2303     if (currentSyntax->getDecorateLineBeginFct()) {
 2304         Diluculum::LuaValueList res=callDecorateLineFct(true);
 2305         if (res.size()==1) {
 2306             setOverrideParams();
 2307             wsBuffer += res[0].asString();
 2308         }
 2309     }
 2310 
 2311     if ( showLineNumbers ) {
 2312         ostringstream os;
 2313         ostringstream numberPrefix;
 2314 
 2315         os << setw ( getLineNumberWidth() ) << right;
 2316         if( numberCurrentLine ) {
 2317             if ( lineNumberFillZeroes ) {
 2318                 os.fill ( '0' );
 2319             }
 2320             os << lineNumber+lineNumberOffset;
 2321         } else {
 2322             os << "";
 2323         }
 2324 
 2325         numberPrefix << openTags[LINENUMBER];
 2326         maskString ( numberPrefix, os.str() );
 2327 
 2328         //use initialSpacer here, spacer can be overridden by plug-in (format.spacer)
 2329         numberPrefix << initialSpacer << closeTags[LINENUMBER];
 2330         wsBuffer += numberPrefix.str();
 2331     }
 2332 }
 2333 
 2334 unsigned int CodeGenerator::getLineIndex()
 2335 {
 2336     return lineIndex;
 2337 }
 2338 unsigned int CodeGenerator::getLastLineLength()
 2339 {
 2340     return lastLineLength;
 2341 }
 2342 
 2343 bool CodeGenerator::requiresTwoPassParsing() const {
 2344     if (!currentSyntax) return false;
 2345     return currentSyntax->getPersistentSnippetsNum()>0;
 2346 }
 2347 
 2348 
 2349 bool CodeGenerator::printExternalStyle ( const string &outFile )
 2350 {
 2351     if ( !includeStyleDef ) {
 2352         ostream *cssOutFile = ( outFile.empty() ? &cout :new ofstream ( outFile.c_str() ) );
 2353         if ( !cssOutFile->fail() ) {
 2354             if (!omitVersionComment) {
 2355                 *cssOutFile << styleCommentOpen
 2356                             <<" Style definition file generated by highlight "
 2357                             << Info::getVersion() << ", " << Info::getWebsite()
 2358                             << " " << styleCommentClose << "\n";
 2359             }
 2360             *cssOutFile << getStyleDefinition()
 2361                         << "\n";
 2362             *cssOutFile << readUserStyleDef();
 2363             if ( !outFile.empty() ) delete cssOutFile;
 2364         } else {
 2365             return false;
 2366         }
 2367     }
 2368     return true;
 2369 }
 2370 
 2371 bool CodeGenerator::printPersistentState ( const string &outFile )
 2372 {
 2373     if (!currentSyntax) return false;
 2374 
 2375     ofstream pluginOutFile( outFile.c_str());
 2376     if ( !pluginOutFile.fail() ) {
 2377 
 2378         pluginOutFile   <<"Description=\"Plugin generated by highlight using the --two-pass option\"\n\n"
 2379                         <<"Categories = {\"two-pass\" }\n\n"
 2380                         <<"function syntaxUpdate(desc)\n\n";
 2381 
 2382         pluginOutFile << currentSyntax->getPersistentHookConditions();
 2383 
 2384         for (auto snippet: currentSyntax->getPersistentSnippets())
 2385         {
 2386             pluginOutFile << snippet <<"\n\n";
 2387         }
 2388 
 2389         pluginOutFile<<"end\n\n"
 2390                      <<"Plugins={\n"
 2391                      <<"  { Type=\"lang\", Chunk=syntaxUpdate }\n"
 2392                      <<"}\n";
 2393     } else {
 2394         return false;
 2395     }
 2396 
 2397     return true;
 2398 }
 2399 
 2400 string CodeGenerator::readUserStyleDef()
 2401 {
 2402     ostringstream ostr;
 2403     if ( !styleInputPath.empty() ) {
 2404         ifstream userStyleDef ( styleInputPath.c_str() );
 2405         if ( userStyleDef ) {
 2406             ostr << "\n" << styleCommentOpen
 2407                 << " Content of " << styleInputPath
 2408                 << ": " <<styleCommentClose << "\n";
 2409             string line;
 2410             while ( getline ( userStyleDef, line ) ) {
 2411                 ostr << line << "\n";
 2412             }
 2413             userStyleDef.close();
 2414         } else {
 2415             ostr << styleCommentOpen
 2416                 << " ERROR: Could not include " << styleInputPath
 2417                 << "." << styleCommentClose << "\n";
 2418         }
 2419     }
 2420 
 2421     string injections=docStyle.getInjections();
 2422     if (!injections.empty()) {
 2423         ostr    << "\n" << styleCommentOpen
 2424                 << " Plug-in theme injections: " <<styleCommentClose << "\n";
 2425         ostr << injections<<"\n";
 2426     }
 2427     return ostr.str();
 2428 }
 2429 
 2430 bool CodeGenerator::initPluginScript(const string& script)
 2431 {
 2432 
 2433     if (script.empty()) return true;
 2434 
 2435     try {
 2436 
 2437         userScriptError="";
 2438         Diluculum::LuaState ls;
 2439 
 2440         ls.doFile (script);
 2441         int listIdx=1;
 2442 
 2443         while (ls["Plugins"][listIdx].value() !=Diluculum::Nil) {
 2444 
 2445             // Theme plugins
 2446             if (ls["Plugins"][listIdx]["Type"].value().asString()=="theme") {
 2447                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2448                     docStyle.addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2449                 }
 2450             }
 2451             // Syntax plugins
 2452             else if (ls["Plugins"][listIdx]["Type"].value().asString()=="lang") {
 2453                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2454                     currentSyntax->addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2455                 }
 2456             }
 2457             // Format plugins
 2458             else if (ls["Plugins"][listIdx]["Type"].value().asString()=="format") {
 2459                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2460                     addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2461                 }
 2462             }
 2463 
 2464             listIdx++;
 2465         }
 2466     }  catch (Diluculum::LuaError &err) {
 2467         userScriptError=err.what();
 2468         return false;
 2469     }
 2470     return true;
 2471 }
 2472 
 2473 void CodeGenerator::resetSyntaxReaders() {
 2474     for ( auto it=syntaxReaders.begin(); it!=syntaxReaders.end(); it++ ) {
 2475         delete it->second;
 2476     }
 2477     currentSyntax=nullptr;
 2478     syntaxReaders.clear();
 2479 }
 2480 
 2481 bool CodeGenerator::syntaxRequiresTwoPassRun() {
 2482     if (!currentSyntax) return false;
 2483     return currentSyntax->requiresTwoPassRun();
 2484 }
 2485 
 2486 void CodeGenerator::clearPersistentSnippets(){
 2487     if (currentSyntax) {
 2488         currentSyntax->clearPersistentSnippets();
 2489     }
 2490 }
 2491 
 2492 void CodeGenerator::updateKeywordClasses(){
 2493 
 2494     if (openTags.size()) {
 2495         if ( openTags.size() >NUMBER_BUILTIN_STATES ) {
 2496             // remove dynamic keyword tag delimiters of the old language definition
 2497             auto keyStyleOpenBegin =
 2498             openTags.begin() + NUMBER_BUILTIN_STATES;
 2499             auto keyStyleCloseBegin =
 2500             closeTags.begin() + NUMBER_BUILTIN_STATES;
 2501             openTags.erase ( keyStyleOpenBegin, openTags.end() );
 2502             closeTags.erase ( keyStyleCloseBegin, closeTags.end() );
 2503         }
 2504         // add new keyword tag delimiters
 2505 
 2506         for ( unsigned int i=0; i< currentSyntax->getKeywordClasses().size(); i++ ) {
 2507             openTags.push_back ( getKeywordOpenTag ( i ) );
 2508             closeTags.push_back ( getKeywordCloseTag ( i ) );
 2509         }
 2510     }
 2511 }
 2512 
 2513 
 2514 }