"Fossies" - the Free Open Source Software Archive

Member "highlight-4.19/src/core/codegenerator.cpp" (18 Feb 2026, 71414 Bytes) of package /linux/www/highlight-4.19.tar.bz2:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "codegenerator.cpp" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
4.18_vs_4.19.
1 /***************************************************************************
2 codegenerator.cpp - description
3 -------------------
4 begin : Die Jul 9 2002
5 copyright : (C) 2002-2026 by Andre Simon
6 email : a.simon@mailbox.org
7 ***************************************************************************/
8
9
10 /*
11 This file is part of Highlight.
12
13 Highlight is free software: you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
17
18 Highlight is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with Highlight. If not, see <http://www.gnu.org/licenses/>.
25 */
26
27
28 #include <climits>
29 #include <memory>
30
31 #include <chrono>
32 #include <thread>
33
34
35 #include <boost/xpressive/xpressive_dynamic.hpp>
36
37 #include "codegenerator.h"
38
39 #include "htmlgenerator.h"
40 #include "xhtmlgenerator.h"
41 #include "rtfgenerator.h"
42 #include "latexgenerator.h"
43 #include "texgenerator.h"
44 #include "svggenerator.h"
45 #include "bbcodegenerator.h"
46 #include "pangogenerator.h"
47 #include "odtgenerator.h"
48 #include "astyle/astyle.h"
49
50 //#if !defined (QT)
51 #include "ansigenerator.h"
52 #include "xterm256generator.h"
53 //#endif
54
55 using std::cin;
56 using std::cout;
57 using std::ifstream;
58 using std::ios;
59 using std::istringstream;
60 using std::iterator;
61 using std::flush;
62 using std::make_pair;
63 using std::map;
64 using std::ofstream;
65 using std::ostream;
66 using std::ostringstream;
67 using std::right;
68 using std::setw;
69 using std::string;
70 using std::vector;
71
72 namespace highlight
73 {
74 const unsigned int CodeGenerator::NUMBER_BUILTIN_STATES = highlight::KEYWORD;
75
76 // must not start with kw, st, sm prefixes
77 const string CodeGenerator::STY_NAME_STD="def";
78 const string CodeGenerator::STY_NAME_STR="sng";
79 const string CodeGenerator::STY_NAME_NUM="num";
80 const string CodeGenerator::STY_NAME_SLC="slc";
81 const string CodeGenerator::STY_NAME_COM="com";
82 const string CodeGenerator::STY_NAME_ESC="esc";
83 const string CodeGenerator::STY_NAME_DIR="ppc"; //preprocessor
84 const string CodeGenerator::STY_NAME_DST="pps"; //preprocessor string
85 const string CodeGenerator::STY_NAME_LIN="lin";
86 const string CodeGenerator::STY_NAME_SYM="opt"; //operator
87 const string CodeGenerator::STY_NAME_IPL="ipl"; //interpolation
88
89 const string CodeGenerator::STY_NAME_HVR="hvr";
90 const string CodeGenerator::STY_NAME_ERR="err";
91 const string CodeGenerator::STY_NAME_ERM="erm";
92
93 vector<Diluculum::LuaFunction*> CodeGenerator::pluginChunks;
94
95
96 CodeGenerator * CodeGenerator::getInstance ( OutputType type )
97 {
98 CodeGenerator* generator=nullptr;
99 switch ( type ) {
100 case HTML:
101 generator = new HtmlGenerator();
102 break;
103 case XHTML:
104 generator = new XHtmlGenerator();
105 break;
106 case TEX:
107 generator = new TexGenerator ();
108 break;
109 case LATEX:
110 generator = new LatexGenerator();
111 break;
112 case RTF:
113 generator = new RtfGenerator ();
114 break;
115 case SVG:
116 generator = new SVGGenerator();
117 break;
118 case BBCODE:
119 generator = new BBCodeGenerator();
120 break;
121 case PANGO:
122 generator = new PangoGenerator();
123 break;
124 case ODTFLAT:
125 generator = new ODTGenerator();
126 break;
127 case ESC_ANSI:
128 generator = new AnsiGenerator();
129 break;
130 case ESC_XTERM256:
131 case ESC_TRUECOLOR:
132 generator = new Xterm256Generator();
133 generator->setESCTrueColor(type==ESC_TRUECOLOR);
134 break;
135 default:
136 break;
137 }
138 return generator;
139 }
140
141
142 CodeGenerator::CodeGenerator ( highlight::OutputType type )
143 :currentSyntax(nullptr),
144 in ( nullptr ),
145 out ( nullptr ),
146 encoding ( "none" ),
147 docTitle ( "Source file" ),
148 maskWs ( false ),
149 excludeWs ( false ),
150 fragmentOutput ( false ),
151 keepInjections( false ),
152 showLineNumbers ( false ),
153 lineNumberFillZeroes ( false ),
154 printNewLines(true),
155 omitVersionComment(false),
156 isolateTags(false),
157 disableStyleCache(false),
158 baseFontSize("10"),
159 lineNumber ( 0 ),
160 lineNumberOffset ( 0 ),
161 currentState ( _UNKNOWN ),
162 currentKeywordClass ( 0 ),
163 includeStyleDef ( false ),
164 numberCurrentLine ( false ),
165 lineIndex ( 0 ),
166 lastLineLength( 0 ),
167 syntaxChangeIndex(UINT_MAX),
168 syntaxChangeLineNo(UINT_MAX),
169 lineNumberWidth ( 5 ),
170 startLineCnt( 1 ),
171 startLineCntCurFile( 1 ),
172 extraEOFChar( 255 ),
173 maxLineCnt ( UINT_MAX ),
174 inputFilesCnt (0),
175 processedFilesCnt (0),
176 kwOffset(0),
177 noTrailingNewLine(0),
178
179 terminatingChar ( '\0' ),
180 formatter ( nullptr ),
181 streamIterator ( nullptr ),
182 formattingEnabled ( false ),
183 formattingPossible ( false ),
184 validateInput ( false ),
185 numberWrappedLines ( true ),
186 resultOfHook(false),
187 lineContainedTestCase(false),
188 lineContainedStmt(false),
189 applySyntaxTestCase(false),
190 toggleDynRawString(false),
191 lsEnableHoverRequests(false),
192 lsCheckSemanticTokens(false),
193 lsCheckSyntaxErrors(false),
194
195 keywordCase ( StringTools::CASE_UNCHANGED ),
196 eolDelimiter ('\n'),
197 outputType ( type )
198 {
199 }
200
201
202 CodeGenerator::~CodeGenerator()
203 {
204 delete formatter;
205 delete streamIterator;
206
207 resetSyntaxReaders();
208
209 for (unsigned int i=0; i<pluginChunks.size(); i++) {
210 delete pluginChunks[i];
211 }
212 pluginChunks.clear();
213 }
214
215
216 bool CodeGenerator::initTheme ( const string& themePath, bool loadSemanticStyles)
217 {
218 this->themePath=themePath;
219 bool loadOK = docStyle.load ( themePath, outputType, loadSemanticStyles );
220 initOutputTags();
221 return loadOK;
222 }
223
224 LSResult CodeGenerator::initLanguageServer ( const string& executable, const vector<string> &options,
225 const string& workspace, const string& syntax,
226 int delay, int logLevel, bool legacy )
227 {
228 if (LSPClient.isInitialized()) {
229 return LSResult::INIT_OK;
230 }
231
232 LSPClient.setLogging(logLevel>1);
233
234 LSPClient.setExecutable(executable);
235 LSPClient.setWorkspace(workspace);
236 LSPClient.setOptions(options);
237 LSPClient.setSyntax(syntax);
238 LSPClient.setInitializeDelay(delay);
239 LSPClient.setLegacyProtocol(legacy);
240 if (!LSPClient.connect()){
241 return LSResult::INIT_BAD_PIPE;
242 }
243
244 if (!LSPClient.runInitialize()){
245 return LSResult::INIT_BAD_REQUEST;
246 }
247 for (int i=0; i<docStyle.getSemanticTokenStyleCount();i++) {
248 currentSyntax->generateNewKWClass(i+1, "st");
249 }
250 LSPClient.runInitialized();
251 updateKeywordClasses();
252 return LSResult::INIT_OK;
253 }
254
255 bool CodeGenerator::lsOpenDocument(const string& fileName, const string & suffix){
256 lsDocumentPath = fileName;
257 return LSPClient.runDidOpen(fileName, suffix);
258 }
259
260 bool CodeGenerator::lsCloseDocument(const string& fileName, const string & suffix){
261 lsDocumentPath.clear();
262 return LSPClient.runDidClose(fileName, suffix);
263 }
264
265 bool CodeGenerator::lsAddSemanticInfo(const string& fileName, const string & suffix){
266 lsCheckSemanticTokens = LSPClient.runSemanticTokensFull(fileName);
267 return lsCheckSemanticTokens;
268 }
269
270 bool CodeGenerator::isHoverProvider(){
271 return LSPClient.isHoverProvider();
272 }
273
274 bool CodeGenerator::isSemanticTokensProvider(){
275 return LSPClient.isSemanticTokensProvider();
276 }
277
278 void CodeGenerator::lsAddHoverInfo(bool hover){
279 lsEnableHoverRequests = hover;
280 }
281
282 void CodeGenerator::lsAddSyntaxErrorInfo(bool error) {
283 lsCheckSyntaxErrors = error;
284 }
285
286
287 void CodeGenerator::exitLanguageServer () {
288 LSPClient.runShutdown();
289 LSPClient.runExit();
290 }
291
292 const string& CodeGenerator::getStyleName()
293 {
294 return themePath;
295 }
296
297 void CodeGenerator::setLineNumberWidth ( int w )
298 {
299 lineNumberWidth=w;
300 }
301
302 int CodeGenerator::getLineNumberWidth()
303 {
304 return lineNumberWidth;
305 }
306
307 void CodeGenerator::setPrintLineNumbers ( bool flag, unsigned int startCnt )
308 {
309 showLineNumbers=flag;
310 lineNumberOffset = startCnt-1;
311 }
312
313 bool CodeGenerator::getPrintLineNumbers()
314 {
315 return showLineNumbers;
316 }
317
318 void CodeGenerator::setPrintZeroes ( bool flag )
319 {
320 lineNumberFillZeroes=flag;
321 }
322
323 bool CodeGenerator::getPrintZeroes()
324 {
325 return lineNumberFillZeroes;
326 }
327
328 void CodeGenerator::setIncludeStyle ( bool flag )
329 {
330 includeStyleDef = flag;
331 }
332
333 void CodeGenerator::disableTrailingNL ( int flag )
334 {
335 noTrailingNewLine = flag;
336 }
337
338 void CodeGenerator::setStyleInputPath ( const string& path )
339 {
340 styleInputPath = path;
341 }
342
343 void CodeGenerator::setStyleOutputPath ( const string& path )
344 {
345 styleOutputPath = path;
346 }
347
348 void CodeGenerator::setPluginParameter ( const string& param )
349 {
350 pluginParameter = param;
351 }
352
353 const string& CodeGenerator::getStyleInputPath()
354 {
355 return styleInputPath;
356 }
357
358 const string& CodeGenerator::getStyleOutputPath()
359 {
360 return styleOutputPath;
361 }
362
363 void CodeGenerator::setFragmentCode ( bool flag )
364 {
365 fragmentOutput=flag;
366 }
367
368 bool CodeGenerator::getFragmentCode()
369 {
370 return fragmentOutput;
371 }
372 void CodeGenerator::setKeepInjections ( bool flag )
373 {
374 keepInjections=flag;
375 }
376
377 bool CodeGenerator::getKeepInjections()
378 {
379 return keepInjections;
380 }
381 void CodeGenerator::setValidateInput ( bool flag )
382 {
383 validateInput=flag;
384 }
385
386 bool CodeGenerator::getValidateInput()
387 {
388 return validateInput;
389 }
390
391 void CodeGenerator::setNumberWrappedLines ( bool flag )
392 {
393 numberWrappedLines=flag;
394 }
395
396 bool CodeGenerator::getNumberWrappedLines()
397 {
398 return numberWrappedLines;
399 }
400
401 void CodeGenerator::setOmitVersionComment ( bool flag )
402 {
403 omitVersionComment=flag;
404 }
405
406 bool CodeGenerator::getOmitVersionComment ()
407 {
408 return omitVersionComment;
409 }
410
411 void CodeGenerator::setIsolateTags ( bool flag )
412 {
413 isolateTags=flag;
414 }
415
416 bool CodeGenerator::getIsolateTags ()
417 {
418 return isolateTags;
419 }
420
421 void CodeGenerator::setBaseFont ( const string& fontName )
422 {
423 baseFont = fontName;
424 }
425
426 void CodeGenerator::setBaseFontSize ( const string& fontSize)
427 {
428 baseFontSize = fontSize;
429 }
430
431 void CodeGenerator::setStyleCaching ( bool flag )
432 {
433 disableStyleCache=!flag;
434 }
435
436 const string CodeGenerator::getBaseFont() const
437 {
438 if ( !baseFont.empty() ) return baseFont;
439 switch ( outputType ) {
440 case HTML:
441 case XHTML:
442 case SVG:
443 return "'Courier New',monospace";
444 break;
445 case LATEX:
446 return "ttfamily";
447 break;
448 case TEX:
449 return "tt";
450 break;
451 default:
452 return "Courier New";
453 }
454 }
455
456 const string CodeGenerator::getBaseFontSize()
457 {
458 return baseFontSize;
459 }
460
461 void CodeGenerator::setTitle ( const string & title )
462 {
463 if ( !title.empty() ) docTitle= title;
464 }
465
466 string CodeGenerator::getTitle()
467 {
468 return docTitle;
469 }
470
471 void CodeGenerator::setEncoding ( const string& encodingName )
472 {
473 encoding = encodingName;
474 }
475
476 bool CodeGenerator::formattingDisabled()
477 {
478 return !formattingEnabled;
479 }
480
481 void CodeGenerator::setStartingInputLine ( unsigned int begin )
482 {
483 startLineCnt = startLineCntCurFile = begin;
484 }
485
486 void CodeGenerator::setMaxInputLineCnt ( unsigned int cnt )
487 {
488 maxLineCnt = cnt;
489 }
490
491 void CodeGenerator::setFilesCnt ( unsigned int cnt )
492 {
493 inputFilesCnt = cnt;
494 processedFilesCnt = 0;
495 }
496
497 bool CodeGenerator::formattingIsPossible()
498 {
499 return formattingPossible;
500 }
501 unsigned char CodeGenerator::getAdditionalEOFChar()
502 {
503 return extraEOFChar;
504 }
505 void CodeGenerator::setAdditionalEOFChar ( unsigned char eofChar )
506 {
507 extraEOFChar = eofChar;
508 }
509 void CodeGenerator::setPreformatting ( WrapMode lineWrappingStyle,
510 unsigned int lineLength,
511 int numberSpaces )
512 {
513 bool enableWrap = lineWrappingStyle!=WRAP_DISABLED;
514 bool replaceTabs = numberSpaces > 0;
515
516 if ( enableWrap || replaceTabs ) {
517 preFormatter.setWrap ( enableWrap );
518 preFormatter.setWrapIndentBraces ( lineWrappingStyle==WRAP_DEFAULT );
519 preFormatter.setWrapLineLength ( lineLength );
520 preFormatter.setReplaceTabs ( replaceTabs );
521 preFormatter.setNumberSpaces ( numberSpaces );
522 }
523 }
524
525 void CodeGenerator::setKeyWordCase ( StringTools::KeywordCase keyCase )
526 {
527 keywordCase = keyCase;
528 }
529
530 void CodeGenerator::setEOLDelimiter(char delim)
531 {
532 eolDelimiter = delim;
533 }
534
535 void CodeGenerator::reset()
536 {
537 lineIndex = 0;
538 lineNumber = 0;
539 line.clear();
540 preFormatter.reset();
541 inFile.clear();
542 outFile.clear();
543 embedLangDefPath.clear();
544 printNewLines=true;
545 syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
546 startLineCntCurFile = startLineCnt;
547 applySyntaxTestCase=lineContainedTestCase=false;
548 if (currentSyntax){
549 vector<int> overrideStyleAttrs=currentSyntax->getOverrideStyleAttributes();
550 docStyle.overrideAttributes(overrideStyleAttrs);
551 if (overrideStyleAttrs.size())
552 disableStyleCache = true;
553 }
554 }
555
556 string CodeGenerator::getThemeInitError()
557 {
558 return docStyle.getErrorMessage();
559 }
560
561 string CodeGenerator::getPluginScriptError()
562 {
563 return userScriptError;
564 }
565
566 string CodeGenerator::getSyntaxRegexError()
567 {
568 return (currentSyntax)? currentSyntax->getFailedRegex(): "syntax undef";
569 }
570 string CodeGenerator::getSyntaxLuaError()
571 {
572 return (currentSyntax)? currentSyntax->getLuaErrorText(): "syntax undef";
573
574 }
575 string CodeGenerator::getSyntaxDescription()
576 {
577 return (currentSyntax)? currentSyntax->getDescription(): "syntax undef";
578
579 }
580 string CodeGenerator::getSyntaxEncodingHint()
581 {
582 return (currentSyntax)? currentSyntax->getEncodingHint(): "";
583
584 }
585 string CodeGenerator::getThemeDescription()
586 {
587 return docStyle.getDescription();
588 }
589
590 string CodeGenerator::getSyntaxCatDescription(){
591 return (currentSyntax)? currentSyntax->getCategoryDescription(): "";
592 }
593
594 string CodeGenerator::getThemeCatDescription()
595 {
596 return docStyle.getCategoryDescription();
597 }
598
599 float CodeGenerator::getThemeContrast()
600 {
601 return docStyle.getContrast();
602 }
603
604 unsigned int CodeGenerator::getLineNumber()
605 {
606 return lineNumber;
607 }
608 bool CodeGenerator::AtEnd(char c) const {
609 bool instream_eof = in->eof();
610 if (extraEOFChar == 255)
611 return instream_eof;
612
613 bool c_null = c == extraEOFChar;
614 bool instream_peek_null = false;
615 if (instream_eof == false && c_null == false)
616 instream_peek_null = in->peek() == extraEOFChar;
617 bool ret = instream_eof || c_null || instream_peek_null;
618 return ret;
619 }
620 bool CodeGenerator::readNewLine ( string &newLine )
621 {
622 bool eof=false;
623
624 if ( lineIndex ) terminatingChar=newLine[lineIndex-1];
625
626 while (!eof && startLineCntCurFile>0) {
627 if ( formattingPossible && formattingEnabled ) {
628 eof=!formatter->hasMoreLines();
629 if ( !eof ) {
630 newLine = formatter->nextLine();
631 }
632 } else {
633 eof = AtEnd() || ! getline ( *in, newLine, eolDelimiter );
634 }
635 --startLineCntCurFile;
636 }
637
638 startLineCntCurFile=1;
639 #ifndef _WIN32
640 // drop CR of CRLF files
641 if (!newLine.empty() && newLine[newLine.size() - 1] == '\r')
642 newLine.erase(newLine.size() - 1);
643 #endif
644
645 return eof || ( lineNumber == maxLineCnt );
646 }
647
648 void CodeGenerator::matchRegex ( const string &line, State skipState)
649 {
650 regexGroups.clear();
651 int matchBegin=0;
652 int groupID=0;
653
654 // cycle through all regex, save the start and ending indices of matches to report them later
655 for ( unsigned int i=0; i<currentSyntax->getRegexElements().size(); i++ ) {
656 RegexElement *regexElem = currentSyntax->getRegexElements() [i];
657
658 if (regexElem->open == skipState) continue;
659
660 if (regexElem->constraintLineNum && regexElem->constraintLineNum != lineNumber) {
661 continue;
662 }
663
664 if (regexElem->constraintFilename.size() && regexElem->constraintFilename != inFile) {
665 continue;
666 }
667
668 boost::xpressive::sregex_iterator cur( line.begin(), line.end(), regexElem->rex );
669 boost::xpressive::sregex_iterator end;
670
671 for( ; cur != end; ++cur ) {
672 groupID = ( regexElem->capturingGroup<0 ) ? cur->size()-1 : regexElem->capturingGroup;
673 matchBegin = cur->position(groupID);
674
675 regexGroups.insert (
676 make_pair ( matchBegin + 1, RegexToken ( regexElem->open, cur->length(groupID), regexElem->kwClass, regexElem->langName ) ) );
677
678 // priority regex (match required)
679 if (regexElem->priority) {
680 return;
681 }
682 }
683 }
684 }
685
686 unsigned char CodeGenerator::getInputChar()
687 {
688 // end of line?
689 if ( lineIndex == line.length() ) {
690
691 //more testing required:
692 if (outputType==ESC_TRUECOLOR || outputType==ESC_XTERM256)
693 lastLineLength=StringTools::utf8_strlen(line + lsSyntaxErrorDesc);
694
695 bool eof=false;
696 if ( preFormatter.isEnabled() ) {
697 if ( !preFormatter.hasMoreLines() ) {
698 eof=readNewLine ( line );
699 preFormatter.setLine ( line );
700 ++lineNumber;
701 numberCurrentLine = true;
702 } else {
703 if (numberWrappedLines)
704 ++lineNumber;
705 numberCurrentLine = numberWrappedLines;
706 }
707
708 line = preFormatter.getNextLine();
709 } else {
710 eof=readNewLine ( line );
711 ++lineNumber;
712
713 numberCurrentLine = true;
714 }
715 lineIndex=0;
716
717 if (!lineContainedTestCase && applySyntaxTestCase){
718 stateTraceTest = stateTraceCurrent;
719 stateTraceCurrent.clear();
720 }
721
722 lineContainedTestCase=false;
723 lineContainedStmt=false;
724 matchRegex ( line );
725
726 return ( eof ) ?'\0':'\n';
727 }
728
729 return line[lineIndex++];
730 }
731
732 /** changing this method requires regression testing with nested syntax files (HTML+PHP+JS+CSS,
733 * Coffeescript with block regex, Pas + ASM)
734 * especially nested syntax in one line
 *
 * Determines the state at the current input position and stores the
 * corresponding text in the member "token".
 *
 * The checks are made in this order: end of line / end of file, white space,
 * syntax test markers, a pending switch to an embedded syntax, syntax errors
 * and semantic tokens reported by the language server client, and finally the
 * regex matches collected by matchRegex().
 *
 * \param oldState state passed by the caller; used for the test marker check
 *        and handed on to validateState()
 * \return the detected state; STANDARD if nothing else applies
735 */
736 State CodeGenerator::getCurrentState (State oldState)
737 {
738 unsigned char c='\0';
739
// Empty token: fetch the next input character. Otherwise lineIndex is moved
// back by token.length()-1 and the first character of the token is examined.
740 if ( token.length() ==0 ) {
741 c=getInputChar();
742 } else {
743 lineIndex-= ( token.length()-1 );
744 c=token[0];
745 }
// getInputChar() yields '\n' at a line end and '\0' at the end of the input
746 if ( c=='\n' ) {
747 return _EOL; // End of line
748 }
749
750 if ( c=='\0' ) {
751 return _EOF; // End of file
752 }
753
754 if ( c==' ' || c=='\t' ) {
755 token= c;
756 return _WS; // White space
757 }
758
// '^' and '<' are test markers only in test case mode and only if the old
// state is a comment state
759 if ( applySyntaxTestCase && ( c=='^' || c=='<') && (oldState == ML_COMMENT || oldState==SL_COMMENT) ) {
760 token= c;
761 return _TESTPOS;
762 }
763
764 // at this position the syntax change takes place
// (both markers are UINT_MAX while no change is pending; they are set below
// when an EMBEDDED_CODE_BEGIN match is found)
765 if (lineIndex >= syntaxChangeIndex-1 || syntaxChangeLineNo < lineNumber){
766 loadEmbeddedLang(embedLangDefPath); // load new syntax
767 matchRegex(line); // recognize new patterns in the (remaining) line
768 syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
769 }
770
// jump target of the goto in the EMBEDDED_CODE_BEGIN branch below
771 SKIP_EMBEDDED:
772
// Syntax error reported for this position: the token covers the length of the
// error and its id is kept in lsSyntaxErrorDesc (emitted by printSyntaxError())
773 if (lsCheckSyntaxErrors && LSPClient.errorExists(lineNumber, lineIndex)) {
774 highlight::SemanticToken errorToken = LSPClient.getError(lineNumber, lineIndex);
775 token = line.substr ( lineIndex-1, errorToken.length);
776 lineIndex += errorToken.length-1;
777 lsSyntaxErrorDesc = errorToken.id;
778
779 //std::cerr <<"error num "<<lineNumber<< " idx "<<lineIndex<< " error "<<errorToken.id<< "\n";
780 return SYNTAX_ERROR;
781 }
782
// Semantic token reported for this position: only used if
// docStyle.getSemanticStyle() yields a non-zero id for it
783 if (lsCheckSemanticTokens && LSPClient.tokenExists(lineNumber, lineIndex)) {
784 highlight::SemanticToken semToken = LSPClient.getToken(lineNumber, lineIndex);
785 int semStyleKwId = docStyle.getSemanticStyle(semToken.id);
786 if (semStyleKwId) {
787 token = line.substr ( lineIndex-1, semToken.length);
788 lineIndex += semToken.length-1;
789
790 currentKeywordClass = semStyleKwId + kwOffset; // +offset of missing kw groups in the theme
791 //std::cerr <<"l "<<lineNumber<< "t "<<token<< " semStyleKwId "<< semStyleKwId << " off "<<kwOffset<<" -> " << semToken.id <<"\n";
792 return KEYWORD;
793 }
794 }
795
796 // Test if a regular expression was found at the current position
// (matchRegex() stores each match under its start position plus one, which
// equals lineIndex once the first character of the match has been read)
797 if ( !regexGroups.empty() ) {
798 if ( regexGroups.count ( lineIndex ) ) {
799 token = line.substr ( lineIndex-1, regexGroups[lineIndex].length );
800
// oldIndex keeps the map key, lineIndex is moved to the end of the match
801 unsigned int oldIndex= lineIndex;
802 if ( regexGroups[oldIndex].length>1 ) lineIndex+= regexGroups[oldIndex].length-1;
803
804 if ( regexGroups[oldIndex].state==EMBEDDED_CODE_BEGIN ) {
805 //do not handle a nested section if the syntax is marked as "sealed"
806 if (embedLangDefPath.length()==0 || currentSyntax->allowsInnerSection(embedLangDefPath) ) {
807 embedLangDefPath = currentSyntax->getNewPath(regexGroups[oldIndex].name);
808 //remember position
809 syntaxChangeIndex = lineIndex+2;
810 syntaxChangeLineNo = lineNumber;
811 }
812
813 // repeat parsing of this line without nested state recognition to highlight opening delimiter in the host syntax
814 matchRegex(line, EMBEDDED_CODE_BEGIN);
815 lineIndex = oldIndex;
816 goto SKIP_EMBEDDED; // this is how it should be done
817 }
818
// Identifiers and keywords: the keyword lists are checked first; the class of
// the regex is only the fallback for KEYWORD matches
819 if ( regexGroups[oldIndex].state==IDENTIFIER_BEGIN || regexGroups[oldIndex].state==KEYWORD ) {
820 string reservedWord= ( currentSyntax->isIgnoreCase() ) ? StringTools::change_case ( token ) :token;
821 currentKeywordClass=currentSyntax->getKeywordListGroup ( reservedWord ); //check in lists (no regex)
822
823 if ( !currentKeywordClass && regexGroups[oldIndex].state==KEYWORD ){
824 currentKeywordClass = regexGroups[oldIndex].kwClass;
825 }
826 return validateState(( currentKeywordClass ) ? KEYWORD : STANDARD, oldState );
827 } else {
828 return validateState(regexGroups[oldIndex].state, oldState);
829 }
830 }
831 }
832
833 // Character not referring to any state
834 token = c;
835 return STANDARD;
836 }
837
838 State CodeGenerator::validateState(State newState, State oldState)
839 {
840
841 if (currentSyntax->getValidateStateChangeFct()) {
842 Diluculum::LuaValueList params;
843 params.emplace_back(oldState);
844 params.emplace_back(newState);
845 params.emplace_back(token);
846 params.emplace_back(getCurrentKeywordClassId() );
847 params.emplace_back(lineNumber );
848 params.emplace_back(lineIndex-(unsigned int)token.length() );
849
850 Diluculum::LuaValueList res=
851 currentSyntax->getLuaState()->call ( *currentSyntax->getValidateStateChangeFct(),
852 params,"getValidateStateChangeFct call") ;
853
854 resultOfHook = res.size()>=1;
855 if (resultOfHook) {
856
857 setOverrideParams();
858
859 auto validatedState = (State)res[0].asInteger();
860 if ( validatedState== _REJECT) {
861
862 // proceed using only the first character of the token
863 if (res.size()==1) {
864 lineIndex -= (token.length() -1);
865 token=token.substr(0, 1);
866 }
867
868 //experimental for slim.lang: evaluate second return arg after _REJECT
869 if (res.size()>=2) {
870 lineIndex -= (token.length() );
871 token.clear();
872 return (State)res[1].asInteger();
873 }
874 return oldState;
875 }
876
877 return validatedState;
878 }
879 }
880 resultOfHook = false;
881
882 return newState;
883 }
884
885 unsigned int CodeGenerator::getCurrentKeywordClassId(){
886 unsigned int kwClassId=0;
887
888 // this vector contains the defined keyword classes, and currentKeywordClass is its index:
889 vector<string> kwClasses=currentSyntax->getKeywordClasses();
890
891 if (currentKeywordClass && currentKeywordClass<=kwClasses.size()) {
892 string kwClassName=kwClasses[currentKeywordClass-1];
893 if (kwClassName.size()==3)
894 kwClassId = kwClassName[2] - 'a' + 1;
895 }
896 return kwClassId;
897 }
898
899 //it is faster to pass ostream reference
900 void CodeGenerator::maskString ( ostream& ss, const string & s )
901 {
902 string escHoverText;
903
904 if (lsEnableHoverRequests && (currentState==STANDARD || currentState==NUMBER || currentState==KEYWORD)) {
905
906 string hoverText = LSPClient.runHover(lsDocumentPath, lineIndex - s.size(), lineNumber-1);
907
908 for(const auto &c : hoverText)
909 {
910 if (isascii(c))
911 escHoverText.append(maskCharacter(c));
912 }
913 }
914
915 if (escHoverText.size()) {
916 ss << getHoverTagOpen(escHoverText);
917 }
918
919 for (const auto &c : s)
920 {
921 ss << maskCharacter ( c );
922 }
923
924 if (escHoverText.size()) {
925 ss << getHoverTagClose();
926 }
927
928 // The test markers position should also be deternmined by calculating the code points
929 if ( applySyntaxTestCase ) {
930
931 PositionState ps(currentState, getCurrentKeywordClassId(), false);
932
933 //TODO avoid repeated string comparison:
934 int slen = encoding=="utf-8" ? StringTools::utf8_strlen(s) : s.length();
935 for (int i=0; i< slen; i++ ) {
936 stateTraceCurrent.push_back(ps);
937 }
938 if (stateTraceCurrent.size()>200)
939 stateTraceCurrent.erase(stateTraceCurrent.begin(), stateTraceCurrent.begin() + 100 );
940 }
941 }
942
943 void CodeGenerator::printSyntaxError ( ostream& ss ) {
944 if ( !lsSyntaxErrorDesc.empty()) {
945 ss << openTags[ highlight::SYNTAX_ERROR_MSG ];
946
947 for(const auto &c : lsSyntaxErrorDesc)
948 {
949 ss << maskCharacter ( c );
950 }
951
952 ss << closeTags[ highlight::SYNTAX_ERROR_MSG ];
953 lsSyntaxErrorDesc.clear();
954 }
955 }
956
957 Diluculum::LuaValueList CodeGenerator::callDecorateFct(const string& token)
958 {
959
960 Diluculum::LuaValueList params;
961 params.emplace_back(token);
962 params.emplace_back(currentState);
963 params.emplace_back(currentKeywordClass);
964 params.emplace_back(lineContainedStmt);
965 params.emplace_back(lineNumber );
966 params.emplace_back(lineIndex-(unsigned int)token.length() );
967
968 return currentSyntax->getLuaState()->call ( *currentSyntax->getDecorateFct(),
969 params,"getDecorateFct call") ;
970 }
971
972 void CodeGenerator::printMaskedToken (bool flushWhiteSpace, StringTools::KeywordCase tcase )
973 {
974 if ( flushWhiteSpace )
975 flushWs(1);
976 string caseToken = StringTools::change_case ( token, tcase );
977 if (currentSyntax->getDecorateFct()) {
978
979 Diluculum::LuaValueList res=callDecorateFct(caseToken);
980 if (res.size()==1) {
981 *out<<res[0].asString();
982 } else {
983 maskString ( *out, caseToken );
984 }
985 } else {
986 maskString ( *out, caseToken );
987 }
988
989 // check this *after* the decorate call
990 if ( currentState == STANDARD || currentState == KEYWORD || currentState == NUMBER
991 || currentState == STRING || currentState == IDENTIFIER_BEGIN) {
992 lineContainedStmt = true;
993 }
994 token.clear();
995 }
996
997 bool CodeGenerator::styleFound()
998 {
999 return docStyle.found();
1000 }
1001
1002 bool CodeGenerator::printIndexFile ( const vector<string> &fileList, const string &outPath )
1003 {
1004 return true;
1005 }
1006
1007 bool CodeGenerator::initIndentationScheme ( const string &indentScheme )
1008 {
1009
1010 if ( formatter!=nullptr ) {
1011 return true;
1012 }
1013
1014 if ( !indentScheme.size() ) return false;
1015
1016 formatter=new astyle::ASFormatter();
1017
1018 if ( indentScheme=="allman" || indentScheme=="bsd" || indentScheme=="ansi" ) {
1019 formatter->setFormattingStyle ( astyle::STYLE_ALLMAN );
1020 } else if ( indentScheme=="kr"||indentScheme=="k&r"||indentScheme=="k/r" ) {
1021 formatter->setFormattingStyle ( astyle::STYLE_KR );
1022 } else if ( indentScheme=="java" ) {
1023 formatter->setFormattingStyle ( astyle::STYLE_JAVA );
1024 } else if ( indentScheme=="stroustrup" ) {
1025 formatter->setFormattingStyle ( astyle::STYLE_STROUSTRUP );
1026 } else if ( indentScheme=="whitesmith" ) {
1027 formatter->setFormattingStyle ( astyle::STYLE_WHITESMITH );
1028 } else if ( indentScheme=="banner" || indentScheme=="ratliff") {
1029 formatter->setFormattingStyle ( astyle::STYLE_RATLIFF );
1030 } else if ( indentScheme=="gnu" ) {
1031 formatter->setFormattingStyle ( astyle::STYLE_GNU );
1032 } else if ( indentScheme=="linux" ) {
1033 formatter->setFormattingStyle ( astyle::STYLE_LINUX );
1034 } else if ( indentScheme=="horstmann" ) {
1035 formatter->setFormattingStyle ( astyle::STYLE_HORSTMANN );
1036 } else if ( indentScheme=="otbs" || indentScheme=="1tbs") {
1037 formatter->setFormattingStyle ( astyle::STYLE_1TBS );
1038 } else if ( indentScheme=="google") {
1039 formatter->setFormattingStyle ( astyle::STYLE_GOOGLE );
1040 } else if ( indentScheme=="pico" || indentScheme=="a11") {
1041 formatter->setFormattingStyle ( astyle::STYLE_PICO );
1042 } else if ( indentScheme=="lisp" || indentScheme=="python"|| indentScheme=="a12") {
1043 formatter->setFormattingStyle ( astyle::STYLE_LISP );
1044 } else if ( indentScheme=="vtk") {
1045 formatter->setFormattingStyle ( astyle::STYLE_VTK );
1046 } else if ( indentScheme=="mozilla") {
1047 formatter->setFormattingStyle ( astyle::STYLE_MOZILLA );
1048 } else if ( indentScheme=="webkit") {
1049 formatter->setFormattingStyle ( astyle::STYLE_WEBKIT );
1050 } else if ( indentScheme!="user" ){
1051 return false;
1052 }
1053 return formattingEnabled=true;
1054 }
1055
1056 LoadResult CodeGenerator::loadLanguage ( const string& langDefPath, bool embedded )
1057 {
1058
1059 if (!embedded) {
1060 while (!nestedLangs.empty()) {
1061 nestedLangs.pop();
1062 }
1063 }
1064
1065 bool reloadNecessary= currentSyntax ? currentSyntax->needsReload ( langDefPath ): true;
1066 LoadResult result=LOAD_OK;
1067 if ( reloadNecessary ) {
1068 if (syntaxReaders.count(langDefPath)) {
1069 currentSyntax=syntaxReaders[langDefPath];
1070 result=LOAD_OK;
1071 } else {
1072
1073 currentSyntax=new SyntaxReader();
1074 result=currentSyntax->load(langDefPath, pluginParameter, outputType);
1075 syntaxReaders[langDefPath]=currentSyntax;
1076 }
1077
1078 if ( result==LOAD_OK ) {
1079 formattingPossible=currentSyntax->enableReformatting();
1080 updateKeywordClasses();
1081 }
1082 }
1083
1084 kwOffset=currentSyntax->getKeywordCount() - docStyle.getKeywordStyleCount();
1085
1086 return result;
1087 }
1088
1089 bool CodeGenerator::validateInputStream()
1090 {
1091 if ( !in ) return false;
1092
1093 // it is not possible to move stream pointer back with stdin
1094 if ( ( int ) in->tellg() == -1 ) // -1 : stdin
1095 return true;
1096
1097 // Sources: http://en.wikipedia.org/wiki/Magic_number_(programming)
1098 // Magic configuration of "file"
1099 // This is intended for web plugins - only check filetypes often found in the net
1100 char magic_gif[] = {'G','I','F','8', 0};
1101 char magic_png[] = {'\x89','P','N','G', 0};
1102 char magic_java[] = {'\xCA','\xFE','\xBA','\xBE', 0};
1103 char magic_jpeg[] = {'\xFF','\xD8','\xFF', 0};
1104 char magic_bmp[] = {'B','M', 0};
1105 char magic_pdf[] = {'%','P','D','F', 0};
1106 char magic_utf8[] = {'\xEF','\xBB','\xBF',0};
1107 char magic_rar[] = {'R','a','r','!', 0};
1108 char magic_zip[] = {'P','K','\x03','\x04', 0};
1109 char magic_ace[] = {'*','*','A','C','E','*','*', 0};
1110 char magic_tgz[] = {'\x8b','\x1f', '\x00', '\x08', 0};
1111 char magic_bzip[] = {'B','Z', 0};
1112
1113 char* magic_table[] = {magic_utf8,
1114 magic_gif, magic_png, magic_jpeg, magic_bmp, magic_pdf,
1115 magic_java,
1116 magic_rar, magic_zip, magic_ace, magic_tgz, magic_bzip,
1117 nullptr
1118 };
1119
1120 char buffer [10]= {0};
1121 in->read ( buffer,8 ); //only read the first 8 bytes of input stream
1122
1123 int magic_index=0;
1124 while ( magic_table[magic_index] ) {
1125 if ( !strncmp ( buffer, magic_table[magic_index], strlen ( magic_table[magic_index] ) ) ) {
1126 break;
1127 }
1128 magic_index++;
1129 }
1130 int streamReadPos=0;
1131 if ( magic_table[magic_index] == magic_utf8 ) {
1132 //setEncoding("utf-8");
1133 streamReadPos=3; // remove UTF-8 magic number from output
1134 }
1135
1136 in -> seekg ( streamReadPos, ios::beg );
1137 in-> clear(); // clear fail bit to continue reading
1138
1139 return !magic_table[magic_index] // points to 0 if no pattern was found
1140 || magic_table[magic_index] == magic_utf8;
1141 }
1142
1143 void CodeGenerator::applyPluginChunk(const string& fctName, string *result, bool *keepDefault) {
1144
1145 if (currentSyntax && pluginChunks.size()) {
1146
1147 Diluculum::LuaState luaState;
1148
1149 Diluculum::LuaValueList chunkParams;
1150 chunkParams.emplace_back(currentSyntax->getDescription());
1151 for (unsigned int i=0; i<pluginChunks.size(); i++) {
1152 luaState.call(*pluginChunks[i], chunkParams, "format user function");
1153 }
1154
1155 if (luaState.globals().count(fctName)) {
1156 auto* documentFct=new Diluculum::LuaFunction(luaState[fctName].value().asFunction());
1157
1158 luaState["HL_PLUGIN_PARAM"] = pluginParameter;
1159 luaState["HL_OUTPUT"] = outputType;
1160 luaState["HL_FORMAT_HTML"]=HTML;
1161 luaState["HL_FORMAT_XHTML"]=XHTML;
1162 luaState["HL_FORMAT_TEX"]=TEX;
1163 luaState["HL_FORMAT_LATEX"]=LATEX;
1164 luaState["HL_FORMAT_RTF"]=RTF;
1165 luaState["HL_FORMAT_ANSI"]=ESC_ANSI;
1166 luaState["HL_FORMAT_XTERM256"]=ESC_XTERM256;
1167 luaState["HL_FORMAT_TRUECOLOR"]=ESC_TRUECOLOR;
1168 luaState["HL_FORMAT_SVG"]=SVG;
1169 luaState["HL_FORMAT_BBCODE"]=BBCODE;
1170 luaState["HL_FORMAT_PANGO"]=PANGO;
1171 luaState["HL_FORMAT_ODT"]=ODTFLAT;
1172
1173 Diluculum::LuaValueList params;
1174 Diluculum::LuaValueMap options;
1175 options[Diluculum::LuaValue("title")] = Diluculum::LuaValue( docTitle );
1176 options[Diluculum::LuaValue("encoding")] = Diluculum::LuaValue(encoding);
1177 options[Diluculum::LuaValue("fragment")] = Diluculum::LuaValue(fragmentOutput);
1178 options[Diluculum::LuaValue("font")] = Diluculum::LuaValue(getBaseFont());
1179 options[Diluculum::LuaValue("fontsize")] = Diluculum::LuaValue(getBaseFontSize());
1180
1181 params.emplace_back(inputFilesCnt);
1182 params.emplace_back(processedFilesCnt);
1183 params.emplace_back(options);
1184
1185 Diluculum::LuaValueList res=luaState.call ( *documentFct, params, fctName+" call");
1186 if (res.size()>=1) {
1187 *keepDefault=false;
1188 *result = res[0].asString();
1189 if (res.size()==2)
1190 *keepDefault = res[1].asBoolean();
1191 }
1192 delete documentFct;
1193 }
1194 }
1195 }
1196
1197 void CodeGenerator::printHeader()
1198 {
1199 bool keepDefaultHeader=true;
1200 string pluginHeader;
1201
1202 processedFilesCnt++;
1203
1204 applyPluginChunk("DocumentHeader", &pluginHeader, &keepDefaultHeader);
1205
1206 if ( ! fragmentOutput && keepDefaultHeader)
1207 *out << getHeader();
1208
1209 *out << pluginHeader;
1210
1211 if ( !fragmentOutput || keepInjections)
1212 *out << currentSyntax->getHeaderInjection();
1213 }
1214
1215 void CodeGenerator::printFooter()
1216 {
1217
1218 bool keepDefaultFooter=true;
1219 string pluginFooter;
1220
1221 applyPluginChunk("DocumentFooter", &pluginFooter, &keepDefaultFooter);
1222
1223 if ( !fragmentOutput || keepInjections)
1224 *out << currentSyntax->getFooterInjection();
1225
1226 *out << pluginFooter;
1227
1228 if ( ! fragmentOutput && keepDefaultFooter )
1229 *out << getFooter();
1230 }
1231
1232 ParseError CodeGenerator::generateFile ( const string &inFileName,
1233 const string &outFileName )
1234 {
1235 if ( !docStyle.found() ) {
1236 return BAD_STYLE;
1237 }
1238
1239 reset();
1240
1241 ParseError error=PARSE_OK;
1242
1243 inFile=inFileName;
1244 outFile=outFileName;
1245
1246 in = ( inFileName.empty() ? &cin :new ifstream ( inFileName.c_str() ) );
1247
1248 if ( validateInput )
1249 if ( !validateInputStream() ) error= BAD_INPUT;
1250
1251 if ( !in->fail() && error==PARSE_OK ) {
1252 out = ( outFileName.empty() ? &cout :new ofstream ( outFileName.c_str() ) );
1253 if ( out->fail() ) {
1254 error=BAD_OUTPUT;
1255 }
1256 }
1257
1258 if ( in->fail() ) {
1259 error=BAD_INPUT;
1260 }
1261
1262 if ( error==PARSE_OK ) {
1263 initASStream();
1264 currentSyntax->setInputFileName(inFile);
1265 printHeader();
1266 printBody();
1267 printFooter();
1268 }
1269
1270 if ( !outFileName.empty() ) {
1271 delete out;
1272 out=nullptr;
1273 }
1274 if ( !inFileName.empty() ) {
1275 delete in;
1276 in=nullptr;
1277 }
1278 return error;
1279 }
1280
1281 string CodeGenerator::generateString ( const string &input )
1282 {
1283
1284 if ( !docStyle.found() ) {
1285 return "";
1286 }
1287
1288 reset();
1289
1290 in = new istringstream ( input );
1291 out = new ostringstream ();
1292
1293 if ( in->fail() || out->fail() ) {
1294 return "";
1295 }
1296
1297 initASStream();
1298
1299 printHeader();
1300 printBody();
1301 printFooter();
1302
1303 string result = static_cast<ostringstream*> ( out )->str();
1304
1305 delete out;
1306 out=nullptr;
1307 delete in;
1308 in=nullptr;
1309
1310 return result;
1311 }
1312
1313 void CodeGenerator::initASStream() {
1314 if ( formatter != nullptr ) {
1315 if (streamIterator) delete streamIterator;
1316 streamIterator = new astyle::ASStreamIterator ( in, extraEOFChar );
1317 formatter->init ( streamIterator );
1318 string desc = currentSyntax->getDescription();
1319 if (desc=="C#") {
1320 formatter->setSharpStyle();
1321 } else if (desc=="Java") {
1322 formatter->setJavaStyle();
1323 } else if (desc=="Javascript") {
1324 formatter->setJSStyle();
1325 } else if (desc=="Objective C") {
1326 formatter->setObjCStyle();
1327 } else {
1328 formatter->setCStyle();
1329 }
1330
1331 }
1332 }
1333
1334 string CodeGenerator::generateStringFromFile ( const string &inFileName )
1335 {
1336
1337 if ( !docStyle.found() ) {
1338 return "";
1339 }
1340
1341 reset();
1342
1343 inFile = inFileName;
1344
1345 in = new ifstream ( inFileName.c_str() );
1346 out = new ostringstream ();
1347
1348 if ( in->fail() || out->fail() ) {
1349 return "";
1350 }
1351
1352 if ( validateInput && !validateInputStream() ) {
1353 return "ERROR: detected binary input";
1354 }
1355
1356 initASStream();
1357
1358 currentSyntax->setInputFileName(inFile);
1359
1360 printHeader();
1361 printBody();
1362 printFooter();
1363
1364 string result = static_cast<ostringstream*> ( out )->str();
1365
1366 delete out;
1367 out=nullptr;
1368 delete in;
1369 in=nullptr;
1370
1371 return result;
1372 }
1373
1374 unsigned int CodeGenerator::getStyleID ( State s, unsigned int kwClassID )
1375 {
1376 if ( s==KEYWORD && kwClassID ) {
1377 return NUMBER_BUILTIN_STATES + kwClassID-1;
1378 }
1379 return ( unsigned int ) s ;
1380 }
1381
1382 void CodeGenerator::openTag ( State s )
1383 {
1384 *out << openTags[ ( unsigned int ) s];
1385 currentState=s;
1386 }
1387
1388 void CodeGenerator::closeTag ( State s )
1389 {
1390 *out << closeTags[ ( unsigned int ) s];
1391 flushWs(2);
1392 currentState=_UNKNOWN;
1393 }
1394
1395 void CodeGenerator::openKWTag ( unsigned int kwClassID )
1396 {
1397 *out << openTags.at(getStyleID ( KEYWORD, kwClassID ) );
1398 currentState=KEYWORD;
1399 }
1400
1401 void CodeGenerator::closeKWTag ( unsigned int kwClassID )
1402 {
1403 *out << closeTags.at(getStyleID ( KEYWORD, kwClassID ) );
1404 flushWs(3);
1405 currentState=_UNKNOWN;
1406 }
1407
1408 bool CodeGenerator::loadEmbeddedLang(const string&embedLangDefPath)
1409 {
1410 if (nestedLangs.empty()) {
1411 nestedLangs.push(currentSyntax->getCurrentPath() );
1412 }
1413 if (nestedLangs.top() != embedLangDefPath) {
1414 nestedLangs.push(embedLangDefPath);
1415 }
1416 LoadResult res = loadLanguage(embedLangDefPath, true);
1417 //pass end delimiter regex to syntax description
1418 currentSyntax->restoreLangEndDelim(embedLangDefPath);
1419 return res == LOAD_OK;
1420 }
1421
1422 ///////////////////////////////////////////////////////////////////////////////
1423
1424 void CodeGenerator::processRootState()
1425 {
1426 bool eof=false,
1427 firstLine=true; // avoid newline before printing the first output line
1428
1429 applySyntaxTestCase = inFile.find("syntax_test_")!=string::npos;
1430
1431 if ( currentSyntax->highlightingDisabled() ) {
1432 string line;
1433 while ( getline ( *in, line ) && lineNumber < maxLineCnt ) {
1434 ++lineNumber;
1435 insertLineNumber ( !firstLine );
1436 flushWs(4);
1437 firstLine=false;
1438 if (lineNumber>=startLineCntCurFile && lineNumber <=maxLineCnt)
1439 maskString ( *out, line );
1440 }
1441 *out << flush;
1442 return;
1443 }
1444
1445 State state=STANDARD;
1446 openTag ( STANDARD );
1447
1448 do {
1449 // determine next state
1450 state= getCurrentState(STANDARD);
1451
1452 // handle current state
1453 switch ( state ) {
1454 case KEYWORD:
1455 closeTag ( STANDARD );
1456 eof=processKeywordState ( state );
1457 openTag ( STANDARD );
1458 break;
1459 case NUMBER:
1460 closeTag ( STANDARD );
1461 eof=processNumberState();
1462 openTag ( STANDARD );
1463 break;
1464 case ML_COMMENT:
1465 closeTag ( STANDARD );
1466 eof=processMultiLineCommentState();
1467 openTag ( STANDARD );
1468 break;
1469 case SL_COMMENT:
1470 closeTag ( STANDARD );
1471 eof=processSingleLineCommentState();
1472 openTag ( STANDARD );
1473 break;
1474 case STRING:
1475 closeTag ( STANDARD );
1476 eof=processStringState ( STANDARD );
1477 openTag ( STANDARD );
1478 break;
1479 case DIRECTIVE:
1480 closeTag ( STANDARD );
1481 eof=processDirectiveState();
1482 openTag ( STANDARD );
1483 break;
1484 case ESC_CHAR:
1485 closeTag ( STANDARD );
1486 eof=processEscapeCharState();
1487 openTag ( STANDARD );
1488 break;
1489 case SYMBOL:
1490 closeTag ( STANDARD );
1491 eof=processSymbolState();
1492 openTag ( STANDARD );
1493 break;
1494 case EMBEDDED_CODE_END:
1495 closeTag ( STANDARD );
1496 eof=processSyntaxChangeState(state);
1497 openTag ( STANDARD );
1498 break;
1499 case SYNTAX_ERROR:
1500 closeTag ( STANDARD );
1501 eof=processSyntaxErrorState();
1502 openTag ( STANDARD );
1503 break;
1504
1505 case _EOL:
1506 // XTERM256 fix (issue with less cmd)
1507 if (!firstLine || showLineNumbers) {
1508 closeTag ( STANDARD );
1509 }
1510 insertLineNumber(!firstLine);
1511 if (!firstLine || showLineNumbers) {
1512 flushWs(5);
1513 stateTraceCurrent.clear();
1514 openTag ( STANDARD );
1515 }
1516 firstLine=false;
1517 break;
1518 case _EOF:
1519 eof=true;
1520 break;
1521 case _WS:
1522 processWsState();
1523 break;
1524 default:
1525 printMaskedToken();
1526 break;
1527 }
1528 } while ( !eof );
1529
1530 if (token.size() || lineNumber>1 || (outputType!=ESC_TRUECOLOR && outputType!=ESC_XTERM256))
1531 closeTag ( STANDARD );
1532
1533 if (currentSyntax->getDecorateLineEndFct()) {
1534 Diluculum::LuaValueList res=callDecorateLineFct(false);
1535 if (res.size()==1) {
1536 *out << res[0].asString();
1537 }
1538 }
1539
1540 printNewLines = noTrailingNewLine==0 || ( noTrailingNewLine==2 && ( token.size() || lineNumber>1) );
1541 *out << getNewLine();
1542 *out << flush;
1543 }
1544
1545 bool CodeGenerator::processSyntaxChangeState(State myState)
1546 {
1547 State newState=STANDARD;
1548 bool eof=false,
1549 exitState=false;
1550
1551 openTag ( KEYWORD );
1552 do {
1553
1554 if (myState==EMBEDDED_CODE_END) {
1555 if (!nestedLangs.empty()) {
1556 nestedLangs.pop();
1557 }
1558 // load host language syntax
1559 if (!nestedLangs.empty()) {
1560 loadLanguage(nestedLangs.top(), true);
1561 }
1562 matchRegex(line, EMBEDDED_CODE_BEGIN); // match remaining line using the host syntax
1563 }
1564
1565 printMaskedToken ( newState!=_WS );
1566
1567 newState= getCurrentState(myState);
1568
1569 switch ( newState ) {
1570 case _WS:
1571 processWsState();
1572 break;
1573 case _EOL:
1574 insertLineNumber();
1575 exitState=true;
1576 break;
1577 case _EOF:
1578 eof = true;
1579 break;
1580 default:
1581 exitState=true;
1582 break;
1583 }
1584 } while ( !exitState && !eof );
1585 closeTag ( KEYWORD );
1586
1587 return eof;
1588 }
1589
1590
1591 bool CodeGenerator::processKeywordState ( State myState )
1592 {
1593 State newState=STANDARD;
1594 unsigned int myClassID=currentKeywordClass;
1595 bool eof=false,
1596 exitState=false;
1597
1598 openKWTag ( myClassID );
1599 do {
1600 printMaskedToken ( newState!=_WS,
1601 ( currentSyntax->isIgnoreCase() ) ? keywordCase : StringTools::CASE_UNCHANGED );
1602 newState= getCurrentState(myState);
1603 switch ( newState ) {
1604 case _WS:
1605 processWsState();
1606 exitState=isolateTags;
1607 break;
1608 case _EOL:
1609 insertLineNumber();
1610 exitState=true;
1611
1612 break;
1613 case _EOF:
1614 eof = true;
1615 break;
1616 case KEYWORD_END:
1617 exitState=true;
1618 break;
1619 default:
1620 exitState= ( myClassID!=currentKeywordClass ) || ( myState!=newState );
1621 break;
1622 }
1623 } while ( !exitState && !eof );
1624
1625 closeKWTag ( myClassID );
1626
1627 currentKeywordClass=0;
1628 return eof;
1629 }
1630
1631 bool CodeGenerator::processNumberState()
1632 {
1633 State newState=STANDARD;
1634 bool eof=false,
1635 exitState=false;
1636 openTag ( NUMBER );
1637 do {
1638 printMaskedToken ( newState!=_WS );
1639 newState= getCurrentState(NUMBER);
1640 switch ( newState ) {
1641 case _WS:
1642 processWsState();
1643 exitState=isolateTags;
1644 break;
1645 case _EOL:
1646 insertLineNumber();
1647 exitState=true;
1648 break;
1649 case _EOF:
1650 eof = true;
1651 break;
1652 default:
1653 exitState=newState!=NUMBER;
1654 break;
1655 }
1656 } while ( !exitState && !eof );
1657
1658 closeTag ( NUMBER );
1659 return eof;
1660 }
1661
1662
1663 bool CodeGenerator::processMultiLineCommentState()
1664 {
1665 int commentCount=1;
1666 int openDelimID=currentSyntax->getOpenDelimiterID ( token, ML_COMMENT);
1667 State newState=STANDARD;
1668 bool eof=false, exitState=false, containedTestCase=false;
1669 unsigned int startColumn=lineIndex - token.size() ;
1670 openTag ( ML_COMMENT );
1671 do {
1672 printMaskedToken (newState!=_WS );
1673 newState= getCurrentState(ML_COMMENT);
1674
1675 switch ( newState ) {
1676 case _WS:
1677 processWsState();
1678 break;
1679 case _EOL:
1680 wsBuffer += closeTags[ML_COMMENT];
1681 insertLineNumber();
1682 wsBuffer += openTags[ML_COMMENT];
1683 startColumn=0;
1684 break;
1685 case _EOF:
1686 eof = true;
1687 break;
1688 case _TESTPOS:
1689 runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
1690 printMaskedToken();
1691 containedTestCase=true;
1692 break;
1693 case ML_COMMENT:
1694
1695 if ( currentSyntax->allowNestedMLComments() ) {
1696 ++commentCount;
1697 }
1698 // if delimiters are equal, close the comment by continuing to
1699 // ML_COMMENT_END section
1700 if (currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, ML_COMMENT ))) break;
1701
1702 case ML_COMMENT_END:
1703
1704 if (!currentSyntax->matchesOpenDelimiter (token, ML_COMMENT_END, openDelimID)) {
1705 break;
1706 }
1707 commentCount--;
1708 if ( !commentCount ) {
1709 printMaskedToken();
1710 exitState=true;
1711 }
1712 break;
1713 default:
1714 break;
1715 }
1716 } while ( !exitState && !eof );
1717
1718 closeTag ( ML_COMMENT );
1719
1720 if (containedTestCase){
1721 stateTraceCurrent.clear();
1722 }
1723 return eof;
1724 }
1725
1726
1727 bool CodeGenerator::processSingleLineCommentState()
1728 {
1729 State newState=STANDARD;
1730 bool eof=false, exitState=false, containedTestCase=false;
1731 unsigned int startColumn = lineIndex - token.size() ;
1732
1733 openTag ( SL_COMMENT );
1734 do {
1735 printMaskedToken ( newState!=_WS );
1736 newState= getCurrentState(SL_COMMENT);
1737
1738 switch ( newState ) {
1739 case _WS:
1740 processWsState();
1741 break;
1742 case _EOL:
1743 printMaskedToken();
1744 if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
1745 exitState=false;
1746 } else {
1747 exitState=true;
1748 }
1749 if ( !exitState ) wsBuffer += closeTags[SL_COMMENT];
1750 insertLineNumber();
1751 if ( !exitState ) wsBuffer += openTags[SL_COMMENT];
1752
1753 break;
1754 case _EOF:
1755 eof = true;
1756 break;
1757 case _TESTPOS:
1758 runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
1759 printMaskedToken();
1760 containedTestCase=true;
1761 break;
1762
1763 default:
1764 break;
1765 }
1766 } while ( !exitState && !eof );
1767
1768 closeTag ( SL_COMMENT );
1769
1770 if (containedTestCase) {
1771 stateTraceCurrent.clear();
1772 }
1773
1774 return eof;
1775 }
1776
1777 bool CodeGenerator::processDirectiveState()
1778 {
1779 State newState=STANDARD;
1780 bool eof=false, exitState=false;
1781
1782 openTag ( DIRECTIVE );
1783 do {
1784 printMaskedToken ( newState!=_WS );
1785 newState= getCurrentState(DIRECTIVE);
1786 switch ( newState ) {
1787 case _WS:
1788 processWsState();
1789 break;
1790 case DIRECTIVE_END:
1791 printMaskedToken();
1792 exitState=true;
1793 break;
1794 case _EOL:
1795 printMaskedToken();
1796
1797 if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
1798 exitState=false;
1799 } else {
1800 if (currentSyntax->getContinuationChar()!=0x13){
1801 exitState= ( terminatingChar!=currentSyntax->getContinuationChar() );
1802 }
1803 }
1804 if ( !exitState ) wsBuffer += closeTags[DIRECTIVE];
1805 insertLineNumber();
1806 if ( !exitState ) wsBuffer += openTags[DIRECTIVE];
1807 break;
1808 case ML_COMMENT:
1809 closeTag ( DIRECTIVE );
1810 eof= processMultiLineCommentState();
1811 openTag ( DIRECTIVE );
1812 break;
1813 case SL_COMMENT:
1814 closeTag ( DIRECTIVE );
1815 eof= processSingleLineCommentState();
1816 openTag ( DIRECTIVE );
1817 exitState=true;
1818 break;
1819 case STRING:
1820 closeTag ( DIRECTIVE );
1821 eof=processStringState ( DIRECTIVE );
1822 openTag ( DIRECTIVE );
1823 break;
1824 case _EOF:
1825 eof = true;
1826 break;
1827 default:
1828 break;
1829 }
1830 } while ( !exitState && !eof );
1831
1832 closeTag ( DIRECTIVE );
1833 return eof;
1834 }
1835
1836 bool CodeGenerator::processStringState ( State oldState )
1837 {
1838 State newState=STANDARD;
1839 bool eof=false, exitState=false;
1840 bool returnedFromOtherState=false;
1841
1842 State myState= ( oldState==DIRECTIVE ) ? DIRECTIVE_STRING : STRING;
1843
1844 int openDelimID=currentSyntax->getOpenDelimiterID ( token, myState);
1845 string openDelim=token;
1846
1847 //Raw String by definition:
1848 bool isRawString=currentSyntax->delimiterIsRawString(openDelimID) || toggleDynRawString;
1849
1850 // Test if character before string open delimiter token equals to the
1851 // raw string prefix (Example: r" ", r""" """ in Python)
1852
1853 //Raw String Prefix:
1854 if ( lineIndex>token.length() &&line[lineIndex-token.length()-1]==currentSyntax->getRawStringPrefix() ) {
1855 isRawString=true;
1856 }
1857
1858 openTag ( myState );
1859 do {
1860 // true if last token was an escape char
1861 if ( !returnedFromOtherState ) {
1862 printMaskedToken (newState!=_WS );
1863 }
1864 returnedFromOtherState=false;
1865 newState= getCurrentState(myState);
1866
1867 switch ( newState ) {
1868 case _WS:
1869 processWsState();
1870 break;
1871 case _EOL:
1872 wsBuffer += closeTags[myState];
1873 insertLineNumber();
1874 wsBuffer += openTags[myState];
1875 break;
1876 case STRING_END:
1877 if (resultOfHook || currentSyntax->matchesOpenDelimiter (token, STRING_END, openDelimID)) {
1878 if (currentSyntax->assertDelimEqualLength()) {
1879 exitState= openDelim.length()==token.length();
1880 } else {
1881 exitState= true;
1882 }
1883 printMaskedToken();
1884 }
1885 break;
1886 case STRING:
1887 // if there exist multiple string delimiters, close string if
1888 // current delimiter is equal to the opening delimiter
1889 exitState=currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, STRING )) && token==openDelim;
1890 printMaskedToken();
1891 break;
1892 case ESC_CHAR:
1893 if ( !isRawString ) {
1894 closeTag ( myState );
1895 eof=processEscapeCharState();
1896 openTag ( myState );
1897 returnedFromOtherState=true;
1898 } else {
1899 // FIXME not a fix for Python r"""\"""
1900 exitState=token.size()>1 && token[1] == openDelim[0];
1901 printMaskedToken();
1902 }
1903 break;
1904 case STRING_INTERPOLATION:
1905 closeTag ( myState );
1906 eof=processInterpolationState();
1907 openTag ( myState );
1908 returnedFromOtherState=true;
1909 break;
1910
1911 case _EOF:
1912 eof = true;
1913 break;
1914 default:
1915 printMaskedToken();
1916 break;
1917 }
1918 } while ( !exitState && !eof );
1919
1920 closeTag ( myState );
1921
1922 toggleDynRawString = false;
1923
1924 return eof;
1925 }
1926
1927 bool CodeGenerator::processSymbolState()
1928 {
1929 State newState=STANDARD;
1930 bool eof=false,
1931 exitState=false;
1932
1933 openTag ( SYMBOL );
1934 do {
1935 printMaskedToken ( newState!=_WS );
1936 newState= getCurrentState(SYMBOL);
1937 switch ( newState ) {
1938 case _WS:
1939 processWsState();
1940 exitState=isolateTags;
1941 break;
1942 case _EOL:
1943 insertLineNumber();
1944 exitState=true;
1945 break;
1946 case _EOF:
1947 eof = true;
1948 break;
1949 default:
1950 exitState=newState!=SYMBOL;
1951 break;
1952 }
1953 } while ( !exitState && !eof );
1954
1955 closeTag ( SYMBOL );
1956 return eof;
1957 }
1958
1959 bool CodeGenerator::processSyntaxErrorState()
1960 {
1961 State newState=STANDARD;
1962 bool eof=false,
1963 exitState=false;
1964
1965 openTag ( SYNTAX_ERROR );
1966 do {
1967 printMaskedToken ( newState!=_WS );
1968 newState= getCurrentState(SYNTAX_ERROR);
1969 switch ( newState ) {
1970 case _WS:
1971 processWsState();
1972 exitState=isolateTags;
1973 break;
1974 case _EOL:
1975 insertLineNumber();
1976 exitState=true;
1977 break;
1978 case _EOF:
1979 eof = true;
1980 break;
1981 default:
1982 exitState=newState!=SYMBOL;
1983 break;
1984 }
1985 } while ( !exitState && !eof );
1986
1987 closeTag ( SYNTAX_ERROR );
1988 return eof;
1989 }
1990
1991 bool CodeGenerator::processEscapeCharState()
1992 {
1993 State newState=STANDARD;
1994 bool eof=false, exitState=false;
1995 openTag ( ESC_CHAR );
1996 do {
1997 printMaskedToken (newState!=_WS );
1998 newState= getCurrentState(ESC_CHAR);
1999 switch ( newState ) {
2000 case _EOL:
2001 insertLineNumber();
2002 exitState=true;
2003 break;
2004 case _WS:
2005 processWsState();
2006 exitState=isolateTags;
2007 break;
2008 case _EOF:
2009 eof = true;
2010 break;
2011 default:
2012 exitState=newState!=ESC_CHAR;
2013 break;
2014 }
2015 } while ( !exitState && !eof );
2016
2017 closeTag ( ESC_CHAR );
2018 return eof;
2019 }
2020
2021 bool CodeGenerator::processInterpolationState()
2022 {
2023 State newState=STANDARD;
2024 bool eof=false, exitState=false;
2025 openTag ( STRING_INTERPOLATION );
2026 do {
2027 printMaskedToken (newState!=_WS );
2028 newState= getCurrentState(STRING_INTERPOLATION);
2029 switch ( newState ) {
2030 case _EOL:
2031 insertLineNumber();
2032 exitState=true;
2033 break;
2034 case _WS:
2035 processWsState();
2036 exitState=isolateTags;
2037 break;
2038 case _EOF:
2039 eof = true;
2040 break;
2041 default:
2042 exitState=newState!=STRING_INTERPOLATION;
2043 break;
2044 }
2045 } while ( !exitState && !eof );
2046
2047 closeTag ( STRING_INTERPOLATION );
2048 return eof;
2049 }
2050
2051 void CodeGenerator::processWsState()
2052 {
2053
2054 if ( !maskWs ) {
2055 wsBuffer += token;
2056 token.clear();
2057 return;
2058 }
2059
2060 flushWs(6);
2061
2062 int cntWs=0;
2063 lineIndex--;
2064 PositionState ps(currentState, 0, true);
2065
2066 while ( line[lineIndex]==' ' || line[lineIndex]=='\t' ) {
2067 ++cntWs;
2068 ++lineIndex;
2069 }
2070
2071 if ( cntWs>1 ) {
2072
2073 unsigned int styleID=getStyleID ( currentState, currentKeywordClass );
2074 if ( excludeWs && styleID!=_UNKNOWN ) {
2075 *out << closeTags[styleID];
2076 }
2077
2078 *out << maskWsBegin;
2079 for ( int i=0; i<cntWs; i++ ) {
2080 *out << spacer;
2081 if (applySyntaxTestCase){
2082 stateTraceCurrent.push_back(ps);
2083 }
2084 }
2085 *out << maskWsEnd;
2086 if ( excludeWs && styleID!=_UNKNOWN ) {
2087 *out << openTags[styleID];
2088 }
2089 } else {
2090
2091 *out << spacer; //Bugfix fehlender Space nach Strings
2092 if (applySyntaxTestCase){
2093 stateTraceCurrent.push_back(ps);
2094 }
2095 }
2096
2097 spacer = initialSpacer;
2098
2099 token.clear();
2100 }
2101
2102 void CodeGenerator::flushWs(int arg)
2103 {
2104 PositionState ps(currentState, 0, true);
2105 //workaround condition
2106 for ( size_t i=0; i<wsBuffer.size() && ((arg > 3) || ( (arg<4) && lineIndex>1)) && applySyntaxTestCase ; i++ ) {
2107 stateTraceCurrent.push_back(ps);
2108 //std::cerr <<"\nflush >"<<wsBuffer<<"< arg:"<<arg;
2109 }
2110
2111 //fix canvas whitespace
2112 if (wsBuffer.length() && (outputType==ESC_XTERM256 || outputType==ESC_TRUECOLOR) ){
2113 *out<<maskWsBegin;
2114 }
2115
2116 *out << wsBuffer;
2117 wsBuffer.clear();
2118 }
2119
2120 string CodeGenerator::getTestcaseName(State s, unsigned int kwClass) {
2121 switch (s) {
2122
2123 case STANDARD:
2124 return STY_NAME_STD;
2125 case STRING:
2126 return STY_NAME_STR;
2127 case NUMBER:
2128 return STY_NAME_NUM;
2129 case SL_COMMENT:
2130 return STY_NAME_SLC;
2131 case ML_COMMENT:
2132 return STY_NAME_COM;
2133 case ESC_CHAR:
2134 return STY_NAME_ESC;
2135 case DIRECTIVE:
2136 return STY_NAME_DIR;
2137 case DIRECTIVE_STRING:
2138 return STY_NAME_DST;
2139 case SYMBOL:
2140 return STY_NAME_SYM;
2141 case STRING_INTERPOLATION:
2142 return STY_NAME_IPL;
2143 case SYNTAX_ERROR:
2144 return STY_NAME_ERR;
2145 case _WS:
2146 return "ws";
2147 case KEYWORD: {
2148
2149 if (!kwClass)
2150 return "ws";
2151
2152 char kwName[20] = {0};
2153 snprintf(kwName, sizeof(kwName), "keyword %c", ('a'+kwClass-1));
2154
2155 return string(kwName);
2156 }
2157 default:
2158 return "unknown_test";
2159 }
2160 }
2161
2162 void CodeGenerator::printTrace(const string &s){
2163 std::cout<<"\n curr "<<lineNumber<<" "<<s<<": ";
2164 for (unsigned int i=0; i< stateTraceCurrent.size(); i++) {
2165 std::cout<<" "<<stateTraceCurrent[i].state;
2166 }
2167 std::cout<<"\n test "<<lineNumber<<" "<<s<<": ";
2168 for (unsigned int i=0; i< stateTraceTest.size(); i++) {
2169 std::cout<<" "<<stateTraceTest[i].state;
2170 }
2171 std::cout<<"\n";
2172 }
2173
2174 //column: lineIndex (not a UTF-8 validated string position)
2175 void CodeGenerator::runSyntaxTestcases(unsigned int column){
2176
2177 if (encoding=="utf-8")
2178 column = StringTools::utf8_strlen(line.substr(0, column));
2179
2180 unsigned int assertGroup=0;
2181 size_t typeDescPos=line.find_first_not_of("\t ^", lineIndex);
2182 State assertState=_UNKNOWN;
2183 bool negation=false;
2184 bool testFailed=false;
2185
2186 ostringstream errMsg;
2187 string prefix;
2188 //printTrace("trace 2");
2189
2190 if (typeDescPos!=string::npos) {
2191
2192 if (line[typeDescPos]=='~') {
2193
2194 negation=true;
2195 prefix="~";
2196 ++typeDescPos;
2197 }
2198
2199 if (line.find(STY_NAME_NUM, typeDescPos)==typeDescPos)
2200 assertState=NUMBER;
2201 //TODO temp. fix to allow old and new string classes
2202 else if (line.find(STY_NAME_STR, typeDescPos)==typeDescPos || line.find("str", typeDescPos)==typeDescPos)
2203 assertState=STRING;
2204 else if (line.find(STY_NAME_ESC, typeDescPos)==typeDescPos)
2205 assertState=ESC_CHAR;
2206 else if (line.find(STY_NAME_IPL, typeDescPos)==typeDescPos)
2207 assertState=STRING_INTERPOLATION;
2208 else if (line.find(STY_NAME_SYM, typeDescPos)==typeDescPos)
2209 assertState=SYMBOL;
2210 else if (line.find(STY_NAME_DIR, typeDescPos)==typeDescPos)
2211 assertState=DIRECTIVE;
2212 else if (line.find(STY_NAME_SLC, typeDescPos)==typeDescPos)
2213 assertState=SL_COMMENT;
2214 else if (line.find(STY_NAME_COM, typeDescPos)==typeDescPos)
2215 assertState=ML_COMMENT;
2216 else if (line.find("ws", typeDescPos)==typeDescPos)
2217 assertState=_WS;
2218 //TODO temp. fix to allow old and new default classes
2219 else if (line.find(STY_NAME_STD, typeDescPos)==typeDescPos || line.find("std", typeDescPos)==typeDescPos)
2220 assertState=STANDARD;
2221 else if (line.find(STY_NAME_DST, typeDescPos)==typeDescPos)
2222 assertState=DIRECTIVE_STRING;
2223
2224 else if (line.find("kw", typeDescPos)==typeDescPos || line.find("st", typeDescPos)==typeDescPos) {
2225 assertState=KEYWORD;
2226 if (isalpha(line[typeDescPos+2]))
2227 assertGroup=line[typeDescPos+2] - 'a' +1;
2228 }
2229
2230 if ( (assertState!=_WS && stateTraceTest[column].state != assertState && !stateTraceTest[column].isWhiteSpace )
2231 || (assertState==_WS && !stateTraceTest[column].isWhiteSpace)
2232 || assertGroup != stateTraceTest[column].kwClass) {
2233
2234 testFailed=!negation;
2235
2236 } else if (negation ) {
2237
2238 //TODO Fix ~ws
2239 if (assertState!=_WS && !stateTraceTest[column].isWhiteSpace )
2240 testFailed=true;
2241 }
2242
2243 if (testFailed) {
2244 errMsg << inFile << " line " << lineNumber << ", column "<< column
2245 << ": got " << getTestcaseName(stateTraceTest[column].state, stateTraceTest[column].kwClass)
2246 << " instead of " << prefix << getTestcaseName(assertState, assertGroup);
2247
2248 failedPosTests.push_back(errMsg.str());
2249 }
2250
2251 }
2252
2253 lineContainedTestCase=true;
2254 }
2255
2256 string CodeGenerator::getNewLine()
2257 {
2258 ostringstream ss;
2259 printSyntaxError(ss);
2260 if (printNewLines)
2261 ss << newLineTag;
2262 return ss.str();
2263 }
2264
2265 Diluculum::LuaValueList CodeGenerator::callDecorateLineFct(bool isLineStart)
2266 {
2267
2268 Diluculum::LuaValueList params;
2269 params.emplace_back(lineNumber);
2270
2271 return currentSyntax->getLuaState()->call ( isLineStart ?
2272 *currentSyntax->getDecorateLineBeginFct(): *currentSyntax->getDecorateLineEndFct(),
2273 params,"getDecorateLineFct call");
2274 }
2275
2276 void CodeGenerator::setOverrideParams() {
2277 if (currentSyntax->requiresParamUpdate()) {
2278 if ( currentSyntax->getOverrideConfigVal("state.string.raw")=="true"){
2279 toggleDynRawString=true; // reset to false in string state fct
2280 }
2281 if ( currentSyntax->getOverrideConfigVal("format.maskws")=="true") {
2282 maskWs=true;
2283 }
2284 if ( currentSyntax->getOverrideConfigVal("format.spacer").size()) {
2285 spacer=currentSyntax->getOverrideConfigVal("format.spacer");
2286 }
2287 }
2288 }
2289
2290 void CodeGenerator::insertLineNumber ( bool insertNewLine )
2291 {
2292 if ( insertNewLine ) {
2293 if (currentSyntax->getDecorateLineEndFct()) {
2294 Diluculum::LuaValueList res=callDecorateLineFct(false);
2295 if (res.size()==1) {
2296 setOverrideParams();
2297 wsBuffer +=res[0].asString();
2298 }
2299 }
2300 wsBuffer += getNewLine();
2301 }
2302
2303 if (currentSyntax->getDecorateLineBeginFct()) {
2304 Diluculum::LuaValueList res=callDecorateLineFct(true);
2305 if (res.size()==1) {
2306 setOverrideParams();
2307 wsBuffer += res[0].asString();
2308 }
2309 }
2310
2311 if ( showLineNumbers ) {
2312 ostringstream os;
2313 ostringstream numberPrefix;
2314
2315 os << setw ( getLineNumberWidth() ) << right;
2316 if( numberCurrentLine ) {
2317 if ( lineNumberFillZeroes ) {
2318 os.fill ( '0' );
2319 }
2320 os << lineNumber+lineNumberOffset;
2321 } else {
2322 os << "";
2323 }
2324
2325 numberPrefix << openTags[LINENUMBER];
2326 maskString ( numberPrefix, os.str() );
2327
2328 //use initialSpacer here, spacer can be overridden by plug-in (format.spacer)
2329 numberPrefix << initialSpacer << closeTags[LINENUMBER];
2330 wsBuffer += numberPrefix.str();
2331 }
2332 }
2333
2334 unsigned int CodeGenerator::getLineIndex()
2335 {
2336 return lineIndex;
2337 }
2338 unsigned int CodeGenerator::getLastLineLength()
2339 {
2340 return lastLineLength;
2341 }
2342
2343 bool CodeGenerator::requiresTwoPassParsing() const {
2344 if (!currentSyntax) return false;
2345 return currentSyntax->getPersistentSnippetsNum()>0;
2346 }
2347
2348
2349 bool CodeGenerator::printExternalStyle ( const string &outFile )
2350 {
2351 if ( !includeStyleDef ) {
2352 ostream *cssOutFile = ( outFile.empty() ? &cout :new ofstream ( outFile.c_str() ) );
2353 if ( !cssOutFile->fail() ) {
2354 if (!omitVersionComment) {
2355 *cssOutFile << styleCommentOpen
2356 <<" Style definition file generated by highlight "
2357 << Info::getVersion() << ", " << Info::getWebsite()
2358 << " " << styleCommentClose << "\n";
2359 }
2360 *cssOutFile << getStyleDefinition()
2361 << "\n";
2362 *cssOutFile << readUserStyleDef();
2363 if ( !outFile.empty() ) delete cssOutFile;
2364 } else {
2365 return false;
2366 }
2367 }
2368 return true;
2369 }
2370
2371 bool CodeGenerator::printPersistentState ( const string &outFile )
2372 {
2373 if (!currentSyntax) return false;
2374
2375 ofstream pluginOutFile( outFile.c_str());
2376 if ( !pluginOutFile.fail() ) {
2377
2378 pluginOutFile <<"Description=\"Plugin generated by highlight using the --two-pass option\"\n\n"
2379 <<"Categories = {\"two-pass\" }\n\n"
2380 <<"function syntaxUpdate(desc)\n\n";
2381
2382 pluginOutFile << currentSyntax->getPersistentHookConditions();
2383
2384 for (auto snippet: currentSyntax->getPersistentSnippets())
2385 {
2386 pluginOutFile << snippet <<"\n\n";
2387 }
2388
2389 pluginOutFile<<"end\n\n"
2390 <<"Plugins={\n"
2391 <<" { Type=\"lang\", Chunk=syntaxUpdate }\n"
2392 <<"}\n";
2393 } else {
2394 return false;
2395 }
2396
2397 return true;
2398 }
2399
2400 string CodeGenerator::readUserStyleDef()
2401 {
2402 ostringstream ostr;
2403 if ( !styleInputPath.empty() ) {
2404 ifstream userStyleDef ( styleInputPath.c_str() );
2405 if ( userStyleDef ) {
2406 ostr << "\n" << styleCommentOpen
2407 << " Content of " << styleInputPath
2408 << ": " <<styleCommentClose << "\n";
2409 string line;
2410 while ( getline ( userStyleDef, line ) ) {
2411 ostr << line << "\n";
2412 }
2413 userStyleDef.close();
2414 } else {
2415 ostr << styleCommentOpen
2416 << " ERROR: Could not include " << styleInputPath
2417 << "." << styleCommentClose << "\n";
2418 }
2419 }
2420
2421 string injections=docStyle.getInjections();
2422 if (!injections.empty()) {
2423 ostr << "\n" << styleCommentOpen
2424 << " Plug-in theme injections: " <<styleCommentClose << "\n";
2425 ostr << injections<<"\n";
2426 }
2427 return ostr.str();
2428 }
2429
2430 bool CodeGenerator::initPluginScript(const string& script)
2431 {
2432
2433 if (script.empty()) return true;
2434
2435 try {
2436
2437 userScriptError="";
2438 Diluculum::LuaState ls;
2439
2440 ls.doFile (script);
2441 int listIdx=1;
2442
2443 while (ls["Plugins"][listIdx].value() !=Diluculum::Nil) {
2444
2445 // Theme plugins
2446 if (ls["Plugins"][listIdx]["Type"].value().asString()=="theme") {
2447 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
2448 docStyle.addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
2449 }
2450 }
2451 // Syntax plugins
2452 else if (ls["Plugins"][listIdx]["Type"].value().asString()=="lang") {
2453 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
2454 currentSyntax->addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
2455 }
2456 }
2457 // Format plugins
2458 else if (ls["Plugins"][listIdx]["Type"].value().asString()=="format") {
2459 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
2460 addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
2461 }
2462 }
2463
2464 listIdx++;
2465 }
2466 } catch (Diluculum::LuaError &err) {
2467 userScriptError=err.what();
2468 return false;
2469 }
2470 return true;
2471 }
2472
2473 void CodeGenerator::resetSyntaxReaders() {
2474 for ( auto it=syntaxReaders.begin(); it!=syntaxReaders.end(); it++ ) {
2475 delete it->second;
2476 }
2477 currentSyntax=nullptr;
2478 syntaxReaders.clear();
2479 }
2480
2481 bool CodeGenerator::syntaxRequiresTwoPassRun() {
2482 if (!currentSyntax) return false;
2483 return currentSyntax->requiresTwoPassRun();
2484 }
2485
2486 void CodeGenerator::clearPersistentSnippets(){
2487 if (currentSyntax) {
2488 currentSyntax->clearPersistentSnippets();
2489 }
2490 }
2491
2492 void CodeGenerator::updateKeywordClasses(){
2493
2494 if (openTags.size()) {
2495 if ( openTags.size() >NUMBER_BUILTIN_STATES ) {
2496 // remove dynamic keyword tag delimiters of the old language definition
2497 auto keyStyleOpenBegin =
2498 openTags.begin() + NUMBER_BUILTIN_STATES;
2499 auto keyStyleCloseBegin =
2500 closeTags.begin() + NUMBER_BUILTIN_STATES;
2501 openTags.erase ( keyStyleOpenBegin, openTags.end() );
2502 closeTags.erase ( keyStyleCloseBegin, closeTags.end() );
2503 }
2504 // add new keyword tag delimiters
2505
2506 for ( unsigned int i=0; i< currentSyntax->getKeywordClasses().size(); i++ ) {
2507 openTags.push_back ( getKeywordOpenTag ( i ) );
2508 closeTags.push_back ( getKeywordCloseTag ( i ) );
2509 }
2510 }
2511 }
2512
2513
2514 }