Proof of concept

ang-zeyu · ang-zeyu · commit 3f1e5dcca4a8 · 2020-02-15T23:30:18.000+08:00
diff --git a/src/lib/markbind/src/patches/htmlparser2.js b/src/lib/markbind/src/patches/htmlparser2.js
@@ -101,6 +101,10 @@ var i = 0,
     SPECIAL_SCRIPT            = j++,
     SPECIAL_STYLE             = j++;
 
+function whitespace(c) { // true when c is exactly one of: space, line feed, tab, form feed, carriage return
+	return c === " " || c === "\n" || c === "\t" || c === "\f" || c === "\r";
+}
+
 Tokenizer.prototype._stateMarkdown = function(c){
  	if(c === '`') {
  		this._state = TEXT;
@@ -129,6 +133,152 @@ Tokenizer.prototype._stateText = function(c){
 	}
 };
 
+
+// Tag names matched by the special-tag states below, compared against lower-cased input. Hard-coded here; tags will be provided by plugins.
+const specialTagNames = [
+	'abc', // NOTE(review): 'abc' and 'cab' look like proof-of-concept placeholders — confirm
+	'cab',
+	'script',
+	'style',
+];
+
+Tokenizer.prototype.isFirstCharacterSpecialTagCharacter = function(c) {
+	this._specialFunctions = specialTagNames
+		.map((str, index) => ({
+			index,
+			nextTestIndex: 0,
+		}))
+		.filter(indexObj => c.toLowerCase() === specialTagNames[indexObj.index][indexObj.nextTestIndex])
+		.map(indexObj => ({
+			index: indexObj.index,
+			nextTestIndex: indexObj.nextTestIndex + 1,
+			hasFinishedMatching: specialTagNames[indexObj.index][indexObj.nextTestIndex + 1] === undefined,
+		}));
+
+	return this._specialFunctions.length > 0;
+};
+
+Tokenizer.prototype.processSpecialFunctions = function(c) {
+	let matchIndex;
+
+	this._specialFunctions = this._specialFunctions
+		.filter((indexObj) => {
+			if (indexObj.hasFinishedMatching) {
+				matchIndex = indexObj.index;
+
+				return c === "/" || c === ">" || whitespace(c);
+			}
+
+			return c.toLowerCase() === specialTagNames[indexObj.index][indexObj.nextTestIndex]
+		})
+		.map(indexObj => ({
+				index: indexObj.index,
+				nextTestIndex: indexObj.nextTestIndex + 1,
+				hasFinishedMatching: specialTagNames[indexObj.index][indexObj.nextTestIndex + 1] === undefined,
+			}));
+
+	return {
+		matchIndex,
+		hasMatching: this._specialFunctions.length > 0,
+	};
+};
+
+Tokenizer.prototype._stateBeforeTagName = function(c) {
+	if (c === "/") {
+		this._state = BEFORE_CLOSING_TAG_NAME;
+	} else if (c === "<") {
+		this._cbs.ontext(this._getSection());
+		this._sectionStart = this._index;
+	} else if (c === ">" || this._special !== SPECIAL_NONE || whitespace(c)) {
+		this._state = TEXT;
+	} else if (c === "!") {
+		this._state = BEFORE_DECLARATION;
+		this._sectionStart = this._index + 1;
+	} else if (c === "?") {
+		this._state = IN_PROCESSING_INSTRUCTION;
+		this._sectionStart = this._index + 1;
+	} else {
+		this._state = !this._xmlMode && this.isFirstCharacterSpecialTagCharacter(c)
+			? BEFORE_SPECIAL
+			: IN_TAG_NAME;
+		this._sectionStart = this._index;
+	}
+};
+
+
+Tokenizer.prototype._stateBeforeSpecial = function(c) {
+	const result = this.processSpecialFunctions(c);
+	if (!result.matchIndex && result.hasMatching) {
+		this._state = BEFORE_SPECIAL;
+		return;
+	}
+
+	if (result.matchIndex) {
+		this._special = result.matchIndex;
+		this._nextSpecialClosingTagMatchIndex = 0;
+	}
+	this._state = IN_TAG_NAME;
+	this._index--; //consume the token again
+};
+
+Tokenizer.prototype.processSpecialClosingTagCharacter = function(c) {
+	let finishedMatching = false;
+	let matchNext = false;
+
+	if (specialTagNames[this._special][this._nextSpecialClosingTagMatchIndex] === undefined) {
+		this._nextSpecialClosingTagMatchIndex = 0;
+		finishedMatching = c === ">" || whitespace(c);
+	} else if (specialTagNames[this._special][this._nextSpecialClosingTagMatchIndex] === c.toLowerCase()) {
+		this._nextSpecialClosingTagMatchIndex += 1;
+		matchNext = true;
+	} else {
+		// reset
+		this._nextSpecialClosingTagMatchIndex = 0;
+	}
+
+	return {
+		finishedMatching,
+		matchNext,
+	};
+};
+
+Tokenizer.prototype._stateBeforeCloseingTagName = function(c) {
+	if (whitespace(c));
+	else if (c === ">") {
+		this._state = TEXT;
+	} else if (this._special !== SPECIAL_NONE) {
+		if (this.processSpecialClosingTagCharacter(c)) {
+			this._state = BEFORE_SPECIAL_END;
+		} else {
+			this._state = TEXT;
+			this._index--;
+		}
+	} else {
+		this._state = IN_CLOSING_TAG_NAME;
+		this._sectionStart = this._index;
+	}
+};
+
+Tokenizer.prototype._stateBeforeSpecialEnd = function(c) {
+	const result = this.processSpecialClosingTagCharacter(c);
+	if (result.matchNext) {
+		this._state = BEFORE_SPECIAL_END;
+		return;
+	}
+
+	if (result.finishedMatching) {
+		this._sectionStart = this._index - specialTagNames[this._special].length;
+		this._special = SPECIAL_NONE;
+		this._state = IN_CLOSING_TAG_NAME;
+		this._index--; //reconsume the token
+		return;
+	}
+
+	this._index--;
+	this._state = TEXT;
+};
+
+
 Tokenizer.prototype._parse = function(){
 	while(this._index < this._buffer.length && this._running){
 		var c = this._buffer.charAt(this._index);