first commit

This commit is contained in:
s.golasch
2023-08-01 13:49:46 +02:00
commit 1fc239fd54
20238 changed files with 3112246 additions and 0 deletions

View File

@@ -0,0 +1,80 @@
'use strict';
exports.assign = function (tokenizer) {
//NOTE: obtain Tokenizer proto this way to avoid module circular references
var tokenizerProto = Object.getPrototypeOf(tokenizer);
tokenizer.tokenStartLoc = -1;
//NOTE: add location info builder method
tokenizer._attachLocationInfo = function (token) {
token.location = {
start: this.tokenStartLoc,
end: -1
};
};
//NOTE: patch token creation methods and attach location objects
tokenizer._createStartTagToken = function (tagNameFirstCh) {
tokenizerProto._createStartTagToken.call(this, tagNameFirstCh);
this._attachLocationInfo(this.currentToken);
};
tokenizer._createEndTagToken = function (tagNameFirstCh) {
tokenizerProto._createEndTagToken.call(this, tagNameFirstCh);
this._attachLocationInfo(this.currentToken);
};
tokenizer._createCommentToken = function () {
tokenizerProto._createCommentToken.call(this);
this._attachLocationInfo(this.currentToken);
};
tokenizer._createDoctypeToken = function (doctypeNameFirstCh) {
tokenizerProto._createDoctypeToken.call(this, doctypeNameFirstCh);
this._attachLocationInfo(this.currentToken);
};
tokenizer._createCharacterToken = function (type, ch) {
tokenizerProto._createCharacterToken.call(this, type, ch);
this._attachLocationInfo(this.currentCharacterToken);
};
//NOTE: patch token emission methods to determine end location
tokenizer._emitCurrentToken = function () {
//NOTE: if we have pending character token make it's end location equal to the
//current token's start location.
if (this.currentCharacterToken)
this.currentCharacterToken.location.end = this.currentToken.location.start;
this.currentToken.location.end = this.preprocessor.pos + 1;
tokenizerProto._emitCurrentToken.call(this);
};
tokenizer._emitCurrentCharacterToken = function () {
//NOTE: if we have character token and it's location wasn't set in the _emitCurrentToken(),
//then set it's location at the current preprocessor position
if (this.currentCharacterToken && this.currentCharacterToken.location.end === -1) {
//NOTE: we don't need to increment preprocessor position, since character token
//emission is always forced by the start of the next character token here.
//So, we already have advanced position.
this.currentCharacterToken.location.end = this.preprocessor.pos;
}
tokenizerProto._emitCurrentCharacterToken.call(this);
};
//NOTE: patch initial states for each mode to obtain token start position
Object.keys(tokenizerProto.MODE)
.map(function (modeName) {
return tokenizerProto.MODE[modeName];
})
.forEach(function (state) {
tokenizer[state] = function (cp) {
this.tokenStartLoc = this.preprocessor.pos;
tokenizerProto[state].call(this, cp);
};
});
};

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,115 @@
'use strict';
var UNICODE = require('../common/unicode');
//Aliases
var $ = UNICODE.CODE_POINTS;
//Utils
//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
//these functions if they are located in another module, due to the context switch.
//Always perform an inlining check before modifying these functions ('node --trace-inlining').
//NOTE: a code point is treated as reserved if it lies in the surrogate range
//(0xD800..0xDFFF, inclusive) or if it is greater than 0x10FFFF.
function isReservedCodePoint(cp) {
    if (cp > 0x10FFFF)
        return true;

    return cp >= 0xD800 && cp <= 0xDFFF;
}
//NOTE: true only if cp1 is in 0xD800..0xDBFF and cp2 is in 0xDC00..0xDFFF (both inclusive).
function isSurrogatePair(cp1, cp2) {
    var isLead = cp1 >= 0xD800 && cp1 <= 0xDBFF;
    var isTrail = cp2 >= 0xDC00 && cp2 <= 0xDFFF;

    return isLead && isTrail;
}
//NOTE: combines the two halves of a surrogate pair into a single code point.
//The result is 0x10000 plus the offset of cp1 from 0xD800 multiplied by 0x400,
//plus the offset of cp2 from 0xDC00.
function getSurrogatePairCodePoint(cp1, cp2) {
    var leadOffset = (cp1 - 0xD800) * 0x400;
    var trailOffset = cp2 - 0xDC00;

    return 0x10000 + leadOffset + trailOffset;
}
//Preprocessor
//NOTE: HTML input preprocessing
//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream)
var Preprocessor = module.exports = function (html) {
    //NOTE: stores the input and computes the last character position
    this.write(html);

    //NOTE: gap bookkeeping (see _addGap() and retreat()) and the CR/LF handling flag
    this.gapStack = [];
    this.lastGapPos = -1;
    this.skipNextNewLine = false;

    //NOTE: one leading U+FEFF BYTE ORDER MARK character must be ignored if any are present in the input stream.
    //The position is advanced before a code point is read, so starting at 0 skips the character at
    //index 0, while starting at -1 makes index 0 the first code point that is returned.
    if (this.html.charCodeAt(0) === $.BOM)
        this.pos = 0;
    else
        this.pos = -1;
};
//NOTE: stores the given chunk of HTML. If there is no content yet, the chunk becomes the content.
//Otherwise the chunk is inserted right after the current position, in front of the part
//of the content that follows it. In both cases the last character position is recalculated.
Preprocessor.prototype.write = function (html) {
    if (!this.html)
        this.html = html;

    else {
        var insertionPos = this.pos + 1;
        var head = this.html.substring(0, insertionPos);
        var tail = this.html.substring(insertionPos, this.html.length);

        this.html = head + html + tail;
    }

    this.lastCharPos = this.html.length - 1;
};
//NOTE: moves the position one step forward and returns the code point found there.
//Returns $.EOF if the position is past the last character.
Preprocessor.prototype.advanceAndPeekCodePoint = function () {
    this.pos++;

    if (this.pos > this.lastCharPos)
        return $.EOF;

    var cp = this.html.charCodeAt(this.pos);
    var previousWasCarriageReturn = this.skipNextNewLine;

    //NOTE: all U+000D CARRIAGE RETURN (CR) characters must be converted to U+000A LINE FEED (LF) characters
    if (cp === $.CARRIAGE_RETURN) {
        this.skipNextNewLine = true;
        return $.LINE_FEED;
    }

    this.skipNextNewLine = false;

    //NOTE: any U+000A LINE FEED (LF) characters that immediately follow a U+000D CARRIAGE RETURN (CR) character
    //must be ignored. The ignored position is registered as a gap and the next code point is returned instead.
    if (previousWasCarriageReturn && cp === $.LINE_FEED) {
        this._addGap();
        return this.advanceAndPeekCodePoint();
    }

    //OPTIMIZATION: code points below 0xD800 cover the most common HTML input (e.g. ASCII codes) and are
    //returned as is. Only the high-range code points go through the more expensive processing.
    if (cp >= 0xD800)
        return this._processHighRangeCodePoint(cp);

    return cp;
};
//NOTE: handles a code point from the high range: joins a surrogate pair into a single
//code point if possible and substitutes reserved code points with $.REPLACEMENT_CHARACTER.
Preprocessor.prototype._processHighRangeCodePoint = function (cp) {
    var hasNextChar = this.pos !== this.lastCharPos;

    if (hasNextChar) {
        //NOTE: look at the next character to find out if it completes a surrogate pair
        var trailCp = this.html.charCodeAt(this.pos + 1);

        if (isSurrogatePair(cp, trailCp)) {
            //NOTE: consume the second half of the pair and recalculate the code point
            this.pos++;
            cp = getSurrogatePairCodePoint(cp, trailCp);

            //NOTE: the consumed position is a gap that should be avoided during retreat
            this._addGap();
        }
    }

    return isReservedCodePoint(cp) ? $.REPLACEMENT_CHARACTER : cp;
};
//NOTE: registers the current position as the last gap position.
//The previous gap position is saved on the stack, so retreat() can restore it.
Preprocessor.prototype._addGap = function () {
    var savedGaps = this.gapStack;

    savedGaps.push(this.lastGapPos);
    this.lastGapPos = this.pos;
};
//NOTE: moves the position one step back. If the current position is the last registered gap,
//the position is moved two steps back and the previous gap position is restored from the stack.
Preprocessor.prototype.retreat = function () {
    var steps = 1;

    if (this.pos === this.lastGapPos) {
        this.lastGapPos = this.gapStack.pop();
        steps = 2;
    }

    this.pos -= steps;
};

2317
build/node_modules/parse5/lib/tokenization/tokenizer.js generated vendored Normal file

File diff suppressed because it is too large Load Diff