summaryrefslogtreecommitdiff
path: root/bin/wiki/ImportarDesdeURL/node_modules/parse5/lib/tokenizer/preprocessor.js
diff options
context:
space:
mode:
Diffstat (limited to 'bin/wiki/ImportarDesdeURL/node_modules/parse5/lib/tokenizer/preprocessor.js')
-rw-r--r--bin/wiki/ImportarDesdeURL/node_modules/parse5/lib/tokenizer/preprocessor.js147
1 files changed, 147 insertions, 0 deletions
diff --git a/bin/wiki/ImportarDesdeURL/node_modules/parse5/lib/tokenizer/preprocessor.js b/bin/wiki/ImportarDesdeURL/node_modules/parse5/lib/tokenizer/preprocessor.js
new file mode 100644
index 00000000..715810ca
--- /dev/null
+++ b/bin/wiki/ImportarDesdeURL/node_modules/parse5/lib/tokenizer/preprocessor.js
@@ -0,0 +1,147 @@
+'use strict';
+
+var UNICODE = require('../common/unicode');
+
+//Aliases
+var $ = UNICODE.CODE_POINTS;
+
+//Utils
+
+//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
+//these functions if they are located in another module, because of the context switch.
+//Always perform an inlining check before modifying these functions ('node --trace-inlining').
+function isSurrogatePair(cp1, cp2) { //True when cp1 is within 0xD800-0xDBFF and cp2 is within 0xDC00-0xDFFF.
+ return cp1 >= 0xD800 && cp1 <= 0xDBFF && cp2 >= 0xDC00 && cp2 <= 0xDFFF;
+}
+
+function getSurrogatePairCodePoint(cp1, cp2) { //Combines the two code units of a surrogate pair into a single code point.
+ return (cp1 - 0xD800) * 0x400 + 0x2400 + cp2; //NOTE: 0x2400 === 0x10000 - 0xDC00, so this equals 0x10000 + (cp1 - 0xD800) * 0x400 + (cp2 - 0xDC00).
+}
+
+
+//Const
+var DEFAULT_BUFFER_WATERLINE = 1 << 16; //65536; initial value of bufferWaterline, which dropParsedChunk compares against pos.
+
+
+//Preprocessor
+//NOTE: HTML input preprocessing
+//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream)
+var Preprocessor = module.exports = function () { //Creates a preprocessor with an empty buffer; input is supplied through write().
+ this.html = null; //Buffered input string; null until write() assigns the first chunk.
+
+ this.pos = -1; //Index in html of the code unit most recently consumed by advance(); -1 before any read.
+ this.lastGapPos = -1; //Position of the most recent gap recorded by _addGap(), or -1 if there is none.
+ this.lastCharPos = -1; //Index of the last code unit currently held in html.
+
+ this.gapStack = []; //Earlier lastGapPos values; retreat() pops them back one at a time.
+
+ this.skipNextNewLine = false; //Set by advance() after a CR so that an immediately following LF is skipped.
+
+ this.lastChunkWritten = false; //The isLastChunk argument passed to the latest write().
+ this.endOfChunkHit = false; //Set when reading runs out of buffered input while lastChunkWritten is falsy.
+ this.bufferWaterline = DEFAULT_BUFFER_WATERLINE; //dropParsedChunk() trims html only once pos exceeds this value.
+};
+
+Preprocessor.prototype.dropParsedChunk = function () { //Discards the part of html that precedes pos, but only once pos exceeds bufferWaterline.
+ if (this.pos > this.bufferWaterline) {
+ this.lastCharPos -= this.pos; //Rebase onto the shortened string; must happen before pos is reset below.
+ this.html = this.html.substring(this.pos); //The code unit at pos becomes index 0.
+ this.pos = 0;
+ this.lastGapPos = -1; //Recorded gap positions are indices into the old string, so reset them.
+ this.gapStack = [];
+ }
+};
+
+Preprocessor.prototype._addGap = function () { //Records the current pos as a gap; retreat() steps back one extra position when pos sits on a gap.
+ this.gapStack.push(this.lastGapPos); //Save the previous gap so retreat() can restore it.
+ this.lastGapPos = this.pos;
+};
+
+Preprocessor.prototype._processHighRangeCodePoint = function (cp) { //Resolves a code unit >= 0xD800 (see advance()); returns a code point or $.EOF.
+ //NOTE: a following code unit is buffered, so try to peek a surrogate pair
+ if (this.pos !== this.lastCharPos) {
+ var nextCp = this.html.charCodeAt(this.pos + 1);
+
+ if (isSurrogatePair(cp, nextCp)) {
+ //NOTE: we have a surrogate pair. Consume the second code unit and recalculate the code point.
+ this.pos++;
+ cp = getSurrogatePairCodePoint(cp, nextCp);
+
+ //NOTE: add a gap at the second code unit so that retreat() steps back over both units at once
+ this._addGap();
+ }
+ }
+
+ // NOTE: cp is the last buffered code unit and more chunks may follow, so stop processing at this point
+ else if (!this.lastChunkWritten) {
+ this.endOfChunkHit = true;
+ return $.EOF;
+ }
+
+ return cp; //Either the combined pair or the original cp unchanged.
+};
+
+Preprocessor.prototype.write = function (chunk, isLastChunk) { //Appends chunk to the buffered input; isLastChunk is stored in lastChunkWritten.
+ if (this.html)
+ this.html += chunk;
+
+ else //html is still null (or an empty string): the chunk becomes the whole buffer.
+ this.html = chunk;
+
+ this.lastCharPos = this.html.length - 1;
+ this.endOfChunkHit = false; //New input has arrived, so clear the end-of-chunk flag.
+ this.lastChunkWritten = isLastChunk;
+};
+
+Preprocessor.prototype.insertHtmlAtCurrentPos = function (chunk) { //Splices chunk into html immediately after the code unit at pos.
+ this.html = this.html.substring(0, this.pos + 1) + //Everything up to and including pos...
+ chunk +
+ this.html.substring(this.pos + 1, this.html.length); //...followed by the rest of the buffer.
+
+ this.lastCharPos = this.html.length - 1;
+ this.endOfChunkHit = false;
+};
+
+
+Preprocessor.prototype.advance = function () { //Moves pos forward by one and returns the next preprocessed code point, or $.EOF.
+ this.pos++;
+
+ if (this.pos > this.lastCharPos) { //Ran past the buffered input.
+ if (!this.lastChunkWritten)
+ this.endOfChunkHit = true;
+
+ return $.EOF;
+ }
+
+ var cp = this.html.charCodeAt(this.pos);
+
+ //NOTE: any U+000A LINE FEED (LF) character that immediately follows a U+000D CARRIAGE RETURN (CR) character
+ //must be ignored. The skipped position is recorded as a gap so that retreat() steps over it.
+ if (this.skipNextNewLine && cp === $.LINE_FEED) {
+ this.skipNextNewLine = false;
+ this._addGap();
+ return this.advance();
+ }
+
+ //NOTE: all U+000D CARRIAGE RETURN (CR) characters must be converted to U+000A LINE FEED (LF) characters
+ if (cp === $.CARRIAGE_RETURN) {
+ this.skipNextNewLine = true;
+ return $.LINE_FEED;
+ }
+
+ this.skipNextNewLine = false;
+
+ //OPTIMIZATION: first check whether the code point is below 0xD800, the range that covers most common
+ //HTML input (e.g. ASCII codes), to avoid performance-costly operations for high-range code points.
+ return cp >= 0xD800 ? this._processHighRangeCodePoint(cp) : cp;
+};
+
+Preprocessor.prototype.retreat = function () { //Moves pos back by one position, or by two when pos is on the most recent gap.
+ if (this.pos === this.lastGapPos) {
+ this.lastGapPos = this.gapStack.pop(); //Restore the gap that was recorded before this one.
+ this.pos--; //Extra step back over the position recorded by _addGap().
+ }
+
+ this.pos--;
+};
+