diff options
Diffstat (limited to 'bin/wiki/ImportarDesdeURL/node_modules/parse5/lib/tokenizer/preprocessor.js')
-rw-r--r-- | bin/wiki/ImportarDesdeURL/node_modules/parse5/lib/tokenizer/preprocessor.js | 147 |
1 files changed, 147 insertions, 0 deletions
//Origin: parse5/lib/tokenizer/preprocessor.js (module text recovered from a flattened "new file" diff).
'use strict';

var UNICODE = require('../common/unicode');

//Aliases
var $ = UNICODE.CODE_POINTS;

//Utils

//OPTIMIZATION: keep these helpers inside this module. V8 Crankshaft does not inline functions
//that live in another module because of the context switch involved.
//Re-run the inlining check ('node --trace-inlining') whenever they are modified.

//Tells whether cp1 is a UTF-16 lead (high) surrogate and cp2 a trail (low) surrogate.
function isSurrogatePair(cp1, cp2) {
    return cp1 >= 0xD800 && cp1 <= 0xDBFF &&
           cp2 >= 0xDC00 && cp2 <= 0xDFFF;
}

//Combines a lead/trail surrogate pair into a single code point.
//NOTE: 0x2400 is 0x10000 - 0xDC00, i.e. the supplementary plane offset with the trail
//surrogate base already subtracted.
function getSurrogatePairCodePoint(cp1, cp2) {
    return (cp1 - 0xD800) * 0x400 + 0x2400 + cp2;
}


//Const
var DEFAULT_BUFFER_WATERLINE = 1 << 16;


//Preprocessor
//NOTE: HTML input stream preprocessing
//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream)
//Input is appended chunk by chunk via write() and consumed one code point at a time via
//advance(); retreat() steps back over the most recently consumed code point.
var Preprocessor = module.exports = function () {
    //Buffered input text (null until the first write()).
    this.html = null;

    //Index of the current code unit, index of the most recent gap, and index of the last buffered code unit.
    this.pos = -1;
    this.lastGapPos = -1;
    this.lastCharPos = -1;

    //Previous values of lastGapPos, restored one by one by retreat().
    this.gapStack = [];

    //Set after a CR was converted to LF, so that an immediately following LF is dropped.
    this.skipNextNewLine = false;

    this.lastChunkWritten = false;
    this.endOfChunkHit = false;
    this.bufferWaterline = DEFAULT_BUFFER_WATERLINE;
};

//Discards the part of the buffer that precedes the current position once the position has
//passed the buffer waterline, and rebases all indices onto the shortened buffer.
Preprocessor.prototype.dropParsedChunk = function () {
    if (this.pos <= this.bufferWaterline)
        return;

    var dropped = this.pos;

    this.lastCharPos -= dropped;
    this.html = this.html.substring(dropped);
    this.pos = 0;

    //NOTE: gap bookkeeping refers to old indices, so it is reset together with the buffer.
    this.lastGapPos = -1;
    this.gapStack = [];
};

//Records the current position as a gap: a code unit that was consumed together with its
//predecessor and therefore has to be stepped over in one go by retreat().
Preprocessor.prototype._addGap = function () {
    this.gapStack.push(this.lastGapPos);
    this.lastGapPos = this.pos;
};

//Handles a code unit >= 0xD800: merges it with the following code unit when both form a
//surrogate pair. Returns the resulting code point, or $.EOF when the code unit is the last
//buffered one and more chunks are still expected.
Preprocessor.prototype._processHighRangeCodePoint = function (cp) {
    if (this.pos === this.lastCharPos) {
        //NOTE: nothing to peek at. If more input may follow, stop here and report the end of the chunk.
        if (!this.lastChunkWritten) {
            this.endOfChunkHit = true;
            return $.EOF;
        }

        return cp;
    }

    //NOTE: peek the next code unit to see whether it completes a surrogate pair.
    var trailCp = this.html.charCodeAt(this.pos + 1);

    if (!isSurrogatePair(cp, trailCp))
        return cp;

    //NOTE: consume the trail surrogate and register it as a gap that retreat() must skip.
    this.pos++;
    this._addGap();

    return getSurrogatePairCodePoint(cp, trailCp);
};

//Appends a chunk of input to the buffer. isLastChunk is stored as lastChunkWritten.
Preprocessor.prototype.write = function (chunk, isLastChunk) {
    this.html = this.html ? this.html + chunk : chunk;

    this.lastCharPos = this.html.length - 1;
    this.endOfChunkHit = false;
    this.lastChunkWritten = isLastChunk;
};

//Splices a chunk into the buffer right after the current position.
Preprocessor.prototype.insertHtmlAtCurrentPos = function (chunk) {
    var splitAt = this.pos + 1;
    var consumed = this.html.substring(0, splitAt);
    var pending = this.html.substring(splitAt, this.html.length);

    this.html = consumed + chunk + pending;

    this.lastCharPos = this.html.length - 1;
    this.endOfChunkHit = false;
};


//Moves to the next code unit and returns its preprocessed code point, or $.EOF when the
//buffer is exhausted (endOfChunkHit is raised as well if the last chunk was not written yet).
Preprocessor.prototype.advance = function () {
    this.pos++;

    if (this.pos > this.lastCharPos) {
        if (!this.lastChunkWritten)
            this.endOfChunkHit = true;

        return $.EOF;
    }

    var cp = this.html.charCodeAt(this.pos);

    //NOTE: a U+000A LINE FEED (LF) that immediately follows a U+000D CARRIAGE RETURN (CR)
    //must be ignored: mark it as a gap and move on to the next code unit.
    if (cp === $.LINE_FEED && this.skipNextNewLine) {
        this.skipNextNewLine = false;
        this._addGap();
        return this.advance();
    }

    //NOTE: every U+000D CARRIAGE RETURN (CR) must be converted to U+000A LINE FEED (LF).
    if (cp === $.CARRIAGE_RETURN) {
        this.skipNextNewLine = true;
        return $.LINE_FEED;
    }

    this.skipNextNewLine = false;

    //OPTIMIZATION: code units below 0xD800 cover the most common HTML input (e.g. ASCII),
    //so they are returned right away; only the high range pays for surrogate handling.
    if (cp < 0xD800)
        return cp;

    return this._processHighRangeCodePoint(cp);
};

//Steps back by one consumed code point. If the current position is the most recent gap,
//the gap is popped and the position moves back by two code units instead of one.
Preprocessor.prototype.retreat = function () {
    var atGap = this.pos === this.lastGapPos;

    if (atGap)
        this.lastGapPos = this.gapStack.pop();

    this.pos -= atGap ? 2 : 1;
};