diff options
Diffstat (limited to 'bin/wiki/ImportarDesdeURL/node_modules/htmlparser2/lib/Parser.js')
-rw-r--r-- | bin/wiki/ImportarDesdeURL/node_modules/htmlparser2/lib/Parser.js | 382 |
1 files changed, 382 insertions, 0 deletions
// htmlparser2: lib/Parser.js
var Tokenizer = require("./Tokenizer.js");

/*
    Options:

    xmlMode: Disables the special behavior for script/style tags (false by default)
    lowerCaseAttributeNames: call .toLowerCase for each attribute name (true if xmlMode is `false`)
    lowerCaseTags: call .toLowerCase for each tag name (true if xmlMode is `false`)
    recognizeSelfClosing: close `<tag/>` immediately even when xmlMode is `false`
    recognizeCDATA: report CDATA sections as cdatastart/text/cdataend even when xmlMode is `false`
    Tokenizer: constructor used instead of the bundled Tokenizer (for this parser only)
*/

/*
    Callbacks:

    onattribute,
    oncdataend,
    oncdatastart,
    onclosetag,
    oncomment,
    oncommentend,
    onend,
    onerror,
    onopentag,
    onopentagname,
    onparserinit,
    onprocessinginstruction,
    onreset,
    ontext
*/

// All lookup tables below are queried with the `in` operator, so they are
// created without a prototype: otherwise tag names such as "constructor"
// would be found on Object.prototype and treated as table members.

var formTags = {
    __proto__: null,
    input: true,
    option: true,
    optgroup: true,
    select: true,
    button: true,
    datalist: true,
    textarea: true
};

// Headings (and <p> itself) implicitly close an open <p>.
var closesParagraph = {
    __proto__: null,
    p: true
};

// Maps a tag being opened to the set of tags that it implicitly closes
// when one of them is on top of the element stack.
var openImpliesClose = {
    __proto__: null,
    tr: { __proto__: null, tr: true, th: true, td: true },
    th: { __proto__: null, th: true },
    td: { __proto__: null, thead: true, th: true, td: true },
    body: { __proto__: null, head: true, link: true, script: true },
    li: { __proto__: null, li: true },
    p: closesParagraph,
    h1: closesParagraph,
    h2: closesParagraph,
    h3: closesParagraph,
    h4: closesParagraph,
    h5: closesParagraph,
    h6: closesParagraph,
    select: formTags,
    input: formTags,
    output: formTags,
    button: formTags,
    datalist: formTags,
    textarea: formTags,
    option: { __proto__: null, option: true },
    optgroup: { __proto__: null, optgroup: true }
};

// Elements that never get pushed on the stack outside xmlMode; their close
// callback is fired as soon as the open tag ends.
var voidElements = {
    __proto__: null,
    area: true,
    base: true,
    basefont: true,
    br: true,
    col: true,
    command: true,
    embed: true,
    frame: true,
    hr: true,
    img: true,
    input: true,
    isindex: true,
    keygen: true,
    link: true,
    meta: true,
    param: true,
    source: true,
    track: true,
    wbr: true
};

// Opening one of these pushes `true` onto the foreign-context stack.
var foreignContextElements = {
    __proto__: null,
    math: true,
    svg: true
};

// Opening one of these pushes `false` onto the foreign-context stack.
var htmlIntegrationElements = {
    __proto__: null,
    mi: true,
    mo: true,
    mn: true,
    ms: true,
    mtext: true,
    "annotation-xml": true,
    foreignObject: true,
    desc: true,
    title: true
};

// First whitespace or slash: marks the end of a declaration/instruction name.
var re_nameEnd = /\s|\//;

/**
 * Creates a parser that receives tokenizer events and forwards them to `cbs`.
 *
 * @param {Object} [cbs] Callback object (see "Callbacks" above).
 * @param {Object} [options] Parser options (see "Options" above).
 */
function Parser(cbs, options) {
    this._options = options || {};
    this._cbs = cbs || {};

    this._tagname = "";
    this._attribname = "";
    this._attribvalue = "";
    this._attribs = null;
    this._stack = [];
    this._foreignContext = [];

    this.startIndex = 0;
    this.endIndex = null;

    this._lowerCaseTagNames =
        "lowerCaseTags" in this._options
            ? !!this._options.lowerCaseTags
            : !this._options.xmlMode;
    this._lowerCaseAttributeNames =
        "lowerCaseAttributeNames" in this._options
            ? !!this._options.lowerCaseAttributeNames
            : !this._options.xmlMode;

    // Pick the tokenizer locally. Assigning to the module-level `Tokenizer`
    // would make a custom tokenizer leak into every parser created later.
    var TokenizerImpl = this._options.Tokenizer || Tokenizer;
    this._tokenizer = new TokenizerImpl(this._options, this);

    if (this._cbs.onparserinit) this._cbs.onparserinit(this);
}

require("inherits")(Parser, require("events").EventEmitter);

/**
 * Recomputes `startIndex` and `endIndex` for the token being reported.
 *
 * While `endIndex` is still `null` (no token reported yet), `startIndex` is
 * the tokenizer's `_sectionStart` minus `initialOffset`, clamped to 0.
 * Afterwards it is the previous `endIndex + 1`. `endIndex` is always set to
 * the tokenizer's absolute index.
 *
 * @param {number} initialOffset Offset subtracted from the section start.
 */
Parser.prototype._updatePosition = function(initialOffset) {
    if (this.endIndex === null) {
        if (this._tokenizer._sectionStart <= initialOffset) {
            this.startIndex = 0;
        } else {
            this.startIndex = this._tokenizer._sectionStart - initialOffset;
        }
    } else {
        this.startIndex = this.endIndex + 1;
    }
    this.endIndex = this._tokenizer.getAbsoluteIndex();
};

//Tokenizer event handlers

/**
 * Forwards a text chunk to `cbs.ontext`.
 * @param {string} data
 */
Parser.prototype.ontext = function(data) {
    this._updatePosition(1);
    this.endIndex--;

    if (this._cbs.ontext) this._cbs.ontext(data);
};

/**
 * Handles the name of an opening tag: closes elements the new tag implies
 * closed, pushes the tag on the stack (unless it is a void element outside
 * xmlMode) and starts collecting attributes when `cbs.onopentag` is set.
 * @param {string} name
 */
Parser.prototype.onopentagname = function(name) {
    if (this._lowerCaseTagNames) {
        name = name.toLowerCase();
    }

    this._tagname = name;

    if (!this._options.xmlMode && name in openImpliesClose) {
        var implied = openImpliesClose[name];
        var top;
        // Keep closing the top of the stack while the new tag implies it.
        while ((top = this._stack[this._stack.length - 1]) in implied) {
            this.onclosetag(top);
        }
    }

    if (this._options.xmlMode || !(name in voidElements)) {
        this._stack.push(name);
        if (name in foreignContextElements) {
            this._foreignContext.push(true);
        } else if (name in htmlIntegrationElements) {
            this._foreignContext.push(false);
        }
    }

    if (this._cbs.onopentagname) this._cbs.onopentagname(name);
    if (this._cbs.onopentag) this._attribs = {};
};

/**
 * Handles the end of an opening tag: reports `onopentag` with the collected
 * attributes and, outside xmlMode, immediately reports `onclosetag` for
 * void elements.
 */
Parser.prototype.onopentagend = function() {
    this._updatePosition(1);

    if (this._attribs) {
        if (this._cbs.onopentag) {
            this._cbs.onopentag(this._tagname, this._attribs);
        }
        this._attribs = null;
    }

    if (
        !this._options.xmlMode &&
        this._cbs.onclosetag &&
        this._tagname in voidElements
    ) {
        this._cbs.onclosetag(this._tagname);
    }

    this._tagname = "";
};

/**
 * Handles a closing tag. Pops the stack down to (and including) the last
 * matching open element, reporting `onclosetag` for each popped element.
 * Outside xmlMode, an unmatched `</p>` or a `</br>` is reported as an empty
 * element (open immediately followed by close).
 * @param {string} name
 */
Parser.prototype.onclosetag = function(name) {
    this._updatePosition(1);

    if (this._lowerCaseTagNames) {
        name = name.toLowerCase();
    }

    if (name in foreignContextElements || name in htmlIntegrationElements) {
        this._foreignContext.pop();
    }

    if (
        this._stack.length &&
        (!(name in voidElements) || this._options.xmlMode)
    ) {
        var pos = this._stack.lastIndexOf(name);
        if (pos !== -1) {
            if (this._cbs.onclosetag) {
                // Number of elements to pop, innermost first.
                pos = this._stack.length - pos;
                while (pos--) this._cbs.onclosetag(this._stack.pop());
            } else {
                this._stack.length = pos;
            }
        } else if (name === "p" && !this._options.xmlMode) {
            this.onopentagname(name);
            this._closeCurrentTag();
        }
    } else if (!this._options.xmlMode && (name === "br" || name === "p")) {
        this.onopentagname(name);
        this._closeCurrentTag();
    }
};

/**
 * Handles `/>`. The tag is closed immediately in xmlMode, with
 * `recognizeSelfClosing`, or when the top of the foreign-context stack is
 * truthy; otherwise it is treated as a normal opening tag.
 */
Parser.prototype.onselfclosingtag = function() {
    if (
        this._options.xmlMode ||
        this._options.recognizeSelfClosing ||
        this._foreignContext[this._foreignContext.length - 1]
    ) {
        this._closeCurrentTag();
    } else {
        this.onopentagend();
    }
};

/**
 * Ends the current opening tag and closes it right away if it is on top of
 * the stack.
 */
Parser.prototype._closeCurrentTag = function() {
    var name = this._tagname;

    this.onopentagend();

    //self-closing tags will be on the top of the stack
    //(cheaper check than in onclosetag)
    if (this._stack[this._stack.length - 1] === name) {
        if (this._cbs.onclosetag) {
            this._cbs.onclosetag(name);
        }
        this._stack.pop();
    }
};

/**
 * Records the name of the attribute being parsed.
 * @param {string} name
 */
Parser.prototype.onattribname = function(name) {
    if (this._lowerCaseAttributeNames) {
        name = name.toLowerCase();
    }
    this._attribname = name;
};

/**
 * Appends a chunk to the value of the attribute being parsed.
 * @param {string} value
 */
Parser.prototype.onattribdata = function(value) {
    this._attribvalue += value;
};

/**
 * Finishes the current attribute: reports it via `cbs.onattribute` and
 * stores it in the attribute map unless that name is already present
 * (the first occurrence wins).
 */
Parser.prototype.onattribend = function() {
    if (this._cbs.onattribute) {
        this._cbs.onattribute(this._attribname, this._attribvalue);
    }
    if (
        this._attribs &&
        !Object.prototype.hasOwnProperty.call(this._attribs, this._attribname)
    ) {
        this._attribs[this._attribname] = this._attribvalue;
    }
    this._attribname = "";
    this._attribvalue = "";
};

/**
 * Extracts the name part of a declaration / processing instruction:
 * everything before the first whitespace or slash, lower-cased when tag
 * names are lower-cased.
 * @param {string} value
 * @returns {string}
 */
Parser.prototype._getInstructionName = function(value) {
    var idx = value.search(re_nameEnd);
    var name = idx < 0 ? value : value.slice(0, idx);

    if (this._lowerCaseTagNames) {
        name = name.toLowerCase();
    }

    return name;
};

/**
 * Reports a `<!...>` declaration through `cbs.onprocessinginstruction`.
 * @param {string} value
 */
Parser.prototype.ondeclaration = function(value) {
    if (this._cbs.onprocessinginstruction) {
        var name = this._getInstructionName(value);
        this._cbs.onprocessinginstruction("!" + name, "!" + value);
    }
};

/**
 * Reports a `<?...>` instruction through `cbs.onprocessinginstruction`.
 * @param {string} value
 */
Parser.prototype.onprocessinginstruction = function(value) {
    if (this._cbs.onprocessinginstruction) {
        var name = this._getInstructionName(value);
        this._cbs.onprocessinginstruction("?" + name, "?" + value);
    }
};

/**
 * Reports a comment via `cbs.oncomment` followed by `cbs.oncommentend`.
 * @param {string} value
 */
Parser.prototype.oncomment = function(value) {
    this._updatePosition(4);

    if (this._cbs.oncomment) this._cbs.oncomment(value);
    if (this._cbs.oncommentend) this._cbs.oncommentend();
};

/**
 * Reports a CDATA section. In xmlMode or with `recognizeCDATA` it is emitted
 * as cdatastart/text/cdataend; otherwise it is reported as a comment.
 * @param {string} value
 */
Parser.prototype.oncdata = function(value) {
    this._updatePosition(1);

    if (this._options.xmlMode || this._options.recognizeCDATA) {
        if (this._cbs.oncdatastart) this._cbs.oncdatastart();
        if (this._cbs.ontext) this._cbs.ontext(value);
        if (this._cbs.oncdataend) this._cbs.oncdataend();
    } else {
        this.oncomment("[CDATA[" + value + "]]");
    }
};

/**
 * Forwards an error to `cbs.onerror`.
 * @param {*} err
 */
Parser.prototype.onerror = function(err) {
    if (this._cbs.onerror) this._cbs.onerror(err);
};

/**
 * Handles the end of input: reports `onclosetag` for every element still on
 * the stack (innermost first), then calls `cbs.onend`.
 */
Parser.prototype.onend = function() {
    if (this._cbs.onclosetag) {
        for (var i = this._stack.length - 1; i >= 0; i--) {
            this._cbs.onclosetag(this._stack[i]);
        }
    }
    if (this._cbs.onend) this._cbs.onend();
};

//Resets the parser to a blank state, ready to parse a new HTML document
Parser.prototype.reset = function() {
    if (this._cbs.onreset) this._cbs.onreset();
    this._tokenizer.reset();

    this._tagname = "";
    this._attribname = "";
    this._attribvalue = "";
    this._attribs = null;
    this._stack = [];
    this._foreignContext = [];

    // Same values as in the constructor, so that positions of the next
    // document are not computed from the previous document's endIndex.
    this.startIndex = 0;
    this.endIndex = null;

    if (this._cbs.onparserinit) this._cbs.onparserinit(this);
};

//Parses a complete HTML document and pushes it to the handler
Parser.prototype.parseComplete = function(data) {
    this.reset();
    this.end(data);
};

/**
 * Feeds a chunk of input to the tokenizer.
 * @param {string} chunk
 */
Parser.prototype.write = function(chunk) {
    this._tokenizer.write(chunk);
};

/**
 * Feeds an optional final chunk to the tokenizer and ends parsing.
 * @param {string} [chunk]
 */
Parser.prototype.end = function(chunk) {
    this._tokenizer.end(chunk);
};

/** Pauses the tokenizer. */
Parser.prototype.pause = function() {
    this._tokenizer.pause();
};

/** Resumes the tokenizer. */
Parser.prototype.resume = function() {
    this._tokenizer.resume();
};

//alias for backwards compat
Parser.prototype.parseChunk = Parser.prototype.write;
Parser.prototype.done = Parser.prototype.end;

module.exports = Parser;