diff options
Diffstat (limited to 'bin/wiki/ImportarDesdeURL/node_modules/metascraper-publisher/index.js')
-rw-r--r-- | bin/wiki/ImportarDesdeURL/node_modules/metascraper-publisher/index.js | 53 |
1 files changed, 53 insertions, 0 deletions
diff --git a/bin/wiki/ImportarDesdeURL/node_modules/metascraper-publisher/index.js b/bin/wiki/ImportarDesdeURL/node_modules/metascraper-publisher/index.js new file mode 100644 index 00000000..60f63aa0 --- /dev/null +++ b/bin/wiki/ImportarDesdeURL/node_modules/metascraper-publisher/index.js @@ -0,0 +1,53 @@ +'use strict' + +const { $jsonld, publisher } = require('@metascraper/helpers') + +const REGEX_RSS = /^(.*?)\s[-|]\satom$/i +const REGEX_TITLE = /^.*?[-|]\s+(.*)$/ + +/** + * Wrap a rule with validation and formatting logic. + * + * @param {Function} rule + * @return {Function} wrapped + */ + +const wrap = rule => ({ htmlDom, url }) => { + const value = rule(htmlDom, url) + return publisher(value) +} + +const getFromTitle = (text, regex) => { + const matches = regex.exec(text) + if (!matches) return false + let result = matches[1] + while (regex.test(result)) result = regex.exec(result)[1] + return result +} + +/** + * Rules. + */ + +module.exports = () => ({ + publisher: [ + wrap($jsonld('publisher.name')), + wrap($ => $('meta[property="og:site_name"]').attr('content')), + wrap($ => $('meta[name*="application-name" i]').attr('content')), + wrap($ => $('meta[property="al:android:app_name"]').attr('content')), + wrap($ => $('meta[property="al:iphone:app_name"]').attr('content')), + wrap($ => $('meta[property="al:ipad:app_name"]').attr('content')), + wrap($ => $('meta[name="publisher" i]').attr('content')), + wrap($ => $('meta[name="twitter:app:name:iphone"]').attr('content')), + wrap($ => $('meta[name="twitter:app:name:ipad"]').attr('content')), + wrap($ => $('meta[name="twitter:app:name:googleplay"]').attr('content')), + wrap($ => $('#logo').text()), + wrap($ => $('.logo').text()), + wrap($ => $('a[class*="brand" i]').text()), + wrap($ => $('[class*="brand" i]').text()), + wrap($ => $('[class*="logo" i] a img[alt]').attr('alt')), + wrap($ => $('[class*="logo" i] img[alt]').attr('alt')), + wrap($ => getFromTitle($('title').text(), REGEX_TITLE)), + wrap($ => getFromTitle($('link[type*="xml" i]').attr('title'), REGEX_RSS)) + ] +}) |