summaryrefslogtreecommitdiff
path: root/bin/wiki/ImportarDesdeURL/node_modules/metascraper-publisher/index.js
diff options
context:
space:
mode:
Diffstat (limited to 'bin/wiki/ImportarDesdeURL/node_modules/metascraper-publisher/index.js')
-rw-r--r--bin/wiki/ImportarDesdeURL/node_modules/metascraper-publisher/index.js53
1 files changed, 53 insertions, 0 deletions
diff --git a/bin/wiki/ImportarDesdeURL/node_modules/metascraper-publisher/index.js b/bin/wiki/ImportarDesdeURL/node_modules/metascraper-publisher/index.js
new file mode 100644
index 00000000..60f63aa0
--- /dev/null
+++ b/bin/wiki/ImportarDesdeURL/node_modules/metascraper-publisher/index.js
@@ -0,0 +1,53 @@
+'use strict'
+
+const { $jsonld, publisher } = require('@metascraper/helpers')
+
+// Matches text ending in " - atom" or " | atom" (one whitespace character on
+// each side of the separator, case-insensitive); group 1 is the text before
+// that suffix.
+const REGEX_RSS = /^(.*?)\s[-|]\satom$/i
+// Lazily skips up to a "-" or "|" that is followed by whitespace; group 1 is
+// whatever follows that separator and whitespace, up to the end of the text.
+const REGEX_TITLE = /^.*?[-|]\s+(.*)$/
+
+/**
+ * Decorate an extraction rule so that its raw result is passed through the
+ * `publisher` helper before being returned.
+ *
+ * @param {Function} rule - Invoked as `rule(htmlDom, url)`.
+ * @return {Function} Function taking `{ htmlDom, url }` and returning
+ *   `publisher(rule(htmlDom, url))`.
+ */
+
+const wrap = rule => context => {
+  const { htmlDom, url } = context
+  return publisher(rule(htmlDom, url))
+}
+
+/**
+ * Extract a candidate name from a title-like string.
+ *
+ * Applies `regex` to `text` and takes capture group 1, then keeps re-applying
+ * the regex to that capture until it stops matching, so a title containing
+ * several separators is reduced to its innermost captured segment.
+ *
+ * @param {string} text - Text to inspect; non-string values yield `false`.
+ * @param {RegExp} regex - Non-global pattern whose first capture group holds
+ *   the candidate.
+ * @return {string|false} The captured segment, or `false` when `text` is not
+ *   a string or the pattern does not match.
+ */
+
+const getFromTitle = (text, regex) => {
+  // `RegExp#exec` coerces its argument to a string, so a missing attribute
+  // would otherwise be matched as the literal text "undefined".
+  if (typeof text !== 'string') return false
+
+  let matches = regex.exec(text)
+  if (!matches) return false
+
+  let result = matches[1]
+  // One `exec` per pass instead of a `test` followed by an `exec`.
+  while (typeof result === 'string' && (matches = regex.exec(result))) {
+    // A capture is a substring of its input, so an unchanged capture is a
+    // fixed point; stop instead of looping forever.
+    if (matches[1] === result) break
+    result = matches[1]
+  }
+  return result
+}
+
+/**
+ * Rules.
+ *
+ * The exported factory returns an object whose `publisher` key holds the list
+ * of wrapped extraction rules, in the order they are declared here.
+ */
+
+// Rule that reads `attribute` from the selection produced by `selector`.
+const fromAttr = (selector, attribute) => wrap($ => $(selector).attr(attribute))
+
+// Rule that calls `.text()` on the selection produced by `selector`.
+const fromText = selector => wrap($ => $(selector).text())
+
+module.exports = () => {
+  const rules = [
+    wrap($jsonld('publisher.name')),
+    fromAttr('meta[property="og:site_name"]', 'content'),
+    fromAttr('meta[name*="application-name" i]', 'content'),
+    fromAttr('meta[property="al:android:app_name"]', 'content'),
+    fromAttr('meta[property="al:iphone:app_name"]', 'content'),
+    fromAttr('meta[property="al:ipad:app_name"]', 'content'),
+    fromAttr('meta[name="publisher" i]', 'content'),
+    fromAttr('meta[name="twitter:app:name:iphone"]', 'content'),
+    fromAttr('meta[name="twitter:app:name:ipad"]', 'content'),
+    fromAttr('meta[name="twitter:app:name:googleplay"]', 'content'),
+    fromText('#logo'),
+    fromText('.logo'),
+    fromText('a[class*="brand" i]'),
+    fromText('[class*="brand" i]'),
+    fromAttr('[class*="logo" i] a img[alt]', 'alt'),
+    fromAttr('[class*="logo" i] img[alt]', 'alt'),
+    wrap($ => getFromTitle($('title').text(), REGEX_TITLE)),
+    wrap($ => getFromTitle($('link[type*="xml" i]').attr('title'), REGEX_RSS))
+  ]
+
+  return { publisher: rules }
+}