summaryrefslogtreecommitdiff
path: root/bin/wiki/ImportarDesdeURL/node_modules/metascraper-publisher/index.js
blob: 60f63aa0d8cfca2285f836e9e877cd644654672a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
'use strict'

const { $jsonld, publisher } = require('@metascraper/helpers')

// Matches "<name> - atom" or "<name> | atom" (case-insensitive, anchored to
// the whole string); capture group 1 is the text before the separator.
const REGEX_RSS = /^(.*?)\s[-|]\satom$/i
// Matches a string containing "-" or "|" followed by whitespace; capture
// group 1 is everything after the first such separator.
const REGEX_TITLE = /^.*?[-|]\s+(.*)$/

/**
 * Adapt a raw extraction rule to the call shape used by the exported rules.
 *
 * The returned function receives a single object, reads `htmlDom` and `url`
 * from it, forwards them positionally to `rule`, and passes whatever the rule
 * produced through the `publisher` helper from `@metascraper/helpers`.
 *
 * @param {Function} rule - invoked as `rule(htmlDom, url)`
 * @return {Function} function accepting `{ htmlDom, url }`
 */

const wrap = rule => {
  return ({ htmlDom, url }) => publisher(rule(htmlDom, url))
}

/**
 * Repeatedly apply `regex` to `text`, each time replacing the text with
 * capture group 1, until the regex no longer matches.
 *
 * @param {string} text - string to inspect; any non-string value (for example
 *   a missing attribute) yields `false` instead of being coerced to a string
 * @param {RegExp} regex - pattern whose first capture group is the part to keep
 * @return {string|boolean} the innermost capture, or `false` when `text` is
 *   not a string or the regex does not match it at all
 */
const getFromTitle = (text, regex) => {
  if (typeof text !== 'string') return false

  // Run the regex exactly once per step. Resetting `lastIndex` keeps
  // global/sticky regexes from starting mid-string on the next call.
  const capture = input => {
    regex.lastIndex = 0
    const match = regex.exec(input)
    return match ? match[1] : null
  }

  let result = capture(text)
  if (result === null) return false

  while (typeof result === 'string') {
    const next = capture(result)
    // Stop when nothing more can be peeled off; the equality check guarantees
    // termination when the capture is the whole input.
    if (next === null || next === result) break
    result = next
  }

  return result
}

/**
 * Rules.
 */

module.exports = () => ({
  publisher: [
    wrap($jsonld('publisher.name')),
    wrap($ => $('meta[property="og:site_name"]').attr('content')),
    wrap($ => $('meta[name*="application-name" i]').attr('content')),
    wrap($ => $('meta[property="al:android:app_name"]').attr('content')),
    wrap($ => $('meta[property="al:iphone:app_name"]').attr('content')),
    wrap($ => $('meta[property="al:ipad:app_name"]').attr('content')),
    wrap($ => $('meta[name="publisher" i]').attr('content')),
    wrap($ => $('meta[name="twitter:app:name:iphone"]').attr('content')),
    wrap($ => $('meta[name="twitter:app:name:ipad"]').attr('content')),
    wrap($ => $('meta[name="twitter:app:name:googleplay"]').attr('content')),
    wrap($ => $('#logo').text()),
    wrap($ => $('.logo').text()),
    wrap($ => $('a[class*="brand" i]').text()),
    wrap($ => $('[class*="brand" i]').text()),
    wrap($ => $('[class*="logo" i] a img[alt]').attr('alt')),
    wrap($ => $('[class*="logo" i] img[alt]').attr('alt')),
    wrap($ => getFromTitle($('title').text(), REGEX_TITLE)),
    wrap($ => getFromTitle($('link[type*="xml" i]').attr('title'), REGEX_RSS))
  ]
})