1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
'use strict'
const { $jsonld, publisher } = require('@metascraper/helpers')
// Matches a feed title that ends with a "-" or "|" separator followed by the
// word "atom" (case-insensitive), with exactly one whitespace character on each
// side of the separator. Capture group 1 is everything before the separator.
const REGEX_RSS = /^(.*?)\s[-|]\satom$/i
// Matches text containing a "-" or "|" followed by at least one whitespace
// character. The lazy prefix stops at the first such separator; capture group 1
// is everything after it.
const REGEX_TITLE = /^.*?[-|]\s+(.*)$/
/**
 * Adapt a raw extraction rule to the `{ htmlDom, url }` calling convention.
 *
 * The returned function invokes `rule(htmlDom, url)` and hands the extracted
 * value to the `publisher` helper from `@metascraper/helpers`, returning
 * whatever that helper returns (presumably the validated/normalized value —
 * confirm against the helper's documentation).
 *
 * @param {Function} rule - Extractor called with `(htmlDom, url)`.
 * @return {Function} Function taking a single `{ htmlDom, url }` object.
 */
const wrap = rule => context => {
  const { htmlDom, url } = context
  return publisher(rule(htmlDom, url))
}
/**
 * Repeatedly apply `regex` to `text`, keeping capture group 1 each time, until
 * the captured value no longer matches (e.g. "Post | Section | Site" with a
 * "text after the separator" pattern ends up as "Site").
 *
 * @param {string} text - Text to extract from; non-strings yield `false`.
 * @param {RegExp} regex - Pattern whose first capture group is the part to keep.
 * @return {string|false} The innermost captured value, or `false` when `text`
 *   is not a string or does not match at all. If `regex` has no participating
 *   first capture group, the result is `undefined`.
 */
const getFromTitle = (text, regex) => {
  // `exec` coerces its argument to a string, so a missing attribute would be
  // matched as the literal text "undefined"; treat it as "no match" instead.
  if (typeof text !== 'string') return false

  const capture = input => {
    // Global/sticky regexes resume from `lastIndex`; always start from 0 so a
    // previous match cannot make the next one fail spuriously.
    if (regex.global || regex.sticky) regex.lastIndex = 0
    const match = regex.exec(input)
    return match === null ? null : match[1]
  }

  let result = capture(text)
  if (result === null) return false

  while (typeof result === 'string') {
    const next = capture(result)
    // Stop once nothing more can be stripped. The equality check prevents an
    // endless loop when the capture group spans the whole input.
    if (next === null || next === result) break
    result = next
  }
  return result
}
/**
* Rules.
*/
module.exports = () => ({
publisher: [
wrap($jsonld('publisher.name')),
wrap($ => $('meta[property="og:site_name"]').attr('content')),
wrap($ => $('meta[name*="application-name" i]').attr('content')),
wrap($ => $('meta[property="al:android:app_name"]').attr('content')),
wrap($ => $('meta[property="al:iphone:app_name"]').attr('content')),
wrap($ => $('meta[property="al:ipad:app_name"]').attr('content')),
wrap($ => $('meta[name="publisher" i]').attr('content')),
wrap($ => $('meta[name="twitter:app:name:iphone"]').attr('content')),
wrap($ => $('meta[name="twitter:app:name:ipad"]').attr('content')),
wrap($ => $('meta[name="twitter:app:name:googleplay"]').attr('content')),
wrap($ => $('#logo').text()),
wrap($ => $('.logo').text()),
wrap($ => $('a[class*="brand" i]').text()),
wrap($ => $('[class*="brand" i]').text()),
wrap($ => $('[class*="logo" i] a img[alt]').attr('alt')),
wrap($ => $('[class*="logo" i] img[alt]').attr('alt')),
wrap($ => getFromTitle($('title').text(), REGEX_TITLE)),
wrap($ => getFromTitle($('link[type*="xml" i]').attr('title'), REGEX_RSS))
]
})
|