1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
'use strict'
const { $jsonld, $filter, author } = require('@metascraper/helpers')
// Accepts a value that begins with two runs of non-whitespace characters
// separated by whitespace (e.g. "Jane Doe"). `strict` uses it to reject
// single-token values and values with leading whitespace.
const REGEX_STRICT = /^\S+\s+\S+/
/**
 * Adapt a plain extraction rule to the `{ htmlDom, url }` calling
 * convention and pass whatever it extracts through the `author` helper.
 *
 * @param {Function} rule invoked as `rule(htmlDom, url)`
 * @return {Function} wrapped takes `{ htmlDom, url }`, returns `author(rule(htmlDom, url))`
 */
const wrap = rule => context => {
  // Only these two properties of the incoming object are handed to the rule.
  const { htmlDom, url } = context
  return author(rule(htmlDom, url))
}
/**
 * Make a `rule` stricter: its result is kept only when it satisfies
 * `REGEX_STRICT`; otherwise `false` is returned.
 *
 * The single argument given to the returned function is forwarded to
 * `rule` unchanged.
 *
 * @param {Function} rule
 * @return {Function} stricter
 */
const strict = rule => input => {
  const candidate = rule(input)
  // Guard clause: anything that does not match the pattern is rejected.
  if (!REGEX_STRICT.test(candidate)) return false
  return candidate
}
/**
* Rules.
*/
module.exports = () => ({
author: [
wrap($jsonld('author.name')),
wrap($ => $('meta[name="author"]').attr('content')),
wrap($ => $('meta[property="author"]').attr('content')),
wrap($ => $('meta[property="article:author"]').attr('content')),
wrap($ => $filter($, $('[itemprop*="author" i] [itemprop="name"]'))),
wrap($ => $filter($, $('[itemprop*="author" i]'))),
wrap($ => $filter($, $('[rel="author"]'))),
strict(wrap($ => $filter($, $('a[class*="author" i]')))),
strict(wrap($ => $filter($, $('[class*="author" i] a')))),
strict(wrap($ => $filter($, $('a[href*="/author/" i]')))),
wrap($ => $filter($, $('a[class*="screenname" i]'))),
strict(wrap($ => $filter($, $('[class*="author" i]')))),
strict(wrap($ => $filter($, $('[class*="byline" i]'))))
]
})
|