summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/UploadWizard/includes/UploadWizardFlickrBlacklist.php
blob: 8bcc87259764560323f53556d4efdc8890201753 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
<?php

/**
 * Checks Flickr images against a blacklist of users.
 *
 * The blacklist is read from a wiki page and contains Flickr NSIDs and/or path_aliases.
 * The owner of a photo is looked up through the Flickr API (flickr.photos.getInfo) and
 * compared against that list.
 */
class UploadWizardFlickrBlacklist {
	/**
	 * Regexp to extract photo id (as match group 1) from a static image URL.
	 */
	const IMAGE_URL_REGEXP = '!static\.?flickr\.com/[^/]+/([0-9]+)_!';

	/**
	 * Regexp to extract photo id (as match group 1) from a photo page URL.
	 */
	const PHOTO_URL_REGEXP = '!flickr\.com/(?:x/t/[^/]+/)?photos/[^/]+/([0-9]+)!';

	/**
	 * An array of the blacklisted Flickr NSIDs and path_aliases.
	 * Used as an in-memory cache to speed successive lookups; null means not yet initialized.
	 * Being static, the cache is shared by every instance of this class.
	 * @var array|null
	 */
	protected static $blacklist;

	/**
	 * API key sent as the 'api_key' parameter of Flickr API requests.
	 * @var string
	 */
	protected $flickrApiKey;

	/**
	 * URL the Flickr API requests are POSTed to.
	 * @var string
	 */
	protected $flickrApiUrl;

	/**
	 * Name of the wiki page which contains the NSID blacklist.
	 *
	 * The page should contain usernames (either the path_alias - the human-readable username
	 * in the URL - or the NSID) separated by whitespace. It is not required to contain both
	 * path_alias and NSID for the same user.
	 *
	 * Lines starting with # are ignored.
	 * @var string
	 */
	protected $flickrBlacklistPage;

	/**
	 * @var IContextSource
	 */
	protected $context;

	/**
	 * Sets options based on a config array such as UploadWizardConfig::getConfig().
	 * @param array $options an array with 'flickrApiKey', 'flickrApiUrl' and
	 *     'flickrBlacklistPage' keys
	 * @param IContextSource $context
	 */
	public function __construct( array $options, IContextSource $context ) {
		$this->flickrApiKey = $options['flickrApiKey'];
		$this->flickrApiUrl = $options['flickrApiUrl'];
		$this->flickrBlacklistPage = $options['flickrBlacklistPage'];
		$this->context = $context;
	}

	/**
	 * Tells whether the owner of the Flickr photo behind the given URL is blacklisted.
	 *
	 * @param string $url a Flickr photo page URL or static image URL
	 * @return bool true if the photo owner's NSID or path_alias is on the blacklist;
	 *     false otherwise, including when the URL is not recognized as a Flickr URL
	 *     or the owner could not be determined
	 */
	public function isBlacklisted( $url ) {
		// Check the cheap, purely local condition first so that an unrecognized URL
		// neither loads the blacklist page nor contacts Flickr.
		$flickrPhotoId = $this->getPhotoIdFromUrl( $url );
		if ( !$flickrPhotoId ) {
			// FIXME should we tell the user we did not recognize the URL?
			return false;
		}

		$blacklist = $this->getBlacklist();
		if ( !$blacklist ) {
			// Nothing can match an empty blacklist, so skip the Flickr API round-trip.
			return false;
		}

		$userIds = $this->getUserIdsFromPhotoId( $flickrPhotoId );
		return (bool)array_intersect( $userIds, $blacklist );
	}

	/**
	 * Returns the blacklist, which is a non-associative array of user NSIDs and path_aliases
	 * (the name which can be seen in the pretty URL). For a given user, usually only one
	 * of the NSID and the path_alias will be present; it is the responsibility of the consumers
	 * of the blacklist to check it against both.
	 *
	 * The list is loaded from the configured wiki page on first use and then kept in a static
	 * cache. If no page is configured, the page name is invalid, or the page has no text
	 * content, the blacklist is empty.
	 * @return array
	 */
	public function getBlacklist() {
		if ( !isset( self::$blacklist ) ) {
			self::$blacklist = [];
			if ( $this->flickrBlacklistPage ) {
				$text = null;
				$title = Title::newFromText( $this->flickrBlacklistPage );
				// An invalid page name yields no Title object; treat it like an empty list
				// instead of passing null on to WikiPage::factory().
				if ( $title !== null ) {
					$page = WikiPage::factory( $title );
					$content = $page->getContent();
					// No content (e.g. the page does not exist) means there is nothing to parse.
					if ( $content ) {
						$text = ContentHandler::getContentText( $content );
					}
				}
				if ( is_string( $text ) ) {
					// Drop comment lines, then every remaining whitespace-separated token
					// is a blacklist entry.
					$text = preg_replace( '/^\s*#.*$/m', '', $text );
					if ( is_string( $text ) && preg_match_all( '/\S+/', $text, $match ) ) {
						self::$blacklist = $match[0];
					}
				}
			}
		}
		return self::$blacklist;
	}

	/**
	 * Takes a Flickr photo page URL or a direct image URL, returns photo id (or false on failure).
	 * @param string $url
	 * @return string|bool
	 */
	protected function getPhotoIdFromUrl( $url ) {
		// The static image pattern is tried first, then the photo page pattern.
		foreach ( [ self::IMAGE_URL_REGEXP, self::PHOTO_URL_REGEXP ] as $regexp ) {
			if ( preg_match( $regexp, $url, $matches ) ) {
				return $matches[1];
			}
		}
		return false;
	}

	/**
	 * Takes a photo ID, returns owner's NSID and path_alias
	 * (the username which appears in the URL), if available.
	 * @param string $flickrPhotoId
	 * @return array an array containing the NSID first and the path_alias second. The path_alias
	 *     is not guaranteed to exist, in which case the array will have a single item;
	 *     if there is no such photo (or some other error happened), the array will be empty.
	 */
	protected function getUserIdsFromPhotoId( $flickrPhotoId ) {
		$userIds = [];
		$params = [
			'postData' => [
				'method' => 'flickr.photos.getInfo',
				'api_key' => $this->flickrApiKey,
				'photo_id' => $flickrPhotoId,
				'format' => 'json',
				'nojsoncallback' => 1,
			],
		];
		$response = Http::post( $this->flickrApiUrl, $params );
		// Only a string body can be decoded; any other value is treated as a failed request.
		$data = is_string( $response ) ? json_decode( $response, true ) : null;
		if ( !is_array( $data )
			|| !isset( $data['photo']['owner'] )
			|| !is_array( $data['photo']['owner'] )
		) {
			return $userIds;
		}

		$owner = $data['photo']['owner'];
		// what Flickr calls 'username' can change at any time and so is worthless for blacklisting
		// path_alias is the username in the pretty URL; once set, it cannot be changed.
		foreach ( [ 'nsid', 'path_alias' ] as $field ) {
			// Skip missing and empty values so the result only holds usable identifiers.
			if ( isset( $owner[$field] ) && $owner[$field] !== '' ) {
				$userIds[] = $owner[$field];
			}
		}
		return $userIds;
	}
}