summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/Translate/MessageChecks.php
blob: 270378474af95ccdc3582bfb3aa5359124451a73 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
<?php
/**
 * Message checking framework.
 *
 * @file
 * @defgroup MessageCheckers Message Checkers
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */

/**
 * Message checkers try to find common mistakes so that translators can fix
 * them quickly. To implement your own checks, extend this class and add a
 * method of the following type:
 * @code
 * protected function myCheck( $messages, $code, &$warnings ) {
 *     foreach ( $messages as $message ) {
 *         $key = $message->key();
 *         $translation = $message->translation();
 *         if ( strpos( $translation, 'smelly' ) !== false ) {
 *             $warnings[$key][] = array(
 *                 array( 'badword', 'smelly', $key, $code ),
 *                 'translate-checks-badword', // Needs to be defined in i18n file
 *                 array( 'PARAMS', 'smelly' ),
 *             );
 *         }
 *     }
 * }
 * @endcode
 *
 * Warnings are of format: <pre>
 * $warnings[$key][] = array(
 *    # check identification
 *    array( 'printf', $subcheck, $key, $code ),
 *    # check warning message
 *    'translate-checks-parameters-unknown',
 *    # optional special param list, formatted later with Language::commaList()
 *    array( 'PARAMS', $params ),
 *    # optional number of params, formatted later with Language::formatNum()
 *    array( 'COUNT', count( $params ) ),
 *    'Any other parameters to the message',
 * );
 * </pre>
 *
 * @ingroup MessageCheckers
 */
class MessageChecker {
	/** @var callable[] Checks run by checkMessage() and checkMessageFast(). */
	protected $checks = [];

	/** @var MessageGroup Group this checker was constructed for. */
	protected $group;

	/**
	 * @var array[]|null Normalised blacklist patterns, shared by all instances.
	 * Stays null until a constructor call has populated it.
	 */
	private static $globalBlacklist;

	/**
	 * Constructs a suitable checker for given message group.
	 *
	 * On the first call the blacklist is loaded from the file named by
	 * $wgTranslateCheckBlacklist and normalised: every pattern gets all five
	 * keys (group, check, subcheck, code, message); missing keys become the
	 * wildcard '#', the rest are passed through foldValue().
	 * Setting $wgTranslateCheckBlacklist to false gives an empty blacklist.
	 *
	 * @param MessageGroup $group
	 */
	public function __construct( MessageGroup $group ) {
		global $wgTranslateCheckBlacklist;

		if ( $wgTranslateCheckBlacklist === false ) {
			self::$globalBlacklist = [];
		} elseif ( self::$globalBlacklist === null ) {
			$list = PHPVariableLoader::loadVariableFromPHPFile(
				$wgTranslateCheckBlacklist,
				'checkBlacklist'
			);
			// Guard in case the loader does not yield an array (for example when
			// the file or the variable is missing); iterating it would fail and
			// the cache would stay null.
			if ( !is_array( $list ) ) {
				$list = [];
			}

			$keys = [ 'group', 'check', 'subcheck', 'code', 'message' ];
			foreach ( $list as $key => $pattern ) {
				foreach ( $keys as $checkKey ) {
					if ( !isset( $pattern[$checkKey] ) ) {
						// '#' is the wildcard understood by match()
						$list[$key][$checkKey] = '#';
					} elseif ( is_array( $pattern[$checkKey] ) ) {
						$list[$key][$checkKey] =
							array_map( [ $this, 'foldValue' ], $pattern[$checkKey] );
					} else {
						$list[$key][$checkKey] = $this->foldValue( $pattern[$checkKey] );
					}
				}
			}

			self::$globalBlacklist = $list;
		}

		$this->group = $group;
	}

	/**
	 * Normalises check keys: lower case, spaces replaced with underscores.
	 * @param string $value check key
	 * @return string Normalised check key
	 */
	protected function foldValue( $value ) {
		return str_replace( ' ', '_', strtolower( $value ) );
	}

	/**
	 * Set the tests for this checker. Array of callables with descriptive keys.
	 * Entries that are not callable are dropped and reported with wfWarn().
	 * @param array $checks List of checks (suitable methods in this class)
	 */
	public function setChecks( array $checks ) {
		foreach ( $checks as $k => $c ) {
			if ( !is_callable( $c ) ) {
				unset( $checks[$k] );
				wfWarn( "Check function for check $k is not callable" );
			}
		}
		$this->checks = $checks;
	}

	/**
	 * Adds one test for this checker.
	 * @see setChecks()
	 * @param callable $check
	 */
	public function addCheck( callable $check ) {
		$this->checks[] = $check;
	}

	/**
	 * Checks one message, returns array of warnings that can be passed to
	 * OutputPage::addWikiMsg or similar.
	 *
	 * All registered checks are run, blacklisted warnings are removed and the
	 * special parameters of the remaining ones are formatted.
	 *
	 * @param TMessage $message
	 * @param string $code Language code
	 * @return array Warnings for the message; empty if there are none.
	 */
	public function checkMessage( TMessage $message, $code ) {
		$warningsArray = [];
		$messages = [ $message ];

		foreach ( $this->checks as $check ) {
			call_user_func_array( $check, [ $messages, $code, &$warningsArray ] );
		}

		$warningsArray = $this->filterWarnings( $warningsArray );

		// A check may have produced nothing for this key at all; treat that
		// the same as "no warnings" instead of reading an undefined index.
		$key = $message->key();
		if ( !isset( $warningsArray[$key] ) ) {
			return [];
		}

		return $this->fixMessageParams( $warningsArray[$key] );
	}

	/**
	 * Checks one message, returns true if any check matches.
	 *
	 * Stops at the first check that produces a warning. The blacklist is not
	 * consulted here, so a warning that checkMessage() would filter out still
	 * makes this method return true.
	 *
	 * @param TMessage $message
	 * @param string $code Language code
	 * @return bool True if there is a problem, false otherwise.
	 */
	public function checkMessageFast( TMessage $message, $code ) {
		$warningsArray = [];
		$messages = [ $message ];

		foreach ( $this->checks as $check ) {
			call_user_func_array( $check, [ $messages, $code, &$warningsArray ] );
			if ( count( $warningsArray ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Filters warnings defined in check-blacklist.php.
	 * Array keys of the surviving warnings are preserved.
	 * @param array $warningsArray List of warnings produced by the checks,
	 *  indexed by message key.
	 * @return array List of filtered warnings.
	 */
	protected function filterWarnings( array $warningsArray ) {
		$groupId = $this->group->getId();

		// There is an array of messages...
		foreach ( $warningsArray as $mkey => $warnings ) {
			// ... each which has an array of warnings.
			foreach ( $warnings as $wkey => $warning ) {
				// The first item of a warning is the check identification
				$check = array_shift( $warning );
				if ( $this->isBlacklisted( $groupId, $check ) ) {
					unset( $warningsArray[$mkey][$wkey] );
				}
			}
		}

		return $warningsArray;
	}

	/**
	 * Tells whether any blacklist pattern matches the given check identification.
	 * Returns on the first matching pattern; later patterns cannot change the
	 * outcome.
	 * @param string $groupId Id of the message group being checked
	 * @param array $check Check identification: check, subcheck, message key, code
	 * @return bool
	 */
	private function isBlacklisted( $groupId, array $check ) {
		foreach ( self::$globalBlacklist as $pattern ) {
			if (
				$this->match( $pattern['group'], $groupId ) &&
				$this->match( $pattern['check'], $check[0] ) &&
				$this->match( $pattern['subcheck'], $check[1] ) &&
				$this->match( $pattern['message'], $check[2] ) &&
				$this->match( $pattern['code'], $check[3] )
			) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Matches check information against blacklist pattern.
	 *
	 * The value is normalised with foldValue(), the same way the patterns were
	 * normalised when the blacklist was loaded, so that a pattern containing
	 * spaces can match a value containing spaces.
	 *
	 * @param string|string[] $pattern Normalised pattern, list of alternatives,
	 *  or '#' which matches anything.
	 * @param string $value The actual value in the warnings produced by the check
	 * @return bool True if the pattern matches the value.
	 */
	protected function match( $pattern, $value ) {
		if ( $pattern === '#' ) {
			return true;
		}

		$value = $this->foldValue( $value );
		if ( is_array( $pattern ) ) {
			return in_array( $value, $pattern, true );
		}

		return $value === $pattern;
	}

	/**
	 * Converts the special params to something nice. Currently useless, but
	 * useful if in the future blacklist can work with parameter level too.
	 *
	 * Each warning loses its check identification; what remains is the message
	 * key followed by its parameters, with [ 'COUNT', n ] formatted by
	 * Language::formatNum() and [ 'PARAMS', list ] by Language::commaList().
	 *
	 * @param array $warnings List of warnings
	 * @throws MWException If an array parameter has an unknown type.
	 * @return array List of warning messages with parameters.
	 */
	protected function fixMessageParams( array $warnings ) {
		$lang = RequestContext::getMain()->getLanguage();

		foreach ( $warnings as $wkey => $warning ) {
			// Drop the check identification
			array_shift( $warning );
			$message = [ array_shift( $warning ) ];

			foreach ( $warning as $param ) {
				if ( !is_array( $param ) ) {
					$message[] = $param;
				} else {
					list( $type, $value ) = $param;
					if ( $type === 'COUNT' ) {
						$message[] = $lang->formatNum( $value );
					} elseif ( $type === 'PARAMS' ) {
						$message[] = $lang->commaList( $value );
					} else {
						throw new MWException( "Unknown type $type" );
					}
				}
			}
			$warnings[$wkey] = $message;
		}

		return $warnings;
	}

	/**
	 * Compares two arrays and returns items that don't exist in the latter.
	 * Comparison is strict, so numeric-looking strings such as '1e3' and
	 * '1000' are not treated as the same item.
	 * @param array $defs
	 * @param array $trans
	 * @return array Items of $defs that are not in $trans.
	 */
	protected static function compareArrays( array $defs, array $trans ) {
		$missing = [];

		foreach ( $defs as $defVar ) {
			if ( !in_array( $defVar, $trans, true ) ) {
				$missing[] = $defVar;
			}
		}

		return $missing;
	}

	/**
	 * Checks for missing and unknown printf formatting characters in
	 * translations.
	 * @param TMessage[] $messages Iterable list of TMessage objects.
	 * @param string $code Language code
	 * @param array &$warnings Array where warnings are appended to.
	 */
	protected function printfCheck( $messages, $code, array &$warnings ) {
		$this->parameterCheck( $messages, $code, $warnings, '/%(\d+\$)?[sduf]/U' );
	}

	/**
	 * Checks for missing and unknown Ruby variables (%{var}) in
	 * translations.
	 * @param TMessage[] $messages Iterable list of TMessage objects.
	 * @param string $code Language code
	 * @param array &$warnings Array where warnings are appended to.
	 */
	protected function rubyVariableCheck( $messages, $code, array &$warnings ) {
		$this->parameterCheck( $messages, $code, $warnings, '/%{[a-zA-Z_]+}/' );
	}

	/**
	 * Checks for missing and unknown python string interpolation operators in
	 * translations.
	 * @param TMessage[] $messages Iterable list of TMessage objects.
	 * @param string $code Language code
	 * @param array &$warnings Array where warnings are appended to.
	 */
	protected function pythonInterpolationCheck( $messages, $code, array &$warnings ) {
		$pattern = '/\%\([a-zA-Z0-9]*?\)[diouxXeEfFgGcrs]/U';
		$this->parameterCheck( $messages, $code, $warnings, $pattern );
	}

	/**
	 * Checks if the translation has even number of opening and closing
	 * parentheses. {, [ and ( are checked.
	 * Note that this will not add a warning if the message definition
	 * has an unbalanced amount of braces.
	 *
	 * Each entry of the PARAMS list is the bracket pair followed by the
	 * difference between opening and closing counts, for example "(): 1".
	 *
	 * @param TMessage[] $messages Iterable list of TMessage objects.
	 * @param string $code Language code
	 * @param array &$warnings Array where warnings are appended to.
	 */
	protected function braceBalanceCheck( $messages, $code, array &$warnings ) {
		$subcheck = 'brace';
		// Same reporting order as before: square, curly, round
		$pairs = [ [ '[', ']' ], [ '{', '}' ], [ '(', ')' ] ];

		foreach ( $messages as $message ) {
			$key = $message->key();
			$translation = (string)$message->translation();
			$definition = $message->definition();

			$balance = [];
			foreach ( $pairs as $pair ) {
				list( $open, $close ) = $pair;
				// The brackets are single-byte ASCII, so counting bytes is safe in
				// UTF-8 text and keeps working even if the text is not valid UTF-8.
				$diff = substr_count( $translation, $open ) - substr_count( $translation, $close );
				if ( $diff !== 0 && self::checkStringCountEqual( $definition, $open, $close ) ) {
					$balance[] = $open . $close . ': ' . $diff;
				}
			}

			if ( count( $balance ) ) {
				$warnings[$key][] = [
					[ 'balance', $subcheck, $key, $code ],
					'translate-checks-balance',
					[ 'PARAMS', $balance ],
					[ 'COUNT', count( $balance ) ],
				];
			}
		}
	}

	/**
	 * @param string $source
	 * @param string $str1
	 * @param string $str2
	 * @return bool whether $source has an equal number of occurrences of $str1 and $str2
	 */
	protected static function checkStringCountEqual( $source, $str1, $str2 ) {
		return substr_count( $source, $str1 ) === substr_count( $source, $str2 );
	}

	/**
	 * Checks for variables that are missing from the translation or that are
	 * unknown to the definition. Variables are whatever $pattern matches.
	 *
	 * Adds a 'variable'/'missing' warning for matches found only in the
	 * definition and a 'variable'/'unknown' warning for matches found only in
	 * the translation.
	 *
	 * @param TMessage[] $messages Iterable list of TMessage objects.
	 * @param string $code Language code
	 * @param array &$warnings Array where warnings are appended to.
	 * @param string $pattern Regular expression for matching variables.
	 */
	protected function parameterCheck( $messages, $code, array &$warnings, $pattern ) {
		foreach ( $messages as $message ) {
			$key = $message->key();
			$definition = $message->definition();
			$translation = $message->translation();

			preg_match_all( $pattern, $definition, $defVars );
			preg_match_all( $pattern, $translation, $transVars );

			// Check for missing variables in the translation
			$subcheck = 'missing';
			$params = self::compareArrays( $defVars[0], $transVars[0] );

			if ( count( $params ) ) {
				$warnings[$key][] = [
					[ 'variable', $subcheck, $key, $code ],
					'translate-checks-parameters',
					[ 'PARAMS', $params ],
					[ 'COUNT', count( $params ) ],
				];
			}

			// Check for unknown variables in the translation
			$subcheck = 'unknown';
			$params = self::compareArrays( $transVars[0], $defVars[0] );

			if ( count( $params ) ) {
				$warnings[$key][] = [
					[ 'variable', $subcheck, $key, $code ],
					'translate-checks-parameters-unknown',
					[ 'PARAMS', $params ],
					[ 'COUNT', count( $params ) ],
				];
			}
		}
	}

	/**
	 * Checks that the translation parses as XML when wrapped in a root element.
	 *
	 * Only libxml errors 76 and 73 are reported (in libxml2 these are
	 * XML_ERR_TAG_NAME_MISMATCH and XML_ERR_GT_REQUIRED); any other parse
	 * error is ignored.
	 *
	 * @param TMessage[] $messages Iterable list of TMessage objects.
	 * @param string $code Language code
	 * @param array &$warnings Array where warnings are appended to.
	 */
	protected function balancedTagsCheck( $messages, $code, array &$warnings ) {
		// Collect parse errors instead of emitting PHP warnings, and remember the
		// previous setting so this global libxml state can be put back afterwards.
		$previousSetting = libxml_use_internal_errors( true );

		foreach ( $messages as $message ) {
			$key = $message->key();
			$translation = $message->translation();

			libxml_clear_errors();
			$doc = simplexml_load_string( Xml::tags( 'root', null, $translation ) );
			// Compare against false explicitly: an empty element is a valid parse
			// result but evaluates to false in boolean context.
			if ( $doc !== false ) {
				continue;
			}

			$errors = libxml_get_errors();
			$params = [];
			foreach ( $errors as $error ) {
				if ( $error->code !== 76 && $error->code !== 73 ) {
					continue;
				}
				$params[] = "<br />• [{$error->code}] $error->message";
			}

			if ( !count( $params ) ) {
				continue;
			}

			$warnings[$key][] = [
				[ 'tags', 'balance', $key, $code ],
				'translate-checks-format',
				[ 'PARAMS', $params ],
				[ 'COUNT', count( $params ) ],
			];
		}

		libxml_clear_errors();
		libxml_use_internal_errors( $previousSetting );
	}
}