summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/Translate/utils/ExternalMessageSourceStateComparator.php
blob: 02407fd86fc21bad82dc42da9061e65f19b11bb5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
<?php

/**
 * Finds external changes for file based message groups.
 *
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 * @since 2013.12
 */
class ExternalMessageSourceStateComparator {
	/** Process all languages supported by the message group */
	const ALL_LANGUAGES = 'all languages';

	/**
	 * Changes collected by the most recent processGroup() call, in the
	 * structure documented on that method.
	 *
	 * @var array
	 */
	protected $changes = [];

	/**
	 * Finds changes in external sources compared to wiki state.
	 *
	 * The returned array is as following:
	 * - First level is indexed by language code
	 * - Second level is indexed by change type:
	 * - - addition (new message in the file)
	 * - - deletion (message in wiki not present in the file)
	 * - - change (difference in content)
	 * - Third level is a list of changes
	 * - Fourth level is change properties
	 * - - key (the message key)
	 * - - content (the message content in external source, null for deletions)
	 *
	 * @param FileBasedMessageGroup $group
	 * @param array|string $languages Either self::ALL_LANGUAGES or a list of
	 *   language codes to process.
	 * @throws MWException If $languages is neither self::ALL_LANGUAGES nor an array.
	 * @return array array[language code][change type] = change.
	 */
	public function processGroup( FileBasedMessageGroup $group, $languages ) {
		$this->changes = [];
		$processAll = false;

		if ( $languages === self::ALL_LANGUAGES ) {
			$processAll = true;
			$languages = $group->getTranslatableLanguages();

			// This means all languages
			if ( $languages === null ) {
				$languages = TranslateUtils::getLanguageNames( 'en' );
			}

			$languages = array_keys( $languages );
		} elseif ( !is_array( $languages ) ) {
			throw new MWException( 'Invalid input given for $languages' );
		}

		// Process the source language before others. Source language might not
		// be included in $group->getTranslatableLanguages(). The expected
		// behavior is that source language is always processed when given
		// self::ALL_LANGUAGES.
		$sourceLanguage = $group->getSourceLanguage();
		$index = array_search( $sourceLanguage, $languages, true );

		// Only drop the source language from the list when it was actually
		// found. array_search() returns false on a miss, and PHP casts a false
		// array offset to 0, so an unconditional unset would silently discard
		// the first language of the list.
		if ( $index !== false ) {
			unset( $languages[$index] );
		}

		if ( $processAll || $index !== false ) {
			$this->processLanguage( $group, $sourceLanguage );
		}

		foreach ( $languages as $code ) {
			$this->processLanguage( $group, $code );
		}

		return $this->changes;
	}

	/**
	 * Collects the changes for a single language of the group.
	 *
	 * Nothing is done when the message group cache for the language reports
	 * itself valid. Otherwise the file, wiki and cache states are compared and
	 * any differences are recorded in $this->changes.
	 *
	 * @param FileBasedMessageGroup $group
	 * @param string $code Language code.
	 */
	protected function processLanguage( FileBasedMessageGroup $group, $code ) {
		$cache = new MessageGroupCache( $group, $code );
		// Passed to isValid(), which is expected to fill in why the cache is
		// not valid; the value is later compared against
		// MessageGroupCache::NO_CACHE.
		$reason = 0;
		if ( !$cache->isValid( $reason ) ) {
			$this->addMessageUpdateChanges( $group, $code, $reason, $cache );

			if ( !isset( $this->changes[$code] ) ) {
				/* Update the cache immediately if file and wiki state match.
				 * Otherwise the cache will get outdated compared to file state
				 * and will give false positive conflicts later. */
				$cache->create();
			}
		}
	}

	/**
	 * This is the detective novel. We have three sources of information:
	 * - current message state in the file
	 * - current message state in the wiki
	 * - cached message state since cache was last built
	 *   (usually after export from wiki)
	 *
	 * Now we must try to guess what on earth has driven the file state and
	 * wiki state out of sync. Then we must compile list of events that would
	 * bring those to sync. Types of events are addition, deletion, (content)
	 * change and possible rename in the future. After that the list of events
	 * are stored for later processing of a translation administrator, who can
	 * decide what actions to take on those events to bring the state more or
	 * less in sync.
	 *
	 * @param FileBasedMessageGroup $group
	 * @param string $code Language code.
	 * @param int $reason Reason the cache was reported as not valid; compared
	 *   against MessageGroupCache::NO_CACHE.
	 * @param MessageGroupCache $cache
	 * @throws MWException If the source language file does not exist.
	 */
	protected function addMessageUpdateChanges( FileBasedMessageGroup $group, $code,
		$reason, $cache
	) {
		/* This throws a warning if message definitions are not yet
		 * cached and will read the file for definitions. */
		Wikimedia\suppressWarnings();
		$wiki = $group->initCollection( $code );
		Wikimedia\restoreWarnings();
		$wiki->filter( 'hastranslation', false );
		$wiki->loadTranslations();
		$wikiKeys = $wiki->getMessageKeys();

		// By-pass cached message definitions
		/** @var FFS $ffs */
		$ffs = $group->getFFS();
		if ( $code === $group->getSourceLanguage() && !$ffs->exists( $code ) ) {
			$path = $group->getSourceFilePath( $code );
			throw new MWException( "Source message file for {$group->getId()} does not exist: $path" );
		}

		$file = $ffs->read( $code );

		// Does not exist
		if ( $file === false ) {
			return;
		}

		// Something went wrong
		if ( !isset( $file['MESSAGES'] ) ) {
			$id = $group->getId();
			$ffsClass = get_class( $ffs );

			error_log( "$id has an FFS ($ffsClass) - it didn't return cake for $code" );

			return;
		}

		$fileKeys = array_keys( $file['MESSAGES'] );

		// Messages present in both the file and the wiki: candidates for 'change'
		$common = array_intersect( $fileKeys, $wikiKeys );

		$supportsFuzzy = $ffs->supportsFuzzy();

		foreach ( $common as $key ) {
			$sourceContent = $file['MESSAGES'][$key];
			/** @var TMessage $wikiMessage */
			$wikiMessage = $wiki[$key];
			$wikiContent = $wikiMessage->translation();

			// @todo: Fuzzy checking can also be moved to $ffs->isContentEqual();
			// If FFS doesn't support it, ignore fuzziness as difference
			$wikiContent = str_replace( TRANSLATE_FUZZY, '', $wikiContent );

			// But if it does, ensure we have exactly one fuzzy marker prefixed
			if ( $supportsFuzzy === 'yes' && $wikiMessage->hasTag( 'fuzzy' ) ) {
				$wikiContent = TRANSLATE_FUZZY . $wikiContent;
			}

			if ( $ffs->isContentEqual( $sourceContent, $wikiContent ) ) {
				// File and wiki state agree, nothing to do
				continue;
			}

			// Check against interim cache to see whether we have changes
			// in the wiki, in the file or both.

			if ( $reason !== MessageGroupCache::NO_CACHE ) {
				$cacheContent = $cache->get( $key );

				/* We want to ignore the common situation that the string
				 * in the wiki has been changed since the last export.
				 * Hence we check that source === cache && cache !== wiki
				 * and if so we skip this string. */
				if (
					!$ffs->isContentEqual( $wikiContent, $cacheContent ) &&
					$ffs->isContentEqual( $sourceContent, $cacheContent )
				) {
					continue;
				}
			}

			$this->addChange( 'change', $code, $key, $sourceContent );
		}

		// Messages only in the file: additions, unless their content is blank
		$added = array_diff( $fileKeys, $wikiKeys );
		foreach ( $added as $key ) {
			$sourceContent = $file['MESSAGES'][$key];
			if ( trim( $sourceContent ) === '' ) {
				continue;
			}
			$this->addChange( 'addition', $code, $key, $sourceContent );
		}

		/* Should the cache not exist, don't consider the messages
		 * missing from the file as deleted - they probably aren't
		 * yet exported. For example new language translations are
		 * exported the first time. */
		if ( $reason !== MessageGroupCache::NO_CACHE ) {
			$deleted = array_diff( $wikiKeys, $fileKeys );
			foreach ( $deleted as $key ) {
				if ( $cache->get( $key ) === false ) {
					/* This message has never existed in the cache, so it
					 * must be newly made in the wiki. */
					continue;
				}
				$this->addChange( 'deletion', $code, $key, null );
			}
		}
	}

	/**
	 * Records one change in $this->changes under the given language and type.
	 *
	 * @param string $type Change type: 'addition', 'deletion' or 'change'.
	 * @param string $language Language code.
	 * @param string $key Message key.
	 * @param string|null $content Message content in the external source,
	 *   null for deletions.
	 */
	protected function addChange( $type, $language, $key, $content ) {
		$this->changes[$language][$type][] = [
			'key' => $key,
			'content' => $content,
		];
	}
}