summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/LocalisationUpdate/includes
diff options
context:
space:
mode:
authorYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
committerYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
commitfc7369835258467bf97eb64f184b93691f9a9fd5 (patch)
treedaabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/extensions/LocalisationUpdate/includes
first commit
Diffstat (limited to 'www/wiki/extensions/LocalisationUpdate/includes')
-rw-r--r--www/wiki/extensions/LocalisationUpdate/includes/LocalisationUpdate.php73
-rw-r--r--www/wiki/extensions/LocalisationUpdate/includes/QuickArrayReader.php214
-rw-r--r--www/wiki/extensions/LocalisationUpdate/includes/Updater.php204
-rw-r--r--www/wiki/extensions/LocalisationUpdate/includes/fetcher/Fetcher.php30
-rw-r--r--www/wiki/extensions/LocalisationUpdate/includes/fetcher/FetcherFactory.php25
-rw-r--r--www/wiki/extensions/LocalisationUpdate/includes/fetcher/FileSystemFetcher.php47
-rw-r--r--www/wiki/extensions/LocalisationUpdate/includes/fetcher/GitHubFetcher.php47
-rw-r--r--www/wiki/extensions/LocalisationUpdate/includes/fetcher/HttpFetcher.php49
-rw-r--r--www/wiki/extensions/LocalisationUpdate/includes/finder/Finder.php124
-rw-r--r--www/wiki/extensions/LocalisationUpdate/includes/reader/JSONReader.php37
-rw-r--r--www/wiki/extensions/LocalisationUpdate/includes/reader/PHPReader.php61
-rw-r--r--www/wiki/extensions/LocalisationUpdate/includes/reader/Reader.php21
-rw-r--r--www/wiki/extensions/LocalisationUpdate/includes/reader/ReaderFactory.php38
13 files changed, 970 insertions, 0 deletions
diff --git a/www/wiki/extensions/LocalisationUpdate/includes/LocalisationUpdate.php b/www/wiki/extensions/LocalisationUpdate/includes/LocalisationUpdate.php
new file mode 100644
index 00000000..a0b5f044
--- /dev/null
+++ b/www/wiki/extensions/LocalisationUpdate/includes/LocalisationUpdate.php
@@ -0,0 +1,73 @@
+<?php
+
+/**
+ * Hook handlers and static helpers of the LocalisationUpdate extension.
+ */
+class LocalisationUpdate {
+	/**
+	 * Hook: LocalisationCacheRecacheFallback
+	 *
+	 * Merges the stored translation updates for $code over the messages that
+	 * are already in $cache. Does nothing when no directory is configured or
+	 * when the update file is missing, unreadable or not valid JSON.
+	 *
+	 * @param LocalisationCache $lc
+	 * @param string $code
+	 * @param array &$cache
+	 * @return true
+	 */
+	public static function onRecacheFallback( LocalisationCache $lc, $code, array &$cache ) {
+		$dir = self::getDirectory();
+		if ( !$dir ) {
+			return true;
+		}
+
+		$fileName = "$dir/" . self::getFilename( $code );
+		if ( !is_readable( $fileName ) ) {
+			return true;
+		}
+
+		$data = FormatJson::decode( file_get_contents( $fileName ), true );
+		// Only merge when decoding produced an array: array_merge() cannot
+		// cope with anything else and would clobber the existing messages.
+		if ( is_array( $data ) ) {
+			$cache['messages'] = array_merge( $cache['messages'], $data );
+		}
+
+		return true;
+	}
+
+	/**
+	 * Hook: LocalisationCacheRecache
+	 *
+	 * Registers the update file of $code and of every language in its
+	 * fallback sequence as a file dependency of the cache entry.
+	 *
+	 * @param LocalisationCache $lc
+	 * @param string $code
+	 * @param array &$cache
+	 * @return true
+	 */
+	public static function onRecache( LocalisationCache $lc, $code, array &$cache ) {
+		$dir = self::getDirectory();
+		if ( !$dir ) {
+			return true;
+		}
+
+		$codeSequence = array_merge( [ $code ], $cache['fallbackSequence'] );
+		foreach ( $codeSequence as $csCode ) {
+			$fileName = "$dir/" . self::getFilename( $csCode );
+			$cache['deps'][] = new FileDependency( $fileName );
+		}
+
+		return true;
+	}
+
+	/**
+	 * Returns a directory where updated translations are stored.
+	 *
+	 * $wgLocalisationUpdateDirectory takes precedence; $wgCacheDirectory is
+	 * used when the former is empty.
+	 *
+	 * @return string|false False if not configured.
+	 * @since 1.1
+	 */
+	public static function getDirectory() {
+		global $wgLocalisationUpdateDirectory, $wgCacheDirectory;
+
+		return $wgLocalisationUpdateDirectory ?: $wgCacheDirectory;
+	}
+
+	/**
+	 * Returns a filename where updated translations are stored.
+	 *
+	 * @param string $language Language tag
+	 * @return string File name without directory, "l10nupdate-<language>.json"
+	 * @since 1.1
+	 */
+	public static function getFilename( $language ) {
+		return "l10nupdate-$language.json";
+	}
+}
diff --git a/www/wiki/extensions/LocalisationUpdate/includes/QuickArrayReader.php b/www/wiki/extensions/LocalisationUpdate/includes/QuickArrayReader.php
new file mode 100644
index 00000000..0314ee68
--- /dev/null
+++ b/www/wiki/extensions/LocalisationUpdate/includes/QuickArrayReader.php
@@ -0,0 +1,214 @@
+<?php
+
+/**
+ * Quickie parser class that can happily read the subset of PHP we need
+ * for our localization arrays safely.
+ *
+ * Still an order of magnitude slower than eval().
+ */
+class QuickArrayReader {
+ private $vars = [];
+
+	/**
+	 * Tokenises $string and records every top-level assignment of the form
+	 * `$var = <scalar>;`, `$var = array( key => value, ... );` or
+	 * `$var[index] = ...;`, so the value can be read back with getVar().
+	 *
+	 * Whitespace and comments are ignored everywhere, including after the
+	 * last statement.
+	 *
+	 * @param string $string PHP source including its opening tag
+	 * @throws Exception When the source contains anything outside that subset
+	 */
+	function __construct( $string ) {
+		$scalarTypes = [
+			T_LNUMBER => true,
+			T_DNUMBER => true,
+			T_STRING => true,
+			T_CONSTANT_ENCAPSED_STRING => true,
+		];
+		$skipTypes = [
+			T_WHITESPACE => true,
+			T_COMMENT => true,
+			T_DOC_COMMENT => true,
+		];
+		$tokens = token_get_all( $string );
+		$count = count( $tokens );
+		for ( $i = 0; $i < $count; ) {
+			// Skip whitespace and comments between statements, but never
+			// walk past the last token: trailing whitespace is legal.
+			while ( $i < $count && is_array( $tokens[$i] ) && isset( $skipTypes[$tokens[$i][0]] ) ) {
+				$i++;
+			}
+			if ( $i >= $count ) {
+				break;
+			}
+
+			// Single-character tokens are plain strings and have no type
+			$type = is_array( $tokens[$i] ) ? $tokens[$i][0] : null;
+			switch ( $type ) {
+				case T_OPEN_TAG:
+					$i++;
+					break;
+				case T_VARIABLE:
+					// '$messages' -> 'messages'
+					$varname = trim( substr( $tokens[$i][1], 1 ) );
+					$varindex = null;
+
+					$i = $this->seekNext( $tokens, $i, $skipTypes );
+
+					// Optional single index: $var['index'] = ...
+					if ( $tokens[$i] === '[' ) {
+						$i = $this->seekNext( $tokens, $i, $skipTypes );
+
+						if ( isset( $scalarTypes[$tokens[$i][0]] ) ) {
+							$varindex = $this->parseScalar( $tokens[$i] );
+						} else {
+							throw $this->except( $tokens[$i], 'scalar index' );
+						}
+						$i = $this->seekNext( $tokens, $i, $skipTypes );
+
+						if ( $tokens[$i] !== ']' ) {
+							throw $this->except( $tokens[$i], ']' );
+						}
+						$i = $this->seekNext( $tokens, $i, $skipTypes );
+					}
+
+					if ( $tokens[$i] !== '=' ) {
+						throw $this->except( $tokens[$i], '=' );
+					}
+					$i = $this->seekNext( $tokens, $i, $skipTypes );
+
+					if ( isset( $scalarTypes[$tokens[$i][0]] ) ) {
+						$buildval = $this->parseScalar( $tokens[$i] );
+					} elseif ( $tokens[$i][0] === T_ARRAY ) {
+						$i = $this->seekNext( $tokens, $i, $skipTypes );
+						if ( $tokens[$i] !== '(' ) {
+							throw $this->except( $tokens[$i], '(' );
+						}
+						$buildval = [];
+						do {
+							$i = $this->seekNext( $tokens, $i, $skipTypes );
+
+							// Empty array, or end after a trailing comma
+							if ( $tokens[$i] === ')' ) {
+								break;
+							}
+							// Reject anything that is not a scalar instead of
+							// silently reusing the previous entry's key.
+							if ( !isset( $scalarTypes[$tokens[$i][0]] ) ) {
+								throw $this->except( $tokens[$i], 'scalar key' );
+							}
+							$key = $this->parseScalar( $tokens[$i] );
+							$i = $this->seekNext( $tokens, $i, $skipTypes );
+
+							if ( $tokens[$i][0] !== T_DOUBLE_ARROW ) {
+								throw $this->except( $tokens[$i], '=>' );
+							}
+							$i = $this->seekNext( $tokens, $i, $skipTypes );
+
+							// Same for values: nested arrays, variables and
+							// expressions are outside the supported subset.
+							if ( !isset( $scalarTypes[$tokens[$i][0]] ) ) {
+								throw $this->except( $tokens[$i], 'scalar value' );
+							}
+							$val = $this->parseScalar( $tokens[$i] );
+							$buildval[$key] = $val;
+							$i = $this->seekNext( $tokens, $i, $skipTypes );
+
+							if ( $tokens[$i] === ',' ) {
+								continue;
+							} elseif ( $tokens[$i] === ')' ) {
+								break;
+							} else {
+								throw $this->except( $tokens[$i], ', or )' );
+							}
+						} while ( true );
+					} else {
+						throw $this->except( $tokens[$i], 'scalar or array' );
+					}
+					if ( is_null( $varindex ) ) {
+						$this->vars[$varname] = $buildval;
+					} else {
+						wfSuppressWarnings();
+						$this->vars[$varname][$varindex] = $buildval;
+						wfRestoreWarnings();
+					}
+					$i = $this->seekNext( $tokens, $i, $skipTypes );
+					if ( $tokens[$i] !== ';' ) {
+						throw $this->except( $tokens[$i], ';' );
+					}
+					$i++;
+					break;
+				default:
+					throw $this->except( $tokens[$i], 'open tag, whitespace, or variable.' );
+			}
+		}
+	}
+
+	/**
+	 * Returns the index of the first token after position $i that is not
+	 * listed in $skipTypes.
+	 *
+	 * @param array $tokens Token list from token_get_all()
+	 * @param int $i Current position
+	 * @param array $skipTypes Token types to skip, as array keys
+	 * @return int Always a valid index into $tokens
+	 * @throws Exception When the input ends in the middle of a statement
+	 */
+	private function seekNext( array $tokens, $i, array $skipTypes ) {
+		$count = count( $tokens );
+		do {
+			$i++;
+		} while ( $i < $count && is_array( $tokens[$i] ) && isset( $skipTypes[$tokens[$i][0]] ) );
+
+		if ( $i >= $count ) {
+			throw new Exception( 'Unexpected end of input' );
+		}
+
+		return $i;
+	}
+
+	/**
+	 * Builds, without throwing, the exception that describes a parse error.
+	 *
+	 * @param array|string $got Offending token as produced by token_get_all()
+	 * @param string $expected Description of what was expected instead
+	 * @return Exception
+	 */
+	private function except( $got, $expected ) {
+		$seen = is_array( $got )
+			? token_name( $got[0] ) . " ('" . $got[1] . "')"
+			: "'" . $got . "'";
+
+		return new Exception( "Expected $expected, got $seen" );
+	}
+
+	/**
+	 * Parse a scalar value in PHP
+	 *
+	 * Quoted strings are unquoted and unescaped. The keywords true, false and
+	 * null are recognised in any letter case and only when they make up the
+	 * whole token. Everything else is returned as the raw token text.
+	 *
+	 * @param array|string $token Token as produced by token_get_all()
+	 *
+	 * @return mixed Parsed value
+	 */
+	function parseScalar( $token ) {
+		$str = is_array( $token ) ? $token[1] : $token;
+		$first = ( $str !== '' ) ? $str[0] : '';
+
+		if ( $first == '\'' ) {
+			// Single-quoted string
+			// @fixme trim() call is due to mystery bug where whitespace gets
+			// appended to the token; without it we ended up reading in the
+			// extra quote on the end!
+			return strtr( substr( trim( $str ), 1, -1 ),
+				[ '\\\'' => '\'', '\\\\' => '\\' ] );
+		}
+
+		if ( $first == '"' ) {
+			// Double-quoted string; trim() for the same reason as above
+			return stripcslashes( substr( trim( $str ), 1, -1 ) );
+		}
+
+		// Compare the whole token, so that identifiers which merely start
+		// with a keyword (e.g. "trueish") are not mistaken for it.
+		$keyword = strtolower( trim( (string)$str ) );
+		if ( $keyword === 'true' ) {
+			return true;
+		}
+
+		if ( $keyword === 'false' ) {
+			return false;
+		}
+
+		if ( $keyword === 'null' ) {
+			return null;
+		}
+
+		// Must be some kind of numeric value, so let PHP's weak typing
+		// be useful for a change
+		return $str;
+	}
+
+	/**
+	 * Looks up a variable collected while parsing.
+	 *
+	 * @param string $varname Variable name without the leading dollar sign
+	 * @return mixed|null The stored value, or null when the variable was not
+	 *  assigned or holds null
+	 */
+	function getVar( $varname ) {
+		return isset( $this->vars[$varname] ) ? $this->vars[$varname] : null;
+	}
+}
diff --git a/www/wiki/extensions/LocalisationUpdate/includes/Updater.php b/www/wiki/extensions/LocalisationUpdate/includes/Updater.php
new file mode 100644
index 00000000..863dc04a
--- /dev/null
+++ b/www/wiki/extensions/LocalisationUpdate/includes/Updater.php
@@ -0,0 +1,204 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+namespace LocalisationUpdate;
+
+/**
+ * Executes the localisation update.
+ */
+class Updater {
+	/**
+	 * Tells whether the last segment of the path contains a wildcard (*), in
+	 * which case the path denotes a set of files rather than a single file.
+	 *
+	 * @param string $path Url
+	 * @return bool
+	 */
+	public function isDirectory( $path ) {
+		return strpos( basename( $path ), '*' ) !== false;
+	}
+
+	/**
+	 * Builds the full remote url of a component from the url pattern of its
+	 * repository. Every key of $info other than 'repo' and 'orig' is treated
+	 * as a variable: its upper-cased name wrapped in percent signs is
+	 * replaced in the pattern by its value (name -> %NAME%).
+	 *
+	 * @param array $info Component information.
+	 * @param array $repos Repository information.
+	 * @return string
+	 */
+	public function expandRemotePath( $info, $repos ) {
+		$pattern = $repos[$info['repo']];
+		unset( $info['repo'], $info['orig'] );
+
+		$placeholders = array_map(
+			function ( $name ) {
+				return '%' . strtoupper( $name ) . '%';
+			},
+			array_keys( $info )
+		);
+
+		return str_replace( $placeholders, array_values( $info ), $pattern );
+	}
+
+	/**
+	 * Parses translations from given list of files.
+	 *
+	 * Files that cannot be parsed raise an E_USER_WARNING and are skipped.
+	 * Messages of the same language found in several files are merged, later
+	 * files overriding earlier ones.
+	 *
+	 * @param ReaderFactory $readerFactory Factory to construct parsers.
+	 * @param array $files List of files with their contents as array values.
+	 * @return array List of translations indexed by language code.
+	 */
+	public function readMessages( ReaderFactory $readerFactory, array $files ) {
+		$messages = [];
+
+		foreach ( $files as $filename => $contents ) {
+			$reader = $readerFactory->getReader( $filename );
+			try {
+				$parsed = $reader->parse( $contents );
+			} catch ( \Exception $e ) {
+				trigger_error( __METHOD__ . ": Unable to parse messages from $filename", E_USER_WARNING );
+				continue;
+			}
+
+			// A reader that could not make sense of the file may hand back
+			// something that is not a list of languages; treat it like a
+			// parse failure instead of iterating over it.
+			if ( !is_array( $parsed ) ) {
+				trigger_error( __METHOD__ . ": Unable to parse messages from $filename", E_USER_WARNING );
+				continue;
+			}
+
+			foreach ( $parsed as $code => $langMessages ) {
+				// Skip languages without a usable message list; merging a
+				// non-array would wipe what was collected so far.
+				if ( !is_array( $langMessages ) ) {
+					continue;
+				}
+
+				if ( !isset( $messages[$code] ) ) {
+					$messages[$code] = [];
+				}
+				$messages[$code] = array_merge( $messages[$code], $langMessages );
+			}
+		}
+
+		return $messages;
+	}
+
+	/**
+	 * Returns the entries of $remote that are missing from $origin or whose
+	 * value is not identical to the one in $origin.
+	 *
+	 * @param array $origin
+	 * @param array $remote
+	 * @param array $blacklist Message keys to ignore, given as array keys.
+	 * @return array
+	 */
+	public function findChangedTranslations( $origin, $remote, $blacklist = [] ) {
+		$changed = [];
+		foreach ( $remote as $key => $value ) {
+			$ignored = isset( $blacklist[$key] );
+			$unchanged = isset( $origin[$key] ) && $value === $origin[$key];
+
+			if ( !$ignored && !$unchanged ) {
+				$changed[$key] = $value;
+			}
+		}
+
+		return $changed;
+	}
+
+	/**
+	 * Fetches the file, or the set of files when the path is a pattern, that
+	 * the given Url points to.
+	 *
+	 * @param FetcherFactory $factory Factory to construct fetchers.
+	 * @param string $path Url to the file or pattern of files.
+	 * @return array File contents indexed by Url. Entries whose contents are
+	 *  empty or false are left out.
+	 */
+	public function fetchFiles( FetcherFactory $factory, $path ) {
+		$fetcher = $factory->getFetcher( $path );
+
+		$files = $this->isDirectory( $path )
+			? $fetcher->fetchDirectory( $path )
+			: [ $path => $fetcher->fetchFile( $path ) ];
+
+		// array_filter() without a callback drops the files that were not found
+		return array_filter( $files );
+	}
+
+	/**
+	 * Runs the update: for every component reported by the finder, compares
+	 * the local message files with the remote ones and collects the
+	 * translations that are new or changed remotely.
+	 *
+	 * Components whose remote files cannot be fetched, or which have no
+	 * remote English messages, are skipped. English itself is never updated.
+	 *
+	 * @param Finder $finder Provides the list of components.
+	 * @param ReaderFactory $readerFactory Factory to construct parsers.
+	 * @param FetcherFactory $fetcherFactory Factory to construct fetchers.
+	 * @param array $repos Repository url patterns, see expandRemotePath().
+	 * @param object $logger Object providing logInfo() and logError().
+	 * @return array Updated translations indexed by language code, then by
+	 *  message key.
+	 */
+	public function execute(
+		Finder $finder,
+		ReaderFactory $readerFactory,
+		FetcherFactory $fetcherFactory,
+		array $repos,
+		$logger
+	) {
+		$components = $finder->getComponents();
+
+		$updatedMessages = [];
+
+		foreach ( $components as $key => $info ) {
+			$logger->logInfo( "Updating component $key" );
+
+			$originFiles = $this->fetchFiles( $fetcherFactory, $info['orig'] );
+			$remotePath = $this->expandRemotePath( $info, $repos );
+			try {
+				$remoteFiles = $this->fetchFiles( $fetcherFactory, $remotePath );
+			} catch ( \Exception $e ) {
+				$logger->logError( __METHOD__ . ": Unable to fetch messages from $remotePath" );
+				continue;
+			}
+
+			if ( $remoteFiles === [] ) {
+				// Small optimization: if nothing to compare with, skip
+				continue;
+			}
+
+			$originMessages = $this->readMessages( $readerFactory, $originFiles );
+			$remoteMessages = $this->readMessages( $readerFactory, $remoteFiles );
+
+			if ( !isset( $remoteMessages['en'] ) ) {
+				// Could not find remote messages
+				continue;
+			}
+
+			// The local English file may be missing or unparseable. An empty
+			// list then marks every remote English message as changed, which
+			// blocks all updates for the component - the safe outcome.
+			$originEnglish = isset( $originMessages['en'] ) ? $originMessages['en'] : [];
+
+			// If remote translation in English is not present or differs, we do not want
+			// translations for other languages for those messages, as they are either not
+			// used in this version of code or can be incompatible.
+			$forbiddenKeys = $this->findChangedTranslations(
+				$originEnglish,
+				$remoteMessages['en']
+			);
+
+			// We never accept updates for English strings
+			unset( $originMessages['en'], $remoteMessages['en'] );
+
+			// message: string in all languages; translation: string in one language.
+			foreach ( $remoteMessages as $language => $remoteTranslations ) {
+				// Check for completely new languages
+				$originTranslations = [];
+				if ( isset( $originMessages[$language] ) ) {
+					$originTranslations = $originMessages[$language];
+				}
+
+				$updatedTranslations = $this->findChangedTranslations(
+					$originTranslations,
+					$remoteTranslations,
+					$forbiddenKeys
+				);
+
+				// Avoid empty arrays
+				if ( $updatedTranslations === [] ) {
+					continue;
+				}
+
+				if ( !isset( $updatedMessages[$language] ) ) {
+					$updatedMessages[$language] = [];
+				}
+
+				// In case of conflicts, which should not exist, this prefers the
+				// first translation seen.
+				$updatedMessages[$language] += $updatedTranslations;
+			}
+		}
+
+		return $updatedMessages;
+	}
+}
diff --git a/www/wiki/extensions/LocalisationUpdate/includes/fetcher/Fetcher.php b/www/wiki/extensions/LocalisationUpdate/includes/fetcher/Fetcher.php
new file mode 100644
index 00000000..62febac8
--- /dev/null
+++ b/www/wiki/extensions/LocalisationUpdate/includes/fetcher/Fetcher.php
@@ -0,0 +1,30 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+namespace LocalisationUpdate;
+
+/**
+ * Interface for classes which fetch files in different ways, for example from
+ * the local file system or over HTTP.
+ */
+interface Fetcher {
+ /**
+ * Fetches a single resource.
+ *
+ * @param string $url Location of the resource.
+ * @return bool|string Contents of the resource. False on failure.
+ */
+ public function fetchFile( $url );
+
+ /**
+ * Fetch a list of resources. This has the benefit of being able to pick up
+ * new languages as they appear if languages are stored in separate files.
+ *
+ * @param string $pattern Location in which the file name contains a wildcard (*).
+ * @return array Contents of the resources found, indexed by their location.
+ */
+ public function fetchDirectory( $pattern );
+}
diff --git a/www/wiki/extensions/LocalisationUpdate/includes/fetcher/FetcherFactory.php b/www/wiki/extensions/LocalisationUpdate/includes/fetcher/FetcherFactory.php
new file mode 100644
index 00000000..9273c935
--- /dev/null
+++ b/www/wiki/extensions/LocalisationUpdate/includes/fetcher/FetcherFactory.php
@@ -0,0 +1,25 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+namespace LocalisationUpdate;
+
+/**
+ * Picks the fetcher implementation that matches a repository url.
+ */
+class FetcherFactory {
+	/**
+	 * @param string $path Url or file system path of the resource to fetch.
+	 * @return Fetcher GitHubFetcher for raw.github.com urls, HttpFetcher for
+	 *  any other http(s) url and FileSystemFetcher for everything else.
+	 */
+	public function getFetcher( $path ) {
+		// Must be tested first, it is also an https url
+		if ( strpos( $path, 'https://raw.github.com/' ) === 0 ) {
+			return new GitHubFetcher();
+		}
+
+		foreach ( [ 'http://', 'https://' ] as $scheme ) {
+			if ( strpos( $path, $scheme ) === 0 ) {
+				return new HttpFetcher();
+			}
+		}
+
+		return new FileSystemFetcher();
+	}
+}
diff --git a/www/wiki/extensions/LocalisationUpdate/includes/fetcher/FileSystemFetcher.php b/www/wiki/extensions/LocalisationUpdate/includes/fetcher/FileSystemFetcher.php
new file mode 100644
index 00000000..43daa65d
--- /dev/null
+++ b/www/wiki/extensions/LocalisationUpdate/includes/fetcher/FileSystemFetcher.php
@@ -0,0 +1,47 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+namespace LocalisationUpdate;
+
+/**
+ * Accesses file system directly.
+ */
+class FileSystemFetcher implements Fetcher {
+	/**
+	 * Reads a single local file.
+	 *
+	 * @param string $url Path, optionally prefixed with file://
+	 *
+	 * @return bool|string File contents, false if the path is not a readable file.
+	 */
+	public function fetchFile( $url ) {
+		$path = $this->stripProtocol( $url );
+
+		// is_readable() alone also accepts directories
+		if ( !is_file( $path ) || !is_readable( $path ) ) {
+			return false;
+		}
+
+		return file_get_contents( $path );
+	}
+
+	/**
+	 * Reads all local files matching a glob pattern.
+	 *
+	 * @param string $pattern Glob pattern, optionally prefixed with file://
+	 *
+	 * @return array File contents indexed by "file://" followed by the path.
+	 */
+	public function fetchDirectory( $pattern ) {
+		$pattern = $this->stripProtocol( $pattern );
+
+		// glob() returns false on error, which cannot be iterated
+		$matches = glob( $pattern ) ?: [];
+
+		$data = [];
+		foreach ( $matches as $file ) {
+			if ( is_file( $file ) && is_readable( $file ) ) {
+				$data["file://$file"] = file_get_contents( $file );
+			}
+		}
+		return $data;
+	}
+
+	/**
+	 * Removes a leading file:// protocol prefix.
+	 *
+	 * @param string $url
+	 * @return string
+	 */
+	private function stripProtocol( $url ) {
+		return preg_replace( '~^file://~', '', $url );
+	}
+}
diff --git a/www/wiki/extensions/LocalisationUpdate/includes/fetcher/GitHubFetcher.php b/www/wiki/extensions/LocalisationUpdate/includes/fetcher/GitHubFetcher.php
new file mode 100644
index 00000000..f8bea9c1
--- /dev/null
+++ b/www/wiki/extensions/LocalisationUpdate/includes/fetcher/GitHubFetcher.php
@@ -0,0 +1,47 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+namespace LocalisationUpdate;
+
+/**
+ * This class uses GitHub api to obtain a list of files present in a directory
+ * to avoid fetching files that don't exist.
+ *
+ * @todo Could use file hashes to 1) avoid fetching files with same hash as
+ * the source. 2) avoid fetching files which haven't changed since last check
+ * if we store them.
+ */
+class GitHubFetcher extends HttpFetcher {
+	/**
+	 * Lists the directory of the pattern through the GitHub api and fetches
+	 * every listed file whose name matches the file name part of the pattern.
+	 *
+	 * @param string $pattern Url of the form
+	 *  https://raw.github.com/<org>/<repo>/<branch>/<path>/<file pattern>
+	 *
+	 * @return array File contents indexed by url.
+	 * @throws \Exception When the pattern is not such an url or the directory
+	 *  listing cannot be obtained or decoded.
+	 */
+	public function fetchDirectory( $pattern ) {
+		$domain = preg_quote( 'https://raw.github.com/', '~' );
+		$p = "~^$domain(?P<org>[^/]+)/(?P<repo>[^/]+)/(?P<branch>[^/]+)/(?P<path>.+)/.+$~";
+		if ( !preg_match( $p, $pattern, $m ) ) {
+			throw new \Exception( "Unable to parse GitHub url $pattern" );
+		}
+
+		// Ask for the listing of the branch the files are fetched from;
+		// without ref the api lists the default branch of the repository.
+		$apiURL = "https://api.github.com/repos/{$m['org']}/{$m['repo']}/contents/{$m['path']}"
+			. '?ref=' . rawurlencode( $m['branch'] );
+		$json = \Http::get( $apiURL );
+		if ( !$json ) {
+			throw new \Exception( "Unable to get directory listing for {$m['org']}/{$m['repo']}" );
+		}
+
+		$listing = \FormatJson::decode( $json, true );
+		if ( !is_array( $listing ) ) {
+			throw new \Exception( "Unable to get directory listing for {$m['org']}/{$m['repo']}" );
+		}
+
+		$directory = dirname( $pattern );
+		$namePattern = basename( $pattern );
+
+		$files = [];
+		foreach ( $listing as $fileinfo ) {
+			if ( !is_array( $fileinfo ) || !isset( $fileinfo['name'] ) ) {
+				continue;
+			}
+
+			// The listing contains everything in the directory; only the
+			// files the pattern asks for are wanted.
+			if ( !fnmatch( $namePattern, $fileinfo['name'] ) ) {
+				continue;
+			}
+
+			$fileurl = $directory . '/' . $fileinfo['name'];
+			$file = $this->fetchFile( $fileurl );
+			if ( $file ) {
+				$files[$fileurl] = $file;
+			}
+		}
+		return $files;
+	}
+}
diff --git a/www/wiki/extensions/LocalisationUpdate/includes/fetcher/HttpFetcher.php b/www/wiki/extensions/LocalisationUpdate/includes/fetcher/HttpFetcher.php
new file mode 100644
index 00000000..72988e98
--- /dev/null
+++ b/www/wiki/extensions/LocalisationUpdate/includes/fetcher/HttpFetcher.php
@@ -0,0 +1,49 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+namespace LocalisationUpdate;
+
+/**
+ * Fetcher for resources served over HTTP(s).
+ */
+class HttpFetcher implements Fetcher {
+	/**
+	 * @param string $url
+	 *
+	 * @return bool|string Whatever \Http::get() returns for the url.
+	 */
+	public function fetchFile( $url ) {
+		return \Http::get( $url );
+	}
+
+	/**
+	 * Tries the pattern with every language code known to MediaWiki in place
+	 * of the wildcard and keeps the urls that returned content.
+	 *
+	 * This is horribly inefficient. Subclasses have more efficient
+	 * implementation of this.
+	 * @param string $pattern
+	 * @return array File contents indexed by url.
+	 */
+	public function fetchDirectory( $pattern ) {
+		// Hack for core: its files are named after the capitalised code
+		// with underscores instead of dashes
+		$isCore = strpos( $pattern, 'Messages*.php' ) !== false;
+
+		$found = [];
+		$codes = array_keys( \Language::fetchLanguageNames( null, 'mwfile' ) );
+		foreach ( $codes as $code ) {
+			if ( $isCore ) {
+				$code = ucfirst( strtr( $code, '-', '_' ) );
+			}
+
+			$url = str_replace( '*', $code, $pattern );
+			$contents = $this->fetchFile( $url );
+			if ( $contents ) {
+				$found[$url] = $contents;
+			}
+		}
+
+		return $found;
+	}
+}
diff --git a/www/wiki/extensions/LocalisationUpdate/includes/finder/Finder.php b/www/wiki/extensions/LocalisationUpdate/includes/finder/Finder.php
new file mode 100644
index 00000000..acebc42a
--- /dev/null
+++ b/www/wiki/extensions/LocalisationUpdate/includes/finder/Finder.php
@@ -0,0 +1,124 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+namespace LocalisationUpdate;
+
+/**
+ * Provides the list of components which should be included for l10n updates.
+ */
+class Finder {
+
+	/**
+	 * @var array
+	 */
+	private $php;
+
+	/**
+	 * @var array
+	 */
+	private $json;
+
+	/**
+	 * @var string
+	 */
+	private $core;
+
+	/**
+	 * @param array $php See $wgExtensionMessagesFiles
+	 * @param array $json See $wgMessagesDirs
+	 * @param string $core Absolute path to MediaWiki core
+	 */
+	public function __construct( $php, $json, $core ) {
+		$this->php = $php;
+		$this->json = $json;
+		$this->core = $core;
+	}
+
+	/**
+	 * Builds the list of components from the configured message files and
+	 * directories.
+	 *
+	 * @return array Components indexed by name. Each component has the keys
+	 *  'repo', 'orig' (local file:// url or pattern) and 'path' (path inside
+	 *  the repository); extensions and skins also have 'name'.
+	 */
+	public function getComponents() {
+		$components = [];
+
+		// For older versions of Mediawiki, pull json updates even though its still using php
+		if ( !isset( $this->json['core'] ) ) {
+			$components['core'] = [
+				'repo' => 'mediawiki',
+				'orig' => "file://{$this->core}/languages/messages/Messages*.php",
+				'path' => 'languages/messages/i18n/*.json',
+			];
+		}
+
+		foreach ( $this->json as $key => $value ) {
+			// Json should take priority if both exist
+			unset( $this->php[$key] );
+
+			// An entry is either a single directory or a list of them
+			foreach ( (array)$value as $subkey => $subvalue ) {
+				// Mediawiki core files
+				$matches = [];
+				if ( preg_match( '~/(?P<path>(?:includes|languages|resources)/.*)$~', $subvalue, $matches ) ) {
+					$components["$key-$subkey"] = [
+						'repo' => 'mediawiki',
+						// Use the directory being looked at: $value is an
+						// array when the entry lists several directories.
+						'orig' => "file://$subvalue/*.json",
+						'path' => "{$matches['path']}/*.json",
+					];
+					continue;
+				}
+
+				$item = $this->getItem( 'extensions', $subvalue );
+				if ( $item !== null ) {
+					$item['repo'] = 'extension';
+					$components["$key-$subkey"] = $item;
+					continue;
+				}
+
+				$item = $this->getItem( 'skins', $subvalue );
+				if ( $item !== null ) {
+					$item['repo'] = 'skin';
+					$components["$key-$subkey"] = $item;
+					continue;
+				}
+			}
+		}
+
+		foreach ( $this->php as $key => $value ) {
+			$matches = [];
+			$ok = preg_match( '~/extensions/(?P<name>[^/]+)/(?P<path>.*\.i18n\.php)$~', $value, $matches );
+			if ( !$ok ) {
+				continue;
+			}
+
+			$components[$key] = [
+				'repo' => 'extension',
+				'name' => $matches['name'],
+				'orig' => "file://$value",
+				'path' => $matches['path'],
+			];
+		}
+
+		return $components;
+	}
+
+	/**
+	 * Describes a message directory located below the given top level
+	 * directory.
+	 *
+	 * @param string $dir extensions or skins
+	 * @param string $subvalue Path of the message directory
+	 * @return array|null Component without the 'repo' key, null if the path
+	 *  is not below $dir.
+	 */
+	private function getItem( $dir, $subvalue ) {
+		// This ignores magic, alias etc. non message files
+		$matches = [];
+		if ( !preg_match( "~/$dir/(?P<name>[^/]+)/(?P<path>.*)$~", $subvalue, $matches ) ) {
+			return null;
+		}
+
+		return [
+			'name' => $matches['name'],
+			'orig' => "file://$subvalue/*.json",
+			'path' => "{$matches['path']}/*.json",
+		];
+	}
+}
diff --git a/www/wiki/extensions/LocalisationUpdate/includes/reader/JSONReader.php b/www/wiki/extensions/LocalisationUpdate/includes/reader/JSONReader.php
new file mode 100644
index 00000000..e8613660
--- /dev/null
+++ b/www/wiki/extensions/LocalisationUpdate/includes/reader/JSONReader.php
@@ -0,0 +1,37 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+namespace LocalisationUpdate;
+
+/**
+ * Reads MediaWiki JSON i18n files.
+ */
+class JSONReader implements Reader {
+	/// @var string Language tag
+	protected $code;
+
+	/**
+	 * @param string|null $code Language tag of the file. When omitted the
+	 *  file is assumed to be keyed by language code.
+	 */
+	public function __construct( $code = null ) {
+		$this->code = $code;
+	}
+
+	/**
+	 * Decodes the file and drops its @metadata entry.
+	 *
+	 * @param string $contents
+	 *
+	 * @return array Messages indexed by language code.
+	 * @throws \Exception When the contents do not decode to an array.
+	 */
+	public function parse( $contents ) {
+		$messages = \FormatJson::decode( $contents, true );
+		// Report broken files rather than returning a language without a
+		// message list, which callers cannot merge.
+		if ( !is_array( $messages ) ) {
+			throw new \Exception( 'Unable to decode JSON messages' );
+		}
+		unset( $messages['@metadata'] );
+
+		if ( $this->code ) {
+			return [ $this->code => $messages ];
+		}
+
+		// Assuming that the array is keyed by language codes
+		return $messages;
+	}
+}
diff --git a/www/wiki/extensions/LocalisationUpdate/includes/reader/PHPReader.php b/www/wiki/extensions/LocalisationUpdate/includes/reader/PHPReader.php
new file mode 100644
index 00000000..43e4db25
--- /dev/null
+++ b/www/wiki/extensions/LocalisationUpdate/includes/reader/PHPReader.php
@@ -0,0 +1,61 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+namespace LocalisationUpdate;
+
+/**
+ * Reads MediaWiki PHP i18n files.
+ */
+class PHPReader implements Reader {
+	/// @var string Language tag
+	protected $code;
+
+	/**
+	 * @param string|null $code Language tag of the file. When omitted the
+	 *  $messages array is assumed to be keyed by language code.
+	 */
+	public function __construct( $code = null ) {
+		$this->code = $code;
+	}
+
+	/**
+	 * Extracts the $messages assignments from the file and parses them
+	 * without executing the code.
+	 *
+	 * @param string $contents
+	 *
+	 * @return array Messages indexed by language code, empty when the file
+	 *  defines no messages.
+	 */
+	public function parse( $contents ) {
+		if ( strpos( $contents, '$messages' ) === false ) {
+			// This happens for some core languages that only have a fallback.
+			return [];
+		}
+
+		$php = $this->cleanupFile( $contents );
+		$reader = new \QuickArrayReader( "<?php $php" );
+		$messages = $reader->getVar( 'messages' );
+
+		// $messages was mentioned, but no assignment of an array to it was
+		// found: same outcome as a file without messages.
+		if ( !is_array( $messages ) ) {
+			return [];
+		}
+
+		if ( $this->code ) {
+			return [ $this->code => $messages ];
+		}
+
+		// Assuming that the array is keyed by language codes
+		return $messages;
+	}
+
+	/**
+	 * Removes all unneeded content from a file and returns it.
+	 *
+	 * @param string $contents String
+	 * @return string PHP code without PHP tags
+	 */
+	protected function cleanupFile( $contents ) {
+		// We hate the windows vs linux linebreaks.
+		$contents = preg_replace( '/\r\n?/', "\n", $contents );
+
+		// We only want message arrays: everything from $messages up to the
+		// first following ");".
+		$results = [];
+		preg_match_all( '/\$messages(?:.*\s)*?\);/', $contents, $results );
+
+		// But we want them all in one string.
+		return implode( "\n\n", $results[0] );
+	}
+}
diff --git a/www/wiki/extensions/LocalisationUpdate/includes/reader/Reader.php b/www/wiki/extensions/LocalisationUpdate/includes/reader/Reader.php
new file mode 100644
index 00000000..fd37322e
--- /dev/null
+++ b/www/wiki/extensions/LocalisationUpdate/includes/reader/Reader.php
@@ -0,0 +1,21 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+namespace LocalisationUpdate;
+
+/**
+ * Interface for file readers, which turn the contents of a message file into
+ * lists of messages.
+ */
+interface Reader {
+ /**
+ * Returns a list of messages indexed by language code. Example
+ * array( 'en' => array( 'key' => 'value' ) );
+ * @param string $contents File contents as a string.
+ * @return array Messages indexed by language code, then by message key.
+ */
+ public function parse( $contents );
+}
diff --git a/www/wiki/extensions/LocalisationUpdate/includes/reader/ReaderFactory.php b/www/wiki/extensions/LocalisationUpdate/includes/reader/ReaderFactory.php
new file mode 100644
index 00000000..44bc5e40
--- /dev/null
+++ b/www/wiki/extensions/LocalisationUpdate/includes/reader/ReaderFactory.php
@@ -0,0 +1,38 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+namespace LocalisationUpdate;
+
+/**
+ * Chooses the reader for a message file by looking at its name.
+ */
+class ReaderFactory {
+	/**
+	 * Constructs a suitable reader for a given path.
+	 * @param string $filename Usually a relative path to the file name.
+	 * @return Reader
+	 * @throws \Exception When the name matches none of the known formats.
+	 */
+	public function getReader( $filename ) {
+		$reader = null;
+
+		if ( preg_match( '/i18n\.php$/', $filename ) ) {
+			$reader = new PHPReader();
+		} elseif ( preg_match( '/Messages(.*)\.php$/', $filename ) ) {
+			// Ugly hack for core i18n files: one language per file, with
+			// the language code encoded in the file name
+			$reader = new PHPReader(
+				\Language::getCodeFromFileName( basename( $filename ), 'Messages' )
+			);
+		} elseif ( preg_match( '/\.json/', $filename ) ) {
+			// The file is named after the language it contains
+			$reader = new JSONReader( basename( $filename, '.json' ) );
+		}
+
+		if ( $reader === null ) {
+			throw new \Exception( "Unknown file format: " . $filename );
+		}
+
+		return $reader;
+	}
+}