diff options
author | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
---|---|---|
committer | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
commit | fc7369835258467bf97eb64f184b93691f9a9fd5 (patch) | |
tree | daabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/extensions/LocalisationUpdate/includes |
first commit
Diffstat (limited to 'www/wiki/extensions/LocalisationUpdate/includes')
13 files changed, 970 insertions, 0 deletions
// ---------------------------------------------------------------------------
// File: www/wiki/extensions/LocalisationUpdate/includes/LocalisationUpdate.php
// ---------------------------------------------------------------------------
namespace {

/**
 * Class for localization update hooks and static methods.
 */
class LocalisationUpdate {
	/**
	 * Hook: LocalisationCacheRecacheFallback
	 *
	 * Merges the messages found in the update file for $code on top of
	 * $cache['messages']. Nothing is changed when no directory is configured,
	 * when the file is not readable, or when it does not decode to an array.
	 *
	 * @param LocalisationCache $lc
	 * @param string $code
	 * @param array &$cache
	 * @return true
	 */
	public static function onRecacheFallback( LocalisationCache $lc, $code, array &$cache ) {
		$dir = self::getDirectory();
		if ( !$dir ) {
			return true;
		}

		$fileName = "$dir/" . self::getFilename( $code );
		if ( !is_readable( $fileName ) ) {
			return true;
		}

		$data = FormatJson::decode( file_get_contents( $fileName ), true );
		// A corrupt or truncated file does not decode to an array. Passing that
		// to array_merge() would discard the existing messages (or raise a
		// TypeError on PHP 8), so such files are ignored.
		if ( is_array( $data ) ) {
			$cache['messages'] = array_merge( $cache['messages'], $data );
		}

		return true;
	}

	/**
	 * Hook: LocalisationCacheRecache
	 *
	 * Registers the update files of $code and of every language in
	 * $cache['fallbackSequence'] as file dependencies in $cache['deps'].
	 *
	 * @param LocalisationCache $lc
	 * @param string $code
	 * @param array &$cache
	 * @return true
	 */
	public static function onRecache( LocalisationCache $lc, $code, array &$cache ) {
		$dir = self::getDirectory();
		if ( !$dir ) {
			return true;
		}

		$codeSequence = array_merge( [ $code ], $cache['fallbackSequence'] );
		foreach ( $codeSequence as $csCode ) {
			$fileName = "$dir/" . self::getFilename( $csCode );
			$cache['deps'][] = new FileDependency( $fileName );
		}

		return true;
	}

	/**
	 * Returns a directory where updated translations are stored.
	 *
	 * $wgLocalisationUpdateDirectory is used when it is truthy, otherwise
	 * $wgCacheDirectory.
	 *
	 * @return string|false False if not configured.
	 * @since 1.1
	 */
	public static function getDirectory() {
		global $wgLocalisationUpdateDirectory, $wgCacheDirectory;

		return $wgLocalisationUpdateDirectory ?: $wgCacheDirectory;
	}

	/**
	 * Returns a filename where updated translations are stored.
	 *
	 * @param string $language Language tag
	 * @return string
	 * @since 1.1
	 */
	public static function getFilename( $language ) {
		return "l10nupdate-$language.json";
	}
}

}

// ---------------------------------------------------------------------------
// File: www/wiki/extensions/LocalisationUpdate/includes/QuickArrayReader.php
// ---------------------------------------------------------------------------
namespace {

/**
 * Quickie parser class that can happily read the subset of PHP we need
 * for our localization arrays safely.
 *
 * Still an order of magnitude slower than eval().
 */
class QuickArrayReader {
	/** @var array Parsed variables, keyed by variable name without the '$' */
	private $vars = [];

	/**
	 * Tokenizes $string and records every top-level assignment of the forms
	 * `$var = scalar;`, `$var = array( key => scalar, ... );` and
	 * `$var[scalar] = ...;`.
	 *
	 * @param string $string PHP source, including the opening tag
	 * @throws Exception When an unexpected token is encountered
	 */
	public function __construct( $string ) {
		$scalarTypes = [
			T_LNUMBER => true,
			T_DNUMBER => true,
			T_STRING => true,
			T_CONSTANT_ENCAPSED_STRING => true,
		];
		$skipTypes = [
			T_WHITESPACE => true,
			T_COMMENT => true,
			T_DOC_COMMENT => true,
		];
		$tokens = token_get_all( $string );
		$count = count( $tokens );
		for ( $i = 0; $i < $count; ) {
			// Bounded skip: input that ends in whitespace or a comment must not
			// run past the last token and be reported as a syntax error.
			while ( $i < $count && isset( $skipTypes[$tokens[$i][0]] ) ) {
				$i++;
			}
			if ( $i >= $count ) {
				break;
			}
			switch ( $tokens[$i][0] ) {
				case T_OPEN_TAG:
					$i++;
					break;
				case T_VARIABLE:
					// '$messages' -> 'messages'
					$varname = trim( substr( $tokens[$i][1], 1 ) );
					$varindex = null;

					while ( isset( $skipTypes[$tokens[++$i][0]] ) ) {
					}

					// Optional single index: $var['index'] = ...
					if ( $tokens[$i] === '[' ) {
						while ( isset( $skipTypes[$tokens[++$i][0]] ) ) {
						}

						if ( isset( $scalarTypes[$tokens[$i][0]] ) ) {
							$varindex = $this->parseScalar( $tokens[$i] );
						} else {
							throw $this->except( $tokens[$i], 'scalar index' );
						}
						while ( isset( $skipTypes[$tokens[++$i][0]] ) ) {
						}

						if ( $tokens[$i] !== ']' ) {
							throw $this->except( $tokens[$i], ']' );
						}
						while ( isset( $skipTypes[$tokens[++$i][0]] ) ) {
						}
					}

					if ( $tokens[$i] !== '=' ) {
						throw $this->except( $tokens[$i], '=' );
					}
					while ( isset( $skipTypes[$tokens[++$i][0]] ) ) {
					}

					if ( isset( $scalarTypes[$tokens[$i][0]] ) ) {
						$buildval = $this->parseScalar( $tokens[$i] );
					} elseif ( $tokens[$i][0] === T_ARRAY ) {
						while ( isset( $skipTypes[$tokens[++$i][0]] ) ) {
						}
						if ( $tokens[$i] !== '(' ) {
							throw $this->except( $tokens[$i], '(' );
						}
						$buildval = [];
						do {
							// Start every element from a clean slate so that a
							// non-scalar key or value never silently reuses the
							// key/value of the previous element.
							$key = null;
							$val = null;

							while ( isset( $skipTypes[$tokens[++$i][0]] ) ) {
							}

							if ( $tokens[$i] === ')' ) {
								break;
							}
							if ( isset( $scalarTypes[$tokens[$i][0]] ) ) {
								$key = $this->parseScalar( $tokens[$i] );
							}
							while ( isset( $skipTypes[$tokens[++$i][0]] ) ) {
							}

							if ( $tokens[$i][0] !== T_DOUBLE_ARROW ) {
								throw $this->except( $tokens[$i], '=>' );
							}
							while ( isset( $skipTypes[$tokens[++$i][0]] ) ) {
							}

							if ( isset( $scalarTypes[$tokens[$i][0]] ) ) {
								$val = $this->parseScalar( $tokens[$i] );
							}
							wfSuppressWarnings();
							$buildval[$key] = $val;
							wfRestoreWarnings();
							while ( isset( $skipTypes[$tokens[++$i][0]] ) ) {
							}

							if ( $tokens[$i] === ',' ) {
								continue;
							} elseif ( $tokens[$i] === ')' ) {
								break;
							} else {
								throw $this->except( $tokens[$i], ', or )' );
							}
						} while ( true );
					} else {
						throw $this->except( $tokens[$i], 'scalar or array' );
					}
					if ( is_null( $varindex ) ) {
						$this->vars[$varname] = $buildval;
					} else {
						wfSuppressWarnings();
						$this->vars[$varname][$varindex] = $buildval;
						wfRestoreWarnings();
					}
					while ( isset( $skipTypes[$tokens[++$i][0]] ) ) {
					}
					if ( $tokens[$i] !== ';' ) {
						throw $this->except( $tokens[$i], ';' );
					}
					$i++;
					break;
				default:
					throw $this->except( $tokens[$i], 'open tag, whitespace, or variable.' );
			}
		}
	}

	/**
	 * Builds the exception describing an unexpected token.
	 *
	 * @param string|array $got Token as returned by token_get_all()
	 * @param string $expected Human readable description of what was expected
	 * @return Exception
	 */
	private function except( $got, $expected ) {
		if ( is_array( $got ) ) {
			$got = token_name( $got[0] ) . " ('" . $got[1] . "')";
		} else {
			$got = "'" . $got . "'";
		}

		return new Exception( "Expected $expected, got $got" );
	}

	/**
	 * Parse a scalar value in PHP
	 *
	 * @param string|array $token Token as returned by token_get_all()
	 *
	 * @return mixed Parsed value
	 */
	public function parseScalar( $token ) {
		if ( is_array( $token ) ) {
			$str = $token[1];
		} else {
			$str = $token;
		}
		if ( $str !== '' && $str[0] == '\'' ) {
			// Single-quoted string
			// @fixme trim() call is due to mystery bug where whitespace gets
			// appended to the token; without it we ended up reading in the
			// extra quote on the end!
			return strtr( substr( trim( $str ), 1, -1 ),
				[ '\\\'' => '\'', '\\\\' => '\\' ] );
		}

		wfSuppressWarnings();
		if ( $str !== '' && $str[0] == '"' ) {
			// Double-quoted string
			// @fixme trim() call is due to mystery bug where whitespace gets
			// appended to the token; without it we ended up reading in the
			// extra quote on the end!
			wfRestoreWarnings();
			return stripcslashes( substr( trim( $str ), 1, -1 ) );
		}
		wfRestoreWarnings();

		if ( substr( $str, 0, 4 ) === 'true' ) {
			return true;
		}

		if ( substr( $str, 0, 5 ) === 'false' ) {
			return false;
		}

		if ( substr( $str, 0, 4 ) === 'null' ) {
			return null;
		}

		// Must be some kind of numeric value, so let PHP's weak typing
		// be useful for a change
		return $str;
	}

	/**
	 * Returns the value parsed for a variable.
	 *
	 * @param string $varname Variable name without the leading '$'
	 * @return null|string|array Null when the variable was not found
	 */
	public function getVar( $varname ) {
		if ( isset( $this->vars[$varname] ) ) {
			return $this->vars[$varname];
		} else {
			return null;
		}
	}
}

}

// ---------------------------------------------------------------------------
// File: www/wiki/extensions/LocalisationUpdate/includes/Updater.php
// ---------------------------------------------------------------------------
/**
 * @file
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */
namespace LocalisationUpdate {

/**
 * Executes the localisation update.
 */
class Updater {
	/**
	 * Whether the path is a pattern and thus we need to use appropriate
	 * code for fetching directories.
	 *
	 * Only the last path segment is inspected for the '*' wildcard.
	 *
	 * @param string $path Url
	 * @return bool
	 */
	public function isDirectory( $path ) {
		$filename = basename( $path );
		return strpos( $filename, '*' ) !== false;
	}

	/**
	 * Expands repository relative path to full url with the given repository
	 * patterns. Every key of $info other than 'repo' and 'orig' is treated as
	 * a variable and replaced in the pattern.
	 *
	 * @param array $info Component information.
	 * @param array $repos Repository information.
	 * @return string
	 */
	public function expandRemotePath( $info, $repos ) {
		$pattern = $repos[$info['repo']];
		unset( $info['repo'], $info['orig'] );

		// This assumes all other keys are used as variables
		// in the pattern. For example name -> %NAME%.
		$keys = [];
		foreach ( array_keys( $info ) as $key ) {
			$keys[] = '%' . strtoupper( $key ) . '%';
		}

		$values = array_values( $info );
		return str_replace( $keys, $values, $pattern );
	}

	/**
	 * Parses translations from given list of files.
	 *
	 * Files that fail to parse are skipped with an E_USER_WARNING.
	 *
	 * @param ReaderFactory $readerFactory Factory to construct parsers.
	 * @param array $files List of files with their contents as array values.
	 * @return array List of translations indexed by language code.
	 */
	public function readMessages( ReaderFactory $readerFactory, array $files ) {
		$messages = [];

		foreach ( $files as $filename => $contents ) {
			$reader = $readerFactory->getReader( $filename );
			try {
				$parsed = $reader->parse( $contents );
			} catch ( \Exception $e ) {
				trigger_error( __METHOD__ . ": Unable to parse messages from $filename", E_USER_WARNING );
				continue;
			}

			if ( !is_array( $parsed ) ) {
				// A reader that yields no usable result contributes nothing.
				continue;
			}

			foreach ( $parsed as $code => $langMessages ) {
				if ( !is_array( $langMessages ) ) {
					// array_merge() only accepts arrays; skip malformed entries
					// instead of losing the messages collected so far.
					continue;
				}
				if ( !isset( $messages[$code] ) ) {
					$messages[$code] = [];
				}
				$messages[$code] = array_merge( $messages[$code], $langMessages );
			}
		}

		return $messages;
	}

	/**
	 * Find new and changed translations in $remote and returns them.
	 *
	 * @param array $origin
	 * @param array $remote
	 * @param array $blacklist Array of message keys to ignore, keys as array keys.
	 * @return array
	 */
	public function findChangedTranslations( $origin, $remote, $blacklist = [] ) {
		$changed = [];
		foreach ( $remote as $key => $value ) {
			if ( isset( $blacklist[$key] ) ) {
				continue;
			}

			if ( !isset( $origin[$key] ) || $value !== $origin[$key] ) {
				$changed[$key] = $value;
			}
		}
		return $changed;
	}

	/**
	 * Fetches files from given Url pattern.
	 *
	 * @param FetcherFactory $factory Factory to construct fetchers.
	 * @param string $path Url to the file or pattern of files.
	 * @return array File contents indexed by Url; files with empty or
	 *  missing contents are removed.
	 */
	public function fetchFiles( FetcherFactory $factory, $path ) {
		$fetcher = $factory->getFetcher( $path );

		if ( $this->isDirectory( $path ) ) {
			$files = $fetcher->fetchDirectory( $path );
		} else {
			$files = [ $path => $fetcher->fetchFile( $path ) ];
		}

		// Remove files which were not found
		return array_filter( $files );
	}

	/**
	 * Collects new and changed translations for all components.
	 *
	 * @param Finder $finder Provides the list of components.
	 * @param ReaderFactory $readerFactory Factory to construct parsers.
	 * @param FetcherFactory $fetcherFactory Factory to construct fetchers.
	 * @param array $repos Repository url patterns, see expandRemotePath().
	 * @param object $logger Object providing logInfo() and logError().
	 * @return array Updated translations indexed by language code.
	 */
	public function execute(
		Finder $finder,
		ReaderFactory $readerFactory,
		FetcherFactory $fetcherFactory,
		array $repos,
		$logger
	) {
		$components = $finder->getComponents();

		$updatedMessages = [];

		foreach ( $components as $key => $info ) {
			$logger->logInfo( "Updating component $key" );

			$originFiles = $this->fetchFiles( $fetcherFactory, $info['orig'] );
			$remotePath = $this->expandRemotePath( $info, $repos );
			try {
				$remoteFiles = $this->fetchFiles( $fetcherFactory, $remotePath );
			} catch ( \Exception $e ) {
				$logger->logError( __METHOD__ . ": Unable to fetch messages from $remotePath" );
				continue;
			}

			if ( $remoteFiles === [] ) {
				// Small optimization: if nothing to compare with, skip
				continue;
			}

			$originMessages = $this->readMessages( $readerFactory, $originFiles );
			$remoteMessages = $this->readMessages( $readerFactory, $remoteFiles );

			if ( !isset( $remoteMessages['en'] ) ) {
				// Could not find remote messages
				continue;
			}

			// If remote translation in English is not present or differs, we do not want
			// translations for other languages for those messages, as they are either not
			// used in this version of code or can be incompatible.
			// Without local English messages every remote English message counts
			// as changed, exactly as an undefined index would have behaved.
			$originEnglish = isset( $originMessages['en'] ) ? $originMessages['en'] : [];
			$forbiddenKeys = $this->findChangedTranslations(
				$originEnglish,
				$remoteMessages['en']
			);

			// We never accept updates for English strings
			unset( $originMessages['en'], $remoteMessages['en'] );

			// message: string in all languages; translation: string in one language.
			foreach ( $remoteMessages as $language => $remoteTranslations ) {
				// Check for completely new languages
				$originTranslations = [];
				if ( isset( $originMessages[$language] ) ) {
					$originTranslations = $originMessages[$language];
				}

				$updatedTranslations = $this->findChangedTranslations(
					$originTranslations,
					$remoteTranslations,
					$forbiddenKeys
				);

				// Avoid empty arrays
				if ( $updatedTranslations === [] ) {
					continue;
				}

				if ( !isset( $updatedMessages[$language] ) ) {
					$updatedMessages[$language] = [];
				}

				// In case of conflicts, which should not exist, this prefers the
				// first translation seen.
				$updatedMessages[$language] += $updatedTranslations;
			}
		}

		return $updatedMessages;
	}
}

}

// ---------------------------------------------------------------------------
// File: www/wiki/extensions/LocalisationUpdate/includes/fetcher/Fetcher.php
// ---------------------------------------------------------------------------
/**
 * @file
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */
namespace LocalisationUpdate {

/**
 * Interface for classes which fetch files over different protocols and ways.
 */
interface Fetcher {
	/**
	 * Fetches a single resource.
	 *
	 * @param string $url
	 * @return bool|string False on failure.
	 */
	public function fetchFile( $url );

	/**
	 * Fetch a list of resources. This has the benefit of being able to pick up
	 * new languages as they appear if languages are stored in separate files.
	 *
	 * @param string $pattern
	 * @return array
	 */
	public function fetchDirectory( $pattern );
}

}

// ---------------------------------------------------------------------------
// File: www/wiki/extensions/LocalisationUpdate/includes/fetcher/FetcherFactory.php
// ---------------------------------------------------------------------------
/**
 * @file
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */
namespace LocalisationUpdate {

/**
 * Constructs fetchers based on the repository urls.
 */
class FetcherFactory {
	/**
	 * Picks the fetcher by url prefix; anything that is not an http(s) url is
	 * handled by the file system fetcher.
	 *
	 * @param string $path Url or file system path
	 * @return Fetcher
	 */
	public function getFetcher( $path ) {
		if ( strpos( $path, 'https://raw.github.com/' ) === 0 ) {
			return new GitHubFetcher();
		} elseif ( strpos( $path, 'http://' ) === 0 ) {
			return new HttpFetcher();
		} elseif ( strpos( $path, 'https://' ) === 0 ) {
			return new HttpFetcher();
		} else {
			return new FileSystemFetcher();
		}
	}
}

}

// ---------------------------------------------------------------------------
// File: www/wiki/extensions/LocalisationUpdate/includes/fetcher/FileSystemFetcher.php
// ---------------------------------------------------------------------------
/**
 * @file
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */
namespace LocalisationUpdate {

/**
 * Accesses file system directly.
 */
class FileSystemFetcher implements Fetcher {
	/**
	 * @param string $url Path, optionally prefixed with file://
	 *
	 * @return bool|string False when the file is not readable.
	 */
	public function fetchFile( $url ) {
		// Remove the protocol prefix
		$url = preg_replace( '~^file://~', '', $url );

		if ( !is_readable( $url ) ) {
			return false;
		}

		return file_get_contents( $url );
	}

	/**
	 * @param string $pattern Glob pattern, optionally prefixed with file://
	 *
	 * @return array File contents indexed by file:// url
	 */
	public function fetchDirectory( $pattern ) {
		// Remove the protocol prefix
		$pattern = preg_replace( '~^file://~', '', $pattern );

		// glob() returns false on error, which is not iterable.
		$matches = glob( $pattern );
		if ( !is_array( $matches ) ) {
			return [];
		}

		$data = [];
		foreach ( $matches as $file ) {
			if ( is_readable( $file ) ) {
				$data["file://$file"] = file_get_contents( $file );
			}
		}
		return $data;
	}
}

}

// ---------------------------------------------------------------------------
// File: www/wiki/extensions/LocalisationUpdate/includes/fetcher/GitHubFetcher.php
// ---------------------------------------------------------------------------
/**
 * @file
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */
namespace LocalisationUpdate {

/**
 * This class uses GitHub api to obtain a list of files present in a directory
 * to avoid fetching files that don't exist.
 *
 * @todo Could use file hashes to 1) avoid fetching files with same hash as
 * the source. 2) avoid fetching files which haven't changed since last check
 * if we store them.
 */
class GitHubFetcher extends HttpFetcher {
	/**
	 * @param string $pattern Url of the form
	 *  https://raw.github.com/<org>/<repo>/<branch>/<path>/<file pattern>
	 *
	 * @return array File contents indexed by url
	 * @throws \Exception When the pattern cannot be parsed or the directory
	 *  listing cannot be obtained
	 */
	public function fetchDirectory( $pattern ) {
		$domain = preg_quote( 'https://raw.github.com/', '~' );
		$p = "~^$domain(?P<org>[^/]+)/(?P<repo>[^/]+)/(?P<branch>[^/]+)/(?P<path>.+)/.+$~";
		if ( !preg_match( $p, $pattern, $m ) ) {
			// Without a match none of the named groups below would exist.
			throw new \Exception( "Unable to parse GitHub url pattern $pattern" );
		}

		// NOTE(review): the listing is requested without the branch captured
		// above, so it presumably describes the repository's default branch —
		// confirm whether that is intended.
		$apiURL = "https://api.github.com/repos/{$m['org']}/{$m['repo']}/contents/{$m['path']}";
		$json = \Http::get( $apiURL );
		if ( !$json ) {
			throw new \Exception( "Unable to get directory listing for {$m['org']}/{$m['repo']}" );
		}

		$listing = \FormatJson::decode( $json, true );
		if ( !is_array( $listing ) ) {
			throw new \Exception( "Unable to get directory listing for {$m['org']}/{$m['repo']}" );
		}

		$files = [];
		foreach ( $listing as $fileinfo ) {
			if ( !is_array( $fileinfo ) || !isset( $fileinfo['name'] ) ) {
				// Not a directory entry; nothing to fetch.
				continue;
			}

			$fileurl = dirname( $pattern ) . '/' . $fileinfo['name'];
			$file = $this->fetchFile( $fileurl );
			if ( $file ) {
				$files[$fileurl] = $file;
			}
		}
		return $files;
	}
}

}

// ---------------------------------------------------------------------------
// File: www/wiki/extensions/LocalisationUpdate/includes/fetcher/HttpFetcher.php
// ---------------------------------------------------------------------------
/**
 * @file
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */
namespace LocalisationUpdate {

/**
 * Fetches files over HTTP(s).
 */
class HttpFetcher implements Fetcher {
	/**
	 * @param string $url
	 *
	 * @return bool|string
	 */
	public function fetchFile( $url ) {
		return \Http::get( $url );
	}

	/**
	 * This is horribly inefficient. Subclasses have more efficient
	 * implementation of this.
	 *
	 * Tries one url per language code returned by
	 * Language::fetchLanguageNames(), replacing '*' in the pattern.
	 *
	 * @param string $pattern
	 * @return array File contents indexed by url
	 */
	public function fetchDirectory( $pattern ) {
		$files = [];

		$languages = \Language::fetchLanguageNames( null, 'mwfile' );

		foreach ( array_keys( $languages ) as $code ) {
			// Hack for core
			if ( strpos( $pattern, 'Messages*.php' ) !== false ) {
				$code = ucfirst( strtr( $code, '-', '_' ) );
			}

			$url = str_replace( '*', $code, $pattern );
			$file = $this->fetchFile( $url );
			if ( $file ) {
				$files[$url] = $file;
			}
		}

		return $files;
	}
}

}

// ---------------------------------------------------------------------------
// File: www/wiki/extensions/LocalisationUpdate/includes/finder/Finder.php
// ---------------------------------------------------------------------------
/**
 * @file
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */
namespace LocalisationUpdate {

/**
 * Provides the list of components which should be included for l10n updates.
 */
class Finder {

	/**
	 * @var array
	 */
	private $php;

	/**
	 * @var array
	 */
	private $json;

	/**
	 * @var string
	 */
	private $core;

	/**
	 * @param array $php See $wgExtensionMessagesFiles
	 * @param array $json See $wgMessagesDirs
	 * @param string $core Absolute path to MediaWiki core
	 */
	public function __construct( $php, $json, $core ) {
		$this->php = $php;
		$this->json = $json;
		$this->core = $core;
	}

	/**
	 * Builds the component list from the configured message files and
	 * directories.
	 *
	 * @return array Component information indexed by component key. Each
	 *  entry has 'repo', 'orig' and 'path', plus 'name' for extensions and
	 *  skins.
	 */
	public function getComponents() {
		$components = [];

		// For older versions of Mediawiki, pull json updates even though its still using php
		if ( !isset( $this->json['core'] ) ) {
			$components['core'] = [
				'repo' => 'mediawiki',
				'orig' => "file://{$this->core}/languages/messages/Messages*.php",
				'path' => 'languages/messages/i18n/*.json',
			];
		}

		foreach ( $this->json as $key => $value ) {
			// Json should take priority if both exist
			unset( $this->php[$key] );

			// An entry is either a single directory or a list of directories.
			foreach ( (array)$value as $subkey => $subvalue ) {
				// Mediawiki core files
				$matches = [];
				if ( preg_match( '~/(?P<path>(?:includes|languages|resources)/.*)$~', $subvalue, $matches ) ) {
					$components["$key-$subkey"] = [
						'repo' => 'mediawiki',
						// Use the directory being inspected; $value may be a
						// list, which would stringify to "Array".
						'orig' => "file://$subvalue/*.json",
						'path' => "{$matches['path']}/*.json",
					];
					continue;
				}

				$item = $this->getItem( 'extensions', $subvalue );
				if ( $item !== null ) {
					$item['repo'] = 'extension';
					$components["$key-$subkey"] = $item;
					continue;
				}

				$item = $this->getItem( 'skins', $subvalue );
				if ( $item !== null ) {
					$item['repo'] = 'skin';
					$components["$key-$subkey"] = $item;
					continue;
				}
			}
		}

		foreach ( $this->php as $key => $value ) {
			$matches = [];
			$ok = preg_match( '~/extensions/(?P<name>[^/]+)/(?P<path>.*\.i18n\.php)$~', $value, $matches );
			if ( !$ok ) {
				continue;
			}

			$components[$key] = [
				'repo' => 'extension',
				'name' => $matches['name'],
				'orig' => "file://$value",
				'path' => $matches['path'],
			];
		}

		return $components;
	}

	/**
	 * @param string $dir extensions or skins
	 * @param string $subvalue Absolute path to a message directory
	 * @return array|null Null when the path is not below /$dir/<name>/
	 */
	private function getItem( $dir, $subvalue ) {
		// This ignores magic, alias etc. non message files
		$matches = [];
		if ( !preg_match( "~/$dir/(?P<name>[^/]+)/(?P<path>.*)$~", $subvalue, $matches ) ) {
			return null;
		}

		return [
			'name' => $matches['name'],
			'orig' => "file://$subvalue/*.json",
			'path' => "{$matches['path']}/*.json",
		];
	}
}

}

// ---------------------------------------------------------------------------
// File: www/wiki/extensions/LocalisationUpdate/includes/reader/JSONReader.php
// ---------------------------------------------------------------------------
/**
 * @file
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */
namespace LocalisationUpdate {

/**
 * Reads MediaWiki JSON i18n files.
 */
class JSONReader implements Reader {
	/** @var string|null Language tag */
	protected $code;

	/**
	 * @param string|null $code Language tag of the messages in the file, or
	 *  null if the file is keyed by language codes
	 */
	public function __construct( $code = null ) {
		$this->code = $code;
	}

	/**
	 * @param string $contents
	 *
	 * @return array
	 * @throws \Exception When the contents do not decode to an array
	 */
	public function parse( $contents ) {
		$messages = \FormatJson::decode( $contents, true );
		if ( !is_array( $messages ) ) {
			// Returning a non-array here would break the array_merge() of the
			// caller; an exception lets it skip just this file.
			throw new \Exception( 'Unable to decode JSON messages' );
		}
		unset( $messages['@metadata'] );

		if ( $this->code ) {
			return [ $this->code => $messages ];
		}

		// Assuming that the array is keyed by language codes
		return $messages;
	}
}

}

// ---------------------------------------------------------------------------
// File: www/wiki/extensions/LocalisationUpdate/includes/reader/PHPReader.php
// ---------------------------------------------------------------------------
/**
 * @file
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */
namespace LocalisationUpdate {

/**
 * Reads MediaWiki PHP i18n files.
 */
class PHPReader implements Reader {
	/** @var string|null Language tag */
	protected $code;

	/**
	 * @param string|null $code Language tag of the messages in the file, or
	 *  null if the file is keyed by language codes
	 */
	public function __construct( $code = null ) {
		$this->code = $code;
	}

	/**
	 * @param string $contents
	 *
	 * @return array
	 */
	public function parse( $contents ) {
		if ( strpos( $contents, '$messages' ) === false ) {
			// This happens for some core languages that only have a fallback.
			return [];
		}

		$php = $this->cleanupFile( $contents );
		$reader = new \QuickArrayReader( "<?php $php" );
		$messages = $reader->getVar( 'messages' );
		if ( !is_array( $messages ) ) {
			// No message array could be extracted; report "no messages"
			// instead of handing null or a scalar to the caller.
			return [];
		}

		if ( $this->code ) {
			return [ $this->code => $messages ];
		}

		// Assuming that the array is keyed by language codes
		return $messages;
	}

	/**
	 * Removes all unneeded content from a file and returns it.
	 *
	 * @param string $contents String
	 * @return string PHP code without PHP tags
	 */
	protected function cleanupFile( $contents ) {
		// We hate the windows vs linux linebreaks.
		$contents = preg_replace( '/\r\n?/', "\n", $contents );

		// We only want message arrays.
		$results = [];
		preg_match_all( '/\$messages(?:.*\s)*?\);/', $contents, $results );

		// But we want them all in one string.
		return implode( "\n\n", $results[0] );
	}
}

}

// ---------------------------------------------------------------------------
// File: www/wiki/extensions/LocalisationUpdate/includes/reader/Reader.php
// ---------------------------------------------------------------------------
/**
 * @file
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */
namespace LocalisationUpdate {

/**
 * Interface for file readers.
 */
interface Reader {
	/**
	 * Returns a list of messages indexed by language code. Example
	 *  array( 'en' => array( 'key' => 'value' ) );
	 * @param string $contents File contents as a string.
	 * @return array
	 */
	public function parse( $contents );
}

}

// ---------------------------------------------------------------------------
// File: www/wiki/extensions/LocalisationUpdate/includes/reader/ReaderFactory.php
// ---------------------------------------------------------------------------
/**
 * @file
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */
namespace LocalisationUpdate {

/**
 * Constructs readers for files based on the names.
 */
class ReaderFactory {
	/**
	 * Constructs a suitable reader for a given path.
	 * @param string $filename Usually a relative path to the file name.
	 * @return Reader
	 * @throws \Exception When no reader is known for the file name
	 */
	public function getReader( $filename ) {
		if ( preg_match( '/i18n\.php$/', $filename ) ) {
			return new PHPReader();
		}

		// Ugly hack for core i18n files
		if ( preg_match( '/Messages(.*)\.php$/', $filename ) ) {
			$code = \Language::getCodeFromFileName( basename( $filename ), 'Messages' );
			return new PHPReader( $code );
		}

		if ( preg_match( '/\.json/', $filename ) ) {
			$code = basename( $filename, '.json' );
			return new JSONReader( $code );
		}

		throw new \Exception( "Unknown file format: " . $filename );
	}
}

}