diff options
author | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
---|---|---|
committer | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
commit | fc7369835258467bf97eb64f184b93691f9a9fd5 (patch) | |
tree | daabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/extensions/Translate/ffs |
first commit
Diffstat (limited to 'www/wiki/extensions/Translate/ffs')
19 files changed, 4980 insertions, 0 deletions
diff --git a/www/wiki/extensions/Translate/ffs/AmdFFS.php b/www/wiki/extensions/Translate/ffs/AmdFFS.php new file mode 100644 index 00000000..bd759e8f --- /dev/null +++ b/www/wiki/extensions/Translate/ffs/AmdFFS.php @@ -0,0 +1,174 @@ +<?php +/** + * Support for the AMD i18n message file format (used by require.js and Dojo). See: + * http://requirejs.org/docs/api.html#i18n + * + * A limitation is that it only accepts json compatible structures inside the define + * wrapper function. For example the following example is not ok since there are no + * quotation marks around the keys: + * define({ + * key1: "somevalue", + * key2: "anothervalue" + * }); + * + * Instead it should look like: + * define({ + * "key1": "somevalue", + * "key2": "anothervalue" + * }); + * + * It also supports the top-level bundle with a root construction and language indicators. + * The following example will give the same messages as above: + * define({ + * "root": { + * "key1": "somevalue", + * "key2": "anothervalue" + * }, + * "sv": true + * }); + * + * Note that it does not support exporting with the root construction, there is only support + * for reading it. However, this is not a serious limitation as Translatewiki doesn't export + * the base language. + * + * @file + * @author Matthias Palmér + * @copyright Copyright © 2011-2015, MetaSolutions AB + * @license GPL-2.0-or-later + */ + +/** + * AmdFFS implements a message format where messages are encoded + * as key-value pairs in JSON objects wrapped in a define call. + * + * @ingroup FFS + * @since 2015.02 + */ +class AmdFFS extends SimpleFFS { + + /** + * @param string $data + * @return bool + */ + public static function isValid( $data ) { + $data = self::extractMessagePart( $data ); + return is_array( FormatJson::decode( $data, /*as array*/true ) ); + } + + public function getFileExtensions() { + return [ '.js' ]; + } + + /** + * @param string $data + * @return array Parsed data. 
+ */ + public function readFromVariable( $data ) { + $authors = self::extractAuthors( $data ); + $data = self::extractMessagePart( $data ); + $messages = (array)FormatJson::decode( $data, /*as array*/true ); + $metadata = []; + + // Take care of regular language bundles, as well as the root bundle. + if ( isset( $messages['root'] ) ) { + $messages = $this->group->getMangler()->mangle( $messages['root'] ); + } else { + $messages = $this->group->getMangler()->mangle( $messages ); + } + + return [ + 'MESSAGES' => $messages, + 'AUTHORS' => $authors, + 'METADATA' => $metadata, + ]; + } + + /** + * @param MessageCollection $collection + * @return string + */ + protected function writeReal( MessageCollection $collection ) { + $messages = []; + $mangler = $this->group->getMangler(); + + /** @var ThinMessage $m */ + foreach ( $collection as $key => $m ) { + $value = $m->translation(); + if ( $value === null ) { + continue; + } + + if ( $m->hasTag( 'fuzzy' ) ) { + $value = str_replace( TRANSLATE_FUZZY, '', $value ); + } + + $key = $mangler->unmangle( $key ); + $messages[$key] = $value; + } + + // Do not create empty files + if ( !count( $messages ) ) { + return ''; + } + $header = $this->header( $collection->code, $collection->getAuthors() ); + return $header . FormatJson::encode( $messages, "\t", FormatJson::UTF8_OK ) . ");\n"; + } + + /** + * @param string $data + * @return string of JSON + */ + private static function extractMessagePart( $data ) { + // Find the start and end of the data section (enclosed in the define function call). + $dataStart = strpos( $data, 'define(' ) + 6; + $dataEnd = strrpos( $data, ')' ); + + // Strip everything outside of the data section. 
+ return substr( $data, $dataStart + 1, $dataEnd - $dataStart - 1 ); + } + + /** + * @param string $data + * @return array + */ + private static function extractAuthors( $data ) { + preg_match_all( '~\n \* - (.+)~', $data, $result ); + return $result[1]; + } + + /** + * @param string $code + * @param array $authors + * @return string + */ + private function header( $code, $authors ) { + global $wgSitename; + + $name = TranslateUtils::getLanguageName( $code ); + $authorsList = $this->authorsList( $authors ); + + return <<<EOT +/** + * Messages for $name + * Exported from $wgSitename + * +{$authorsList} + */ +define( +EOT; + } + + /** + * @param string[] $authors + * @return string + */ + private function authorsList( array $authors ) { + if ( $authors === [] ) { + return ''; + } + + $prefix = ' * - '; + $authorList = implode( "\n$prefix", $authors ); + return " * Translators:\n$prefix$authorList"; + } +} diff --git a/www/wiki/extensions/Translate/ffs/AndroidXmlFFS.php b/www/wiki/extensions/Translate/ffs/AndroidXmlFFS.php new file mode 100644 index 00000000..2e947c61 --- /dev/null +++ b/www/wiki/extensions/Translate/ffs/AndroidXmlFFS.php @@ -0,0 +1,185 @@ +<?php +/** + * Support for XML translation format used by Android. + * + * @file + * @author Niklas Laxström + * @license GPL-2.0-or-later + */ + +/** + * Support for XML translation format used by Android. + * @since 2012-08-19 + * @ingroup FFS + */ +class AndroidXmlFFS extends SimpleFFS { + public function __construct( FileBasedMessageGroup $group ) { + parent::__construct( $group ); + $this->flattener = $this->getFlattener(); + } + + public function supportsFuzzy() { + return 'yes'; + } + + public function getFileExtensions() { + return [ '.xml' ]; + } + + /** + * @param string $data + * @return array Parsed data. 
+ */ + public function readFromVariable( $data ) { + $reader = new SimpleXMLElement( $data ); + + $messages = []; + $mangler = $this->group->getMangler(); + + /** @var SimpleXMLElement $element */ + foreach ( $reader as $element ) { + $key = (string)$element['name']; + + if ( $element->getName() === 'string' ) { + $value = $this->readElementContents( $element ); + } elseif ( $element->getName() === 'plurals' ) { + $forms = []; + foreach ( $element as $item ) { + $forms[(string)$item['quantity']] = $this->readElementContents( $item ); + } + $value = $this->flattener->flattenCLDRPlurals( $forms ); + } else { + wfDebug( __METHOD__ . ': Unknown XML element name.' ); + continue; + } + + if ( isset( $element['fuzzy'] ) && (string)$element['fuzzy'] === 'true' ) { + $value = TRANSLATE_FUZZY . $value; + } + + $messages[$key] = $value; + } + + return [ + 'AUTHORS' => $this->scrapeAuthors( $data ), + 'MESSAGES' => $mangler->mangle( $messages ), + ]; + } + + protected function scrapeAuthors( $string ) { + $match = []; + preg_match( '~<!-- Authors:\n((?:\* .*\n)*)-->~', $string, $match ); + if ( !$match ) { + return []; + } + + $authors = $matches = []; + preg_match_all( '~\* (.*)~', $match[ 1 ], $matches ); + foreach ( $matches[1] as $author ) { + // PHP7: \u{2011} + $authors[] = str_replace( "\xE2\x80\x91\xE2\x80\x91", '--', $author ); + } + return $authors; + } + + protected function readElementContents( $element ) { + return stripcslashes( (string)$element ); + } + + protected function formatElementContents( $contents ) { + // Kudos to the brilliant person who invented this braindead file format + $escaped = addcslashes( $contents, '"\'' ); + if ( substr( $escaped, 0, 1 ) === '@' ) { + // '@' at beginning of string refers to another string by name. + // Add backslash to escape it too. + $escaped = '\\' . $escaped; + } + // All html entities seen would be inserted by translators themselves. + // Treat them as plain text. 
+ $escaped = str_replace( '&', '&amp;', $escaped ); + + // Newlines must be escaped + $escaped = str_replace( "\n", '\n', $escaped ); + return $escaped; + } + + protected function doAuthors( MessageCollection $collection ) { + $authors = $collection->getAuthors(); + $authors = $this->filterAuthors( $authors, $collection->code ); + + if ( !$authors ) { + return ''; + } + + $output = "\n<!-- Authors:\n"; + + foreach ( $authors as $author ) { + // Since -- is not allowed in XML comments, we rewrite them to + // U+2011 (non-breaking hyphen). PHP7: \u{2011} + $author = str_replace( '--', "\xE2\x80\x91\xE2\x80\x91", $author ); + $output .= "* $author\n"; + } + + $output .= "-->\n"; + + return $output; + } + + protected function writeReal( MessageCollection $collection ) { + $template = '<?xml version="1.0" encoding="utf-8"?>'; + $template .= $this->doAuthors( $collection ); + $template .= '<resources></resources>'; + + $writer = new SimpleXMLElement( $template ); + $mangler = $this->group->getMangler(); + + $collection->filter( 'hastranslation', false ); + if ( count( $collection ) === 0 ) { + return ''; + } + + /** + * @var $m TMessage + */ + foreach ( $collection as $key => $m ) { + $key = $mangler->unmangle( $key ); + + $value = $m->translation(); + $value = str_replace( TRANSLATE_FUZZY, '', $value ); + + $plurals = $this->flattener->unflattenCLDRPlurals( '', $value ); + + if ( $plurals === false ) { + $element = $writer->addChild( 'string', $this->formatElementContents( $value ) ); + } else { + $element = $writer->addChild( 'plurals' ); + foreach ( $plurals as $quantity => $content ) { + $item = $element->addChild( 'item', $this->formatElementContents( $content ) ); + $item->addAttribute( 'quantity', $quantity ); + } + } + + $element->addAttribute( 'name', $key ); + // This is non-standard + if ( $m->hasTag( 'fuzzy' ) ) { + $element->addAttribute( 'fuzzy', 'true' ); + } + } + + // Make the output pretty with DOMDocument + $dom = new DOMDocument( '1.0' ); + 
$dom->formatOutput = true; + $dom->loadXML( $writer->asXML() ); + + return $dom->saveXML(); + } + + protected function getFlattener() { + $flattener = new ArrayFlattener( '', true ); + return $flattener; + } + + public function isContentEqual( $a, $b ) { + return $this->flattener->compareContent( $a, $b ); + } +} diff --git a/www/wiki/extensions/Translate/ffs/AppleFFS.php b/www/wiki/extensions/Translate/ffs/AppleFFS.php new file mode 100644 index 00000000..de0f79e3 --- /dev/null +++ b/www/wiki/extensions/Translate/ffs/AppleFFS.php @@ -0,0 +1,214 @@ +<?php + +/** + * AppleFFS class implements support for Apple .strings files. + * This class reads and writes only UTF-8 files. + * + * This class has not yet been battle-tested, so beware. + * + * @author Brion Vibber <bvibber@wikimedia.org> + * + * @ingroup FFS + * @since 2014.02 + */ +class AppleFFS extends SimpleFFS { + public function supportsFuzzy() { + return 'write'; + } + + public function getFileExtensions() { + return [ '.strings' ]; + } + + /** + * @param string $data + * @return array Parsed data. 
+ * @throws MWException + */ + public function readFromVariable( $data ) { + $lines = explode( "\n", $data ); + $authors = $messages = []; + $linecontinuation = false; + + $value = ''; + foreach ( $lines as $line ) { + if ( $linecontinuation ) { + $linecontinuation = false; + $valuecont = $line; + $value .= $valuecont; + } else { + if ( $line === '' ) { + continue; + } + + if ( substr( $line, 0, 2 ) === '//' ) { + // Single-line comment + $match = []; + $ok = preg_match( '~//\s*Author:\s*(.*)~', $line, $match ); + if ( $ok ) { + $authors[] = $match[1]; + } + continue; + } + + if ( substr( $line, 0, 2 ) === '/*' ) { + if ( strpos( $line, '*/', 2 ) === false ) { + $linecontinuation = true; + } + continue; + } + + list( $key, $value ) = self::readRow( $line ); + $messages[$key] = $value; + } + } + + $messages = $this->group->getMangler()->mangle( $messages ); + + return [ + 'AUTHORS' => $authors, + 'MESSAGES' => $messages, + ]; + } + + /** + * Parses non-empty strings file row to key and value. 
+ * @param string $line + * @throws MWException + * @return array array( string $key, string $val ) + */ + public static function readRow( $line ) { + $match = []; + if ( preg_match( '/^"((?:\\\"|[^"])*)"\s*=\s*"((?:\\\"|[^"])*)"\s*;\s*$/', $line, $match ) ) { + $key = self::unescapeString( $match[1] ); + $value = self::unescapeString( $match[2] ); + if ( $key === '' ) { + throw new MWException( "Empty key in line $line" ); + } + return [ $key, $value ]; + } else { + throw new MWException( "Unrecognized line format: $line" ); + } + } + + /** + * @param MessageCollection $collection + * @return string + */ + protected function writeReal( MessageCollection $collection ) { + $header = $this->doHeader( $collection ); + $header .= $this->doAuthors( $collection ); + $header .= "\n"; + + $output = ''; + $mangler = $this->group->getMangler(); + + /** + * @var TMessage $m + */ + foreach ( $collection as $key => $m ) { + $value = $m->translation(); + $value = str_replace( TRANSLATE_FUZZY, '', $value ); + + if ( $value === '' ) { + continue; + } + + // Just to give an overview of translation quality. + if ( $m->hasTag( 'fuzzy' ) ) { + $output .= "// Fuzzy\n"; + } + + $key = $mangler->unmangle( $key ); + $output .= self::writeRow( $key, $value ); + } + + if ( $output ) { + $data = $header . $output; + } else { + $data = $header; + } + + return $data; + } + + /** + * Writes well-formed properties file row with key and value. + * @param string $key + * @param string $value + * @return string + */ + public static function writeRow( $key, $value ) { + return self::quoteString( $key ) . ' = ' . self::quoteString( $value ) . ';' . "\n"; + } + + /** + * Quote and escape Obj-C-style strings for .strings format. + * + * @param string $str + * @return string + */ + protected static function quoteString( $str ) { + return '"' . self::escapeString( $str ) . '"'; + } + + /** + * Escape Obj-C-style strings; use backslash-escapes etc. 
+ * + * @param string $str + * @return string + */ + protected static function escapeString( $str ) { + $str = addcslashes( $str, '\\"' ); + $str = str_replace( "\n", '\\n', $str ); + return $str; + } + + /** + * Unescape Obj-C-style strings; can include backslash-escapes + * + * @todo support \UXXXX + * + * @param string $str + * @return string + */ + protected static function unescapeString( $str ) { + return stripcslashes( $str ); + } + + /** + * @param MessageCollection $collection + * @return string + */ + protected function doHeader( MessageCollection $collection ) { + if ( isset( $this->extra['header'] ) ) { + $output = $this->extra['header']; + } else { + global $wgSitename; + + $code = $collection->code; + $name = TranslateUtils::getLanguageName( $code ); + $native = TranslateUtils::getLanguageName( $code, $code ); + $output = "// Messages for $name ($native)\n"; + $output .= "// Exported from $wgSitename\n"; + } + + return $output; + } + + /** + * @param MessageCollection $collection + * @return string + */ + protected function doAuthors( MessageCollection $collection ) { + $output = ''; + $authors = $collection->getAuthors(); + $authors = $this->filterAuthors( $authors, $collection->code ); + + foreach ( $authors as $author ) { + $output .= "// Author: $author\n"; + } + + return $output; + } +} diff --git a/www/wiki/extensions/Translate/ffs/DtdFFS.php b/www/wiki/extensions/Translate/ffs/DtdFFS.php new file mode 100644 index 00000000..aed82fca --- /dev/null +++ b/www/wiki/extensions/Translate/ffs/DtdFFS.php @@ -0,0 +1,113 @@ +<?php +/** + * Implements FFS for DTD file format. + * + * @file + * @author Guillaume Duhamel + * @author Niklas Laxström + * @author Siebrand Mazeland + * @copyright Copyright © 2009-2010, Guillaume Duhamel, Niklas Laxström, Siebrand Mazeland + * @license GPL-2.0-or-later + */ + +/** + * File format support for DTD. 
+ * + * @ingroup FFS + */ +class DtdFFS extends SimpleFFS { + public function getFileExtensions() { + return [ '.dtd' ]; + } + + /** + * @param string $data + * @return array Parsed data. + */ + public function readFromVariable( $data ) { + preg_match_all( ',# Author: ([^\n]+)\n,', $data, $matches ); + $authors = []; + + $count = count( $matches[1] ); + for ( $i = 0; $i < $count; $i++ ) { + $authors[] = $matches[1][$i]; + } + + preg_match_all( ',<!ENTITY[ ]+([^ ]+)\s+"([^"]+)"[^>]*>,', $data, $matches ); + + $keys = $matches[1]; + $values = $matches[2]; + + $messages = []; + + $count = count( $matches[1] ); + for ( $i = 0; $i < $count; $i++ ) { + $messages[$keys[$i]] = str_replace( + [ '&quot;', '&#34;', '&#39;' ], + [ '"', '"', "'" ], + $values[$i] ); + } + + $messages = $this->group->getMangler()->mangle( $messages ); + + return [ + 'AUTHORS' => $authors, + 'MESSAGES' => $messages, + ]; + } + + protected function writeReal( MessageCollection $collection ) { + $collection->loadTranslations(); + + $header = "<!--\n"; + $header .= $this->doHeader( $collection ); + $header .= $this->doAuthors( $collection ); + $header .= "-->\n"; + + $output = ''; + $mangler = $this->group->getMangler(); + + /** + * @var TMessage $m + */ + foreach ( $collection as $key => $m ) { + $key = $mangler->unmangle( $key ); + $trans = $m->translation(); + $trans = str_replace( TRANSLATE_FUZZY, '', $trans ); + + if ( $trans === '' ) { + continue; + } + + $trans = str_replace( '"', '&quot;', $trans ); + $output .= "<!ENTITY $key \"$trans\">\n"; + } + + return $output ? $header . 
$output : false; + } + + protected function doHeader( MessageCollection $collection ) { + global $wgSitename; + + $code = $collection->code; + $name = TranslateUtils::getLanguageName( $code ); + $native = TranslateUtils::getLanguageName( $code, $code ); + + $output = "# Messages for $name ($native)\n"; + $output .= "# Exported from $wgSitename\n\n"; + + return $output; + } + + protected function doAuthors( MessageCollection $collection ) { + $output = ''; + $authors = $collection->getAuthors(); + $authors = $this->filterAuthors( $authors, $collection->code ); + + foreach ( $authors as $author ) { + $output .= "# Author: $author\n"; + } + + return $output; + } +} diff --git a/www/wiki/extensions/Translate/ffs/FFS.php b/www/wiki/extensions/Translate/ffs/FFS.php new file mode 100644 index 00000000..2c89f736 --- /dev/null +++ b/www/wiki/extensions/Translate/ffs/FFS.php @@ -0,0 +1,109 @@ +<?php +/** + * File format support classes. + * + * These classes handle parsing and generating various different + * file formats where translation messages are stored. + * + * @file + * @defgroup FFS File format support + * @author Niklas Laxström + * @copyright Copyright © 2008-2013, Niklas Laxström + * @license GPL-2.0-or-later + */ + +/** + * Interface for file system support classes. + * @ingroup FFS + */ +interface FFS { + public function __construct( FileBasedMessageGroup $group ); + + /** + * Set the file's location in the system + * @param string $target Filesystem path for exported files. + */ + public function setWritePath( $target ); + + /** + * Get the file's location in the system + * @return string + */ + public function getWritePath(); + + /** + * Will parse messages, authors, and any custom data from the file + * and return it in associative array with keys like \c AUTHORS and + * \c MESSAGES. + * @param string $code Language code. + * @return array|bool Parsed data or false on failure. 
+ */ + public function read( $code ); + + /** + * Same as read(), but takes the data as a parameter. The caller + * is supposed to know in what language the translations are. + * @param string $data Formatted messages. + * @return array Parsed data. + */ + public function readFromVariable( $data ); + + /** + * Writes to the location provided with setWritePath and group specific + * directory structure. Exports translations included in the given + * collection with any special handling needed. + * @param MessageCollection $collection + */ + public function write( MessageCollection $collection ); + + /** + * Quick shortcut for getting the plain exported data. + * Same as write(), but returns the output instead of writing it into + * a file. + * @param MessageCollection $collection + * @return string + */ + public function writeIntoVariable( MessageCollection $collection ); + + /** + * Query the capabilities of this FFS. Allowed values are: + * - yes + * - write (ignored on read) + * - no (stripped on write) + * @return string + * @since 2013-03-05 + */ + public function supportsFuzzy(); + + /** + * Checks whether two strings are equal. Sometimes same content might + * have multiple representations. The main case are inline plurals, + * which in some formats require expansion at export time. + * + * @param string $a + * @param string $b + * @return bool + * @since 2016.11 + */ + public function isContentEqual( $a, $b ); + + /** + * Return the commonly used file extensions for these formats. + * Include the dot. + * @return string[] + * @since 2013-04 + */ + public function getFileExtensions(); + + /** + * Allows to skip writing the export output into a file. This is useful + * to skip updates that would only update irrelevant parts, such as the + * timestamp of the export. + * + * @param string $a The existing content. + * @param string $b The new export content. 
+ * @return bool + * @since 2017.04 + */ + public function shouldOverwrite( $a, $b ); +} diff --git a/www/wiki/extensions/Translate/ffs/FlatPhpFFS.php b/www/wiki/extensions/Translate/ffs/FlatPhpFFS.php new file mode 100644 index 00000000..830b437b --- /dev/null +++ b/www/wiki/extensions/Translate/ffs/FlatPhpFFS.php @@ -0,0 +1,152 @@ +<?php +/** + * PHP variables file format handler. + * + * @file + * @author Niklas Laxström + * @author Siebrand Mazeland + * @copyright Copyright © 2008-2010, Niklas Laxström, Siebrand Mazeland + * @license GPL-2.0-or-later + */ + +/** + * Implements file format support for PHP files which consist of multiple + * variable assignments. + */ +class FlatPhpFFS extends SimpleFFS implements MetaYamlSchemaExtender { + public function getFileExtensions() { + return [ '.php' ]; + } + + /** + * @param string $data + * @return array Parsed data. + */ + public function readFromVariable( $data ) { + # Authors first + $matches = []; + preg_match_all( '/^ \* @author\s+(.+)$/m', $data, $matches ); + $authors = $matches[1]; + + # Then messages + $matches = []; + $regex = '/^\$(.*?)\s*=\s*[\'"](.*?)[\'"];.*?$/mus'; + preg_match_all( $regex, $data, $matches, PREG_SET_ORDER ); + $messages = []; + + foreach ( $matches as $_ ) { + $legal = Title::legalChars(); + $key = preg_replace_callback( "/([^$legal]|\\\\)/u", + function ( $m ) { + return '\x' . 
dechex( ord( $m[0] ) ); + }, + $_[1] + ); + $value = str_replace( [ "\'", "\\\\" ], [ "'", "\\" ], $_[2] ); + $messages[$key] = $value; + } + + $messages = $this->group->getMangler()->mangle( $messages ); + + return [ + 'AUTHORS' => $authors, + 'MESSAGES' => $messages, + ]; + } + + protected function writeReal( MessageCollection $collection ) { + if ( isset( $this->extra['header'] ) ) { + $output = $this->extra['header']; + } else { + $output = "<?php\n"; + } + + $output .= $this->doHeader( $collection ); + + $mangler = $this->group->getMangler(); + + /** + * @var TMessage $item + */ + foreach ( $collection as $item ) { + $key = $mangler->unmangle( $item->key() ); + $key = stripcslashes( $key ); + + $value = $item->translation(); + if ( $value === null ) { + continue; + } + + $value = str_replace( TRANSLATE_FUZZY, '', $value ); + $value = addcslashes( $value, "'" ); + + $output .= "\$$key = '$value';\n"; + } + + return $output; + } + + protected function doHeader( MessageCollection $collection ) { + global $wgSitename, $wgTranslateDocumentationLanguageCode; + + $code = $collection->code; + $name = TranslateUtils::getLanguageName( $code ); + $native = TranslateUtils::getLanguageName( $code, $code ); + + if ( $wgTranslateDocumentationLanguageCode ) { + $docu = "\n * See the $wgTranslateDocumentationLanguageCode 'language' for " . + 'message documentation incl. 
usage of parameters'; + } else { + $docu = ''; + } + + $authors = $this->doAuthors( $collection ); + + $output = <<<PHP +/** $name ($native) + * $docu + * To improve a translation please visit http://$wgSitename + * + * @ingroup Language + * @file + * +$authors */ + + +PHP; + + return $output; + } + + protected function doAuthors( MessageCollection $collection ) { + $output = ''; + $authors = $collection->getAuthors(); + $authors = $this->filterAuthors( $authors, $collection->code ); + + foreach ( $authors as $author ) { + $output .= " * @author $author\n"; + } + + return $output; + } + + public static function getExtraSchema() { + $schema = [ + 'root' => [ + '_type' => 'array', + '_children' => [ + 'FILES' => [ + '_type' => 'array', + '_children' => [ + 'header' => [ + '_type' => 'text', + ], + ] + ] + ] + ] + ]; + + return $schema; + } +} diff --git a/www/wiki/extensions/Translate/ffs/GettextFFS.php b/www/wiki/extensions/Translate/ffs/GettextFFS.php new file mode 100644 index 00000000..c404da34 --- /dev/null +++ b/www/wiki/extensions/Translate/ffs/GettextFFS.php @@ -0,0 +1,711 @@ +<?php +/** + * Gettext file format handler for both old and new style message groups. + * + * @author Niklas Laxström + * @author Siebrand Mazeland + * @copyright Copyright © 2008-2010, Niklas Laxström, Siebrand Mazeland + * @license GPL-2.0-or-later + * @file + */ + +/** + * Identifies Gettext plural exceptions. + */ +class GettextPluralException extends MWException { +} + +/** + * New-style FFS class that implements support for gettext file format. 
+ * @ingroup FFS + */ +class GettextFFS extends SimpleFFS implements MetaYamlSchemaExtender { + public function supportsFuzzy() { + return 'yes'; + } + + public function getFileExtensions() { + return [ '.pot', '.po' ]; + } + + protected $offlineMode = false; + + /** + * @param bool $value + */ + public function setOfflineMode( $value ) { + $this->offlineMode = $value; + } + + /** + * @param string $data + * @return array + */ + public function readFromVariable( $data ) { + # Authors first + $matches = []; + preg_match_all( '/^#\s*Author:\s*(.*)$/m', $data, $matches ); + $authors = $matches[1]; + + # Then messages and everything else + $parsedData = $this->parseGettext( $data ); + $parsedData['AUTHORS'] = $authors; + + foreach ( $parsedData['MESSAGES'] as $key => $value ) { + if ( $value === '' ) { + unset( $parsedData['MESSAGES'][$key] ); + } + } + + return $parsedData; + } + + public function parseGettext( $data ) { + $mangler = $this->group->getMangler(); + $useCtxtAsKey = isset( $this->extra['CtxtAsKey'] ) && $this->extra['CtxtAsKey']; + $keyAlgorithm = 'simple'; + if ( isset( $this->extra['keyAlgorithm'] ) ) { + $keyAlgorithm = $this->extra['keyAlgorithm']; + } + + return self::parseGettextData( $data, $useCtxtAsKey, $mangler, $keyAlgorithm ); + } + + /** + * Parses gettext file as string into internal representation. + * @param string $data + * @param bool $useCtxtAsKey Whether to create message keys from the context + * or use msgctxt (non-standard po-files) + * @param StringMangler $mangler + * @param string $keyAlgorithm Key generation algorithm, see generateKeyFromItem + * @throws MWException + * @return array + */ + public static function parseGettextData( $data, $useCtxtAsKey, $mangler, $keyAlgorithm ) { + $potmode = false; + + // Normalise newlines, to make processing easier + $data = str_replace( "\r\n", "\n", $data ); + + /* Delimit the file into sections, which are separated by two newlines. + * We are permissive and accept more than two. 
This parsing method isn't + * efficient wrt memory, but was easy to implement */ + $sections = preg_split( '/\n{2,}/', $data ); + + /* First one isn't an actual message. We'll handle it specially below */ + $headerSection = array_shift( $sections ); + /* Since this is the header section, we are only interested in the tags + * and msgid is empty. Somewhere we should extract the header comments + * too */ + $match = self::expectKeyword( 'msgstr', $headerSection ); + if ( $match !== null ) { + $headerBlock = self::formatForWiki( $match, 'trim' ); + $headers = self::parseHeaderTags( $headerBlock ); + + // Check for pot-mode by checking if the header is fuzzy + $flags = self::parseFlags( $headerSection ); + if ( in_array( 'fuzzy', $flags, true ) ) { + $potmode = true; + } + } else { + throw new MWException( "Gettext file header was not found:\n\n$data" ); + } + + $template = []; + $messages = []; + + // Extract some metadata from headers for easier use + $metadata = []; + if ( isset( $headers['X-Language-Code'] ) ) { + $metadata['code'] = $headers['X-Language-Code']; + } + + if ( isset( $headers['X-Message-Group'] ) ) { + $metadata['group'] = $headers['X-Message-Group']; + } + + /* At this stage we are only interested how many plurals forms we should + * be expecting when parsing the rest of this file. 
*/ + $pluralCount = false; + if ( isset( $headers['Plural-Forms'] ) && + preg_match( '/nplurals=([0-9]+).*;/', $headers['Plural-Forms'], $matches ) + ) { + $pluralCount = $metadata['plural'] = $matches[1]; + } + + // Then parse the messages + foreach ( $sections as $section ) { + $item = self::parseGettextSection( $section, $pluralCount, $metadata ); + if ( $item === false ) { + continue; + } + + if ( $useCtxtAsKey ) { + if ( !isset( $item['ctxt'] ) ) { + error_log( "ctxt missing for: $section" ); + continue; + } + $key = $item['ctxt']; + } else { + $key = self::generateKeyFromItem( $item, $keyAlgorithm ); + } + + $key = $mangler->mangle( $key ); + $messages[$key] = $potmode ? $item['id'] : $item['str']; + $template[$key] = $item; + } + + return [ + 'MESSAGES' => $messages, + 'TEMPLATE' => $template, + 'METADATA' => $metadata, + 'HEADERS' => $headers + ]; + } + + public static function parseGettextSection( $section, $pluralCount, &$metadata ) { + if ( trim( $section ) === '' ) { + return false; + } + + /* These inactive sections are of no interest to us. Multiline mode + * is needed because there may be flags or other annoying stuff + * before the commented out sections. 
+ */ + if ( preg_match( '/^#~/m', $section ) ) { + return false; + } + + $item = [ + 'ctxt' => false, + 'id' => '', + 'str' => '', + 'flags' => [], + 'comments' => [], + ]; + + $match = self::expectKeyword( 'msgid', $section ); + if ( $match !== null ) { + $item['id'] = self::formatForWiki( $match ); + } else { + throw new MWException( "Unable to parse msgid:\n\n$section" ); + } + + $match = self::expectKeyword( 'msgctxt', $section ); + if ( $match !== null ) { + $item['ctxt'] = self::formatForWiki( $match ); + } + + $pluralMessage = false; + $match = self::expectKeyword( 'msgid_plural', $section ); + if ( $match !== null ) { + $pluralMessage = true; + $plural = self::formatForWiki( $match ); + $item['id'] = "{{PLURAL:GETTEXT|{$item['id']}|$plural}}"; + } + + if ( $pluralMessage ) { + $pluralMessageText = self::processGettextPluralMessage( $pluralCount, $section ); + + // Keep the translation empty if no form has translation + if ( $pluralMessageText !== '' ) { + $item['str'] = $pluralMessageText; + } + } else { + $match = self::expectKeyword( 'msgstr', $section ); + if ( $match !== null ) { + $item['str'] = self::formatForWiki( $match ); + } else { + throw new MWException( "Unable to parse msgstr:\n\n$section" ); + } + } + + // Parse flags + $flags = self::parseFlags( $section ); + foreach ( $flags as $key => $flag ) { + if ( $flag === 'fuzzy' ) { + $item['str'] = TRANSLATE_FUZZY . $item['str']; + unset( $flags[$key] ); + } + } + $item['flags'] = $flags; + + // Rest of the comments + $matches = []; + if ( preg_match_all( '/^#(.?) 
(.*)$/m', $section, $matches, PREG_SET_ORDER ) ) { + foreach ( $matches as $match ) { + if ( $match[1] !== ',' && strpos( $match[1], '[Wiki]' ) !== 0 ) { + $item['comments'][$match[1]][] = $match[2]; + } + } + } + + return $item; + } + + public static function processGettextPluralMessage( $pluralCount, $section ) { + $actualForms = []; + + for ( $i = 0; $i < $pluralCount; $i++ ) { + $match = self::expectKeyword( "msgstr\\[$i\\]", $section ); + + if ( $match !== null ) { + $actualForms[] = self::formatForWiki( $match ); + } else { + $actualForms[] = ''; + error_log( "Plural $i not found, expecting total of $pluralCount for $section" ); + } + } + + if ( array_sum( array_map( 'strlen', $actualForms ) ) > 0 ) { + return '{{PLURAL:GETTEXT|' . implode( '|', $actualForms ) . '}}'; + } else { + return ''; + } + } + + public static function parseFlags( $section ) { + $matches = []; + if ( preg_match( '/^#,(.*)$/mu', $section, $matches ) ) { + return array_map( 'trim', explode( ',', $matches[1] ) ); + } else { + return []; + } + } + + public static function expectKeyword( $name, $section ) { + /* Catches the multiline textblock that comes after keywords msgid, + * msgstr, msgid_plural, msgctxt. + */ + $poformat = '".*"\n?(^".*"$\n?)*'; + + $matches = []; + if ( preg_match( "/^$name\s($poformat)/mx", $section, $matches ) ) { + return $matches[1]; + } else { + return null; + } + } + + /** + * Generates unique key for each message. Changing this WILL BREAK ALL + * existing pages! + * @param array $item As returned by parseGettextSection + * @param string $algorithm Algorithm used to generate message keys: simple or legacy + * @return string + */ + public static function generateKeyFromItem( array $item, $algorithm = 'simple' ) { + $lang = Language::factory( 'en' ); + + if ( $item['ctxt'] === '' ) { + /* Messages with msgctxt as empty string should be different + * from messages without any msgctxt. 
To avoid BC break make + * the empty ctxt a special case */ + $hash = sha1( $item['id'] . 'MSGEMPTYCTXT' ); + } else { + $hash = sha1( $item['ctxt'] . $item['id'] ); + } + + if ( $algorithm === 'simple' ) { + $hash = substr( $hash, 0, 6 ); + if ( !is_callable( [ $lang, 'truncateForDatabase' ] ) ) { + // Backwards compatibility code; remove once MW 1.30 is + // no longer supported (aka once MW 1.33 is released) + $snippet = $lang->truncate( $item['id'], 30, '' ); + } else { + $snippet = $lang->truncateForDatabase( $item['id'], 30, '' ); + } + $snippet = str_replace( ' ', '_', trim( $snippet ) ); + } else { // legacy + global $wgLegalTitleChars; + $snippet = $item['id']; + $snippet = preg_replace( "/[^$wgLegalTitleChars]/", ' ', $snippet ); + $snippet = preg_replace( "/[:&%\/_]/", ' ', $snippet ); + $snippet = preg_replace( '/ {2,}/', ' ', $snippet ); + if ( !is_callable( [ $lang, 'truncateForDatabase' ] ) ) { + // Backwards compatibility code; remove once MW 1.30 is + // no longer supported (aka once MW 1.33 is released) + $snippet = $lang->truncate( $snippet, 30, '' ); + } else { + $snippet = $lang->truncateForDatabase( $snippet, 30, '' ); + } + $snippet = str_replace( ' ', '_', trim( $snippet ) ); + } + + return "$hash-$snippet"; + } + + /** + * This parses the Gettext text block format. Since trailing whitespace is + * not allowed in MediaWiki pages, the default action is to append + * \-character at the end of the message. You can also choose to ignore it + * and use the trim action instead. 
+ * @param string $data + * @param string $whitespace + * @throws MWException + * @return string + */ + public static function formatForWiki( $data, $whitespace = 'mark' ) { + $quotePattern = '/(^"|"$\n?)/m'; + $data = preg_replace( $quotePattern, '', $data ); + $data = stripcslashes( $data ); + + if ( preg_match( '/\s$/', $data ) ) { + if ( $whitespace === 'mark' ) { + $data .= '\\'; + } elseif ( $whitespace === 'trim' ) { + $data = rtrim( $data ); + } else { + // @todo Only triggered if there is trailing whitespace + throw new MWException( 'Unknown action for whitespace' ); + } + } + + return $data; + } + + public static function parseHeaderTags( $headers ) { + $tags = []; + foreach ( explode( "\n", $headers ) as $line ) { + if ( strpos( $line, ':' ) === false ) { + error_log( __METHOD__ . ": $line" ); + } + list( $key, $value ) = explode( ':', $line, 2 ); + $tags[trim( $key )] = trim( $value ); + } + + return $tags; + } + + protected function writeReal( MessageCollection $collection ) { + $pot = $this->read( 'en' ); + $template = $this->read( $collection->code ); + $pluralCount = false; + $output = $this->doGettextHeader( $collection, $template, $pluralCount ); + + /** @var TMessage $m */ + foreach ( $collection as $key => $m ) { + $transTemplate = $template['TEMPLATE'][$key] ?? []; + $potTemplate = $pot['TEMPLATE'][$key] ?? []; + + $output .= $this->formatMessageBlock( $key, $m, $transTemplate, $potTemplate, $pluralCount ); + } + + return $output; + } + + protected function doGettextHeader( MessageCollection $collection, $template, &$pluralCount ) { + global $wgSitename; + + $code = $collection->code; + $name = TranslateUtils::getLanguageName( $code ); + $native = TranslateUtils::getLanguageName( $code, $code ); + $authors = $this->doAuthors( $collection ); + if ( isset( $this->extra['header'] ) ) { + $extra = "# --\n" . 
$this->extra['header']; + } else { + $extra = ''; + } + + $output = <<<PHP +# Translation of {$this->group->getLabel()} to $name ($native) +# Exported from $wgSitename +# +$authors$extra +PHP; + + // Make sure there is no empty line before msgid + $output = trim( $output ) . "\n"; + + $specs = $template['HEADERS'] ?? []; + + $timestamp = wfTimestampNow(); + $specs['PO-Revision-Date'] = self::formatTime( $timestamp ); + if ( $this->offlineMode ) { + $specs['POT-Creation-Date'] = self::formatTime( $timestamp ); + } elseif ( $this->group instanceof MessageGroupBase ) { + $specs['X-POT-Import-Date'] = self::formatTime( wfTimestamp( TS_MW, $this->getPotTime() ) ); + } + $specs['Content-Type'] = 'text/plain; charset=UTF-8'; + $specs['Content-Transfer-Encoding'] = '8bit'; + $specs['Language'] = LanguageCode::bcp47( $this->group->mapCode( $code ) ); + Hooks::run( 'Translate:GettextFFS:headerFields', [ &$specs, $this->group, $code ] ); + $specs['X-Generator'] = $this->getGenerator(); + + if ( $this->offlineMode ) { + $specs['X-Language-Code'] = $code; + $specs['X-Message-Group'] = $this->group->getId(); + } + + $plural = self::getPluralRule( $code ); + if ( $plural ) { + $specs['Plural-Forms'] = $plural; + } elseif ( !isset( $specs['Plural-Forms'] ) ) { + $specs['Plural-Forms'] = 'nplurals=2; plural=(n != 1);'; + } + + $match = []; + preg_match( '/nplurals=(\d+);/', $specs['Plural-Forms'], $match ); + $pluralCount = $match[1]; + + $output .= 'msgid ""' . "\n"; + $output .= 'msgstr ""' . "\n"; + $output .= '""' . "\n"; + + foreach ( $specs as $k => $v ) { + $output .= self::escape( "$k: $v\n" ) . 
"\n"; + } + + $output .= "\n"; + + return $output; + } + + protected function doAuthors( MessageCollection $collection ) { + $output = ''; + $authors = $collection->getAuthors(); + $authors = $this->filterAuthors( $authors, $collection->code ); + + foreach ( $authors as $author ) { + $output .= "# Author: $author\n"; + } + + return $output; + } + + /** + * @param string $key + * @param TMessage $m + * @param array $trans + * @param array $pot + * @param int $pluralCount + * @return string + */ + protected function formatMessageBlock( $key, $m, $trans, $pot, $pluralCount ) { + $header = $this->formatDocumentation( $key ); + $content = ''; + + $comments = self::chainGetter( 'comments', $pot, $trans, [] ); + foreach ( $comments as $type => $typecomments ) { + foreach ( $typecomments as $comment ) { + $header .= "#$type $comment\n"; + } + } + + $flags = self::chainGetter( 'flags', $pot, $trans, [] ); + $flags = array_merge( $m->getTags(), $flags ); + + if ( $this->offlineMode ) { + $content .= 'msgctxt ' . self::escape( $key ) . "\n"; + } else { + $ctxt = self::chainGetter( 'ctxt', $pot, $trans, false ); + if ( $ctxt !== false ) { + $content .= 'msgctxt ' . self::escape( $ctxt ) . "\n"; + } + } + + $msgid = $m->definition(); + $msgstr = $m->translation(); + if ( strpos( $msgstr, TRANSLATE_FUZZY ) !== false ) { + $msgstr = str_replace( TRANSLATE_FUZZY, '', $msgstr ); + // Might by fuzzy infile + $flags[] = 'fuzzy'; + } + + if ( preg_match( '/{{PLURAL:GETTEXT/i', $msgid ) ) { + $forms = $this->splitPlural( $msgid, 2 ); + $content .= 'msgid ' . self::escape( $forms[0] ) . "\n"; + $content .= 'msgid_plural ' . self::escape( $forms[1] ) . "\n"; + + try { + $forms = $this->splitPlural( $msgstr, $pluralCount ); + foreach ( $forms as $index => $form ) { + $content .= "msgstr[$index] " . self::escape( $form ) . 
"\n"; + } + } catch ( GettextPluralException $e ) { + $flags[] = 'invalid-plural'; + for ( $i = 0; $i < $pluralCount; $i++ ) { + $content .= "msgstr[$i] \"\"\n"; + } + } + } else { + $content .= 'msgid ' . self::escape( $msgid ) . "\n"; + $content .= 'msgstr ' . self::escape( $msgstr ) . "\n"; + } + + if ( $flags ) { + sort( $flags ); + $header .= '#, ' . implode( ', ', array_unique( $flags ) ) . "\n"; + } + + $output = $header ?: "#\n"; + $output .= $content . "\n"; + + return $output; + } + + /** + * @param string $key + * @param array $a + * @param array $b + * @param mixed $default + * @return mixed + */ + protected static function chainGetter( $key, $a, $b, $default ) { + if ( isset( $a[$key] ) ) { + return $a[$key]; + } elseif ( isset( $b[$key] ) ) { + return $b[$key]; + } else { + return $default; + } + } + + protected static function formatTime( $time ) { + $lang = Language::factory( 'en' ); + + return $lang->sprintfDate( 'xnY-xnm-xnd xnH:xni:xns+0000', $time ); + } + + protected function getPotTime() { + $defs = new MessageGroupCache( $this->group ); + + return $defs->exists() ? $defs->getTimestamp() : wfTimestampNow(); + } + + protected function getGenerator() { + return 'MediaWiki ' . SpecialVersion::getVersion() . + '; Translate ' . TRANSLATE_VERSION; + } + + protected function formatDocumentation( $key ) { + global $wgTranslateDocumentationLanguageCode; + + if ( !$this->offlineMode ) { + return ''; + } + + $code = $wgTranslateDocumentationLanguageCode; + if ( !$code ) { + return ''; + } + + $documentation = TranslateUtils::getMessageContent( $key, $code, $this->group->getNamespace() ); + if ( !is_string( $documentation ) ) { + return ''; + } + + $lines = explode( "\n", $documentation ); + $out = ''; + foreach ( $lines as $line ) { + $out .= "#. 
[Wiki] $line\n"; + } + + return $out; + } + + protected static function escape( $line ) { + // There may be \ as a last character, for keeping trailing whitespace + $line = preg_replace( '/(\s)\\\\$/', '\1', $line ); + $line = addcslashes( $line, '\\"' ); + $line = str_replace( "\n", '\n', $line ); + $line = '"' . $line . '"'; + + return $line; + } + + /** + * Returns plural rule for Gettext. + * @param string $code Language code. + * @return string + */ + public static function getPluralRule( $code ) { + $rulefile = __DIR__ . '/../data/plural-gettext.txt'; + $rules = file_get_contents( $rulefile ); + foreach ( explode( "\n", $rules ) as $line ) { + if ( trim( $line ) === '' ) { + continue; + } + list( $rulecode, $rule ) = explode( "\t", $line ); + if ( $rulecode === $code ) { + return $rule; + } + } + + return ''; + } + + protected function splitPlural( $text, $forms ) { + if ( $forms === 1 ) { + return $text; + } + + $placeholder = TranslateUtils::getPlaceholder(); + # |/| is commonly used in KDE to support inflections + $text = str_replace( '|/|', $placeholder, $text ); + + $plurals = []; + $match = preg_match_all( '/{{PLURAL:GETTEXT\|(.*)}}/iUs', $text, $plurals ); + if ( !$match ) { + throw new GettextPluralException( "Failed to find plural in: $text" ); + } + + $splitPlurals = []; + for ( $i = 0; $i < $forms; $i++ ) { + # Start with the hole string + $pluralForm = $text; + # Loop over *each* {{PLURAL}} instance and replace + # it with the plural form belonging to this index + foreach ( $plurals[0] as $index => $definition ) { + $parsedFormsArray = explode( '|', $plurals[1][$index] ); + if ( !isset( $parsedFormsArray[$i] ) ) { + error_log( "Too few plural forms in: $text" ); + $pluralForm = ''; + } else { + $pluralForm = str_replace( $pluralForm, $definition, $parsedFormsArray[$i] ); + } + } + + $pluralForm = str_replace( $placeholder, '|/|', $pluralForm ); + $splitPlurals[$i] = $pluralForm; + } + + return $splitPlurals; + } + + public function 
shouldOverwrite( $a, $b ) { + $regex = '/^"(.+)-Date: \d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d\+\d\d\d\d\\\\n"$/m'; + + $a = preg_replace( $regex, '', $a ); + $b = preg_replace( $regex, '', $b ); + + return $a !== $b; + } + + public static function getExtraSchema() { + $schema = [ + 'root' => [ + '_type' => 'array', + '_children' => [ + 'FILES' => [ + '_type' => 'array', + '_children' => [ + 'header' => [ + '_type' => 'text', + ], + 'keyAlgorithm' => [ + '_type' => 'enum', + '_values' => [ 'simple', 'legacy' ], + ], + 'CtxtAsKey' => [ + '_type' => 'boolean', + ], + ] + ] + ] + ] + ]; + + return $schema; + } +} diff --git a/www/wiki/extensions/Translate/ffs/IniFFS.php b/www/wiki/extensions/Translate/ffs/IniFFS.php new file mode 100644 index 00000000..95ab8bcb --- /dev/null +++ b/www/wiki/extensions/Translate/ffs/IniFFS.php @@ -0,0 +1,114 @@ +<?php +/** + * Support for ini message file format. + * + * @file + * @author Niklas Laxström + * @copyright Copyright © 2012-2013, Niklas Laxström + * @license GPL-2.0-or-later + */ + +/** + * IniFFS currently parses and generates flat ini files with language + * code as header key. + * + * @ingroup FFS + * @since 2012-11-19 + */ +class IniFFS extends SimpleFFS { + public static function isValid( $data ) { + $conf = [ 'BASIC' => [ 'class' => 'FileBasedMessageGroup', 'namespace' => 8 ] ]; + /** + * @var FileBasedMessageGroup $group + */ + $group = MessageGroupBase::factory( $conf ); + + Wikimedia\suppressWarnings(); + $ffs = new self( $group ); + $parsed = $ffs->readFromVariable( $data ); + Wikimedia\restoreWarnings(); + + return (bool)count( $parsed['MESSAGES'] ); + } + + public function supportsFuzzy() { + return 'write'; + } + + public function getFileExtensions() { + return [ '.ini' ]; + } + + /** + * @param string $data + * @return array Parsed data. 
+ */ + public function readFromVariable( $data ) { + $authors = []; + preg_match_all( '/^; Author: (.*)$/m', $data, $matches, PREG_SET_ORDER ); + foreach ( $matches as $match ) { + $authors[] = $match[1]; + } + + // Remove comments + $data = preg_replace( '/^\s*;.*$/m', '', $data ); + // Make sure values are quoted, PHP barks on stuff like ?{}|&~![()^ + $data = preg_replace( '/(^.+?=\s*)([^\'"].+)$/m', '\1"\2"', $data ); + + $messages = parse_ini_string( $data ); + if ( is_array( $messages ) ) { + $messages = $this->group->getMangler()->mangle( $messages ); + } else { + $messages = null; + } + + return [ + 'MESSAGES' => $messages, + 'AUTHORS' => $authors, + ]; + } + + protected function writeReal( MessageCollection $collection ) { + $output = ''; + $mangler = $this->group->getMangler(); + + /** + * @var $m ThinMessage + */ + foreach ( $collection as $key => $m ) { + $value = $m->translation(); + if ( $value === null ) { + continue; + } + + $comment = ''; + + if ( $m->hasTag( 'fuzzy' ) ) { + $value = str_replace( TRANSLATE_FUZZY, '', $value ); + $comment = "; Fuzzy\n"; + } + + $key = $mangler->unmangle( $key ); + $output .= "$comment$key = $value\n"; + } + + // Do not create empty files + if ( $output === '' ) { + return ''; + } + + global $wgSitename; + // Accumulator + $header = "; Exported from $wgSitename\n"; + + $authors = $collection->getAuthors(); + $authors = $this->filterAuthors( $authors, $collection->getLanguage() ); + foreach ( $authors as $author ) { + $header .= "; Author: $author\n"; + } + + $header .= '[' . $collection->getLanguage() . "]\n"; + + return $header . 
$output; + } +} diff --git a/www/wiki/extensions/Translate/ffs/IntuitionTextdomains.php b/www/wiki/extensions/Translate/ffs/IntuitionTextdomains.php new file mode 100644 index 00000000..c3f637d2 --- /dev/null +++ b/www/wiki/extensions/Translate/ffs/IntuitionTextdomains.php @@ -0,0 +1,108 @@ +<?php +/** + * Class for Intuition for Translatewiki.net + * + * @file + * @author Niklas Laxström + * @author Krinkle + * @copyright Copyright © 2008-2013, Niklas Laxström + * @copyright Copyright © 2011, Krinkle + * @license GPL-2.0-or-later + */ + +/** + * Support for tools using Intuition at the Toolserver and Wikimedia Labs. + */ +class PremadeIntuitionTextdomains extends PremadeMediawikiExtensionGroups { + protected $useConfigure = false; + protected $groups; + protected $idPrefix = 'tsint-'; + protected $namespace = NS_INTUITION; + + protected function processGroups( $groups ) { + $fixedGroups = []; + foreach ( $groups as $g ) { + if ( !is_array( $g ) ) { + $g = [ $g ]; + } + + $name = $g['name']; + $sanitizedName = preg_replace( '/\s+/', '', strtolower( $name ) ); + + if ( isset( $g['id'] ) ) { + $id = $g['id']; + } else { + $id = $this->idPrefix . $sanitizedName; + } + + if ( isset( $g['file'] ) ) { + $file = $g['file']; + } else { + // Canonical names for Intuition text-domains are lowercase + // eg. "MyTool" -> "mytool/en.json" + $file = "$sanitizedName/%CODE%.json"; + } + + if ( isset( $g['descmsg'] ) ) { + $descmsg = $g['descmsg']; + } else { + $descmsg = "$id-desc"; + } + + if ( isset( $g['url'] ) ) { + $url = $g['url']; + } else { + $url = false; + } + + $newgroup = [ + 'name' => 'Intuition - ' . $name, + 'file' => $file, + 'descmsg' => $descmsg, + 'url' => $url, + ]; + + // Prefix is required, if not customized use the sanitized name + if ( !isset( $g['prefix'] ) ) { + $g['prefix'] = "$sanitizedName-"; + } + + // All messages are prefixed with their groupname + $g['mangle'] = [ '*' ]; + + // Prevent E_NOTICE undefined index. 
+ // PremadeMediawikiExtensionGroups::factory should probably check this better instead + if ( !isset( $g['ignored'] ) ) { + $g['ignored'] = []; + } + + if ( !isset( $g['optional'] ) ) { + $g['optional'] = []; + } + + $g['format'] = 'json'; + + $copyvars = [ + 'aliasfile', + 'desc', + 'format', + 'ignored', + 'magicfile', + 'mangle', + 'optional', + 'prefix', + 'var', + ]; + + foreach ( $copyvars as $var ) { + if ( isset( $g[$var] ) ) { + $newgroup[$var] = $g[$var]; + } + } + + $fixedGroups[$id] = $newgroup; + } + + return $fixedGroups; + } +} diff --git a/www/wiki/extensions/Translate/ffs/JavaFFS.php b/www/wiki/extensions/Translate/ffs/JavaFFS.php new file mode 100644 index 00000000..4485b8bf --- /dev/null +++ b/www/wiki/extensions/Translate/ffs/JavaFFS.php @@ -0,0 +1,272 @@ +<?php + +/** + * JavaFFS class implements support for Java properties files. + * This class reads and writes only utf-8 files. Java projects + * need to run native2ascii on them before using them. + * + * This class adds a new item into FILES section of group configuration: + * \c keySeparator which defaults to '='. + * @ingroup FFS + */ +class JavaFFS extends SimpleFFS implements MetaYamlSchemaExtender { + public function supportsFuzzy() { + return 'write'; + } + + public function getFileExtensions() { + return [ '.properties' ]; + } + + protected $keySeparator = '='; + + /** + * @param FileBasedMessageGroup $group + */ + public function __construct( FileBasedMessageGroup $group ) { + parent::__construct( $group ); + + if ( isset( $this->extra['keySeparator'] ) ) { + $this->keySeparator = $this->extra['keySeparator']; + } + } + + /** + * @param string $data + * @return array Parsed data. 
+ * @throws MWException + */ + public function readFromVariable( $data ) { + $data = self::fixNewLines( $data ); + $lines = array_map( 'ltrim', explode( "\n", $data ) ); + $authors = $messages = []; + $linecontinuation = false; + + $key = ''; + $value = ''; + foreach ( $lines as $line ) { + if ( $linecontinuation ) { + $linecontinuation = false; + $valuecont = $line; + $valuecont = str_replace( '\n', "\n", $valuecont ); + $value .= $valuecont; + } else { + if ( $line === '' ) { + continue; + } + + if ( $line[0] === '#' || $line[0] === '!' ) { + $match = []; + $ok = preg_match( '/#\s*Author:\s*(.*)/', $line, $match ); + + if ( $ok ) { + $authors[] = $match[1]; + } + + continue; + } + + if ( strpos( $line, $this->keySeparator ) === false ) { + throw new MWException( "Line without separator '{$this->keySeparator}': $line." ); + } + + list( $key, $value ) = self::readRow( $line, $this->keySeparator ); + if ( $key === '' ) { + throw new MWException( "Empty key in line $line." ); + } + } + + // @todo This doesn't handle the pathological case of even number of trailing \ + if ( strlen( $value ) && $value[strlen( $value ) - 1] === "\\" ) { + $value = substr( $value, 0, strlen( $value ) - 1 ); + $linecontinuation = true; + } else { + $messages[$key] = ltrim( $value ); + } + } + + $messages = $this->group->getMangler()->mangle( $messages ); + + return [ + 'AUTHORS' => $authors, + 'MESSAGES' => $messages, + ]; + } + + /** + * @param MessageCollection $collection + * @return string + */ + protected function writeReal( MessageCollection $collection ) { + $header = $this->doHeader( $collection ); + $header .= $this->doAuthors( $collection ); + $header .= "\n"; + + $output = ''; + $mangler = $this->group->getMangler(); + + /** + * @var TMessage $m + */ + foreach ( $collection as $key => $m ) { + $value = $m->translation(); + $value = str_replace( TRANSLATE_FUZZY, '', $value ); + + if ( $value === '' ) { + continue; + } + + // Just to give an overview of translation quality. 
+ if ( $m->hasTag( 'fuzzy' ) ) { + $output .= "# Fuzzy\n"; + } + + $key = $mangler->unmangle( $key ); + $output .= self::writeRow( $key, $this->keySeparator, $value ); + } + + if ( $output ) { + return $header . $output; + } + + return ''; + } + + /** + * Writes well-formed properties file row with key and value. + * @param string $key + * @param string $sep + * @param string $value + * @return string + * @since 2012-03-28 + */ + public static function writeRow( $key, $sep, $value ) { + /* Keys containing the separator need escaping. Also escape comment + * characters, though strictly they would only need escaping when + * they are the first character. Plus the escape character itself. */ + $key = addcslashes( $key, "#!$sep\\" ); + // Make sure we do not slip newlines trough... it would be fatal. + $value = str_replace( "\n", '\\n', $value ); + + return "$key$sep$value\n"; + } + + /** + * Parses non-empty properties file row to key and value. + * @param string $line + * @param string $sep + * @return string[] + * @since 2012-03-28 + */ + public static function readRow( $line, $sep ) { + if ( strpos( $line, '\\' ) === false ) { + /* Nothing appears to be escaped in this line. + * Just read the key and the value. */ + list( $key, $value ) = explode( $sep, $line, 2 ); + } else { + /* There might be escaped separators in the key. + * Using slower method to find the separator. */ + + /* Make the key default to empty instead of value, because + * empty key causes error on callers, while empty value + * wouldn't. */ + $key = ''; + $value = $line; + + /* Find the first unescaped separator. Example: + * First line is the string being read, second line is the + * value of $escaped after having read the above character. 
+ * + * ki\ts\\s\=a = koira + * 0010010010000 + * ^ Not separator because $escaped was true + * ^ Split the string into key and value here + */ + + $len = strlen( $line ); + $escaped = false; + for ( $i = 0; $i < $len; $i++ ) { + $char = $line[$i]; + if ( $char === '\\' ) { + $escaped = !$escaped; + } elseif ( $escaped ) { + $escaped = false; + } elseif ( $char === $sep ) { + $key = substr( $line, 0, $i ); + // Excluding the separator character from the value + $value = substr( $line, $i + 1 ); + break; + } + } + } + + /* We usually don't want to expand things like \t in values since + * translators cannot easily input those. But in keys we do. + * \n is exception we do handle in values. */ + $key = trim( $key ); + $key = stripcslashes( $key ); + $value = ltrim( $value ); + $value = str_replace( '\n', "\n", $value ); + + return [ $key, $value ]; + } + + /** + * @param MessageCollection $collection + * @return string + */ + protected function doHeader( MessageCollection $collection ) { + if ( isset( $this->extra['header'] ) ) { + $output = $this->extra['header']; + } else { + global $wgSitename; + + $code = $collection->code; + $name = TranslateUtils::getLanguageName( $code ); + $native = TranslateUtils::getLanguageName( $code, $code ); + $output = "# Messages for $name ($native)\n"; + $output .= "# Exported from $wgSitename\n"; + } + + return $output; + } + + /** + * @param MessageCollection $collection + * @return string + */ + protected function doAuthors( MessageCollection $collection ) { + $output = ''; + $authors = $collection->getAuthors(); + $authors = $this->filterAuthors( $authors, $collection->code ); + + foreach ( $authors as $author ) { + $output .= "# Author: $author\n"; + } + + return $output; + } + + public static function getExtraSchema() { + $schema = [ + 'root' => [ + '_type' => 'array', + '_children' => [ + 'FILES' => [ + '_type' => 'array', + '_children' => [ + 'header' => [ + '_type' => 'text', + ], + 'keySeparator' => [ + '_type' => 'text', + 
], + ] + ] + ] + ] + ]; + + return $schema; + } +} diff --git a/www/wiki/extensions/Translate/ffs/JavaScriptFFS.php b/www/wiki/extensions/Translate/ffs/JavaScriptFFS.php new file mode 100644 index 00000000..e77724f2 --- /dev/null +++ b/www/wiki/extensions/Translate/ffs/JavaScriptFFS.php @@ -0,0 +1,286 @@ +<?php + +/** + * Generic file format support for JavaScript formatted files. + * @ingroup FFS + */ +abstract class JavaScriptFFS extends SimpleFFS { + public function getFileExtensions() { + return [ '.js' ]; + } + + /** + * Message keys format. + * + * @param string $key + * + * @return string + */ + abstract protected function transformKey( $key ); + + /** + * Header of message file. + * + * @param string $code + * @param string[] $authors + */ + abstract protected function header( $code, array $authors ); + + /** + * Footer of message file. + */ + abstract protected function footer(); + + /** + * @param string $data + * @return array Parsed data. + */ + public function readFromVariable( $data ) { + /* Parse authors list */ + $authors = preg_replace( "#/\* Translators\:\n(.*?)\n \*/(.*)#s", '$1', $data ); + if ( $authors === $data ) { + $authors = []; + } else { + $authors = explode( "\n", $authors ); + $count = count( $authors ); + for ( $i = 0; $i < $count; $i++ ) { + // Each line should look like " * - Translatorname" + $authors[$i] = substr( $authors[$i], 6 ); + } + } + + /* Pre-processing of messages */ + + /** + * Find the start and end of the data section (enclosed in curly braces). + */ + $dataStart = strpos( $data, '{' ); + $dataEnd = strrpos( $data, '}' ); + + /** + * Strip everything outside of the data section. + */ + $data = substr( $data, $dataStart + 1, $dataEnd - $dataStart - 1 ); + + /** + * Strip comments. + */ + $data = preg_replace( '#^(\s*?)//(.*?)$#m', '', $data ); + + /** + * Replace message endings with double quotes. + */ + $data = preg_replace( "#\'\,\n#", "\",\n", $data ); + + /** + * Strip excess whitespace. 
+ */ + $data = trim( $data ); + + /** + * Per-key message processing. + */ + + /** + * Break in to segments. + */ + $data = explode( "\",\n", $data ); + + $messages = []; + foreach ( $data as $segment ) { + /** + * Add back trailing quote, removed by explosion. + */ + $segment .= '"'; + + /** + * Concatenate separated strings. + */ + $segment = str_replace( '"+', '" +', $segment ); + $segment = explode( '" +', $segment ); + $count = count( $segment ); + for ( $i = 0; $i < $count; $i++ ) { + $segment[$i] = ltrim( ltrim( $segment[$i] ), '"' ); + } + $segment = implode( $segment ); + + /** + * Remove line breaks between message keys and messages. + */ + $segment = preg_replace( "#\:(\s+)[\\\"\']#", ': "', $segment ); + + /** + * Break in to key and message. + */ + $segments = explode( ': "', $segment ); + + /** + * Strip excess whitespace from key and value, then quotation marks. + */ + $key = trim( trim( $segments[0] ), "'\"" ); + $value = trim( trim( $segments[1] ), "'\"" ); + + /** + * Unescape any JavaScript string syntax and append to message array. + */ + $messages[$key] = self::unescapeJsString( $value ); + } + + $messages = $this->group->getMangler()->mangle( $messages ); + + return [ + 'AUTHORS' => $authors, + 'MESSAGES' => $messages + ]; + } + + /** + * @param MessageCollection $collection + * @return string + */ + public function writeReal( MessageCollection $collection ) { + $header = $this->header( $collection->code, $collection->getAuthors() ); + + $mangler = $this->group->getMangler(); + + /** + * Get and write messages. 
+ */ + $body = ''; + /** + * @var TMessage $message + */ + foreach ( $collection as $message ) { + if ( strlen( $message->translation() ) === 0 ) { + continue; + } + + $key = $mangler->unmangle( $message->key() ); + $key = $this->transformKey( self::escapeJsString( $key ) ); + + $translation = self::escapeJsString( $message->translation() ); + + $body .= "\t{$key}: \"{$translation}\",\n"; + } + + if ( strlen( $body ) === 0 ) { + return false; + } + + /** + * Strip last comma, re-add trailing newlines. + */ + $body = substr( $body, 0, -2 ); + $body .= "\n"; + + return $header . $body . $this->footer(); + } + + /** + * @param string[] $authors + * @return string + */ + protected function authorsList( array $authors ) { + if ( $authors === [] ) { + return ''; + } + + $authorsList = ''; + foreach ( $authors as $author ) { + $authorsList .= " * - $author\n"; + } + + // Remove trailing newline, and return. + return substr( " * Translators:\n$authorsList", 0, -1 ); + } + + // See ECMA 262 section 7.8.4 for string literal format + private static $pairs = [ + "\\" => "\\\\", + "\"" => "\\\"", + "'" => "\\'", + "\n" => "\\n", + "\r" => "\\r", + + // To avoid closing the element or CDATA section. + '<' => "\\x3c", + '>' => "\\x3e", + + // To avoid any complaints about bad entity refs. + '&' => "\\x26", + + /* + * Work around https://bugzilla.mozilla.org/show_bug.cgi?id=274152 + * Encode certain Unicode formatting chars so affected + * versions of Gecko do not misinterpret our strings; + * this is a common problem with Farsi text. 
+ */ + "\xe2\x80\x8c" => "\\u200c", // ZERO WIDTH NON-JOINER + "\xe2\x80\x8d" => "\\u200d", // ZERO WIDTH JOINER + ]; + + /** + * @param string $string + * @return string + */ + protected static function escapeJsString( $string ) { + return strtr( $string, self::$pairs ); + } + + /** + * @param string $string + * @return string + */ + protected static function unescapeJsString( $string ) { + return strtr( $string, array_flip( self::$pairs ) ); + } +} + +/** + * File format support for Shapado, which uses JavaScript based format. + * @ingroup FFS + */ +class ShapadoJsFFS extends JavaScriptFFS { + + /** + * @param string $key + * + * @return string + */ + protected function transformKey( $key ) { + return $key; + } + + /** + * @param string $code + * @param string[] $authors + * @return string + */ + protected function header( $code, array $authors ) { + global $wgSitename; + + $name = TranslateUtils::getLanguageName( $code ); + $native = TranslateUtils::getLanguageName( $code, $code ); + $authorsList = $this->authorsList( $authors ); + + /** @cond doxygen_bug */ + return <<<EOT +/** Messages for $name ($native) + * Exported from $wgSitename + * +{$authorsList} + */ + +var I18n = { + +EOT; + /** @endcond */ + } + + /** + * @return string + */ + protected function footer() { + return "};\n\n"; + } +} diff --git a/www/wiki/extensions/Translate/ffs/JsonFFS.php b/www/wiki/extensions/Translate/ffs/JsonFFS.php new file mode 100644 index 00000000..b12ac1c7 --- /dev/null +++ b/www/wiki/extensions/Translate/ffs/JsonFFS.php @@ -0,0 +1,175 @@ +<?php +/** + * Support for JSON message file format. + * + * @file + * @author Niklas Laxström + * @license GPL-2.0-or-later + */ + +/** + * JsonFFS implements a message format where messages are encoded + * as key-value pairs in JSON objects. The format is extended to + * support author information under the special @metadata key. 
/**
 * File format support for messages stored as key-value pairs in a JSON object.
 *
 * A reserved top-level "@metadata" key carries the author list and any other
 * metadata; every other key is treated as a message. When the group
 * configuration sets "nestingSeparator", nested objects are flattened on read
 * and restored on write through ArrayFlattener.
 *
 * @ingroup FFS
 * @since 2012-09-21
 */
class JsonFFS extends SimpleFFS {
	/**
	 * Flattener for nested message structures, or null when no nesting
	 * separator is configured. Declared explicitly because creating
	 * properties dynamically is deprecated as of PHP 8.2.
	 *
	 * @var ArrayFlattener|null
	 */
	protected $flattener;

	/**
	 * Checks whether the given string decodes to a JSON array/object.
	 *
	 * @param string $data
	 * @return bool
	 */
	public static function isValid( $data ) {
		return is_array( FormatJson::decode( $data, /*as array*/true ) );
	}

	/**
	 * @param FileBasedMessageGroup $group
	 */
	public function __construct( FileBasedMessageGroup $group ) {
		parent::__construct( $group );
		$this->flattener = $this->getFlattener();
	}

	/**
	 * @return string[] File extensions handled by this format.
	 */
	public function getFileExtensions() {
		return [ '.json' ];
	}

	/**
	 * Parses JSON file contents into messages, authors and remaining metadata.
	 *
	 * @param string $data
	 * @return array Parsed data.
	 */
	public function readFromVariable( $data ) {
		$messages = (array)FormatJson::decode( $data, /*as array*/true );
		$authors = [];
		$metadata = [];

		// Authors are reported separately, so strip them from the metadata first.
		if ( isset( $messages['@metadata']['authors'] ) ) {
			$authors = (array)$messages['@metadata']['authors'];
			unset( $messages['@metadata']['authors'] );
		}

		if ( isset( $messages['@metadata'] ) ) {
			$metadata = $messages['@metadata'];
		}

		unset( $messages['@metadata'] );

		if ( $this->flattener ) {
			$messages = $this->flattener->flatten( $messages );
		}

		$messages = $this->group->getMangler()->mangle( $messages );

		return [
			'MESSAGES' => $messages,
			'AUTHORS' => $authors,
			'METADATA' => $metadata,
		];
	}

	/**
	 * Serialises a message collection to JSON, carrying over the metadata and
	 * authors already present in the existing file for that language.
	 *
	 * @param MessageCollection $collection
	 * @return string JSON text, or an empty string when there is nothing to write.
	 */
	protected function writeReal( MessageCollection $collection ) {
		$messages = [];
		$template = $this->read( $collection->getLanguage() );

		$messages['@metadata'] = [];
		if ( isset( $template['METADATA'] ) ) {
			$messages['@metadata'] = $template['METADATA'];
		}

		$authors = $collection->getAuthors();
		$authors = $this->filterAuthors( $authors, $collection->code );

		if ( isset( $template['AUTHORS'] ) ) {
			$authors = array_unique( array_merge( $template['AUTHORS'], $authors ) );
		}

		if ( $authors !== [] ) {
			// array_unique() keeps original keys; reindex so JSON gets a list, not an object.
			$messages['@metadata']['authors'] = array_values( $authors );
		}

		$mangler = $this->group->getMangler();

		/**
		 * @var $m ThinMessage
		 */
		foreach ( $collection as $key => $m ) {
			$value = $m->translation();
			if ( $value === null ) {
				continue;
			}

			if ( $m->hasTag( 'fuzzy' ) ) {
				$value = str_replace( TRANSLATE_FUZZY, '', $value );
			}

			$key = $mangler->unmangle( $key );
			$messages[$key] = $value;
		}

		// Do not create empty files. Check that something besides @metadata is present.
		if ( count( $messages ) < 2 ) {
			return '';
		}

		if ( $this->flattener ) {
			$messages = $this->flattener->unflatten( $messages );
		}

		if ( isset( $this->extra['includeMetadata'] ) && !$this->extra['includeMetadata'] ) {
			unset( $messages['@metadata'] );
		}

		return FormatJson::encode( $messages, "\t", FormatJson::ALL_OK ) . "\n";
	}

	/**
	 * Builds the flattener from the "nestingSeparator" and "parseCLDRPlurals"
	 * settings of the group configuration.
	 *
	 * @return ArrayFlattener|null Null when no nesting separator is configured.
	 */
	protected function getFlattener() {
		if ( !isset( $this->extra['nestingSeparator'] ) ) {
			return null;
		}

		$parseCLDRPlurals = $this->extra['parseCLDRPlurals'] ?? false;

		return new ArrayFlattener( $this->extra['nestingSeparator'], $parseCLDRPlurals );
	}

	/**
	 * Compares two message contents, delegating to the flattener when one is in use.
	 *
	 * @param string $a
	 * @param string $b
	 * @return bool
	 */
	public function isContentEqual( $a, $b ) {
		if ( $this->flattener ) {
			return $this->flattener->compareContent( $a, $b );
		}

		return parent::isContentEqual( $a, $b );
	}

	/**
	 * @return array Schema of the extra FILES keys understood by this format.
	 */
	public static function getExtraSchema() {
		$schema = [
			'root' => [
				'_type' => 'array',
				'_children' => [
					'FILES' => [
						'_type' => 'array',
						'_children' => [
							'nestingSeparator' => [
								'_type' => 'text',
							],
							'parseCLDRPlurals' => [
								'_type' => 'boolean',
							],
							'includeMetadata' => [
								'_type' => 'boolean',
							]
						]
					]
				]
			]
		];

		return $schema;
	}
}
/**
 * Base class which implements handling and translation interface of
 * non-message %MediaWiki items.
 *
 * Each entry of $data describes one group (label, variable name, source file
 * and whether the file is indexed by language code). After loading, a group
 * also carries a 'data' triple indexed by the LANG_* constants.
 */
abstract class ComplexMessages {
	/** Index of the English definitions in a group's 'data' triple. */
	const LANG_MASTER = 0;
	/** Index of the current values merged with fallback languages. */
	const LANG_CHAIN = 1;
	/** Index of the values of the target language only. */
	const LANG_CURRENT = 2;
	/** Marker in file names that is replaced by the language code. */
	const PLACEHOLDER = 'languagecodeplaceholder';

	protected $language;
	protected $targetHtmlCode;
	protected $targetDir;
	protected $id = '__BUG__';
	protected $variable = '__BUG__';
	protected $data = [];
	protected $elementsInArray = true;
	protected $databaseMsg = '__BUG__';
	protected $chainable = false;
	protected $firstMagic = false;
	protected $constants = [];

	protected $tableAttributes = [
		'class' => 'wikitable',
		'border' => '2',
		'cellpadding' => '4',
		'cellspacing' => '0',
		'style' => 'background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse;',
	];

	/**
	 * @param string $langCode Target language code.
	 */
	public function __construct( $langCode ) {
		$this->language = $langCode;

		$language = Language::factory( $langCode );
		$this->targetHtmlCode = $language->getHtmlCode();
		$this->targetDir = $language->getDir();
	}

	/**
	 * @return string Localised title of this module.
	 */
	public function getTitle() {
		// Give grep a chance to find the usages:
		// translate-magic-special, translate-magic-words, translate-magic-namespace
		return wfMessage( 'translate-magic-' . $this->id )->text();
	}

	// Data retrieval
	protected $init = false;

	/**
	 * Returns all groups, loading their data on first use.
	 *
	 * @return array[]
	 */
	public function getGroups() {
		if ( !$this->init ) {
			$saved = $this->getSavedData();
			foreach ( $this->data as &$group ) {
				$this->getData( $group, $saved );
			}
			// Break the reference so a later write to $group cannot alter the last entry.
			unset( $group );
			$this->init = true;
		}

		return $this->data;
	}

	/**
	 * Drops items unknown to the definitions and values that merely repeat
	 * a definition. Does nothing for types whose items are not arrays.
	 *
	 * @param array $defs
	 * @param array $current
	 * @return array
	 */
	public function cleanData( $defs, $current ) {
		if ( !$this->elementsInArray ) {
			return $current;
		}

		foreach ( $current as $item => $values ) {
			if ( !isset( $defs[$item] ) ) {
				unset( $current[$item] );
				continue;
			}

			foreach ( $values as $index => $value ) {
				if ( in_array( $value, $defs[$item], true ) ) {
					unset( $current[$item][$index] );
				}
			}
		}

		return $current;
	}

	/**
	 * Inserts the current values into the definition list right after its
	 * first element.
	 *
	 * @param array $defs
	 * @param array $current
	 * @return array
	 */
	public function mergeMagic( $defs, $current ) {
		foreach ( $current as $item => &$values ) {
			// An item without a definition keeps just its own values.
			$newchain = $defs[$item] ?? [];
			array_splice( $newchain, 1, 0, $values );
			$values = $newchain;
		}
		unset( $values );

		return $current;
	}

	/**
	 * Loads definitions, current values and the fallback chain for one group
	 * and stores them in $group['data'].
	 *
	 * @param array &$group
	 * @param array $savedData Values saved in the wiki; they win over file values.
	 * @return array[] Triple indexed by the LANG_* constants.
	 */
	public function getData( &$group, $savedData ) {
		$defs = $this->readVariable( $group, 'en' );
		$code = $this->language;

		$current = $savedData + $this->readVariable( $group, $code );

		// Clean up duplicates to definitions from saved data
		$current = $this->cleanData( $defs, $current );

		$chain = $current;
		if ( $this->chainable ) {
			foreach ( Language::getFallbacksFor( $code ) as $fallbackCode ) {
				$fbdata = $this->readVariable( $group, $fallbackCode );
				if ( $this->firstMagic ) {
					$fbdata = $this->cleanData( $defs, $fbdata );
				}

				$chain = array_merge_recursive( $chain, $fbdata );
			}
		}

		if ( $this->firstMagic ) {
			$chain = $this->mergeMagic( $defs, $chain );
		}

		$data = $group['data'] = [ $defs, $chain, $current ];

		return $data;
	}

	/**
	 * Gets data from request. Needs to be run before the form is displayed and
	 * validation. Not needed for export, which uses request directly.
	 * @param WebRequest $request
	 */
	public function loadFromRequest( WebRequest $request ) {
		$saved = $this->parse( $this->formatForSave( $request ) );
		foreach ( $this->data as &$group ) {
			$this->getData( $group, $saved );
		}
		unset( $group );
	}

	/**
	 * Gets saved data from Mediawiki namespace
	 * @return array
	 */
	protected function getSavedData() {
		$data = TranslateUtils::getMessageContent( $this->databaseMsg, $this->language );

		if ( !$data ) {
			return [];
		}

		return $this->parse( $data );
	}

	/**
	 * Parses "name = value1, value2" lines. Blank lines, lines starting with
	 * '#' or '<', and lines without a name or values are ignored.
	 *
	 * @param string $data
	 * @return array[] Lists of values indexed by name.
	 */
	protected function parse( $data ) {
		$lines = array_map( 'trim', explode( "\n", $data ) );
		$array = [];
		foreach ( $lines as $line ) {
			if ( $line === '' || $line[0] === '#' || $line[0] === '<' ) {
				continue;
			}

			if ( strpos( $line, '=' ) === false ) {
				continue;
			}

			list( $name, $values ) = array_map( 'trim', explode( '=', $line, 2 ) );
			if ( $name === '' || $values === '' ) {
				continue;
			}

			$array[$name] = array_map( 'trim', explode( ',', $values ) );
		}

		return $array;
	}

	/**
	 * Return an array of keys that can be used to iterate over all keys
	 * @param string $group Group id.
	 * @return array Keys of the definitions of the group.
	 */
	protected function getIterator( $group ) {
		$groups = $this->getGroups();

		return array_keys( $groups[$group]['data'][self::LANG_MASTER] );
	}

	/**
	 * Returns the values of one key as an array.
	 *
	 * @param string $group Group id.
	 * @param int $type One of the LANG_* constants.
	 * @param string|int $key
	 * @return array Empty when the key has no (truthy) value.
	 */
	protected function val( $group, $type, $key ) {
		$array = $this->getGroups();
		$subarray = $array[$group]['data'][$type][$key] ?? null;

		// Covers missing keys, empty arrays and empty scalars alike, and
		// never calls count() on a non-array.
		if ( !$subarray ) {
			return [];
		}

		if ( !is_array( $subarray ) ) {
			$subarray = [ $subarray ];
		}

		return $subarray;
	}

	/**
	 * Reads the group's variable from its PHP file for the given language.
	 *
	 * @param array $group Group definition with 'file', 'var' and 'code' keys.
	 * @param string $code Language code.
	 * @return array Values converted to strings.
	 */
	protected function readVariable( $group, $code ) {
		$file = $group['file'];
		if ( !$group['code'] ) {
			$file = str_ireplace( self::PLACEHOLDER, str_replace( '-', '_', ucfirst( $code ) ), $file );
		}

		${$group['var']} = []; # Initialize
		if ( file_exists( $file ) ) {
			require $file; # Include
		}

		if ( $group['code'] ) {
			// The file holds all languages; the requested one may be absent.
			$data = (array)( ${$group['var']}[$code] ?? [] );
		} else {
			$data = ${$group['var']};
		}

		return self::arrayMapRecursive( 'strval', $data );
	}

	/**
	 * Applies a callback to every non-array value of a nested array.
	 *
	 * @param callable $callback
	 * @param array $data
	 * @return array
	 */
	public static function arrayMapRecursive( $callback, $data ) {
		foreach ( $data as $index => $values ) {
			if ( is_array( $values ) ) {
				$data[$index] = self::arrayMapRecursive( $callback, $values );
			} else {
				$data[$index] = call_user_func( $callback, $values );
			}
		}

		return $data;
	}

	// Data retrieval

	// Output

	/**
	 * @param string $title Group label.
	 * @return string HTML of the two header rows of a group.
	 */
	public function header( $title ) {
		$colspan = [ 'colspan' => 3 ];
		$header = Xml::element( 'th', $colspan, $this->getTitle() . ' - ' . $title );
		$subheading = [];
		$subheading[] = '<th>' . wfMessage( 'translate-magic-cm-original' )->escaped() . '</th>';
		$subheading[] = '<th>' . wfMessage( 'translate-magic-cm-current' )->escaped() . '</th>';
		$subheading[] = '<th>' . wfMessage( 'translate-magic-cm-to-be' )->escaped() . '</th>';

		return '<tr>' . $header . '</tr>' .
			'<tr>' . implode( "\n", $subheading ) . '</tr>';
	}

	/**
	 * @return string HTML form with one table row per key of every group.
	 */
	public function output() {
		$colspan = [ 'colspan' => 3 ];

		$s = Xml::openElement( 'table', $this->tableAttributes );

		foreach ( array_keys( $this->data ) as $group ) {
			$s .= $this->header( $this->data[$group]['label'] );

			foreach ( $this->getIterator( $group ) as $key ) {
				$rowContents = '';

				$value = $this->val( $group, self::LANG_MASTER, $key );
				if ( $this->firstMagic ) {
					array_shift( $value );
				}

				$value = array_map( 'htmlspecialchars', $value );
				// Force ltr direction. The source is pretty much guaranteed to be English-based.
				$rowContents .= '<td dir="ltr">' . $this->formatElement( $value ) . '</td>';

				$value = $this->val( $group, self::LANG_CHAIN, $key );
				if ( $this->firstMagic ) {
					array_shift( $value );
				}

				// Apply bidi-isolation to each value.
				// The values can both RTL and LTR and mixing them in a comma list
				// can mix things up.
				foreach ( $value as &$currentTranslation ) {
					$currentTranslation = Xml::element( 'bdi', null, $currentTranslation );
				}
				unset( $currentTranslation );
				$value = $this->highlight( $key, $value );
				$rowContents .= '<td>' . $this->formatElement( $value ) . '</td>';

				$value = $this->val( $group, self::LANG_CURRENT, $key );
				$rowContents .= '<td>';
				$rowContents .= $this->editElement( $key, $this->formatElement( $value ) );
				$rowContents .= '</td>';

				$s .= Xml::tags( 'tr', [ 'id' => "mw-sp-magic-$key" ], $rowContents );
			}
		}

		$context = RequestContext::getMain();

		if ( $context->getUser()->isAllowed( 'translate' ) ) {
			// The row must be closed, not reopened.
			$s .= '<tr>' . Xml::tags( 'td', $colspan, $this->getButtons() ) . '</tr>';
		}

		$s .= Xml::closeElement( 'table' );

		return Xml::tags(
			'form',
			[
				'method' => 'post',
				'action' => $context->getRequest()->getRequestURL()
			],
			$s
		);
	}

	/**
	 * @return string HTML of the comment field and the save button.
	 */
	public function getButtons() {
		return Xml::inputLabel(
			wfMessage( 'translate-magic-cm-comment' )->text(),
			'comment',
			'sp-translate-magic-comment'
		) .
		Xml::submitButton(
			wfMessage( 'translate-magic-cm-save' )->text(),
			[ 'name' => 'savetodb' ]
		);
	}

	/**
	 * Joins values into a trimmed, comma separated string.
	 *
	 * @param array|string|null $element
	 * @return string
	 */
	public function formatElement( $element ) {
		// Check the type first: count() on a non-array throws on PHP 8.
		if ( !is_array( $element ) ) {
			return trim( (string)$element );
		}

		if ( !count( $element ) ) {
			return '';
		}

		return trim( implode( ', ', array_map( 'trim', $element ) ) );
	}

	/**
	 * @param string|int $key
	 * @return string Name of the form field for the key.
	 */
	protected function getKeyForEdit( $key ) {
		return Sanitizer::escapeId( 'sp-translate-magic-cm-' . $this->id . $key );
	}

	/**
	 * @param string|int $key
	 * @param string $contents Current value of the field.
	 * @return string HTML of the input field.
	 */
	public function editElement( $key, $contents ) {
		return Xml::input( $this->getKeyForEdit( $key ), 40, $contents, [
			'lang' => $this->targetHtmlCode,
			'dir' => $this->targetDir,
		] );
	}

	// Output

	// Save to database

	/**
	 * @return string Message key, including language, the data is saved under.
	 */
	protected function getKeyForSave() {
		return $this->databaseMsg . '/' . $this->language;
	}

	/**
	 * Builds the "key = value1, value2" text from the submitted form fields.
	 *
	 * @param WebRequest $request
	 * @return string
	 */
	protected function formatForSave( WebRequest $request ) {
		$text = '';

		// Do not replace spaces by underscores for magic words. See bug T48613
		$replaceSpace = $request->getVal( 'module' ) !== 'magic';

		foreach ( array_keys( $this->data ) as $group ) {
			foreach ( $this->getIterator( $group ) as $key ) {
				$data = $request->getText( $this->getKeyForEdit( $key ) );
				// Make a nice array out of the submit with trimmed values.
				$data = array_map( 'trim', explode( ',', $data ) );

				if ( $replaceSpace ) {
					// Normalise: Replace spaces with underscores.
					$data = str_replace( ' ', '_', $data );
				}

				// Create final format.
				$data = implode( ', ', $data );
				if ( $data !== '' ) {
					$text .= "$key = $data\n";
				}
			}
		}

		return $text;
	}

	/**
	 * Saves the submitted values to a page in the MediaWiki namespace.
	 *
	 * @param WebRequest $request
	 * @throws MWException When the edit fails.
	 */
	public function save( $request ) {
		$title = Title::newFromText( 'MediaWiki:' . $this->getKeyForSave() );
		$page = WikiPage::factory( $title );

		$data = "# DO NOT EDIT THIS PAGE DIRECTLY! Use [[Special:AdvancedTranslate]].\n<pre>\n" .
			$this->formatForSave( $request ) . "\n</pre>";

		$comment = $request->getText(
			'comment',
			wfMessage( 'translate-magic-cm-updatedusing' )->inContentLanguage()->text()
		);

		$content = ContentHandler::makeContent( $data, $title );
		$status = $page->doEditContent( $content, $comment );

		if ( $status === false || ( is_object( $status ) && !$status->isOK() ) ) {
			throw new MWException( wfMessage( 'translate-magic-cm-savefailed' )->text() );
		}

		/* Reset outdated array */
		$this->init = false;
	}

	// Save to database

	// Export

	/**
	 * Collects validation errors (as HTML strings) for the selected groups.
	 *
	 * @param array &$errors
	 * @param string|string[]|bool $filter Group id(s) to check, or false for all.
	 */
	public function validate( array &$errors, $filter = false ) {
		$used = [];
		foreach ( array_keys( $this->data ) as $group ) {
			if ( $filter !== false && !in_array( $group, (array)$filter, true ) ) {
				continue;
			}

			$this->validateEach( $errors, $group, $used );
		}
	}

	/**
	 * Reports empty values and values that are used for more than one key.
	 *
	 * @param array &$errors
	 * @param string $group Group id.
	 * @param array &$used Map of value to the key that first used it.
	 */
	protected function validateEach( array &$errors, $group, &$used ) {
		foreach ( $this->getIterator( $group ) as $key ) {
			$values = $this->val( $group, self::LANG_CURRENT, $key );
			$link = Xml::element( 'a', [ 'href' => "#mw-sp-magic-$key" ], $key );

			if ( count( $values ) !== count( array_filter( $values ) ) ) {
				$errors[] = "There is empty value in $link.";
			}

			foreach ( $values as $v ) {
				if ( isset( $used[$v] ) ) {
					$otherkey = $used[$v];
					$first = Xml::element(
						'a',
						[ 'href' => "#mw-sp-magic-$otherkey" ],
						$otherkey
					);
					// The value is translator input going into an HTML string.
					$shown = htmlspecialchars( $v );
					$errors[] = "Translation <strong>$shown</strong> is used more than once " .
						"for $first and $link.";
				} else {
					$used[$v] = $key;
				}
			}
		}
	}

	/**
	 * Exports the selected groups as PHP code, preceded by validation errors
	 * as "#!!#" comment lines.
	 *
	 * @param string|string[]|bool $filter Group id(s) to export, or false for all.
	 * @return string
	 */
	public function export( $filter = false ) {
		$text = '';
		$errors = [];
		$this->validate( $errors, $filter );
		foreach ( $errors as $error ) {
			$text .= "#!!# $error\n";
		}

		foreach ( $this->getGroups() as $group => $data ) {
			if ( $filter !== false && !in_array( $group, (array)$filter, true ) ) {
				continue;
			}

			$text .= $this->exportEach( $group, $data );
		}

		return $text;
	}

	/**
	 * Exports one group as a PHP array assignment.
	 *
	 * @param string $group Group id.
	 * @param array $data Loaded group definition.
	 * @return string PHP code, or an empty string if nothing is translated.
	 */
	protected function exportEach( $group, $data ) {
		$var = $data['var'];
		$items = $data['data'];

		$extra = $data['code'] ? "['{$this->language}']" : '';

		$out = '';

		$indexKeys = [];
		foreach ( array_keys( $items[self::LANG_MASTER] ) as $key ) {
			$indexKeys[$key] = $this->constants[$key] ?? "'$key'";
		}

		// max() rejects an empty array on PHP 8, so guard the no-definitions case.
		$longest = $indexKeys ? max( array_map( 'strlen', $indexKeys ) ) : 0;
		$padTo = $longest + 3;

		foreach ( $this->getIterator( $group ) as $key ) {
			// Pad with spaces until the line is longer than $padTo.
			$temp = str_pad( "\t{$indexKeys[$key]}", $padTo + 1 );

			$from = self::LANG_CURRENT;
			// Abuse of the firstMagic property, should use something proper
			if ( $this->firstMagic ) {
				$from = self::LANG_CHAIN;
			}

			// Check for translations
			$val = $this->val( $group, self::LANG_CURRENT, $key );
			if ( !$val || !count( $val ) ) {
				continue;
			}

			// Then get the data we really want
			$val = $this->val( $group, $from, $key );

			// Remove duplicated entries, causes problems with magic words
			// Just to be sure, it should not be possible to save invalid data anymore
			$val = array_unique( $val /* @todo SORT_REGULAR */ );

			// So do empty elements...
			foreach ( $val as $k => $v ) {
				if ( $v === '' ) {
					unset( $val[$k] );
				}
			}

			// Another check
			if ( !count( $val ) ) {
				continue;
			}

			$normalized = array_map( [ $this, 'normalize' ], $val );
			if ( $this->elementsInArray ) {
				$temp .= '=> array( ' . implode( ', ', $normalized ) . ' ),';
			} else {
				$temp .= '=> ' . implode( ', ', $normalized ) . ',';
			}
			$out .= $temp . "\n";
		}

		if ( $out === '' ) {
			return '';
		}

		$text = "# {$data['label']} \n";
		$text .= "\$$var$extra = array(\n" . $out . ");\n\n";

		return $text;
	}

	/**
	 * Returns string with quotes that should be valid php
	 * @param string $data
	 * @throws MWException
	 * @return string
	 */
	protected function normalize( $data ) {
		# Escape quotes
		if ( !is_string( $data ) ) {
			throw new MWException();
		}
		$data = preg_replace( "/(?<!\\\\)'/", "\'", trim( $data ) );

		return "'$data'";
	}

	// Export

	/**
	 * Hook for subclasses to emphasise some of the (already escaped) values.
	 *
	 * @param string|int $key
	 * @param string[] $values
	 * @return string[]
	 */
	public function highlight( $key, $values ) {
		return $values;
	}
}

/**
 * Adds support for translating special page aliases via Special:AdvancedTranslate.
 *
 * Besides MediaWiki core, every MediaWikiExtensionMessageGroup whose
 * configuration names an existing alias file becomes a group.
 */
class SpecialPageAliasesCM extends ComplexMessages {
	protected $id = SpecialMagic::MODULE_SPECIAL;
	protected $databaseMsg = 'sp-translate-data-SpecialPageAliases';
	protected $chainable = true;

	/**
	 * @param string $code Target language code.
	 */
	public function __construct( $code ) {
		parent::__construct( $code );
		$this->data['core'] = [
			'label' => 'MediaWiki Core',
			'var' => 'specialPageAliases',
			'file' => Language::getMessagesFileName( self::PLACEHOLDER ),
			'code' => false,
		];

		$groups = MessageGroups::singleton()->getGroups();
		foreach ( $groups as $g ) {
			if ( !$g instanceof MediaWikiExtensionMessageGroup ) {
				continue;
			}
			$conf = $g->getConfiguration();
			if ( !isset( $conf['FILES']['aliasFileSource'] ) ) {
				continue;
			}
			$file = $g->replaceVariables( $conf['FILES']['aliasFileSource'], 'en' );
			if ( file_exists( $file ) ) {
				$this->data[$g->getId()] = [
					'label' => $g->getLabel(),
					'var' => 'specialPageAliases',
					'file' => $file,
					'code' => $code,
				];
			}
		}
	}

	/**
	 * Emphasises the first value of every key.
	 *
	 * @param string|int $key
	 * @param string[] $values
	 * @return string[]
	 * @throws MWException When the list has values but no element at index 0.
	 */
	public function highlight( $key, $values ) {
		if ( count( $values ) ) {
			if ( !isset( $values[0] ) ) {
				throw new MWException( 'Something missing from values: ' .
					print_r( $values, true ) );
			}

			$values[0] = "<strong>$values[0]</strong>";
		}

		return $values;
	}

	/**
	 * Adds checks that every alias is a valid special page title in
	 * normalised form.
	 *
	 * @param array &$errors
	 * @param string $group Group id.
	 * @param array &$used
	 */
	protected function validateEach( array &$errors, $group, &$used ) {
		parent::validateEach( $errors, $group, $used );
		foreach ( $this->getIterator( $group ) as $key ) {
			$values = $this->val( $group, self::LANG_CURRENT, $key );

			foreach ( $values as $alias ) {
				Wikimedia\suppressWarnings();
				$title = SpecialPage::getTitleFor( $alias );
				Wikimedia\restoreWarnings();
				$link = Xml::element( 'a', [ 'href' => "#mw-sp-magic-$key" ], $key );
				// The alias is translator input going into an HTML string.
				$shown = htmlspecialchars( $alias );
				if ( $title === null ) {
					if ( $alias !== '' ) {
						// Empty values checked elsewhere
						$errors[] = "Translation <strong>$shown</strong> is invalid title in $link.";
					}
				} else {
					$text = $title->getText();
					$dbkey = $title->getDBkey();
					if ( $text !== $alias && $dbkey !== $alias ) {
						$normalised = htmlspecialchars( $text );
						$errors[] = "Translation <strong>$shown</strong> for $link is not in " .
							"normalised form, which is <strong>$normalised</strong>";
					}
				}
			}
		}
	}
}
/**
 * Adds support for translating magic words via Special:AdvancedTranslate.
 *
 * MediaWiki core is always a group; extension groups are added when their
 * configuration points to a magic word file that exists.
 */
class MagicWordsCM extends ComplexMessages {
	protected $id = SpecialMagic::MODULE_MAGIC;
	protected $firstMagic = true;
	protected $chainable = true;
	protected $databaseMsg = 'sp-translate-data-MagicWords';

	/**
	 * @param string $code Target language code.
	 */
	public function __construct( $code ) {
		parent::__construct( $code );

		$coreGroup = [
			'label' => 'MediaWiki Core',
			'var' => 'magicWords',
			'file' => Language::getMessagesFileName( self::PLACEHOLDER ),
			'code' => false,
		];
		$this->data['core'] = $coreGroup;

		foreach ( MessageGroups::singleton()->getGroups() as $messageGroup ) {
			if ( $messageGroup instanceof MediaWikiExtensionMessageGroup ) {
				$configuration = $messageGroup->getConfiguration();
				if ( isset( $configuration['FILES']['magicFileSource'] ) ) {
					$path = $messageGroup->replaceVariables(
						$configuration['FILES']['magicFileSource'],
						'en'
					);
					// Only groups whose magic word file is really there are offered.
					if ( file_exists( $path ) ) {
						$this->data[$messageGroup->getId()] = [
							'label' => $messageGroup->getLabel(),
							'var' => 'magicWords',
							'file' => $path,
							'code' => $code,
						];
					}
				}
			}
		}
	}

	/**
	 * Emphasises the first value of the 'redirect' magic word; other keys
	 * are returned untouched.
	 *
	 * @param string|int $key
	 * @param string[] $values
	 * @return string[]
	 */
	public function highlight( $key, $values ) {
		if ( !count( $values ) || $key !== 'redirect' ) {
			return $values;
		}

		$values[0] = "<strong>$values[0]</strong>";

		return $values;
	}
}
/**
 * Adds support for translating namespace names via Special:AdvancedTranslate.
 *
 * Only MediaWiki core is offered as a group, and each namespace holds a
 * single name instead of a list.
 */
class NamespaceCM extends ComplexMessages {
	protected $id = SpecialMagic::MODULE_NAMESPACE;
	protected $elementsInArray = false;
	protected $databaseMsg = 'sp-translate-data-Namespaces';

	/** Constant names used as array keys in the export, indexed by namespace number. */
	protected $constants = [
		-2 => 'NS_MEDIA',
		-1 => 'NS_SPECIAL',
		0 => 'NS_MAIN',
		1 => 'NS_TALK',
		2 => 'NS_USER',
		3 => 'NS_USER_TALK',
		4 => 'NS_PROJECT',
		5 => 'NS_PROJECT_TALK',
		6 => 'NS_FILE',
		7 => 'NS_FILE_TALK',
		8 => 'NS_MEDIAWIKI',
		9 => 'NS_MEDIAWIKI_TALK',
		10 => 'NS_TEMPLATE',
		11 => 'NS_TEMPLATE_TALK',
		12 => 'NS_HELP',
		13 => 'NS_HELP_TALK',
		14 => 'NS_CATEGORY',
		15 => 'NS_CATEGORY_TALK',
	];

	/**
	 * @param string $code Target language code.
	 */
	public function __construct( $code ) {
		parent::__construct( $code );

		$coreGroup = [
			'label' => 'MediaWiki Core',
			'var' => 'namespaceNames',
			'file' => Language::getMessagesFileName( self::PLACEHOLDER ),
			'code' => false,
		];
		$this->data['core'] = $coreGroup;
	}

	/**
	 * Runs the generic checks, then reports namespaces with more than one name.
	 *
	 * @param array &$errors
	 * @param string $group Group id.
	 * @param array &$used
	 */
	protected function validateEach( array &$errors, $group, &$used ) {
		parent::validateEach( $errors, $group, $used );

		foreach ( $this->getIterator( $group ) as $namespace ) {
			$names = $this->val( $group, self::LANG_CURRENT, $namespace );
			if ( count( $names ) <= 1 ) {
				continue;
			}

			$link = Xml::element( 'a', [ 'href' => "#mw-sp-magic-$namespace" ], $namespace );
			$errors[] = "Namespace $link can have only one translation. Replace the " .
				'translation with a new one, and notify staff about the change.';
		}
	}
}
/**
 * Manipulates ExtensionName.i18n.php style files.
 *
 * Such a file keeps every language in one PHP file as consecutive
 * `$messages['code'] = array( ... );` sections after a header that ends with
 * `$messages = array();`. Reading extracts the section of one language;
 * writing replaces that section and re-assembles the whole file.
 *
 * @ingroup FFS
 * @since 2012-10-20
 */
class MediaWikiExtensionFFS extends SimpleFFS {
	/**
	 * @return string Always 'write'.
	 */
	public function supportsFuzzy() {
		return 'write';
	}

	/**
	 * @return string[] File extensions handled by this format.
	 */
	public function getFileExtensions() {
		return array( '.i18n.php' );
	}

	/**
	 * To avoid parsing full files again and again when reading or exporting
	 * multiple languages, keep cache of the sections of the latest active file.
	 * Indexed by file name, then by 'sections' (raw section text per language
	 * code) and 'parsed' (result of read() per language code).
	 * @var array
	 */
	protected static $cache = array();

	/**
	 * Splits the file into its header and the per-language sections.
	 *
	 * @param string $data Full file contents
	 * @param string $filename Full path to file for debugging
	 * @return string[] Sections indexed by language code, or 0 for header section
	 * @throws MWException
	 */
	protected function splitSections( $data, $filename = 'unknown' ) {
		$data = SimpleFFS::fixNewLines( $data );

		// Everything up to and including this statement is the file header.
		$splitter = '$messages = array();';

		$pos = strpos( $data, $splitter );
		if ( $pos === false ) {
			throw new MWException( "MWEFFS1: File $filename: splitter not found" );
		}

		$offset = $pos + strlen( $splitter );
		$header = substr( $data, 0, $offset );

		// A section is an optional doc comment followed by an assignment that
		// ends with ");" on its own line (extended syntax, whitespace is ignored).
		$pattern = '(?: /\*\* .*? \*/ \n )? (?: \\$.*? \n\);(?:\n\n|\s+\z) )';
		$regexp = "~$pattern~xsu";
		$matches = array();
		preg_match_all( $regexp, $data, $matches, PREG_SET_ORDER, $offset );

		$sections = array();
		$sections[] = $header;

		// NOTE(review): $data, $pattern, $regexp and $matches are reused inside
		// the loop; foreach iterates the array by value, so reassigning $matches
		// in the body does not disturb the iteration.
		foreach ( $matches as $data ) {
			$pattern = "\\\$messages\['([a-z-]+)'\]";
			$regexp = "~$pattern~su";
			$matches = array();
			if ( !preg_match( $regexp, $data[0], $matches ) ) {
				throw new MWException( "MWEFFS2: File $filename: malformed section: {$data[0]}" );
			}
			$code = $matches[1];
			// Normalize number of newlines after each section
			$sections[$code] = rtrim( $data[0] );
		}

		return $sections;
	}

	/**
	 * Reads the messages of one language, using the static section cache.
	 *
	 * @param string $code Language code.
	 * @return array|bool False when the file or the language section is missing.
	 */
	public function read( $code ) {
		$filename = $this->group->getSourceFilePath( $code );
		if ( !file_exists( $filename ) ) {
			return false;
		}

		if ( isset( self::$cache[$filename]['parsed'][$code] ) ) {
			return self::$cache[$filename]['parsed'][$code];
		}

		if ( !isset( self::$cache[$filename] ) ) {
			// Clear the cache if the filename changes to reduce memory use
			self::$cache = array();

			$contents = file_get_contents( $filename );
			self::$cache[$filename]['sections'] =
				$this->splitSections( $contents, $filename );
		}

		// Shorten
		$cache = &self::$cache[$filename];

		$value = false;
		if ( isset( $cache['sections'][$code] ) ) {
			$value = $this->readFromVariable( $cache['sections'][$code] );
		}

		$cache['parsed'][$code] = $value;

		return $value;
	}

	/**
	 * Evaluates one section and returns its mangled messages.
	 *
	 * @param string $data PHP code of a single language section.
	 * @return array Parsed data.
	 * @throws MWException When the code does not define exactly one language.
	 */
	public function readFromVariable( $data ) {
		$messages = array();
		// NOTE(review): this executes the section text as PHP code. It is only
		// acceptable if the files come from a trusted source - confirm against
		// how the message files get onto disk.
		eval( $data );

		$c = count( $messages );
		if ( $c !== 1 ) {
			throw new MWException( "MWEFFS3: Expected 1, got $c: $data" );
		}

		// The section assigned $messages['code']; take the only element.
		$messages = array_shift( $messages );
		$mangler = $this->group->getMangler();
		$messages = $mangler->mangle( $messages );

		return array(
			'MESSAGES' => $messages,
		);
	}

	// Handled in writeReal
	protected function tryReadSource( $filename, MessageCollection $collection ) {
	}

	/**
	 * Builds the full file contents with the section of the collection's
	 * language replaced by a freshly generated one.
	 *
	 * @param MessageCollection $collection
	 * @return string Empty string when the collection has no translations.
	 */
	protected function writeReal( MessageCollection $collection ) {
		$mangler = $this->group->getMangler();
		$code = $collection->getLanguage();

		$block = $this->generateMessageBlock( $collection, $mangler );
		if ( $block === false ) {
			return '';
		}

		// Ugly code, relies on side effects
		// Avoid parsing stuff with fake language code
		// Premature optimization
		// The call fills self::$cache with the sections of the file; its return
		// value is not needed.
		$this->read( 'mul' );
		$filename = $this->group->getSourceFilePath( $code );
		$cache = &self::$cache[$filename];

		// Generating authors
		if ( isset( $cache['sections'][$code] ) ) {
			// More premature optimization
			$fromFile = self::parseAuthorsFromString( $cache['sections'][$code] );
			$collection->addCollectionAuthors( $fromFile );
		}

		$authors = $collection->getAuthors();
		$authors = $this->filterAuthors( $authors, $code );

		$authorList = '';
		foreach ( $authors as $author ) {
			$authorList .= "\n * @author $author";
		}

		// And putting all together
		$name = TranslateUtils::getLanguageName( $code );
		$native = TranslateUtils::getLanguageName( $code, $code );

		$section = <<<PHP
/** $name ($native)$authorList
 */
\$messages['$code'] = array($block);
PHP;

		// Store the written part, so that when next language is called,
		// the new version will be used (instead of the old parsed version
		$cache['sections'][$code] = $section;

		// Make a copy we can alter
		$sections = $cache['sections'];
		$priority = array();

		// The header, the source language and the documentation language go
		// first, in this order; all other sections follow sorted by code.
		global $wgTranslateDocumentationLanguageCode;
		$codes = array(
			0, // File header
			$this->group->getSourceLanguage(),
			$wgTranslateDocumentationLanguageCode,
		);
		foreach ( $codes as $pcode ) {
			if ( isset( $sections[$pcode] ) ) {
				$priority[] = $sections[$pcode];
				unset( $sections[$pcode] );
			}
		}

		ksort( $sections );

		return implode( "\n\n", $priority ) . "\n\n" . implode( "\n\n", $sections ) . "\n";
	}

	/**
	 * Generates the "key => value," lines of a language section.
	 *
	 * @param MessageCollection $collection
	 * @param StringMatcher $mangler
	 * @return string|bool False when there is nothing to write.
	 */
	protected function generateMessageBlock( MessageCollection $collection, StringMatcher $mangler ) {
		$block = '';
		/**
		 * @var TMessage $m
		 */
		foreach ( $collection as $key => $m ) {
			$value = $m->translation();
			if ( $value === null ) {
				continue;
			}

			$key = $mangler->unmangle( $key );
			// The fuzzy marker is removed from the text and written as a trailing comment.
			$value = str_replace( TRANSLATE_FUZZY, '', $value );
			$fuzzy = $m->hasTag( 'fuzzy' ) ? ' # Fuzzy' : '';

			$key = self::quote( $key );
			$value = self::quote( $value );
			$block .= "\t$key => $value,$fuzzy\n";
		}

		// Do not create empty sections
		if ( $block === '' ) {
			return false;
		}

		return "\n$block";
	}

	/**
	 * Scans for \@author tags in the string.
	 * @param string $string String containing the comments of a section
	 * @return string[] List of authors
	 */
	protected static function parseAuthorsFromString( $string ) {
		preg_match_all( '/@author (.*)/', $string, $m );

		return $m[1];
	}

	/**
	 * Tries to find optimal way to quote a string by choosing
	 * either double quotes or single quotes depending on how
	 * many escapes are needed.
	 * @param string $value The string to quote.
	 * @return string String suitable for inclusion in PHP code
	 */
	protected static function quote( $value ) {
		# Check for the appropriate apostrophe and add the value
		# Quote \ here, because it needs always escaping
		$value = addcslashes( $value, '\\' );

		# For readability
		$single = "'";
		$double = '"';
		$quote = $single; // Default

		# It is safe to use '-quoting, unless there is '-quote in the text
		if ( strpos( $value, $single ) !== false ) {
			# In case there are no variables that need to be escaped, just use "-quote
			if ( strpos( $value, $double ) === false && !preg_match( '/\$[^0-9]/', $value ) ) {
				$quote = $double;
			} else {
				# Something needs quoting, so pick the quote which causes less quoting
				$doubleEsc = substr_count( $value, $double ) + substr_count( $value, '$' );
				$singleEsc = substr_count( $value, $single );

				if ( $doubleEsc < $singleEsc ) {
					$quote = $double;
					// Inside double quotes the dollar sign has to be escaped as well.
					$extra = '$';
				} else {
					$extra = '';
				}

				$value = addcslashes( $value, $quote . $extra );
			}
		}

		return $quote . $value . $quote;
	}
}
+ * + * @since 2012-03-22 + * @param int $value + */ + public function setNamespace( $value ) { + $this->namespace = $value; + } + + /** + * Makes an group id from extension name + * @param string $name + * @return string + */ + public static function foldId( $name ) { + return preg_replace( '/\s+/', '', strtolower( $name ) ); + } + + /** + * Hook: TranslatePostInitGroups + * @param array &$list + * @param array &$deps + * @return true + */ + public function register( array &$list, array &$deps ) { + $groups = $this->parseFile(); + $groups = $this->processGroups( $groups ); + foreach ( $groups as $id => $g ) { + $list[$id] = $this->createMessageGroup( $id, $g ); + } + + $deps[] = new FileDependency( $this->definitionFile ); + + return true; + } + + /** + * Creates MediaWikiExtensionMessageGroup objects from parsed data. + * @param string $id unique group id already prefixed + * @param array $info array of group info + * @return MediaWikiExtensionMessageGroup + */ + protected function createMessageGroup( $id, $info ) { + $conf = []; + $conf['BASIC']['class'] = 'MediaWikiExtensionMessageGroup'; + $conf['BASIC']['id'] = $id; + $conf['BASIC']['namespace'] = $this->namespace; + $conf['BASIC']['label'] = $info['name']; + + if ( isset( $info['desc'] ) ) { + $conf['BASIC']['description'] = $info['desc']; + } else { + $conf['BASIC']['descriptionmsg'] = $info['descmsg']; + $conf['BASIC']['extensionurl'] = $info['url']; + } + + $conf['FILES']['class'] = 'JsonFFS'; + $conf['FILES']['sourcePattern'] = $this->path . '/' . $info['file']; + + // @todo Find a better way + if ( isset( $info['aliasfile'] ) ) { + $conf['FILES']['aliasFileSource'] = $this->path . '/' . $info['aliasfile']; + $conf['FILES']['aliasFile'] = $info['aliasfile']; + } + if ( isset( $info['magicfile'] ) ) { + $conf['FILES']['magicFileSource'] = $this->path . '/' . 
$info['magicfile']; + $conf['FILES']['magicFile'] = $info['magicfile']; + } + + if ( isset( $info['prefix'] ) ) { + $conf['MANGLER']['class'] = 'StringMatcher'; + $conf['MANGLER']['prefix'] = $info['prefix']; + $conf['MANGLER']['patterns'] = $info['mangle']; + + $mangler = new StringMatcher( $info['prefix'], $info['mangle'] ); + if ( isset( $info['ignored'] ) ) { + $info['ignored'] = $mangler->mangle( $info['ignored'] ); + } + if ( isset( $info['optional'] ) ) { + $info['optional'] = $mangler->mangle( $info['optional'] ); + } + } + + $conf['CHECKER']['class'] = 'MediaWikiMessageChecker'; + $conf['CHECKER']['checks'] = [ + 'pluralCheck', + 'pluralFormsCheck', + 'wikiParameterCheck', + 'wikiLinksCheck', + 'braceBalanceCheck', + 'pagenameMessagesCheck', + 'miscMWChecks', + ]; + + $conf['INSERTABLES']['class'] = 'MediaWikiInsertablesSuggester'; + + if ( isset( $info['optional'] ) ) { + $conf['TAGS']['optional'] = $info['optional']; + } + if ( isset( $info['ignored'] ) ) { + $conf['TAGS']['ignored'] = $info['ignored']; + } + + if ( isset( $info['languages'] ) ) { + $conf['LANGUAGES'] = [ + 'whitelist' => [], + 'blacklist' => [], + ]; + + foreach ( $info['languages'] as $tagSpec ) { + if ( preg_match( '/^([+-])?(.+)$/', $tagSpec, $m ) ) { + list( , $sign, $tag ) = $m; + if ( $sign === '+' ) { + $conf['LANGUAGES']['whitelist'][] = $tag; + } elseif ( $sign === '-' ) { + $conf['LANGUAGES']['blacklist'][] = $tag; + } else { + $conf['LANGUAGES']['blacklist'] = '*'; + $conf['LANGUAGES']['whitelist'][] = $tag; + } + } + } + } + + return MessageGroupBase::factory( $conf ); + } + + protected function parseFile() { + $defines = file_get_contents( $this->definitionFile ); + $linefeed = '(\r\n|\n)'; + $sections = array_map( + 'trim', + preg_split( "/$linefeed{2,}/", $defines, -1, PREG_SPLIT_NO_EMPTY ) + ); + $groups = []; + + foreach ( $sections as $section ) { + $lines = array_map( 'trim', preg_split( "/$linefeed/", $section ) ); + $newgroup = []; + + foreach ( $lines as $line ) { 
+ if ( $line === '' || $line[0] === '#' ) { + continue; + } + + if ( strpos( $line, '=' ) === false ) { + if ( empty( $newgroup['name'] ) ) { + $newgroup['name'] = $line; + } else { + throw new MWException( 'Trying to define name twice: ' . $line ); + } + } else { + list( $key, $value ) = array_map( 'trim', explode( '=', $line, 2 ) ); + switch ( $key ) { + case 'aliasfile': + case 'desc': + case 'descmsg': + case 'file': + case 'id': + case 'magicfile': + case 'var': + $newgroup[$key] = $value; + break; + case 'optional': + case 'ignored': + case 'languages': + $values = array_map( 'trim', explode( ',', $value ) ); + if ( !isset( $newgroup[$key] ) ) { + $newgroup[$key] = []; + } + $newgroup[$key] = array_merge( $newgroup[$key], $values ); + break; + case 'prefix': + list( $prefix, $messages ) = array_map( + 'trim', + explode( '|', $value, 2 ) + ); + if ( isset( $newgroup['prefix'] ) && $newgroup['prefix'] !== $prefix ) { + throw new MWException( + "Only one prefix supported: {$newgroup['prefix']} !== $prefix" + ); + } + $newgroup['prefix'] = $prefix; + + if ( !isset( $newgroup['mangle'] ) ) { + $newgroup['mangle'] = []; + } + + $messages = array_map( 'trim', explode( ',', $messages ) ); + $newgroup['mangle'] = array_merge( $newgroup['mangle'], $messages ); + break; + default: + throw new MWException( 'Unknown key:' . $key ); + } + } + } + + if ( count( $newgroup ) ) { + if ( empty( $newgroup['name'] ) ) { + throw new MWException( "Name missing\n" . print_r( $newgroup, true ) ); + } + $groups[] = $newgroup; + } + } + + return $groups; + } + + protected function processGroups( $groups ) { + $configureData = $this->loadConfigureExtensionData(); + $fixedGroups = []; + foreach ( $groups as $g ) { + if ( !is_array( $g ) ) { + $g = [ $g ]; + } + + $name = $g['name']; + + if ( isset( $g['id'] ) ) { + $id = $g['id']; + } else { + $id = $this->idPrefix . 
preg_replace( '/\s+/', '', strtolower( $name ) ); + } + + if ( !isset( $g['file'] ) ) { + $file = preg_replace( '/\s+/', '', "$name/i18n/%CODE%.json" ); + } else { + $file = $g['file']; + } + + if ( isset( $g['descmsg'] ) ) { + $descmsg = $g['descmsg']; + } else { + $descmsg = str_replace( $this->idPrefix, '', $id ) . '-desc'; + } + + $configureId = self::foldId( $name ); + if ( isset( $configureData[$configureId]['url'] ) ) { + $url = $configureData[$configureId]['url']; + } else { + $url = false; + } + + $newgroup = [ + 'name' => $name, + 'file' => $file, + 'descmsg' => $descmsg, + 'url' => $url, + ]; + + $copyvars = [ + 'aliasfile', + 'desc', + 'ignored', + 'languages', + 'magicfile', + 'mangle', + 'optional', + 'prefix', + 'var', + ]; + + foreach ( $copyvars as $var ) { + if ( isset( $g[$var] ) ) { + $newgroup[$var] = $g[$var]; + } + } + + // Mark some fixed form optional messages automatically + if ( !isset( $newgroup['optional' ] ) ) { + $newgroup['optional'] = []; + } + + // Mark extension name and skin names optional. 
+ $newgroup['optional'][] = '*-extensionname'; + $newgroup['optional'][] = 'skinname-*'; + + $fixedGroups[$id] = $newgroup; + } + + return $fixedGroups; + } + + protected function loadConfigureExtensionData() { + if ( !$this->useConfigure ) { + return []; + } + + global $wgAutoloadClasses; + + $postfix = 'Configure/load_txt_def/TxtDef.php'; + if ( !file_exists( "{$this->path}/$postfix" ) ) { + return []; + } + + $wgAutoloadClasses['TxtDef'] = "{$this->path}/$postfix"; + $tmp = TxtDef::loadFromFile( "{$this->path}/Configure/settings/Settings-ext.txt" ); + + return array_combine( + array_map( [ __CLASS__, 'foldId' ], array_keys( $tmp ) ), + array_values( $tmp ) + ); + } +} diff --git a/www/wiki/extensions/Translate/ffs/RubyYamlFFS.php b/www/wiki/extensions/Translate/ffs/RubyYamlFFS.php new file mode 100644 index 00000000..f0c978db --- /dev/null +++ b/www/wiki/extensions/Translate/ffs/RubyYamlFFS.php @@ -0,0 +1,170 @@ +<?php + +/** + * Extends YamlFFS with Ruby (on Rails) style plural support. Supports subkeys + * zero, one, many, few, other and two for each message using plural with + * {{count}} variable. + * @ingroup FFS + */ +class RubyYamlFFS extends YamlFFS { + protected static $pluralWords = array( + 'zero' => 1, + 'one' => 1, + 'many' => 1, + 'few' => 1, + 'other' => 1, + 'two' => 1 + ); + + public function getFileExtensions() { + return array( '.yml', '.yaml' ); + } + + /** + * Flattens ruby plural arrays into special plural syntax. 
+ * + * @param array $messages Array of keys and values + * + * @throws MWException + * @return bool|string + */ + public function flattenPlural( $messages ) { + + $pluralKeys = false; + $nonPluralKeys = false; + foreach ( $messages as $key => $value ) { + if ( is_array( $value ) ) { + # Plurals can only happen in the lowest level of the structure + return false; + } + + # Check if we find any reserved plural keyword + if ( isset( self::$pluralWords[$key] ) ) { + $pluralKeys = true; + } else { + $nonPluralKeys = true; + } + } + + # No plural keys at all, we can skip + if ( !$pluralKeys ) { + return false; + } + + # Mixed plural keys with other keys, should not happen + if ( $nonPluralKeys ) { + $keys = implode( ', ', array_keys( $messages ) ); + throw new MWException( "Reserved plural keywords mixed with other keys: $keys." ); + } + + $pls = '{{PLURAL'; + foreach ( $messages as $key => $value ) { + if ( $key === 'other' ) { + continue; + } + + $pls .= "|$key=$value"; + } + + // Put the "other" alternative last, without other= prefix. + $other = isset( $messages['other'] ) ? '|' . $messages['other'] : ''; + $pls .= "$other}}"; + + return $pls; + } + + /** + * Converts the special plural syntax to array or ruby style plurals + * + * @param string $key Message key prefix + * @param string $message The plural string + * + * @return bool|array + */ + public function unflattenPlural( $key, $message ) { + // Quick escape. + if ( strpos( $message, '{{PLURAL' ) === false ) { + return array( $key => $message ); + } + + /* + * Replace all variables with placeholders. Possible source of bugs + * if characters other than those given below are used. 
+ */ + $regex = '~\{[a-zA-Z_-]+}~'; + $placeholders = array(); + $match = array(); + + while ( preg_match( $regex, $message, $match ) ) { + $uniqkey = TranslateUtils::getPlaceholder(); + $placeholders[$uniqkey] = $match[0]; + $search = preg_quote( $match[0], '~' ); + $message = preg_replace( "~$search~", $uniqkey, $message ); + } + + // Then replace (possibly multiple) plural instances with placeholders. + $regex = '~\{\{PLURAL\|(.*?)}}~s'; + $matches = array(); + $match = array(); + + while ( preg_match( $regex, $message, $match ) ) { + $uniqkey = TranslateUtils::getPlaceholder(); + $matches[$uniqkey] = $match; + $message = preg_replace( $regex, $uniqkey, $message, 1 ); + } + + // No plurals, should not happen. + if ( !count( $matches ) ) { + return false; + } + + // The final array of alternative plural forms. + $alts = array(); + + /* + * Then loop through each plural block, replacing the placeholders + * to construct the alternatives. Produces invalid output if there are + * multiple plural blocks which don't have the same set of keys. + */ + $pluralChoice = implode( '|', array_keys( self::$pluralWords ) ); + $regex = "~($pluralChoice)\s*=\s*(.+)~s"; + foreach ( $matches as $ph => $plu ) { + $forms = explode( '|', $plu[1] ); + + foreach ( $forms as $form ) { + if ( $form === '' ) { + continue; + } + + $match = array(); + if ( preg_match( $regex, $form, $match ) ) { + $formWord = "$key.{$match[1]}"; + $value = $match[2]; + } else { + $formWord = "$key.other"; + $value = $form; + } + + if ( !isset( $alts[$formWord] ) ) { + $alts[$formWord] = $message; + } + + $string = $alts[$formWord]; + $alts[$formWord] = str_replace( $ph, $value, $string ); + } + } + + // Replace other variables. 
+ foreach ( $alts as &$value ) { + $value = str_replace( array_keys( $placeholders ), array_values( $placeholders ), $value ); + } + + if ( !isset( $alts["$key.other"] ) ) { + wfWarn( "Other not set for key $key" ); + + return false; + } + + return $alts; + } +} diff --git a/www/wiki/extensions/Translate/ffs/SimpleFFS.php b/www/wiki/extensions/Translate/ffs/SimpleFFS.php new file mode 100644 index 00000000..53bc9b71 --- /dev/null +++ b/www/wiki/extensions/Translate/ffs/SimpleFFS.php @@ -0,0 +1,379 @@ +<?php +/** + * File format support classes. + * + * @file + * @author Niklas Laxström + */ + +/** + * A very basic FFS module that implements some basic functionality and + * a simple binary based file format. + * Other FFS classes can extend SimpleFFS and override suitable methods. + * @ingroup FFS + */ + +use UtfNormal\Validator; + +class SimpleFFS implements FFS { + public function supportsFuzzy() { + return 'no'; + } + + public function getFileExtensions() { + return []; + } + + /** + * @var FileBasedMessageGroup + */ + protected $group; + + protected $writePath; + + /** + * Stores the FILES section of the YAML configuration, + * which can be accessed for extra FFS class specific options. 
+ */ + protected $extra; + + const RECORD_SEPARATOR = "\0"; + const PART_SEPARATOR = "\0\0\0\0"; + + public function __construct( FileBasedMessageGroup $group ) { + $this->setGroup( $group ); + $conf = $group->getConfiguration(); + $this->extra = $conf['FILES']; + } + + /** + * @param FileBasedMessageGroup $group + */ + public function setGroup( FileBasedMessageGroup $group ) { + $this->group = $group; + } + + /** + * @return FileBasedMessageGroup + */ + public function getGroup() { + return $this->group; + } + + /** + * @param string $writePath + */ + public function setWritePath( $writePath ) { + $this->writePath = $writePath; + } + + /** + * @return string + */ + public function getWritePath() { + return $this->writePath; + } + + /** + * Returns true if the file for this message group in a given language + * exists. If no $code is given, the groups source language is assumed. + * NB: Some formats store all languages in the same file, and then this + * function will return true even if there are no translations to that + * language. + * + * @param string|bool $code + * @return bool + */ + public function exists( $code = false ) { + if ( $code === false ) { + $code = $this->group->getSourceLanguage(); + } + + $filename = $this->group->getSourceFilePath( $code ); + if ( $filename === null ) { + return false; + } + + return file_exists( $filename ); + } + + /** + * Reads messages from the file in a given language and returns an array + * of AUTHORS, MESSAGES and possibly other properties. + * + * @param string $code Language code. + * @return array|bool False if the file does not exist + * @throws MWException if the file is not readable or has bad encoding + */ + public function read( $code ) { + if ( !$this->exists( $code ) ) { + return false; + } + + $filename = $this->group->getSourceFilePath( $code ); + $input = file_get_contents( $filename ); + if ( $input === false ) { + throw new MWException( "Unable to read file $filename." 
); + } + + if ( !StringUtils::isUtf8( $input ) ) { + throw new MWException( "Contents of $filename are not valid utf-8." ); + } + + $input = Validator::cleanUp( $input ); + + try { + return $this->readFromVariable( $input ); + } catch ( Exception $e ) { + throw new MWException( "Parsing $filename failed: " . $e->getMessage() ); + } + } + + /** + * Parse the message data given as a string in the SimpleFFS format + * and return it as an array of AUTHORS and MESSAGES. + * + * @param string $data + * @return array Parsed data. + * @throws MWException + */ + public function readFromVariable( $data ) { + $parts = explode( self::PART_SEPARATOR, $data ); + + if ( count( $parts ) !== 2 ) { + throw new MWException( 'Wrong number of parts.' ); + } + + list( $authorsPart, $messagesPart ) = $parts; + $authors = explode( self::RECORD_SEPARATOR, $authorsPart ); + $messages = []; + + foreach ( explode( self::RECORD_SEPARATOR, $messagesPart ) as $line ) { + if ( $line === '' ) { + continue; + } + + $lineParts = explode( '=', $line, 2 ); + + if ( count( $lineParts ) !== 2 ) { + throw new MWException( "Wrong number of parts in line $line." ); + } + + list( $key, $message ) = $lineParts; + $key = trim( $key ); + $messages[$key] = $message; + } + + $messages = $this->group->getMangler()->mangle( $messages ); + + return [ + 'AUTHORS' => $authors, + 'MESSAGES' => $messages, + ]; + } + + /** + * Write the collection to file. + * + * @param MessageCollection $collection + * @throws MWException + */ + public function write( MessageCollection $collection ) { + $writePath = $this->writePath; + + if ( $writePath === null ) { + throw new MWException( 'Write path is not set.' ); + } + + if ( !file_exists( $writePath ) ) { + throw new MWException( "Write path '$writePath' does not exist." ); + } + + if ( !is_writable( $writePath ) ) { + throw new MWException( "Write path '$writePath' is not writable." ); + } + + $targetFile = $writePath . '/' . 
$this->group->getTargetFilename( $collection->code ); + + $targetFileExists = file_exists( $targetFile ); + + if ( $targetFileExists ) { + $this->tryReadSource( $targetFile, $collection ); + } else { + $sourceFile = $this->group->getSourceFilePath( $collection->code ); + $this->tryReadSource( $sourceFile, $collection ); + } + + $output = $this->writeReal( $collection ); + if ( !$output ) { + return; + } + + // Some file formats might have changing parts, such as timestamp. + // This allows the file handler to skip updating files, where only + // the timestamp would change. + if ( $targetFileExists ) { + $oldContent = $this->tryReadFile( $targetFile ); + if ( !$this->shouldOverwrite( $oldContent, $output ) ) { + return; + } + } + + wfMkdirParents( dirname( $targetFile ), null, __METHOD__ ); + file_put_contents( $targetFile, $output ); + } + + /** + * Read a collection and return it as a SimpleFFS formatted string. + * + * @param MessageCollection $collection + * @return string + */ + public function writeIntoVariable( MessageCollection $collection ) { + $sourceFile = $this->group->getSourceFilePath( $collection->code ); + $this->tryReadSource( $sourceFile, $collection ); + + return $this->writeReal( $collection ); + } + + /** + * @param MessageCollection $collection + * @return string + */ + protected function writeReal( MessageCollection $collection ) { + $output = ''; + + $authors = $collection->getAuthors(); + $authors = $this->filterAuthors( $authors, $collection->code ); + + $output .= implode( self::RECORD_SEPARATOR, $authors ); + $output .= self::PART_SEPARATOR; + + $mangler = $this->group->getMangler(); + + /** @var TMessage $m */ + foreach ( $collection as $key => $m ) { + $key = $mangler->unmangle( $key ); + $trans = $m->translation(); + $output .= "$key=$trans" . 
self::RECORD_SEPARATOR; + } + + return $output; + } + + /** + * This tries to pick up external authors in the source files so that they + * are not lost if those authors are not among those who have translated in + * the wiki. + * + * @todo Get rid of this + * @param string $filename + * @param MessageCollection $collection + */ + protected function tryReadSource( $filename, MessageCollection $collection ) { + if ( get_class( $this->group->getFFS() ) !== get_class( $this ) ) { + return; + } + + $sourceText = $this->tryReadFile( $filename ); + + // No need to do anything in SimpleFFS if it's false, + // it only reads author data from it. + if ( $sourceText !== false ) { + $sourceData = $this->readFromVariable( $sourceText ); + + if ( isset( $sourceData['AUTHORS'] ) ) { + $collection->addCollectionAuthors( $sourceData['AUTHORS'] ); + } + } + } + + /** + * Read the contents of $filename and return it as a string. + * Return false if the file doesn't exist. + * Throw an exception if the file isn't readable + * or if the reading fails strangely. + * + * @param string $filename + * @return bool|string + * @throws MWException + */ + protected function tryReadFile( $filename ) { + if ( !$filename ) { + return false; + } + + if ( !file_exists( $filename ) ) { + return false; + } + + if ( !is_readable( $filename ) ) { + throw new MWException( "File $filename is not readable." ); + } + + $data = file_get_contents( $filename ); + if ( $data === false ) { + throw new MWException( "Unable to read file $filename." ); + } + + return $data; + } + + /** + * Remove blacklisted authors. 
+ * + * @param array $authors + * @param string $code + * @return array + */ + protected function filterAuthors( array $authors, $code ) { + global $wgTranslateAuthorBlacklist; + $groupId = $this->group->getId(); + + foreach ( $authors as $i => $v ) { + $hash = "$groupId;$code;$v"; + + $blacklisted = false; + foreach ( $wgTranslateAuthorBlacklist as $rule ) { + list( $type, $regex ) = $rule; + + if ( preg_match( $regex, $hash ) ) { + if ( $type === 'white' ) { + $blacklisted = false; + break; + } else { + $blacklisted = true; + } + } + } + + if ( $blacklisted ) { + unset( $authors[$i] ); + } + } + + return $authors; + } + + /** + * Replaces all Windows and Mac line endings with Unix line endings. + * This is needed in some file types. + * + * @param string $data + * @return string + */ + public static function fixNewLines( $data ) { + $data = str_replace( "\r\n", "\n", $data ); + $data = str_replace( "\r", "\n", $data ); + + return $data; + } + + public function isContentEqual( $a, $b ) { + return $a === $b; + } + + public function shouldOverwrite( $a, $b ) { + return true; + } +} diff --git a/www/wiki/extensions/Translate/ffs/XliffFFS.php b/www/wiki/extensions/Translate/ffs/XliffFFS.php new file mode 100644 index 00000000..cef5d5bb --- /dev/null +++ b/www/wiki/extensions/Translate/ffs/XliffFFS.php @@ -0,0 +1,192 @@ +<?php +/** + * Partial support for the Xliff translation format. + * + * @file + * @author Niklas Laxström + * @license GPL-2.0-or-later + */ + +/** + * Partial support for the Xliff translation format. + * @since 2013-04 + * @ingroup FFS + */ +class XliffFFS extends SimpleFFS { + public static function isValid( $data ) { + $doc = new DomDocument( '1.0' ); + $doc->loadXML( $data ); + + $errors = libxml_get_errors(); + if ( $errors ) { + return false; + } + + if ( strpos( $data, 'version="1.2">' ) !== false ) { + $schema = __DIR__ . 
'/../data/xliff-core-1.2-transitional.xsd'; + if ( !$doc->schemaValidate( $schema ) ) { + return false; + } + } + + return true; + } + + public function getFileExtensions() { + return [ '.xlf', '.xliff', '.xml' ]; + } + + /** + * @param string $data + * @param string $element + * @return array Parsed data. + */ + public function readFromVariable( $data, $element = 'target' ) { + $messages = []; + $mangler = $this->group->getMangler(); + + $reader = new SimpleXMLElement( $data ); + $reader->registerXPathNamespace( + 'xliff', + 'urn:oasis:names:tc:xliff:document:1.2' + ); + + $items = array_merge( + $reader->xpath( '//trans-unit' ), + $reader->xpath( '//xliff:trans-unit' ) + ); + + foreach ( $items as $item ) { + /** @var SimpleXMLElement $source */ + $source = $item->$element; + + if ( !$source ) { + continue; + } + + $key = (string)$item['id']; + + /* In case there are tags inside the element, preserve + * them. */ + $dom = new DOMDocument( '1.0' ); + $dom->loadXML( $source->asXML() ); + $value = self::getInnerXml( $dom->documentElement ); + + /* This might not be 100% according to the spec, but + * for now if there is explicit approved=no, mark it + * as fuzzy, but don't do that if the attribute is not + * set */ + if ( (string)$source['state'] === 'needs-l10n' ) { + $value = TRANSLATE_FUZZY . $value; + } + + // Strip CDATA if present + $value = preg_replace( '/<!\[CDATA\[(.*?)\]\]>/s', '\1', $value ); + + $messages[$key] = $value; + } + + return [ + 'MESSAGES' => $mangler->mangle( $messages ), + ]; + } + + /** + * @param string $code Language code. + * @return array|bool + * @throws MWException + */ + public function read( $code ) { + if ( !$this->exists( $code ) ) { + return false; + } + + $filename = $this->group->getSourceFilePath( $code ); + $input = file_get_contents( $filename ); + if ( $input === false ) { + throw new MWException( "Unable to read file $filename." ); + } + + $element = $code === $this->group->getSourceLanguage() ? 
'source' : 'target'; + + return $this->readFromVariable( $input, $element ); + } + + /** + * Gets the XML inside an element without the element itself. + * + * @param DomElement $node + * @return string + */ + public static function getInnerXml( DomElement $node ) { + $text = ''; + foreach ( $node->childNodes as $child ) { + $text .= $child->ownerDocument->saveXML( $child ); + } + + return $text; + } + + protected function writeReal( MessageCollection $collection ) { + $mangler = $this->group->getMangler(); + + $template = new DomDocument( '1.0' ); + $template->preserveWhiteSpace = false; + $template->formatOutput = true; + + // Try to use the definition file as template + $sourceLanguage = $this->group->getSourceLanguage(); + $sourceFile = $this->group->getSourceFilePath( $sourceLanguage ); + if ( file_exists( $sourceFile ) ) { + $template->load( $sourceFile ); + } else { + // Else use standard template + $template->load( __DIR__ . '/../data/xliff-template.xml' ); + } + + $list = $template->getElementsByTagName( 'body' )->item( 0 ); + $list->nodeValue = null; + + /** @var TMessage $m */ + foreach ( $collection as $key => $m ) { + $key = $mangler->unmangle( $key ); + + $value = $m->translation(); + $value = str_replace( TRANSLATE_FUZZY, '', $value ); + + // @todo Support placeholder tags etc. 
+ $source = $template->createDocumentFragment(); + $source->appendXML( htmlspecialchars( $m->definition() ) ); + + $target = $template->createDocumentFragment(); + $target->appendXML( htmlspecialchars( $value ) ); + + $sourceElement = $template->createElement( 'source' ); + $sourceElement->appendChild( $source ); + + $targetElement = $template->createElement( 'target' ); + $targetElement->appendChild( $target ); + if ( $m->getProperty( 'status' ) === 'fuzzy' ) { + $targetElement->setAttribute( 'state', 'needs-l10n' ); + } + if ( $m->getProperty( 'status' ) === 'proofread' ) { + $targetElement->setAttribute( 'state', 'signed-off' ); + } + + $transUnit = $template->createElement( 'trans-unit' ); + $transUnit->setAttribute( 'id', $key ); + $transUnit->appendChild( $sourceElement ); + $transUnit->appendChild( $targetElement ); + + $list->appendChild( $transUnit ); + } + + $template->encoding = 'UTF-8'; + + return $template->saveXML(); + } + + public function supportsFuzzy() { + return 'yes'; + } +} diff --git a/www/wiki/extensions/Translate/ffs/YamlFFS.php b/www/wiki/extensions/Translate/ffs/YamlFFS.php new file mode 100644 index 00000000..aaa93702 --- /dev/null +++ b/www/wiki/extensions/Translate/ffs/YamlFFS.php @@ -0,0 +1,200 @@ +<?php + +/** + * Implements support for message storage in YAML format. + * + * This class adds a new key to the FILES section: \c codeAsRoot. + * If it is set to true, all messages will be under the language code. + * @ingroup FFS + */ +class YamlFFS extends SimpleFFS implements MetaYamlSchemaExtender { + /** + * @param FileBasedMessageGroup $group + */ + public function __construct( FileBasedMessageGroup $group ) { + parent::__construct( $group ); + $this->flattener = $this->getFlattener(); + } + + public function getFileExtensions() { + return [ '.yaml', '.yml' ]; + } + + /** + * @param string $data + * @return array Parsed data. + */ + public function readFromVariable( $data ) { + // Authors first. 
+ $matches = []; + preg_match_all( '/^#\s*Author:\s*(.*)$/m', $data, $matches ); + $authors = $matches[1]; + + // Then messages. + $messages = TranslateYaml::loadString( $data ); + + // Some groups have messages under language code + if ( isset( $this->extra['codeAsRoot'] ) ) { + $messages = array_shift( $messages ); + } + + $messages = $this->flatten( $messages ); + $messages = $this->group->getMangler()->mangle( $messages ); + foreach ( $messages as &$value ) { + $value = rtrim( $value, "\n" ); + } + + return [ + 'AUTHORS' => $authors, + 'MESSAGES' => $messages, + ]; + } + + /** + * @param MessageCollection $collection + * @return string + */ + protected function writeReal( MessageCollection $collection ) { + $output = $this->doHeader( $collection ); + $output .= $this->doAuthors( $collection ); + + $mangler = $this->group->getMangler(); + + $messages = []; + /** + * @var $m TMessage + */ + foreach ( $collection as $key => $m ) { + $key = $mangler->unmangle( $key ); + $value = $m->translation(); + $value = str_replace( TRANSLATE_FUZZY, '', $value ); + + if ( $value === '' ) { + continue; + } + + $messages[$key] = $value; + } + + if ( !count( $messages ) ) { + return false; + } + + $messages = $this->unflatten( $messages ); + + // Some groups have messages under language code. 
+ if ( isset( $this->extra['codeAsRoot'] ) ) { + $code = $this->group->mapCode( $collection->code ); + $messages = [ $code => $messages ]; + } + + $output .= TranslateYaml::dump( $messages ); + + return $output; + } + + /** + * @param MessageCollection $collection + * @return string + */ + protected function doHeader( MessageCollection $collection ) { + global $wgSitename; + global $wgTranslateYamlLibrary; + + $code = $collection->code; + $name = TranslateUtils::getLanguageName( $code ); + $native = TranslateUtils::getLanguageName( $code, $code ); + $output = "# Messages for $name ($native)\n"; + $output .= "# Exported from $wgSitename\n"; + + if ( isset( $wgTranslateYamlLibrary ) ) { + $output .= "# Export driver: $wgTranslateYamlLibrary\n"; + } + + return $output; + } + + /** + * @param MessageCollection $collection + * @return string + */ + protected function doAuthors( MessageCollection $collection ) { + $output = ''; + $authors = $collection->getAuthors(); + $authors = $this->filterAuthors( $authors, $collection->code ); + + foreach ( $authors as $author ) { + $output .= "# Author: $author\n"; + } + + return $output; + } + + /** + * Obtains object used to flatten and unflatten arrays. In this implementation + * we use the ArrayFlattener class which also supports CLDR pluralization rules. + * + * @return object with flatten, unflatten methods + */ + protected function getFlattener() { + $nestingSeparator = $this->extra['nestingSeparator'] ?? '.'; + $parseCLDRPlurals = $this->extra['parseCLDRPlurals'] ?? false; + + // Instantiate helper class for flattening and unflattening nested arrays + return new ArrayFlattener( $nestingSeparator, $parseCLDRPlurals ); + } + + /** + * Flattens multidimensional array by using the path to the value as key + * with each individual key separated by a dot. 
+ * + * @param array $messages + * + * @return array + */ + protected function flatten( $messages ) { + return $this->flattener->flatten( $messages ); + } + + /** + * Performs the reverse operation of flatten. Each dot (or custom separator) + * in the key starts a new subarray in the final array. + * + * @param array $messages + * + * @return array + */ + protected function unflatten( $messages ) { + return $this->flattener->unflatten( $messages ); + } + + public function isContentEqual( $a, $b ) { + return $this->flattener->compareContent( $a, $b ); + } + + public static function getExtraSchema() { + $schema = [ + 'root' => [ + '_type' => 'array', + '_children' => [ + 'FILES' => [ + '_type' => 'array', + '_children' => [ + 'codeAsRoot' => [ + '_type' => 'boolean', + ], + 'nestingSeparator' => [ + '_type' => 'text', + ], + 'parseCLDRPlurals' => [ + '_type' => 'boolean', + ] + ] + ] + ] + ] + ]; + + return $schema; + } +} |