summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/Translate/utils/ArrayFlattener.php
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/extensions/Translate/utils/ArrayFlattener.php')
-rw-r--r--www/wiki/extensions/Translate/utils/ArrayFlattener.php297
1 files changed, 297 insertions, 0 deletions
diff --git a/www/wiki/extensions/Translate/utils/ArrayFlattener.php b/www/wiki/extensions/Translate/utils/ArrayFlattener.php
new file mode 100644
index 00000000..c5e61769
--- /dev/null
+++ b/www/wiki/extensions/Translate/utils/ArrayFlattener.php
@@ -0,0 +1,297 @@
+<?php
+/**
+ * Flattens message arrays for further processing. Supports parsing CLDR
+ * plural messages and converting them into MediaWiki's {{PLURAL}} syntax
+ * in a single message.
+ *
+ * @file
+ * @author Niklas Laxström
+ * @author Erik Moeller
+ * @license GPL-2.0-or-later
+ * @since 2016.01
+ */
+
+class ArrayFlattener {
+	/** @var string Separator placed between the levels of a flattened key. */
+	protected $sep;
+
+	/** @var bool Whether CLDR plural arrays are converted to/from {{PLURAL}} syntax. */
+	protected $parseCLDRPlurals;
+
+	/**
+	 * CLDR plural keywords. They are stored as array keys so that membership
+	 * can be tested with isset(); the values are not used.
+	 */
+	protected static $pluralWords = [
+		'zero' => 1,
+		'one' => 1,
+		'many' => 1,
+		'few' => 1,
+		'other' => 1,
+		'two' => 1
+	];
+
+	/**
+	 * @param string $sep Separator used to join (flatten) and split (unflatten) keys
+	 * @param bool $parseCLDRPlurals Whether arrays consisting only of CLDR plural
+	 *  keywords are collapsed into a single {{PLURAL}} message and expanded back
+	 */
+	public function __construct( $sep = '.', $parseCLDRPlurals = false ) {
+		$this->sep = $sep;
+		$this->parseCLDRPlurals = $parseCLDRPlurals;
+	}
+
+	/**
+	 * Flattens multidimensional array.
+	 *
+	 * Nested keys are joined with the separator. When CLDR plural parsing is
+	 * enabled, an array made up only of plural keywords becomes one
+	 * {{PLURAL}} string stored under the parent key.
+	 *
+	 * @param array $unflat Array of messages
+	 * @throws MWException If plural keywords are mixed with other keys
+	 * @return array
+	 */
+	public function flatten( array $unflat ) {
+		$flat = [];
+
+		foreach ( $unflat as $key => $value ) {
+			if ( !is_array( $value ) ) {
+				$flat[$key] = $value;
+				continue;
+			}
+
+			$plurals = false;
+			if ( $this->parseCLDRPlurals ) {
+				$plurals = $this->flattenCLDRPlurals( $value );
+			}
+
+			if ( $plurals ) {
+				$flat[$key] = $plurals;
+			} else {
+				// Prefix the children with the current key and recurse.
+				$temp = [];
+				foreach ( $value as $subKey => $subValue ) {
+					$newKey = "$key{$this->sep}$subKey";
+					$temp[$newKey] = $subValue;
+				}
+				// Array union: keys already present in $flat are kept.
+				$flat += $this->flatten( $temp );
+			}
+
+			// Drop the processed entry from the local copy. The parameter is
+			// passed by value, so the caller's array is not affected.
+			unset( $unflat[$key] );
+		}
+
+		return $flat;
+	}
+
+	/**
+	 * Flattens arrays that contain CLDR plural keywords into single values using
+	 * MediaWiki's plural syntax.
+	 *
+	 * Every form except "other" is written as "keyword=value"; the "other"
+	 * form is appended last without a prefix. Values are inserted verbatim.
+	 *
+	 * @param array $messages Array of messages
+	 *
+	 * @throws MWException If plural keywords are mixed with other keys
+	 * @return bool|string The {{PLURAL}} string, or false if $messages has a
+	 *  nested array or does not contain any plural keyword
+	 */
+	public function flattenCLDRPlurals( $messages ) {
+		$pluralKeys = false;
+		$nonPluralKeys = false;
+		foreach ( $messages as $key => $value ) {
+			if ( is_array( $value ) ) {
+				// Plurals can only happen in the lowest level of the structure
+				return false;
+			}
+
+			// Check if we find any reserved plural keyword
+			if ( isset( self::$pluralWords[$key] ) ) {
+				$pluralKeys = true;
+			} else {
+				$nonPluralKeys = true;
+			}
+		}
+
+		// No plural keys at all, we can skip
+		if ( !$pluralKeys ) {
+			return false;
+		}
+
+		// Mixed plural keys with other keys, should not happen
+		if ( $nonPluralKeys ) {
+			$keys = implode( ', ', array_keys( $messages ) );
+			throw new MWException( "Reserved plural keywords mixed with other keys: $keys." );
+		}
+
+		$pls = '{{PLURAL';
+		foreach ( $messages as $key => $value ) {
+			if ( $key === 'other' ) {
+				continue;
+			}
+
+			$pls .= "|$key=$value";
+		}
+
+		// Put the "other" alternative last, without other= prefix.
+		$other = isset( $messages['other'] ) ? '|' . $messages['other'] : '';
+		$pls .= "$other}}";
+
+		return $pls;
+	}
+
+	/**
+	 * Performs the reverse operation of flatten.
+	 *
+	 * When CLDR plural parsing is enabled, string values containing
+	 * {{PLURAL}} blocks are first expanded into one entry per plural form.
+	 * After that every key is split on the separator and the value is stored
+	 * at the corresponding position of a nested array.
+	 *
+	 * @param array $flat Array of messages
+	 * @return array
+	 */
+	public function unflatten( $flat ) {
+		$unflat = [];
+
+		if ( $this->parseCLDRPlurals ) {
+			$unflattenedPlurals = [];
+			foreach ( $flat as $key => $value ) {
+				$plurals = false;
+				if ( !is_array( $value ) ) {
+					$plurals = $this->unflattenCLDRPlurals( $key, $value );
+				}
+				if ( $plurals ) {
+					$unflattenedPlurals += $plurals;
+				} else {
+					// Not a plural message (or nothing could be parsed): keep as is.
+					$unflattenedPlurals[$key] = $value;
+				}
+			}
+			$flat = $unflattenedPlurals;
+		}
+
+		foreach ( $flat as $key => $value ) {
+			$path = explode( $this->sep, $key );
+			if ( count( $path ) === 1 ) {
+				$unflat[$key] = $value;
+				continue;
+			}
+
+			// The last segment is the leaf key, the ones before it are parents.
+			$lastKey = array_pop( $path );
+
+			$pointer = &$unflat;
+			foreach ( $path as $level ) {
+				// Make sure the level exists, then descend into it.
+				if ( !isset( $pointer[$level] ) ) {
+					$pointer[$level] = [];
+				}
+				$pointer = &$pointer[$level];
+			}
+			$pointer[$lastKey] = $value;
+
+			// Break the reference so that it cannot leak into later code.
+			unset( $pointer );
+		}
+
+		return $unflat;
+	}
+
+	/**
+	 * Converts the MediaWiki plural syntax to array of CLDR style plurals
+	 *
+	 * The text surrounding a {{PLURAL}} block is repeated in every resulting
+	 * form. A form without a "keyword=" prefix is treated as "other".
+	 *
+	 * @param string $key Message key prefix
+	 * @param string $message The plural string
+	 *
+	 * @return bool|array Map of "$key<sep><keyword>" to message text, or false
+	 *  if the message does not contain a {{PLURAL}} block
+	 */
+	public function unflattenCLDRPlurals( $key, $message ) {
+		// Quick escape.
+		if ( strpos( $message, '{{PLURAL' ) === false ) {
+			return false;
+		}
+
+		/*
+		 * Replace all variables with placeholders. Possible source of bugs
+		 * if characters other than those given below are used.
+		 */
+		$regex = '~\{[a-zA-Z_-]+}~';
+		$placeholders = [];
+		$match = [];
+
+		while ( preg_match( $regex, $message, $match ) ) {
+			$uniqkey = TranslateUtils::getPlaceholder();
+			$placeholders[$uniqkey] = $match[0];
+			// Plain literal replacement of every occurrence of this variable.
+			$message = str_replace( $match[0], $uniqkey, $message );
+		}
+
+		// Then replace (possible multiple) plural instances into placeholders.
+		$regex = '~\{\{PLURAL\|(.*?)}}~s';
+		$matches = [];
+		$match = [];
+
+		while ( preg_match( $regex, $message, $match ) ) {
+			$uniqkey = TranslateUtils::getPlaceholder();
+			$matches[$uniqkey] = $match;
+			$message = preg_replace( $regex, $uniqkey, $message, 1 );
+		}
+
+		// No plurals, should not happen.
+		if ( !count( $matches ) ) {
+			return false;
+		}
+
+		// The final array of alternative plurals forms.
+		$alts = [];
+
+		/*
+		 * Then loop through each plural block and replace the placeholders
+		 * to construct the alternatives. Produces invalid output if there are
+		 * multiple plural blocks which don't have the same set of keys.
+		 */
+		$pluralChoice = implode( '|', array_keys( self::$pluralWords ) );
+		// The keyword must be at the start of the form, otherwise a keyword
+		// appearing inside the text (e.g. "pick one = any") would be taken as
+		// the form's keyword. The value may be empty (e.g. "zero=").
+		$regex = "~^\s*($pluralChoice)\s*=\s*(.*)~s";
+		foreach ( $matches as $ph => $plu ) {
+			$forms = explode( '|', $plu[1] );
+
+			foreach ( $forms as $form ) {
+				if ( $form === '' ) {
+					continue;
+				}
+
+				$match = [];
+				if ( preg_match( $regex, $form, $match ) ) {
+					$formWord = "$key{$this->sep}{$match[1]}";
+					$value = $match[2];
+				} else {
+					$formWord = "$key{$this->sep}other";
+					$value = $form;
+				}
+
+				// Each form starts from the full message with placeholders.
+				if ( !isset( $alts[$formWord] ) ) {
+					$alts[$formWord] = $message;
+				}
+
+				$alts[$formWord] = str_replace( $ph, $value, $alts[$formWord] );
+			}
+		}
+
+		// Replace other variables.
+		$search = array_keys( $placeholders );
+		$replace = array_values( $placeholders );
+		foreach ( $alts as $formWord => $text ) {
+			$alts[$formWord] = str_replace( $search, $replace, $text );
+		}
+
+		if ( !isset( $alts["$key{$this->sep}other"] ) ) {
+			wfWarn( "Other not set for key $key" );
+		}
+
+		return $alts;
+	}
+
+	/**
+	 * Compares two strings for equal content, taking PLURAL expansion into account.
+	 *
+	 * With CLDR plural parsing enabled, two messages are equal if they expand
+	 * to the same set of plural forms with identical texts; the order of the
+	 * forms inside the {{PLURAL}} block does not matter.
+	 *
+	 * @param string $a
+	 * @param string $b
+	 * @return bool Whether two strings are equal
+	 */
+	public function compareContent( $a, $b ) {
+		if ( !$this->parseCLDRPlurals ) {
+			return $a === $b;
+		}
+
+		$a2 = $this->unflattenCLDRPlurals( 'prefix', $a );
+		$b2 = $this->unflattenCLDRPlurals( 'prefix', $b );
+
+		// Fall back to regular comparison if parsing fails or yields no forms.
+		if ( !$a2 || !$b2 ) {
+			return $a === $b;
+		}
+
+		// Require key-value pairs to match, but ignore order. The values are
+		// compared strictly: a loose comparison would treat numeric-looking
+		// strings such as '1' and '1.0' as equal.
+		ksort( $a2, SORT_STRING );
+		ksort( $b2, SORT_STRING );
+
+		return $a2 === $b2;
+	}
+}