diff options
author | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
---|---|---|
committer | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
commit | fc7369835258467bf97eb64f184b93691f9a9fd5 (patch) | |
tree | daabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/extensions/Translate/stringmangler |
first commit
Diffstat (limited to 'www/wiki/extensions/Translate/stringmangler')
-rw-r--r-- | www/wiki/extensions/Translate/stringmangler/StringMangler.php | 49 | ||||
-rw-r--r-- | www/wiki/extensions/Translate/stringmangler/StringMatcher.php | 259 |
2 files changed, 308 insertions, 0 deletions
// Path: www/wiki/extensions/Translate/stringmangler/StringMangler.php
// (added as a new file, mode 100644, by this commit)
/**
 * StringMangler interface.
 * @file
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */

/**
 * Interface that key-mangling classes must implement. Mangling is done to:
 * - convert characters which would be invalid in titles to something valid
 * - prefix a set of messages to avoid conflicts when sharing a namespace
 *   with multiple message groups.
 *
 * The operations have to be reversible so that
 * x equals unmangle( mangle( x ) ).
 */
interface StringMangler {
	/**
	 * Static factory that takes no configuration.
	 *
	 * @todo Does this really need to be in the interface???
	 */
	public static function EmptyMatcher();

	/**
	 * General way to pass configuration to the mangler.
	 *
	 * @param array $configuration
	 */
	public function setConf( $configuration );

	/**
	 * Match strings against a pattern.
	 * If the string matches, mangle() should mangle the key.
	 *
	 * @param string $string Message key.
	 * @return bool
	 */
	public function match( $string );

	/**
	 * Mangles a message key or a list of message keys.
	 *
	 * @param string|string[] $data Unmangled message keys.
	 * @return string|string[] Mangled message keys.
	 */
	public function mangle( $data );

	/**
	 * Reverses the operation mangle() did.
	 *
	 * @param string|string[] $data Mangled message keys.
	 * @return string|string[] Unmangled message keys.
	 */
	public function unmangle( $data );
}
// Path: www/wiki/extensions/Translate/stringmangler/StringMatcher.php
// (added as a new file, mode 100644, by this commit)
/**
 * Default StringMangler implementation.
 * @file
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */

/**
 * The versatile default implementation of StringMangler interface.
 * It supports exact matches and patterns with any-wildcard (*).
 * All matching strings are prefixed with the same prefix.
 */
class StringMatcher implements StringMangler, MetaYamlSchemaExtender {
	/** @var string Prefix for mangled message keys */
	protected $sPrefix = '';
	/** @var string[] Exact message keys */
	protected $aExact = [];
	/** @var int[] Patterns of type foo*, stored as prefix (wildcard removed) => byte length */
	protected $aPrefix = [];
	/** @var string[] Regular expressions for patterns with a wildcard anywhere else than in the end */
	protected $aRegex = [];

	/**
	 * Alias for making NO-OP string mangler.
	 *
	 * @return self
	 */
	public static function EmptyMatcher() {
		return new self();
	}

	/**
	 * Constructor, see EmptyMatcher();
	 *
	 * @param string $prefix Prefix added to every matching message key.
	 * @param array $patterns Key patterns, see init().
	 */
	public function __construct( $prefix = '', array $patterns = [] ) {
		$this->sPrefix = $prefix;
		$this->init( $patterns );
	}

	/**
	 * Returns the characters that are left unescaped in mangled keys, in the
	 * form of a regular expression character class body.
	 *
	 * The value is $wgLegalTitleChars with '=', '&' and '%' removed. It is
	 * computed once and cached in a static variable.
	 *
	 * @return string
	 */
	protected static function getValidKeyChars() {
		static $valid = null;
		if ( $valid === null ) {
			global $wgLegalTitleChars;
			$valid = strtr( $wgLegalTitleChars, [
				'=' => '', // equals sign, which is itself used for escaping
				'&' => '', // ampersand, for entities
				'%' => '', // percent sign, which is used in URL encoding
			] );
		}

		return $valid;
	}

	/**
	 * Sets the prefix and registers the patterns from a configuration array.
	 *
	 * The prefix is replaced; the patterns are added to the ones that are
	 * already registered, because init() only appends.
	 *
	 * @param array $conf Configuration with the keys 'prefix' and 'patterns'.
	 *  A missing prefix is treated as an empty prefix.
	 */
	public function setConf( $conf ) {
		// The schema in getExtraSchema() does not mark 'prefix' as required,
		// so do not assume that it is present.
		$this->sPrefix = isset( $conf['prefix'] ) ? (string)$conf['prefix'] : '';
		$this->init( isset( $conf['patterns'] ) ? (array)$conf['patterns'] : [] );
	}

	/**
	 * Preprocesses the patterns.
	 * They are split into exact keys, prefix matches and pattern matches to
	 * speed up matching process.
	 *
	 * @param string[] $strings Key patterns.
	 */
	protected function init( array $strings ) {
		foreach ( $strings as $string ) {
			// Patterns are always handled as strings, so that match() can
			// compare strictly.
			$string = (string)$string;
			$pos = strpos( $string, '*' );
			if ( $pos === false ) {
				// No wildcard at all
				$this->aExact[] = $string;
			} elseif ( $pos + 1 === strlen( $string ) ) {
				// The first wildcard is the last character: foo*
				$prefix = substr( $string, 0, -1 );
				$this->aPrefix[$prefix] = strlen( $prefix );
			} else {
				// Wildcard elsewhere: quote everything and let each * match
				// one or more characters.
				$string = str_replace( '\\*', '.+', preg_quote( $string, '/' ) );
				$this->aRegex[] = "/^$string$/";
			}
		}
	}

	/**
	 * Checks whether the message key matches any of the registered patterns.
	 *
	 * @param string $string Message key.
	 * @return bool
	 */
	public function match( $string ) {
		$string = (string)$string;

		// Strict comparison: loose comparison treats numeric-looking strings
		// as numbers, which would make the key "007" match the pattern "7".
		if ( in_array( $string, $this->aExact, true ) ) {
			return true;
		}

		foreach ( $this->aPrefix as $prefix => $len ) {
			// PHP turns numeric-looking array keys into integers, hence the cast.
			if ( strncmp( $string, (string)$prefix, $len ) === 0 ) {
				return true;
			}
		}

		foreach ( $this->aRegex as $regex ) {
			if ( preg_match( $regex, $string ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Mangles the input. Input can either be a plain string, a list of strings
	 * or an associative array. In the last case the keys of the array are
	 * mangled.
	 *
	 * @param string|string[]|array $data
	 * @return string|string[]|array
	 * @throws MWException If $data is not an array, a string or null.
	 */
	public function mangle( $data ) {
		if ( is_array( $data ) ) {
			return $this->mangleArray( $data );
		} elseif ( is_string( $data ) ) {
			return $this->mangleString( $data );
		} elseif ( $data === null ) {
			return $data;
		} else {
			throw new MWException( __METHOD__ . ': Unsupported datatype' );
		}
	}

	/**
	 * Reverses mangle(). Accepts the same kinds of input as mangle().
	 *
	 * @param string|string[]|array $data
	 * @return string|string[]|array
	 * @throws MWException If $data is not an array, a string or null.
	 */
	public function unmangle( $data ) {
		if ( is_array( $data ) ) {
			return $this->mangleArray( $data, true );
		} elseif ( is_string( $data ) ) {
			return $this->mangleString( $data, true );
		} elseif ( $data === null ) {
			return $data;
		} else {
			throw new MWException( __METHOD__ . ': Unsupported datatype' );
		}
	}

	/**
	 * Mangles or unmangles single string.
	 *
	 * @param string $string Message key.
	 * @param bool $reverse Direction of mangling or unmangling.
	 * @return string
	 */
	protected function mangleString( $string, $reverse = false ) {
		if ( $reverse ) {
			return $this->unMangleString( $string );
		}

		if ( $this->match( $string ) ) {
			$string = $this->sPrefix . $string;
		}

		// Replaces every byte of the matched text with =XX, where XX is the
		// byte value as two uppercase hexadecimal digits.
		$escaper = static function ( $match ) {
			$esc = '';
			foreach ( str_split( $match[0] ) as $c ) {
				$esc .= '=' . sprintf( '%02X', ord( $c ) );
			}
			return $esc;
		};

		// Apply a "quoted-printable"-like escaping
		$valid = self::getValidKeyChars();
		$string = preg_replace_callback( "/[^$valid]/", $escaper, $string );
		// Additional limitations in MediaWiki, see MediaWikiTitleCodec::splitTitleString
		$string = preg_replace_callback( '/(~~~|^[ _]|[ _]$|[ _]{2,}|^:)/', $escaper, $string );
		// TODO: length check + truncation
		// TODO: forbid path travels

		return $string;
	}

	/**
	 * Unmangles the message key by undoing the escaping and removing the
	 * prefix if it exists.
	 *
	 * @param string $string Message key.
	 * @return string Unmangled message key.
	 */
	protected function unMangleString( $string ) {
		// Unescape the "quoted-printable"-like escaping,
		// which is applied in mangleString.
		$unescapedString = preg_replace_callback( '/=([A-F0-9]{2})/',
			static function ( $match ) {
				// Use the captured hex digits only. $match[0] also contains
				// the equals sign, which is not valid input for hexdec().
				return chr( hexdec( $match[1] ) );
			},
			$string
		);

		if ( strncmp( $unescapedString, $this->sPrefix, strlen( $this->sPrefix ) ) === 0 ) {
			return substr( $unescapedString, strlen( $this->sPrefix ) );
		} else {
			return $unescapedString;
		}
	}

	/**
	 * Mangles or unmangles list of strings. If an associative array is given,
	 * the keys of the array will be mangled. For lists the values are mangled.
	 *
	 * @param string[]|array $array Strings.
	 * @param bool $reverse Direction of mangling or unmangling.
	 * @return string[]|array (Un)mangled strings.
	 */
	protected function mangleArray( array $array, $reverse = false ) {
		$temp = [];

		if ( !$this->isAssoc( $array ) ) {
			// List: keep the keys, (un)mangle the values
			foreach ( $array as $key => $value ) {
				$temp[$key] = $this->mangleString( $value, $reverse );
			}
		} else {
			// Associative array: (un)mangle the keys, keep the values
			foreach ( $array as $key => $value ) {
				$temp[$this->mangleString( $key, $reverse )] = $value;
			}
		}

		return $temp;
	}

	/**
	 * Decides whether mangleArray() should treat the array as associative.
	 *
	 * That is the case when any key is a string, or when the array has no
	 * element with the key 0 (which includes the empty array).
	 *
	 * @param array $array
	 * @return bool
	 */
	protected function isAssoc( array $array ) {
		foreach ( array_keys( $array ) as $key ) {
			if ( is_string( $key ) ) {
				return true;
			}
		}

		// Also check that the indexing starts from zero
		return !array_key_exists( 0, $array );
	}

	/**
	 * Returns the schema fragment for the MANGLER section of a configuration:
	 * an optional non-empty text 'prefix' and a required array 'patterns'.
	 *
	 * @return array
	 */
	public static function getExtraSchema() {
		$schema = [
			'root' => [
				'_type' => 'array',
				'_children' => [
					'MANGLER' => [
						'_type' => 'array',
						'_children' => [
							'prefix' => [
								'_type' => 'text',
								'_not_empty' => true
							],
							'patterns' => [
								'_type' => 'array',
								'_required' => true,
								'_ignore_extra_keys' => true,
								'_children' => [],
							],
						]
					]
				]
			]
		];

		return $schema;
	}
}