summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/Translate/stringmangler
diff options
context:
space:
mode:
authorYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
committerYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
commitfc7369835258467bf97eb64f184b93691f9a9fd5 (patch)
treedaabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/extensions/Translate/stringmangler
first commit
Diffstat (limited to 'www/wiki/extensions/Translate/stringmangler')
-rw-r--r--www/wiki/extensions/Translate/stringmangler/StringMangler.php49
-rw-r--r--www/wiki/extensions/Translate/stringmangler/StringMatcher.php259
2 files changed, 308 insertions, 0 deletions
diff --git a/www/wiki/extensions/Translate/stringmangler/StringMangler.php b/www/wiki/extensions/Translate/stringmangler/StringMangler.php
new file mode 100644
index 00000000..6e0e1575
--- /dev/null
+++ b/www/wiki/extensions/Translate/stringmangler/StringMangler.php
@@ -0,0 +1,49 @@
+<?php
+/**
+ * StringMangler interface.
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+/**
+ * Interface that key-mangling classes must implement. Mangling serves two
+ * purposes:
+ * - converting characters which would be invalid in titles to something valid
+ * - prefixing a set of messages to avoid conflicts when sharing a namespace
+ * with multiple message groups.
+ *
+ * The operations have to be reversible so that
+ * x equals unmangle( mangle( x ) ).
+ */
+interface StringMangler {
+ /**
+ * Creates a mangler instance without any explicit configuration.
+ * StringMatcher implements this by constructing itself with an empty prefix
+ * and no patterns.
+ * @todo Does this really need to be in the interface???
+ */
+ public static function EmptyMatcher();
+
+ /**
+ * General way to pass configuration to the mangler.
+ * The accepted keys depend on the implementation; StringMatcher reads the
+ * keys 'prefix' and 'patterns'.
+ * @param array $configuration
+ */
+ public function setConf( $configuration );
+
+ /**
+ * Match strings against a pattern.
+ * If string matches, mangle() should mangle the key.
+ * @param string $string Message key.
+ * @return bool True if the message key matches.
+ */
+ public function match( $string );
+
+ /**
+ * Mangles a single message key or a list of message keys.
+ * @param string|string[] $data Unmangled message keys.
+ * @return string|string[] Mangled message keys.
+ */
+ public function mangle( $data );
+
+ /**
+ * Reverses the operation mangle() did.
+ * @param string|string[] $data Mangled message keys.
+ * @return string|string[] Unmangled message keys.
+ */
+ public function unmangle( $data );
+}
diff --git a/www/wiki/extensions/Translate/stringmangler/StringMatcher.php b/www/wiki/extensions/Translate/stringmangler/StringMatcher.php
new file mode 100644
index 00000000..e238062d
--- /dev/null
+++ b/www/wiki/extensions/Translate/stringmangler/StringMatcher.php
@@ -0,0 +1,259 @@
+<?php
+/**
+ * Default StringMangler implementation.
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+/**
+ * The versatile default implementation of StringMangler interface.
+ * It supports exact matches and patterns with any-wildcard (*).
+ * All matching strings are prefixed with the same prefix.
+ *
+ * Independently of the prefix, every key is escaped with a
+ * "quoted-printable"-like scheme (=XX, where XX is the upper case hexadecimal
+ * value of a byte) so that the result only contains characters allowed in
+ * page titles.
+ */
+class StringMatcher implements StringMangler, MetaYamlSchemaExtender {
+	/** @var string Prefix for mangled message keys */
+	protected $sPrefix = '';
+	/** @var string[] Exact message keys */
+	protected $aExact = [];
+	/** @var int[] Patterns of type foo*, stored as prefix => length of the prefix in bytes */
+	protected $aPrefix = [];
+	/** @var string[] Patterns that contain wildcard anywhere else than in the end, as regular expressions */
+	protected $aRegex = [];
+
+	/**
+	 * Alias for making NO-OP string mangler.
+	 *
+	 * @return self
+	 */
+	public static function EmptyMatcher() {
+		return new self();
+	}
+
+	/**
+	 * Constructor, see EmptyMatcher();
+	 *
+	 * @param string $prefix Prefix added to the keys that match a pattern.
+	 * @param array $patterns Key patterns, see init().
+	 */
+	public function __construct( $prefix = '', array $patterns = [] ) {
+		$this->sPrefix = $prefix;
+		$this->init( $patterns );
+	}
+
+	/**
+	 * Returns the characters that are left unescaped in mangled keys.
+	 *
+	 * The value is $wgLegalTitleChars with the equals sign, the ampersand and
+	 * the percent sign removed. It is computed on first use and cached in a
+	 * static variable. mangleString() embeds it into a regular expression
+	 * character class.
+	 *
+	 * @return string
+	 */
+	protected static function getValidKeyChars() {
+		static $valid = null;
+		if ( $valid === null ) {
+			global $wgLegalTitleChars;
+			$valid = strtr( $wgLegalTitleChars, [
+				'=' => '', // equals sign, which is itself used for escaping
+				'&' => '', // ampersand, for entities
+				'%' => '', // percent sign, which is used in URL encoding
+			] );
+		}
+
+		return $valid;
+	}
+
+	/**
+	 * Sets the prefix and registers the patterns from the configuration.
+	 *
+	 * Patterns are appended to the ones that are already registered; they do
+	 * not replace them.
+	 *
+	 * @param array $conf Must contain the keys 'prefix' (string) and
+	 *  'patterns' (string[]).
+	 */
+	public function setConf( $conf ) {
+		$this->sPrefix = $conf['prefix'];
+		$this->init( $conf['patterns'] );
+	}
+
+	/**
+	 * Preprocesses the patterns.
+	 * They are split into exact keys, prefix matches and pattern matches to
+	 * speed up matching process.
+	 * @param string[] $strings Key patterns.
+	 */
+	protected function init( array $strings ) {
+		foreach ( $strings as $string ) {
+			// Defensive cast: match() compares strictly against strings.
+			$string = (string)$string;
+			$pos = strpos( $string, '*' );
+			if ( $pos === false ) {
+				$this->aExact[] = $string;
+			} elseif ( $pos + 1 === strlen( $string ) ) {
+				// The only wildcard is the last character: plain prefix match.
+				$prefix = substr( $string, 0, -1 );
+				$this->aPrefix[$prefix] = strlen( $prefix );
+			} else {
+				// Quote the whole pattern for use in a regular expression and
+				// then turn the (now escaped) wildcards into "one or more
+				// characters".
+				$string = str_replace( '\\*', '.+', preg_quote( $string, '/' ) );
+				$this->aRegex[] = "/^$string$/";
+			}
+		}
+	}
+
+	/**
+	 * Checks whether the message key matches any of the registered patterns.
+	 *
+	 * @param string $string Message key.
+	 * @return bool
+	 */
+	public function match( $string ) {
+		// PHP converts integer-like array keys to integers, so keys passed on
+		// by mangleArray() are not necessarily strings.
+		$string = (string)$string;
+
+		// Strict comparison, otherwise numeric-looking keys would be compared
+		// as numbers and for example "010" would match the pattern "10".
+		if ( in_array( $string, $this->aExact, true ) ) {
+			return true;
+		}
+
+		foreach ( $this->aPrefix as $prefix => $len ) {
+			// Integer-like prefixes have been converted to integers as well.
+			if ( strncmp( $string, (string)$prefix, $len ) === 0 ) {
+				return true;
+			}
+		}
+
+		foreach ( $this->aRegex as $regex ) {
+			if ( preg_match( $regex, $string ) ) {
+				return true;
+			}
+		}
+
+		return false;
+	}
+
+	/**
+	 * Mangles the input. Input can either be a plain string, a list of strings
+	 * or an associative array. In the last case the keys of the array are
+	 * mangled. Null is passed through unchanged.
+	 *
+	 * @param string|string[]|array $data
+	 * @return string|string[]|array
+	 * @throws MWException If the input is of any other type.
+	 */
+	public function mangle( $data ) {
+		if ( is_array( $data ) ) {
+			return $this->mangleArray( $data );
+		} elseif ( is_string( $data ) ) {
+			return $this->mangleString( $data );
+		} elseif ( $data === null ) {
+			return $data;
+		} else {
+			throw new MWException( __METHOD__ . ': Unsupported datatype' );
+		}
+	}
+
+	/**
+	 * Reverses the operation mangle() did. Accepts the same kinds of input as
+	 * mangle().
+	 *
+	 * @param string|string[]|array $data
+	 * @return string|string[]|array
+	 * @throws MWException If the input is of any other type.
+	 */
+	public function unmangle( $data ) {
+		if ( is_array( $data ) ) {
+			return $this->mangleArray( $data, true );
+		} elseif ( is_string( $data ) ) {
+			return $this->mangleString( $data, true );
+		} elseif ( $data === null ) {
+			return $data;
+		} else {
+			throw new MWException( __METHOD__ . ': Unsupported datatype' );
+		}
+	}
+
+	/**
+	 * Mangles or unmangles single string.
+	 *
+	 * Mangling adds the prefix if the key matches a pattern and then escapes
+	 * everything that is not allowed in a page title.
+	 *
+	 * @param string $string Message key.
+	 * @param bool $reverse Direction of mangling or unmangling.
+	 * @return string
+	 */
+	protected function mangleString( $string, $reverse = false ) {
+		if ( $reverse ) {
+			return $this->unMangleString( $string );
+		}
+
+		if ( $this->match( $string ) ) {
+			$string = $this->sPrefix . $string;
+		}
+
+		// Replaces every byte of the matched text with =XX, where XX is the
+		// upper case hexadecimal value of the byte.
+		$escaper = function ( $match ) {
+			$esc = '';
+			foreach ( str_split( $match[ 0 ] ) as $c ) {
+				$esc .= '=' . sprintf( '%02X', ord( $c ) );
+			}
+			return $esc;
+		};
+
+		// Apply a "quoted-printable"-like escaping
+		$valid = self::getValidKeyChars();
+		$string = preg_replace_callback( "/[^$valid]/", $escaper, $string );
+		// Additional limitations in MediaWiki, see MediaWikiTitleCodec::splitTitleString:
+		// three tildes, a space or an underscore in the beginning or in the
+		// end, runs of spaces or underscores, and a colon in the beginning.
+		$string = preg_replace_callback( '/(~~~|^[ _]|[ _]$|[ _]{2,}|^:)/', $escaper, $string );
+		// TODO: length check + truncation
+		// TODO: forbid path travels
+
+		return $string;
+	}
+
+	/**
+	 * Unmangles the message key by undoing the escaping and removing the
+	 * prefix if it exists.
+	 * @param string $string Message key.
+	 * @return string Unmangled message key.
+	 */
+	protected function unMangleString( $string ) {
+		// Unescape the "quoted-printable"-like escaping,
+		// which is applied in mangleString.
+		$unescapedString = preg_replace_callback( '/=([A-F0-9]{2})/',
+			function ( $match ) {
+				// Use the captured hexadecimal digits only. The full match
+				// includes the equals sign, which is not a valid character
+				// for hexdec().
+				return chr( hexdec( $match[1] ) );
+			},
+			$string
+		);
+
+		$prefixLength = strlen( $this->sPrefix );
+		if ( strncmp( $unescapedString, $this->sPrefix, $prefixLength ) === 0 ) {
+			return substr( $unescapedString, $prefixLength );
+		}
+
+		return $unescapedString;
+	}
+
+	/**
+	 * Mangles or unmangles list of strings. If an associative array is given,
+	 * the keys of the array will be mangled. For lists the values are mangled.
+	 *
+	 * @param string[]|array $array Strings.
+	 * @param bool $reverse Direction of mangling or unmangling.
+	 * @return string[]|array (Un)mangled strings.
+	 */
+	protected function mangleArray( array $array, $reverse = false ) {
+		$temp = [];
+
+		if ( !$this->isAssoc( $array ) ) {
+			// List: keep the indexes, (un)mangle the values.
+			foreach ( $array as $key => $value ) {
+				$temp[$key] = $this->mangleString( $value, $reverse );
+			}
+		} else {
+			// Map: (un)mangle the keys, keep the values.
+			foreach ( $array as $key => $value ) {
+				$temp[$this->mangleString( $key, $reverse )] = $value;
+			}
+		}
+
+		return $temp;
+	}
+
+	/**
+	 * Tells whether the array should be handled as an associative array.
+	 *
+	 * That is the case if it has at least one string key or if it has no
+	 * element with index 0. The latter also applies to the empty array.
+	 *
+	 * @param array $array
+	 * @return bool
+	 */
+	protected function isAssoc( array $array ) {
+		$assoc = (bool)count( array_filter( array_keys( $array ), 'is_string' ) );
+		if ( $assoc ) {
+			return true;
+		}
+
+		// Also check that the indexing starts from zero
+		return !array_key_exists( 0, $array );
+	}
+
+	/**
+	 * Describes the MANGLER section this class adds to the configuration
+	 * schema: a non-empty text 'prefix' and a required array 'patterns'.
+	 *
+	 * NOTE(review): presumably consumed through MetaYamlSchemaExtender by the
+	 * validator of the group configuration files; the consumer is not visible
+	 * here.
+	 *
+	 * @return array
+	 */
+	public static function getExtraSchema() {
+		$schema = [
+			'root' => [
+				'_type' => 'array',
+				'_children' => [
+					'MANGLER' => [
+						'_type' => 'array',
+						'_children' => [
+							'prefix' => [
+								'_type' => 'text',
+								'_not_empty' => true
+							],
+							'patterns' => [
+								'_type' => 'array',
+								'_required' => true,
+								'_ignore_extra_keys' => true,
+								'_children' => [],
+							],
+						]
+					]
+				]
+			]
+		];
+
+		return $schema;
+	}
+}