diff options
Diffstat (limited to 'www/wiki/extensions/Translate/utils/MessageIndex.php')
-rw-r--r-- | www/wiki/extensions/Translate/utils/MessageIndex.php | 743 |
1 files changed, 743 insertions, 0 deletions
diff --git a/www/wiki/extensions/Translate/utils/MessageIndex.php b/www/wiki/extensions/Translate/utils/MessageIndex.php new file mode 100644 index 00000000..0015d0aa --- /dev/null +++ b/www/wiki/extensions/Translate/utils/MessageIndex.php @@ -0,0 +1,743 @@ +<?php +/** + * Contains classes for handling the message index. + * + * @file + * @author Niklas Laxstrom + * @copyright Copyright © 2008-2013, Niklas Laxström + * @license GPL-2.0-or-later + */ + +/** + * Creates a database of keys in all groups, so that namespace and key can be + * used to get the groups they belong to. This is used as a fallback when + * loadgroup parameter is not provided in the request, which happens if someone + * reaches a messages from somewhere else than Special:Translate. Also used + * by Special:TranslationStats and alike which need to map lots of titles + * to message groups. + */ +abstract class MessageIndex { + /** + * @var self + */ + protected static $instance; + + /** + * @var MapCacheLRU|null + */ + private static $keysCache; + + /** + * @return self + */ + public static function singleton() { + if ( self::$instance === null ) { + global $wgTranslateMessageIndex; + $params = $wgTranslateMessageIndex; + $class = array_shift( $params ); + self::$instance = new $class( $params ); + } + + return self::$instance; + } + + /** + * Override the global instance, for testing. + * + * @since 2015.04 + * @param MessageIndex $instance + */ + public static function setInstance( self $instance ) { + self::$instance = $instance; + } + + /** + * Retrieves a list of groups given MessageHandle belongs to. + * @since 2012-01-04 + * @param MessageHandle $handle + * @return array + */ + public static function getGroupIds( MessageHandle $handle ) { + global $wgTranslateMessageNamespaces; + + $title = $handle->getTitle(); + + if ( !$title->inNamespaces( $wgTranslateMessageNamespaces ) ) { + return []; + } + + $namespace = $title->getNamespace(); + $key = $handle->getKey(); + $normkey = TranslateUtils::normaliseKey( $namespace, $key ); + + $cache = self::getCache(); + $value = $cache->get( $normkey ); + if ( $value === null ) { + $value = self::singleton()->get( $normkey ); + $value = $value !== null + ? (array)$value + : []; + $cache->set( $normkey, $value ); + } + + return $value; + } + + /** + * @return MapCacheLRU + */ + private static function getCache() { + if ( self::$keysCache === null ) { + self::$keysCache = new MapCacheLRU( 30 ); + } + return self::$keysCache; + } + + /** + * @since 2012-01-04 + * @param MessageHandle $handle + * @return MessageGroup|null + */ + public static function getPrimaryGroupId( MessageHandle $handle ) { + $groups = self::getGroupIds( $handle ); + + return count( $groups ) ? array_shift( $groups ) : null; + } + + /** + * Looks up the stored value for single key. Only for testing. + * @since 2012-04-10 + * @param string $key + * @return string|array|null + */ + protected function get( $key ) { + // Default implementation + $mi = $this->retrieve(); + if ( isset( $mi[$key] ) ) { + return $mi[$key]; + } else { + return null; + } + } + + /** + * @param bool $forRebuild + * @return array + */ + abstract public function retrieve( $forRebuild = false ); + + /** + * @since 2018.01 + * @return string[] + */ + public function getKeys() { + return array_keys( $this->retrieve() ); + } + + abstract protected function store( array $array, array $diff ); + + protected function lock() { + return true; + } + + protected function unlock() { + return true; + } + + public function rebuild() { + static $recursion = 0; + + if ( $recursion > 0 ) { + $msg = __METHOD__ . ': trying to recurse - building the index first time?'; + wfWarn( $msg ); + + $recursion--; + return []; + } + $recursion++; + + $groups = MessageGroups::singleton()->getGroups(); + + if ( !$this->lock() ) { + throw new Exception( __CLASS__ . ': unable to acquire lock' ); + } + + self::getCache()->clear(); + + $new = $old = []; + $old = $this->retrieve( 'rebuild' ); + $postponed = []; + + /** + * @var MessageGroup $g + */ + foreach ( $groups as $g ) { + if ( !$g->exists() ) { + $id = $g->getId(); + wfWarn( __METHOD__ . ": group '$id' is registered but does not exist" ); + continue; + } + + # Skip meta thingies + if ( $g->isMeta() ) { + $postponed[] = $g; + continue; + } + + $this->checkAndAdd( $new, $g ); + } + + foreach ( $postponed as $g ) { + $this->checkAndAdd( $new, $g, true ); + } + + $diff = self::getArrayDiff( $old, $new ); + $this->store( $new, $diff['keys'] ); + $this->unlock(); + $this->clearMessageGroupStats( $diff ); + + $recursion--; + + return $new; + } + + /** + * Compares two associative arrays. + * + * Values must be a string or list of strings. Returns an array of added, + * deleted and modified keys as well as value changes (you can think values + * as categories and keys as pages). Each of the keys ('add', 'del', 'mod' + * respectively) maps to an array whose keys are the changed keys of the + * original arrays and values are lists where first element contains the + * old value and the second element the new value. + * + * @code + * $a = [ 'a' => '1', 'b' => '2', 'c' => '3' ]; + * $b = [ 'b' => '2', 'c' => [ '3', '2' ], 'd' => '4' ]; + * + * self::getArrayDiff( $a, $b ) ) === [ + * 'keys' => [ + * 'add' => [ 'd' => [ [], [ '4' ] ] ], + * 'del' => [ 'a' => [ [ '1' ], [] ] ], + * 'mod' => [ 'c' => [ [ '3' ], [ '3', '2' ] ] ], + * ], + * 'values' => [ 2, 4, 1 ] + * ]; + * @endcode + * + * @param array $old + * @param array $new + * @return array + */ + public static function getArrayDiff( array $old, array $new ) { + $values = []; + $record = function ( $groups ) use ( &$values ) { + foreach ( $groups as $group ) { + $values[$group] = true; + } + }; + + $keys = [ + 'add' => [], + 'del' => [], + 'mod' => [], + ]; + + foreach ( $new as $key => $groups ) { + if ( !isset( $old[$key] ) ) { + $keys['add'][$key] = [ [], (array)$groups ]; + $record( (array)$groups ); + // Using != here on purpose to ignore the order of items + } elseif ( $groups != $old[$key] ) { + $keys['mod'][$key] = [ (array)$old[$key], (array)$groups ]; + $record( array_diff( (array)$old[$key], (array)$groups ) ); + $record( array_diff( (array)$groups, (array)$old[$key] ) ); + } + } + + foreach ( $old as $key => $groups ) { + if ( !isset( $new[$key] ) ) { + $keys['del'][$key] = [ (array)$groups, [] ]; + $record( (array)$groups, [] ); + } + // We already checked for diffs above + } + + return [ + 'keys' => $keys, + 'values' => array_keys( $values ), + ]; + } + + /** + * Purge stuff when set of keys have changed. + * + * @param array $diff + */ + protected function clearMessageGroupStats( array $diff ) { + MessageGroupStats::clearGroup( $diff['values'] ); + + foreach ( $diff['keys'] as $keys ) { + foreach ( $keys as $key => $data ) { + list( $ns, $pagename ) = explode( ':', $key, 2 ); + $title = Title::makeTitle( $ns, $pagename ); + $handle = new MessageHandle( $title ); + list( $oldGroups, $newGroups ) = $data; + Hooks::run( 'TranslateEventMessageMembershipChange', + [ $handle, $oldGroups, $newGroups ] ); + } + } + } + + /** + * @param array &$hugearray + * @param MessageGroup $g + * @param bool $ignore + */ + protected function checkAndAdd( &$hugearray, MessageGroup $g, $ignore = false ) { + if ( method_exists( $g, 'getKeys' ) ) { + $keys = $g->getKeys(); + } else { + $messages = $g->getDefinitions(); + + if ( !is_array( $messages ) ) { + return; + } + + $keys = array_keys( $messages ); + } + + $id = $g->getId(); + + $namespace = $g->getNamespace(); + + foreach ( $keys as $key ) { + # Force all keys to lower case, because the case doesn't matter and it is + # easier to do comparing when the case of first letter is unknown, because + # mediawiki forces it to upper case + $key = TranslateUtils::normaliseKey( $namespace, $key ); + if ( isset( $hugearray[$key] ) ) { + if ( !$ignore ) { + $to = implode( ', ', (array)$hugearray[$key] ); + wfWarn( "Key $key already belongs to $to, conflict with $id" ); + } + + if ( is_array( $hugearray[$key] ) ) { + // Hard work is already done, just add a new reference + $hugearray[$key][] = & $id; + } else { + // Store the actual reference, then remove it from array, to not + // replace the references value, but to store an array of new + // references instead. References are hard! + $value = & $hugearray[$key]; + unset( $hugearray[$key] ); + $hugearray[$key] = [ &$value, &$id ]; + } + } else { + $hugearray[$key] = & $id; + } + } + unset( $id ); // Disconnect the previous references to this $id + } + + /** + * These are probably slower than serialize and unserialize, + * but they are more space efficient because we only need + * strings and arrays. + * @param mixed $data + * @return mixed + */ + protected function serialize( $data ) { + if ( is_array( $data ) ) { + return implode( '|', $data ); + } else { + return $data; + } + } + + protected function unserialize( $data ) { + if ( strpos( $data, '|' ) !== false ) { + return explode( '|', $data ); + } + + return $data; + } +} + +/** + * Storage on serialized file. + * + * This serializes the whole array. Because this format can preserve + * the values which are stored as references inside the array, this is + * the most space efficient storage method and fastest when you want + * the full index. + * + * Unfortunately when the size of index grows to about 50000 items, even + * though it is only 3,5M on disk, it takes 35M when loaded into memory + * and the loading can take more than 0,5 seconds. Because usually we + * need to look up only few keys, it is better to use another backend + * which provides random access - this backend doesn't support that. + */ +class SerializedMessageIndex extends MessageIndex { + /** + * @var array|null + */ + protected $index; + + protected $filename = 'translate_messageindex.ser'; + + /** + * @param bool $forRebuild + * @return array + */ + public function retrieve( $forRebuild = false ) { + if ( $this->index !== null ) { + return $this->index; + } + + $file = TranslateUtils::cacheFile( $this->filename ); + if ( file_exists( $file ) ) { + $this->index = unserialize( file_get_contents( $file ) ); + } else { + $this->index = $this->rebuild(); + } + + return $this->index; + } + + protected function store( array $array, array $diff ) { + $file = TranslateUtils::cacheFile( $this->filename ); + file_put_contents( $file, serialize( $array ) ); + $this->index = $array; + } +} + +/// BC +class FileCachedMessageIndex extends SerializedMessageIndex { +} + +/** + * Storage on the database itself. + * + * This is likely to be the slowest backend. However it scales okay + * and provides random access. It also doesn't need any special setup, + * the database table is added with update.php together with other tables, + * which is the reason this is the default backend. It also works well + * on multi-server setup without needing for shared file storage. + * + * @since 2012-04-12 + */ +class DatabaseMessageIndex extends MessageIndex { + /** + * @var array|null + */ + protected $index; + + protected function lock() { + $dbw = wfGetDB( DB_MASTER ); + + // Any transaction should be flushed after getting the lock to avoid + // stale pre-lock REPEATABLE-READ snapshot data. + $ok = $dbw->lock( 'translate-messageindex', __METHOD__, 30 ); + if ( $ok ) { + $dbw->commit( __METHOD__, 'flush' ); + } + + return $ok; + } + + protected function unlock() { + $fname = __METHOD__; + $dbw = wfGetDB( DB_MASTER ); + // Unlock once the rows are actually unlocked to avoid deadlocks + if ( !$dbw->trxLevel() ) { + $dbw->unlock( 'translate-messageindex', $fname ); + } elseif ( method_exists( $dbw, 'onTransactionResolution' ) ) { // 1.28 + $dbw->onTransactionResolution( function () use ( $dbw, $fname ) { + $dbw->unlock( 'translate-messageindex', $fname ); + } ); + } else { + $dbw->onTransactionIdle( function () use ( $dbw, $fname ) { + $dbw->unlock( 'translate-messageindex', $fname ); + } ); + } + + return true; + } + + /** + * @param bool $forRebuild + * @return array + */ + public function retrieve( $forRebuild = false ) { + if ( $this->index !== null && !$forRebuild ) { + return $this->index; + } + + $dbr = wfGetDB( $forRebuild ? DB_MASTER : DB_REPLICA ); + $res = $dbr->select( 'translate_messageindex', '*', [], __METHOD__ ); + $this->index = []; + foreach ( $res as $row ) { + $this->index[$row->tmi_key] = $this->unserialize( $row->tmi_value ); + } + + return $this->index; + } + + protected function get( $key ) { + $dbr = wfGetDB( DB_REPLICA ); + $value = $dbr->selectField( + 'translate_messageindex', + 'tmi_value', + [ 'tmi_key' => $key ], + __METHOD__ + ); + + if ( is_string( $value ) ) { + $value = $this->unserialize( $value ); + } else { + $value = null; + } + + return $value; + } + + protected function store( array $array, array $diff ) { + $updates = []; + + foreach ( [ $diff['add'], $diff['mod'] ] as $changes ) { + foreach ( $changes as $key => $data ) { + list( $old, $new ) = $data; + $updates[] = [ + 'tmi_key' => $key, + 'tmi_value' => $this->serialize( $new ), + ]; + } + } + + $index = [ 'tmi_key' ]; + $deletions = array_keys( $diff['del'] ); + + $dbw = wfGetDB( DB_MASTER ); + $dbw->startAtomic( __METHOD__ ); + + if ( $updates !== [] ) { + $dbw->replace( 'translate_messageindex', [ $index ], $updates, __METHOD__ ); + } + + if ( $deletions !== [] ) { + $dbw->delete( 'translate_messageindex', [ 'tmi_key' => $deletions ], __METHOD__ ); + } + + $dbw->endAtomic( __METHOD__ ); + + $this->index = $array; + } +} + +/** + * Storage on the object cache. + * + * This can be faster than DatabaseMessageIndex, but it doesn't + * provide random access, and the data is not guaranteed to be persistent. + * + * This is unlikely to be the best backend for you, so don't use it. + */ +class CachedMessageIndex extends MessageIndex { + protected $key = 'translate-messageindex'; + protected $cache; + + /** + * @var array|null + */ + protected $index; + + protected function __construct( array $params ) { + $this->cache = wfGetCache( CACHE_ANYTHING ); + } + + /** + * @param bool $forRebuild + * @return array + */ + public function retrieve( $forRebuild = false ) { + if ( $this->index !== null ) { + return $this->index; + } + + $key = wfMemcKey( $this->key ); + $data = $this->cache->get( $key ); + if ( is_array( $data ) ) { + $this->index = $data; + } else { + $this->index = $this->rebuild(); + } + + return $this->index; + } + + protected function store( array $array, array $diff ) { + $key = wfMemcKey( $this->key ); + $this->cache->set( $key, $array ); + + $this->index = $array; + } +} + +/** + * Storage on CDB files. + * + * This is improved version of SerializedMessageIndex. It uses CDB files + * for storage, which means it provides random access. The CDB files are + * about double the size of serialized files (~7M for 50000 keys). + * + * Loading the whole index is slower than serialized, but about the same + * as for database. Suitable for single-server setups where + * SerializedMessageIndex is too slow for sloading the whole index. + * + * @since 2012-04-10 + */ +class CDBMessageIndex extends MessageIndex { + /** + * @var array|null + */ + protected $index; + + /** + * @var \Cdb\Reader|null + */ + protected $reader; + + /** + * @var string + */ + protected $filename = 'translate_messageindex.cdb'; + + /** + * @param bool $forRebuild + * @return array + */ + public function retrieve( $forRebuild = false ) { + $reader = $this->getReader(); + // This must be below the line above, which may fill the index + if ( $this->index !== null ) { + return $this->index; + } + + $this->index = []; + foreach ( $this->getKeys() as $key ) { + $this->index[$key] = $this->unserialize( $reader->get( $key ) ); + } + + return $this->index; + } + + public function getKeys() { + $reader = $this->getReader(); + $keys = []; + while ( true ) { + $key = $keys === [] ? $reader->firstkey() : $reader->nextkey(); + if ( $key === false ) { + break; + } + $keys[] = $key; + } + + return $keys; + } + + protected function get( $key ) { + $reader = $this->getReader(); + // We might have the full cache loaded + if ( $this->index !== null ) { + if ( isset( $this->index[$key] ) ) { + return $this->index[$key]; + } else { + return null; + } + } + + $value = $reader->get( $key ); + if ( !is_string( $value ) ) { + $value = null; + } else { + $value = $this->unserialize( $value ); + } + + return $value; + } + + protected function store( array $array, array $diff ) { + $this->reader = null; + + $file = TranslateUtils::cacheFile( $this->filename ); + $cache = \Cdb\Writer::open( $file ); + + foreach ( $array as $key => $value ) { + $value = $this->serialize( $value ); + $cache->set( $key, $value ); + } + + $cache->close(); + + $this->index = $array; + } + + protected function getReader() { + if ( $this->reader ) { + return $this->reader; + } + + $file = TranslateUtils::cacheFile( $this->filename ); + if ( !file_exists( $file ) ) { + // Create an empty index to allow rebuild + $this->store( [], [] ); + $this->index = $this->rebuild(); + } + + $this->reader = \Cdb\Reader::open( $file ); + return $this->reader; + } +} + +/** + * Storage on hash. + * + * For testing. + * + * @since 2015.04 + */ +class HashMessageIndex extends MessageIndex { + /** + * @var array + */ + protected $index = []; + + /** + * @param bool $forRebuild + * @return array + */ + public function retrieve( $forRebuild = false ) { + return $this->index; + } + + /** + * @param string $key + * + * @return mixed + */ + protected function get( $key ) { + if ( isset( $this->index[$key] ) ) { + return $this->index[$key]; + } else { + return null; + } + } + + protected function store( array $array, array $diff ) { + $this->index = $array; + } + + protected function clearMessageGroupStats( array $diff ) { + } +} |