diff options
Diffstat (limited to 'www/wiki/extensions/Translate/utils/MessageGroupStats.php')
-rw-r--r-- | www/wiki/extensions/Translate/utils/MessageGroupStats.php | 457 |
1 files changed, 303 insertions, 154 deletions
diff --git a/www/wiki/extensions/Translate/utils/MessageGroupStats.php b/www/wiki/extensions/Translate/utils/MessageGroupStats.php index a0128521..950f45f8 100644 --- a/www/wiki/extensions/Translate/utils/MessageGroupStats.php +++ b/www/wiki/extensions/Translate/utils/MessageGroupStats.php @@ -5,10 +5,12 @@ * @file * @author Wikia (trac.wikia-code.com/browser/wikia/trunk/extensions/wikia/TranslationStatistics) * @author Niklas Laxström - * @copyright Copyright © 2012-2013 Niklas Laxström - * @license GPL-2.0+ + * @license GPL-2.0-or-later */ +use MediaWiki\MediaWikiServices; +use Wikimedia\Rdbms\IDatabase; + /** * This class abstract MessageGroup statistics calculation and storing. * You can access stats easily per language or per group. @@ -25,31 +27,20 @@ class MessageGroupStats { const FUZZY = 2; ///< Array index const PROOFREAD = 3; ///< Array index - /** - * @var float|null - */ - protected static $timeStart = null; - - /** - * @var float|null - */ - protected static $limit = null; + /// If stats are not cached, do not attempt to calculate them on the fly + const FLAG_CACHE_ONLY = 1; + /// Ignore cached values. Useful for updating stale values. + const FLAG_NO_CACHE = 2; /** * @var array[] */ - protected static $updates = array(); + protected static $updates = []; /** - * Set the maximum time statistics are calculated. - * If the time limit is exceeded, the missing - * entries will be null. - * @param $limit float time in seconds + * @var string[] */ - public static function setTimeLimit( $limit ) { - self::$timeStart = microtime( true ); - self::$limit = $limit; - } + private static $languages; /** * Returns empty stats array. Useful because the number of elements @@ -58,7 +49,7 @@ class MessageGroupStats { * @since 2012-09-21 */ public static function getEmptyStats() { - return array( 0, 0, 0, 0 ); + return [ 0, 0, 0, 0 ]; } /** @@ -68,66 +59,82 @@ class MessageGroupStats { * @since 2013-01-02 */ protected static function getUnknownStats() { - return array( null, null, null, null ); + return [ null, null, null, null ]; + } + + private static function isValidLanguage( $code ) { + $languages = self::getLanguages(); + return in_array( $code, $languages ); + } + + private static function isValidMessageGroup( MessageGroup $group = null ) { + /* In case some code calls stats for dynamic groups. Calculating these numbers + * don't make sense for dynamic groups, and would just throw an exception. */ + return $group && !MessageGroups::isDynamic( $group ); } /** * Returns stats for given group in given language. - * @param $id string Group id - * @param $code string Language code + * @param string $id Group id + * @param string $code Language code + * @param int $flags Combination of FLAG_* constants. * @return null[]|int[] */ - public static function forItem( $id, $code ) { - $res = self::selectRowsIdLang( $id, $code ); - $stats = self::extractResults( $res ); - - /* In case some code calls this for dynamic groups, return the default - * values for unknown/incomplete stats. Calculating these numbers don't - * make sense for dynamic groups, and would just throw an exception. */ + public static function forItem( $id, $code, $flags = 0 ) { $group = MessageGroups::getGroup( $id ); - if ( MessageGroups::isDynamic( $group ) ) { - $stats[$id][$code] = self::getUnknownStats(); + if ( !self::isValidMessageGroup( $group ) || !self::isValidLanguage( $code ) ) { + return self::getUnknownStats(); } + $res = self::selectRowsIdLang( [ $id ], [ $code ], $flags ); + $stats = self::extractResults( $res, [ $id ] ); + if ( !isset( $stats[$id][$code] ) ) { - $stats[$id][$code] = self::forItemInternal( $stats, $group, $code ); + $stats[$id][$code] = self::forItemInternal( $stats, $group, $code, $flags ); } - self::queueUpdates(); + self::queueUpdates( $flags ); return $stats[$id][$code]; } /** * Returns stats for all groups in given language. - * @param $code string Language code - * @return Array + * @param string $code Language code + * @param int $flags Combination of FLAG_* constants. + * @return array[] */ - public static function forLanguage( $code ) { - $stats = self::forLanguageInternal( $code ); - $flattened = array(); + public static function forLanguage( $code, $flags = 0 ) { + if ( !self::isValidLanguage( $code ) ) { + return self::getUnknownStats(); + } + + $stats = self::forLanguageInternal( $code, [], $flags ); + $flattened = []; foreach ( $stats as $group => $languages ) { $flattened[$group] = $languages[$code]; } - self::queueUpdates(); + self::queueUpdates( $flags ); return $flattened; } /** * Returns stats for all languages in given group. - * @param $id string Group id - * @return Array + * @param string $id Group id + * @param int $flags Combination of FLAG_* constants. + * @return array[] */ - public static function forGroup( $id ) { + public static function forGroup( $id, $flags = 0 ) { $group = MessageGroups::getGroup( $id ); - if ( $group === null ) { - return array(); + if ( !self::isValidMessageGroup( $group ) ) { + return []; } - $stats = self::forGroupInternal( $group ); - self::queueUpdates(); + $stats = self::forGroupInternal( $group, [], $flags ); + + self::queueUpdates( $flags ); return $stats[$id]; } @@ -136,42 +143,105 @@ class MessageGroupStats { * Returns stats for all group in all languages. * Might be slow, might use lots of memory. * Returns two dimensional array indexed by group and language. - * @return Array + * @param int $flags Combination of FLAG_* constants. + * @return array[] */ - public static function forEverything() { + public static function forEverything( $flags = 0 ) { $groups = MessageGroups::singleton()->getGroups(); - $stats = array(); + $stats = []; foreach ( $groups as $g ) { - $stats = self::forGroupInternal( $g, $stats ); + $stats = self::forGroupInternal( $g, $stats, $flags ); } - self::queueUpdates(); + self::queueUpdates( $flags ); return $stats; } /** - * Clears the cache for all groups associated with the message. + * Recalculate stats for all groups associated with the message. * * Hook: TranslateEventTranslationReview + * @param MessageHandle $handle */ public static function clear( MessageHandle $handle ) { $code = $handle->getCode(); - $ids = $handle->getGroupIds(); - $dbw = wfGetDB( DB_MASTER ); - $conds = array( 'tgs_group' => $ids, 'tgs_lang' => $code ); - $dbw->delete( self::TABLE, $conds, __METHOD__ ); - wfDebugLog( 'messagegroupstats', 'Cleared ' . serialize( $conds ) ); + $groups = self::getSortedGroupsForClearing( $handle->getGroupIds() ); + self::internalClearGroups( $code, $groups ); } + /** + * Recalculate stats for given group(s). + * + * @param string|string[] $id Message group ids. + */ public static function clearGroup( $id ) { - if ( !count( $id ) ) { - return; + $languages = self::getLanguages(); + $groups = self::getSortedGroupsForClearing( (array)$id ); + + // Do one language at a time, to save memory + foreach ( $languages as $code ) { + self::internalClearGroups( $code, $groups ); } - $dbw = wfGetDB( DB_MASTER ); - $conds = array( 'tgs_group' => $id ); - $dbw->delete( self::TABLE, $conds, __METHOD__ ); - wfDebugLog( 'messagegroupstats', 'Cleared ' . serialize( $conds ) ); + } + + /** + * Helper for clear and clearGroup that caches already loaded statistics. + * + * @param string $code + * @param MessageGroup[] $groups + */ + private static function internalClearGroups( $code, array $groups ) { + $stats = []; + foreach ( $groups as $id => $group ) { + // $stats is modified by reference + self::forItemInternal( $stats, $group, $code, 0 ); + } + self::queueUpdates( 0 ); + } + + /** + * Get sorted message groups ids that can be used for efficient clearing. + * + * To optimize performance, we first need to process all non-aggregate groups. + * Because aggregate groups are flattened (see self::expandAggregates), we can + * process them any order and allow use of cache, except for the aggregate groups + * itself. + * + * @param string[] $ids + * @return string[] + */ + private static function getSortedGroupsForClearing( array $ids ) { + $groups = array_map( [ MessageGroups::class, 'getGroup' ], $ids ); + // Sanity: Remove any invalid groups + $groups = array_filter( $groups ); + + $sorted = []; + $aggs = []; + foreach ( $groups as $group ) { + if ( $group instanceof AggregateMessageGroup ) { + $aggs[$group->getId()] = $group; + } else { + $sorted[$group->getId()] = $group; + } + } + + return array_merge( $sorted, $aggs ); + } + + /** + * Get list of supported languages for statistics. + * + * @return string[] + */ + private static function getLanguages() { + if ( self::$languages === null ) { + $languages = array_keys( TranslateUtils::getLanguageNames( 'en' ) ); + sort( $languages ); + self::$languages = $languages; + } + + return self::$languages; } public static function clearLanguage( $code ) { @@ -179,7 +249,7 @@ class MessageGroupStats { return; } $dbw = wfGetDB( DB_MASTER ); - $conds = array( 'tgs_lang' => $code ); + $conds = [ 'tgs_lang' => $code ]; $dbw->delete( self::TABLE, $conds, __METHOD__ ); wfDebugLog( 'messagegroupstats', 'Cleared ' . serialize( $conds ) ); } @@ -193,23 +263,44 @@ class MessageGroupStats { wfDebugLog( 'messagegroupstats', 'Cleared everything :(' ); } - protected static function extractResults( $res, array $stats = array() ) { + /** + * Use this to extract results returned from selectRowsIdLang. You must pass the + * message group ids you want to retrieve. Entries that do not match are not returned. + * + * @param Traversable $res Database result object + * @param string[] $ids List of message group ids + * @param array[] $stats Optional array to append results to. + * @return array[] + */ + protected static function extractResults( $res, array $ids, array $stats = [] ) { + // Map the internal ids back to real ids + $idmap = array_combine( array_map( 'self::getDatabaseIdForGroupId', $ids ), $ids ); + foreach ( $res as $row ) { - $stats[$row->tgs_group][$row->tgs_lang] = self::extractNumbers( $row ); + if ( !isset( $idmap[$row->tgs_group] ) ) { + // Stale entry, ignore for now + // TODO: Schedule for purge + continue; + } + + $realId = $idmap[$row->tgs_group]; + $stats[$realId][$row->tgs_lang] = self::extractNumbers( $row ); } return $stats; } - public static function update( MessageHandle $handle, array $changes = array() ) { + public static function update( MessageHandle $handle, array $changes = [] ) { + $dbids = array_map( 'self::getDatabaseIdForGroupId', $handle->getGroupIds() ); + $dbw = wfGetDB( DB_MASTER ); - $conds = array( - 'tgs_group' => $handle->getGroupIds(), + $conds = [ + 'tgs_group' => $dbids, 'tgs_lang' => $handle->getCode(), - ); + ]; - $values = array(); - foreach ( array( 'total', 'translated', 'fuzzy', 'proofread' ) as $type ) { + $values = []; + foreach ( [ 'total', 'translated', 'fuzzy', 'proofread' ] as $type ) { if ( isset( $changes[$type] ) ) { $values[] = "tgs_$type=tgs_$type" . self::stringifyNumber( $changes[$type] ); @@ -221,33 +312,36 @@ class MessageGroupStats { /** * Returns an array of needed database fields. - * @param $row + * @param stdClass $row * @return array */ protected static function extractNumbers( $row ) { - return array( + return [ self::TOTAL => (int)$row->tgs_total, self::TRANSLATED => (int)$row->tgs_translated, self::FUZZY => (int)$row->tgs_fuzzy, self::PROOFREAD => (int)$row->tgs_proofread, - ); + ]; } /** * @param string $code Language code * @param array[] $stats + * @param int $flags Combination of FLAG_* constants. * @return array[] */ - protected static function forLanguageInternal( $code, array $stats = array() ) { - $res = self::selectRowsIdLang( null, $code ); - $stats = self::extractResults( $res, $stats ); - + protected static function forLanguageInternal( $code, array $stats = [], $flags ) { $groups = MessageGroups::singleton()->getGroups(); + + $ids = array_keys( $groups ); + $res = self::selectRowsIdLang( null, [ $code ], $flags ); + $stats = self::extractResults( $res, $ids, $stats ); + foreach ( $groups as $id => $group ) { if ( isset( $stats[$id][$code] ) ) { continue; } - $stats[$id][$code] = self::forItemInternal( $stats, $group, $code ); + $stats[$id][$code] = self::forItemInternal( $stats, $group, $code, $flags ); } return $stats; @@ -258,7 +352,7 @@ class MessageGroupStats { * @return mixed */ protected static function expandAggregates( AggregateMessageGroup $agg ) { - $flattened = array(); + $flattened = []; /** @var MessageGroup|AggregateMessageGroup $group */ foreach ( $agg->getGroups() as $group ) { @@ -275,22 +369,22 @@ class MessageGroupStats { /** * @param MessageGroup $group * @param array[] $stats + * @param int $flags Combination of FLAG_* constants. * @return array[] */ - protected static function forGroupInternal( $group, array $stats = array() ) { + protected static function forGroupInternal( MessageGroup $group, array $stats = [], $flags ) { $id = $group->getId(); - $res = self::selectRowsIdLang( $id, null ); - $stats = self::extractResults( $res, $stats ); + + $res = self::selectRowsIdLang( [ $id ], null, $flags ); + $stats = self::extractResults( $res, [ $id ], $stats ); # Go over each language filling missing entries - $languages = array_keys( TranslateUtils::getLanguageNames( 'en' ) ); - // This is for calculating things in correct order - sort( $languages ); + $languages = self::getLanguages(); foreach ( $languages as $code ) { if ( isset( $stats[$id][$code] ) ) { continue; } - $stats[$id][$code] = self::forItemInternal( $stats, $group, $code ); + $stats[$id][$code] = self::forItemInternal( $stats, $group, $code, $flags ); } // This is for sorting the values added later in correct order @@ -301,10 +395,23 @@ class MessageGroupStats { return $stats; } - protected static function selectRowsIdLang( $ids = null, $codes = null ) { - $conds = array(); + /** + * Fetch rows from the database. Use extractResults to process this value. + * + * @param null|string[] $ids List of message group ids + * @param null|string[] $codes List of language codes + * @param int $flags Combination of FLAG_* constants. + * @return Traversable Database result object + */ + protected static function selectRowsIdLang( array $ids = null, array $codes = null, $flags ) { + if ( $flags & self::FLAG_NO_CACHE ) { + return []; + } + + $conds = []; if ( $ids !== null ) { - $conds['tgs_group'] = $ids; + $dbids = array_map( 'self::getDatabaseIdForGroupId', $ids ); + $conds['tgs_group'] = $dbids; } if ( $codes !== null ) { @@ -318,63 +425,85 @@ class MessageGroupStats { } /** - * @param array[] $stats + * @param array[] &$stats * @param MessageGroup $group * @param string $code Language code - * + * @param int $flags Combination of FLAG_* constants. * @return null[]|int[] */ - protected static function forItemInternal( &$stats, $group, $code ) { + protected static function forItemInternal( &$stats, MessageGroup $group, $code, $flags ) { $id = $group->getId(); - if ( self::$timeStart !== null && ( microtime( true ) - self::$timeStart ) > self::$limit ) { - return $stats[$id][$code] = self::getUnknownStats(); + if ( $flags & self::FLAG_CACHE_ONLY ) { + $stats[$id][$code] = self::getUnknownStats(); + return $stats[$id][$code]; } if ( $group instanceof AggregateMessageGroup ) { - $aggregates = self::getEmptyStats(); - - $expanded = self::expandAggregates( $group ); - if ( $expanded === array() ) { - return $aggregates; - } - $res = self::selectRowsIdLang( array_keys( $expanded ), $code ); - $stats = self::extractResults( $res, $stats ); - - foreach ( $expanded as $sid => $subgroup ) { - # Discouraged groups may belong to another group, usually if there - # is an aggregate group for all translatable pages. In that case - # calculate and store the statistics, but don't count them as part of - # the aggregate group, so that the numbers in Special:LanguageStats - # add up. The statistics for discouraged groups can still be viewed - # through Special:MessageGroupStats. - if ( !isset( $stats[$sid][$code] ) ) { - $stats[$sid][$code] = self::forItemInternal( $stats, $subgroup, $code ); - } - - $include = Hooks::run( 'Translate:MessageGroupStats:isIncluded', array( $sid, $code ) ); - if ( $include ) { - $aggregates = self::multiAdd( $aggregates, $stats[$sid][$code] ); - } - } - $stats[$id][$code] = $aggregates; + $aggregates = self::calculateAggregageGroup( $stats, $group, $code, $flags ); } else { $aggregates = self::calculateGroup( $group, $code ); } + // Cache for use in subsequent forItemInternal calls + $stats[$id][$code] = $aggregates; // Don't add nulls to the database, causes annoying warnings if ( $aggregates[self::TOTAL] === null ) { return $aggregates; } - self::$updates[] = array( - 'tgs_group' => $id, + self::$updates[] = [ + 'tgs_group' => self::getDatabaseIdForGroupId( $id ), 'tgs_lang' => $code, 'tgs_total' => $aggregates[self::TOTAL], 'tgs_translated' => $aggregates[self::TRANSLATED], 'tgs_fuzzy' => $aggregates[self::FUZZY], 'tgs_proofread' => $aggregates[self::PROOFREAD], - ); + ]; + + // For big and lengthy updates, attempt some interim saves. This might not have + // any effect, because writes to the database may be deferred. + if ( count( self::$updates ) % 100 === 0 ) { + self::queueUpdates( $flags ); + } + + return $aggregates; + } + + private static function calculateAggregageGroup( &$stats, $group, $code, $flags ) { + $aggregates = self::getEmptyStats(); + + $expanded = self::expandAggregates( $group ); + $subGroupIds = array_keys( $expanded ); + + // Performance: if we have per-call cache of stats, do not query them again. + foreach ( $subGroupIds as $index => $sid ) { + if ( isset( $stats[$sid][$code] ) ) { + unset( $subGroupIds[ $index ] ); + } + } + + if ( $subGroupIds !== [] ) { + $res = self::selectRowsIdLang( $subGroupIds, [ $code ], $flags ); + $stats = self::extractResults( $res, $subGroupIds, $stats ); + } + + foreach ( $expanded as $sid => $subgroup ) { + # Discouraged groups may belong to another group, usually if there + # is an aggregate group for all translatable pages. In that case + # calculate and store the statistics, but don't count them as part of + # the aggregate group, so that the numbers in Special:LanguageStats + # add up. The statistics for discouraged groups can still be viewed + # through Special:MessageGroupStats. + if ( !isset( $stats[$sid][$code] ) ) { + $stats[$sid][$code] = self::forItemInternal( $stats, $subgroup, $code, $flags ); + } + + $include = Hooks::run( 'Translate:MessageGroupStats:isIncluded', [ $sid, $code ] ); + if ( $include ) { + $aggregates = self::multiAdd( $aggregates, $stats[$sid][$code] ); + } + } return $aggregates; } @@ -395,16 +524,16 @@ class MessageGroupStats { * @param string $code Language code * @return int[] ( total, translated, fuzzy, proofread ) */ - protected static function calculateGroup( $group, $code ) { + protected static function calculateGroup( MessageGroup $group, $code ) { global $wgTranslateDocumentationLanguageCode; - # Calculate if missing and store in the db + // Calculate if missing and store in the db $collection = $group->initCollection( $code ); if ( $code === $wgTranslateDocumentationLanguageCode ) { $ffs = $group->getFFS(); if ( $ffs instanceof GettextFFS ) { $template = $ffs->read( 'en' ); - $infile = array(); + $infile = []; foreach ( $template['TEMPLATE'] as $key => $data ) { if ( isset( $data['comments']['.'] ) ) { $infile[$key] = '1'; @@ -432,17 +561,17 @@ class MessageGroupStats { $collection->filter( 'reviewer', false ); $proofread = count( $collection ); - return array( + return [ self::TOTAL => $total, self::TRANSLATED => $translated, self::FUZZY => $fuzzy, self::PROOFREAD => $proofread, - ); + ]; } /** * Converts input to "+2" "-4" type of string. - * @param $number int + * @param int $number * @return string */ protected static function stringifyNumber( $number ) { @@ -451,47 +580,67 @@ class MessageGroupStats { return $number < 0 ? "$number" : "+$number"; } - protected static function queueUpdates() { + protected static function queueUpdates( $flags ) { if ( wfReadOnly() ) { return; } - if ( !count( self::$updates ) ) { + if ( self::$updates === [] ) { return; } - $dbw = wfGetDB( DB_MASTER ); + $lb = MediaWikiServices::getInstance()->getDBLoadBalancer(); + $dbw = $lb->getLazyConnectionRef( DB_MASTER ); // avoid connecting yet $table = self::TABLE; $updates = &self::$updates; - self::runWithLock( + $updateOp = self::withLock( $dbw, 'updates', __METHOD__, - function ( $dbw, $method ) use( $table, &$updates ) { - $dbw->insert( - $table, - $updates, - $method, - array( 'IGNORE' ) - ); - - $updates = array(); + function ( IDatabase $dbw, $method ) use ( $table, &$updates ) { + // Maybe another deferred update already processed these + if ( $updates === [] ) { + return; + } + + $primaryKey = [ 'tgs_group', 'tgs_lang' ]; + $dbw->replace( $table, [ $primaryKey ], $updates, $method ); + $updates = []; } ); + + if ( defined( 'MEDIAWIKI_JOB_RUNNER' ) ) { + call_user_func( $updateOp ); + } else { + DeferredUpdates::addCallableUpdate( $updateOp ); + } } - protected static function runWithLock( $dbw, $key, $method, $callback ) { - $dbw->onTransactionIdle( function () use ( $dbw, $key, $method, $callback ) { - $key = 'MessageGroupStats:' . $key; - $locked = $dbw->lock( $key, $method, 1 ); - if ( !$locked ) { - return; // Raced out + protected static function withLock( IDatabase $dbw, $key, $method, $callback ) { + $fname = __METHOD__; + return function () use ( $dbw, $key, $method, $callback, $fname ) { + $lockName = 'MessageGroupStats:' . $key; + if ( !$dbw->lock( $lockName, $fname, 1 ) ) { + return; // raced out } + $dbw->commit( $fname, 'flush' ); call_user_func( $callback, $dbw, $method ); + $dbw->commit( $fname, 'flush' ); + + $dbw->unlock( $lockName, $fname ); + }; + } + + public static function getDatabaseIdForGroupId( $id ) { + // The column is 100 bytes long, but we don't need to use it all + if ( strlen( $id ) <= 72 ) { + return $id; + } - $dbw->unlock( $key, $method ); - } ); + $hash = hash( 'sha256', $id, /*asHex*/false ); + $dbid = substr( $id, 0, 50 ) . '||' . substr( $hash, 0, 20 ); + return $dbid; } } |