summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/Translate/utils/MessageGroupStats.php
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/extensions/Translate/utils/MessageGroupStats.php')
-rw-r--r--www/wiki/extensions/Translate/utils/MessageGroupStats.php646
1 files changed, 646 insertions, 0 deletions
diff --git a/www/wiki/extensions/Translate/utils/MessageGroupStats.php b/www/wiki/extensions/Translate/utils/MessageGroupStats.php
new file mode 100644
index 00000000..950f45f8
--- /dev/null
+++ b/www/wiki/extensions/Translate/utils/MessageGroupStats.php
@@ -0,0 +1,646 @@
+<?php
+/**
+ * This file aims to provide efficient mechanism for fetching translation completion stats.
+ *
+ * @file
+ * @author Wikia (trac.wikia-code.com/browser/wikia/trunk/extensions/wikia/TranslationStatistics)
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+use MediaWiki\MediaWikiServices;
+use Wikimedia\Rdbms\IDatabase;
+
+/**
+ * This class abstract MessageGroup statistics calculation and storing.
+ * You can access stats easily per language or per group.
+ * Stat array for each item is of format array( total, translate, fuzzy ).
+ *
+ * @ingroup Stats MessageGroups
+ */
+class MessageGroupStats {
+ /// Name of the database table
+ const TABLE = 'translate_groupstats';
+
+ const TOTAL = 0; ///< Array index
+ const TRANSLATED = 1; ///< Array index
+ const FUZZY = 2; ///< Array index
+ const PROOFREAD = 3; ///< Array index
+
+ /// If stats are not cached, do not attempt to calculate them on the fly
+ const FLAG_CACHE_ONLY = 1;
+ /// Ignore cached values. Useful for updating stale values.
+ const FLAG_NO_CACHE = 2;
+
+ /**
+ * @var array[]
+ */
+ protected static $updates = [];
+
+ /**
+ * @var string[]
+ */
+ private static $languages;
+
+ /**
+ * Returns empty stats array. Useful because the number of elements
+ * may change.
+ * @return int[]
+ * @since 2012-09-21
+ */
+ public static function getEmptyStats() {
+ return [ 0, 0, 0, 0 ];
+ }
+
+ /**
+ * Returns empty stats array that indicates stats are incomplete or
+ * unknown.
+ * @return null[]
+ * @since 2013-01-02
+ */
+ protected static function getUnknownStats() {
+ return [ null, null, null, null ];
+ }
+
+ private static function isValidLanguage( $code ) {
+ $languages = self::getLanguages();
+ return in_array( $code, $languages );
+ }
+
+ private static function isValidMessageGroup( MessageGroup $group = null ) {
+ /* In case some code calls stats for dynamic groups. Calculating these numbers
+ * don't make sense for dynamic groups, and would just throw an exception. */
+ return $group && !MessageGroups::isDynamic( $group );
+ }
+
+ /**
+ * Returns stats for given group in given language.
+ * @param string $id Group id
+ * @param string $code Language code
+ * @param int $flags Combination of FLAG_* constants.
+ * @return null[]|int[]
+ */
+ public static function forItem( $id, $code, $flags = 0 ) {
+ $group = MessageGroups::getGroup( $id );
+ if ( !self::isValidMessageGroup( $group ) || !self::isValidLanguage( $code ) ) {
+ return self::getUnknownStats();
+ }
+
+ $res = self::selectRowsIdLang( [ $id ], [ $code ], $flags );
+ $stats = self::extractResults( $res, [ $id ] );
+
+ if ( !isset( $stats[$id][$code] ) ) {
+ $stats[$id][$code] = self::forItemInternal( $stats, $group, $code, $flags );
+ }
+
+ self::queueUpdates( $flags );
+
+ return $stats[$id][$code];
+ }
+
+ /**
+ * Returns stats for all groups in given language.
+ * @param string $code Language code
+ * @param int $flags Combination of FLAG_* constants.
+ * @return array[]
+ */
+ public static function forLanguage( $code, $flags = 0 ) {
+ if ( !self::isValidLanguage( $code ) ) {
+ return self::getUnknownStats();
+ }
+
+ $stats = self::forLanguageInternal( $code, [], $flags );
+ $flattened = [];
+ foreach ( $stats as $group => $languages ) {
+ $flattened[$group] = $languages[$code];
+ }
+
+ self::queueUpdates( $flags );
+
+ return $flattened;
+ }
+
+ /**
+ * Returns stats for all languages in given group.
+ * @param string $id Group id
+ * @param int $flags Combination of FLAG_* constants.
+ * @return array[]
+ */
+ public static function forGroup( $id, $flags = 0 ) {
+ $group = MessageGroups::getGroup( $id );
+ if ( !self::isValidMessageGroup( $group ) ) {
+ return [];
+ }
+
+ $stats = self::forGroupInternal( $group, [], $flags );
+
+ self::queueUpdates( $flags );
+
+ return $stats[$id];
+ }
+
+ /**
+ * Returns stats for all group in all languages.
+ * Might be slow, might use lots of memory.
+ * Returns two dimensional array indexed by group and language.
+ * @param int $flags Combination of FLAG_* constants.
+ * @return array[]
+ */
+ public static function forEverything( $flags = 0 ) {
+ $groups = MessageGroups::singleton()->getGroups();
+ $stats = [];
+ foreach ( $groups as $g ) {
+ $stats = self::forGroupInternal( $g, $stats, $flags );
+ }
+
+ self::queueUpdates( $flags );
+
+ return $stats;
+ }
+
+ /**
+ * Recalculate stats for all groups associated with the message.
+ *
+ * Hook: TranslateEventTranslationReview
+ * @param MessageHandle $handle
+ */
+ public static function clear( MessageHandle $handle ) {
+ $code = $handle->getCode();
+ $groups = self::getSortedGroupsForClearing( $handle->getGroupIds() );
+ self::internalClearGroups( $code, $groups );
+ }
+
+ /**
+ * Recalculate stats for given group(s).
+ *
+ * @param string|string[] $id Message group ids.
+ */
+ public static function clearGroup( $id ) {
+ $languages = self::getLanguages();
+ $groups = self::getSortedGroupsForClearing( (array)$id );
+
+ // Do one language at a time, to save memory
+ foreach ( $languages as $code ) {
+ self::internalClearGroups( $code, $groups );
+ }
+ }
+
+ /**
+ * Helper for clear and clearGroup that caches already loaded statistics.
+ *
+ * @param string $code
+ * @param MessageGroup[] $groups
+ */
+ private static function internalClearGroups( $code, array $groups ) {
+ $stats = [];
+ foreach ( $groups as $id => $group ) {
+ // $stats is modified by reference
+ self::forItemInternal( $stats, $group, $code, 0 );
+ }
+ self::queueUpdates( 0 );
+ }
+
+ /**
+ * Get sorted message groups ids that can be used for efficient clearing.
+ *
+ * To optimize performance, we first need to process all non-aggregate groups.
+ * Because aggregate groups are flattened (see self::expandAggregates), we can
+ * process them any order and allow use of cache, except for the aggregate groups
+ * itself.
+ *
+ * @param string[] $ids
+ * @return string[]
+ */
+ private static function getSortedGroupsForClearing( array $ids ) {
+ $groups = array_map( [ MessageGroups::class, 'getGroup' ], $ids );
+ // Sanity: Remove any invalid groups
+ $groups = array_filter( $groups );
+
+ $sorted = [];
+ $aggs = [];
+ foreach ( $groups as $group ) {
+ if ( $group instanceof AggregateMessageGroup ) {
+ $aggs[$group->getId()] = $group;
+ } else {
+ $sorted[$group->getId()] = $group;
+ }
+ }
+
+ return array_merge( $sorted, $aggs );
+ }
+
+ /**
+ * Get list of supported languages for statistics.
+ *
+ * @return string[]
+ */
+ private static function getLanguages() {
+ if ( self::$languages === null ) {
+ $languages = array_keys( TranslateUtils::getLanguageNames( 'en' ) );
+ sort( $languages );
+ self::$languages = $languages;
+ }
+
+ return self::$languages;
+ }
+
+ public static function clearLanguage( $code ) {
+ if ( !count( $code ) ) {
+ return;
+ }
+ $dbw = wfGetDB( DB_MASTER );
+ $conds = [ 'tgs_lang' => $code ];
+ $dbw->delete( self::TABLE, $conds, __METHOD__ );
+ wfDebugLog( 'messagegroupstats', 'Cleared ' . serialize( $conds ) );
+ }
+
+ /**
+ * Purges all cached stats.
+ */
+ public static function clearAll() {
+ $dbw = wfGetDB( DB_MASTER );
+ $dbw->delete( self::TABLE, '*' );
+ wfDebugLog( 'messagegroupstats', 'Cleared everything :(' );
+ }
+
+ /**
+ * Use this to extract results returned from selectRowsIdLang. You must pass the
+ * message group ids you want to retrieve. Entries that do not match are not returned.
+ *
+ * @param Traversable $res Database result object
+ * @param string[] $ids List of message group ids
+ * @param array[] $stats Optional array to append results to.
+ * @return array[]
+ */
+ protected static function extractResults( $res, array $ids, array $stats = [] ) {
+ // Map the internal ids back to real ids
+ $idmap = array_combine( array_map( 'self::getDatabaseIdForGroupId', $ids ), $ids );
+
+ foreach ( $res as $row ) {
+ if ( !isset( $idmap[$row->tgs_group] ) ) {
+ // Stale entry, ignore for now
+ // TODO: Schedule for purge
+ continue;
+ }
+
+ $realId = $idmap[$row->tgs_group];
+ $stats[$realId][$row->tgs_lang] = self::extractNumbers( $row );
+ }
+
+ return $stats;
+ }
+
+ public static function update( MessageHandle $handle, array $changes = [] ) {
+ $dbids = array_map( 'self::getDatabaseIdForGroupId', $handle->getGroupIds() );
+
+ $dbw = wfGetDB( DB_MASTER );
+ $conds = [
+ 'tgs_group' => $dbids,
+ 'tgs_lang' => $handle->getCode(),
+ ];
+
+ $values = [];
+ foreach ( [ 'total', 'translated', 'fuzzy', 'proofread' ] as $type ) {
+ if ( isset( $changes[$type] ) ) {
+ $values[] = "tgs_$type=tgs_$type" .
+ self::stringifyNumber( $changes[$type] );
+ }
+ }
+
+ $dbw->update( self::TABLE, $values, $conds, __METHOD__ );
+ }
+
+ /**
+ * Returns an array of needed database fields.
+ * @param stdClass $row
+ * @return array
+ */
+ protected static function extractNumbers( $row ) {
+ return [
+ self::TOTAL => (int)$row->tgs_total,
+ self::TRANSLATED => (int)$row->tgs_translated,
+ self::FUZZY => (int)$row->tgs_fuzzy,
+ self::PROOFREAD => (int)$row->tgs_proofread,
+ ];
+ }
+
+ /**
+ * @param string $code Language code
+ * @param array[] $stats
+ * @param int $flags Combination of FLAG_* constants.
+ * @return array[]
+ */
+ protected static function forLanguageInternal( $code, array $stats = [], $flags ) {
+ $groups = MessageGroups::singleton()->getGroups();
+
+ $ids = array_keys( $groups );
+ $res = self::selectRowsIdLang( null, [ $code ], $flags );
+ $stats = self::extractResults( $res, $ids, $stats );
+
+ foreach ( $groups as $id => $group ) {
+ if ( isset( $stats[$id][$code] ) ) {
+ continue;
+ }
+ $stats[$id][$code] = self::forItemInternal( $stats, $group, $code, $flags );
+ }
+
+ return $stats;
+ }
+
+ /**
+ * @param AggregateMessageGroup $agg
+ * @return mixed
+ */
+ protected static function expandAggregates( AggregateMessageGroup $agg ) {
+ $flattened = [];
+
+ /** @var MessageGroup|AggregateMessageGroup $group */
+ foreach ( $agg->getGroups() as $group ) {
+ if ( $group instanceof AggregateMessageGroup ) {
+ $flattened += self::expandAggregates( $group );
+ } else {
+ $flattened[$group->getId()] = $group;
+ }
+ }
+
+ return $flattened;
+ }
+
+ /**
+ * @param MessageGroup $group
+ * @param array[] $stats
+ * @param int $flags Combination of FLAG_* constants.
+ * @return array[]
+ */
+ protected static function forGroupInternal( MessageGroup $group, array $stats = [], $flags ) {
+ $id = $group->getId();
+
+ $res = self::selectRowsIdLang( [ $id ], null, $flags );
+ $stats = self::extractResults( $res, [ $id ], $stats );
+
+ # Go over each language filling missing entries
+ $languages = self::getLanguages();
+ foreach ( $languages as $code ) {
+ if ( isset( $stats[$id][$code] ) ) {
+ continue;
+ }
+ $stats[$id][$code] = self::forItemInternal( $stats, $group, $code, $flags );
+ }
+
+ // This is for sorting the values added later in correct order
+ foreach ( array_keys( $stats ) as $key ) {
+ ksort( $stats[$key] );
+ }
+
+ return $stats;
+ }
+
+ /**
+ * Fetch rows from the database. Use extractResults to process this value.
+ *
+ * @param null|string[] $ids List of message group ids
+ * @param null|string[] $codes List of language codes
+ * @param int $flags Combination of FLAG_* constants.
+ * @return Traversable Database result object
+ */
+ protected static function selectRowsIdLang( array $ids = null, array $codes = null, $flags ) {
+ if ( $flags & self::FLAG_NO_CACHE ) {
+ return [];
+ }
+
+ $conds = [];
+ if ( $ids !== null ) {
+ $dbids = array_map( 'self::getDatabaseIdForGroupId', $ids );
+ $conds['tgs_group'] = $dbids;
+ }
+
+ if ( $codes !== null ) {
+ $conds['tgs_lang'] = $codes;
+ }
+
+ $dbr = TranslateUtils::getSafeReadDB();
+ $res = $dbr->select( self::TABLE, '*', $conds, __METHOD__ );
+
+ return $res;
+ }
+
+ /**
+ * @param array[] &$stats
+ * @param MessageGroup $group
+ * @param string $code Language code
+ * @param int $flags Combination of FLAG_* constants.
+ * @return null[]|int[]
+ */
+ protected static function forItemInternal( &$stats, MessageGroup $group, $code, $flags ) {
+ $id = $group->getId();
+
+ if ( $flags & self::FLAG_CACHE_ONLY ) {
+ $stats[$id][$code] = self::getUnknownStats();
+ return $stats[$id][$code];
+ }
+
+ if ( $group instanceof AggregateMessageGroup ) {
+ $aggregates = self::calculateAggregageGroup( $stats, $group, $code, $flags );
+ } else {
+ $aggregates = self::calculateGroup( $group, $code );
+ }
+ // Cache for use in subsequent forItemInternal calls
+ $stats[$id][$code] = $aggregates;
+
+ // Don't add nulls to the database, causes annoying warnings
+ if ( $aggregates[self::TOTAL] === null ) {
+ return $aggregates;
+ }
+
+ self::$updates[] = [
+ 'tgs_group' => self::getDatabaseIdForGroupId( $id ),
+ 'tgs_lang' => $code,
+ 'tgs_total' => $aggregates[self::TOTAL],
+ 'tgs_translated' => $aggregates[self::TRANSLATED],
+ 'tgs_fuzzy' => $aggregates[self::FUZZY],
+ 'tgs_proofread' => $aggregates[self::PROOFREAD],
+ ];
+
+ // For big and lengthy updates, attempt some interim saves. This might not have
+ // any effect, because writes to the database may be deferred.
+ if ( count( self::$updates ) % 100 === 0 ) {
+ self::queueUpdates( $flags );
+ }
+
+ return $aggregates;
+ }
+
+ private static function calculateAggregageGroup( &$stats, $group, $code, $flags ) {
+ $aggregates = self::getEmptyStats();
+
+ $expanded = self::expandAggregates( $group );
+ $subGroupIds = array_keys( $expanded );
+
+ // Performance: if we have per-call cache of stats, do not query them again.
+ foreach ( $subGroupIds as $index => $sid ) {
+ if ( isset( $stats[$sid][$code] ) ) {
+ unset( $subGroupIds[ $index ] );
+ }
+ }
+
+ if ( $subGroupIds !== [] ) {
+ $res = self::selectRowsIdLang( $subGroupIds, [ $code ], $flags );
+ $stats = self::extractResults( $res, $subGroupIds, $stats );
+ }
+
+ foreach ( $expanded as $sid => $subgroup ) {
+ # Discouraged groups may belong to another group, usually if there
+ # is an aggregate group for all translatable pages. In that case
+ # calculate and store the statistics, but don't count them as part of
+ # the aggregate group, so that the numbers in Special:LanguageStats
+ # add up. The statistics for discouraged groups can still be viewed
+ # through Special:MessageGroupStats.
+ if ( !isset( $stats[$sid][$code] ) ) {
+ $stats[$sid][$code] = self::forItemInternal( $stats, $subgroup, $code, $flags );
+ }
+
+ $include = Hooks::run( 'Translate:MessageGroupStats:isIncluded', [ $sid, $code ] );
+ if ( $include ) {
+ $aggregates = self::multiAdd( $aggregates, $stats[$sid][$code] );
+ }
+ }
+
+ return $aggregates;
+ }
+
+ public static function multiAdd( &$a, $b ) {
+ if ( $a[0] === null || $b[0] === null ) {
+ return array_fill( 0, count( $a ), null );
+ }
+ foreach ( $a as $i => &$v ) {
+ $v += $b[$i];
+ }
+
+ return $a;
+ }
+
+ /**
+ * @param MessageGroup $group
+ * @param string $code Language code
+ * @return int[] ( total, translated, fuzzy, proofread )
+ */
+ protected static function calculateGroup( MessageGroup $group, $code ) {
+ global $wgTranslateDocumentationLanguageCode;
+ // Calculate if missing and store in the db
+ $collection = $group->initCollection( $code );
+
+ if ( $code === $wgTranslateDocumentationLanguageCode ) {
+ $ffs = $group->getFFS();
+ if ( $ffs instanceof GettextFFS ) {
+ $template = $ffs->read( 'en' );
+ $infile = [];
+ foreach ( $template['TEMPLATE'] as $key => $data ) {
+ if ( isset( $data['comments']['.'] ) ) {
+ $infile[$key] = '1';
+ }
+ }
+ $collection->setInFile( $infile );
+ }
+ }
+
+ $collection->filter( 'ignored' );
+ $collection->filter( 'optional' );
+ // Store the count of real messages for later calculation.
+ $total = count( $collection );
+
+ // Count fuzzy first.
+ $collection->filter( 'fuzzy' );
+ $fuzzy = $total - count( $collection );
+
+ // Count the completed translations.
+ $collection->filter( 'hastranslation', false );
+ $translated = count( $collection );
+
+ // Count how many of the completed translations
+ // have been proofread
+ $collection->filter( 'reviewer', false );
+ $proofread = count( $collection );
+
+ return [
+ self::TOTAL => $total,
+ self::TRANSLATED => $translated,
+ self::FUZZY => $fuzzy,
+ self::PROOFREAD => $proofread,
+ ];
+ }
+
+ /**
+ * Converts input to "+2" "-4" type of string.
+ * @param int $number
+ * @return string
+ */
+ protected static function stringifyNumber( $number ) {
+ $number = (int)$number;
+
+ return $number < 0 ? "$number" : "+$number";
+ }
+
+ protected static function queueUpdates( $flags ) {
+ if ( wfReadOnly() ) {
+ return;
+ }
+
+ if ( self::$updates === [] ) {
+ return;
+ }
+
+ $lb = MediaWikiServices::getInstance()->getDBLoadBalancer();
+ $dbw = $lb->getLazyConnectionRef( DB_MASTER ); // avoid connecting yet
+ $table = self::TABLE;
+ $updates = &self::$updates;
+
+ $updateOp = self::withLock(
+ $dbw,
+ 'updates',
+ __METHOD__,
+ function ( IDatabase $dbw, $method ) use ( $table, &$updates ) {
+ // Maybe another deferred update already processed these
+ if ( $updates === [] ) {
+ return;
+ }
+
+ $primaryKey = [ 'tgs_group', 'tgs_lang' ];
+ $dbw->replace( $table, [ $primaryKey ], $updates, $method );
+ $updates = [];
+ }
+ );
+
+ if ( defined( 'MEDIAWIKI_JOB_RUNNER' ) ) {
+ call_user_func( $updateOp );
+ } else {
+ DeferredUpdates::addCallableUpdate( $updateOp );
+ }
+ }
+
+ protected static function withLock( IDatabase $dbw, $key, $method, $callback ) {
+ $fname = __METHOD__;
+ return function () use ( $dbw, $key, $method, $callback, $fname ) {
+ $lockName = 'MessageGroupStats:' . $key;
+ if ( !$dbw->lock( $lockName, $fname, 1 ) ) {
+ return; // raced out
+ }
+
+ $dbw->commit( $fname, 'flush' );
+ call_user_func( $callback, $dbw, $method );
+ $dbw->commit( $fname, 'flush' );
+
+ $dbw->unlock( $lockName, $fname );
+ };
+ }
+
+ public static function getDatabaseIdForGroupId( $id ) {
+ // The column is 100 bytes long, but we don't need to use it all
+ if ( strlen( $id ) <= 72 ) {
+ return $id;
+ }
+
+ $hash = hash( 'sha256', $id, /*asHex*/false );
+ $dbid = substr( $id, 0, 50 ) . '||' . substr( $hash, 0, 20 );
+ return $dbid;
+ }
+}