diff options
author | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
---|---|---|
committer | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
commit | fc7369835258467bf97eb64f184b93691f9a9fd5 (patch) | |
tree | daabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/extensions/Translate/scripts |
first commit
Diffstat (limited to 'www/wiki/extensions/Translate/scripts')
26 files changed, 4652 insertions, 0 deletions
diff --git a/www/wiki/extensions/Translate/scripts/TranslateCliLogger.php b/www/wiki/extensions/Translate/scripts/TranslateCliLogger.php new file mode 100644 index 00000000..6fb8e85f --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/TranslateCliLogger.php @@ -0,0 +1,20 @@
<?php
/**
 * Simple helper to log things to the console.
 *
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 * @file
 */

use Psr\Log\AbstractLogger;

/**
 * PSR-3 logger that forwards every record to a caller supplied callable.
 */
class TranslateCliLogger extends AbstractLogger {
	/** @var callable Sink that receives one formatted string per log call */
	private $logger;

	/**
	 * @param callable $logger Invoked with the formatted log line as its only argument
	 */
	public function __construct( callable $logger ) {
		$this->logger = $logger;
	}

	/**
	 * Format the record as "LOG <level>: <message>" and hand it to the sink.
	 *
	 * @param mixed $level
	 * @param string $msg
	 * @param array $context Accepted for interface compatibility; not interpolated
	 */
	public function log( $level, $msg, array $context = [] ) {
		( $this->logger )( "LOG $level: $msg" );
	}
}
diff --git a/www/wiki/extensions/Translate/scripts/TranslateStatsOutput.php b/www/wiki/extensions/Translate/scripts/TranslateStatsOutput.php new file mode 100644 index 00000000..cbeea176 --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/TranslateStatsOutput.php @@ -0,0 +1,25 @@
<?php

/**
 * Provides heading, summaryheading and free text addition for stats output in
 * wiki format.
 *
 * @ingroup Stats
 */
class TranslateStatsOutput extends WikiStatsOutput {
	/**
	 * Print the opening line of the full-width statistics wikitable.
	 */
	public function heading() {
		echo '{| class="mw-ext-translate-groupstatistics sortable wikitable" border="2" ' .
			'cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: ' .
			'1px #AAAAAA solid; border-collapse: collapse; clear:both;" width="100%"' . "\n";
	}

	/**
	 * Print the opening line of the summary wikitable (same markup as heading()
	 * but preceded by a newline and without the width attribute).
	 */
	public function summaryheading() {
		echo "\n" . '{| class="mw-ext-translate-groupstatistics sortable wikitable" ' .
			'border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; ' .
			'border: 1px #AAAAAA solid; border-collapse: collapse; clear:both;"' .
			"\n";
	}

	/**
	 * Print the given text unchanged.
	 *
	 * @param string $freeText
	 */
	public function addFreeText( $freeText ) {
		echo $freeText;
	}
}
diff --git a/www/wiki/extensions/Translate/scripts/characterEditStats.php b/www/wiki/extensions/Translate/scripts/characterEditStats.php new file mode 100644 index 00000000..45b6372e --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/characterEditStats.php @@ -0,0 +1,150 @@
<?php
/**
 * Show number of characters translated over a given period of time.
 *
 * @author Santhosh Thottingal
 * @copyright Copyright © 2013 Santhosh Thottingal
 * @license GPL-2.0-or-later
 * @file
 * @ingroup Script Stats
 */

// Standard boilerplate to define $IP
if ( getenv( 'MW_INSTALL_PATH' ) !== false ) {
	$IP = getenv( 'MW_INSTALL_PATH' );
} else {
	$dir = __DIR__;
	$IP = "$dir/../../..";
}
require_once "$IP/maintenance/Maintenance.php";

/**
 * Maintenance script that sums revision lengths per language code for recent
 * edits in the translation namespaces and prints a table of the totals.
 */
class CharacterEditStats extends Maintenance {
	public function __construct() {
		parent::__construct();
		$this->mDescription = 'Script to show number of characters translated .';
		$this->addOption(
			'top',
			'(optional) Show given number of language codes (default: show all)',
			false, /*required*/
			true /*has arg*/
		);
		$this->addOption(
			'days',
			'(optional) Calculate for given number of days (default: 30)',
			false, /*required*/
			true /*has arg*/
		);
		$this->addOption(
			'ns',
			'(optional) Comma separated list of namespace IDs',
			false, /*required*/
			true /*has arg*/
		);
	}

	/**
	 * Collect the revisions, aggregate their lengths per language code and
	 * print the result sorted by size, largest first.
	 */
	public function execute() {
		global $wgTranslateFuzzyBotName, $wgSitename, $wgTranslateMessageNamespaces;

		$days = (int)$this->getOption( 'days', 30 );
		$top = (int)$this->getOption( 'top', -1 );

		if ( $this->hasOption( 'ns' ) ) {
			$namespaces = [];
			foreach ( explode( ',', $this->getOption( 'ns' ) ) as $namespace ) {
				// Tolerate "8, 1198" style input; non-numeric entries are dropped
				$namespace = trim( $namespace );
				if ( is_numeric( $namespace ) ) {
					$namespaces[] = (int)$namespace;
				}
			}
		} else {
			$namespaces = $wgTranslateMessageNamespaces;
		}

		if ( !$namespaces ) {
			// An empty list would otherwise reach the query as an invalid IN () condition
			$this->error( 'No valid namespace IDs to report on.', 1 );
		}

		// Select set of edits to report on
		$rows = $this->getRevisionsFromHistory( $days, $namespaces );

		// Get counts for edits per language code after filtering out edits by FuzzyBot
		$codes = [];
		foreach ( $rows as $row ) {
			if ( $row->user_text === $wgTranslateFuzzyBotName ) {
				continue;
			}

			// makeTitle() never returns null and keeps the real namespace of the page
			$handle = new MessageHandle( Title::makeTitle( $row->namespace, $row->title ) );
			$code = $handle->getCode();

			if ( !isset( $codes[$code] ) ) {
				$codes[$code] = 0;
			}

			$codes[$code] += (int)$row->length;
		}

		// Sort counts and report descending up to $top rows.
		arsort( $codes );
		$i = 0;
		$total = 0;
		$this->output( "Character edit stats for last $days days in $wgSitename\n" );
		$this->output( "code\tname\tedit\n" );
		$this->output( "-----------------------\n" );
		foreach ( $codes as $code => $num ) {
			// With the default of -1 this never matches, i.e. all rows are shown
			if ( $i++ === $top ) {
				break;
			}
			$language = Language::fetchLanguageName( $code );
			if ( !$language ) {
				// this will be very rare, but avoid division by zero in next line
				continue;
			}
			// rev_len is in bytes; scale it by the characters-per-byte ratio of
			// the language's own name to approximate a character count
			$charRatio = mb_strlen( $language, 'UTF-8' ) / strlen( $language );
			$num = (int)( $num * $charRatio );
			$total += $num;
			$this->output( "$code\t$language\t$num\n" );
		}
		$this->output( "-----------------------\n" );
		$this->output( "Total\t\t$total\n" );
	}

	/**
	 * Fetch title, namespace, author name and length of every revision newer
	 * than the cutoff in the given namespaces.
	 *
	 * @param int $days Age limit in days
	 * @param int[] $namespaces Namespace IDs to include
	 * @return stdClass[] Rows with the fields title, namespace, user_text and length
	 */
	private function getRevisionsFromHistory( $days, array $namespaces ) {
		$dbr = wfGetDB( DB_REPLICA );
		$cutoff = $dbr->addQuotes( $dbr->timestamp( time() - $days * 24 * 3600 ) );

		// The field renames are to be compatible with recentchanges table query.
		// is_callable() needs the [ class, method ] pair; a second string argument
		// would be taken as the $syntax_only flag and make the check always pass.
		if ( is_callable( [ Revision::class, 'getQueryInfo' ] ) ) {
			$revQuery = Revision::getQueryInfo( [ 'page' ] );
			$revUserText = $revQuery['fields']['rev_user_text'] ?? 'rev_user_text';
		} else {
			$revQuery = [
				'tables' => [ 'revision', 'page' ],
				'joins' => [
					'page' => [ 'JOIN', 'rev_page = page_id' ],
				]
			];
			$revUserText = 'rev_user_text';
		}
		$conds = [
			"rev_timestamp > $cutoff",
			'page_namespace' => $namespaces,
		];

		$res = $dbr->select(
			$revQuery['tables'],
			[
				'title' => 'page_title',
				'namespace' => 'page_namespace',
				'user_text' => $revUserText,
				'length' => 'rev_len',
			],
			$conds,
			__METHOD__,
			[],
			$revQuery['joins']
		);
		return iterator_to_array( $res );
	}
}

$maintClass = CharacterEditStats::class;
require_once RUN_MAINTENANCE_IF_MAIN;
diff --git a/www/wiki/extensions/Translate/scripts/create-language-models.php b/www/wiki/extensions/Translate/scripts/create-language-models.php new file mode 100644 index 00000000..71a1ce72 --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/create-language-models.php @@ -0,0 +1,234 @@ +<?php +/** + * Create language models for https://github.com/crodas/LanguageDetector based + * on translation data in your wiki. + * + * @author Niklas Laxström + * + * @copyright Copyright © 2013, Niklas Laxström + * @license GPL-2.0-or-later + * @file + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $dir = __DIR__; + $IP = "$dir/../../.."; +} +require_once "$IP/maintenance/Maintenance.php"; + +class LanguageModelCreator extends Maintenance { + protected $changes = []; + + public function __construct() { + parent::__construct(); + $this->mDescription = <<<TXT +Create language models for https://github.com/crodas/LanguageDetector based +on translation data in your wiki. It is safe to kill and restart the script. +List of pages and filtered language data is cached for 24 hours. Json files +present will be used, so don't forget to delete before new run. 
+TXT; + } + + public function execute() { + global $wgTranslateMessageNamespaces; + + ini_set( 'memory_limit', -1 ); + + // How many messages per language to use. + // Language is skipped if it has less than 1000 translations. + $messages = 5000; + + $languages = TranslateUtils::getLanguageNames( 'en' ); + $cache = wfGetCache( CACHE_DB ); + $key = wfMemcKey( __METHOD__, $messages ); + + $pages = $cache->get( $key ); + if ( !is_array( $pages ) ) { + $dbr = wfGetDB( DB_REPLICA ); + $conds = []; + $conds[] = 'page_title' . $dbr->buildLike( $dbr->anyString(), '/', $dbr->anyString() ); + $conds['page_namespace'] = $wgTranslateMessageNamespaces; + + echo "Before query\n"; + $res = $dbr->select( + [ 'page' ], + [ 'page_title, page_id' ], + $conds, + __METHOD__ + ); + echo "After query\n"; + + $total = $res->numRows(); + $index = 0; + + foreach ( $res as $row ) { + $index++; + $code = substr( $row->page_title, strrpos( $row->page_title, '/' ) + 1 ); + if ( isset( $languages[$code] ) ) { + $pages[$code][] = $row->page_id; + } + + if ( $index % 10000 === 0 ) { + $progress = number_format( $index / $total * 100, 2 ); + echo "$progress%\n"; + } + } + + echo "\n"; + + foreach ( array_keys( $pages ) as $code ) { + if ( count( $pages[$code] ) > $messages ) { + $pages[$code] = array_slice( $pages[$code], 0, $messages ); + } + + $pages[$code] = implode( '|', $pages[$code] ); + } + + echo "After code map\n"; + + ksort( $pages ); + + echo "After sort map\n"; + + $cache->set( $key, $pages, 3600 * 24 ); + echo "After set map\n"; + } + + unset( $pages['qqq'] ); + unset( $pages['de-formal'] ); + unset( $pages['nl-informal'] ); + unset( $pages['en-gb'] ); + + $pids = []; + $threads = 2; + foreach ( $pages as $code => $pageids ) { + $pid = ( $threads > 1 ) ? 
pcntl_fork() : -1; + + if ( $pid === 0 ) { + // Child, reseed because there is no bug in PHP: + // https://bugs.php.net/bug.php?id=42465 + mt_srand( getmypid() ); + $this->analyzeLanguage( $code, $pageids ); + exit(); + } elseif ( $pid === -1 ) { + // Fork failed or one thread, do it serialized + $this->analyzeLanguage( $code, $pageids ); + } else { + // Main thread + $pids[] = $pid; + } + + // If we hit the thread limit, wait for any child to finish. + if ( count( $pids ) >= $threads ) { + $status = 0; + $pid = pcntl_wait( $status ); + unset( $pids[$pid] ); + } + } + + foreach ( $pids as $pid ) { + $status = 0; + pcntl_waitpid( $pid, $status ); + } + + $this->output( "Combining languages\n" ); + + $huge = []; + foreach ( glob( 'temp-*.json' ) as $file ) { + $contents = file_get_contents( $file ); + $json = FormatJson::decode( $contents, true ); + + $huge = array_merge( $json, $huge ); + $huge['data'] = array_merge( $json['data'], $huge['data'] ); + } + + $json = FormatJson::encode( $huge, true, FormatJson::ALL_OK ); + file_put_contents( 'translatewiki.net.json', $json ); + } + + protected function analyzeLanguage( $code, $ids ) { + if ( file_exists( "temp-$code.json" ) ) { + $this->output( "$code MODEL EXISTS\n" ); + return; + } + + $text = $this->cacheSourceText( $code, $ids ); + if ( $text === '' ) { + return; + } + + $config = new LanguageDetector\Config; + $config->useMb( true ); + $c = new LanguageDetector\Learn( $config ); + $c->addSample( $code, $text ); + $c->addStepCallback( function ( $lang, $status ) { + echo "Learning {$lang}: $status\n"; + } ); + + $target = LanguageDetector\AbstractFormat::initFormatByPath( "temp-$code.json" ); + $c->save( $target ); + } + + protected function cacheSourceText( $code, $ids ) { + $cache = wfGetCache( CACHE_DB ); + $key = wfMemcKey( __CLASS__, 'cc', $code ); + $text = $cache->get( $key ); + if ( !is_string( $text ) ) { + $snippets = []; + + $ids = explode( '|', $ids ); + + $len = count( $ids ); + + if ( $len < 1000 ) { 
+ $this->output( "$code: $len SKIPPED\n" ); + return ''; + } else { + $this->output( "$code PROCESSING\n" ); + } + + $time = microtime( true ); + + foreach ( $ids as $id ) { + $params = new FauxRequest( [ + 'pageid' => $id, + 'action' => 'parse', + 'prop' => 'text', + 'disablepp' => 'true', + ] ); + + $api = new ApiMain( $params ); + $api->execute(); + + $result = $api->getResult()->getResultData( + null, + [ 'BC' => [] ] + ); + + $text = $result['parse']['text']['*']; + $text = strip_tags( $text ); + $text = str_replace( '!!FUZZY!!', '', $text ); + $text = preg_replace( '/\$[0-9]/', '', $text ); + $text = trim( $text ); + + $snippets[] = $text; + } + + $text = implode( ' ', $snippets ); + $cache->set( $key, $text, 3600 * 24 ); + + $delta = microtime( true ) - $time; + $this->output( "$code TOOK $delta\n" ); + } else { + $this->output( "$code FROM CACHE\n" ); + } + + return $text; + } +} + +$maintClass = LanguageModelCreator::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/www/wiki/extensions/Translate/scripts/createCheckIndex.php b/www/wiki/extensions/Translate/scripts/createCheckIndex.php new file mode 100644 index 00000000..6b6f4296 --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/createCheckIndex.php @@ -0,0 +1,142 @@
<?php
/**
 * Creates serialised database of messages that need checking for problems.
 *
 * @author Niklas Laxström
 * @author Siebrand Mazeland
 * @copyright Copyright © 2008-2013, Niklas Laxström, Siebrand Mazeland
 * @license GPL-2.0-or-later
 * @file
 */

// Standard boilerplate to define $IP
if ( getenv( 'MW_INSTALL_PATH' ) !== false ) {
	$IP = getenv( 'MW_INSTALL_PATH' );
} else {
	$dir = __DIR__;
	$IP = "$dir/../../..";
}
require_once "$IP/maintenance/Maintenance.php";

/**
 * Runs the fast message checks of every (requested) group against every
 * language and tags the failing translations as fuzzy.
 */
class CreateCheckIndex extends Maintenance {
	public function __construct() {
		parent::__construct();
		$this->mDescription = 'Creates serialised database of messages that need ' .
			'checking for problems.';
		$this->addOption(
			'group',
			'(optional) Comma separated list of group IDs to process (can use * as wildcard). ' .
				'Default: "*"',
			false, /*required*/
			true /*has arg*/
		);
		// execute() has always had a verbose code path, but nothing could enable it
		$this->addOption(
			'verbose',
			'(optional) Print a wiki link for every problematic translation',
			false, /*required*/
			false /*has arg*/
		);
	}

	/**
	 * Walk over groups and languages, check each translation and hand the
	 * problematic ones of each group/language pair to tagFuzzy().
	 */
	public function execute() {
		global $wgTranslateDocumentationLanguageCode, $wgContLang;

		$codes = Language::fetchLanguageNames( false );

		// Exclude the documentation language code
		if ( $wgTranslateDocumentationLanguageCode ) {
			unset( $codes[$wgTranslateDocumentationLanguageCode] );
		}

		$reqGroups = $this->getOption( 'group' );
		if ( $reqGroups ) {
			$reqGroups = explode( ',', $reqGroups );
			$reqGroups = array_map( 'trim', $reqGroups );
			$reqGroups = MessageGroups::expandWildcards( $reqGroups );
		}

		// Was isset( $options['verbose'] ) on an undefined local, i.e. always false
		$verbose = $this->hasOption( 'verbose' );

		$groups = MessageGroups::singleton()->getGroups();

		/** @var $g MessageGroup */
		foreach ( $groups as $g ) {
			$id = $g->getId();
			$sourceLanguage = $g->getSourceLanguage();

			// Skip groups that are not requested
			if ( $reqGroups && !in_array( $id, $reqGroups, true ) ) {
				continue;
			}

			$checker = $g->getChecker();
			if ( !$checker ) {
				continue;
			}

			// Initialise messages, using unique definitions if appropriate
			$collection = $g->initCollection( $sourceLanguage, true );
			if ( !count( $collection ) ) {
				continue;
			}

			$this->output( "Working with $id: ", $id );

			// Skip source language code
			$langCodes = $codes;
			unset( $langCodes[$sourceLanguage] );

			$langCodes = array_keys( $langCodes );
			sort( $langCodes );

			foreach ( $langCodes as $code ) {
				$this->output( "$code ", $id );

				$problematic = [];

				$collection->resetForNewLanguage( $code );
				$collection->loadTranslations();

				foreach ( $collection as $key => $message ) {
					$prob = $checker->checkMessageFast( $message, $code );
					if ( !$prob ) {
						continue;
					}

					if ( $verbose ) {
						// Print it
						$nsText = $wgContLang->getNsText( $g->namespaces[0] );
						$this->output( "# [[$nsText:$key/$code]]\n" );
					}

					// Add it to the array
					$problematic[] = [ $g->namespaces[0], "$key/$code" ];
				}

				self::tagFuzzy( $problematic );
			}
		}
	}

	/**
	 * Add a fuzzy tag to the latest revision of each of the given pages.
	 * Entries that do not form a valid title or have no page row are skipped.
	 *
	 * @param array[] $problematic List of [ namespace ID, page title ] pairs
	 */
	public static function tagFuzzy( $problematic ) {
		if ( !count( $problematic ) ) {
			return;
		}

		$dbw = wfGetDB( DB_MASTER );
		$fuzzyType = RevTag::getType( 'fuzzy' );

		$inserts = [];
		foreach ( $problematic as $p ) {
			$title = Title::makeTitleSafe( $p[0], $p[1] );
			if ( !$title ) {
				// makeTitleSafe() returns null for titles that cannot exist
				continue;
			}

			$res = $dbw->select( 'page', [ 'page_id', 'page_latest' ],
				[ 'page_namespace' => $p[0], 'page_title' => $title->getDBkey() ], __METHOD__ );

			foreach ( $res as $r ) {
				// Append: plain assignment here used to discard earlier rows
				$inserts[] = [
					'rt_page' => $r->page_id,
					'rt_revision' => $r->page_latest,
					'rt_type' => $fuzzyType,
				];
			}
		}

		if ( $inserts ) {
			// replace() expects the columns of the unique index, not the index name
			$dbw->replace(
				'revtag',
				[ [ 'rt_type', 'rt_page', 'rt_revision' ] ],
				$inserts,
				__METHOD__
			);
		}
	}
}

$maintClass = CreateCheckIndex::class;
require_once RUN_MAINTENANCE_IF_MAIN;
diff --git a/www/wiki/extensions/Translate/scripts/createMessageIndex.php b/www/wiki/extensions/Translate/scripts/createMessageIndex.php new file mode 100644 index 00000000..9005fef4 --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/createMessageIndex.php @@ -0,0 +1,34 @@ +<?php +/** + * Creates a database of keys in all groups, so that namespace and key can be + * used to get the group they belong to. This is used as a fallback when there + * is no other way to know which message group a message belongs to. 
+ * + * @author Niklas Laxström + * @license GPL-2.0-or-later + * @file + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $dir = __DIR__; + $IP = "$dir/../../.."; +} +require_once "$IP/maintenance/Maintenance.php"; + +class CreateMessageIndex extends Maintenance { + public function __construct() { + parent::__construct(); + $this->mDescription = 'Creates or updates a message index.'; + } + + public function execute() { + MessageGroups::singleton()->recache(); + MessageIndex::singleton()->rebuild(); + } +} + +$maintClass = CreateMessageIndex::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/www/wiki/extensions/Translate/scripts/expand-groupspec.php b/www/wiki/extensions/Translate/scripts/expand-groupspec.php new file mode 100644 index 00000000..a8627cbb --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/expand-groupspec.php @@ -0,0 +1,58 @@ +<?php +/** + * Script that expands a message group specification (such as page-News*,page-Help*). 
+ * + * @license GPL-2.0-or-later + * @file + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $dir = __DIR__; + $IP = "$dir/../../.."; +} +require_once "$IP/maintenance/Maintenance.php"; + +class TranslateExpandGroupSpec extends Maintenance { + public function __construct() { + parent::__construct(); + $this->mDescription = 'Expands a message group specification.'; + $this->addOption( + 'exportable', + 'List only groups that can be exported', + false, /*required*/ + false /*has arg*/ + ); + + $this->addArg( + 'specification', + 'For example page-*,main', + true, /*required*/ + false /*has arg*/ + ); + } + + public function execute() { + $spec = $this->getArg( 0 ); + $patterns = explode( ',', trim( $spec ) ); + $ids = MessageGroups::expandWildcards( $patterns ); + + if ( $this->getOption( 'exportable' ) ) { + foreach ( $ids as $index => $id ) { + if ( !MessageGroups::getGroup( $id ) instanceof FileBasedMessageGroup ) { + unset( $ids[ $index ] ); + } + } + } + + if ( $ids !== [] ) { + // This should not be affected by --quiet + echo implode( "\n", $ids ) . "\n"; + } + } +} + +$maintClass = TranslateExpandGroupSpec::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/www/wiki/extensions/Translate/scripts/export.php b/www/wiki/extensions/Translate/scripts/export.php new file mode 100644 index 00000000..469bd2ac --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/export.php @@ -0,0 +1,301 @@ +<?php +/** + * Script to export translations of one message group to file(s). 
+ * + * @author Niklas Laxström + * @author Siebrand Mazeland + * @copyright Copyright © 2008-2013, Niklas Laxström, Siebrand Mazeland + * @license GPL-2.0-or-later + * @file + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $dir = __DIR__; + $IP = "$dir/../../.."; +} +require_once "$IP/maintenance/Maintenance.php"; + +class CommandlineExport extends Maintenance { + public function __construct() { + parent::__construct(); + $this->mDescription = 'Message exporter.'; + $this->addOption( + 'group', + 'Comma separated list of group IDs (can use * as wildcard)', + true, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'lang', + 'Comma separated list of language codes or *', + true, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'target', + 'Target directory for exported files', + true, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'skip', + '(optional) Languages to skip, comma separated list', + false, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'skipgroup', + '(optional) Comma separated list of group IDs that should not be exported', + false, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'threshold', + '(optional) Do not export under this percentage translated', + false, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'hours', + '(optional) Only export languages with changes in the last given number of hours', + false, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'ppgettext', + '(optional) Group root path for checkout of product. "msgmerge" will post ' . + 'process on the export result based on the current source file ' . + 'in that location (from sourcePattern or definitionFile)', + false, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'no-location', + '(optional) Only used combined with "ppgettext". This option will rebuild ' . 
+ 'the gettext file without location information', + false, /*required*/ + false /*has arg*/ + ); + $this->addOption( + 'no-fuzzy', + '(optional) Do not include any messages marked as fuzzy/outdated', + false, /*required*/ + false /*has arg*/ + ); + $this->addOption( + 'codemaponly', + '(optional) Only export languages that have a codeMap entry', + false, /*required*/ + false /*has arg*/ + ); + } + +
	/**
	 * Export the requested languages of the requested file based message
	 * groups into the target directory.
	 *
	 * Groups and languages are narrowed down by the skip, skipgroup, threshold,
	 * hours and codemaponly options. When ppgettext is given and the group uses
	 * GettextFFS, every written file is merged with the source file through
	 * the external msgmerge command.
	 */
	public function execute() {
		global $wgMaxShellMemory, $wgTranslateGroupRoot, $wgTranslateDocumentationLanguageCode;

		$target = $this->getOption( 'target' );
		if ( !is_writable( $target ) ) {
			$this->error( "Target directory is not writable ($target).", 1 );
		}

		$threshold = $this->getOption( 'threshold' );
		$noFuzzy = $this->hasOption( 'no-fuzzy' );

		$noLocation = '';
		if ( $this->hasOption( 'no-location' ) ) {
			$noLocation = '--no-location ';
		}

		$skip = [];
		if ( $this->hasOption( 'skip' ) ) {
			$skip = array_map( 'trim', explode( ',', $this->getOption( 'skip' ) ) );
		}

		// Flip to remove the skipped codes by key, then flip back to a list of codes
		$reqLangs = TranslateUtils::parseLanguageCodes( $this->getOption( 'lang' ) );
		$reqLangs = array_flip( $reqLangs );
		foreach ( $skip as $skipLang ) {
			unset( $reqLangs[$skipLang] );
		}
		$reqLangs = array_flip( $reqLangs );

		$codemapOnly = $this->hasOption( 'codemaponly' );

		$groupIds = explode( ',', trim( $this->getOption( 'group' ) ) );
		$groupIds = MessageGroups::expandWildcards( $groupIds );
		$groups = MessageGroups::getGroupsById( $groupIds );

		/** @var FileBasedMessageGroup $group */
		foreach ( $groups as $groupId => $group ) {
			if ( $group->isMeta() ) {
				$this->output( "Skipping meta message group $groupId.\n" );
				unset( $groups[$groupId] );
				continue;
			}

			if ( !$group instanceof FileBasedMessageGroup ) {
				$this->output( "EE2: Unexportable message group $groupId.\n" );
				unset( $groups[$groupId] );
				continue;
			}
		}

		if ( !count( $groups ) ) {
			$this->error( 'EE1: No valid message groups identified.', 1 );
		}

		// false means "no filtering"; an array maps group ID => language code => true
		$changeFilter = false;
		$hours = $this->getOption( 'hours' );
		if ( $hours ) {
			$namespaces = [];

			/** @var FileBasedMessageGroup $group */
			foreach ( $groups as $group ) {
				$namespaces[$group->getNamespace()] = true;
			}

			$namespaces = array_keys( $namespaces );
			$bots = true;

			$changeFilter = [];
			$rows = TranslateUtils::translationChanges( $hours, $bots, $namespaces );
			foreach ( $rows as $row ) {
				$title = Title::makeTitle( $row->rc_namespace, $row->rc_title );
				$handle = new MessageHandle( $title );
				$code = $handle->getCode();
				if ( !$code ) {
					continue;
				}
				$groupIds = $handle->getGroupIds();
				foreach ( $groupIds as $groupId ) {
					$changeFilter[$groupId][$code] = true;
				}
			}
		}

		$skipGroups = [];
		if ( $this->hasOption( 'skipgroup' ) ) {
			$skipGroups = array_map( 'trim', explode( ',', $this->getOption( 'skipgroup' ) ) );
		}

		foreach ( $groups as $groupId => $group ) {
			// Array keys that look like integers are ints in PHP, hence the cast
			if ( in_array( (string)$groupId, $skipGroups, true ) ) {
				$this->output( "Group $groupId is in skipgroup.\n" );
				continue;
			}

			// No changes to this group at all
			if ( is_array( $changeFilter ) && !isset( $changeFilter[$groupId] ) ) {
				$this->output( "No recent changes to $groupId.\n" );
				continue;
			}

			$langs = $reqLangs;

			if ( $codemapOnly ) {
				foreach ( $langs as $index => $code ) {
					if ( $group->mapCode( $code ) === $code ) {
						unset( $langs[$index] );
					}
				}
			}

			if ( $threshold ) {
				$stats = MessageGroupStats::forGroup( $groupId );
				foreach ( $langs as $index => $code ) {
					if ( !isset( $stats[$code] ) ) {
						unset( $langs[$index] );
						continue;
					}

					$total = $stats[$code][MessageGroupStats::TOTAL];
					$translated = $stats[$code][MessageGroupStats::TRANSLATED];
					// A group without messages cannot reach any threshold; testing
					// $total first also avoids a division by zero
					if ( !$total || $translated / $total * 100 < $threshold ) {
						unset( $langs[$index] );
					}
				}
			}

			// Filter out unchanged languages from requested languages
			if ( is_array( $changeFilter ) ) {
				$langs = array_intersect( $langs, array_keys( $changeFilter[$groupId] ) );
			}

			if ( !count( $langs ) ) {
				continue;
			}

			$this->output( "Exporting $groupId...\n" );

			$ffs = $group->getFFS();
			$ffs->setWritePath( $target );
			$sourceLanguage = $group->getSourceLanguage();
			$collection = $group->initCollection( $sourceLanguage );

			$definitionFile = false;

			if ( $this->hasOption( 'ppgettext' ) && $ffs instanceof GettextFFS ) {
				// Need more shell memory for msgmerge.
				$wgMaxShellMemory = 402400;

				$path = $group->getSourceFilePath( $sourceLanguage );
				$definitionFile = str_replace(
					$wgTranslateGroupRoot,
					$this->getOption( 'ppgettext' ),
					$path
				);
			}

			$whitelist = $group->getTranslatableLanguages();

			foreach ( $langs as $lang ) {
				// Do not export languages that are blacklisted (or not whitelisted).
				// Also check that whitelist is not null, which means that all
				// languages are allowed for translation and export.
				if ( is_array( $whitelist ) && !isset( $whitelist[$lang] ) ) {
					continue;
				}

				$collection->resetForNewLanguage( $lang );
				$collection->loadTranslations();
				// Don't export ignored, unless it is the source language
				// or message documentation
				if ( $lang !== $wgTranslateDocumentationLanguageCode
					&& $lang !== $sourceLanguage
				) {
					$collection->filter( 'ignored' );
				}

				if ( $noFuzzy ) {
					$collection->filter( 'fuzzy' );
				}

				$ffs->write( $collection );

				// Do post processing if requested.
				if ( !$definitionFile ) {
					continue;
				}

				if ( !is_file( $definitionFile ) ) {
					$this->error( "$definitionFile does not exist.", 1 );
					continue;
				}

				$targetFileName = $ffs->getWritePath() .
					'/' . $group->getTargetFilename( $collection->code );
				// Both paths derive from command line options and group
				// configuration, so they are escaped before reaching the shell
				$cmd = 'msgmerge --quiet ' . $noLocation . '--output-file=' .
					wfEscapeShellArg( $targetFileName ) . ' ' .
					wfEscapeShellArg( $targetFileName ) . ' ' .
					wfEscapeShellArg( $definitionFile );
				wfShellExec( $cmd, $ret );

				// Report on errors.
				if ( $ret ) {
					$this->error( "ERROR: $ret" );
				}
			}
		}
	}
+} + +$maintClass = CommandlineExport::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/www/wiki/extensions/Translate/scripts/fallbacks-graph.php b/www/wiki/extensions/Translate/scripts/fallbacks-graph.php new file mode 100644 index 00000000..e625467b --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/fallbacks-graph.php @@ -0,0 +1,87 @@ +<?php +/** + * Script for creating graphml xml file of language fallbacks. + * + * @author Niklas Laxström + * + * @license GPL-2.0-or-later + * @file + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $dir = __DIR__; + $IP = "$dir/../../.."; +} +require_once "$IP/maintenance/Maintenance.php"; + +/// Creates graphml xml file of language fallbacks. +class FallbacksCompare extends Maintenance { + public function __construct() { + parent::__construct(); + $this->mDescription = 'Creates graphml xml file of language fallbacks.'; + } + + public function execute() { + $template = <<<XML +<?xml version="1.0" encoding="UTF-8"?> +<graphml + xmlns="http://graphml.graphdrawing.org/xmlns" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns + http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd" + xmlns:y="http://www.yworks.com/xml/graphml"> + + <key id="code" for="node" yfiles.type="nodegraphics"/> + <graph id="G" edgedefault="directed"> +$1 + </graph> +</graphml> + +XML; + + $langs = Language::fetchLanguageNames( null, 'mw' ); + $nodes = $edges = []; + foreach ( $langs as $code => $name ) { + $fallbacks = Language::getFallbacksFor( $code ); + if ( $fallbacks === [ 'en' ] ) { + continue; + } + + $nodes[$code] = $this->createNode( $code ); + + $prev = $code; + foreach ( $fallbacks as $fb ) { + $nodes[$fb] = $this->createNode( $fb ); + 
$edges[$fb . $prev] = Xml::element( 'edge', [ 'source' => $prev, 'target' => $fb ] ); + $prev = $fb; + } + } + + $output = array_merge( $nodes, $edges ); + $output = "\t\t" . implode( "\n\t\t", $output ); + echo str_replace( '$1', $output, $template ); + } + + protected function createNode( $code ) { + return Xml::openElement( 'node', [ 'id' => $code ] ) + . Xml::openElement( 'data', [ 'key' => 'code' ] ) + . Xml::openElement( 'y:Shapenode' ) + . Xml::element( + 'y:Geometry', + [ 'height' => 30, 'width' => max( 30, 10 * strlen( $code ) ) ], + '' + ) + . Xml::element( 'y:NodeLabel', [], $code ) + . Xml::element( 'y:BorderStyle', [ 'hasColor' => 'false' ], '' ) + . Xml::element( 'y:Fill', [ 'hasColor' => 'false' ], '' ) + . Xml::closeElement( 'y:Shapenode' ) + . Xml::closeElement( 'data' ) + . Xml::closeElement( 'node' ); + } +} + +$maintClass = FallbacksCompare::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/www/wiki/extensions/Translate/scripts/fuzzy.php b/www/wiki/extensions/Translate/scripts/fuzzy.php new file mode 100644 index 00000000..30aff9c9 --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/fuzzy.php @@ -0,0 +1,300 @@ +<?php +/** + * Command line script to mark translations fuzzy (similar to gettext fuzzy). 
/**
 * Maintenance script that marks translations as fuzzy.
 *
 * The single positional argument is either a title pattern or, when --user is
 * given, a user name whose translations should be fuzzied. Nothing is edited
 * unless --really is passed.
 */
class Fuzzy extends Maintenance {
	public function __construct() {
		parent::__construct();
		$this->mDescription = 'Fuzzy bot command line script.';
		$this->addArg(
			'arg',
			'Title pattern or username if user option is provided.'
		);
		$this->addOption(
			'really',
			'(optional) Really fuzzy, no dry-run'
		);
		$this->addOption(
			'skiplanguages',
			'(optional) Skip some languages (comma separated)',
			false, /*required*/
			true /*has arg*/
		);
		$this->addOption(
			'comment',
			'(optional) Comment for updating',
			false, /*required*/
			true /*has arg*/
		);
		$this->addOption(
			'user',
			'(optional) Fuzzy the translations made by user given as an argument.',
			false, /*required*/
			false /*has arg*/
		);
	}

	/**
	 * Collects the pages selected by the command line arguments and hands
	 * them to FuzzyScript.
	 */
	public function execute() {
		$skipLanguages = [];
		if ( $this->hasOption( 'skiplanguages' ) ) {
			$skipLanguages = array_map(
				'trim',
				explode( ',', $this->getOption( 'skiplanguages' ) )
			);
		}

		if ( $this->hasOption( 'user' ) ) {
			$user = User::newFromName( $this->getArg( 0 ) );
			// User::newFromName() returns false for an invalid name, which
			// would be a TypeError in the User-typed getPagesForUser() below.
			if ( !$user instanceof User ) {
				$this->error( 'Invalid user name: ' . $this->getArg( 0 ), 1 );

				return;
			}
			$pages = FuzzyScript::getPagesForUser( $user, $skipLanguages );
		} else {
			$pages = FuzzyScript::getPagesForPattern( $this->getArg( 0 ), $skipLanguages );
		}

		$bot = new FuzzyScript( $pages );
		$bot->comment = $this->getOption( 'comment' );
		$bot->dryrun = !$this->hasOption( 'really' );
		$bot->setProgressCallback( [ $this, 'myOutput' ] );
		$bot->execute();
	}

	/**
	 * Public alternative for protected Maintenance::output() as we need to get
	 * messages from the FuzzyScript class to the commandline.
	 * @param string $text The text to show to the user
	 * @param string|null $channel Unique identifier for the channel.
	 * @param bool $error Whether this is an error message
	 */
	public function myOutput( $text, $channel = null, $error = false ) {
		if ( $error ) {
			// The second parameter of Maintenance::error() is the "die" flag,
			// not a channel, so the channel must not be forwarded here.
			$this->error( $text );
		} else {
			$this->output( $text, $channel );
		}
	}
}

/**
 * Class for marking translation fuzzy.
 */
class FuzzyScript {
	/**
	 * @var bool Check for configuration problems.
	 */
	private $allclear = false;

	/** @var callable Function to report progress updates */
	protected $progressCallback;

	/**
	 * @var bool Dont do anything unless confirmation is given
	 */
	public $dryrun = true;

	/**
	 * @var string Edit summary.
	 */
	public $comment;

	/**
	 * @var array[] List of [ Title, page text ] pairs to process. Declared
	 * explicitly (it used to be created dynamically by the constructor) and
	 * kept public so that existing readers of the property keep working.
	 */
	public $pages = [];

	/**
	 * @param array $pages List of [ Title, page text ] pairs, as returned by
	 *   getPagesForPattern() or getPagesForUser().
	 */
	public function __construct( $pages ) {
		$this->pages = $pages;
		$this->allclear = true;
	}

	/**
	 * @param callable $callback Called with ( text, channel, isError ).
	 */
	public function setProgressCallback( $callback ) {
		$this->progressCallback = $callback;
	}

	/// @see Maintenance::output for param docs
	protected function reportProgress( $text, $channel, $severity = 'status' ) {
		if ( is_callable( $this->progressCallback ) ) {
			$useErrorOutput = $severity === 'error';
			call_user_func( $this->progressCallback, $text, $channel, $useErrorOutput );
		}
	}

	/**
	 * Prefixes every page text with TRANSLATE_FUZZY and saves it, unless this
	 * is a dry run.
	 */
	public function execute() {
		if ( !$this->allclear ) {
			return;
		}

		$count = count( $this->pages );
		$this->reportProgress( "Found $count pages to update.", 'pagecount' );

		foreach ( $this->pages as list( $title, $text ) ) {
			$this->updateMessage( $title, TRANSLATE_FUZZY . $text, $this->dryrun, $this->comment );
		}
	}

	/**
	 * Searches pages that match given patterns.
	 * @param string|string[] $pattern Title prefix(es) to search for.
	 * @param string[] $skipLanguages Language codes to leave out.
	 * @return array[] List of [ Title, page text ] pairs.
	 */
	public static function getPagesForPattern( $pattern, $skipLanguages = [] ) {
		global $wgTranslateMessageNamespaces;
		$dbr = wfGetDB( DB_REPLICA );

		$search = [];
		foreach ( (array)$pattern as $text ) {
			$title = Title::newFromText( $text );
			if ( !$title ) {
				// Title::newFromText() gives null for unparseable input.
				continue;
			}
			$ns = $title->getNamespace();
			$search[$ns][] = 'page_title' . $dbr->buildLike( $title->getDBkey(), $dbr->anyString() );
		}

		if ( $search === [] ) {
			// No usable pattern: an empty OR-list would produce broken SQL.
			return [];
		}

		$title_conds = [];
		foreach ( $search as $ns => $names ) {
			if ( $ns === NS_MAIN ) {
				// A pattern without namespace prefix searches all message namespaces.
				$ns = $wgTranslateMessageNamespaces;
			}
			$titles = $dbr->makeList( $names, LIST_OR );
			$title_conds[] = $dbr->makeList( [ 'page_namespace' => $ns, $titles ], LIST_AND );
		}

		$conds = [
			'page_latest=rev_id',
			'rev_text_id=old_id',
			$dbr->makeList( $title_conds, LIST_OR ),
		];

		if ( count( $skipLanguages ) ) {
			$conds[] = self::buildSkipLanguagesCondition( $dbr, $skipLanguages );
		}

		$rows = $dbr->select(
			[ 'page', 'revision', 'text' ],
			[ 'page_title', 'page_namespace', 'old_text', 'old_flags' ],
			$conds,
			__METHOD__
		);

		return self::collectPageContents( $rows );
	}

	/**
	 * Searches translation pages whose latest revision was made by the given user.
	 * @param User $user
	 * @param string[] $skipLanguages Language codes to leave out.
	 * @return array[] List of [ Title, page text ] pairs.
	 */
	public static function getPagesForUser( User $user, $skipLanguages = [] ) {
		global $wgTranslateMessageNamespaces;
		$dbr = wfGetDB( DB_REPLICA );

		if ( class_exists( ActorMigration::class ) ) {
			$revWhere = ActorMigration::newMigration()->getWhere( $dbr, 'rev_user', $user );
		} else {
			$revWhere = [
				'tables' => [],
				'conds' => 'rev_user = ' . (int)$user->getId(),
				'joins' => [],
			];
		}

		$conds = [
			$revWhere['conds'],
			'page_namespace' => $wgTranslateMessageNamespaces,
			'page_title' . $dbr->buildLike( $dbr->anyString(), '/', $dbr->anyString() ),
		];

		if ( count( $skipLanguages ) ) {
			$conds[] = self::buildSkipLanguagesCondition( $dbr, $skipLanguages );
		}

		$rows = $dbr->select(
			[ 'page', 'revision', 'text' ] + $revWhere['tables'],
			[ 'page_title', 'page_namespace', 'old_text', 'old_flags' ],
			$conds,
			__METHOD__,
			[],
			[
				'revision' => [ 'JOIN', 'page_latest=rev_id' ],
				'text' => [ 'JOIN', 'rev_text_id=old_id' ],
			] + $revWhere['joins']
		);

		return self::collectPageContents( $rows );
	}

	/**
	 * Builds the SQL condition that excludes pages whose last title segment
	 * (the part after the final slash) is one of the given language codes.
	 * @param object $dbr Database connection returned by wfGetDB().
	 * @param string[] $skipLanguages Non-empty list of language codes.
	 * @return string
	 */
	private static function buildSkipLanguagesCondition( $dbr, $skipLanguages ) {
		$skiplist = $dbr->makeList( $skipLanguages );

		return "substring_index(page_title, '/', -1) NOT IN ($skiplist)";
	}

	/**
	 * Converts a result set into [ Title, page text ] pairs and frees it.
	 * @param object $rows Result of the select() call in the getPagesFor* methods.
	 * @return array[]
	 */
	private static function collectPageContents( $rows ) {
		$messagesContents = [];
		foreach ( $rows as $row ) {
			$title = Title::makeTitle( $row->page_namespace, $row->page_title );
			$messagesContents[] = [ $title, Revision::getRevisionText( $row ) ];
		}

		$rows->free();

		return $messagesContents;
	}

	/**
	 * Does the actual edit if possible.
	 * @param Title $title
	 * @param string $text
	 * @param bool $dryrun Whether to really do it or just show what would be done.
	 * @param string $comment Edit summary.
	 */
	private function updateMessage( $title, $text, $dryrun, $comment = null ) {
		global $wgTranslateDocumentationLanguageCode;

		// Validate before the first method call on $title; the progress line
		// below would otherwise be a fatal error for a non-Title value.
		if ( !$title instanceof Title ) {
			$this->reportProgress( "INVALID TITLE!\n", null );

			return;
		}

		// The Title object doubles as the output channel so that the status
		// reported below ends up on the same line as this message.
		$this->reportProgress( "Updating {$title->getPrefixedText()}... ", $title );

		$items = explode( '/', $title->getText(), 2 );
		if ( isset( $items[1] ) && $items[1] === $wgTranslateDocumentationLanguageCode ) {
			$this->reportProgress( 'IGNORED!', $title );

			return;
		}

		if ( $dryrun ) {
			$this->reportProgress( 'DRY RUN!', $title );

			return;
		}

		$wikipage = new WikiPage( $title );
		$content = ContentHandler::makeContent( $text, $title );
		$status = $wikipage->doEditContent(
			$content,
			$comment ?: 'Marking as fuzzy',
			EDIT_FORCE_BOT | EDIT_UPDATE,
			false, /*base revision id*/
			FuzzyBot::getUser()
		);

		$success = $status === true || ( is_object( $status ) && $status->isOK() );
		$this->reportProgress( $success ? 'OK' : 'FAILED', $title );
	}
}
+ * + * Key value pairs of: + * [MediaWiki localisation code] => array( + * [position in top 50], + * [speakers in millions], + * [continent where localisation is spoken] + * ) + */ + public $mostSpokenLanguages = [ + 'en' => [ 1, 1500, 'multiple' ], + 'zh-hans' => [ 2, 1300, 'asia' ], + 'zh-hant' => [ 2, 1300, 'asia' ], + 'hi' => [ 3, 550, 'asia' ], + 'ar' => [ 4, 530, 'multiple' ], + 'es' => [ 5, 500, 'multiple' ], + 'ms' => [ 6, 300, 'asia' ], + 'pt' => [ 7, 290, 'multiple' ], + 'pt-br' => [ 7, 290, 'america' ], + 'ru' => [ 8, 278, 'multiple' ], + 'id' => [ 9, 250, 'asia' ], + 'bn' => [ 10, 230, 'asia' ], + 'fr' => [ 11, 200, 'multiple' ], + 'de' => [ 12, 185, 'europe' ], + 'ja' => [ 13, 132, 'asia' ], + 'fa' => [ 14, 107, 'asia' ], + 'pnb' => [ 15, 104, 'asia' ], // Most spoken variant + 'tl' => [ 16, 90, 'asia' ], + 'mr' => [ 17, 90, 'asia' ], + 'vi' => [ 18, 80, 'asia' ], + 'jv' => [ 19, 80, 'asia' ], + 'te' => [ 20, 80, 'asia' ], + 'ko' => [ 21, 78, 'asia' ], + 'wuu' => [ 22, 77, 'asia' ], + 'arz' => [ 23, 76, 'africa' ], + 'th' => [ 24, 73, 'asia' ], + 'yue' => [ 25, 71, 'asia' ], + 'tr' => [ 26, 70, 'multiple' ], + 'it' => [ 27, 70, 'europe' ], + 'ta' => [ 28, 66, 'asia' ], + 'ur' => [ 29, 60, 'asia' ], + 'my' => [ 30, 52, 'asia' ], + 'sw' => [ 31, 50, 'africa' ], + 'nan' => [ 32, 49, 'asia' ], + 'kn' => [ 33, 47, 'asia' ], + 'gu' => [ 34, 46, 'asia' ], + 'uk' => [ 35, 45, 'europe' ], + 'pl' => [ 36, 43, 'europe' ], + 'sd' => [ 37, 41, 'asia' ], + 'ha' => [ 38, 39, 'africa' ], + 'ml' => [ 39, 37, 'asia' ], + 'gan-hans' => [ 40, 35, 'asia' ], + 'gan-hant' => [ 40, 35, 'asia' ], + 'hak' => [ 41, 34, 'asia' ], + 'or' => [ 42, 31, 'asia' ], + 'ne' => [ 43, 30, 'asia' ], + 'ro' => [ 44, 28, 'europe' ], + 'su' => [ 45, 27, 'asia' ], + 'az' => [ 46, 27, 'asia' ], + 'nl' => [ 47, 27, 'europe' ], + 'zu' => [ 48, 26, 'africa' ], + 'ps' => [ 49, 26, 'asia' ], + 'ckb' => [ 50, 26, 'asia' ], + 'ku-latn' => [ 50, 26, 'asia' ], + ]; + + /** + * Variable with key-value 
pairs with a named index and an array of key-value + * pairs where the key is a MessageGroup ID and the value is a weight of the + * group in the sum of the values for all the groups in the array. + * + * Definitions in this variable can be used to report weighted meta localisation + * scores for the 50 most spoken languages. + * + * @todo Allow weighted reporting for all available languages. + */ + public $localisedWeights = [ + 'wikimedia' => [ + // 'core-0-mostused' => 40, + 'core' => 50, + 'ext-0-wikimedia' => 50 + ], + 'fundraiser' => [ + 'ext-di-di' => 16, + 'ext-di-pfpg' => 84, + ], + 'mediawiki' => [ + // 'core-0-mostused' => 30, + 'core' => 50, + 'ext-0-wikimedia' => 25, + 'ext-0-all' => 25 + ] + ]; + + /** + * Code map to map localisation codes to Wikimedia project codes. Only + * exclusion and remapping is defined here. It is assumed that the first part + * of the localisation code is the WMF project name otherwise (zh-hans -> zh). + */ + public $wikimediaCodeMap = [ + // Codes containing a dash + 'bat-smg' => 'bat-smg', + 'cbk-zam' => 'cbk-zam', + 'map-bms' => 'map-bms', + 'nds-nl' => 'nds-nl', + 'roa-rup' => 'roa-rup', + 'roa-tara' => 'roa-tara', + + // Remaps + 'be-tarask' => 'be-x-old', + 'gsw' => 'als', + 'ike-cans' => 'iu', + 'ike-latn' => 'iu', + 'lzh' => 'zh-classical', + 'nan' => 'zh-min-nan', + 'vro' => 'fiu-vro', + 'yue' => 'zh-yue', + + // Ignored language codes. See reason. + 'als' => '', // gsw + 'be-x-old' => '', // be-tarask + 'crh' => '', // crh-* + 'de-at' => '', // de + 'de-ch' => '', // de + 'de-formal' => '', // de, not reporting formal form + 'dk' => '', // da + 'en-au' => '', // en + 'en-ca' => '', // no MW code + 'en-gb' => '', // no MW code + 'es-419' => '', // no MW code + 'fiu-vro' => '', // vro + 'gan' => '', // gan-* + 'got' => '', // extinct. 
not reporting formal form + 'hif' => '', // hif-* + 'hu-formal' => '', // not reporting + 'iu' => '', // ike-* + 'kk' => '', // kk-* + 'kk-cn' => '', // kk-arab + 'kk-kz' => '', // kk-cyrl + 'kk-tr' => '', // kk-latn + 'ko-kp' => '', // ko + 'ku' => '', // ku-* + 'ku-arab' => '', // ckb + 'nb' => '', // no + 'nl-be' => '', // no MW code + 'nl-informal' => '', // nl, not reporting informal form + 'ruq' => '', // ruq-* + 'simple' => '', // en + 'sr' => '', // sr-* + 'tg' => '', // tg-* + 'tp' => '', // tokipona + 'tt' => '', // tt-* + 'ug' => '', // ug-* + 'zh' => '', // zh-* + 'zh-classical' => '', // lzh + 'zh-cn' => '', // zh + 'zh-sg' => '', // zh + 'zh-hk' => '', // zh + 'zh-min-nan' => '', // nan + 'zh-mo' => '', // zh + 'zh-my' => '', // zh + 'zh-tw' => '', // zh + 'zh-yue' => '', // yue + ]; + + public function __construct() { + parent::__construct(); + $this->mDescription = 'Script to generate statistics about the localisation ' . + 'level of one or more message groups.'; + $this->addOption( + 'groups', + '(optional) Comma separated list of groups', + false, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'output', + '(optional) csv: Comma Separated Values, wiki: MediaWiki syntax, ' . + 'text: Text with tabs. 
Default: default', + false, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'skiplanguages', + '(optional) Comma separated list of languages to be skipped', + false, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'skipzero', + '(optional) Skip languages that do not have any localisation at all' + ); + $this->addOption( + 'legenddetail', + '(optional) Page name for legend to be transcluded at the top of the details table', + false, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'legendsummary', + '(optional) Page name for legend to be transcluded at the top of the summary table', + false, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'fuzzy', + '(optional) Add column for fuzzy counts' + ); + $this->addOption( + 'speakers', + '(optional) Add column for number of speakers (est.). ' . + 'Only valid when combined with "most"' + ); + $this->addOption( + 'nol10n', + '(optional) Do not add localised language name if I18ntags is installed' + ); + $this->addOption( + 'continent', + '(optional) Add a continent column. Only available when output is ' . + '"wiki" or not specified.' + ); + $this->addOption( + 'summary', + '(optional) Add a summary with counts and scores per continent category ' . + 'and totals. Only available for a valid "most" value.', + false, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'wmfscore', + 'Only output WMF language code and weighted score for all ' . + 'language codes for weighing group "wikimedia" in CSV. This ' . + 'report must keep a stable layout as it is used/will be ' . + 'used in the Wikimedia statistics.' + ); + $this->addOption( + 'most', + '(optional) "mediawiki" or "wikimedia". Report on the 50 most ' . + 'spoken languages. Skipzero is ignored. If a valid scope is ' . + 'defined, the group list and fuzzy are ignored and the ' . 
+ 'localisation levels are weighted and reported.', + false, /*required*/ + true /*has arg*/ + ); + } + + public function execute() { + $output = $this->getOption( 'output', 'default' ); + + // Select an output engine + switch ( $output ) { + case 'wiki': + $out = new WikiStatsOutput(); + break; + case 'text': + $out = new TextStatsOutput(); + break; + case 'csv': + $out = new CsvStatsOutput(); + break; + default: + $out = new TranslateStatsOutput(); + } + + $skipLanguages = []; + if ( $this->hasOption( 'skiplanguages' ) ) { + $skipLanguages = array_map( + 'trim', + explode( ',', $this->getOption( 'skiplanguages' ) ) + ); + } + + $reportScore = false; + // Check if score should be reported and prepare weights + $most = $this->getOption( 'most' ); + $weights = []; + if ( $most && isset( $this->localisedWeights[$most] ) ) { + $reportScore = true; + + foreach ( $this->localisedWeights[$most] as $weight ) { + $weights[] = $weight; + } + } + + // check if l10n should be done + $l10n = false; + if ( ( $output === 'wiki' || $output === 'default' ) && + !$this->hasOption( 'nol10n' ) + ) { + $l10n = true; + } + + $wmfscore = $this->hasOption( 'wmfscore' ); + + // Get groups from input + $groups = []; + if ( $reportScore ) { + $reqGroups = array_keys( $this->localisedWeights[$most] ); + } elseif ( $wmfscore ) { + $reqGroups = array_keys( $this->localisedWeights['wikimedia'] ); + } else { + $reqGroups = array_map( 'trim', explode( ',', $this->getOption( 'groups' ) ) ); + } + + // List of all groups + $allGroups = MessageGroups::singleton()->getGroups(); + + // Get list of valid groups + foreach ( $reqGroups as $id ) { + // Page translation group ids use spaces which are not nice on command line + $id = str_replace( '_', ' ', $id ); + if ( isset( $allGroups[$id] ) ) { + $groups[$id] = $allGroups[$id]; + } else { + $this->output( "Unknown group: $id" ); + } + } + + if ( $wmfscore ) { + // Override/set parameters + $out = new CsvStatsOutput(); + $reportScore = true; + + $weights 
= []; + foreach ( $this->localisedWeights['wikimedia'] as $weight ) { + $weights[] = $weight; + } + $wmfscores = []; + } + + if ( !count( $groups ) ) { + $this->error( 'No groups given', true ); + } + + // List of all languages. + $languages = TranslateUtils::getLanguageNames( null ); + // Default sorting order by language code, users can sort wiki output. + ksort( $languages ); + + if ( $this->hasOption( 'legenddetail' ) ) { + $out->addFreeText( '{{' . $this->getOption( 'legenddetail' ) . "}}\n" ); + } + + $totalWeight = 0; + if ( $reportScore ) { + if ( $wmfscore ) { + foreach ( $this->localisedWeights['wikimedia'] as $weight ) { + $totalWeight += $weight; + } + } else { + foreach ( $this->localisedWeights[$most] as $weight ) { + $totalWeight += $weight; + } + } + } + + $showContinent = $this->getOption( 'continent' ); + if ( !$wmfscore ) { + // Output headers + $out->heading(); + + $out->blockstart(); + + if ( $most ) { + $out->element( ( $l10n ? '{{int:translate-gs-pos}}' : 'Pos.' ), true ); + } + + $out->element( ( $l10n ? '{{int:translate-gs-code}}' : 'Code' ), true ); + $out->element( ( $l10n ? '{{int:translate-page-language}}' : 'Language' ), true ); + if ( $showContinent ) { + $out->element( ( $l10n ? '{{int:translate-gs-continent}}' : 'Continent' ), true ); + } + + if ( $most && $this->hasOption( 'speakers' ) ) { + $out->element( ( $l10n ? '{{int:translate-gs-speakers}}' : 'Speakers' ), true ); + } + + if ( $reportScore ) { + $out->element( + ( $l10n ? '{{int:translate-gs-score}}' : 'Score' ) . ' (' . $totalWeight . ')', + true + ); + } + + /** + * @var $g MessageGroup + */ + foreach ( $groups as $g ) { + // Add unprocessed description of group as heading + if ( $reportScore ) { + $gid = $g->getId(); + $heading = $g->getLabel() . ' (' . $this->localisedWeights[$most][$gid] . ')'; + } else { + $heading = $g->getLabel(); + } + $out->element( $heading, true ); + if ( !$reportScore && $this->hasOption( 'fuzzy' ) ) { + $out->element( ( $l10n ? 
'{{int:translate-percentage-fuzzy}}' : 'Fuzzy' ), true ); + } + } + + $out->blockend(); + } + + $rows = []; + foreach ( $languages as $code => $name ) { + // Skip list + if ( in_array( $code, $skipLanguages ) ) { + continue; + } + $rows[$code] = []; + } + + foreach ( $groups as $groupName => $g ) { + $stats = MessageGroupStats::forGroup( $groupName ); + + // Perform the statistic calculations on every language + foreach ( $languages as $code => $name ) { + // Skip list + if ( !$most && in_array( $code, $skipLanguages ) ) { + continue; + } + + // Do not calculate if we do not need it for anything. + if ( $wmfscore && isset( $this->wikimediaCodeMap[$code] ) + && $this->wikimediaCodeMap[$code] === '' + ) { + continue; + } + + // If --most is set, skip all other + if ( $most && !isset( $this->mostSpokenLanguages[$code] ) ) { + continue; + } + + $total = $stats[$code][MessageGroupStats::TOTAL]; + $translated = $stats[$code][MessageGroupStats::TRANSLATED]; + $fuzzy = $stats[$code][MessageGroupStats::FUZZY]; + + $rows[$code][] = [ false, $translated, $total ]; + + if ( $this->hasOption( 'fuzzy' ) ) { + $rows[$code][] = [ true, $fuzzy, $total ]; + } + } + + unset( $collection ); + } + + // init summary array + $summarise = false; + if ( $this->hasOption( 'summary' ) ) { + $summarise = true; + $summary = []; + } + + foreach ( $languages as $code => $name ) { + // Skip list + if ( !$most && in_array( $code, $skipLanguages ) ) { + continue; + } + + // Skip unneeded + if ( $wmfscore && isset( $this->wikimediaCodeMap[$code] ) + && $this->wikimediaCodeMap[$code] === '' + ) { + continue; + } + + // If --most is set, skip all other + if ( $most && !isset( $this->mostSpokenLanguages[$code] ) ) { + continue; + } + + $columns = $rows[$code]; + + $allZero = true; + foreach ( $columns as $fields ) { + if ( (int)$fields[1] !== 0 ) { + $allZero = false; + } + } + + // Skip dummy languages if requested + if ( $allZero && $this->hasOption( 'skipzero' ) ) { + continue; + } + + // Output the 
row + if ( !$wmfscore ) { + $out->blockstart(); + } + + // Fill language position field + if ( $most ) { + $out->element( $this->mostSpokenLanguages[$code][0] ); + } + + // Fill language name field + if ( !$wmfscore ) { + // Fill language code field + $out->element( $code ); + + if ( $l10n && function_exists( 'efI18nTagsInit' ) ) { + $out->element( '{{#languagename:' . $code . '}}' ); + } else { + $out->element( $name ); + } + } + + // Fill continent field + if ( $showContinent ) { + if ( $this->mostSpokenLanguages[$code][2] === 'multiple' ) { + $continent = ( $l10n ? '{{int:translate-gs-multiple}}' : 'Multiple' ); + } else { + $continent = $l10n ? + '{{int:timezoneregion-' . $this->mostSpokenLanguages[$code][2] . '}}' : + ucfirst( $this->mostSpokenLanguages[$code][2] ); + } + + $out->element( $continent ); + } + + // Fill speakers field + if ( $most && $this->hasOption( 'speakers' ) ) { + $out->element( number_format( $this->mostSpokenLanguages[$code][1] ) ); + } + + // Fill the score field + if ( $reportScore ) { + // Keep count + $i = 0; + // Start with 0 points + $score = 0; + + foreach ( $columns as $fields ) { + list( , $upper, $total ) = $fields; + // Weigh the score and add it to the current score + $score += ( $weights[$i] * $upper ) / $total; + $i++; + } + + // Report a round numbers + $score = number_format( $score, 0 ); + + if ( $summarise ) { + $continent = $this->mostSpokenLanguages[$code][2]; + if ( isset( $summary[$continent] ) ) { + $newcount = $summary[$continent][0] + 1; + $newscore = $summary[$continent][1] + (int)$score; + } else { + $newcount = 1; + $newscore = $score; + } + + $summary[$continent] = [ $newcount, $newscore ]; + } + + if ( $wmfscore ) { + // Multiple variants can be used for the same wiki. + // Store the scores in an array and output them later + // when they can be averaged. 
+ if ( isset( $this->wikimediaCodeMap[$code] ) ) { + $wmfcode = $this->wikimediaCodeMap[$code]; + } else { + $codeparts = explode( '-', $code ); + $wmfcode = $codeparts[0]; + } + + if ( isset( $wmfscores[$wmfcode] ) ) { + $count = $wmfscores[$wmfcode]['count'] + 1; + $tmpWmfScore = (int)$wmfscores[$wmfcode]['score']; + $tmpWmfCount = (int)$wmfscores[$wmfcode]['count']; + $score = ( ( $tmpWmfCount * $tmpWmfScore ) + (int)$score ) / $count; + $wmfscores[$wmfcode] = [ 'score' => $score, 'count' => $count ]; + } else { + $wmfscores[$wmfcode] = [ 'score' => $score, 'count' => 1 ]; + } + } else { + $out->element( $score ); + } + } + + // Fill fields for groups + if ( !$wmfscore ) { + foreach ( $columns as $fields ) { + list( $invert, $upper, $total ) = $fields; + $c = $out->formatPercent( $upper, $total, $invert ); + $out->element( $c ); + } + + $out->blockend(); + } + } + + $out->footer(); + + if ( $reportScore && $this->hasOption( 'summary' ) ) { + if ( $reportScore && $this->hasOption( 'legendsummary' ) ) { + $out->addFreeText( '{{' . $this->getOption( 'legendsummary' ) . "}}\n" ); + } + + $out->summaryheading(); + + $out->blockstart(); + + $out->element( $l10n ? '{{int:translate-gs-continent}}' : 'Continent', true ); + $out->element( $l10n ? '{{int:translate-gs-count}}' : 'Count', true ); + $out->element( $l10n ? '{{int:translate-gs-avgscore}}' : 'Avg. score', true ); + + $out->blockend(); + + ksort( $summary ); + + $totals = [ 0, 0 ]; + + foreach ( $summary as $key => $values ) { + $out->blockstart(); + + if ( $key === 'multiple' ) { + $out->element( $l10n ? '{{int:translate-gs-multiple}}' : 'Multiple' ); + } else { + $out->element( $l10n ? '{{int:timezoneregion-' . $key . '}}' : ucfirst( $key ) ); + } + $out->element( $values[0] ); + $out->element( number_format( $values[1] / $values[0] ) ); + + $out->blockend(); + + $totals[0] += $values[0]; + $totals[1] += $values[1]; + } + + $out->blockstart(); + $out->element( $l10n ? 
/**
 * Maintenance script printing the language codes with the most translation
 * edits in a recent time window, one "code<TAB>count" line per language.
 */
class Languageeditstats extends Maintenance {
	public function __construct() {
		parent::__construct();
		$this->mDescription = 'Script to show number of edits per language for all message groups.';

		// Option name => [ description, whether the option takes a value ].
		// None of the options is required.
		$optionTable = [
			'top' => [ '(optional) Show given number of language codes (default: 10)', true ],
			'days' => [ '(optional) Calculate for given number of days (default: 7)', true ],
			'bots' => [ '(optional) Include bot edits', false ],
			'ns' => [ '(optional) Comma separated list of namespace IDs', true ],
		];
		foreach ( $optionTable as $optionName => list( $description, $takesValue ) ) {
			$this->addOption( $optionName, $description, false, $takesValue );
		}
	}

	public function execute() {
		// Fall back to the documented defaults when an option is absent or empty.
		$days = $this->getOption( 'days' );
		if ( !$days ) {
			$days = 7;
		}
		$hours = $days * 24;

		$top = (int)$this->getOption( 'top' );
		if ( !$top ) {
			$top = 10;
		}

		$bots = $this->hasOption( 'bots' );

		// Only numeric namespace ids from --ns are kept.
		$namespaces = [];
		if ( $this->hasOption( 'ns' ) ) {
			$requested = explode( ',', $this->getOption( 'ns' ) );
			$namespaces = array_values( array_filter( $requested, 'is_numeric' ) );
		}

		// Select set of edits to report on
		$changes = TranslateUtils::translationChanges( $hours, $bots, $namespaces );

		// Count edits per language code, leaving out edits by $wgTranslateFuzzyBotName
		global $wgTranslateFuzzyBotName;
		$editCounts = [];
		foreach ( $changes as $change ) {
			if ( $change->rc_user_text === $wgTranslateFuzzyBotName ) {
				continue;
			}

			$messageParts = TranslateUtils::figureMessage( $change->rc_title );
			$languageCode = $messageParts[1];

			if ( isset( $editCounts[$languageCode] ) ) {
				$editCounts[$languageCode]++;
			} else {
				$editCounts[$languageCode] = 1;
			}
		}

		// Report the counts in descending order, stopping after $top rows.
		arsort( $editCounts );
		$printed = 0;
		foreach ( $editCounts as $languageCode => $editCount ) {
			if ( $printed === $top ) {
				break;
			}
			$printed++;

			$this->output( "$languageCode\t$editCount\n" );
		}
	}
}
'/../../../'; +} +require_once "$IP/maintenance/Maintenance.php"; + +/** + * Maintenance class for the fast export of special page aliases and magic words. + */ +class MagicExport extends Maintenance { + protected $type; + protected $target; + + protected $handles = []; + protected $messagesOld = []; + protected $extraInformation = []; + + public function __construct() { + parent::__construct(); + $this->mDescription = 'Export of aliases and magic words for MediaWiki extensions.'; + $this->addOption( + 'target', + 'Target directory for exported files', + true, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'type', + 'magic or special', + true, /*required*/ + true /*has arg*/ + ); + } + + public function execute() { + $this->target = $this->getOption( 'target' ); + $this->type = $this->getOption( 'type' ); + + switch ( $this->type ) { + case 'special': + case 'magic': + break; + default: + $this->error( 'Invalid type.', 1 ); + } + + $this->openHandles(); + $this->writeHeaders(); + $this->writeFiles(); + $this->writeFooters(); + $this->closeHandles(); + } + + /** + * Iterate through all groups, loading current data from the existing + * extension and opening message files for message output. + * - If the group does not define a special page alias file or magic + * words file, or that file does not exist, it is ignored silently. + * - If the file does contain a data array (e.g. $aliases) then the + * program exits. 
/**
 * Iterate through all groups, loading current data from the existing
 * extension and opening message files for message output.
 * - If the group does not define a special page alias file or magic
 *   words file, it is ignored silently.
 * - If the source file is not readable, does not contain the expected data
 *   array (e.g. $aliases), or the output file cannot be opened, an error is
 *   reported and the group is skipped.
 */
protected function openHandles() {
	$this->output( "Opening file handles and loading current data...\n" );

	$groups = MessageGroups::singleton()->getGroups();
	foreach ( $groups as $group ) {
		if ( !$group instanceof MediaWikiExtensionMessageGroup ) {
			continue;
		}

		$conf = $group->getConfiguration();

		$inFile = $outFile = null;

		// Both the output file and its source must be configured; reading a
		// missing "...Source" key would only raise a notice and yield null.
		if ( $this->type === 'special'
			&& isset( $conf['FILES']['aliasFile'], $conf['FILES']['aliasFileSource'] )
		) {
			$inFile = $conf['FILES']['aliasFileSource'];
			$outFile = $conf['FILES']['aliasFile'];
		}

		if ( $this->type === 'magic'
			&& isset( $conf['FILES']['magicFile'], $conf['FILES']['magicFileSource'] )
		) {
			$inFile = $conf['FILES']['magicFileSource'];
			$outFile = $conf['FILES']['magicFile'];
		}

		if ( $inFile === null ) {
			continue;
		}

		$inFile = $group->replaceVariables( $inFile, 'en' );
		$outFile = $this->target . '/' . $outFile;
		$varName = null;

		if ( !is_readable( $inFile ) ) {
			$this->error( "File '$inFile' not readable." );
			continue;
		}

		// Start from a clean slate so that an array defined by a previously
		// included file cannot be mistaken for data of this file.
		unset( $aliases, $specialPageAliases, $magicWords );
		include $inFile;
		switch ( $this->type ) {
			case 'special':
				if ( isset( $aliases ) ) {
					$this->messagesOld[$group->getId()] = $aliases;
					unset( $aliases );
					$varName = '$aliases';
				} elseif ( isset( $specialPageAliases ) ) {
					$this->messagesOld[$group->getId()] = $specialPageAliases;
					unset( $specialPageAliases );
					$varName = '$specialPageAliases';
				} else {
					$this->error( "File '$inFile' does not contain an aliases array." );
					continue 2;
				}
				break;
			case 'magic':
				if ( !isset( $magicWords ) ) {
					$this->error( "File '$inFile' does not contain a magic words array." );
					continue 2;
				}
				$this->messagesOld[$group->getId()] = $magicWords;
				unset( $magicWords );
				$varName = '$magicWords';
				break;
		}

		wfMkdirParents( dirname( $outFile ), null, __METHOD__ );
		$handle = fopen( $outFile, 'w' );
		if ( $handle === false ) {
			// Do not register a group that has nowhere to write to.
			$this->error( "Unable to open '$outFile' for writing." );
			unset( $this->messagesOld[$group->getId()] );
			continue;
		}

		$this->handles[$group->getId()] = $handle;
		$headerInformation = $this->readHeader( $inFile, $varName );
		fwrite( $handle, $headerInformation['fileBegin'] );
		$this->extraInformation[$group->getId()] = $headerInformation;

		$this->output( "\t{$group->getId()}\n" );
	}
}

/**
 * Extracts the leading header comment of a source file and records which
 * formatting details of that file the export should keep.
 *
 * @param string $file Path of the existing source file.
 * @param string $varName Name of the data variable including the dollar
 *   sign, e.g. '$magicWords'.
 * @return array With the keys 'fileBegin' (string) and 'preserveIgnoreTag',
 *   'preserveLongArraySyntax', 'preserveNewlineAtEnd' (bool).
 */
protected function readHeader( $file, $varName ) {
	$data = file_get_contents( $file );
	if ( $data === false ) {
		// Unreadable file: behave as if it was empty, which results in the
		// plain "<?php" header below.
		$data = '';
	}

	// Seek first '*/'.
	$end = strpos( $data, '*/' );

	// But not when it is the english comment, i.e. when the data variable
	// already appears before the end of that comment.
	$varPos = strpos( $data, $varName );
	if ( $varPos !== false && $end !== false && $varPos <= $end ) {
		$end = false;
	}

	if ( $end === false ) {
		$fileBegin = "<?php\n";
	} else {
		// Grab header.
		$fileBegin = substr( $data, 0, $end + 2 );
	}

	// preserve the phpcs codingStandardsIgnoreFile, if exists
	$preserveIgnoreTag = strpos( $data, '@codingStandardsIgnoreFile' ) !== false;

	// preserve the long array syntax, if varName is written with it
	$preserveLongArraySyntax = (bool)preg_match(
		'/' . preg_quote( $varName, '/' ) . '\s*=\s*array\s*\(\s*\)\s*;/',
		$data
	);

	// avoid difference by the last character
	$preserveNewlineAtEnd = substr( $data, -1 ) === "\n";

	return [
		'fileBegin' => $fileBegin,
		'preserveIgnoreTag' => $preserveIgnoreTag,
		'preserveLongArraySyntax' => $preserveLongArraySyntax,
		'preserveNewlineAtEnd' => $preserveNewlineAtEnd,
	];
}
"\n// @codingStandardsIgnoreFile" + : ''; + fwrite( $handle, <<<PHP +$ignoreTag + +\$specialPageAliases = $arraySyntax; +PHP + ); + break; + case 'magic': + fwrite( $handle, <<<PHP + +\$magicWords = $arraySyntax; +PHP + ); + break; + } + } + } + + /** + * Itterate through available languages, loading and parsing the data + * message from the MediaWiki namespace and writing the data to its output + * file handle. + */ + protected function writeFiles() { + $langs = TranslateUtils::parseLanguageCodes( '*' ); + unset( $langs[array_search( 'en', $langs )] ); + $langs = array_merge( [ 'en' ], $langs ); + foreach ( $langs as $l ) { + // Load message page. + switch ( $this->type ) { + case 'special': + $title = Title::makeTitleSafe( NS_MEDIAWIKI, 'Sp-translate-data-SpecialPageAliases/' . $l ); + break; + case 'magic': + $title = Title::makeTitleSafe( NS_MEDIAWIKI, 'Sp-translate-data-MagicWords/' . $l ); + break; + default: + exit( 1 ); + } + + // Parse message page. + if ( !$title || !$title->exists() ) { + $this->output( "Skiping $l...\n" ); + + $messagesNew = []; + } else { + $this->output( "Processing $l...\n" ); + + $page = WikiPage::factory( $title ); + $content = $page->getContent(); + $data = $content->getNativeData(); + + // Parse message file. + $segments = explode( "\n", $data ); + array_shift( $segments ); + array_shift( $segments ); + unset( $segments[count( $segments ) - 1] ); + unset( $segments[count( $segments ) - 1] ); + $messagesNew = []; + foreach ( $segments as $segment ) { + $parts = explode( ' = ', $segment ); + $key = array_shift( $parts ); + $translations = explode( ', ', implode( $parts ) ); + $messagesNew[$key] = $translations; + } + } + + // Write data to handles. + $namesEn = LanguageNames::getNames( 'en' ); + $namesNative = Language::fetchLanguageNames(); + + foreach ( $this->handles as $group => $handle ) { + // Find messages to write to this handle. 
+ $messagesOut = []; + if ( !isset( $this->messagesOld[$group] ) ) { + continue; + } + + foreach ( $this->messagesOld[$group]['en'] as $key => $message ) { + if ( array_key_exists( $key, $messagesNew ) ) { + $messagesOut[$key] = $messagesNew[$key]; + } elseif ( isset( $this->messagesOld[$group][$l][$key] ) ) { + $messagesOut[$key] = $this->messagesOld[$group][$l][$key]; + } + } + if ( $this->extraInformation[$group]['preserveLongArraySyntax'] ) { + $arrayStart = 'array('; + $arrayEnd = ')'; + } else { + $arrayStart = '['; + $arrayEnd = ']'; + } + + // If there are messages to write, write them. + if ( $messagesOut !== [] ) { + $out = ''; + switch ( $this->type ) { + case 'special': + $out .= "\n\n/** {$namesEn[$l]} ({$namesNative[$l]}) " . + "*/\n\$specialPageAliases['{$l}'] = {$arrayStart}\n"; + break; + case 'magic': + $out .= "\n\n/** {$namesEn[$l]} ({$namesNative[$l]}) *" . + "/\n\$magicWords['{$l}'] = {$arrayStart}\n"; + break; + } + foreach ( $messagesOut as $key => $translations ) { + if ( !is_array( $translations ) ) { + $this->error( "$l in $group has not an array..." ); + continue; + } + foreach ( $translations as $id => $translation ) { + $translations[$id] = addslashes( $translation ); + if ( $this->type === 'magic' && $translation === 0 ) { + unset( $translations[$id] ); + } + } + $translations = implode( "', '", $translations ); + switch ( $this->type ) { + case 'special': + $out .= "\t'$key' => $arrayStart '$translations' $arrayEnd,\n"; + break; + case 'magic': + if ( $this->messagesOld[$group]['en'][$key][0] === 0 ) { + $out .= "\t'$key' => $arrayStart 0, '$translations' $arrayEnd,\n"; + } else { + $out .= "\t'$key' => $arrayStart '$translations' $arrayEnd,\n"; + } + break; + } + } + $out .= "$arrayEnd;"; + fwrite( $handle, $out ); + } + } + } + } + + /** + * Do whatever needs doing after writing the primary content. 
+ */ + protected function writeFooters() { + $this->output( "Writing file footers...\n" ); + foreach ( $this->handles as $group => $handle ) { + if ( $this->extraInformation[$group]['preserveNewlineAtEnd'] ) { + // php files should end with a newline + fwrite( $handle, "\n" ); + } + } + } + + /** + * Close all output file handles. + */ + protected function closeHandles() { + $this->output( "Closing file handles...\n" ); + foreach ( $this->handles as $handle ) { + fclose( $handle ); + } + } +} + +$maintClass = MagicExport::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/www/wiki/extensions/Translate/scripts/migrate-schema2.php b/www/wiki/extensions/Translate/scripts/migrate-schema2.php new file mode 100644 index 00000000..8cf7d6ee --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/migrate-schema2.php @@ -0,0 +1,71 @@ +<?php +/** + * Script to convert Translate extension database schema to v2 + * + * @author Niklas Laxström + * @copyright Copyright © 2011, Niklas Laxström + * @license GPL-2.0-or-later + * @file + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $dir = __DIR__; + $IP = "$dir/../../.."; +} +require_once "$IP/maintenance/Maintenance.php"; + +/** + * Script to convert Translate extension database schema to v2. + * Essentially gets rid of revtag_type table, which was unnecessary + * abstraction. + */ +class TSchema2 extends Maintenance { + + public function __construct() { + parent::__construct(); + $this->mDescription = 'Migrates database schema to version 2.'; + } + + public function execute() { + $dbw = wfGetDB( DB_MASTER ); + if ( !$dbw->tableExists( 'revtag' ) ) { + $this->error( "Table revtag doesn't exist. Translate extension is not installed?", 1 ); + } + + if ( !$dbw->tableExists( 'revtag_type' ) ) { + $this->error( "Table revtag_type doesn't exist. 
Migration is already done.", 1 ); + } + + if ( $dbw->getType() !== 'mysql' ) { + $this->error( 'This migration script only supports mysql. Please help ' . + "us to write routine for {$dbw->getType()}.", 1 ); + } + + $table = $dbw->tableName( 'revtag' ); + $dbw->query( "ALTER TABLE $table MODIFY rt_type varbinary(60) not null", __METHOD__ ); + + $res = $dbw->select( + 'revtag_type', + [ 'rtt_id', 'rtt_name' ], + [], + __METHOD__ + ); + + foreach ( $res as $row ) { + $dbw->update( + 'revtag', + [ 'rt_type' => $row->rtt_name ], + [ 'rt_type' => (string)$row->rtt_id ], + __METHOD__ + ); + } + + $dbw->dropTable( 'revtag_type', __METHOD__ ); + } +} + +$maintClass = TSchema2::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/www/wiki/extensions/Translate/scripts/mwcore-export.php b/www/wiki/extensions/Translate/scripts/mwcore-export.php new file mode 100644 index 00000000..8e352339 --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/mwcore-export.php @@ -0,0 +1,133 @@ +<?php +/** + * Script to export special core features of %MediaWiki. + * + * @author Niklas Laxström + * @author Siebrand Mazeland + * @copyright Copyright © 2009-2013, Niklas Laxström, Siebrand Mazeland + * @license GPL-2.0-or-later + * @file + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $dir = __DIR__; + $IP = "$dir/../../.."; +} +require_once "$IP/maintenance/Maintenance.php"; + +class MwCoreExport extends Maintenance { + public function __construct() { + parent::__construct(); + $this->mDescription = 'Core special features exporter.'; + $this->addOption( + 'target', + 'Target directory for exported files', + true, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'lang', + '(optional) Comma separated list of language codes. 
Default: *', + false, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'type', + 'Export type: "namespace", "special" or "magic"', + true, /*required*/ + true /*has arg*/ + ); + } + + public function execute() { + if ( !is_writable( $this->getOption( 'target' ) ) ) { + $this->error( 'Target directory is not writable.', 1 ); + } + + $langs = TranslateUtils::parseLanguageCodes( $this->getOption( 'lang', '*' ) ); + $group = MessageGroups::getGroup( 'core' ); + $type = $this->getOption( 'type' ); + + foreach ( $langs as $l ) { + $o = null; + + switch ( $type ) { + case 'special': + $o = new SpecialPageAliasesCM( $l ); + break; + case 'magic': + $o = new MagicWordsCM( $l ); + break; + case 'namespace': + $o = new NamespaceCM( $l ); + break; + default: + $this->error( 'Invalid type: Must be one of special, magic, namespace.', 1 ); + } + + $export = $o->export( 'core' ); + if ( $export === '' ) { + continue; + } + + $matches = []; + preg_match( '~^(\$[a-zA-Z]+)\s*=~m', $export, $matches ); + + if ( !isset( $matches[1] ) ) { + continue; + } + + # remove useles comment + $export = preg_replace( "~^# .*$\n~m", '', $export ); + + if ( strpos( $export, '#!!' ) !== false ) { + $this->error( "There are warnings with $l." ); + } + + $variable = preg_quote( $matches[1], '~' ); + + /** @var FileBasedMessageGroup $group */ + $file = $group->getSourceFilePath( $l ); + // bandage + $real = Language::getFileName( '/messages/Messages', $l ); + $file = preg_replace( '~/i18n/(.+)\.json$~', $real, $file ); + + if ( !file_exists( $file ) ) { + $this->error( "File $file does not exist!" ); + continue; + } + + $data = file_get_contents( $file ); + + $export = trim( $export ) . "\n"; + $escExport = addcslashes( $export, '\\$' ); # Darn backreferences + + $outFile = $this->getOption( 'target' ) . '/' . 
$group->getTargetFilename( $l ); + $outFile = preg_replace( '~/i18n/(.+)\.json$~', $real, $outFile ); + + $count = 0; + $data = preg_replace( "~$variable\s*=.*?\n\);\n~s", $escExport, $data, 1, $count ); + if ( $count ) { + file_put_contents( $outFile, $data ); + } else { + $this->error( "Adding new entry to $outFile, please double check location." ); + $pos = strpos( $data, '*/' ); + if ( $pos === false ) { + $this->error( '. FAILED! Totally new file? No header?' ); + } else { + $pos += 3; + } + + $data = substr( $data, 0, $pos ) . "\n" . $export . substr( $data, $pos ); + + file_put_contents( $outFile, $data ); + } + } + } +} + +$maintClass = MwCoreExport::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/www/wiki/extensions/Translate/scripts/plural-comparison.php b/www/wiki/extensions/Translate/scripts/plural-comparison.php new file mode 100644 index 00000000..fe264565 --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/plural-comparison.php @@ -0,0 +1,159 @@ +<?php +/** + * Script for comparing different plural implementations. + * + * @author Niklas Laxström + * + * @copyright Copyright © 2010, Niklas Laxström + * @license GPL-2.0-or-later + * @file + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $IP = __DIR__ . '/../../..'; +} +require_once "$IP/maintenance/Maintenance.php"; + +/// Script for comparing different plural implementations. 
+class PluralCompare extends Maintenance { + public function __construct() { + parent::__construct(); + $this->mDescription = 'Script for comparing different plural implementations.'; + } + + public function execute() { + $mwLanguages = $this->loadMediaWiki(); + $gtLanguages = $this->loadGettext(); + $clLanguages = $this->loadCLDR(); + + $all = Language::fetchLanguageNames( null, 'all' ); + $allkeys = array_keys( $all + $mwLanguages + $gtLanguages + $clLanguages ); + sort( $allkeys ); + + $this->output( sprintf( "%12s %3s %3s %4s\n", 'Code', 'MW', 'Get', 'CLDR' ) ); + foreach ( $allkeys as $code ) { + $mw = isset( $mwLanguages[$code] ) ? '+' : ''; + $gt = isset( $gtLanguages[$code] ) ? '+' : ''; + $cl = isset( $clLanguages[$code] ) ? '+' : ''; + + if ( $mw === '' ) { + $fallbacks = Language::getFallbacksFor( $code ); + foreach ( $fallbacks as $fcode ) { + if ( $fcode !== 'en' && isset( $mwLanguages[$fcode] ) ) { + $mw = '.'; + } + } + } + + $error = ''; + if ( substr_count( sprintf( '%s%s%s', $mw, $gt, $cl ), '+' ) > 1 ) { + $error = $this->tryMatch( $code, $mw, $gtLanguages, $clLanguages ); + } + + $this->output( sprintf( "%12s %-3s %-3s %-4s %s\n", $code, $mw, $gt, $cl, $error ) ); + } + } + + protected function tryMatch( $code, $mws, $gtLanguages, $clLanguages ) { + if ( $mws !== '' ) { + $mwExp = true; + $lang = Language::factory( $code ); + } else { + $mwExp = false; + } + + if ( isset( $gtLanguages[$code] ) ) { + $gtExp = 'return (int) ' . str_replace( 'n', '$i', $gtLanguages[$code] ) . 
';'; + } else { + $gtExp = false; + } + + if ( isset( $clLanguages[$code] ) ) { + $cldrExp = $clLanguages[$code]; + } else { + $cldrExp = false; + } + + for ( $i = 0; $i <= 250; $i++ ) { + $mw = $gt = $cl = '?'; + + if ( $mwExp ) { + $exp = $lang->getCompiledPluralRules(); + $mw = CLDRPluralRuleEvaluator::evaluateCompiled( $i, $exp ); + } + + if ( $gtExp ) { + $gt = eval( $gtExp ); + } + + if ( $cldrExp ) { + $cl = CLDRPluralRuleEvaluator::evaluate( $i, $cldrExp ); + } + + if ( self::comp( $mw, $gt ) && self::comp( $gt, $cl ) && self::comp( $cl, $mw ) ) { + continue; + } + + return "$i: $mw $gt $cl"; + } + + return ''; + } + + public static function comp( $a, $b ) { + return $a === '?' || $b === '?' || $a === $b; + } + + protected function loadPluralFile( $fileName ) { + $doc = new DOMDocument; + $doc->load( $fileName ); + $rulesets = $doc->getElementsByTagName( 'pluralRules' ); + $plurals = []; + foreach ( $rulesets as $ruleset ) { + $codes = $ruleset->getAttribute( 'locales' ); + $rules = []; + $ruleElements = $ruleset->getElementsByTagName( 'pluralRule' ); + foreach ( $ruleElements as $elt ) { + $rules[] = $elt->nodeValue; + } + foreach ( explode( ' ', $codes ) as $code ) { + $plurals[$code] = $rules; + } + } + + return $plurals; + } + + public function loadCLDR() { + global $IP; + + return $this->loadPluralFile( "$IP/languages/data/plurals.xml" ); + } + + public function loadMediaWiki() { + global $IP; + + $rules = $this->loadPluralFile( "$IP/languages/data/plurals.xml" ); + $rulesMW = $this->loadPluralFile( "$IP/languages/data/plurals-mediawiki.xml" ); + + return array_merge( $rules, $rulesMW ); + } + + public function loadGettext() { + $gtData = file_get_contents( __DIR__ . 
'/../data/plural-gettext.txt' ); + $gtLanguages = []; + foreach ( preg_split( '/\n|\r/', $gtData, -1, PREG_SPLIT_NO_EMPTY ) as $line ) { + list( $code, $rule ) = explode( "\t", $line ); + $rule = preg_replace( '/^.*?plural=/', '', $rule ); + $gtLanguages[$code] = $rule; + } + + return $gtLanguages; + } +} + +$maintClass = PluralCompare::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/www/wiki/extensions/Translate/scripts/poimport.php b/www/wiki/extensions/Translate/scripts/poimport.php new file mode 100644 index 00000000..a07481d5 --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/poimport.php @@ -0,0 +1,322 @@ +<?php +/** + * Imports gettext files exported from Special:Translate back. + * + * @author Niklas Laxström + * @author Siebrand Mazeland + * @copyright Copyright © 2007-2013 Niklas Laxström, Siebrand Mazeland + * @license GPL-2.0-or-later + * @file + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $dir = __DIR__; + $IP = "$dir/../../.."; +} +require_once "$IP/maintenance/Maintenance.php"; + +class Poimport extends Maintenance { + public function __construct() { + parent::__construct(); + $this->mDescription = 'Po file importer (does not make changes unless specified).'; + $this->addOption( + 'file', + 'Gettext file to import (Translate specific formatting)', + true, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'user', + 'User who makes edits to wiki', + true, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'really', + '(optional) Actually make changes', + false, /*required*/ + false /*has arg*/ + ); + } + + public function execute() { + // Parse the po file. 
+ $p = new PoImporter( $this->getOption( 'file' ) ); + $p->setProgressCallback( [ $this, 'myOutput' ] ); + list( $changes, $group ) = $p->parse(); + + if ( !count( $changes ) ) { + $this->output( "No changes to import\n" ); + exit( 0 ); + } + + // Import changes to wiki. + $w = new WikiWriter( + $changes, + $group, + $this->getOption( 'user' ), + !$this->hasOption( 'really' ) + ); + + $w->setProgressCallback( [ $this, 'myOutput' ] ); + $w->execute(); + } + + /** + * Public alternative for protected Maintenance::output() as we need to get + * messages from the ChangeSyncer class to the commandline. + * @param string $text The text to show to the user + * @param string|null $channel Unique identifier for the channel. + * @param bool $error Whether this is an error message + */ + public function myOutput( $text, $channel = null, $error = false ) { + if ( $error ) { + $this->error( $text, $channel ); + } else { + $this->output( $text, $channel ); + } + } +} + +/** + * Parses a po file that has been exported from Mediawiki. Other files are not + * supported. + */ +class PoImporter { + /** @var callable Function to report progress updates */ + protected $progressCallback; + + /** + * Path to file to parse. + * @var bool|string + */ + private $file = false; + + /** + * @param string $file File to import + */ + public function __construct( $file ) { + $this->file = $file; + } + + public function setProgressCallback( $callback ) { + $this->progressCallback = $callback; + } + + /// @see Maintenance::output for param docs + protected function reportProgress( $text, $channel = null, $severity = 'status' ) { + if ( is_callable( $this->progressCallback ) ) { + $useErrorOutput = $severity === 'error'; + call_user_func( $this->progressCallback, $text, $channel, $useErrorOutput ); + } + } + + /** + * Loads translations for comparison. + * + * @param string $id Id of MessageGroup. + * @param string $code Language code. 
+ * @return MessageCollection + */ + protected function initMessages( $id, $code ) { + $group = MessageGroups::getGroup( $id ); + + $messages = $group->initCollection( $code ); + $messages->loadTranslations(); + + return $messages; + } + + /** + * Parses relevant stuff from the po file. + * @return array|bool + */ + public function parse() { + $data = file_get_contents( $this->file ); + $data = str_replace( "\r\n", "\n", $data ); + + $matches = []; + if ( preg_match( '/X-Language-Code:\s+(.*)\\\n/', $data, $matches ) ) { + $code = $matches[1]; + $this->reportProgress( "Detected language as $code", 'code' ); + } else { + $this->reportProgress( 'Unable to determine language code', 'code', 'error' ); + + return false; + } + + if ( preg_match( '/X-Message-Group:\s+(.*)\\\n/', $data, $matches ) ) { + $groupId = $matches[1]; + $this->reportProgress( "Detected message group as $groupId", 'group' ); + } else { + $this->reportProgress( 'Unable to determine message group', 'group', 'error' ); + + return false; + } + + $contents = $this->initMessages( $groupId, $code ); + + echo "----\n"; + + $poformat = '".*"\n?(^".*"$\n?)*'; + $quotePattern = '/(^"|"$\n?)/m'; + + $sections = preg_split( '/\n{2,}/', $data ); + $changes = []; + foreach ( $sections as $section ) { + $matches = []; + if ( preg_match( "/^msgctxt\s($poformat)/mx", $section, $matches ) ) { + // Remove quoting + $key = preg_replace( $quotePattern, '', $matches[1] ); + + // Ignore unknown keys + if ( !isset( $contents[$key] ) ) { + continue; + } + } else { + continue; + } + $matches = []; + if ( preg_match( "/^msgstr\s($poformat)/mx", $section, $matches ) ) { + // Remove quoting + $translation = preg_replace( $quotePattern, '', $matches[1] ); + // Restore new lines and remove quoting + $translation = stripcslashes( $translation ); + } else { + continue; + } + + // Fuzzy messages + if ( preg_match( '/^#, fuzzy$/m', $section ) ) { + $translation = TRANSLATE_FUZZY . 
$translation; + } + + $oldtranslation = (string)$contents[$key]->translation(); + + if ( $translation !== $oldtranslation ) { + if ( $translation === '' ) { + $this->reportProgress( "Skipping empty translation in the po file for $key!\n" ); + } else { + if ( $oldtranslation === '' ) { + $this->reportProgress( "New translation for $key\n" ); + } else { + $this->reportProgress( "Translation of $key differs:\n$translation\n" ); + } + $changes["$key/$code"] = $translation; + } + } + } + + return [ $changes, $groupId ]; + } +} + +/** + * Import changes to wiki as given user + */ +class WikiWriter { + /** @var callable Function to report progress updates */ + protected $progressCallback; + + protected $user; + + private $changes = []; + private $dryrun = true; + private $group = null; + + /** + * @param array $changes Array of key/langcode => translation. + * @param string $groupId Group ID. + * @param string $user User who makes the edits in wiki. + * @param bool $dryrun Do not do anything that affects the database. + */ + public function __construct( $changes, $groupId, $user, $dryrun = true ) { + $this->changes = $changes; + $this->group = MessageGroups::getGroup( $groupId ); + $this->user = User::newFromName( $user ); + $this->dryrun = $dryrun; + } + + public function setProgressCallback( $callback ) { + $this->progressCallback = $callback; + } + + /// @see Maintenance::output for param docs + protected function reportProgress( $text, $channel, $severity = 'status' ) { + if ( is_callable( $this->progressCallback ) ) { + $useErrorOutput = $severity === 'error'; + call_user_func( $this->progressCallback, $text, $channel, $useErrorOutput ); + } + } + + /** + * Updates pages on by one. 
+ */ + public function execute() { + if ( !$this->group ) { + $this->reportProgress( 'Given group does not exist.', 'groupId', 'error' ); + + return; + } + + if ( !$this->user->idForName() ) { + $this->reportProgress( 'Given user does not exist.', 'user', 'error' ); + + return; + } + + $count = count( $this->changes ); + $this->reportProgress( "Going to update $count pages.", 'pagecount' ); + + $ns = $this->group->getNamespace(); + + foreach ( $this->changes as $title => $text ) { + $this->updateMessage( $ns, $title, $text ); + } + } + + /** + * Actually adds the new translation. + * @param int $namespace + * @param string $page + * @param string $text + */ + private function updateMessage( $namespace, $page, $text ) { + $title = Title::makeTitleSafe( $namespace, $page ); + + if ( !$title instanceof Title ) { + $this->reportProgress( 'INVALID TITLE!', $page, 'error' ); + + return; + } + $this->reportProgress( "Updating {$title->getPrefixedText()}... ", $title ); + + if ( $this->dryrun ) { + $this->reportProgress( 'DRY RUN!', $title ); + + return; + } + + $page = WikiPage::factory( $title ); + $content = ContentHandler::makeContent( $text, $title ); + $status = $page->doEditContent( + $content, + 'Updating translation from gettext import', + 0, + false, + $this->user + ); + + if ( $status === true || ( is_object( $status ) && $status->isOK() ) ) { + $this->reportProgress( 'OK!', $title ); + } else { + $this->reportProgress( 'Failed!', $title ); + } + } +} + +$maintClass = Poimport::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/www/wiki/extensions/Translate/scripts/populateFuzzy.php b/www/wiki/extensions/Translate/scripts/populateFuzzy.php new file mode 100644 index 00000000..32ff07ce --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/populateFuzzy.php @@ -0,0 +1,87 @@ +<?php +/** + * A script to populate fuzzy tags to revtag table. 
+ * + * @author Niklas Laxström + * @copyright Copyright © 2009-2013, Niklas Laxström + * @license GPL-2.0-or-later + * @file + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $dir = __DIR__; + $IP = "$dir/../../.."; +} +require_once "$IP/maintenance/Maintenance.php"; + +/// A script to populate fuzzy tags to revtag table. +class PopulateFuzzy extends Maintenance { + public function __construct() { + parent::__construct(); + $this->mDescription = 'A script to populate fuzzy tags to revtag table.'; + $this->addOption( + 'namespace', + '(optional) Namepace name or id', + /*required*/false, + /*has arg*/true + ); + } + + public function execute() { + global $wgTranslateMessageNamespaces; + + $namespace = $this->getOption( 'namespace', $wgTranslateMessageNamespaces ); + if ( is_string( $namespace ) && + !MWNamespace::exists( $namespace ) + ) { + $namespace = MWNamespace::getCanonicalIndex( $namespace ); + + if ( $namespace === null ) { + $this->error( 'Bad namespace', true ); + } + } + + $dbw = wfGetDB( DB_MASTER ); + $tables = [ 'page', 'text', 'revision' ]; + $fields = [ 'page_id', 'page_title', 'page_namespace', 'rev_id', 'old_text', 'old_flags' ]; + $conds = [ + 'page_latest = rev_id', + 'old_id = rev_text_id', + 'page_namespace' => $namespace, + ]; + + $limit = 100; + $offset = 0; + while ( true ) { + $inserts = []; + $this->output( '.', 0 ); + $options = [ 'LIMIT' => $limit, 'OFFSET' => $offset ]; + $res = $dbw->select( $tables, $fields, $conds, __METHOD__, $options ); + + if ( !$res->numRows() ) { + break; + } + + foreach ( $res as $r ) { + $text = Revision::getRevisionText( $r ); + if ( strpos( $text, TRANSLATE_FUZZY ) !== false ) { + $inserts[] = [ + 'rt_page' => $r->page_id, + 'rt_revision' => $r->rev_id, + 'rt_type' => RevTag::getType( 'fuzzy' ), + ]; + } + } + + $offset += $limit; + + $dbw->replace( 'revtag', 'rt_type_page_revision', $inserts, __METHOD__ ); + } + } 
+} + +$maintClass = PopulateFuzzy::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/www/wiki/extensions/Translate/scripts/processMessageChanges.php b/www/wiki/extensions/Translate/scripts/processMessageChanges.php new file mode 100644 index 00000000..cca7e8cd --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/processMessageChanges.php @@ -0,0 +1,164 @@ +<?php +/** + * Script for processing message changes in file based message groups. + * + * @author Niklas Laxström + * + * @copyright Copyright © 2012-2013, Niklas Laxström + * @license GPL-2.0-or-later + * @file + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $dir = __DIR__; + $IP = "$dir/../../.."; +} +require_once "$IP/maintenance/Maintenance.php"; + +/** + * Script for processing message changes in file based message groups. + * + * We used to process changes during web request, but that was too slow. With + * this command line script we can do all the work needed even if it takes + * some time. + * + * @since 2012-04-23 + */ +class ProcessMessageChanges extends Maintenance { + protected $changes = []; + + /** + * @var int + */ + protected $counter; + + public function __construct() { + parent::__construct(); + $this->mDescription = 'Script for processing message changes in file based message groups'; + $this->addOption( + 'group', + '(optional) Comma separated list of group IDs to process (can use * as wildcard). ' . + 'Default: "*"', + false, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'skipgroup', + '(optional) Comma separated list of group IDs to not process (can use * ' . + 'as wildcard). 
Overrides --group parameter.', + false, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'name', + '(optional) Unique name to avoid conflicts with multiple invocations of this script.', + false, /*required*/ + true /*has arg*/ + ); + $this->addOption( + 'safe-import', + '(optional) Import "safe" changes: message additions when no other kind of changes.', + false, /*required*/ + false /*has arg*/ + ); + } + + public function execute() { + $groups = $this->getGroups(); + $changes = []; + $comparator = new ExternalMessageSourceStateComparator(); + + $scripted = $this->hasOption( 'safe-import' ); + + /** @var FileBasedMessageGroup $group */ + foreach ( $groups as $id => $group ) { + if ( !$scripted ) { + $this->output( "Processing $id\n" ); + } + $changes[$id] = $comparator->processGroup( $group, $comparator::ALL_LANGUAGES ); + } + + // Remove all groups without changes + $changes = array_filter( $changes ); + + if ( $changes === [] ) { + if ( !$scripted ) { + $this->output( "No changes found\n" ); + } + + return; + } + + if ( $this->hasOption( 'safe-import' ) ) { + $importer = new ExternalMessageSourceStateImporter(); + $info = $importer->importSafe( $changes ); + $this->printChangeInfo( $info ); + + return; + } + + $name = $this->getOption( 'name', MessageChangeStorage::DEFAULT_NAME ); + if ( !MessageChangeStorage::isValidCdbName( $name ) ) { + $this->error( 'Invalid name', 1 ); + } + + $file = MessageChangeStorage::getCdbPath( $name ); + + MessageChangeStorage::writeChanges( $changes, $file ); + $url = SpecialPage::getTitleFor( 'ManageMessageGroups', $name )->getFullURL(); + $this->output( "Process changes at $url\n" ); + } + + /** + * Gets list of message groups filtered by user input. 
+ * @return MessageGroup[] + */ + protected function getGroups() { + $groups = MessageGroups::getGroupsByType( 'FileBasedMessageGroup' ); + + // Include all if option not given + $include = $this->getOption( 'group', '*' ); + $include = explode( ',', $include ); + $include = array_map( 'trim', $include ); + $include = MessageGroups::expandWildcards( $include ); + + // Exclude nothing if option not given + $exclude = $this->getOption( 'skipgroup', '' ); + $exclude = explode( ',', $exclude ); + $exclude = array_map( 'trim', $exclude ); + $exclude = MessageGroups::expandWildcards( $exclude ); + + // Flip to allow isset + $include = array_flip( $include ); + $exclude = array_flip( $exclude ); + + $groups = array_filter( $groups, + function ( MessageGroup $group ) use ( $include, $exclude ) { + $id = $group->getId(); + + return isset( $include[$id] ) && !isset( $exclude[$id] ); + } + ); + + return $groups; + } + + protected function printChangeInfo( array $info ) { + foreach ( $info['processed'] as $group => $count ) { + $this->output( "Imported $count new messages or translations for $group.\n" ); + } + + if ( $info['skipped'] !== [] ) { + $skipped = implode( ', ', array_keys( $info['skipped'] ) ); + $this->output( "There are changes to check for groups $skipped.\n" ); + $url = SpecialPage::getTitleFor( 'ManageMessageGroups', $info['name'] )->getFullURL(); + $this->output( "You can process them at $url\n" ); + } + } +} + +$maintClass = ProcessMessageChanges::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/www/wiki/extensions/Translate/scripts/refresh-translatable-pages.php b/www/wiki/extensions/Translate/scripts/refresh-translatable-pages.php new file mode 100644 index 00000000..c5e0b106 --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/refresh-translatable-pages.php @@ -0,0 +1,67 @@ +<?php +/** + * Script to ensure all translation pages are up to date. 
+ * + * @author Niklas Laxström + * @license GPL-2.0-or-later + * @file + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $dir = __DIR__; + $IP = "$dir/../../.."; +} +require_once "$IP/maintenance/Maintenance.php"; + +/** + * Script to ensure all translation pages are up to date + * @since 2013-04 + */ +class RefreshTranslatablePages extends Maintenance { + public function __construct() { + parent::__construct(); + $this->mDescription = 'Ensure all translation pages are up to date.'; + $this->setBatchSize( 300 ); + $this->addOption( 'jobqueue', 'Use JobQueue (asynchronous)' ); + } + + public function execute() { + $groups = MessageGroups::singleton()->getGroups(); + $counter = 0; + $useJobQueue = $this->hasOption( 'jobqueue' ); + + /** @var MessageGroup $group */ + foreach ( $groups as $group ) { + if ( !$group instanceof WikiPageMessageGroup ) { + continue; + } + + $counter++; + if ( ( $counter % $this->mBatchSize ) === 0 ) { + wfWaitForSlaves(); + } + + $page = TranslatablePage::newFromTitle( $group->getTitle() ); + $jobs = TranslationsUpdateJob::getRenderJobs( $page ); + if ( $useJobQueue ) { + JobQueueGroup::singleton()->push( $jobs ); + } else { + foreach ( $jobs as $job ) { + $job->run(); + } + } + } + + if ( $useJobQueue ) { + $this->output( "Queued refresh for $counter translatable pages.\n" ); + } else { + $this->output( "Refreshed $counter translatable pages.\n" ); + } + } +} + +$maintClass = RefreshTranslatablePages::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/www/wiki/extensions/Translate/scripts/sync-group.php b/www/wiki/extensions/Translate/scripts/sync-group.php new file mode 100644 index 00000000..af2febad --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/sync-group.php @@ -0,0 +1,485 @@ +<?php +/** + * Command line script to import/update source messages and translations into + * the wiki database. 
+ *
+ * @author Niklas Laxström
+ * @author Siebrand Mazeland
+ * @copyright Copyright © 2007-2013, Niklas Laxström
+ * @copyright Copyright © 2009-2013, Siebrand Mazeland
+ * @license GPL-2.0-or-later
+ * @file
+ */
+
+// Standard boilerplate to define $IP
+if ( getenv( 'MW_INSTALL_PATH' ) !== false ) {
+	$IP = getenv( 'MW_INSTALL_PATH' );
+} else {
+	$dir = __DIR__;
+	$IP = "$dir/../../..";
+}
+require_once "$IP/maintenance/Maintenance.php";
+
+# Override the memory limit for wfShellExec, 100 MB seems to be too little for svn
+$wgMaxShellMemory = 1024 * 200;
+
+/**
+ * Maintenance script that compares the source files of message groups with
+ * the wiki and hands each group/language pair to ChangeSyncer for import or
+ * conflict resolution.
+ */
+class SyncGroup extends Maintenance {
+	/**
+	 * Declare the script description and its command line options.
+	 */
+	public function __construct() {
+		parent::__construct();
+		$this->mDescription = 'Import or update source messages and translations into ' .
+			'the wiki database.';
+		$this->addOption(
+			'git',
+			'(optional) Use git to retrieve last modified date of i18n files. Will use subversion ' .
+			'by default and fallback on filesystem timestamp',
+			false, /*required*/
+			false /*has arg*/
+		);
+		$this->addOption(
+			'group',
+			'Comma separated list of group IDs (can use * as wildcard).',
+			true, /*required*/
+			true /*has arg*/
+		);
+		$this->addOption(
+			'lang',
+			'(optional) Comma separated list of language codes or *',
+			false, /*required*/
+			true /*has arg*/
+		);
+		$this->addOption(
+			'norc',
+			'(optional) Do not add entries to recent changes table',
+			false, /*required*/
+			false /*has arg*/
+		);
+		$this->addOption(
+			'noask',
+			'(optional) Skip all conflicts',
+			false, /*required*/
+			false /*has arg*/
+		);
+		$this->addOption(
+			'start',
+			'(optional) Start of the last export (changes in wiki after will conflict)',
+			false, /*required*/
+			true /*has arg*/
+		);
+		$this->addOption(
+			'end',
+			'(optional) End of the last export (changes in source after will conflict)',
+			false, /*required*/
+			true /*has arg*/
+		);
+		$this->addOption(
+			'nocolor',
+			'(optional) Without colors',
+			false, /*required*/
+			false /*has arg*/
+		);
+	}
+
+	/**
+	 * Resolve the requested groups and language codes, then run the conflict
+	 * check for every existing source file. Meta groups and unsupported
+	 * language codes are skipped. Ends by printing the current time so the
+	 * user can store it for the next run.
+	 */
+	public function execute() {
+		$groupIds = explode( ',', trim( $this->getOption( 'group' ) ) );
+		$groupIds = MessageGroups::expandWildcards( $groupIds );
+		$groups = MessageGroups::getGroupsById( $groupIds );
+
+		if ( !count( $groups ) ) {
+			$this->error( 'ESG2: No valid message groups identified.', 1 );
+		}
+
+		$start = $this->getOption( 'start' ) ? strtotime( $this->getOption( 'start' ) ) : false;
+		$end = $this->getOption( 'end' ) ? strtotime( $this->getOption( 'end' ) ) : false;
+
+		$this->output( 'Conflict times: ' . wfTimestamp( TS_ISO_8601, $start ) . ' - ' .
+			wfTimestamp( TS_ISO_8601, $end ) . "\n" );
+
+		// --lang is optional: default to '' so explode() never receives null, and
+		// reindex after filtering (array_filter keeps the original keys) so that
+		// $codes[0] really is the first code the user supplied.
+		$codes = array_values( array_filter(
+			array_map( 'trim', explode( ',', $this->getOption( 'lang', '' ) ) )
+		) );
+
+		$supportedCodes = array_keys( TranslateUtils::getLanguageNames( 'en' ) );
+		// array_keys() returns a list, so sorting has to be done on the values;
+		// the previous ksort() call had no effect.
+		sort( $supportedCodes, SORT_STRING );
+
+		// Without --lang the list is empty and no language is processed.
+		if ( isset( $codes[0] ) && $codes[0] === '*' ) {
+			$codes = $supportedCodes;
+		}
+
+		/** @var FileBasedMessageGroup $group */
+		foreach ( $groups as $groupId => $group ) {
+			if ( $group->isMeta() ) {
+				$this->output( "Skipping meta message group $groupId.\n" );
+				continue;
+			}
+
+			$this->output( "{$group->getLabel()} ", $group );
+
+			foreach ( $codes as $code ) {
+				// No sync possible for unsupported language codes.
+				if ( !in_array( $code, $supportedCodes ) ) {
+					$this->output( 'Unsupported code ' . $code . ": skipping.\n" );
+					continue;
+				}
+
+				$file = $group->getSourceFilePath( $code );
+
+				if ( !$file ) {
+					continue;
+				}
+
+				if ( !file_exists( $file ) ) {
+					continue;
+				}
+
+				// ChangeSyncer's constructor only takes the group; progress is
+				// relayed through the callback registered on the next line.
+				$cs = new ChangeSyncer( $group );
+				$cs->setProgressCallback( [ $this, 'myOutput' ] );
+				$cs->interactive = !$this->hasOption( 'noask' );
+				$cs->nocolor = $this->hasOption( 'nocolor' );
+				$cs->norc = $this->hasOption( 'norc' );
+
+				# @todo FIXME: Make this auto detect.
+				# Guess last modified date of the file from either git, svn or filesystem
+				if ( $this->hasOption( 'git' ) ) {
+					$ts = $cs->getTimestampsFromGit( $file );
+				} else {
+					$ts = $cs->getTimestampsFromSvn( $file );
+				}
+				if ( !$ts ) {
+					$ts = $cs->getTimestampsFromFs( $file );
+				}
+
+				$this->output( "Modify time for $code: " . wfTimestamp( TS_ISO_8601, $ts ) . "\n" );
+
+				$cs->checkConflicts( $code, $start, $end, $ts );
+			}
+		}
+		// Print timestamp if the user wants to store it
+		$this->output( wfTimestamp( TS_RFC2822 ) . "\n" );
+	}
+
+	/**
+	 * Public alternative for protected Maintenance::output() as we need to get
+	 * messages from the ChangeSyncer class to the commandline.
+	 * @param string $text The text to show to the user
+	 * @param string|null $channel Unique identifier for the channel.
+	 * @param bool $error Whether this is an error message
+	 */
+	public function myOutput( $text, $channel = null, $error = false ) {
+		if ( $error ) {
+			// The second argument of error() is the exit code (see its use in
+			// execute()), not an output channel, so the channel is not forwarded.
+			$this->error( $text );
+		} else {
+			$this->output( $text, $channel );
+		}
+	}
+}
+
+/**
+ * Simple external changes syncer and conflict resolution.
+ */
+class ChangeSyncer {
+	/** @var callable Function to report progress updates */
+	protected $progressCallback;
+
+	/** @var bool Don't list changes in recent changes table. */
+	public $norc = false;
+
+	/** @var bool Whether the script can ask questions. */
+	public $interactive = true;
+
+	/** @var bool Disable color output. */
+	public $nocolor = false;
+
+	/** @var MessageGroup */
+	protected $group;
+
+	/**
+	 * @param MessageGroup $group Message group to synchronise.
+	 */
+	public function __construct( MessageGroup $group ) {
+		$this->group = $group;
+	}
+
+	/**
+	 * Register the callback that reportProgress() forwards messages to.
+	 * @param callable $callback Invoked as ( $text, $channel, $isError ).
+	 */
+	public function setProgressCallback( $callback ) {
+		$this->progressCallback = $callback;
+	}
+
+	/**
+	 * Forward a progress message to the registered callback; does nothing
+	 * when no callable has been registered.
+	 * @see Maintenance::output for param docs
+	 * @param string $text Message to show.
+	 * @param mixed $channel Passed through to the callback unchanged.
+	 * @param string $severity 'error' makes the callback receive true as its
+	 *  third argument; any other value passes false.
+	 */
+	protected function reportProgress( $text, $channel, $severity = 'status' ) {
+		if ( is_callable( $this->progressCallback ) ) {
+			$useErrorOutput = $severity === 'error';
+			call_user_func( $this->progressCallback, $text, $channel, $useErrorOutput );
+		}
+	}
+
+	// svn component from pecl doesn't seem to have this in quick sight
+	/**
+	 * Fetch last changed timestamp for a versioned file for conflict resolution.
+	 * Runs `svn info` on the file and parses its "Last Changed Date" line.
+	 * @param string $file Filename with full path.
+	 * @return int|bool Result of strtotime() on the reported date, or false when
+	 *  the command exits non-zero or the date line is not found.
+	 */
+	public function getTimestampsFromSvn( $file ) {
+		$file = escapeshellarg( $file );
+		$retval = 0;
+		$output = wfShellExec( "svn info $file 2>/dev/null", $retval );
+
+		if ( $retval ) {
+			return false;
+		}
+
+		$matches = [];
+		// Capture the date between "Last Changed Date: " and the opening
+		// parenthesis that follows it; the m modifier lets ^ match at the start
+		// of any line of the output.
+		$regex = '^Last Changed Date: (.*) \(';
+		$ok = preg_match( "~$regex~m", $output, $matches );
+		if ( $ok ) {
+			return strtotime( $matches[1] );
+		}
+
+		return false;
+	}
+
+	/**
+	 * Fetch last changed timestamp for a versioned file for conflict resolution.
+	 * Runs `git log` for the newest commit touching the file and parses the
+	 * commit date it prints.
+	 * @param string $file Filename with full path.
+	 * @return int|bool Result of strtotime() on the command output, or false
+	 *  when the command exits non-zero.
+	 */
+	public function getTimestampsFromGit( $file ) {
+		$file = escapeshellarg( $file );
+		$retval = 0;
+		$output = wfShellExec( "git log -n 1 --format=%cd $file", $retval );
+
+		if ( $retval ) {
+			return false;
+		}
+
+		return strtotime( $output );
+	}
+
+	/**
+	 * Fetch last changed timestamp for any file for conflict resolution.
+	 * @param string $file Filename with full path.
+	 * @return int|bool Timestamp or false.
+ */ + public function getTimestampsFromFs( $file ) { + if ( !file_exists( $file ) ) { + return false; + } + + $stat = stat( $file ); + + return $stat['mtime']; + } + + /** + * Do some conflict resolution for translations. + * @param string $code Language code. + * @param bool|int $startTs Time of the last export (changes in wiki after + * this will conflict) + * @param bool|int $endTs Time of the last export (changes in source before + * this won't conflict) + * @param bool|int $changeTs When change happened in the source. + */ + public function checkConflicts( $code, $startTs = false, $endTs = false, $changeTs = false ) { + $messages = $this->group->load( $code ); + + if ( !count( $messages ) ) { + return; + } + + $collection = $this->group->initCollection( $code ); + $collection->filter( 'ignored' ); + $collection->loadTranslations(); + + foreach ( $messages as $key => $translation ) { + if ( !isset( $collection[$key] ) ) { + continue; + } + + // @todo Temporary exception. Should be fixed elsewhere more generically. + if ( $translation === '{{PLURAL:GETTEXT|}}' ) { + return; + } + + $title = Title::makeTitleSafe( $this->group->getNamespace(), "$key/$code" ); + + $page = $title->getPrefixedText(); + + if ( $collection[$key]->translation() === null ) { + $this->reportProgress( "Importing $page as a new translation\n", 'importing' ); + $this->import( $title, $translation, 'Importing a new translation' ); + continue; + } + + $current = str_replace( TRANSLATE_FUZZY, '', $collection[$key]->translation() ); + $translation = str_replace( TRANSLATE_FUZZY, '', $translation ); + if ( $translation === $current ) { + continue; + } + + $this->reportProgress( 'Conflict in ' . $this->color( 'bold', $page ) . 
'!', $page ); + + $iso = 'xnY-xnm-xnd"T"xnH:xni:xns'; + $lang = RequestContext::getMain()->getLanguage(); + + // Finally all is ok, now lets start comparing timestamps + // Make sure we are comparing timestamps in same format + $wikiTs = $this->getLastGoodChange( $title, $startTs ); + if ( $wikiTs ) { + $wikiTs = wfTimestamp( TS_UNIX, $wikiTs ); + $wikiDate = $lang->sprintfDate( $iso, wfTimestamp( TS_MW, $wikiTs ) ); + } else { + $wikiDate = 'Unknown'; + } + + if ( $startTs ) { + $startTs = wfTimestamp( TS_UNIX, $startTs ); + } + + if ( $endTs ) { + $endTs = wfTimestamp( TS_UNIX, $endTs ); + } + if ( $changeTs ) { + $changeTs = wfTimestamp( TS_UNIX, $changeTs ); + $changeDate = $lang->sprintfDate( $iso, wfTimestamp( TS_MW, $changeTs ) ); + } else { + $changeDate = 'Unknown'; + } + + if ( $changeTs ) { + if ( $wikiTs > $startTs && $changeTs <= $endTs ) { + $this->reportProgress( ' →Changed in wiki after export: IGNORE', $page ); + continue; + } elseif ( !$wikiTs || ( $changeTs > $endTs && $wikiTs < $startTs ) ) { + $this->reportProgress( ' →Changed in source after export: IMPORT', $page ); + $this->import( + $title, + $translation, + 'Updating translation from external source' + ); + continue; + } + } + + if ( !$this->interactive ) { + continue; + } + + $this->reportProgress( ' →Needs manual resolution', $page ); + $this->reportProgress( "Source translation at $changeDate:", 'source' ); + $this->reportProgress( $this->color( 'blue', $translation ), 'source' ); + $this->reportProgress( "Wiki translation at $wikiDate:", 'translation' ); + $this->reportProgress( $this->color( 'green', $current ), 'translation' ); + + do { + $this->reportProgress( 'Resolution: [S]kip [I]mport [C]onflict: ', 'foo' ); + // @todo Find an elegant way to use Maintenance::readconsole(). 
+ $action = fgets( STDIN ); + $action = strtoupper( trim( $action ) ); + + if ( $action === 'S' ) { + break; + } + + if ( $action === 'I' ) { + $this->import( + $title, + $translation, + 'Updating translation from external source' + ); + break; + } + + if ( $action === 'C' ) { + $this->import( + $title, + TRANSLATE_FUZZY . $translation, + 'Edit conflict between wiki and source' + ); + break; + } + } while ( true ); + } + } + + /** + * Colors text for shell output + * @param string $color Either blue, green or bold. + * @param string $text + * @return string + */ + public function color( $color, $text ) { + switch ( $color ) { + case 'blue': + return "\033[1;34m$text\033[0m"; + case 'green': + return "\033[1;32m$text\033[0m"; + case 'bold': + return "\033[1m$text\033[0m"; + default: + return $text; + } + } + + /** + * Try to identify when the translation was last changed in the wiki. + * @param Title $title Title of the page which contains translation. + * @param int|bool $startTs Timestamp how far back to go before giving up. + * @return int|bool Timestamp or false. + */ + public function getLastGoodChange( $title, $startTs = false ) { + global $wgTranslateFuzzyBotName; + + $wikiTs = false; + $revision = Revision::newFromTitle( $title ); + while ( $revision ) { + // No need to go back further + if ( $startTs && $wikiTs && ( $wikiTs < $startTs ) ) { + break; + } + + if ( $revision->getUserText( Revision::RAW ) === $wgTranslateFuzzyBotName ) { + $revision = $revision->getPrevious(); + continue; + } + + $wikiTs = wfTimestamp( TS_UNIX, $revision->getTimestamp() ); + break; + } + + return $wikiTs; + } + + /** + * Does the actual edit. + * @param Title $title + * @param string $translation + * @param string $comment Edit summary. 
+ */ + public function import( $title, $translation, $comment ) { + $flags = EDIT_FORCE_BOT; + if ( $this->norc ) { + $flags |= EDIT_SUPPRESS_RC; + } + + $this->reportProgress( "Importing {$title->getPrefixedText()}: ", $title ); + + $wikipage = new WikiPage( $title ); + $content = ContentHandler::makeContent( $translation, $title ); + $status = $wikipage->doEditContent( + $content, + $comment, + $flags, + false, + FuzzyBot::getUser() + ); + + $success = $status === true || ( is_object( $status ) && $status->isOK() ); + $this->reportProgress( $success ? 'OK' : 'FAILED', $title ); + } +} + +$maintClass = SyncGroup::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/www/wiki/extensions/Translate/scripts/test-mt.php b/www/wiki/extensions/Translate/scripts/test-mt.php new file mode 100644 index 00000000..c335e734 --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/test-mt.php @@ -0,0 +1,92 @@ +<?php +/** + * Script to test web services from the command line + * + * @author Niklas Laxström + * @license GPL-2.0-or-later + * @file + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $dir = __DIR__; + $IP = "$dir/../../.."; +} +require_once "$IP/maintenance/Maintenance.php"; + +class TestMT extends Maintenance { + public function __construct() { + parent::__construct(); + $this->mDescription = 'Test webservices.'; + + $this->addOption( + 'service', + 'Which service to use', + true, /*required*/ + true /*has arg*/ + ); + + $this->addOption( + 'from', + 'Source language tag', + true, /*required*/ + true /*has arg*/ + ); + + $this->addOption( + 'to', + 'Target language tag', + true, /*required*/ + true /*has arg*/ + ); + + $this->addArg( + 'text', + 'Text to translate', + true /*required*/ + ); + } + + public function execute() { + global $wgTranslateTranslationServices; + + $name = $this->getOption( 'service' ); + + if ( !isset( $wgTranslateTranslationServices[ $name ] ) ) 
{ + $this->fatalError( "Unknown service.\n" ); + } + + $service = TranslationWebService::factory( $name, $wgTranslateTranslationServices[ $name ] ); + $service->setLogger( new TranslateCliLogger( function ( $msg ) { + $this->output( "$msg\n" ); + } ) ); + + $from = $this->getOption( 'from' ); + $to = $this->getOption( 'to' ); + $text = $this->getArg( 0 ); + + if ( !$service->isSupportedLanguagePair( $from, $to ) ) { + $this->fatalError( "Unsupported language pair.\n" ); + } + + $query = $service->getQueries( $text, $from, $to ); + if ( $query === [] ) { + $this->fatalError( "Service query error.\n" ); + } + + $agg = new QueryAggregator(); + $id = $agg->addQuery( $query[ 0 ] ); + $agg->run(); + $res = $agg->getResponse( $id ); + if ( $res === null ) { + $this->fatalError( "Service response error.\n" ); + } + + $this->output( $service->getResultData( $res ), 1 ); + } +} + +$maintClass = TestMT::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/www/wiki/extensions/Translate/scripts/translator-stats-process.php b/www/wiki/extensions/Translate/scripts/translator-stats-process.php new file mode 100644 index 00000000..72cbbef2 --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/translator-stats-process.php @@ -0,0 +1,119 @@ +<?php +/** + * Script to gather translator stats. + * + * @author Niklas Laxström + * @license GPL-2.0-or-later + * @file + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $dir = __DIR__; + $IP = "$dir/../../.."; +} +require_once "$IP/maintenance/Maintenance.php"; + +class TSP extends Maintenance { + public function __construct() { + parent::__construct(); + $this->mDescription = 'Script to calculate monthly stats about tsv data produced ' . 
+			'by translator-stats.php.';
+		$this->addArg(
+			'file',
+			'tsv file to process'
+		);
+	}
+
+	/**
+	 * Median of a list of numbers.
+	 *
+	 * For an odd number of values this is the middle element of the sorted
+	 * list; for an even number it is the mean of the two middle elements.
+	 * The odd and even branches used to be swapped, so even-sized lists
+	 * returned a single element and odd-sized lists averaged two.
+	 *
+	 * @param array $a Numbers in any order; the caller's array is not modified.
+	 * @return int|float 0 for an empty list.
+	 */
+	protected function median( $a ) {
+		sort( $a );
+		$len = count( $a );
+		if ( $len === 0 ) {
+			return 0;
+		}
+
+		$mid = (int)( $len / 2 );
+		if ( $len % 2 === 1 ) {
+			return $a[$mid];
+		}
+
+		return ( $a[$mid - 1] + $a[$mid] ) / 2;
+	}
+
+	/**
+	 * Read the tsv file given as first argument, bucket its rows by
+	 * registration month and print one tab separated summary row per month:
+	 * new users, promoted users, users with more than 100 edits, median and
+	 * average promotion delay in hours, and the sandbox approval rate.
+	 */
+	public function execute() {
+		$handle = fopen( $this->getArg( 0 ), 'r' );
+		if ( $handle === false ) {
+			// Without this check the read loop below never sees fgets()
+			// return false on PHP < 8 and would spin forever.
+			$this->fatalError( "Unable to open the tsv file for reading.\n" );
+		}
+		// remove heading
+		fgets( $handle );
+
+		$data = [];
+		while ( true ) {
+			$l = fgets( $handle );
+			if ( $l === false ) {
+				break;
+			}
+
+			$fields = explode( "\t", trim( $l, "\n" ) );
+			if ( count( $fields ) < 6 ) {
+				// Blank or truncated line: it cannot be attributed to a month.
+				continue;
+			}
+
+			// Registration timestamp starts with YYYYMM; bucket by month.
+			$reg = $fields[1];
+			$month = substr( $reg, 0, 4 ) . '-' . substr( $reg, 4, 2 ) . '-01';
+			$data[$month][] = $fields;
+		}
+
+		fclose( $handle );
+
+		ksort( $data );
+
+		echo "period\tnew\tpromoted\tgood\tmedian promotion time\t" .
+			"avg promotion time\tsandbox approval rate\n";
+
+		foreach ( $data as $key => $period ) {
+			$total = 0;
+			$promoted = 0;
+			$good = 0;
+			$delay = [];
+			$avg = 'N/A';
+			$sbar = [];
+
+			foreach ( $period as $p ) {
+				list( $name, $reg, $edits, $translator, $promtime, $method ) = $p;
+				$total++;
+				if ( $translator === 'translator' ) {
+					$promoted++;
+				}
+
+				if ( $edits > 100 ) {
+					$good++;
+				}
+
+				if ( $promtime ) {
+					// Seconds between registration and promotion.
+					$delay[] = wfTimestamp( TS_UNIX, $promtime ) - wfTimestamp( TS_UNIX, $reg );
+				}
+
+				if ( $method === 'sandbox' ) {
+					// One entry per sandbox user: true when promoted.
+					if ( $promtime ) {
+						$sbar[] = true;
+					} else {
+						$sbar[] = false;
+					}
+				}
+
+			}
+
+			// Delays are in seconds; report them in whole hours.
+			$median = round( $this->median( $delay ) / 3600 );
+			if ( count( $delay ) ) {
+				$avg = round( array_sum( $delay ) / count( $delay ) / 3600 );
+			}
+
+			if ( $sbar === [] ) {
+				$sbar = 'N/A';
+			} else {
+				$sbar = count( array_filter( $sbar ) ) / count( $sbar );
+			}
+
+			echo "$key\t$total\t$promoted\t$good\t$median\t$avg\t$sbar\n";
+		}
+	}
+}
+
+$maintClass = TSP::class;
+require_once RUN_MAINTENANCE_IF_MAIN;
diff --git
a/www/wiki/extensions/Translate/scripts/translator-stats.php b/www/wiki/extensions/Translate/scripts/translator-stats.php new file mode 100644 index 00000000..1847f46d --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/translator-stats.php @@ -0,0 +1,131 @@ +<?php +/** + * Script to gather translator stats. + * + * @author Niklas Laxström + * @license GPL-2.0-or-later + * @file + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $dir = __DIR__; + $IP = "$dir/../../.."; +} +require_once "$IP/maintenance/Maintenance.php"; + +class TS extends Maintenance { + public function __construct() { + parent::__construct(); + $this->mDescription = 'Script to gather translator stats in tsv format. ' . + 'You can further process the output with translate-stats-process.php'; + } + + public function execute() { + global $wgDisableUserGroupExpiry; + + $dbr = wfGetDB( DB_REPLICA ); + $users = $dbr->select( + [ 'user', 'user_groups' ], + [ + 'user_name', + 'user_registration', + 'user_editcount', + 'ug_group', + ], + [ + 'user_registration is not null' + ], + __METHOD__, + [ + 'ORDER BY' => 'user_id ASC', + ], + [ + 'user_groups' => [ + 'LEFT JOIN', + [ + 'user_id=ug_user', + 'ug_group' => 'translator', + ( isset( $wgDisableUserGroupExpiry ) && !$wgDisableUserGroupExpiry ) ? + 'ug_expiry IS NULL OR ug_expiry >= ' . 
$dbr->addQuotes( $dbr->timestamp() ) : + '' + ] + ] + ] + ); + + echo "username\tregistration ts\tedit count\tis translator?\tpromoted ts\tmethod\n"; + + $rejected = $dbr->select( + [ 'logging' ], + [ + 'log_title', + 'log_timestamp', + ], + [ + 'log_type' => 'translatorsandbox', + 'log_action' => 'rejected', + ], + __METHOD__ + ); + + foreach ( $rejected as $r ) { + echo "{$r->log_title}\t{$r->log_timestamp}\t0\t\t\tsandbox\n"; + } + + foreach ( $users as $u ) { + $logs = $dbr->select( + 'logging', + [ + 'log_type', + 'log_action', + 'log_timestamp', + 'log_params', + ], + [ + 'log_title' => $u->user_name, + 'log_type' => [ 'rights', 'translatorsandbox' ], + ], + __METHOD__, + [ + 'ORDER BY' => 'log_id ASC', + ] + ); + + $promoted = null; + $method = 'normal'; + foreach ( $logs as $log ) { + if ( $log->log_action === 'promoted' ) { + $promoted = $log->log_timestamp; + $method = 'sandbox'; + break; + } elseif ( $log->log_action === 'rights' ) { + Wikimedia\suppressWarnings(); + $data = unserialize( $log->log_params ); + Wikimedia\restoreWarnings(); + if ( $data === false ) { + $lines = explode( "\n", $log->log_params ); + if ( strpos( $lines[1], 'translator' ) !== false ) { + $promoted = $log->log_timestamp; + break; + } + } elseif ( + isset( $data['5::newgroups'] ) && + in_array( 'translator', $data['5::newgroups'] ) + ) { + $promoted = $log->log_timestamp; + break; + } + } + } + + echo "{$u->user_name}\t{$u->user_registration}\t{$u->user_editcount}" . 
+ "\t{$u->ug_group}\t{$promoted}\t{$method}\n"; + } + } +} + +$maintClass = TS::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/www/wiki/extensions/Translate/scripts/ttmserver-export.php b/www/wiki/extensions/Translate/scripts/ttmserver-export.php new file mode 100644 index 00000000..64970ec2 --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/ttmserver-export.php @@ -0,0 +1,224 @@ +<?php +/** + * Script to bootstrap TTMServer translation memory + * + * @author Niklas Laxström + * @license GPL-2.0-or-later + * @file + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $dir = __DIR__; + $IP = "$dir/../../.."; +} +require_once "$IP/maintenance/Maintenance.php"; + +/** + * Script to bootstrap TTMServer translation memory. + * @since 2012-01-26 + */ +class TTMServerBootstrap extends Maintenance { + /** + * @var bool Option for reindexing + */ + protected $reindex; + + public function __construct() { + parent::__construct(); + $this->mDescription = 'Script to bootstrap TTMServer.'; + $this->addOption( + 'threads', + '(optional) Number of threads', + /*required*/false, + /*has arg*/true + ); + $this->addOption( + 'ttmserver', + '(optional) Server configuration identifier', + /*required*/false, + /*has arg*/true + ); + // This option erases all data, empties the index and rebuilds it. + $this->addOption( + 'reindex', + 'Update the index mapping. Warning: Clears all existing data in the index.' 
+ ); + $this->setBatchSize( 500 ); + $this->start = microtime( true ); + } + + public function statusLine( $text, $channel = null ) { + $pid = sprintf( '%5s', getmypid() ); + $prefix = sprintf( '%6.2f', microtime( true ) - $this->start ); + $mem = sprintf( '%5.1fM', ( memory_get_usage( true ) / ( 1024 * 1024 ) ) ); + $this->output( "$pid $prefix $mem $text", $channel ); + } + + public function execute() { + global $wgTranslateTranslationServices, + $wgTranslateTranslationDefaultService; + + $configKey = $this->getOption( 'ttmserver', $wgTranslateTranslationDefaultService ); + if ( !isset( $wgTranslateTranslationServices[$configKey] ) ) { + $this->error( 'Translation memory is not configured properly', 1 ); + } + + $config = $wgTranslateTranslationServices[$configKey]; + $this->reindex = $this->getOption( 'reindex', false ); + + // Do as little as possible in the main thread, to not clobber forked processes. + // See also #resetStateForFork. + $pid = pcntl_fork(); + if ( $pid === 0 ) { + $this->resetStateForFork(); + $this->beginBootStrap( $config ); + exit(); + } elseif ( $pid === -1 ) { + // Fork failed do it serialized + $this->beginBootStrap( $config ); + } else { + // Main thread + $this->statusLine( "Forked thread $pid to handle bootstrapping\n" ); + $status = 0; + pcntl_waitpid( $pid, $status ); + // beginBootStrap probably failed, give up. + if ( $status !== 0 ) { + $this->error( 'Boostrap failed.', 1 ); + } + } + + $threads = $this->getOption( 'threads', 1 ); + $pids = []; + + $groups = MessageGroups::singleton()->getGroups(); + foreach ( $groups as $id => $group ) { + /** @var MessageGroup $group */ + if ( $group->isMeta() ) { + continue; + } + + // Fork to increase speed with parallelism. Also helps with memory usage if there are leaks. 
+ $pid = pcntl_fork(); + + if ( $pid === 0 ) { + $this->resetStateForFork(); + $this->exportGroup( $group, $config ); + exit(); + } elseif ( $pid === -1 ) { + // Fork failed do it serialized + $this->exportGroup( $group, $config ); + } else { + // Main thread + $this->statusLine( "Forked thread $pid to handle $id\n" ); + $pids[$pid] = true; + + // If we hit the thread limit, wait for any child to finish. + if ( count( $pids ) >= $threads ) { + $status = 0; + $pid = pcntl_wait( $status ); + unset( $pids[$pid] ); + } + } + } + + // Return control after all threads have finished. + foreach ( array_keys( $pids ) as $pid ) { + $status = 0; + pcntl_waitpid( $pid, $status ); + } + + // It's okay to do this in the main thread as it is the last thing + $this->endBootstrap( $config ); + } + + protected function beginBootStrap( $config ) { + $server = TTMServer::factory( $config ); + $server->setLogger( $this ); + if ( $server->isFrozen() ) { + $this->error( "The service is frozen, giving up.", 1 ); + } + $this->statusLine( "Cleaning up old entries...\n" ); + if ( $this->reindex ) { + $server->doMappingUpdate(); + } + $server->beginBootstrap(); + } + + protected function endBootstrap( $config ) { + $this->statusLine( "Optimizing...\n" ); + $server = TTMServer::factory( $config ); + $server->setLogger( $this ); + $server->endBootstrap(); + } + + protected function exportGroup( MessageGroup $group, $config ) { + $server = TTMServer::factory( $config ); + $server->setLogger( $this ); + + $id = $group->getId(); + $sourceLanguage = $group->getSourceLanguage(); + + $stats = MessageGroupStats::forGroup( $id ); + + $collection = $group->initCollection( $sourceLanguage ); + $collection->filter( 'ignored' ); + $collection->initMessages(); + + $server->beginBatch(); + + $inserts = []; + foreach ( $collection->keys() as $mkey => $title ) { + $handle = new MessageHandle( $title ); + $inserts[] = [ $handle, $sourceLanguage, $collection[$mkey]->definition() ]; + } + + while ( $inserts !== 
[] ) { + $batch = array_splice( $inserts, 0, $this->mBatchSize ); + $server->batchInsertDefinitions( $batch ); + } + + $inserts = []; + foreach ( $stats as $targetLanguage => $numbers ) { + if ( $targetLanguage === $sourceLanguage ) { + continue; + } + if ( $numbers[MessageGroupStats::TRANSLATED] === 0 ) { + continue; + } + + $collection->resetForNewLanguage( $targetLanguage ); + $collection->filter( 'ignored' ); + $collection->filter( 'translated', false ); + $collection->loadTranslations(); + + foreach ( $collection->keys() as $mkey => $title ) { + $handle = new MessageHandle( $title ); + $inserts[] = [ $handle, $sourceLanguage, $collection[$mkey]->translation() ]; + } + + while ( count( $inserts ) >= $this->mBatchSize ) { + $batch = array_splice( $inserts, 0, $this->mBatchSize ); + $server->batchInsertTranslations( $batch ); + } + } + + while ( $inserts !== [] ) { + $batch = array_splice( $inserts, 0, $this->mBatchSize ); + $server->batchInsertTranslations( $batch ); + } + + $server->endBatch(); + } + + protected function resetStateForFork() { + // Make sure all existing connections are dead, + // we can't use them in forked children. 
+ MediaWiki\MediaWikiServices::resetChildProcessServices(); + } +} + +$maintClass = TTMServerBootstrap::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/www/wiki/extensions/Translate/scripts/yaml-tests.php b/www/wiki/extensions/Translate/scripts/yaml-tests.php new file mode 100644 index 00000000..a1f9d961 --- /dev/null +++ b/www/wiki/extensions/Translate/scripts/yaml-tests.php @@ -0,0 +1,96 @@ +<?php +/** + * Script for comparing supported YAML parser implementations + * + * @author Niklas Laxström + * + * @copyright Copyright © 2010, Niklas Laxström + * @license GPL-2.0-or-later + * @file + */ + +// Standard boilerplate to define $IP +if ( getenv( 'MW_INSTALL_PATH' ) !== false ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $dir = __DIR__; + $IP = "$dir/../../.."; +} +require_once "$IP/maintenance/Maintenance.php"; + +class YamlTests extends Maintenance { + public function __construct() { + parent::__construct(); + $this->mDescription = 'Script for comparing supported YAML parser implementations.'; + } + + public function execute() { + global $wgTranslateGroupFiles, $wgTranslateYamlLibrary; + $documents = []; + $times = []; + $mems = []; + $mempeaks = []; + + foreach ( [ 'syck', 'spyc', 'phpyaml' ] as $driver ) { + $mempeaks[$driver] = -memory_get_peak_usage( true ); + $mems[$driver] = -memory_get_usage( true ); + $times[$driver] = -microtime( true ); + $wgTranslateYamlLibrary = $driver; + $documents[$driver] = []; + foreach ( $wgTranslateGroupFiles as $file ) { + foreach ( self::parseGroupFile( $file ) as $id => $docu ) { + $documents[$driver]["$file-$id"] = $docu; + } + } + + $times[$driver] += microtime( true ); + $mems[$driver] += memory_get_usage( true ); + $mempeaks[$driver] += memory_get_peak_usage( true ); + + self::sortNestedArrayAssoc( $documents[$driver] ); + file_put_contents( "yaml-test-$driver.txt", var_export( $documents[$driver], true ) ); + file_put_contents( "yaml-output-$driver.txt", TranslateYaml::dump( $documents[$driver] ) ); + 
} + var_dump( $times ); + var_dump( $mems ); + var_dump( $mempeaks ); + } + + public static function parseGroupFile( $filename ) { + $data = file_get_contents( $filename ); + $documents = preg_split( "/^---$/m", $data, -1, PREG_SPLIT_NO_EMPTY ); + $groups = []; + $template = false; + foreach ( $documents as $document ) { + $document = TranslateYaml::loadString( $document ); + if ( isset( $document['TEMPLATE'] ) ) { + $template = $document['TEMPLATE']; + } else { + if ( !isset( $document['BASIC']['id'] ) ) { + trigger_error( 'No path ./BASIC/id (group id not defined) ' . + "in yaml document located in $filename" ); + continue; + } + $groups[$document['BASIC']['id']] = $document; + } + } + + foreach ( $groups as $i => $group ) { + $groups[$i] = MessageGroupConfigurationParser::mergeTemplate( $template, $group ); + } + + return $groups; + } + + public static function sortNestedArrayAssoc( &$a ) { + ksort( $a ); + foreach ( $a as &$value ) { + if ( is_array( $value ) ) { + self::sortNestedArrayAssoc( $value ); + } + } + } +} + +$maintClass = YamlTests::class; +require_once RUN_MAINTENANCE_IF_MAIN; |