summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/Translate/scripts/characterEditStats.php
blob: 45b6372eaa3d20056c23f915c2f62ed789fd475b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
<?php
/**
 * Show number of characters translated over a given period of time.
 *
 * @author Santhosh Thottingal
 * @copyright Copyright © 2013 Santhosh Thottingal
 * @license GPL-2.0-or-later
 * @file
 * @ingroup Script Stats
 */

// Locate the MediaWiki installation: prefer the MW_INSTALL_PATH environment
// variable, otherwise fall back to a path three levels above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
if ( $IP === false ) {
	$dir = __DIR__;
	$IP = $dir . '/../../..';
}
require_once "$IP/maintenance/Maintenance.php";

/**
 * Maintenance script that sums the recorded length of recent revisions per
 * language code and prints the result as a tab separated table.
 *
 * Options:
 *  - top:  maximum number of language codes to print (default: all)
 *  - days: size of the reporting window in days (default: 30)
 *  - ns:   comma separated list of namespace IDs (default:
 *          $wgTranslateMessageNamespaces)
 */
class CharacterEditStats extends Maintenance {
	/**
	 * Register the script description and its command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = 'Script to show number of characters translated .';
		$this->addOption(
			'top',
			'(optional) Show given number of language codes (default: show all)',
			false, /*required*/
			true /*has arg*/
		);
		$this->addOption(
			'days',
			'(optional) Calculate for given number of days (default: 30)',
			false, /*required*/
			true /*has arg*/
		);
		$this->addOption(
			'ns',
			'(optional) Comma separated list of namespace IDs',
			false, /*required*/
			true /*has arg*/
		);
	}

	/**
	 * Collect the revisions of the requested period, aggregate their lengths
	 * per language code (ignoring edits by FuzzyBot) and print the report.
	 */
	public function execute() {
		global $wgTranslateFuzzyBotName, $wgSitename, $wgTranslateMessageNamespaces;

		$days = (int)$this->getOption( 'days', 30 );
		// -1 never equals the printed-row counter below, i.e. "no limit".
		$top = (int)$this->getOption( 'top', -1 );

		$namespaces = [];
		if ( $this->hasOption( 'ns' ) ) {
			$input = explode( ',', $this->getOption( 'ns' ) );

			// Silently drop anything that is not numeric.
			foreach ( $input as $namespace ) {
				if ( is_numeric( $namespace ) ) {
					$namespaces[] = $namespace;
				}
			}
		} else {
			$namespaces = $wgTranslateMessageNamespaces;
		}

		// Select set of edits to report on
		$rows = $this->getRevisionsFromHistory( $days, $namespaces );

		// Get counts for edits per language code after filtering out edits by FuzzyBot
		$codes = [];

		foreach ( $rows as $row ) {
			// Filter out edits by $wgTranslateFuzzyBotName
			if ( $row->user_text === $wgTranslateFuzzyBotName ) {
				continue;
			}

			// Title::newFromText() yields null for text it cannot parse; such a
			// row cannot be mapped to a language code, so skip it.
			$title = Title::newFromText( $row->title );
			if ( $title === null ) {
				continue;
			}

			$handle = new MessageHandle( $title );
			$code = $handle->getCode();

			if ( !isset( $codes[$code] ) ) {
				$codes[$code] = 0;
			}

			$codes[$code] += $row->length;
		}

		// Sort counts and report descending up to $top rows.
		arsort( $codes );
		$shown = 0;
		$total = 0;
		$this->output( "Character edit stats for last $days days in $wgSitename\n" );
		$this->output( "code\tname\tedit\n" );
		$this->output( "-----------------------\n" );
		foreach ( $codes as $code => $num ) {
			if ( $shown === $top ) {
				break;
			}
			$language = Language::fetchLanguageName( $code );
			if ( !$language ) {
				// No usable language name: skip the code. This also avoids a
				// division by zero below. Skipped codes do not count towards $top.
				continue;
			}
			// Scale the summed length by the characters-per-byte ratio of the
			// language name. NOTE(review): presumably the stored length is a byte
			// count and this serves as a rough bytes-to-characters estimate for
			// the language's script — confirm.
			$charRatio = mb_strlen( $language, 'UTF-8' ) / strlen( $language );
			$num = (int)( $num * $charRatio );
			$total += $num;
			$this->output( "$code\t$language\t$num\n" );
			$shown++;
		}
		$this->output( "-----------------------\n" );
		$this->output( "Total\t\t$total\n" );
	}

	/**
	 * Fetch the revisions newer than the cutoff in the given namespaces.
	 *
	 * @param int $days Number of days to look back from now.
	 * @param array $namespaces Namespace IDs to restrict the query to.
	 * @return array Result rows, each exposing the fields title, user_text
	 *  and length.
	 */
	private function getRevisionsFromHistory( $days, array $namespaces ) {
		$dbr = wfGetDB( DB_REPLICA );
		$cutoff = $dbr->addQuotes( $dbr->timestamp( time() - $days * 24 * 3600 ) );

		// The field renames are to be compatible with recentchanges table query.
		// The callable must be passed as an array: the second parameter of
		// is_callable() is the $syntax_only flag, not a method name.
		if ( is_callable( [ Revision::class, 'getQueryInfo' ] ) ) {
			$revQuery = Revision::getQueryInfo( [ 'page' ] );
			$revUserText = $revQuery['fields']['rev_user_text'] ?? 'rev_user_text';
		} else {
			// Fallback when Revision::getQueryInfo() is not available: join
			// revision and page by hand.
			$revQuery = [
				'tables' => [ 'revision', 'page' ],
				'joins' => [
					'page' => [ 'JOIN', 'rev_page = page_id' ],
				]
			];
			$revUserText = 'rev_user_text';
		}
		$conds = [
			"rev_timestamp > $cutoff",
			'page_namespace' => $namespaces,
		];

		$res = $dbr->select(
			$revQuery['tables'],
			[
				'title' => 'page_title',
				'user_text' => $revUserText,
				'length' => 'rev_len',
			],
			$conds,
			__METHOD__,
			[],
			$revQuery['joins']
		);
		return iterator_to_array( $res );
	}
}

// Name the class to run, then include the maintenance runner.
// NOTE(review): presumably the file named by RUN_MAINTENANCE_IF_MAIN reads
// $maintClass, so the assignment has to stay before the include — confirm.
$maintClass = CharacterEditStats::class;
require_once RUN_MAINTENANCE_IF_MAIN;