summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/Renameuser/RenameUserJob.php
blob: 38c8bd4b992b715ad650ac8e225836b2fefe1421 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
<?php

/**
 * Custom job to perform updates on tables in busier environments
 *
 * Job parameters include:
 *   - table     : DB table to update
 *   - column    : The *_user_text column to update
 *   - oldname   : The old user name
 *   - newname   : The new user name
 *   - count     : The expected number of rows to update in this batch
 *   - logId     : The ID of the logging table row expected to exist if the rename was committed
 *
 * Additionally, one of the following groups of parameters must be set:
 * a) The timestamp-based rename parameters:
 *   - timestampColumn : The *_timestamp column
 *   - minTimestamp    : The minimum bound of the timestamp column range for this batch
 *   - maxTimestamp    : The maximum bound of the timestamp column range for this batch
 *   - uniqueKey       : A column that is unique (preferably the PRIMARY KEY) [optional]
 * b) The unique-key-based rename parameters:
 *   - uniqueKey : A column that is unique (preferably the PRIMARY KEY)
 *   - keyId     : A list of values for this column to determine rows to update for this batch
 *
 * To avoid some race conditions, the following parameters should be set:
 *   - userID    : The ID of the user to update
 *   - uidColumn : The *_user_id column
 */
class RenameUserJob extends Job {
	/**
	 * @param Title $title Title handed through to the parent Job constructor
	 * @param array $params Job parameters (see the class documentation)
	 * @param int $id Job ID handed through to the parent Job constructor
	 */
	public function __construct( Title $title, $params = [], $id = 0 ) {
		parent::__construct( 'renameUser', $title, $params, $id );
	}

	/**
	 * Rename the user in one batch of rows of the configured table.
	 *
	 * The batch is selected either by a timestamp range or by a list of
	 * unique key values. If fewer rows than expected were updated in a
	 * timestamp-bounded batch on the revision (or archive) table, a second
	 * pass is made over the archive (or revision) table to catch rows that
	 * moved between the two tables in the meantime.
	 *
	 * @return bool Always true
	 * @throws LogicException If a logId was given but no such logging row exists
	 * @throws InvalidArgumentException If neither a time range nor an ID batch was given
	 */
	public function run() {
		$table = $this->params['table'];
		$column = $this->params['column'];
		$oldname = $this->params['oldname'];
		$newname = $this->params['newname'];
		$count = $this->params['count'];
		if ( isset( $this->params['userID'] ) ) {
			$userID = $this->params['userID'];
			$uidColumn = $this->params['uidColumn'];
		} else {
			$userID = null;
			$uidColumn = null;
		}
		if ( isset( $this->params['timestampColumn'] ) ) {
			$timestampColumn = $this->params['timestampColumn'];
			$minTimestamp = $this->params['minTimestamp'];
			$maxTimestamp = $this->params['maxTimestamp'];
		} else {
			$timestampColumn = null;
			$minTimestamp = null;
			$maxTimestamp = null;
		}
		$uniqueKey = isset( $this->params['uniqueKey'] ) ? $this->params['uniqueKey'] : null;
		$keyId = isset( $this->params['keyId'] ) ? $this->params['keyId'] : null;
		$logId = isset( $this->params['logId'] ) ? $this->params['logId'] : null;

		$dbw = wfGetDB( DB_MASTER );
		if ( $logId ) {
			# Block until the transaction that inserted this job commits.
			# The atomic section is for sanity as FOR UPDATE does not lock in auto-commit mode
			# per http://dev.mysql.com/doc/refman/5.7/en/innodb-locking-reads.html.
			$dbw->startAtomic( __METHOD__ );
			$committed = $dbw->selectField( 'logging',
				'1',
				[ 'log_id' => $logId ],
				__METHOD__,
				[ 'FOR UPDATE' ]
			);
			$dbw->endAtomic( __METHOD__ );
			# If the transaction inserting this job was rolled back, detect that
			if ( $committed === false ) { // rollback happened?
				throw new LogicException( 'Cannot run job if the account rename failed.' );
			}
		}

		# Flush any state snapshot data (and release the lock above)
		$dbw->commit( __METHOD__, 'flush' );

		# Conditions like "*_user_text = 'x'"
		$conds = [ $column => $oldname ];
		# If user ID given, add that to condition to avoid rename collisions
		if ( $userID !== null ) {
			$conds[$uidColumn] = $userID;
		}
		# Bound by timestamp if given
		if ( $timestampColumn !== null ) {
			$conds[] = "$timestampColumn >= " . $dbw->addQuotes( $minTimestamp );
			$conds[] = "$timestampColumn <= " . $dbw->addQuotes( $maxTimestamp );
		# Bound by unique key if given (B/C)
		} elseif ( $uniqueKey !== null && $keyId !== null ) {
			$conds[$uniqueKey] = $keyId;
		} else {
			throw new InvalidArgumentException( 'Expected ID batch or time range' );
		}

		$affectedCount = 0;
		# Actually update the rows for this job...
		if ( $uniqueKey !== null ) {
			# Select the rows to update by PRIMARY KEY
			$ids = $dbw->selectFieldValues( $table, $uniqueKey, $conds, __METHOD__ );
			# Update these rows by PRIMARY KEY to avoid slave lag
			$affectedCount += $this->updateByKey(
				$dbw, $table, $column, $uniqueKey, $ids, $oldname, $newname, __METHOD__
			);
		} else {
			# Update the chunk of rows directly
			$dbw->update( $table,
				[ $column => $newname ],
				$conds,
				__METHOD__
			);
			$affectedCount += $dbw->affectedRows();
		}

		# Special cases: revisions may be deleted or restored while renaming.
		# If some rows were not renamed, they may have moved to the sibling table,
		# so do a pass on that table to get these stragglers...
		if ( $affectedCount < $count && $timestampColumn !== null ) {
			if ( $table === 'revision' ) {
				$this->renameStragglers( $dbw, 'archive', 'ar', __METHOD__ );
			} elseif ( $table === 'archive' ) {
				$this->renameStragglers( $dbw, 'revision', 'rev', __METHOD__ );
			}
		}

		return true;
	}

	/**
	 * Rename the user text column for the given key values, one chunk of
	 * $wgUpdateRowsPerQuery keys at a time, flushing and waiting for slaves
	 * before each chunk.
	 *
	 * @param object $dbw Master DB handle as returned by wfGetDB( DB_MASTER )
	 * @param string $table Table to update
	 * @param string $column User name column to rewrite
	 * @param string $keyColumn Column the key values belong to
	 * @param array $ids Key values of the rows to update
	 * @param string $oldname Name the rows must currently carry
	 * @param string $newname Name to write
	 * @param string $fname Caller name used for the queries
	 * @return int Sum of affectedRows() over all chunks
	 */
	private function updateByKey(
		$dbw, $table, $column, $keyColumn, array $ids, $oldname, $newname, $fname
	) {
		global $wgUpdateRowsPerQuery;

		$affected = 0;
		foreach ( array_chunk( $ids, $wgUpdateRowsPerQuery ) as $batch ) {
			$dbw->commit( $fname, 'flush' );
			wfWaitForSlaves();

			$dbw->update( $table,
				[ $column => $newname ],
				# Re-check the old name so rows renamed in the meantime are left alone
				[ $column => $oldname, $keyColumn => $batch ],
				$fname
			);
			$affected += $dbw->affectedRows();
		}

		return $affected;
	}

	/**
	 * Rename rows of this job's user and time range in a sibling table
	 * (archive for revision, revision for archive).
	 *
	 * Must only be called for timestamp-bounded jobs, since it reads the
	 * minTimestamp and maxTimestamp parameters.
	 *
	 * @param object $dbw Master DB handle as returned by wfGetDB( DB_MASTER )
	 * @param string $table Sibling table to scan ('archive' or 'revision')
	 * @param string $prefix Column prefix of that table ('ar' or 'rev')
	 * @param string $fname Caller name used for the queries
	 */
	private function renameStragglers( $dbw, $table, $prefix, $fname ) {
		$oldname = $this->params['oldname'];
		$newname = $this->params['newname'];
		$userTextColumn = "{$prefix}_user_text";
		$timestampColumn = "{$prefix}_timestamp";
		$idColumn = "{$prefix}_id";

		$conds = [ $userTextColumn => $oldname ];
		# Only constrain by user ID when one was supplied; a null value here
		# would be turned into an "IS NULL" condition.
		if ( isset( $this->params['userID'] ) ) {
			$conds["{$prefix}_user"] = $this->params['userID'];
		}
		# No user,rev_id index, so use timestamp to bound
		# the rows. This can use the user,timestamp index.
		# Quote through the DB layer, like the main query in run() does.
		$conds[] = "$timestampColumn >= " . $dbw->addQuotes( $this->params['minTimestamp'] );
		$conds[] = "$timestampColumn <= " . $dbw->addQuotes( $this->params['maxTimestamp'] );

		$ids = $dbw->selectFieldValues( $table, $idColumn, $conds, $fname );
		$this->updateByKey(
			$dbw, $table, $userTextColumn, $idColumn, $ids, $oldname, $newname, $fname
		);
	}
}