summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/Renameuser/RenameUserJob.php
diff options
context:
space:
mode:
authorYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
committerYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
commitfc7369835258467bf97eb64f184b93691f9a9fd5 (patch)
treedaabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/extensions/Renameuser/RenameUserJob.php
first commit
Diffstat (limited to 'www/wiki/extensions/Renameuser/RenameUserJob.php')
-rw-r--r--www/wiki/extensions/Renameuser/RenameUserJob.php188
1 files changed, 188 insertions, 0 deletions
diff --git a/www/wiki/extensions/Renameuser/RenameUserJob.php b/www/wiki/extensions/Renameuser/RenameUserJob.php
new file mode 100644
index 00000000..38c8bd4b
--- /dev/null
+++ b/www/wiki/extensions/Renameuser/RenameUserJob.php
@@ -0,0 +1,188 @@
+<?php
+
+/**
+ * Custom job to perform updates on tables in busier environments
+ *
+ * Job parameters include:
+ * - table : DB table to update
+ * - column : The *_user_text column to update
+ * - oldname : The old user name
+ * - newname : The new user name
+ * - count : The expected number of rows to update in this batch
+ * - logId : The ID of the logging table row expected to exist if the rename was committed
+ *
+ * Additionally, one of the following groups of parameters must be set:
+ * a) The timestamp based rename paramaters:
+ * - timestampColumn : The *_timestamp column
+ * - minTimestamp : The minimum bound of the timestamp column range for this batch
+ * - maxTimestamp : The maximum bound of the timestamp column range for this batch
+ * - uniqueKey : A column that is unique (preferrably the PRIMARY KEY) [optional]
+ * b) The unique key based rename paramaters:
+ * - uniqueKey : A column that is unique (preferrably the PRIMARY KEY)
+ * - keyId : A list of values for this column to determine rows to update for this batch
+ *
+ * To avoid some race conditions, the following parameters should be set:
+ * - userID : The ID of the user to update
+ * - uidColumn : The *_user_id column
+ */
+class RenameUserJob extends Job {
+ public function __construct( Title $title, $params = [], $id = 0 ) {
+ parent::__construct( 'renameUser', $title, $params, $id );
+ }
+
+ public function run() {
+ global $wgUpdateRowsPerQuery;
+
+ $table = $this->params['table'];
+ $column = $this->params['column'];
+ $oldname = $this->params['oldname'];
+ $newname = $this->params['newname'];
+ $count = $this->params['count'];
+ if ( isset( $this->params['userID'] ) ) {
+ $userID = $this->params['userID'];
+ $uidColumn = $this->params['uidColumn'];
+ } else {
+ $userID = null;
+ $uidColumn = null;
+ }
+ if ( isset( $this->params['timestampColumn'] ) ) {
+ $timestampColumn = $this->params['timestampColumn'];
+ $minTimestamp = $this->params['minTimestamp'];
+ $maxTimestamp = $this->params['maxTimestamp'];
+ } else {
+ $timestampColumn = null;
+ $minTimestamp = null;
+ $maxTimestamp = null;
+ }
+ $uniqueKey = isset( $this->params['uniqueKey'] ) ? $this->params['uniqueKey'] : null;
+ $keyId = isset( $this->params['keyId'] ) ? $this->params['keyId'] : null;
+ $logId = isset( $this->params['logId'] ) ? $this->params['logId'] : null;
+
+ $dbw = wfGetDB( DB_MASTER );
+ if ( $logId ) {
+ # Block until the transaction that inserted this job commits.
+ # The atomic section is for sanity as FOR UPDATE does not lock in auto-commit mode
+ # per http://dev.mysql.com/doc/refman/5.7/en/innodb-locking-reads.html.
+ $dbw->startAtomic( __METHOD__ );
+ $committed = $dbw->selectField( 'logging',
+ '1',
+ [ 'log_id' => $logId ],
+ __METHOD__,
+ [ 'FOR UPDATE' ]
+ );
+ $dbw->endAtomic( __METHOD__ );
+ # If the transaction inserting this job was rolled back, detect that
+ if ( $committed === false ) { // rollback happened?
+ throw new LogicException( 'Cannot run job if the account rename failed.' );
+ }
+ }
+
+ # Flush any state snapshot data (and release the lock above)
+ $dbw->commit( __METHOD__, 'flush' );
+
+ # Conditions like "*_user_text = 'x'
+ $conds = [ $column => $oldname ];
+ # If user ID given, add that to condition to avoid rename collisions
+ if ( $userID !== null ) {
+ $conds[$uidColumn] = $userID;
+ }
+ # Bound by timestamp if given
+ if ( $timestampColumn !== null ) {
+ $conds[] = "$timestampColumn >= " . $dbw->addQuotes( $minTimestamp );
+ $conds[] = "$timestampColumn <= " . $dbw->addQuotes( $maxTimestamp );
+ # Bound by unique key if given (B/C)
+ } elseif ( $uniqueKey !== null && $keyId !== null ) {
+ $conds[$uniqueKey] = $keyId;
+ } else {
+ throw new InvalidArgumentException( 'Expected ID batch or time range' );
+ }
+
+ $affectedCount = 0;
+ # Actually update the rows for this job...
+ if ( $uniqueKey !== null ) {
+ # Select the rows to update by PRIMARY KEY
+ $ids = $dbw->selectFieldValues( $table, $uniqueKey, $conds, __METHOD__ );
+ # Update these rows by PRIMARY KEY to avoid slave lag
+ foreach ( array_chunk( $ids, $wgUpdateRowsPerQuery ) as $batch ) {
+ $dbw->commit( __METHOD__, 'flush' );
+ wfWaitForSlaves();
+
+ $dbw->update( $table,
+ [ $column => $newname ],
+ [ $column => $oldname, $uniqueKey => $batch ],
+ __METHOD__
+ );
+ $affectedCount += $dbw->affectedRows();
+ }
+ } else {
+ # Update the chunk of rows directly
+ $dbw->update( $table,
+ [ $column => $newname ],
+ $conds,
+ __METHOD__
+ );
+ $affectedCount += $dbw->affectedRows();
+ }
+
+ # Special case: revisions may be deleted while renaming...
+ if ( $affectedCount < $count && $table === 'revision' && $timestampColumn !== null ) {
+ # If some revisions were not renamed, they may have been deleted.
+ # Do a pass on the archive table to get these straglers...
+ $ids = $dbw->selectFieldValues(
+ 'archive',
+ 'ar_id',
+ [
+ 'ar_user_text' => $oldname,
+ 'ar_user' => $userID,
+ // No user,rev_id index, so use timestamp to bound
+ // the rows. This can use the user,timestamp index.
+ "ar_timestamp >= '$minTimestamp'",
+ "ar_timestamp <= '$maxTimestamp'"
+ ],
+ __METHOD__
+ );
+ foreach ( array_chunk( $ids, $wgUpdateRowsPerQuery ) as $batch ) {
+ $dbw->commit( __METHOD__, 'flush' );
+ wfWaitForSlaves();
+
+ $dbw->update(
+ 'archive',
+ [ 'ar_user_text' => $newname ],
+ [ 'ar_user_text' => $oldname, 'ar_id' => $batch ],
+ __METHOD__
+ );
+ }
+ }
+ # Special case: revisions may be restored while renaming...
+ if ( $affectedCount < $count && $table === 'archive' && $timestampColumn !== null ) {
+ # If some revisions were not renamed, they may have been restored.
+ # Do a pass on the revision table to get these straglers...
+ $ids = $dbw->selectFieldValues(
+ 'revision',
+ 'rev_id',
+ [
+ 'rev_user_text' => $oldname,
+ 'rev_user' => $userID,
+ // No user,rev_id index, so use timestamp to bound
+ // the rows. This can use the user,timestamp index.
+ "rev_timestamp >= '$minTimestamp'",
+ "rev_timestamp <= '$maxTimestamp'"
+ ],
+ __METHOD__
+ );
+ foreach ( array_chunk( $ids, $wgUpdateRowsPerQuery ) as $batch ) {
+ $dbw->commit( __METHOD__, 'flush' );
+ wfWaitForSlaves();
+
+ $dbw->update(
+ 'revision',
+ [ 'rev_user_text' => $newname ],
+ [ 'rev_user_text' => $oldname, 'rev_id' => $batch ],
+ __METHOD__
+ );
+ }
+ }
+
+ return true;
+ }
+}