diff options
author | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
---|---|---|
committer | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
commit | fc7369835258467bf97eb64f184b93691f9a9fd5 (patch) | |
tree | daabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/extensions/Renameuser/RenameUserJob.php |
first commit
Diffstat (limited to 'www/wiki/extensions/Renameuser/RenameUserJob.php')
-rw-r--r-- | www/wiki/extensions/Renameuser/RenameUserJob.php | 188 |
1 files changed, 188 insertions, 0 deletions
<?php

/**
 * Custom job to perform updates on tables in busier environments
 *
 * Job parameters include:
 *   - table     : DB table to update
 *   - column    : The *_user_text column to update
 *   - oldname   : The old user name
 *   - newname   : The new user name
 *   - count     : The expected number of rows to update in this batch
 *   - logId     : The ID of the logging table row expected to exist if the rename was committed
 *
 * Additionally, one of the following groups of parameters must be set:
 * a) The timestamp based rename parameters:
 *   - timestampColumn : The *_timestamp column
 *   - minTimestamp    : The minimum bound of the timestamp column range for this batch
 *   - maxTimestamp    : The maximum bound of the timestamp column range for this batch
 *   - uniqueKey       : A column that is unique (preferably the PRIMARY KEY) [optional]
 * b) The unique key based rename parameters:
 *   - uniqueKey : A column that is unique (preferably the PRIMARY KEY)
 *   - keyId     : A list of values for this column to determine rows to update for this batch
 *
 * To avoid some race conditions, the following parameters should be set:
 *   - userID    : The ID of the user to update
 *   - uidColumn : The *_user_id column
 */
class RenameUserJob extends Job {
	/**
	 * @param Title $title Title passed through to the parent Job constructor
	 * @param array $params Job parameters (see the class documentation)
	 * @param int $id Job ID passed through to the parent Job constructor
	 */
	public function __construct( Title $title, $params = [], $id = 0 ) {
		parent::__construct( 'renameUser', $title, $params, $id );
	}

	/**
	 * Rename the user in one batch of rows of the configured table.
	 *
	 * When a timestamp-bounded batch on 'revision' (or 'archive') updates fewer
	 * rows than expected, a second pass is made over 'archive' (or 'revision')
	 * to pick up rows that moved between the two tables in the meantime.
	 *
	 * @return bool Always true when the job completes
	 * @throws LogicException If a logId was given but no such logging row exists
	 * @throws InvalidArgumentException If neither a time range nor an ID batch was given
	 */
	public function run() {
		$table = $this->params['table'];
		$column = $this->params['column'];
		$oldname = $this->params['oldname'];
		$newname = $this->params['newname'];
		$count = $this->params['count'];
		if ( isset( $this->params['userID'] ) ) {
			$userID = $this->params['userID'];
			$uidColumn = $this->params['uidColumn'];
		} else {
			$userID = null;
			$uidColumn = null;
		}
		if ( isset( $this->params['timestampColumn'] ) ) {
			$timestampColumn = $this->params['timestampColumn'];
			$minTimestamp = $this->params['minTimestamp'];
			$maxTimestamp = $this->params['maxTimestamp'];
		} else {
			$timestampColumn = null;
			$minTimestamp = null;
			$maxTimestamp = null;
		}
		$uniqueKey = isset( $this->params['uniqueKey'] ) ? $this->params['uniqueKey'] : null;
		$keyId = isset( $this->params['keyId'] ) ? $this->params['keyId'] : null;
		$logId = isset( $this->params['logId'] ) ? $this->params['logId'] : null;

		$dbw = wfGetDB( DB_MASTER );
		if ( $logId ) {
			# Block until the transaction that inserted this job commits.
			# The atomic section is for sanity as FOR UPDATE does not lock in auto-commit mode
			# per http://dev.mysql.com/doc/refman/5.7/en/innodb-locking-reads.html.
			$dbw->startAtomic( __METHOD__ );
			$committed = $dbw->selectField( 'logging',
				'1',
				[ 'log_id' => $logId ],
				__METHOD__,
				[ 'FOR UPDATE' ]
			);
			$dbw->endAtomic( __METHOD__ );
			# If the transaction inserting this job was rolled back, detect that
			if ( $committed === false ) { // rollback happened?
				throw new LogicException( 'Cannot run job if the account rename failed.' );
			}
		}

		# Flush any state snapshot data (and release the lock above)
		$dbw->commit( __METHOD__, 'flush' );

		# Conditions like "*_user_text = 'x'"
		$conds = [ $column => $oldname ];
		# If user ID given, add that to condition to avoid rename collisions
		if ( $userID !== null ) {
			$conds[$uidColumn] = $userID;
		}
		# Bound by timestamp if given
		if ( $timestampColumn !== null ) {
			$conds[] = "$timestampColumn >= " . $dbw->addQuotes( $minTimestamp );
			$conds[] = "$timestampColumn <= " . $dbw->addQuotes( $maxTimestamp );
		# Bound by unique key if given (B/C)
		} elseif ( $uniqueKey !== null && $keyId !== null ) {
			$conds[$uniqueKey] = $keyId;
		} else {
			throw new InvalidArgumentException( 'Expected ID batch or time range' );
		}

		# Actually update the rows for this job...
		if ( $uniqueKey !== null ) {
			# Select the rows to update by PRIMARY KEY
			$ids = $dbw->selectFieldValues( $table, $uniqueKey, $conds, __METHOD__ );
			# Update these rows by PRIMARY KEY to avoid slave lag
			$affectedCount = $this->updateRowsByKey(
				$dbw, $table, $column, $uniqueKey, $ids, $oldname, $newname, __METHOD__
			);
		} else {
			# Update the chunk of rows directly
			$dbw->update( $table,
				[ $column => $newname ],
				$conds,
				__METHOD__
			);
			$affectedCount = $dbw->affectedRows();
		}

		if ( $affectedCount < $count && $timestampColumn !== null ) {
			if ( $table === 'revision' ) {
				# Special case: revisions may be deleted while renaming...
				# If some revisions were not renamed, they may have been deleted.
				# Do a pass on the archive table to get these stragglers...
				$this->renameStragglers(
					$dbw, 'archive', 'ar', $oldname, $newname,
					$userID, $minTimestamp, $maxTimestamp, __METHOD__
				);
			} elseif ( $table === 'archive' ) {
				# Special case: revisions may be restored while renaming...
				# If some revisions were not renamed, they may have been restored.
				# Do a pass on the revision table to get these stragglers...
				$this->renameStragglers(
					$dbw, 'revision', 'rev', $oldname, $newname,
					$userID, $minTimestamp, $maxTimestamp, __METHOD__
				);
			}
		}

		return true;
	}

	/**
	 * Update rows selected by key, in chunks of $wgUpdateRowsPerQuery.
	 *
	 * Before each chunk the connection is flushed and wfWaitForSlaves() is
	 * called, so that large renames are spread over several small writes.
	 *
	 * @param object $dbw Connection returned by wfGetDB( DB_MASTER )
	 * @param string $table Table to update
	 * @param string $column The user name column to rewrite
	 * @param string $keyColumn Column the $ids values belong to
	 * @param array $ids Key values of the rows to update
	 * @param string $oldname The old user name
	 * @param string $newname The new user name
	 * @param string $fname Caller name used to attribute the queries
	 * @return int Sum of the affected row counts reported for each chunk
	 */
	private function updateRowsByKey(
		$dbw, $table, $column, $keyColumn, array $ids, $oldname, $newname, $fname
	) {
		global $wgUpdateRowsPerQuery;

		$affected = 0;
		foreach ( array_chunk( $ids, $wgUpdateRowsPerQuery ) as $batch ) {
			$dbw->commit( $fname, 'flush' );
			wfWaitForSlaves();

			$dbw->update( $table,
				[ $column => $newname ],
				// Re-check the old name so rows renamed in the meantime are left alone
				[ $column => $oldname, $keyColumn => $batch ],
				$fname
			);
			$affected += $dbw->affectedRows();
		}

		return $affected;
	}

	/**
	 * Rename rows in the sibling table ('archive' or 'revision') that fall in
	 * the same timestamp range as the current batch.
	 *
	 * @param object $dbw Connection returned by wfGetDB( DB_MASTER )
	 * @param string $table Either 'archive' or 'revision'
	 * @param string $prefix Column prefix of $table ('ar' or 'rev')
	 * @param string $oldname The old user name
	 * @param string $newname The new user name
	 * @param int|null $userID The user ID, or null if the job did not provide one
	 * @param string $minTimestamp Lower bound of the timestamp range
	 * @param string $maxTimestamp Upper bound of the timestamp range
	 * @param string $fname Caller name used to attribute the queries
	 */
	private function renameStragglers(
		$dbw, $table, $prefix, $oldname, $newname, $userID, $minTimestamp, $maxTimestamp, $fname
	) {
		$nameColumn = "{$prefix}_user_text";
		$idColumn = "{$prefix}_id";
		$tsColumn = "{$prefix}_timestamp";

		$conds = [ $nameColumn => $oldname ];
		# Only constrain on the user ID when the job supplied one; a null value
		# would otherwise be turned into an "IS NULL" condition.
		if ( $userID !== null ) {
			$conds["{$prefix}_user"] = $userID;
		}
		# No user,rev_id index, so use timestamp to bound
		# the rows. This can use the user,timestamp index.
		$conds[] = "$tsColumn >= " . $dbw->addQuotes( $minTimestamp );
		$conds[] = "$tsColumn <= " . $dbw->addQuotes( $maxTimestamp );

		$ids = $dbw->selectFieldValues( $table, $idColumn, $conds, $fname );
		$this->updateRowsByKey(
			$dbw, $table, $nameColumn, $idColumn, $ids, $oldname, $newname, $fname
		);
	}
}