diff options
Diffstat (limited to 'www/wiki/maintenance/userDupes.inc')
-rw-r--r-- | www/wiki/maintenance/userDupes.inc | 297 |
1 files changed, 297 insertions, 0 deletions
<?php
/**
 * Helper class for update.php.
 *
 * Copyright © 2005 Brion Vibber <brion@pobox.com>
 * https://www.mediawiki.org/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance
 */

/**
 * Look for duplicate user table entries and optionally prune them.
 *
 * This is still used by our MysqlUpdater at:
 * includes/installer/MysqlUpdater.php
 *
 * @ingroup Maintenance
 */
class UserDupes {
	/** @var object Database handle passed to the constructor; all queries go through it */
	private $db;

	/** @var int Number of duplicate records found to have edits during the current run */
	private $reassigned;

	/** @var int Number of duplicate records that were (or could be) removed during the current run */
	private $trimmed;

	/** @var int Number of duplicate records that still had edits after reassignment */
	private $failed;

	/** @var callable Receives every status message as a single string argument */
	private $outputCallback;

	/**
	 * @param object &$database Database handle used for all queries
	 * @param callable $outputCallback Called with one string argument for each status message
	 */
	function __construct( &$database, $outputCallback ) {
		$this->db = $database;
		$this->outputCallback = $outputCallback;
	}

	/**
	 * Output some text via the output callback provided
	 * @param string $str Text to print
	 */
	private function out( $str ) {
		call_user_func( $this->outputCallback, $str );
	}

	/**
	 * Check if this database's user table has already had a unique
	 * user_name index applied.
	 *
	 * Emits a warning through the output callback and returns false when
	 * no user_name index information is available at all.
	 *
	 * @return bool
	 */
	function hasUniqueIndex() {
		$info = $this->db->indexInfo( 'user', 'user_name', __METHOD__ );
		if ( !$info ) {
			$this->out( "WARNING: doesn't seem to have user_name index at all!\n" );

			return false;
		}

		# Confusingly, 'Non_unique' is 0 for *unique* indexes,
		# and 1 for *non-unique* indexes.
		return ( $info[0]->Non_unique == 0 );
	}

	/**
	 * Checks the database for duplicate user account records
	 * and remove them in preparation for application of a unique
	 * index on the user_name field. Returns true if the table is
	 * clean or if duplicates have been resolved automatically.
	 *
	 * May return false if there are unresolvable problems.
	 * Status information is reported through the output callback.
	 *
	 * @return bool
	 */
	function clearDupes() {
		return $this->checkDupes( true );
	}

	/**
	 * Checks the database for duplicate user account records
	 * in preparation for application of a unique index on the
	 * user_name field. Returns true if the table is clean or
	 * if duplicates can be resolved automatically.
	 *
	 * Returns false if there are duplicates and resolution was
	 * not requested. (If doing resolution, edits may be reassigned.)
	 * Status information is reported through the output callback.
	 *
	 * @param bool $doDelete Pass true to actually remove things
	 *   from the database; false to just check.
	 * @return bool
	 */
	function checkDupes( $doDelete = false ) {
		if ( $this->hasUniqueIndex() ) {
			// Route through the callback like every other message instead of
			// echoing directly, so callers that capture output see it too.
			$this->out( wfWikiID() . " already has a unique index on its user table.\n" );

			return true;
		}

		$this->trimmed = 0;
		$this->reassigned = 0;
		$this->failed = 0;

		$this->lock();
		try {
			$this->out( "Checking for duplicate accounts...\n" );
			$dupes = $this->getDupes();
			$count = count( $dupes );

			$this->out( "Found $count accounts with duplicate records on " . wfWikiID() . ".\n" );
			foreach ( $dupes as $name ) {
				$this->examine( $name, $doDelete );
			}
		} finally {
			// Always release the table locks, even if a query above threw.
			$this->unlock();
		}

		$this->out( "\n" );

		if ( $this->reassigned > 0 ) {
			if ( $doDelete ) {
				$this->out( "$this->reassigned duplicate accounts had edits "
					. "reassigned to a canonical record id.\n" );
			} else {
				$this->out( "$this->reassigned duplicate accounts need to have edits reassigned.\n" );
			}
		}

		if ( $this->trimmed > 0 ) {
			if ( $doDelete ) {
				$this->out( "$this->trimmed duplicate user records were deleted from "
					. wfWikiID() . ".\n" );
			} else {
				$this->out( "$this->trimmed duplicate user accounts were found on "
					. wfWikiID() . " which can be removed safely.\n" );
			}
		}

		if ( $this->failed > 0 ) {
			$this->out( "Something terribly awry; $this->failed duplicate accounts were not removed.\n" );

			return false;
		}

		if ( $this->trimmed == 0 || $doDelete ) {
			$this->out( "It is now safe to apply the unique index on user_name.\n" );

			return true;
		} else {
			$this->out( "Run this script again with the --fix option to automatically delete them.\n" );

			return false;
		}
	}

	/**
	 * We don't want anybody to mess with our stuff...
	 * Issues a LOCK TABLES ... WRITE statement for the user and revision tables.
	 * @access private
	 */
	function lock() {
		$set = [ 'user', 'revision' ];
		$names = array_map( [ $this, 'lockTable' ], $set );
		$tables = implode( ',', $names );

		$this->db->query( "LOCK TABLES $tables", __METHOD__ );
	}

	/**
	 * Build the "<table> WRITE" fragment of the LOCK TABLES statement.
	 * @param string $table Unqualified table name
	 * @return string
	 */
	function lockTable( $table ) {
		return $this->db->tableName( $table ) . ' WRITE';
	}

	/**
	 * Release the locks taken by lock().
	 * @access private
	 */
	function unlock() {
		$this->db->query( "UNLOCK TABLES", __METHOD__ );
	}

	/**
	 * Grab usernames for which multiple records are present in the database.
	 * @return array List of user_name values that occur more than once
	 * @access private
	 */
	function getDupes() {
		$user = $this->db->tableName( 'user' );
		$result = $this->db->query(
			"SELECT user_name,COUNT(*) AS n
				FROM $user
			GROUP BY user_name
			  HAVING n > 1", __METHOD__ );

		$list = [];
		foreach ( $result as $row ) {
			$list[] = $row->user_name;
		}

		return $list;
	}

	/**
	 * Examine user records for the given name. Try to see which record
	 * will be the one that actually gets used, then check remaining records
	 * for edits. If the dupes have no edits, we can safely remove them.
	 *
	 * Updates the reassigned / trimmed / failed counters as it goes.
	 *
	 * @param string $name
	 * @param bool $doDelete Pass true to reassign edits and delete the dupes;
	 *   false to only report what would be done.
	 * @access private
	 */
	function examine( $name, $doDelete ) {
		$result = $this->db->select( 'user',
			[ 'user_id' ],
			[ 'user_name' => $name ],
			__METHOD__ );

		$firstRow = $this->db->fetchObject( $result );
		if ( !$firstRow ) {
			// No rows came back for this name, so there is nothing to examine.
			$this->out( "No user records found for '$name'; skipping.\n" );

			return;
		}

		$firstId = $firstRow->user_id;
		$this->out( "Record that will be used for '$name' is user_id=$firstId\n" );

		foreach ( $result as $row ) {
			$dupeId = $row->user_id;
			if ( (int)$dupeId === (int)$firstId ) {
				// Iterating the result may start again from the first row,
				// depending on the result object's rewind behaviour. Never
				// treat the canonical record as a duplicate of itself.
				continue;
			}

			$this->out( "... dupe id $dupeId: " );
			$edits = $this->editCount( $dupeId );
			if ( $edits > 0 ) {
				$this->reassigned++;
				$this->out( "has $edits edits! " );
				if ( $doDelete ) {
					$this->reassignEdits( $dupeId, $firstId );
					// Re-count to confirm the reassignment really emptied the dupe.
					$newEdits = $this->editCount( $dupeId );
					if ( $newEdits == 0 ) {
						$this->out( "confirmed cleaned. " );
					} else {
						$this->failed++;
						$this->out( "WARNING! $newEdits remaining edits for $dupeId; NOT deleting user.\n" );
						continue;
					}
				} else {
					$this->out( "(will need to reassign edits on fix)" );
				}
			} else {
				$this->out( "ok, no edits. " );
			}
			$this->trimmed++;
			if ( $doDelete ) {
				$this->trimAccount( $dupeId );
			}
			$this->out( "\n" );
		}
	}

	/**
	 * Count the number of edits attributed to this user.
	 * Does not currently check log table or other things
	 * where it might show up...
	 * @param int $userid
	 * @return int Number of revision rows whose rev_user equals $userid
	 * @access private
	 */
	function editCount( $userid ) {
		return intval( $this->db->selectField(
			'revision',
			'COUNT(*)',
			[ 'rev_user' => $userid ],
			__METHOD__ ) );
	}

	/**
	 * Point every revision row attributed to $from at $to instead.
	 * @param int $from User id currently holding the edits
	 * @param int $to User id that should receive the edits
	 * @access private
	 */
	function reassignEdits( $from, $to ) {
		$this->out( 'reassigning... ' );
		$this->db->update( 'revision',
			[ 'rev_user' => $to ],
			[ 'rev_user' => $from ],
			__METHOD__ );
		$this->out( "ok. " );
	}

	/**
	 * Remove a user account line.
	 * @param int $userid
	 * @access private
	 */
	function trimAccount( $userid ) {
		$this->out( "deleting..." );
		$this->db->delete( 'user', [ 'user_id' => $userid ], __METHOD__ );
		$this->out( " ok" );
	}
}