diff options
Diffstat (limited to 'www/wiki/maintenance/namespaceDupes.php')
-rw-r--r-- | www/wiki/maintenance/namespaceDupes.php | 620 |
1 files changed, 620 insertions, 0 deletions
diff --git a/www/wiki/maintenance/namespaceDupes.php b/www/wiki/maintenance/namespaceDupes.php new file mode 100644 index 00000000..3c839216 --- /dev/null +++ b/www/wiki/maintenance/namespaceDupes.php @@ -0,0 +1,620 @@ +<?php +/** + * Check for articles to fix after adding/deleting namespaces + * + * Copyright © 2005-2007 Brion Vibber <brion@pobox.com> + * https://www.mediawiki.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Maintenance + */ + +require_once __DIR__ . '/Maintenance.php'; + +use MediaWiki\Linker\LinkTarget; +use MediaWiki\MediaWikiServices; +use Wikimedia\Rdbms\ResultWrapper; +use Wikimedia\Rdbms\IMaintainableDatabase; + +/** + * Maintenance script that checks for articles to fix after + * adding/deleting namespaces. + * + * @ingroup Maintenance + */ +class NamespaceConflictChecker extends Maintenance { + + /** + * @var IMaintainableDatabase + */ + protected $db; + + private $resolvablePages = 0; + private $totalPages = 0; + + private $resolvableLinks = 0; + private $totalLinks = 0; + + public function __construct() { + parent::__construct(); + $this->addDescription( 'Find and fix pages affected by namespace addition/removal' ); + $this->addOption( 'fix', 'Attempt to automatically fix errors' ); + $this->addOption( 'merge', "Instead of renaming conflicts, do a history merge with " . + "the correct title" ); + $this->addOption( 'add-suffix', "Dupes will be renamed with correct namespace with " . + "<text> appended after the article name", false, true ); + $this->addOption( 'add-prefix', "Dupes will be renamed with correct namespace with " . + "<text> prepended before the article name", false, true ); + $this->addOption( 'source-pseudo-namespace', "Move all pages with the given source " . + "prefix (with an implied colon following it). If --dest-namespace is not specified, " . + "the colon will be replaced with a hyphen.", + false, true ); + $this->addOption( 'dest-namespace', "In combination with --source-pseudo-namespace, " . + "specify the namespace ID of the destination.", false, true ); + $this->addOption( 'move-talk', "If this is specified, pages in the Talk namespace that " . + "begin with a conflicting prefix will be renamed, for example " . + "Talk:File:Foo -> File_Talk:Foo" ); + } + + public function execute() { + $this->db = $this->getDB( DB_MASTER ); + + $options = [ + 'fix' => $this->hasOption( 'fix' ), + 'merge' => $this->hasOption( 'merge' ), + 'add-suffix' => $this->getOption( 'add-suffix', '' ), + 'add-prefix' => $this->getOption( 'add-prefix', '' ), + 'move-talk' => $this->hasOption( 'move-talk' ), + 'source-pseudo-namespace' => $this->getOption( 'source-pseudo-namespace', '' ), + 'dest-namespace' => intval( $this->getOption( 'dest-namespace', 0 ) ) ]; + + if ( $options['source-pseudo-namespace'] !== '' ) { + $retval = $this->checkPrefix( $options ); + } else { + $retval = $this->checkAll( $options ); + } + + if ( $retval ) { + $this->output( "\nLooks good!\n" ); + } else { + $this->output( "\nOh noeees\n" ); + } + } + + /** + * Check all namespaces + * + * @param array $options Associative array of validated command-line options + * + * @return bool + */ + private function checkAll( $options ) { + global $wgContLang, $wgNamespaceAliases, $wgCapitalLinks; + + $spaces = []; + + // List interwikis first, so they'll be overridden + // by any conflicting local namespaces. + foreach ( $this->getInterwikiList() as $prefix ) { + $name = $wgContLang->ucfirst( $prefix ); + $spaces[$name] = 0; + } + + // Now pull in all canonical and alias namespaces... + foreach ( MWNamespace::getCanonicalNamespaces() as $ns => $name ) { + // This includes $wgExtraNamespaces + if ( $name !== '' ) { + $spaces[$name] = $ns; + } + } + foreach ( $wgContLang->getNamespaces() as $ns => $name ) { + if ( $name !== '' ) { + $spaces[$name] = $ns; + } + } + foreach ( $wgNamespaceAliases as $name => $ns ) { + $spaces[$name] = $ns; + } + foreach ( $wgContLang->getNamespaceAliases() as $name => $ns ) { + $spaces[$name] = $ns; + } + + // We'll need to check for lowercase keys as well, + // since we're doing case-sensitive searches in the db. + foreach ( $spaces as $name => $ns ) { + $moreNames = []; + $moreNames[] = $wgContLang->uc( $name ); + $moreNames[] = $wgContLang->ucfirst( $wgContLang->lc( $name ) ); + $moreNames[] = $wgContLang->ucwords( $name ); + $moreNames[] = $wgContLang->ucwords( $wgContLang->lc( $name ) ); + $moreNames[] = $wgContLang->ucwordbreaks( $name ); + $moreNames[] = $wgContLang->ucwordbreaks( $wgContLang->lc( $name ) ); + if ( !$wgCapitalLinks ) { + foreach ( $moreNames as $altName ) { + $moreNames[] = $wgContLang->lcfirst( $altName ); + } + $moreNames[] = $wgContLang->lcfirst( $name ); + } + foreach ( array_unique( $moreNames ) as $altName ) { + if ( $altName !== $name ) { + $spaces[$altName] = $ns; + } + } + } + + // Sort by namespace index, and if there are two with the same index, + // break the tie by sorting by name + $origSpaces = $spaces; + uksort( $spaces, function ( $a, $b ) use ( $origSpaces ) { + if ( $origSpaces[$a] < $origSpaces[$b] ) { + return -1; + } elseif ( $origSpaces[$a] > $origSpaces[$b] ) { + return 1; + } elseif ( $a < $b ) { + return -1; + } elseif ( $a > $b ) { + return 1; + } else { + return 0; + } + } ); + + $ok = true; + foreach ( $spaces as $name => $ns ) { + $ok = $this->checkNamespace( $ns, $name, $options ) && $ok; + } + + $this->output( "{$this->totalPages} pages to fix, " . + "{$this->resolvablePages} were resolvable.\n\n" ); + + foreach ( $spaces as $name => $ns ) { + if ( $ns != 0 ) { + /* Fix up link destinations for non-interwiki links only. + * + * For example if a page has [[Foo:Bar]] and then a Foo namespace + * is introduced, pagelinks needs to be updated to have + * page_namespace = NS_FOO. + * + * If instead an interwiki prefix was introduced called "Foo", + * the link should instead be moved to the iwlinks table. If a new + * language is introduced called "Foo", or if there is a pagelink + * [[fr:Bar]] when interlanguage magic links are turned on, the + * link would have to be moved to the langlinks table. Let's put + * those cases in the too-hard basket for now. The consequences are + * not especially severe. + * @fixme Handle interwiki links, and pagelinks to Category:, File: + * which probably need reparsing. + */ + + $this->checkLinkTable( 'pagelinks', 'pl', $ns, $name, $options ); + $this->checkLinkTable( 'templatelinks', 'tl', $ns, $name, $options ); + + // The redirect table has interwiki links randomly mixed in, we + // need to filter those out. For example [[w:Foo:Bar]] would + // have rd_interwiki=w and rd_namespace=0, which would match the + // query for a conflicting namespace "Foo" if filtering wasn't done. + $this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options, + [ 'rd_interwiki' => null ] ); + $this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options, + [ 'rd_interwiki' => '' ] ); + } + } + + $this->output( "{$this->totalLinks} links to fix, " . + "{$this->resolvableLinks} were resolvable.\n" ); + + return $ok; + } + + /** + * Get the interwiki list + * + * @return array + */ + private function getInterwikiList() { + $result = MediaWikiServices::getInstance()->getInterwikiLookup()->getAllPrefixes(); + $prefixes = []; + foreach ( $result as $row ) { + $prefixes[] = $row['iw_prefix']; + } + + return $prefixes; + } + + /** + * Check a given prefix and try to move it into the given destination namespace + * + * @param int $ns Destination namespace id + * @param string $name + * @param array $options Associative array of validated command-line options + * @return bool + */ + private function checkNamespace( $ns, $name, $options ) { + $targets = $this->getTargetList( $ns, $name, $options ); + $count = $targets->numRows(); + $this->totalPages += $count; + if ( $count == 0 ) { + return true; + } + + $dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY'; + + $ok = true; + foreach ( $targets as $row ) { + // Find the new title and determine the action to take + + $newTitle = $this->getDestinationTitle( $ns, $name, + $row->page_namespace, $row->page_title, $options ); + $logStatus = false; + if ( !$newTitle ) { + $logStatus = 'invalid title'; + $action = 'abort'; + } elseif ( $newTitle->exists() ) { + if ( $options['merge'] ) { + if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) { + $action = 'merge'; + } else { + $action = 'abort'; + } + } elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) { + $action = 'abort'; + $logStatus = 'dest title exists and --add-prefix not specified'; + } else { + $newTitle = $this->getAlternateTitle( $newTitle, $options ); + if ( !$newTitle ) { + $action = 'abort'; + $logStatus = 'alternate title is invalid'; + } elseif ( $newTitle->exists() ) { + $action = 'abort'; + $logStatus = 'title conflict'; + } else { + $action = 'move'; + $logStatus = 'alternate'; + } + } + } else { + $action = 'move'; + $logStatus = 'no conflict'; + } + + // Take the action or log a dry run message + + $logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}"; + $pageOK = true; + + switch ( $action ) { + case 'abort': + $this->output( "$logTitle *** $logStatus\n" ); + $pageOK = false; + break; + case 'move': + $this->output( "$logTitle -> " . + $newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" ); + + if ( $options['fix'] ) { + $pageOK = $this->movePage( $row->page_id, $newTitle ); + } + break; + case 'merge': + $this->output( "$logTitle => " . + $newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" ); + + if ( $options['fix'] ) { + $pageOK = $this->mergePage( $row, $newTitle ); + } + break; + } + + if ( $pageOK ) { + $this->resolvablePages++; + } else { + $ok = false; + } + } + + return $ok; + } + + /** + * Check and repair the destination fields in a link table + * @param string $table The link table name + * @param string $fieldPrefix The field prefix in the link table + * @param int $ns Destination namespace id + * @param string $name + * @param array $options Associative array of validated command-line options + * @param array $extraConds Extra conditions for the SQL query + */ + private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options, + $extraConds = [] + ) { + $batchConds = []; + $fromField = "{$fieldPrefix}_from"; + $namespaceField = "{$fieldPrefix}_namespace"; + $titleField = "{$fieldPrefix}_title"; + $batchSize = 500; + while ( true ) { + $res = $this->db->select( + $table, + [ $fromField, $namespaceField, $titleField ], + array_merge( $batchConds, $extraConds, [ + $namespaceField => 0, + $titleField . $this->db->buildLike( "$name:", $this->db->anyString() ) + ] ), + __METHOD__, + [ + 'ORDER BY' => [ $titleField, $fromField ], + 'LIMIT' => $batchSize + ] + ); + + if ( $res->numRows() == 0 ) { + break; + } + foreach ( $res as $row ) { + $logTitle = "from={$row->$fromField} ns={$row->$namespaceField} " . + "dbk={$row->$titleField}"; + $destTitle = $this->getDestinationTitle( $ns, $name, + $row->$namespaceField, $row->$titleField, $options ); + $this->totalLinks++; + if ( !$destTitle ) { + $this->output( "$table $logTitle *** INVALID\n" ); + continue; + } + $this->resolvableLinks++; + if ( !$options['fix'] ) { + $this->output( "$table $logTitle -> " . + $destTitle->getPrefixedDBkey() . " DRY RUN\n" ); + continue; + } + + $this->db->update( $table, + // SET + [ + $namespaceField => $destTitle->getNamespace(), + $titleField => $destTitle->getDBkey() + ], + // WHERE + [ + $namespaceField => 0, + $titleField => $row->$titleField, + $fromField => $row->$fromField + ], + __METHOD__, + [ 'IGNORE' ] + ); + $this->output( "$table $logTitle -> " . + $destTitle->getPrefixedDBkey() . "\n" ); + } + $encLastTitle = $this->db->addQuotes( $row->$titleField ); + $encLastFrom = $this->db->addQuotes( $row->$fromField ); + + $batchConds = [ + "$titleField > $encLastTitle " . + "OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)" ]; + + wfWaitForSlaves(); + } + } + + /** + * Move the given pseudo-namespace, either replacing the colon with a hyphen + * (useful for pseudo-namespaces that conflict with interwiki links) or move + * them to another namespace if specified. + * @param array $options Associative array of validated command-line options + * @return bool + */ + private function checkPrefix( $options ) { + $prefix = $options['source-pseudo-namespace']; + $ns = $options['dest-namespace']; + $this->output( "Checking prefix \"$prefix\" vs namespace $ns\n" ); + + return $this->checkNamespace( $ns, $prefix, $options ); + } + + /** + * Find pages in main and talk namespaces that have a prefix of the new + * namespace so we know titles that will need migrating + * + * @param int $ns Destination namespace id + * @param string $name Prefix that is being made a namespace + * @param array $options Associative array of validated command-line options + * + * @return ResultWrapper + */ + private function getTargetList( $ns, $name, $options ) { + if ( $options['move-talk'] && MWNamespace::isSubject( $ns ) ) { + $checkNamespaces = [ NS_MAIN, NS_TALK ]; + } else { + $checkNamespaces = NS_MAIN; + } + + return $this->db->select( 'page', + [ + 'page_id', + 'page_title', + 'page_namespace', + ], + [ + 'page_namespace' => $checkNamespaces, + 'page_title' . $this->db->buildLike( "$name:", $this->db->anyString() ), + ], + __METHOD__ + ); + } + + /** + * Get the preferred destination title for a given target page. + * @param int $ns The destination namespace ID + * @param string $name The conflicting prefix + * @param int $sourceNs The source namespace + * @param int $sourceDbk The source DB key (i.e. page_title) + * @param array $options Associative array of validated command-line options + * @return Title|false + */ + private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk, $options ) { + $dbk = substr( $sourceDbk, strlen( "$name:" ) ); + if ( $ns == 0 ) { + // An interwiki; try an alternate encoding with '-' for ':' + $dbk = "$name-" . $dbk; + } + $destNS = $ns; + if ( $sourceNs == NS_TALK && MWNamespace::isSubject( $ns ) ) { + // This is an associated talk page moved with the --move-talk feature. + $destNS = MWNamespace::getTalk( $destNS ); + } + $newTitle = Title::makeTitleSafe( $destNS, $dbk ); + if ( !$newTitle || !$newTitle->canExist() ) { + return false; + } + return $newTitle; + } + + /** + * Get an alternative title to move a page to. This is used if the + * preferred destination title already exists. + * + * @param LinkTarget $linkTarget + * @param array $options Associative array of validated command-line options + * @return Title|bool + */ + private function getAlternateTitle( LinkTarget $linkTarget, $options ) { + $prefix = $options['add-prefix']; + $suffix = $options['add-suffix']; + if ( $prefix == '' && $suffix == '' ) { + return false; + } + while ( true ) { + $dbk = $prefix . $linkTarget->getDBkey() . $suffix; + $title = Title::makeTitleSafe( $linkTarget->getNamespace(), $dbk ); + if ( !$title ) { + return false; + } + if ( !$title->exists() ) { + return $title; + } + } + } + + /** + * Move a page + * + * @param integer $id The page_id + * @param LinkTarget $newLinkTarget The new title link target + * @return bool + */ + private function movePage( $id, LinkTarget $newLinkTarget ) { + $this->db->update( 'page', + [ + "page_namespace" => $newLinkTarget->getNamespace(), + "page_title" => $newLinkTarget->getDBkey(), + ], + [ + "page_id" => $id, + ], + __METHOD__ ); + + // Update *_from_namespace in links tables + $fromNamespaceTables = [ + [ 'pagelinks', 'pl' ], + [ 'templatelinks', 'tl' ], + [ 'imagelinks', 'il' ] ]; + foreach ( $fromNamespaceTables as $tableInfo ) { + list( $table, $fieldPrefix ) = $tableInfo; + $this->db->update( $table, + // SET + [ "{$fieldPrefix}_from_namespace" => $newLinkTarget->getNamespace() ], + // WHERE + [ "{$fieldPrefix}_from" => $id ], + __METHOD__ ); + } + + return true; + } + + /** + * Determine if we can merge a page. + * We check if an inaccessible revision would become the latest and + * deny the merge if so -- it's theoretically possible to update the + * latest revision, but opens a can of worms -- search engine updates, + * recentchanges review, etc. + * + * @param integer $id The page_id + * @param LinkTarget $linkTarget The new link target + * @param string $logStatus This is set to the log status message on failure + * @return bool + */ + private function canMerge( $id, LinkTarget $linkTarget, &$logStatus ) { + $latestDest = Revision::newFromTitle( $linkTarget, 0, Revision::READ_LATEST ); + $latestSource = Revision::newFromPageId( $id, 0, Revision::READ_LATEST ); + if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) { + $logStatus = 'cannot merge since source is later'; + return false; + } else { + return true; + } + } + + /** + * Merge page histories + * + * @param stdClass $row Page row + * @param Title $newTitle The new title + * @return bool + */ + private function mergePage( $row, Title $newTitle ) { + $id = $row->page_id; + + // Construct the WikiPage object we will need later, while the + // page_id still exists. Note that this cannot use makeTitleSafe(), + // we are deliberately constructing an invalid title. + $sourceTitle = Title::makeTitle( $row->page_namespace, $row->page_title ); + $sourceTitle->resetArticleID( $id ); + $wikiPage = new WikiPage( $sourceTitle ); + $wikiPage->loadPageData( 'fromdbmaster' ); + + $destId = $newTitle->getArticleID(); + $this->beginTransaction( $this->db, __METHOD__ ); + $this->db->update( 'revision', + // SET + [ 'rev_page' => $destId ], + // WHERE + [ 'rev_page' => $id ], + __METHOD__ ); + + $this->db->delete( 'page', [ 'page_id' => $id ], __METHOD__ ); + + $this->commitTransaction( $this->db, __METHOD__ ); + + /* Call LinksDeletionUpdate to delete outgoing links from the old title, + * and update category counts. + * + * Calling external code with a fake broken Title is a fairly dubious + * idea. It's necessary because it's quite a lot of code to duplicate, + * but that also makes it fragile since it would be easy for someone to + * accidentally introduce an assumption of title validity to the code we + * are calling. + */ + DeferredUpdates::addUpdate( new LinksDeletionUpdate( $wikiPage ) ); + DeferredUpdates::doUpdates(); + + return true; + } +} + +$maintClass = NamespaceConflictChecker::class; +require_once RUN_MAINTENANCE_IF_MAIN; |