diff options
author | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
---|---|---|
committer | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
commit | fc7369835258467bf97eb64f184b93691f9a9fd5 (patch) | |
tree | daabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/includes/deferred |
first commit
Diffstat (limited to 'www/wiki/includes/deferred')
18 files changed, 3170 insertions, 0 deletions
diff --git a/www/wiki/includes/deferred/AtomicSectionUpdate.php b/www/wiki/includes/deferred/AtomicSectionUpdate.php new file mode 100644 index 00000000..8b62989b --- /dev/null +++ b/www/wiki/includes/deferred/AtomicSectionUpdate.php @@ -0,0 +1,48 @@ +<?php + +use Wikimedia\Rdbms\IDatabase; + +/** + * Deferrable Update for closure/callback updates via IDatabase::doAtomicSection() + * @since 1.27 + */ +class AtomicSectionUpdate implements DeferrableUpdate, DeferrableCallback { + /** @var IDatabase */ + private $dbw; + /** @var string */ + private $fname; + /** @var callable|null */ + private $callback; + + /** + * @param IDatabase $dbw + * @param string $fname Caller name (usually __METHOD__) + * @param callable $callback + * @see IDatabase::doAtomicSection() + */ + public function __construct( IDatabase $dbw, $fname, callable $callback ) { + $this->dbw = $dbw; + $this->fname = $fname; + $this->callback = $callback; + + if ( $this->dbw->trxLevel() ) { + $this->dbw->onTransactionResolution( [ $this, 'cancelOnRollback' ], $fname ); + } + } + + public function doUpdate() { + if ( $this->callback ) { + $this->dbw->doAtomicSection( $this->fname, $this->callback ); + } + } + + public function cancelOnRollback( $trigger ) { + if ( $trigger === IDatabase::TRIGGER_ROLLBACK ) { + $this->callback = null; + } + } + + public function getOrigin() { + return $this->fname; + } +} diff --git a/www/wiki/includes/deferred/AutoCommitUpdate.php b/www/wiki/includes/deferred/AutoCommitUpdate.php new file mode 100644 index 00000000..f9297af5 --- /dev/null +++ b/www/wiki/includes/deferred/AutoCommitUpdate.php @@ -0,0 +1,62 @@ +<?php + +use Wikimedia\Rdbms\IDatabase; + +/** + * Deferrable Update for closure/callback updates that should use auto-commit mode + * @since 1.28 + */ +class AutoCommitUpdate implements DeferrableUpdate, DeferrableCallback { + /** @var IDatabase */ + private $dbw; + /** @var string */ + private $fname; + /** @var callable|null */ + private $callback; + + /** + * 
@param IDatabase $dbw + * @param string $fname Caller name (usually __METHOD__) + * @param callable $callback Callback that takes (IDatabase, method name string) + */ + public function __construct( IDatabase $dbw, $fname, callable $callback ) { + $this->dbw = $dbw; + $this->fname = $fname; + $this->callback = $callback; + + if ( $this->dbw->trxLevel() ) { + $this->dbw->onTransactionResolution( [ $this, 'cancelOnRollback' ], $fname ); + } + } + + public function doUpdate() { + if ( !$this->callback ) { + return; + } + + $autoTrx = $this->dbw->getFlag( DBO_TRX ); + $this->dbw->clearFlag( DBO_TRX ); + try { + /** @var Exception $e */ + $e = null; + call_user_func_array( $this->callback, [ $this->dbw, $this->fname ] ); + } catch ( Exception $e ) { + } + if ( $autoTrx ) { + $this->dbw->setFlag( DBO_TRX ); + } + if ( $e ) { + throw $e; + } + } + + public function cancelOnRollback( $trigger ) { + if ( $trigger === IDatabase::TRIGGER_ROLLBACK ) { + $this->callback = null; + } + } + + public function getOrigin() { + return $this->fname; + } +} diff --git a/www/wiki/includes/deferred/CdnCacheUpdate.php b/www/wiki/includes/deferred/CdnCacheUpdate.php new file mode 100644 index 00000000..301c4f3b --- /dev/null +++ b/www/wiki/includes/deferred/CdnCacheUpdate.php @@ -0,0 +1,295 @@ +<?php +/** + * CDN cache purging. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +use Wikimedia\Assert\Assert; +use MediaWiki\MediaWikiServices; + +/** + * Handles purging appropriate CDN URLs given a title (or titles) + * @ingroup Cache + */ +class CdnCacheUpdate implements DeferrableUpdate, MergeableUpdate { + /** @var string[] Collection of URLs to purge */ + protected $urls = []; + + /** + * @param string[] $urlArr Collection of URLs to purge + */ + public function __construct( array $urlArr ) { + $this->urls = $urlArr; + } + + public function merge( MergeableUpdate $update ) { + /** @var CdnCacheUpdate $update */ + Assert::parameterType( __CLASS__, $update, '$update' ); + + $this->urls = array_merge( $this->urls, $update->urls ); + } + + /** + * Create an update object from an array of Title objects, or a TitleArray object + * + * @param Traversable|Title[] $titles + * @param string[] $urlArr + * @return CdnCacheUpdate + */ + public static function newFromTitles( $titles, $urlArr = [] ) { + ( new LinkBatch( $titles ) )->execute(); + /** @var Title $title */ + foreach ( $titles as $title ) { + $urlArr = array_merge( $urlArr, $title->getCdnUrls() ); + } + + return new CdnCacheUpdate( $urlArr ); + } + + /** + * @param Title $title + * @return CdnCacheUpdate + * @deprecated since 1.27 + */ + public static function newSimplePurge( Title $title ) { + return new CdnCacheUpdate( $title->getCdnUrls() ); + } + + /** + * Purges the list of URLs passed to the constructor. + */ + public function doUpdate() { + global $wgCdnReboundPurgeDelay; + + self::purge( $this->urls ); + + if ( $wgCdnReboundPurgeDelay > 0 ) { + JobQueueGroup::singleton()->lazyPush( new CdnPurgeJob( + Title::makeTitle( NS_SPECIAL, 'Badtitle/' . 
__CLASS__ ), + [ + 'urls' => $this->urls, + 'jobReleaseTimestamp' => time() + $wgCdnReboundPurgeDelay + ] + ) ); + } + } + + /** + * Purges a list of CDN nodes defined in $wgSquidServers. + * $urlArr should contain the full URLs to purge as values + * (example: $urlArr[] = 'http://my.host/something') + * + * @param string[] $urlArr List of full URLs to purge + */ + public static function purge( array $urlArr ) { + global $wgSquidServers, $wgHTCPRouting; + + if ( !$urlArr ) { + return; + } + + // Remove duplicate URLs from list + $urlArr = array_unique( $urlArr ); + + wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urlArr ) ); + + // Reliably broadcast the purge to all edge nodes + $relayer = MediaWikiServices::getInstance()->getEventRelayerGroup() + ->getRelayer( 'cdn-url-purges' ); + $ts = microtime( true ); + $relayer->notifyMulti( + 'cdn-url-purges', + array_map( + function ( $url ) use ( $ts ) { + return [ + 'url' => $url, + 'timestamp' => $ts, + ]; + }, + $urlArr + ) + ); + + // Send lossy UDP broadcasting if enabled + if ( $wgHTCPRouting ) { + self::HTCPPurge( $urlArr ); + } + + // Do direct server purges if enabled (this does not scale very well) + if ( $wgSquidServers ) { + // Maximum number of parallel connections per squid + $maxSocketsPerSquid = 8; + // Number of requests to send per socket + // 400 seems to be a good tradeoff, opening a socket takes a while + $urlsPerSocket = 400; + $socketsPerSquid = ceil( count( $urlArr ) / $urlsPerSocket ); + if ( $socketsPerSquid > $maxSocketsPerSquid ) { + $socketsPerSquid = $maxSocketsPerSquid; + } + + $pool = new SquidPurgeClientPool; + $chunks = array_chunk( $urlArr, ceil( count( $urlArr ) / $socketsPerSquid ) ); + foreach ( $wgSquidServers as $server ) { + foreach ( $chunks as $chunk ) { + $client = new SquidPurgeClient( $server ); + foreach ( $chunk as $url ) { + $client->queuePurge( $url ); + } + $pool->addClient( $client ); + } + } + + $pool->run(); + } + } + + /** + * Send Hyper Text Caching 
Protocol (HTCP) CLR requests. + * + * @throws MWException + * @param string[] $urlArr Collection of URLs to purge + */ + private static function HTCPPurge( array $urlArr ) { + global $wgHTCPRouting, $wgHTCPMulticastTTL; + + // HTCP CLR operation + $htcpOpCLR = 4; + + // @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h) + if ( !defined( "IPPROTO_IP" ) ) { + define( "IPPROTO_IP", 0 ); + define( "IP_MULTICAST_LOOP", 34 ); + define( "IP_MULTICAST_TTL", 33 ); + } + + // pfsockopen doesn't work because we need set_sock_opt + $conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP ); + if ( !$conn ) { + $errstr = socket_strerror( socket_last_error() ); + wfDebugLog( 'squid', __METHOD__ . + ": Error opening UDP socket: $errstr" ); + + return; + } + + // Set socket options + socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 ); + if ( $wgHTCPMulticastTTL != 1 ) { + // Set multicast time to live (hop count) option on socket + socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL, + $wgHTCPMulticastTTL ); + } + + // Get sequential trx IDs for packet loss counting + $ids = UIDGenerator::newSequentialPerNodeIDs( + 'squidhtcppurge', 32, count( $urlArr ), UIDGenerator::QUICK_VOLATILE + ); + + foreach ( $urlArr as $url ) { + if ( !is_string( $url ) ) { + throw new MWException( 'Bad purge URL' ); + } + $url = self::expand( $url ); + $conf = self::getRuleForURL( $url, $wgHTCPRouting ); + if ( !$conf ) { + wfDebugLog( 'squid', __METHOD__ . 
+ "No HTCP rule configured for URL {$url} , skipping" ); + continue; + } + + if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) { + // Normalize single entries + $conf = [ $conf ]; + } + foreach ( $conf as $subconf ) { + if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) { + throw new MWException( "Invalid HTCP rule for URL $url\n" ); + } + } + + // Construct a minimal HTCP request diagram + // as per RFC 2756 + // Opcode 'CLR', no response desired, no auth + $htcpTransID = current( $ids ); + next( $ids ); + + $htcpSpecifier = pack( 'na4na*na8n', + 4, 'HEAD', strlen( $url ), $url, + 8, 'HTTP/1.0', 0 ); + + $htcpDataLen = 8 + 2 + strlen( $htcpSpecifier ); + $htcpLen = 4 + $htcpDataLen + 2; + + // Note! Squid gets the bit order of the first + // word wrong, wrt the RFC. Apparently no other + // implementation exists, so adapt to Squid + $htcpPacket = pack( 'nxxnCxNxxa*n', + $htcpLen, $htcpDataLen, $htcpOpCLR, + $htcpTransID, $htcpSpecifier, 2 ); + + wfDebugLog( 'squid', __METHOD__ . + "Purging URL $url via HTCP" ); + foreach ( $conf as $subconf ) { + socket_sendto( $conn, $htcpPacket, $htcpLen, 0, + $subconf['host'], $subconf['port'] ); + } + } + } + + /** + * Expand local URLs to fully-qualified URLs using the internal protocol + * and host defined in $wgInternalServer. Input that's already fully- + * qualified will be passed through unchanged. + * + * This is used to generate purge URLs that may be either local to the + * main wiki or include a non-native host, such as images hosted on a + * second internal server. + * + * Client functions should not need to call this. + * + * @param string $url + * @return string + */ + public static function expand( $url ) { + return wfExpandUrl( $url, PROTO_INTERNAL ); + } + + /** + * Find the HTCP routing rule to use for a given URL. 
+ * @param string $url URL to match + * @param array $rules Array of rules, see $wgHTCPRouting for format and behavior + * @return mixed Element of $rules that matched, or false if nothing matched + */ + private static function getRuleForURL( $url, $rules ) { + foreach ( $rules as $regex => $routing ) { + if ( $regex === '' || preg_match( $regex, $url ) ) { + return $routing; + } + } + + return false; + } +} + +/** + * @deprecated since 1.27 + */ +class SquidUpdate extends CdnCacheUpdate { + // Keep class name for b/c +} diff --git a/www/wiki/includes/deferred/DataUpdate.php b/www/wiki/includes/deferred/DataUpdate.php new file mode 100644 index 00000000..ed9a7462 --- /dev/null +++ b/www/wiki/includes/deferred/DataUpdate.php @@ -0,0 +1,83 @@ +<?php +/** + * Base code for update jobs that do something with some secondary + * data extracted from article. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +/** + * Abstract base class for update jobs that do something with some secondary + * data extracted from article. 
+ */ +abstract class DataUpdate implements DeferrableUpdate { + /** @var mixed Result from LBFactory::getEmptyTransactionTicket() */ + protected $ticket; + /** @var string Short update cause action description */ + protected $causeAction = 'unknown'; + /** @var string Short update cause user description */ + protected $causeAgent = 'unknown'; + + public function __construct() { + // noop + } + + /** + * @param mixed $ticket Result of getEmptyTransactionTicket() + * @since 1.28 + */ + public function setTransactionTicket( $ticket ) { + $this->ticket = $ticket; + } + + /** + * @param string $action Action type + * @param string $user User name + */ + public function setCause( $action, $user ) { + $this->causeAction = $action; + $this->causeAgent = $user; + } + + /** + * @return string + */ + public function getCauseAction() { + return $this->causeAction; + } + + /** + * @return string + */ + public function getCauseAgent() { + return $this->causeAgent; + } + + /** + * Convenience method, calls doUpdate() on every DataUpdate in the array. 
+ * + * @param DataUpdate[] $updates A list of DataUpdate instances + * @throws Exception + * @deprecated Since 1.28 Use DeferredUpdates::execute() + */ + public static function runUpdates( array $updates ) { + foreach ( $updates as $update ) { + $update->doUpdate(); + } + } +} diff --git a/www/wiki/includes/deferred/DeferrableCallback.php b/www/wiki/includes/deferred/DeferrableCallback.php new file mode 100644 index 00000000..2eb0d5df --- /dev/null +++ b/www/wiki/includes/deferred/DeferrableCallback.php @@ -0,0 +1,13 @@ +<?php + +/** + * Callback wrapper that has an originating method + * + * @since 1.28 + */ +interface DeferrableCallback { + /** + * @return string Originating method name + */ + function getOrigin(); +} diff --git a/www/wiki/includes/deferred/DeferrableUpdate.php b/www/wiki/includes/deferred/DeferrableUpdate.php new file mode 100644 index 00000000..5f4d8210 --- /dev/null +++ b/www/wiki/includes/deferred/DeferrableUpdate.php @@ -0,0 +1,14 @@ +<?php + +/** + * Interface that deferrable updates should implement. Basically required so we + * can validate input on DeferredUpdates::addUpdate() + * + * @since 1.19 + */ +interface DeferrableUpdate { + /** + * Perform the actual work + */ + function doUpdate(); +} diff --git a/www/wiki/includes/deferred/DeferredUpdates.php b/www/wiki/includes/deferred/DeferredUpdates.php new file mode 100644 index 00000000..9b25d538 --- /dev/null +++ b/www/wiki/includes/deferred/DeferredUpdates.php @@ -0,0 +1,379 @@ +<?php +/** + * Interface and manager for deferred updates. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ +use Wikimedia\Rdbms\IDatabase; +use MediaWiki\MediaWikiServices; +use Wikimedia\Rdbms\LBFactory; +use Wikimedia\Rdbms\LoadBalancer; + +/** + * Class for managing the deferred updates + * + * In web request mode, deferred updates can be run at the end of the request, either before or + * after the HTTP response has been sent. In either case, they run after the DB commit step. If + * an update runs after the response is sent, it will not block clients. If sent before, it will + * run synchronously. These two modes are defined via PRESEND and POSTSEND constants, the latter + * being the default for addUpdate() and addCallableUpdate(). + * + * Updates that work through this system will be more likely to complete by the time the client + * makes their next request after this one than with the JobQueue system. + * + * In CLI mode, updates run immediately if no DB writes are pending. Otherwise, they run when: + * - a) Any waitForReplication() call if no writes are pending on any DB + * - b) A commit happens on Maintenance::getDB( DB_MASTER ) if no writes are pending on any DB + * - c) EnqueueableDataUpdate tasks may enqueue on commit of Maintenance::getDB( DB_MASTER ) + * - d) At the completion of Maintenance::execute() + * + * When updates are deferred, they go into one two FIFO "top-queues" (one for pre-send and one + * for post-send). Updates enqueued *during* doUpdate() of a "top" update go into the "sub-queue" + * for that update. After that method finishes, the sub-queue is run until drained. This continues + * for each top-queue job until the entire top queue is drained. 
This happens for the pre-send + * top-queue, and later on, the post-send top-queue, in execute(). + * + * @since 1.19 + */ +class DeferredUpdates { + /** @var DeferrableUpdate[] Updates to be deferred until before request end */ + private static $preSendUpdates = []; + /** @var DeferrableUpdate[] Updates to be deferred until after request end */ + private static $postSendUpdates = []; + + const ALL = 0; // all updates; in web requests, use only after flushing the output buffer + const PRESEND = 1; // for updates that should run before flushing output buffer + const POSTSEND = 2; // for updates that should run after flushing output buffer + + const BIG_QUEUE_SIZE = 100; + + /** @var array|null Information about the current execute() call or null if not running */ + private static $executeContext; + + /** + * Add an update to the deferred list to be run later by execute() + * + * In CLI mode, callback magic will also be used to run updates when safe + * + * @param DeferrableUpdate $update Some object that implements doUpdate() + * @param int $stage DeferredUpdates constant (PRESEND or POSTSEND) (since 1.27) + */ + public static function addUpdate( DeferrableUpdate $update, $stage = self::POSTSEND ) { + global $wgCommandLineMode; + + if ( self::$executeContext && self::$executeContext['stage'] >= $stage ) { + // This is a sub-DeferredUpdate; run it right after its parent update. + // Also, while post-send updates are running, push any "pre-send" jobs to the + // active post-send queue to make sure they get run this round (or at all). + self::$executeContext['subqueue'][] = $update; + + return; + } + + if ( $stage === self::PRESEND ) { + self::push( self::$preSendUpdates, $update ); + } else { + self::push( self::$postSendUpdates, $update ); + } + + // Try to run the updates now if in CLI mode and no transaction is active. + // This covers scripts that don't/barely use the DB but make updates to other stores. 
+ if ( $wgCommandLineMode ) { + self::tryOpportunisticExecute( 'run' ); + } + } + + /** + * Add a callable update. In a lot of cases, we just need a callback/closure, + * defining a new DeferrableUpdate object is not necessary + * + * @see MWCallableUpdate::__construct() + * + * @param callable $callable + * @param int $stage DeferredUpdates constant (PRESEND or POSTSEND) (since 1.27) + * @param IDatabase|IDatabase[]|null $dbw Abort if this DB is rolled back [optional] (since 1.28) + */ + public static function addCallableUpdate( + $callable, $stage = self::POSTSEND, $dbw = null + ) { + self::addUpdate( new MWCallableUpdate( $callable, wfGetCaller(), $dbw ), $stage ); + } + + /** + * Do any deferred updates and clear the list + * + * @param string $mode Use "enqueue" to use the job queue when possible [Default: "run"] + * @param int $stage DeferredUpdates constant (PRESEND, POSTSEND, or ALL) (since 1.27) + */ + public static function doUpdates( $mode = 'run', $stage = self::ALL ) { + $stageEffective = ( $stage === self::ALL ) ? self::POSTSEND : $stage; + + if ( $stage === self::ALL || $stage === self::PRESEND ) { + self::execute( self::$preSendUpdates, $mode, $stageEffective ); + } + + if ( $stage === self::ALL || $stage == self::POSTSEND ) { + self::execute( self::$postSendUpdates, $mode, $stageEffective ); + } + } + + /** + * @param bool $value Whether to just immediately run updates in addUpdate() + * @since 1.28 + * @deprecated 1.29 Causes issues in Web-executed jobs - see T165714 and T100085. 
+ */ + public static function setImmediateMode( $value ) { + wfDeprecated( __METHOD__, '1.29' ); + } + + /** + * @param DeferrableUpdate[] $queue + * @param DeferrableUpdate $update + */ + private static function push( array &$queue, DeferrableUpdate $update ) { + if ( $update instanceof MergeableUpdate ) { + $class = get_class( $update ); // fully-qualified class + if ( isset( $queue[$class] ) ) { + /** @var MergeableUpdate $existingUpdate */ + $existingUpdate = $queue[$class]; + $existingUpdate->merge( $update ); + } else { + $queue[$class] = $update; + } + } else { + $queue[] = $update; + } + } + + /** + * Immediately run/queue a list of updates + * + * @param DeferrableUpdate[] &$queue List of DeferrableUpdate objects + * @param string $mode Use "enqueue" to use the job queue when possible + * @param int $stage Class constant (PRESEND, POSTSEND) (since 1.28) + * @throws ErrorPageError Happens on top-level calls + * @throws Exception Happens on second-level calls + */ + protected static function execute( array &$queue, $mode, $stage ) { + $services = MediaWikiServices::getInstance(); + $stats = $services->getStatsdDataFactory(); + $lbFactory = $services->getDBLoadBalancerFactory(); + $method = RequestContext::getMain()->getRequest()->getMethod(); + + $ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ ); + + /** @var ErrorPageError $reportableError */ + $reportableError = null; + /** @var DeferrableUpdate[] $updates Snapshot of queue */ + $updates = $queue; + + // Keep doing rounds of updates until none get enqueued... + while ( $updates ) { + $queue = []; // clear the queue + + // Order will be DataUpdate followed by generic DeferrableUpdate tasks + $updatesByType = [ 'data' => [], 'generic' => [] ]; + foreach ( $updates as $du ) { + if ( $du instanceof DataUpdate ) { + $du->setTransactionTicket( $ticket ); + $updatesByType['data'][] = $du; + } else { + $updatesByType['generic'][] = $du; + } + + $name = ( $du instanceof DeferrableCallback ) + ? 
get_class( $du ) . '-' . $du->getOrigin() + : get_class( $du ); + $stats->increment( 'deferred_updates.' . $method . '.' . $name ); + } + + // Execute all remaining tasks... + foreach ( $updatesByType as $updatesForType ) { + foreach ( $updatesForType as $update ) { + self::$executeContext = [ 'stage' => $stage, 'subqueue' => [] ]; + /** @var DeferrableUpdate $update */ + $guiError = self::runUpdate( $update, $lbFactory, $mode, $stage ); + $reportableError = $reportableError ?: $guiError; + // Do the subqueue updates for $update until there are none + while ( self::$executeContext['subqueue'] ) { + $subUpdate = reset( self::$executeContext['subqueue'] ); + $firstKey = key( self::$executeContext['subqueue'] ); + unset( self::$executeContext['subqueue'][$firstKey] ); + + if ( $subUpdate instanceof DataUpdate ) { + $subUpdate->setTransactionTicket( $ticket ); + } + + $guiError = self::runUpdate( $subUpdate, $lbFactory, $mode, $stage ); + $reportableError = $reportableError ?: $guiError; + } + self::$executeContext = null; + } + } + + $updates = $queue; // new snapshot of queue (check for new entries) + } + + if ( $reportableError ) { + throw $reportableError; // throw the first of any GUI errors + } + } + + /** + * @param DeferrableUpdate $update + * @param LBFactory $lbFactory + * @param string $mode + * @param int $stage + * @return ErrorPageError|null + */ + private static function runUpdate( + DeferrableUpdate $update, LBFactory $lbFactory, $mode, $stage + ) { + $guiError = null; + try { + if ( $mode === 'enqueue' && $update instanceof EnqueueableDataUpdate ) { + // Run only the job enqueue logic to complete the update later + $spec = $update->getAsJobSpecification(); + JobQueueGroup::singleton( $spec['wiki'] )->push( $spec['job'] ); + } elseif ( $update instanceof TransactionRoundDefiningUpdate ) { + $update->doUpdate(); + } else { + // Run the bulk of the update now + $fnameTrxOwner = get_class( $update ) . 
'::doUpdate'; + $lbFactory->beginMasterChanges( $fnameTrxOwner ); + $update->doUpdate(); + $lbFactory->commitMasterChanges( $fnameTrxOwner ); + } + } catch ( Exception $e ) { + // Reporting GUI exceptions does not work post-send + if ( $e instanceof ErrorPageError && $stage === self::PRESEND ) { + $guiError = $e; + } + MWExceptionHandler::rollbackMasterChangesAndLog( $e ); + } + + return $guiError; + } + + /** + * Run all deferred updates immediately if there are no DB writes active + * + * If $mode is 'run' but there are busy databates, EnqueueableDataUpdate + * tasks will be enqueued anyway for the sake of progress. + * + * @param string $mode Use "enqueue" to use the job queue when possible + * @return bool Whether updates were allowed to run + * @since 1.28 + */ + public static function tryOpportunisticExecute( $mode = 'run' ) { + // execute() loop is already running + if ( self::$executeContext ) { + return false; + } + + // Avoiding running updates without them having outer scope + if ( !self::areDatabaseTransactionsActive() ) { + self::doUpdates( $mode ); + return true; + } + + if ( self::pendingUpdatesCount() >= self::BIG_QUEUE_SIZE ) { + // If we cannot run the updates with outer transaction context, try to + // at least enqueue all the updates that support queueing to job queue + self::$preSendUpdates = self::enqueueUpdates( self::$preSendUpdates ); + self::$postSendUpdates = self::enqueueUpdates( self::$postSendUpdates ); + } + + return !self::pendingUpdatesCount(); + } + + /** + * Enqueue a job for each EnqueueableDataUpdate item and return the other items + * + * @param DeferrableUpdate[] $updates A list of deferred update instances + * @return DeferrableUpdate[] Remaining updates that do not support being queued + */ + private static function enqueueUpdates( array $updates ) { + $remaining = []; + + foreach ( $updates as $update ) { + if ( $update instanceof EnqueueableDataUpdate ) { + $spec = $update->getAsJobSpecification(); + 
JobQueueGroup::singleton( $spec['wiki'] )->push( $spec['job'] ); + } else { + $remaining[] = $update; + } + } + + return $remaining; + } + + /** + * @return int Number of enqueued updates + * @since 1.28 + */ + public static function pendingUpdatesCount() { + return count( self::$preSendUpdates ) + count( self::$postSendUpdates ); + } + + /** + * @param int $stage DeferredUpdates constant (PRESEND, POSTSEND, or ALL) + * @return DeferrableUpdate[] + * @since 1.29 + */ + public static function getPendingUpdates( $stage = self::ALL ) { + $updates = []; + if ( $stage === self::ALL || $stage === self::PRESEND ) { + $updates = array_merge( $updates, self::$preSendUpdates ); + } + if ( $stage === self::ALL || $stage === self::POSTSEND ) { + $updates = array_merge( $updates, self::$postSendUpdates ); + } + return $updates; + } + + /** + * Clear all pending updates without performing them. Generally, you don't + * want or need to call this. Unit tests need it though. + */ + public static function clearPendingUpdates() { + self::$preSendUpdates = []; + self::$postSendUpdates = []; + } + + /** + * @return bool If a transaction round is active or connection is not ready for commit() + */ + private static function areDatabaseTransactionsActive() { + $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory(); + if ( $lbFactory->hasTransactionRound() ) { + return true; + } + + $connsBusy = false; + $lbFactory->forEachLB( function ( LoadBalancer $lb ) use ( &$connsBusy ) { + $lb->forEachOpenMasterConnection( function ( IDatabase $conn ) use ( &$connsBusy ) { + if ( $conn->writesOrCallbacksPending() || $conn->explicitTrxActive() ) { + $connsBusy = true; + } + } ); + } ); + + return $connsBusy; + } +} diff --git a/www/wiki/includes/deferred/EnqueueableDataUpdate.php b/www/wiki/includes/deferred/EnqueueableDataUpdate.php new file mode 100644 index 00000000..ffeb740d --- /dev/null +++ b/www/wiki/includes/deferred/EnqueueableDataUpdate.php @@ -0,0 +1,15 @@ +<?php +/** + 
* Interface that marks a DataUpdate as enqueuable via the JobQueue + * + * Such updates must be representable using IJobSpecification, so that + * they can be serialized into jobs and enqueued for later execution + * + * @since 1.27 + */ +interface EnqueueableDataUpdate { + /** + * @return array (wiki => wiki ID, job => IJobSpecification) + */ + public function getAsJobSpecification(); +} diff --git a/www/wiki/includes/deferred/HTMLCacheUpdate.php b/www/wiki/includes/deferred/HTMLCacheUpdate.php new file mode 100644 index 00000000..29846bfb --- /dev/null +++ b/www/wiki/includes/deferred/HTMLCacheUpdate.php @@ -0,0 +1,60 @@ +<?php +/** + * HTML cache invalidation of all pages linking to a given title. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Cache + */ + +/** + * Class to invalidate the HTML cache of all the pages linking to a given title. 
+ * + * @ingroup Cache + */ +class HTMLCacheUpdate extends DataUpdate { + /** @var Title */ + public $mTitle; + + /** @var string */ + public $mTable; + + /** + * @param Title $titleTo + * @param string $table + * @param string $causeAction Triggering action + * @param string $causeAgent Triggering user + */ + function __construct( + Title $titleTo, $table, $causeAction = 'unknown', $causeAgent = 'unknown' + ) { + $this->mTitle = $titleTo; + $this->mTable = $table; + $this->causeAction = $causeAction; + $this->causeAgent = $causeAgent; + } + + public function doUpdate() { + $job = HTMLCacheUpdateJob::newForBacklinks( + $this->mTitle, + $this->mTable, + [ 'causeAction' => $this->getCauseAction(), 'causeAgent' => $this->getCauseAgent() ] + ); + + JobQueueGroup::singleton()->lazyPush( $job ); + } +} diff --git a/www/wiki/includes/deferred/LinksDeletionUpdate.php b/www/wiki/includes/deferred/LinksDeletionUpdate.php new file mode 100644 index 00000000..52e996a0 --- /dev/null +++ b/www/wiki/includes/deferred/LinksDeletionUpdate.php @@ -0,0 +1,242 @@ +<?php +/** + * Updater for link tracking tables after a page edit. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */
use MediaWiki\MediaWikiServices;
use Wikimedia\ScopedCallback;
use Wikimedia\Rdbms\IDatabase;

/**
 * Update object handling the cleanup of links tables after a page was deleted.
 *
 * Deletions are performed in batches of $wgUpdateRowsPerQuery rows, with a
 * commit-and-wait-for-replication between batches so that removing a heavily
 * linked page does not cause excessive replica lag or long-held row locks.
 */
class LinksDeletionUpdate extends DataUpdate implements EnqueueableDataUpdate {
	/** @var WikiPage Page whose link-table rows are being removed */
	protected $page;
	/** @var int Page ID as of the time of deletion */
	protected $pageId;
	/** @var string TS_MW timestamp of the deletion */
	protected $timestamp;

	/** @var IDatabase Lazily opened master connection (see getDB()) */
	private $db;

	/**
	 * @param WikiPage $page Page we are updating
	 * @param int|null $pageId ID of the page we are updating [optional]
	 * @param string|null $timestamp TS_MW timestamp of deletion
	 * @throws InvalidArgumentException If no page ID was given and the page no longer exists
	 */
	function __construct( WikiPage $page, $pageId = null, $timestamp = null ) {
		parent::__construct();

		$this->page = $page;
		if ( $pageId ) {
			$this->pageId = $pageId; // page ID at time of deletion
		} elseif ( $page->exists() ) {
			$this->pageId = $page->getId();
		} else {
			throw new InvalidArgumentException( "Page ID not known. Page doesn't exist?" );
		}

		// Default to "now" when no deletion timestamp was supplied
		$this->timestamp = $timestamp ?: wfTimestampNow();
	}

	/**
	 * Remove all tracking-table rows for the deleted page: restrictions,
	 * category memberships/counts, the various *links tables, redirect and
	 * page_props rows, and stale recentchanges entries.
	 */
	public function doUpdate() {
		$services = MediaWikiServices::getInstance();
		$config = $services->getMainConfig();
		$lbFactory = $services->getDBLoadBalancerFactory();
		$batchSize = $config->get( 'UpdateRowsPerQuery' );

		// Page may already be deleted, so don't just getId()
		$id = $this->pageId;

		if ( $this->ticket ) {
			// Make sure all links update threads see the changes of each other.
			// This handles the case when updates have to be batched into several COMMITs.
			$scopedLock = LinksUpdate::acquirePageLock( $this->getDB(), $id );
		}

		$title = $this->page->getTitle();
		$dbw = $this->getDB(); // convenience

		// Delete restrictions for it
		$dbw->delete( 'page_restrictions', [ 'pr_page' => $id ], __METHOD__ );

		// Fix category table counts
		$cats = $dbw->selectFieldValues(
			'categorylinks',
			'cl_to',
			[ 'cl_from' => $id ],
			__METHOD__
		);
		$catBatches = array_chunk( $cats, $batchSize );
		foreach ( $catBatches as $catBatch ) {
			$this->page->updateCategoryCounts( [], $catBatch, $id );
			if ( count( $catBatches ) > 1 ) {
				// Only pay the replication-wait cost when there is more than one batch
				$lbFactory->commitAndWaitForReplication(
					__METHOD__, $this->ticket, [ 'domain' => $dbw->getDomainID() ]
				);
			}
		}

		// Refresh the category table entry if it seems to have no pages. Check
		// master for the most up-to-date cat_pages count.
		if ( $title->getNamespace() === NS_CATEGORY ) {
			$row = $dbw->selectRow(
				'category',
				[ 'cat_id', 'cat_title', 'cat_pages', 'cat_subcats', 'cat_files' ],
				[ 'cat_title' => $title->getDBkey(), 'cat_pages <= 0' ],
				__METHOD__
			);
			if ( $row ) {
				$cat = Category::newFromRow( $row, $title );
				// T166757: do the update after the main job DB commit
				DeferredUpdates::addCallableUpdate( function () use ( $cat ) {
					$cat->refreshCounts();
				} );
			}
		}

		$this->batchDeleteByPK(
			'pagelinks',
			[ 'pl_from' => $id ],
			[ 'pl_from', 'pl_namespace', 'pl_title' ],
			$batchSize
		);
		$this->batchDeleteByPK(
			'imagelinks',
			[ 'il_from' => $id ],
			[ 'il_from', 'il_to' ],
			$batchSize
		);
		$this->batchDeleteByPK(
			'categorylinks',
			[ 'cl_from' => $id ],
			[ 'cl_from', 'cl_to' ],
			$batchSize
		);
		$this->batchDeleteByPK(
			'templatelinks',
			[ 'tl_from' => $id ],
			[ 'tl_from', 'tl_namespace', 'tl_title' ],
			$batchSize
		);
		$this->batchDeleteByPK(
			'externallinks',
			[ 'el_from' => $id ],
			[ 'el_id' ],
			$batchSize
		);
		$this->batchDeleteByPK(
			'langlinks',
			[ 'll_from' => $id ],
			[ 'll_from', 'll_lang' ],
			$batchSize
		);

		$this->batchDeleteByPK(
			'iwlinks',
			[ 'iwl_from' => $id ],
			[ 'iwl_from', 'iwl_prefix', 'iwl_title' ],
			$batchSize
		);

		// Delete any redirect entry or page props entries
		$dbw->delete( 'redirect', [ 'rd_from' => $id ], __METHOD__ );
		$dbw->delete( 'page_props', [ 'pp_page' => $id ], __METHOD__ );

		// Find recentchanges entries to clean up...
		// Match by title (for edits predating this deletion) and by cur_id
		$rcIdsForTitle = $dbw->selectFieldValues(
			'recentchanges',
			'rc_id',
			[
				'rc_type != ' . RC_LOG,
				'rc_namespace' => $title->getNamespace(),
				'rc_title' => $title->getDBkey(),
				'rc_timestamp < ' .
					$dbw->addQuotes( $dbw->timestamp( $this->timestamp ) )
			],
			__METHOD__
		);
		$rcIdsForPage = $dbw->selectFieldValues(
			'recentchanges',
			'rc_id',
			[ 'rc_type != ' . RC_LOG, 'rc_cur_id' => $id ],
			__METHOD__
		);

		// T98706: delete by PK to avoid lock contention with RC delete log insertions
		$rcIdBatches = array_chunk( array_merge( $rcIdsForTitle, $rcIdsForPage ), $batchSize );
		foreach ( $rcIdBatches as $rcIdBatch ) {
			$dbw->delete( 'recentchanges', [ 'rc_id' => $rcIdBatch ], __METHOD__ );
			if ( count( $rcIdBatches ) > 1 ) {
				$lbFactory->commitAndWaitForReplication(
					__METHOD__, $this->ticket, [ 'domain' => $dbw->getDomainID() ]
				);
			}
		}

		// Commit and release the lock (if set)
		ScopedCallback::consume( $scopedLock );
	}

	/**
	 * Delete the rows of $table matching $conds, in batches keyed by the
	 * primary key columns $pk, committing and waiting for replication after
	 * each full batch (T95501-style chunked deletion).
	 *
	 * @param string $table Table name
	 * @param array $conds Selection conditions
	 * @param string[] $pk Primary key columns used to address rows for deletion
	 * @param int $bSize Maximum rows to delete per query
	 */
	private function batchDeleteByPK( $table, array $conds, array $pk, $bSize ) {
		$services = MediaWikiServices::getInstance();
		$lbFactory = $services->getDBLoadBalancerFactory();
		$dbw = $this->getDB(); // convenience

		$res = $dbw->select( $table, $pk, $conds, __METHOD__ );

		$pkDeleteConds = [];
		foreach ( $res as $row ) {
			// Each row becomes an AND over its PK columns; batches are OR'ed together
			$pkDeleteConds[] = $dbw->makeList( (array)$row, LIST_AND );
			if ( count( $pkDeleteConds ) >= $bSize ) {
				$dbw->delete( $table, $dbw->makeList( $pkDeleteConds, LIST_OR ), __METHOD__ );
				$lbFactory->commitAndWaitForReplication(
					__METHOD__, $this->ticket, [ 'domain' => $dbw->getDomainID() ]
				);
				$pkDeleteConds = [];
			}
		}

		// Flush any final partial batch (no replication wait needed afterwards)
		if ( $pkDeleteConds ) {
			$dbw->delete( $table, $dbw->makeList( $pkDeleteConds, LIST_OR ), __METHOD__ );
		}
	}

	/**
	 * Get the master DB connection, opening it on first use.
	 *
	 * @return IDatabase
	 */
	protected function getDB() {
		if ( !$this->db ) {
			$this->db = wfGetDB( DB_MASTER );
		}

		return $this->db;
	}

	/**
	 * @see EnqueueableDataUpdate::getAsJobSpecification()
	 * @return array Map of (wiki => wiki ID, job => IJobSpecification)
	 */
	public function getAsJobSpecification() {
		return [
			'wiki' => WikiMap::getWikiIdFromDomain( $this->getDB()->getDomainID() ),
			'job' => new JobSpecification(
				'deleteLinks',
				[ 'pageId' => $this->pageId, 'timestamp' => $this->timestamp ],
				[ 'removeDuplicates' => true ],
				$this->page->getTitle()
			)
		];
	}
}
<?php
/**
 * Updater for link tracking tables after a page edit.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

use Wikimedia\Rdbms\IDatabase;
use MediaWiki\MediaWikiServices;
use Wikimedia\ScopedCallback;

/**
 * Class the manages updates of *_link tables as well as similar extension-managed tables
 *
 * @note: LinksUpdate is managed by DeferredUpdates::execute(). Do not run this in a transaction.
 *
 * See docs/deferred.txt
 */
class LinksUpdate extends DataUpdate implements EnqueueableDataUpdate {
	// @todo make members protected, but make sure extensions don't break

	/** @var int Page ID of the article linked from */
	public $mId;

	/** @var Title Title object of the article linked from */
	public $mTitle;

	/** @var ParserOutput */
	public $mParserOutput;

	/** @var array Map of title strings to IDs for the links in the document */
	public $mLinks;

	/** @var array DB keys of the images used, in the array key only */
	public $mImages;

	/** @var array Map of title strings to IDs for the template references, including broken ones */
	public $mTemplates;

	/** @var array URLs of external links, array key only */
	public $mExternals;

	/** @var array Map of category names to sort keys */
	public $mCategories;

	/** @var array Map of language codes to titles */
	public $mInterlangs;

	/** @var array 2-D map of (prefix => DBK => 1) */
	public $mInterwikis;

	/** @var array Map of arbitrary name to value */
	public $mProperties;

	/** @var bool Whether to queue jobs for recursive updates */
	public $mRecursive;

	/** @var Revision Revision for which this update has been triggered */
	private $mRevision;

	/**
	 * @var null|array Added links if calculated.
	 */
	private $linkInsertions = null;

	/**
	 * @var null|array Deleted links if calculated.
	 */
	private $linkDeletions = null;

	/**
	 * @var null|array Added properties if calculated.
	 */
	private $propertyInsertions = null;

	/**
	 * @var null|array Deleted properties if calculated.
	 */
	private $propertyDeletions = null;

	/**
	 * @var User|null User who triggered this update, if any
	 */
	private $user;

	/** @var IDatabase Lazily opened master connection (see getDB()) */
	private $db;

	/**
	 * @param Title $title Title of the page we're updating
	 * @param ParserOutput $parserOutput Output from a full parse of this page
	 * @param bool $recursive Queue jobs for recursive updates?
	 * @throws InvalidArgumentException If $title yields no page ID
	 */
	function __construct( Title $title, ParserOutput $parserOutput, $recursive = true ) {
		parent::__construct();

		$this->mTitle = $title;
		$this->mId = $title->getArticleID( Title::GAID_FOR_UPDATE );

		if ( !$this->mId ) {
			throw new InvalidArgumentException(
				"The Title object yields no ID. Perhaps the page doesn't exist?"
			);
		}

		$this->mParserOutput = $parserOutput;

		$this->mLinks = $parserOutput->getLinks();
		$this->mImages = $parserOutput->getImages();
		$this->mTemplates = $parserOutput->getTemplates();
		$this->mExternals = $parserOutput->getExternalLinks();
		$this->mCategories = $parserOutput->getCategories();
		$this->mProperties = $parserOutput->getProperties();
		$this->mInterwikis = $parserOutput->getInterwikiLinks();

		# Convert the format of the interlanguage links
		# I didn't want to change it in the ParserOutput, because that array is passed all
		# the way back to the skin, so either a skin API break would be required, or an
		# inefficient back-conversion.
		$ill = $parserOutput->getLanguageLinks();
		$this->mInterlangs = [];
		foreach ( $ill as $link ) {
			list( $key, $title ) = explode( ':', $link, 2 );
			$this->mInterlangs[$key] = $title;
		}

		foreach ( $this->mCategories as &$sortkey ) {
			# If the sortkey is longer than 255 bytes, it is truncated by the DB,
			# and then doesn't get matched when comparing existing vs current
			# categories, causing T27254.
			# Also, substr behaves weird when given "".
			if ( $sortkey !== '' ) {
				$sortkey = substr( $sortkey, 0, 255 );
			}
		}

		$this->mRecursive = $recursive;

		// Avoid PHP 7.1 warning from passing $this by reference
		$linksUpdate = $this;
		Hooks::run( 'LinksUpdateConstructed', [ &$linksUpdate ] );
	}

	/**
	 * Update link tables with outgoing links from an updated article
	 *
	 * @note: this is managed by DeferredUpdates::execute(). Do not run this in a transaction.
	 */
	public function doUpdate() {
		if ( $this->ticket ) {
			// Make sure all links update threads see the changes of each other.
			// This handles the case when updates have to be batched into several COMMITs.
			$scopedLock = self::acquirePageLock( $this->getDB(), $this->mId );
		}

		// Avoid PHP 7.1 warning from passing $this by reference
		$linksUpdate = $this;
		Hooks::run( 'LinksUpdate', [ &$linksUpdate ] );
		$this->doIncrementalUpdate();

		// Commit and release the lock (if set)
		ScopedCallback::consume( $scopedLock );
		// Run post-commit hooks without DBO_TRX
		$this->getDB()->onTransactionIdle(
			function () {
				// Avoid PHP 7.1 warning from passing $this by reference
				$linksUpdate = $this;
				Hooks::run( 'LinksUpdateComplete', [ &$linksUpdate, $this->ticket ] );
			},
			__METHOD__
		);
	}

	/**
	 * Acquire a lock for performing link table updates for a page on a DB
	 *
	 * @param IDatabase $dbw
	 * @param int $pageId
	 * @param string $why One of (job, atomicity)
	 * @return ScopedCallback Releases the lock when consumed or destroyed
	 * @throws RuntimeException If the lock could not be acquired within 15 seconds
	 * @since 1.27
	 */
	public static function acquirePageLock( IDatabase $dbw, $pageId, $why = 'atomicity' ) {
		$key = "LinksUpdate:$why:pageid:$pageId";
		$scopedLock = $dbw->getScopedLockAndFlush( $key, __METHOD__, 15 );
		if ( !$scopedLock ) {
			throw new RuntimeException( "Could not acquire lock '$key'." );
		}

		return $scopedLock;
	}

	/**
	 * Diff the parser-output link sets against the existing table rows and
	 * apply the per-table insert/delete deltas, then queue recursive jobs
	 * and bump the links-table freshness timestamp.
	 */
	protected function doIncrementalUpdate() {
		# Page links
		$existingPL = $this->getExistingLinks();
		$this->linkDeletions = $this->getLinkDeletions( $existingPL );
		$this->linkInsertions = $this->getLinkInsertions( $existingPL );
		$this->incrTableUpdate( 'pagelinks', 'pl', $this->linkDeletions, $this->linkInsertions );

		# Image links
		$existingIL = $this->getExistingImages();
		$imageDeletes = $this->getImageDeletions( $existingIL );
		$this->incrTableUpdate(
			'imagelinks',
			'il',
			$imageDeletes,
			$this->getImageInsertions( $existingIL ) );

		# Invalidate all image description pages which had links added or removed
		$imageUpdates = $imageDeletes + array_diff_key( $this->mImages, $existingIL );
		$this->invalidateImageDescriptions( $imageUpdates );

		# External links
		$existingEL = $this->getExistingExternals();
		$this->incrTableUpdate(
			'externallinks',
			'el',
			$this->getExternalDeletions( $existingEL ),
			$this->getExternalInsertions( $existingEL ) );

		# Language links
		$existingLL = $this->getExistingInterlangs();
		$this->incrTableUpdate(
			'langlinks',
			'll',
			$this->getInterlangDeletions( $existingLL ),
			$this->getInterlangInsertions( $existingLL ) );

		# Inline interwiki links
		$existingIW = $this->getExistingInterwikis();
		$this->incrTableUpdate(
			'iwlinks',
			'iwl',
			$this->getInterwikiDeletions( $existingIW ),
			$this->getInterwikiInsertions( $existingIW ) );

		# Template links
		$existingTL = $this->getExistingTemplates();
		$this->incrTableUpdate(
			'templatelinks',
			'tl',
			$this->getTemplateDeletions( $existingTL ),
			$this->getTemplateInsertions( $existingTL ) );

		# Category links
		$existingCL = $this->getExistingCategories();
		$categoryDeletes = $this->getCategoryDeletions( $existingCL );
		$this->incrTableUpdate(
			'categorylinks',
			'cl',
			$categoryDeletes,
			$this->getCategoryInsertions( $existingCL ) );
		$categoryInserts = array_diff_assoc( $this->mCategories, $existingCL );
		$categoryUpdates = $categoryInserts + $categoryDeletes;

		# Page properties
		$existingPP = $this->getExistingProperties();
		$this->propertyDeletions = $this->getPropertyDeletions( $existingPP );
		$this->incrTableUpdate(
			'page_props',
			'pp',
			$this->propertyDeletions,
			$this->getPropertyInsertions( $existingPP ) );

		# Invalidate the necessary pages
		$this->propertyInsertions = array_diff_assoc( $this->mProperties, $existingPP );
		$changed = $this->propertyDeletions + $this->propertyInsertions;
		$this->invalidateProperties( $changed );

		# Invalidate all categories which were added, deleted or changed (set symmetric difference)
		$this->invalidateCategories( $categoryUpdates );
		$this->updateCategoryCounts( $categoryInserts, $categoryDeletes );

		# Refresh links of all pages including this page
		# This will be in a separate transaction
		if ( $this->mRecursive ) {
			$this->queueRecursiveJobs();
		}

		# Update the links table freshness for this title
		$this->updateLinksTimestamp();
	}

	/**
	 * Queue recursive jobs for this page
	 *
	 * Which means do LinksUpdate on all pages that include the current page,
	 * using the job queue.
	 */
	protected function queueRecursiveJobs() {
		$action = $this->getCauseAction();
		$agent = $this->getCauseAgent();

		self::queueRecursiveJobsForTable( $this->mTitle, 'templatelinks', $action, $agent );
		if ( $this->mTitle->getNamespace() == NS_FILE ) {
			// Process imagelinks in case the title is or was a redirect
			self::queueRecursiveJobsForTable( $this->mTitle, 'imagelinks', $action, $agent );
		}

		$bc = $this->mTitle->getBacklinkCache();
		// Get jobs for cascade-protected backlinks for a high priority queue.
		// If meta-templates change to using a new template, the new template
		// should be implicitly protected as soon as possible, if applicable.
		// These jobs duplicate a subset of the above ones, but can run sooner.
		// Whichever runs first generally no-ops the other one.
		$jobs = [];
		foreach ( $bc->getCascadeProtectedLinks() as $title ) {
			$jobs[] = RefreshLinksJob::newPrioritized(
				$title,
				[
					'causeAction' => $action,
					'causeAgent' => $agent
				]
			);
		}
		JobQueueGroup::singleton()->push( $jobs );
	}

	/**
	 * Queue a RefreshLinks job for any table.
	 *
	 * @param Title $title Title to do job for
	 * @param string $table Table to use (e.g. 'templatelinks')
	 * @param string $action Triggering action
	 * @param string $userName Triggering user name
	 */
	public static function queueRecursiveJobsForTable(
		Title $title, $table, $action = 'unknown', $userName = 'unknown'
	) {
		if ( $title->getBacklinkCache()->hasLinks( $table ) ) {
			$job = new RefreshLinksJob(
				$title,
				[
					'table' => $table,
					'recursive' => true,
				] + Job::newRootJobParams( // "overall" refresh links job info
					"refreshlinks:{$table}:{$title->getPrefixedText()}"
				) + [ 'causeAction' => $action, 'causeAgent' => $userName ]
			);

			JobQueueGroup::singleton()->push( $job );
		}
	}

	/**
	 * Invalidate the page cache of the given categories' description pages
	 * @param array $cats Map of category name => sort key
	 */
	private function invalidateCategories( $cats ) {
		PurgeJobUtils::invalidatePages( $this->getDB(), NS_CATEGORY, array_keys( $cats ) );
	}

	/**
	 * Update all the appropriate counts in the category table.
	 * @param array $added Associative array of category name => sort key
	 * @param array $deleted Associative array of category name => sort key
	 */
	private function updateCategoryCounts( array $added, array $deleted ) {
		global $wgUpdateRowsPerQuery;

		if ( !$added && !$deleted ) {
			return;
		}

		$domainId = $this->getDB()->getDomainID();
		$wp = WikiPage::factory( $this->mTitle );
		$lbf = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
		// T163801: try to release any row locks to reduce contention
		$lbf->commitAndWaitForReplication( __METHOD__, $this->ticket, [ 'domain' => $domainId ] );

		foreach ( array_chunk( array_keys( $added ), $wgUpdateRowsPerQuery ) as $addBatch ) {
			$wp->updateCategoryCounts( $addBatch, [], $this->mId );
			$lbf->commitAndWaitForReplication(
				__METHOD__, $this->ticket, [ 'domain' => $domainId ] );
		}

		foreach ( array_chunk( array_keys( $deleted ), $wgUpdateRowsPerQuery ) as $deleteBatch ) {
			$wp->updateCategoryCounts( [], $deleteBatch, $this->mId );
			$lbf->commitAndWaitForReplication(
				__METHOD__, $this->ticket, [ 'domain' => $domainId ] );
		}
	}

	/**
	 * Invalidate the page cache of the given images' description pages
	 * @param array $images Map with image DB keys as array keys
	 */
	private function invalidateImageDescriptions( $images ) {
		PurgeJobUtils::invalidatePages( $this->getDB(), NS_FILE, array_keys( $images ) );
	}

	/**
	 * Update a table by doing a delete query then an insert query
	 *
	 * Both deletes and inserts are chunked by $wgUpdateRowsPerQuery, with a
	 * commit-and-wait-for-replication between chunks.
	 *
	 * @param string $table Table name
	 * @param string $prefix Field name prefix
	 * @param array $deletions Rows to delete (format depends on table)
	 * @param array $insertions Rows to insert
	 */
	private function incrTableUpdate( $table, $prefix, $deletions, $insertions ) {
		$services = MediaWikiServices::getInstance();
		$bSize = $services->getMainConfig()->get( 'UpdateRowsPerQuery' );
		$lbf = $services->getDBLoadBalancerFactory();

		if ( $table === 'page_props' ) {
			$fromField = 'pp_page';
		} else {
			$fromField = "{$prefix}_from";
		}

		$deleteWheres = []; // list of WHERE clause arrays for each DB delete() call
		if ( $table === 'pagelinks' || $table === 'templatelinks' || $table === 'iwlinks' ) {
			// 2-D deletions: (namespace-or-prefix => dbkey => 1); batch across both levels
			$baseKey = ( $table === 'iwlinks' ) ? 'iwl_prefix' : "{$prefix}_namespace";

			$curBatchSize = 0;
			$curDeletionBatch = [];
			$deletionBatches = [];
			foreach ( $deletions as $ns => $dbKeys ) {
				foreach ( $dbKeys as $dbKey => $unused ) {
					$curDeletionBatch[$ns][$dbKey] = 1;
					if ( ++$curBatchSize >= $bSize ) {
						$deletionBatches[] = $curDeletionBatch;
						$curDeletionBatch = [];
						$curBatchSize = 0;
					}
				}
			}
			if ( $curDeletionBatch ) {
				$deletionBatches[] = $curDeletionBatch;
			}

			foreach ( $deletionBatches as $deletionBatch ) {
				$deleteWheres[] = [
					$fromField => $this->mId,
					$this->getDB()->makeWhereFrom2d( $deletionBatch, $baseKey, "{$prefix}_title" )
				];
			}
		} else {
			// 1-D deletions: key is the "to" value
			if ( $table === 'langlinks' ) {
				$toField = 'll_lang';
			} elseif ( $table === 'page_props' ) {
				$toField = 'pp_propname';
			} else {
				$toField = $prefix . '_to';
			}

			$deletionBatches = array_chunk( array_keys( $deletions ), $bSize );
			foreach ( $deletionBatches as $deletionBatch ) {
				$deleteWheres[] = [ $fromField => $this->mId, $toField => $deletionBatch ];
			}
		}

		$domainId = $this->getDB()->getDomainID();

		foreach ( $deleteWheres as $deleteWhere ) {
			$this->getDB()->delete( $table, $deleteWhere, __METHOD__ );
			$lbf->commitAndWaitForReplication(
				__METHOD__, $this->ticket, [ 'domain' => $domainId ]
			);
		}

		$insertBatches = array_chunk( $insertions, $bSize );
		foreach ( $insertBatches as $insertBatch ) {
			$this->getDB()->insert( $table, $insertBatch, __METHOD__, 'IGNORE' );
			$lbf->commitAndWaitForReplication(
				__METHOD__, $this->ticket, [ 'domain' => $domainId ]
			);
		}

		if ( count( $insertions ) ) {
			Hooks::run( 'LinksUpdateAfterInsert', [ $this, $table, $insertions ] );
		}
	}

	/**
	 * Get an array of pagelinks insertions for passing to the DB
	 * Skips the titles specified by the 2-D array $existing
	 * @param array $existing
	 * @return array
	 */
	private function getLinkInsertions( $existing = [] ) {
		$arr = [];
		foreach ( $this->mLinks as $ns => $dbkeys ) {
			$diffs = isset( $existing[$ns] )
				? array_diff_key( $dbkeys, $existing[$ns] )
				: $dbkeys;
			foreach ( $diffs as $dbk => $id ) {
				$arr[] = [
					'pl_from' => $this->mId,
					'pl_from_namespace' => $this->mTitle->getNamespace(),
					'pl_namespace' => $ns,
					'pl_title' => $dbk
				];
			}
		}

		return $arr;
	}

	/**
	 * Get an array of template insertions. Like getLinkInsertions()
	 * @param array $existing
	 * @return array
	 */
	private function getTemplateInsertions( $existing = [] ) {
		$arr = [];
		foreach ( $this->mTemplates as $ns => $dbkeys ) {
			$diffs = isset( $existing[$ns] ) ? array_diff_key( $dbkeys, $existing[$ns] ) : $dbkeys;
			foreach ( $diffs as $dbk => $id ) {
				$arr[] = [
					'tl_from' => $this->mId,
					'tl_from_namespace' => $this->mTitle->getNamespace(),
					'tl_namespace' => $ns,
					'tl_title' => $dbk
				];
			}
		}

		return $arr;
	}

	/**
	 * Get an array of image insertions
	 * Skips the names specified in $existing
	 * @param array $existing
	 * @return array
	 */
	private function getImageInsertions( $existing = [] ) {
		$arr = [];
		$diffs = array_diff_key( $this->mImages, $existing );
		foreach ( $diffs as $iname => $dummy ) {
			$arr[] = [
				'il_from' => $this->mId,
				'il_from_namespace' => $this->mTitle->getNamespace(),
				'il_to' => $iname
			];
		}

		return $arr;
	}

	/**
	 * Get an array of externallinks insertions. Skips the names specified in $existing
	 * @param array $existing
	 * @return array
	 */
	private function getExternalInsertions( $existing = [] ) {
		$arr = [];
		$diffs = array_diff_key( $this->mExternals, $existing );
		foreach ( $diffs as $url => $dummy ) {
			// One row per URL index variant (e.g. protocol-relative forms)
			foreach ( wfMakeUrlIndexes( $url ) as $index ) {
				$arr[] = [
					'el_from' => $this->mId,
					'el_to' => $url,
					'el_index' => $index,
				];
			}
		}

		return $arr;
	}

	/**
	 * Get an array of category insertions
	 *
	 * @param array $existing Mapping existing category names to sort keys. If both
	 * match a link in $this, the link will be omitted from the output
	 *
	 * @return array
	 */
	private function getCategoryInsertions( $existing = [] ) {
		global $wgContLang, $wgCategoryCollation;
		$diffs = array_diff_assoc( $this->mCategories, $existing );
		$arr = [];
		foreach ( $diffs as $name => $prefix ) {
			$nt = Title::makeTitleSafe( NS_CATEGORY, $name );
			$wgContLang->findVariantLink( $name, $nt, true );

			if ( $this->mTitle->getNamespace() == NS_CATEGORY ) {
				$type = 'subcat';
			} elseif ( $this->mTitle->getNamespace() == NS_FILE ) {
				$type = 'file';
			} else {
				$type = 'page';
			}

			# Treat custom sortkeys as a prefix, so that if multiple
			# things are forced to sort as '*' or something, they'll
			# sort properly in the category rather than in page_id
			# order or such.
			$sortkey = Collation::singleton()->getSortKey(
				$this->mTitle->getCategorySortkey( $prefix ) );

			$arr[] = [
				'cl_from' => $this->mId,
				'cl_to' => $name,
				'cl_sortkey' => $sortkey,
				'cl_timestamp' => $this->getDB()->timestamp(),
				'cl_sortkey_prefix' => $prefix,
				'cl_collation' => $wgCategoryCollation,
				'cl_type' => $type,
			];
		}

		return $arr;
	}

	/**
	 * Get an array of interlanguage link insertions
	 *
	 * @param array $existing Mapping existing language codes to titles
	 *
	 * @return array
	 */
	private function getInterlangInsertions( $existing = [] ) {
		$diffs = array_diff_assoc( $this->mInterlangs, $existing );
		$arr = [];
		foreach ( $diffs as $lang => $title ) {
			$arr[] = [
				'll_from' => $this->mId,
				'll_lang' => $lang,
				'll_title' => $title
			];
		}

		return $arr;
	}

	/**
	 * Get an array of page property insertions
	 * @param array $existing
	 * @return array
	 */
	function getPropertyInsertions( $existing = [] ) {
		$diffs = array_diff_assoc( $this->mProperties, $existing );

		$arr = [];
		foreach ( array_keys( $diffs ) as $name ) {
			$arr[] = $this->getPagePropRowData( $name );
		}

		return $arr;
	}

	/**
	 * Returns an associative array to be used for inserting a row into
	 * the page_props table. Besides the given property name, this will
	 * include the page id from $this->mId and any property value from
	 * $this->mProperties.
	 *
	 * The array returned will include the pp_sortkey field if this
	 * is present in the database (as indicated by $wgPagePropsHaveSortkey).
	 * The sortkey value is currently determined by getPropertySortKeyValue().
	 *
	 * @note this assumes that $this->mProperties[$prop] is defined.
	 *
	 * @param string $prop The name of the property.
	 *
	 * @return array
	 */
	private function getPagePropRowData( $prop ) {
		global $wgPagePropsHaveSortkey;

		$value = $this->mProperties[$prop];

		$row = [
			'pp_page' => $this->mId,
			'pp_propname' => $prop,
			'pp_value' => $value,
		];

		if ( $wgPagePropsHaveSortkey ) {
			$row['pp_sortkey'] = $this->getPropertySortKeyValue( $value );
		}

		return $row;
	}

	/**
	 * Determines the sort key for the given property value.
	 * This will return $value if it is a float or int,
	 * 1 or resp. 0 if it is a bool, and null otherwise.
	 *
	 * @note In the future, we may allow the sortkey to be specified explicitly
	 *       in ParserOutput::setProperty.
	 *
	 * @param mixed $value
	 *
	 * @return float|null
	 */
	private function getPropertySortKeyValue( $value ) {
		if ( is_int( $value ) || is_float( $value ) || is_bool( $value ) ) {
			return floatval( $value );
		}

		return null;
	}

	/**
	 * Get an array of interwiki insertions for passing to the DB
	 * Skips the titles specified by the 2-D array $existing
	 * @param array $existing
	 * @return array
	 */
	private function getInterwikiInsertions( $existing = [] ) {
		$arr = [];
		foreach ( $this->mInterwikis as $prefix => $dbkeys ) {
			$diffs = isset( $existing[$prefix] )
				? array_diff_key( $dbkeys, $existing[$prefix] )
				: $dbkeys;

			foreach ( $diffs as $dbk => $id ) {
				$arr[] = [
					'iwl_from' => $this->mId,
					'iwl_prefix' => $prefix,
					'iwl_title' => $dbk
				];
			}
		}

		return $arr;
	}

	/**
	 * Given an array of existing links, returns those links which are not in $this
	 * and thus should be deleted.
	 * @param array $existing
	 * @return array
	 */
	private function getLinkDeletions( $existing ) {
		$del = [];
		foreach ( $existing as $ns => $dbkeys ) {
			if ( isset( $this->mLinks[$ns] ) ) {
				$del[$ns] = array_diff_key( $existing[$ns], $this->mLinks[$ns] );
			} else {
				$del[$ns] = $existing[$ns];
			}
		}

		return $del;
	}

	/**
	 * Given an array of existing templates, returns those templates which are not in $this
	 * and thus should be deleted.
	 * @param array $existing
	 * @return array
	 */
	private function getTemplateDeletions( $existing ) {
		$del = [];
		foreach ( $existing as $ns => $dbkeys ) {
			if ( isset( $this->mTemplates[$ns] ) ) {
				$del[$ns] = array_diff_key( $existing[$ns], $this->mTemplates[$ns] );
			} else {
				$del[$ns] = $existing[$ns];
			}
		}

		return $del;
	}

	/**
	 * Given an array of existing images, returns those images which are not in $this
	 * and thus should be deleted.
	 * @param array $existing
	 * @return array
	 */
	private function getImageDeletions( $existing ) {
		return array_diff_key( $existing, $this->mImages );
	}

	/**
	 * Given an array of existing external links, returns those links which are not
	 * in $this and thus should be deleted.
	 * @param array $existing
	 * @return array
	 */
	private function getExternalDeletions( $existing ) {
		return array_diff_key( $existing, $this->mExternals );
	}

	/**
	 * Given an array of existing categories, returns those categories which are not in $this
	 * and thus should be deleted.
	 * @param array $existing
	 * @return array
	 */
	private function getCategoryDeletions( $existing ) {
		return array_diff_assoc( $existing, $this->mCategories );
	}

	/**
	 * Given an array of existing interlanguage links, returns those links which are not
	 * in $this and thus should be deleted.
	 * @param array $existing
	 * @return array
	 */
	private function getInterlangDeletions( $existing ) {
		return array_diff_assoc( $existing, $this->mInterlangs );
	}

	/**
	 * Get array of properties which should be deleted.
	 * @param array $existing
	 * @return array
	 */
	function getPropertyDeletions( $existing ) {
		return array_diff_assoc( $existing, $this->mProperties );
	}

	/**
	 * Given an array of existing interwiki links, returns those links which are not in $this
	 * and thus should be deleted.
	 * @param array $existing
	 * @return array
	 */
	private function getInterwikiDeletions( $existing ) {
		$del = [];
		foreach ( $existing as $prefix => $dbkeys ) {
			if ( isset( $this->mInterwikis[$prefix] ) ) {
				$del[$prefix] = array_diff_key( $existing[$prefix], $this->mInterwikis[$prefix] );
			} else {
				$del[$prefix] = $existing[$prefix];
			}
		}

		return $del;
	}

	/**
	 * Get an array of existing links, as a 2-D array
	 *
	 * @return array (namespace => dbkey => 1)
	 */
	private function getExistingLinks() {
		$res = $this->getDB()->select( 'pagelinks', [ 'pl_namespace', 'pl_title' ],
			[ 'pl_from' => $this->mId ], __METHOD__ );
		$arr = [];
		foreach ( $res as $row ) {
			if ( !isset( $arr[$row->pl_namespace] ) ) {
				$arr[$row->pl_namespace] = [];
			}
			$arr[$row->pl_namespace][$row->pl_title] = 1;
		}

		return $arr;
	}

	/**
	 * Get an array of existing templates, as a 2-D array
	 *
	 * @return array (namespace => dbkey => 1)
	 */
	private function getExistingTemplates() {
		$res = $this->getDB()->select( 'templatelinks', [ 'tl_namespace', 'tl_title' ],
			[ 'tl_from' => $this->mId ], __METHOD__ );
		$arr = [];
		foreach ( $res as $row ) {
			if ( !isset( $arr[$row->tl_namespace] ) ) {
				$arr[$row->tl_namespace] = [];
			}
			$arr[$row->tl_namespace][$row->tl_title] = 1;
		}

		return $arr;
	}

	/**
	 * Get an array of existing images, image names in the keys
	 *
	 * @return array (image DB key => 1)
	 */
	private function getExistingImages() {
		$res = $this->getDB()->select( 'imagelinks', [ 'il_to' ],
			[ 'il_from' => $this->mId ], __METHOD__ );
		$arr = [];
		foreach ( $res as $row ) {
			$arr[$row->il_to] = 1;
		}

		return $arr;
	}

	/**
	 * Get an array of existing external links, URLs in the keys
	 *
	 * @return array (URL => 1)
	 */
	private function getExistingExternals() {
		$res = $this->getDB()->select( 'externallinks', [ 'el_to' ],
			[ 'el_from' => $this->mId ], __METHOD__ );
		$arr = [];
		foreach ( $res as $row ) {
			$arr[$row->el_to] = 1;
		}

		return $arr;
	}

	/**
	 * Get an array of existing categories, with the name in the key and sort key in the value.
	 *
	 * @return array (category name => sortkey prefix)
	 */
	private function getExistingCategories() {
		$res = $this->getDB()->select( 'categorylinks', [ 'cl_to', 'cl_sortkey_prefix' ],
			[ 'cl_from' => $this->mId ], __METHOD__ );
		$arr = [];
		foreach ( $res as $row ) {
			$arr[$row->cl_to] = $row->cl_sortkey_prefix;
		}

		return $arr;
	}

	/**
	 * Get an array of existing interlanguage links, with the language code in the key and the
	 * title in the value.
	 *
	 * @return array (language code => title)
	 */
	private function getExistingInterlangs() {
		$res = $this->getDB()->select( 'langlinks', [ 'll_lang', 'll_title' ],
			[ 'll_from' => $this->mId ], __METHOD__ );
		$arr = [];
		foreach ( $res as $row ) {
			$arr[$row->ll_lang] = $row->ll_title;
		}

		return $arr;
	}

	/**
	 * Get an array of existing inline interwiki links, as a 2-D array
	 * @return array (prefix => array(dbkey => 1))
	 */
	private function getExistingInterwikis() {
		$res = $this->getDB()->select( 'iwlinks', [ 'iwl_prefix', 'iwl_title' ],
			[ 'iwl_from' => $this->mId ], __METHOD__ );
		$arr = [];
		foreach ( $res as $row ) {
			if ( !isset( $arr[$row->iwl_prefix] ) ) {
				$arr[$row->iwl_prefix] = [];
			}
			$arr[$row->iwl_prefix][$row->iwl_title] = 1;
		}

		return $arr;
	}

	/**
	 * Get an array of existing page properties, with the property name in the key
	 * and the property value in the value.
	 *
	 * @return array Array of property names and values
	 */
	private function getExistingProperties() {
		$res = $this->getDB()->select( 'page_props', [ 'pp_propname', 'pp_value' ],
			[ 'pp_page' => $this->mId ], __METHOD__ );
		$arr = [];
		foreach ( $res as $row ) {
			$arr[$row->pp_propname] = $row->pp_value;
		}

		return $arr;
	}

	/**
	 * Return the title object of the page being updated
	 * @return Title
	 */
	public function getTitle() {
		return $this->mTitle;
	}

	/**
	 * Returns parser output
	 * @since 1.19
	 * @return ParserOutput
	 */
	public function getParserOutput() {
		return $this->mParserOutput;
	}

	/**
	 * Return the list of images used as generated by the parser
	 * @return array
	 */
	public function getImages() {
		return $this->mImages;
	}

	/**
	 * Set the revision corresponding to this LinksUpdate
	 *
	 * @since 1.27
	 *
	 * @param Revision $revision
	 */
	public function setRevision( Revision $revision ) {
		$this->mRevision = $revision;
	}

	/**
	 * @since 1.28
	 * @return null|Revision
	 */
	public function getRevision() {
		return $this->mRevision;
	}

	/**
	 * Set the User who triggered this LinksUpdate
	 *
	 * @since 1.27
	 * @param User $user
	 */
	public function setTriggeringUser( User $user ) {
		$this->user = $user;
	}

	/**
	 * @since 1.27
	 * @return null|User
	 */
	public function getTriggeringUser() {
		return $this->user;
	}

	/**
	 * Invalidate any necessary link lists related to page property changes
	 * @param array $changed Map of changed property name => value
	 */
	private function invalidateProperties( $changed ) {
		global $wgPagePropLinkInvalidations;

		foreach ( $changed as $name => $value ) {
			if ( isset( $wgPagePropLinkInvalidations[$name] ) ) {
				$inv = $wgPagePropLinkInvalidations[$name];
				if ( !is_array( $inv ) ) {
					$inv = [ $inv ];
				}
				foreach ( $inv as $table ) {
					DeferredUpdates::addUpdate(
						new HTMLCacheUpdate( $this->mTitle, $table, 'page-props' )
					);
				}
			}
		}
	}

	/**
	 * Fetch page links
added by this LinksUpdate. Only available after the update is complete. + * @since 1.22 + * @return null|array Array of Titles + */ + public function getAddedLinks() { + if ( $this->linkInsertions === null ) { + return null; + } + $result = []; + foreach ( $this->linkInsertions as $insertion ) { + $result[] = Title::makeTitle( $insertion['pl_namespace'], $insertion['pl_title'] ); + } + + return $result; + } + + /** + * Fetch page links removed by this LinksUpdate. Only available after the update is complete. + * @since 1.22 + * @return null|array Array of Titles + */ + public function getRemovedLinks() { + if ( $this->linkDeletions === null ) { + return null; + } + $result = []; + foreach ( $this->linkDeletions as $ns => $titles ) { + foreach ( $titles as $title => $unused ) { + $result[] = Title::makeTitle( $ns, $title ); + } + } + + return $result; + } + + /** + * Fetch page properties added by this LinksUpdate. + * Only available after the update is complete. + * @since 1.28 + * @return null|array + */ + public function getAddedProperties() { + return $this->propertyInsertions; + } + + /** + * Fetch page properties removed by this LinksUpdate. + * Only available after the update is complete. 
+ * @since 1.28 + * @return null|array + */ + public function getRemovedProperties() { + return $this->propertyDeletions; + } + + /** + * Update links table freshness + */ + private function updateLinksTimestamp() { + if ( $this->mId ) { + // The link updates made here only reflect the freshness of the parser output + $timestamp = $this->mParserOutput->getCacheTime(); + $this->getDB()->update( 'page', + [ 'page_links_updated' => $this->getDB()->timestamp( $timestamp ) ], + [ 'page_id' => $this->mId ], + __METHOD__ + ); + } + } + + /** + * @return IDatabase + */ + private function getDB() { + if ( !$this->db ) { + $this->db = wfGetDB( DB_MASTER ); + } + + return $this->db; + } + + public function getAsJobSpecification() { + if ( $this->user ) { + $userInfo = [ + 'userId' => $this->user->getId(), + 'userName' => $this->user->getName(), + ]; + } else { + $userInfo = false; + } + + if ( $this->mRevision ) { + $triggeringRevisionId = $this->mRevision->getId(); + } else { + $triggeringRevisionId = false; + } + + return [ + 'wiki' => WikiMap::getWikiIdFromDomain( $this->getDB()->getDomainID() ), + 'job' => new JobSpecification( + 'refreshLinksPrioritized', + [ + // Reuse the parser cache if it was saved + 'rootJobTimestamp' => $this->mParserOutput->getCacheTime(), + 'useRecursiveLinksUpdate' => $this->mRecursive, + 'triggeringUser' => $userInfo, + 'triggeringRevisionId' => $triggeringRevisionId, + 'causeAction' => $this->getCauseAction(), + 'causeAgent' => $this->getCauseAgent() + ], + [ 'removeDuplicates' => true ], + $this->getTitle() + ) + ]; + } +} diff --git a/www/wiki/includes/deferred/MWCallableUpdate.php b/www/wiki/includes/deferred/MWCallableUpdate.php new file mode 100644 index 00000000..9803b7a4 --- /dev/null +++ b/www/wiki/includes/deferred/MWCallableUpdate.php @@ -0,0 +1,47 @@ +<?php + +use Wikimedia\Rdbms\IDatabase; + +/** + * Deferrable Update for closure/callback + */ +class MWCallableUpdate implements DeferrableUpdate, DeferrableCallback { + /** @var 
callable|null */ + private $callback; + /** @var string */ + private $fname; + + /** + * @param callable $callback + * @param string $fname Calling method + * @param IDatabase|IDatabase[]|null $dbws Abort if any of the specified DB handles have + * a currently pending transaction which later gets rolled back [optional] (since 1.28) + */ + public function __construct( callable $callback, $fname = 'unknown', $dbws = [] ) { + $this->callback = $callback; + $this->fname = $fname; + + $dbws = is_array( $dbws ) ? $dbws : [ $dbws ]; + foreach ( $dbws as $dbw ) { + if ( $dbw && $dbw->trxLevel() ) { + $dbw->onTransactionResolution( [ $this, 'cancelOnRollback' ], $fname ); + } + } + } + + public function doUpdate() { + if ( $this->callback ) { + call_user_func( $this->callback ); + } + } + + public function cancelOnRollback( $trigger ) { + if ( $trigger === IDatabase::TRIGGER_ROLLBACK ) { + $this->callback = null; + } + } + + public function getOrigin() { + return $this->fname; + } +} diff --git a/www/wiki/includes/deferred/MergeableUpdate.php b/www/wiki/includes/deferred/MergeableUpdate.php new file mode 100644 index 00000000..8eeef13b --- /dev/null +++ b/www/wiki/includes/deferred/MergeableUpdate.php @@ -0,0 +1,16 @@ +<?php + +/** + * Interface that deferrable updates can implement. DeferredUpdates uses this to merge + * all pending updates of PHP class into a single update by calling merge(). 
+ *
+ * @since 1.27
+ */
+interface MergeableUpdate extends DeferrableUpdate {
+	/**
+	 * Merge this update with $update
+	 *
+	 * @param MergeableUpdate $update Update of the same class type
+	 */
+	function merge( MergeableUpdate $update );
+}
diff --git a/www/wiki/includes/deferred/SearchUpdate.php b/www/wiki/includes/deferred/SearchUpdate.php
new file mode 100644
index 00000000..2766bcb1
--- /dev/null
+++ b/www/wiki/includes/deferred/SearchUpdate.php
@@ -0,0 +1,225 @@
+<?php
+/**
+ * Search index updater
+ *
+ * See deferred.txt
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Search
+ */
+
+use MediaWiki\MediaWikiServices;
+
+/**
+ * Database independent search index updater
+ *
+ * @ingroup Search
+ */
+class SearchUpdate implements DeferrableUpdate {
+	/** @var int Page id being updated */
+	private $id = 0;
+
+	/** @var Title Title we're updating */
+	private $title;
+
+	/** @var Content|bool Content of the page (not text) */
+	private $content;
+
+	/** @var WikiPage **/
+	private $page;
+
+	/**
+	 * @param int $id Page id to update
+	 * @param Title|string $title Title of page to update
+	 * @param Content|string|bool $c Content of the page to update. Default: false.
+	 *  If a Content object, text will be gotten from it. 
String is for back-compat. + * Passing false tells the backend to just update the title, not the content + */ + public function __construct( $id, $title, $c = false ) { + if ( is_string( $title ) ) { + $nt = Title::newFromText( $title ); + } else { + $nt = $title; + } + + if ( $nt ) { + $this->id = $id; + // is_string() check is back-compat for ApprovedRevs + if ( is_string( $c ) ) { + $this->content = new TextContent( $c ); + } else { + $this->content = $c ?: false; + } + $this->title = $nt; + } else { + wfDebug( "SearchUpdate object created with invalid title '$title'\n" ); + } + } + + /** + * Perform actual update for the entry + */ + public function doUpdate() { + $config = MediaWikiServices::getInstance()->getSearchEngineConfig(); + + if ( $config->getConfig()->get( 'DisableSearchUpdate' ) || !$this->id ) { + return; + } + + $seFactory = MediaWikiServices::getInstance()->getSearchEngineFactory(); + foreach ( $config->getSearchTypes() as $type ) { + $search = $seFactory->create( $type ); + if ( !$search->supports( 'search-update' ) ) { + continue; + } + + $normalTitle = $this->getNormalizedTitle( $search ); + + if ( $this->getLatestPage() === null ) { + $search->delete( $this->id, $normalTitle ); + continue; + } elseif ( $this->content === false ) { + $search->updateTitle( $this->id, $normalTitle ); + continue; + } + + $text = $search->getTextFromContent( $this->title, $this->content ); + if ( !$search->textAlreadyUpdatedForIndex() ) { + $text = $this->updateText( $text, $search ); + } + + # Perform the actual update + $search->update( $this->id, $normalTitle, $search->normalizeText( $text ) ); + } + } + + /** + * Clean text for indexing. Only really suitable for indexing in databases. + * If you're using a real search engine, you'll probably want to override + * this behavior and do something nicer with the original wikitext. 
+ * @param string $text + * @param SearchEngine $se Search engine + * @return string + */ + public function updateText( $text, SearchEngine $se = null ) { + global $wgContLang; + + # Language-specific strip/conversion + $text = $wgContLang->normalizeForSearch( $text ); + $se = $se ?: MediaWikiServices::getInstance()->newSearchEngine(); + $lc = $se->legalSearchChars() . '&#;'; + + $text = preg_replace( "/<\\/?\\s*[A-Za-z][^>]*?>/", + ' ', $wgContLang->lc( " " . $text . " " ) ); # Strip HTML markup + $text = preg_replace( "/(^|\\n)==\\s*([^\\n]+)\\s*==(\\s)/sD", + "\\1\\2 \\2 \\2\\3", $text ); # Emphasize headings + + # Strip external URLs + $uc = "A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\x80-\\xFF"; + $protos = "http|https|ftp|mailto|news|gopher"; + $pat = "/(^|[^\\[])({$protos}):[{$uc}]+([^{$uc}]|$)/"; + $text = preg_replace( $pat, "\\1 \\3", $text ); + + $p1 = "/([^\\[])\\[({$protos}):[{$uc}]+]/"; + $p2 = "/([^\\[])\\[({$protos}):[{$uc}]+\\s+([^\\]]+)]/"; + $text = preg_replace( $p1, "\\1 ", $text ); + $text = preg_replace( $p2, "\\1 \\3 ", $text ); + + # Internal image links + $pat2 = "/\\[\\[image:([{$uc}]+)\\.(gif|png|jpg|jpeg)([^{$uc}])/i"; + $text = preg_replace( $pat2, " \\1 \\3", $text ); + + $text = preg_replace( "/([^{$lc}])([{$lc}]+)]]([a-z]+)/", + "\\1\\2 \\2\\3", $text ); # Handle [[game]]s + + # Strip all remaining non-search characters + $text = preg_replace( "/[^{$lc}]+/", " ", $text ); + + /** + * Handle 's, s' + * + * $text = preg_replace( "/([{$lc}]+)'s /", "\\1 \\1's ", $text ); + * $text = preg_replace( "/([{$lc}]+)s' /", "\\1s ", $text ); + * + * These tail-anchored regexps are insanely slow. The worst case comes + * when Japanese or Chinese text (ie, no word spacing) is written on + * a wiki configured for Western UTF-8 mode. The Unicode characters are + * expanded to hex codes and the "words" are very long paragraph-length + * monstrosities. On a large page the above regexps may take over 20 + * seconds *each* on a 1GHz-level processor. 
+ * + * Following are reversed versions which are consistently fast + * (about 3 milliseconds on 1GHz-level processor). + */ + $text = strrev( preg_replace( "/ s'([{$lc}]+)/", " s'\\1 \\1", strrev( $text ) ) ); + $text = strrev( preg_replace( "/ 's([{$lc}]+)/", " s\\1", strrev( $text ) ) ); + + # Strip wiki '' and ''' + $text = preg_replace( "/''[']*/", " ", $text ); + + return $text; + } + + /** + * Get WikiPage for the SearchUpdate $id using WikiPage::READ_LATEST + * and ensure using the same WikiPage object if there are multiple + * SearchEngine types. + * + * Returns null if a page has been deleted or is not found. + * + * @return WikiPage|null + */ + private function getLatestPage() { + if ( !isset( $this->page ) ) { + $this->page = WikiPage::newFromID( $this->id, WikiPage::READ_LATEST ); + } + + return $this->page; + } + + /** + * Get a normalized string representation of a title suitable for + * including in a search index + * + * @param SearchEngine $search + * @return string A stripped-down title string ready for the search index + */ + private function getNormalizedTitle( SearchEngine $search ) { + global $wgContLang; + + $ns = $this->title->getNamespace(); + $title = $this->title->getText(); + + $lc = $search->legalSearchChars() . 
'&#;'; + $t = $wgContLang->normalizeForSearch( $title ); + $t = preg_replace( "/[^{$lc}]+/", ' ', $t ); + $t = $wgContLang->lc( $t ); + + # Handle 's, s' + $t = preg_replace( "/([{$lc}]+)'s( |$)/", "\\1 \\1's ", $t ); + $t = preg_replace( "/([{$lc}]+)s'( |$)/", "\\1s ", $t ); + + $t = preg_replace( "/\\s+/", ' ', $t ); + + if ( $ns == NS_FILE ) { + $t = preg_replace( "/ (png|gif|jpg|jpeg|ogg)$/", "", $t ); + } + + return $search->normalizeText( trim( $t ) ); + } +} diff --git a/www/wiki/includes/deferred/SiteStatsUpdate.php b/www/wiki/includes/deferred/SiteStatsUpdate.php new file mode 100644 index 00000000..7cb29509 --- /dev/null +++ b/www/wiki/includes/deferred/SiteStatsUpdate.php @@ -0,0 +1,286 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ +use MediaWiki\MediaWikiServices; +use Wikimedia\Assert\Assert; +use Wikimedia\Rdbms\IDatabase; + +/** + * Class for handling updates to the site_stats table + */ +class SiteStatsUpdate implements DeferrableUpdate, MergeableUpdate { + /** @var BagOStuff */ + protected $stash; + /** @var int */ + protected $edits = 0; + /** @var int */ + protected $pages = 0; + /** @var int */ + protected $articles = 0; + /** @var int */ + protected $users = 0; + /** @var int */ + protected $images = 0; + + private static $counters = [ 'edits', 'pages', 'articles', 'users', 'images' ]; + + // @todo deprecate this constructor + function __construct( $views, $edits, $good, $pages = 0, $users = 0 ) { + $this->edits = $edits; + $this->articles = $good; + $this->pages = $pages; + $this->users = $users; + + $this->stash = MediaWikiServices::getInstance()->getMainObjectStash(); + } + + public function merge( MergeableUpdate $update ) { + /** @var SiteStatsUpdate $update */ + Assert::parameterType( __CLASS__, $update, '$update' ); + + foreach ( self::$counters as $field ) { + $this->$field += $update->$field; + } + } + + /** + * @param array $deltas + * @return SiteStatsUpdate + */ + public static function factory( array $deltas ) { + $update = new self( 0, 0, 0 ); + + foreach ( $deltas as $name => $unused ) { + if ( !in_array( $name, self::$counters ) ) { // T187585 + throw new UnexpectedValueException( __METHOD__ . ": no field called '$name'" ); + } + } + + foreach ( self::$counters as $field ) { + if ( isset( $deltas[$field] ) && $deltas[$field] ) { + $update->$field = $deltas[$field]; + } + } + + return $update; + } + + public function doUpdate() { + $this->doUpdateContextStats(); + + $rate = MediaWikiServices::getInstance()->getMainConfig()->get( 'SiteStatsAsyncFactor' ); + // If set to do so, only do actual DB updates 1 every $rate times. + // The other times, just update "pending delta" values in memcached. 
+ if ( $rate && ( $rate < 0 || mt_rand( 0, $rate - 1 ) != 0 ) ) { + $this->doUpdatePendingDeltas(); + } else { + // Need a separate transaction because this a global lock + DeferredUpdates::addCallableUpdate( [ $this, 'tryDBUpdateInternal' ] ); + } + } + + /** + * Do not call this outside of SiteStatsUpdate + */ + public function tryDBUpdateInternal() { + $services = MediaWikiServices::getInstance(); + $config = $services->getMainConfig(); + + $dbw = $services->getDBLoadBalancer()->getConnection( DB_MASTER ); + $lockKey = $dbw->getDomainID() . ':site_stats'; // prepend wiki ID + $pd = []; + if ( $config->get( 'SiteStatsAsyncFactor' ) ) { + // Lock the table so we don't have double DB/memcached updates + if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) { + $this->doUpdatePendingDeltas(); + + return; + } + $pd = $this->getPendingDeltas(); + // Piggy-back the async deltas onto those of this stats update.... + $this->edits += ( $pd['ss_total_edits']['+'] - $pd['ss_total_edits']['-'] ); + $this->articles += ( $pd['ss_good_articles']['+'] - $pd['ss_good_articles']['-'] ); + $this->pages += ( $pd['ss_total_pages']['+'] - $pd['ss_total_pages']['-'] ); + $this->users += ( $pd['ss_users']['+'] - $pd['ss_users']['-'] ); + $this->images += ( $pd['ss_images']['+'] - $pd['ss_images']['-'] ); + } + + // Build up an SQL query of deltas and apply them... 
+ $updates = ''; + $this->appendUpdate( $updates, 'ss_total_edits', $this->edits ); + $this->appendUpdate( $updates, 'ss_good_articles', $this->articles ); + $this->appendUpdate( $updates, 'ss_total_pages', $this->pages ); + $this->appendUpdate( $updates, 'ss_users', $this->users ); + $this->appendUpdate( $updates, 'ss_images', $this->images ); + if ( $updates != '' ) { + $dbw->update( 'site_stats', [ $updates ], [], __METHOD__ ); + } + + if ( $config->get( 'SiteStatsAsyncFactor' ) ) { + // Decrement the async deltas now that we applied them + $this->removePendingDeltas( $pd ); + // Commit the updates and unlock the table + $dbw->unlock( $lockKey, __METHOD__ ); + } + + // Invalid cache used by parser functions + SiteStats::unload(); + } + + /** + * @param IDatabase $dbw + * @return bool|mixed + */ + public static function cacheUpdate( IDatabase $dbw ) { + $services = MediaWikiServices::getInstance(); + $config = $services->getMainConfig(); + + $dbr = $services->getDBLoadBalancer()->getConnection( DB_REPLICA, 'vslow' ); + # Get non-bot users than did some recent action other than making accounts. + # If account creation is included, the number gets inflated ~20+ fold on enwiki. + $rcQuery = RecentChange::getQueryInfo(); + $activeUsers = $dbr->selectField( + $rcQuery['tables'], + 'COUNT( DISTINCT ' . $rcQuery['fields']['rc_user_text'] . ' )', + [ + 'rc_type != ' . $dbr->addQuotes( RC_EXTERNAL ), // Exclude external (Wikidata) + ActorMigration::newMigration()->isNotAnon( $rcQuery['fields']['rc_user'] ), + 'rc_bot' => 0, + 'rc_log_type != ' . $dbr->addQuotes( 'newusers' ) . ' OR rc_log_type IS NULL', + 'rc_timestamp >= ' . 
$dbr->addQuotes( + $dbr->timestamp( time() - $config->get( 'ActiveUserDays' ) * 24 * 3600 ) ), + ], + __METHOD__, + [], + $rcQuery['joins'] + ); + $dbw->update( + 'site_stats', + [ 'ss_active_users' => intval( $activeUsers ) ], + [ 'ss_row_id' => 1 ], + __METHOD__ + ); + + // Invalid cache used by parser functions + SiteStats::unload(); + + return $activeUsers; + } + + protected function doUpdateContextStats() { + $stats = MediaWikiServices::getInstance()->getStatsdDataFactory(); + foreach ( [ 'edits', 'articles', 'pages', 'users', 'images' ] as $type ) { + $delta = $this->$type; + if ( $delta !== 0 ) { + $stats->updateCount( "site.$type", $delta ); + } + } + } + + protected function doUpdatePendingDeltas() { + $this->adjustPending( 'ss_total_edits', $this->edits ); + $this->adjustPending( 'ss_good_articles', $this->articles ); + $this->adjustPending( 'ss_total_pages', $this->pages ); + $this->adjustPending( 'ss_users', $this->users ); + $this->adjustPending( 'ss_images', $this->images ); + } + + /** + * @param string &$sql + * @param string $field + * @param int $delta + */ + protected function appendUpdate( &$sql, $field, $delta ) { + if ( $delta ) { + if ( $sql ) { + $sql .= ','; + } + if ( $delta < 0 ) { + $sql .= "$field=$field-" . abs( $delta ); + } else { + $sql .= "$field=$field+" . abs( $delta ); + } + } + } + + /** + * @param BagOStuff $stash + * @param string $type + * @param string $sign ('+' or '-') + * @return string + */ + private function getTypeCacheKey( BagOStuff $stash, $type, $sign ) { + return $stash->makeKey( 'sitestatsupdate', 'pendingdelta', $type, $sign ); + } + + /** + * Adjust the pending deltas for a stat type. 
+ * Each stat type has two pending counters, one for increments and decrements + * @param string $type + * @param int $delta Delta (positive or negative) + */ + protected function adjustPending( $type, $delta ) { + if ( $delta < 0 ) { // decrement + $key = $this->getTypeCacheKey( $this->stash, $type, '-' ); + } else { // increment + $key = $this->getTypeCacheKey( $this->stash, $type, '+' ); + } + + $magnitude = abs( $delta ); + $this->stash->incrWithInit( $key, 0, $magnitude, $magnitude ); + } + + /** + * Get pending delta counters for each stat type + * @return array Positive and negative deltas for each type + */ + protected function getPendingDeltas() { + $pending = []; + foreach ( [ 'ss_total_edits', + 'ss_good_articles', 'ss_total_pages', 'ss_users', 'ss_images' ] as $type + ) { + // Get pending increments and pending decrements + $flg = BagOStuff::READ_LATEST; + $pending[$type]['+'] = (int)$this->stash->get( + $this->getTypeCacheKey( $this->stash, $type, '+' ), + $flg + ); + $pending[$type]['-'] = (int)$this->stash->get( + $this->getTypeCacheKey( $this->stash, $type, '-' ), + $flg + ); + } + + return $pending; + } + + /** + * Reduce pending delta counters after updates have been applied + * @param array $pd Result of getPendingDeltas(), used for DB update + */ + protected function removePendingDeltas( array $pd ) { + foreach ( $pd as $type => $deltas ) { + foreach ( $deltas as $sign => $magnitude ) { + // Lower the pending counter now that we applied these changes + $key = $this->getTypeCacheKey( $this->stash, $type, $sign ); + $this->stash->decr( $key, $magnitude ); + } + } + } +} diff --git a/www/wiki/includes/deferred/SqlDataUpdate.php b/www/wiki/includes/deferred/SqlDataUpdate.php new file mode 100644 index 00000000..2411beff --- /dev/null +++ b/www/wiki/includes/deferred/SqlDataUpdate.php @@ -0,0 +1,40 @@ +<?php +/** + * Base code for update jobs that put some secondary data extracted + * from article content into the database. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +use Wikimedia\Rdbms\IDatabase; + +/** + * @deprecated Since 1.28 Use DataUpdate directly, injecting the database + */ +abstract class SqlDataUpdate extends DataUpdate { + /** @var IDatabase Database connection reference */ + protected $mDb; + /** @var array SELECT options to be used (array) */ + protected $mOptions = []; + + public function __construct() { + parent::__construct(); + + $this->mDb = wfGetLB()->getLazyConnectionRef( DB_MASTER ); + } +} diff --git a/www/wiki/includes/deferred/TransactionRoundDefiningUpdate.php b/www/wiki/includes/deferred/TransactionRoundDefiningUpdate.php new file mode 100644 index 00000000..a32d4a07 --- /dev/null +++ b/www/wiki/includes/deferred/TransactionRoundDefiningUpdate.php @@ -0,0 +1,30 @@ +<?php + +/** + * Deferrable update that must run outside of any explicit LBFactory transaction round + * + * @since 1.31 + */ +class TransactionRoundDefiningUpdate implements DeferrableUpdate, DeferrableCallback { + /** @var callable|null */ + private $callback; + /** @var string */ + private $fname; + + /** + * @param callable $callback + * @param string $fname Calling method + */ + public function __construct( callable $callback, $fname = 'unknown' ) { + 
$this->callback = $callback;
+		$this->fname = $fname;
+	}
+
+	public function doUpdate() {
+		call_user_func( $this->callback );
+	}
+
+	public function getOrigin() {
+		return $this->fname;
+	}
+}
diff --git a/www/wiki/includes/deferred/WANCacheReapUpdate.php b/www/wiki/includes/deferred/WANCacheReapUpdate.php
new file mode 100644
index 00000000..5ffc9388
--- /dev/null
+++ b/www/wiki/includes/deferred/WANCacheReapUpdate.php
@@ -0,0 +1,133 @@
+<?php
+
+use Psr\Log\LoggerInterface;
+use Wikimedia\Rdbms\IDatabase;
+
+/**
+ * Class for fixing stale WANObjectCache keys using a purge event source
+ *
+ * This is useful for expiring keys that missed fire-and-forget purges. This uses the
+ * recentchanges table as a reliable stream to make certain keys reach consistency
+ * as soon as the underlying replica database catches up. This means that critical
+ * keys will not escape getting purged simply due to brief hiccups in the network,
+ * which are more prone to happen across datacenters.
+ *
+ * ----
+ * "I was trying to cheat death. I was only trying to surmount for a little while the
+ * darkness that all my life I surely knew was going to come rolling in on me some day
+ * and obliterate me. I was only to stay alive a little brief while longer, after I was
+ * already gone. To stay in the light, to be with the living, a little while past my time."
+ * -- Notes for "Blues of a Lifetime", by [[Cornell Woolrich]] + * + * @since 1.28 + */ +class WANCacheReapUpdate implements DeferrableUpdate { + /** @var IDatabase */ + private $db; + /** @var LoggerInterface */ + private $logger; + + /** + * @param IDatabase $db + * @param LoggerInterface $logger + */ + public function __construct( IDatabase $db, LoggerInterface $logger ) { + $this->db = $db; + $this->logger = $logger; + } + + function doUpdate() { + $reaper = new WANObjectCacheReaper( + ObjectCache::getMainWANInstance(), + ObjectCache::getLocalClusterInstance(), + [ $this, 'getTitleChangeEvents' ], + [ $this, 'getEventAffectedKeys' ], + [ + 'channel' => 'table:recentchanges:' . $this->db->getDomainID(), + 'logger' => $this->logger + ] + ); + + $reaper->invoke( 100 ); + } + + /** + * @see WANObjectCacheRepear + * + * @param int $start + * @param int $id + * @param int $end + * @param int $limit + * @return TitleValue[] + */ + public function getTitleChangeEvents( $start, $id, $end, $limit ) { + $db = $this->db; + $encStart = $db->addQuotes( $db->timestamp( $start ) ); + $encEnd = $db->addQuotes( $db->timestamp( $end ) ); + $id = (int)$id; // cast NULL => 0 since rc_id is an integer + + $res = $db->select( + 'recentchanges', + [ 'rc_namespace', 'rc_title', 'rc_timestamp', 'rc_id' ], + [ + $db->makeList( [ + "rc_timestamp > $encStart", + "rc_timestamp = $encStart AND rc_id > " . 
$db->addQuotes( $id ) + ], LIST_OR ), + "rc_timestamp < $encEnd" + ], + __METHOD__, + [ 'ORDER BY' => 'rc_timestamp ASC, rc_id ASC', 'LIMIT' => $limit ] + ); + + $events = []; + foreach ( $res as $row ) { + $events[] = [ + 'id' => (int)$row->rc_id, + 'pos' => (int)wfTimestamp( TS_UNIX, $row->rc_timestamp ), + 'item' => new TitleValue( (int)$row->rc_namespace, $row->rc_title ) + ]; + } + + return $events; + } + + /** + * Gets a list of important cache keys associated with a title + * + * @see WANObjectCacheRepear + * @param WANObjectCache $cache + * @param TitleValue $t + * @return string[] + */ + public function getEventAffectedKeys( WANObjectCache $cache, TitleValue $t ) { + /** @var WikiPage[]|LocalFile[]|User[] $entities */ + $entities = []; + + // You can't create a WikiPage for special pages (-1) or other virtual + // namespaces, but special pages do appear in RC sometimes, e.g. for logs + // of AbuseFilter filter changes. + if ( $t->getNamespace() >= 0 ) { + $entities[] = WikiPage::factory( Title::newFromTitleValue( $t ) ); + } + + if ( $t->inNamespace( NS_FILE ) ) { + $entities[] = wfLocalFile( $t->getText() ); + } + if ( $t->inNamespace( NS_USER ) ) { + $entities[] = User::newFromName( $t->getText(), false ); + } + + $keys = []; + foreach ( $entities as $entity ) { + if ( $entity ) { + $keys = array_merge( $keys, $entity->getMutableCacheKeys( $cache ) ); + } + } + if ( $keys ) { + $this->logger->debug( __CLASS__ . ': got key(s) ' . implode( ', ', $keys ) ); + } + + return $keys; + } +} |