summaryrefslogtreecommitdiff
path: root/www/wiki/includes/deferred
diff options
context:
space:
mode:
authorYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
committerYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
commitfc7369835258467bf97eb64f184b93691f9a9fd5 (patch)
treedaabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/includes/deferred
first commit
Diffstat (limited to 'www/wiki/includes/deferred')
-rw-r--r--www/wiki/includes/deferred/AtomicSectionUpdate.php48
-rw-r--r--www/wiki/includes/deferred/AutoCommitUpdate.php62
-rw-r--r--www/wiki/includes/deferred/CdnCacheUpdate.php295
-rw-r--r--www/wiki/includes/deferred/DataUpdate.php83
-rw-r--r--www/wiki/includes/deferred/DeferrableCallback.php13
-rw-r--r--www/wiki/includes/deferred/DeferrableUpdate.php14
-rw-r--r--www/wiki/includes/deferred/DeferredUpdates.php379
-rw-r--r--www/wiki/includes/deferred/EnqueueableDataUpdate.php15
-rw-r--r--www/wiki/includes/deferred/HTMLCacheUpdate.php60
-rw-r--r--www/wiki/includes/deferred/LinksDeletionUpdate.php242
-rw-r--r--www/wiki/includes/deferred/LinksUpdate.php1182
-rw-r--r--www/wiki/includes/deferred/MWCallableUpdate.php47
-rw-r--r--www/wiki/includes/deferred/MergeableUpdate.php16
-rw-r--r--www/wiki/includes/deferred/SearchUpdate.php225
-rw-r--r--www/wiki/includes/deferred/SiteStatsUpdate.php286
-rw-r--r--www/wiki/includes/deferred/SqlDataUpdate.php40
-rw-r--r--www/wiki/includes/deferred/TransactionRoundDefiningUpdate.php30
-rw-r--r--www/wiki/includes/deferred/WANCacheReapUpdate.php133
18 files changed, 3170 insertions, 0 deletions
diff --git a/www/wiki/includes/deferred/AtomicSectionUpdate.php b/www/wiki/includes/deferred/AtomicSectionUpdate.php
new file mode 100644
index 00000000..8b62989b
--- /dev/null
+++ b/www/wiki/includes/deferred/AtomicSectionUpdate.php
@@ -0,0 +1,48 @@
+<?php
+
+use Wikimedia\Rdbms\IDatabase;
+
+/**
+ * Deferrable Update for closure/callback updates via IDatabase::doAtomicSection()
+ * @since 1.27
+ */
+class AtomicSectionUpdate implements DeferrableUpdate, DeferrableCallback {
+	/** @var IDatabase Handle whose doAtomicSection() runs the callback */
+	private $dbw;
+	/** @var string Caller name; passed to doAtomicSection() and returned by getOrigin() */
+	private $fname;
+	/** @var callable|null Pending callback, or null once cancelled by a rollback */
+	private $callback;
+
+	/**
+	 * Remember the callback and, when trxLevel() reports an open transaction on
+	 * $dbw, register cancelOnRollback() so a rollback can cancel this update.
+	 *
+	 * @param IDatabase $dbw
+	 * @param string $fname Caller name (usually __METHOD__)
+	 * @param callable $callback
+	 * @see IDatabase::doAtomicSection()
+	 */
+	public function __construct( IDatabase $dbw, $fname, callable $callback ) {
+		$this->callback = $callback;
+		$this->fname = $fname;
+		$this->dbw = $dbw;
+
+		if ( !$dbw->trxLevel() ) {
+			// No transaction level reported, so there is no resolution to listen for
+			return;
+		}
+
+		$dbw->onTransactionResolution( [ $this, 'cancelOnRollback' ], $fname );
+	}
+
+	/**
+	 * Run the callback through IDatabase::doAtomicSection(), unless it was cancelled.
+	 */
+	public function doUpdate() {
+		if ( !$this->callback ) {
+			return;
+		}
+
+		$this->dbw->doAtomicSection( $this->fname, $this->callback );
+	}
+
+	/**
+	 * Transaction resolution listener: drop the callback when the trigger is a rollback.
+	 *
+	 * @param int $trigger Compared against IDatabase::TRIGGER_ROLLBACK
+	 */
+	public function cancelOnRollback( $trigger ) {
+		if ( $trigger !== IDatabase::TRIGGER_ROLLBACK ) {
+			return;
+		}
+
+		$this->callback = null;
+	}
+
+	/**
+	 * @return string The caller name given to the constructor
+	 */
+	public function getOrigin() {
+		return $this->fname;
+	}
+}
diff --git a/www/wiki/includes/deferred/AutoCommitUpdate.php b/www/wiki/includes/deferred/AutoCommitUpdate.php
new file mode 100644
index 00000000..f9297af5
--- /dev/null
+++ b/www/wiki/includes/deferred/AutoCommitUpdate.php
@@ -0,0 +1,62 @@
+<?php
+
+use Wikimedia\Rdbms\IDatabase;
+
+/**
+ * Deferrable Update for closure/callback updates that should use auto-commit mode
+ * @since 1.28
+ */
+class AutoCommitUpdate implements DeferrableUpdate, DeferrableCallback {
+	/** @var IDatabase Handle on which DBO_TRX is temporarily cleared */
+	private $dbw;
+	/** @var string Caller name; passed to the callback and returned by getOrigin() */
+	private $fname;
+	/** @var callable|null Pending callback, or null once cancelled by a rollback */
+	private $callback;
+
+	/**
+	 * Remember the callback and, when trxLevel() reports an open transaction on
+	 * $dbw, register cancelOnRollback() so a rollback can cancel this update.
+	 *
+	 * @param IDatabase $dbw
+	 * @param string $fname Caller name (usually __METHOD__)
+	 * @param callable $callback Callback that takes (IDatabase, method name string)
+	 */
+	public function __construct( IDatabase $dbw, $fname, callable $callback ) {
+		$this->dbw = $dbw;
+		$this->fname = $fname;
+		$this->callback = $callback;
+
+		if ( $this->dbw->trxLevel() ) {
+			$this->dbw->onTransactionResolution( [ $this, 'cancelOnRollback' ], $fname );
+		}
+	}
+
+	/**
+	 * Run the callback with the DBO_TRX flag cleared on the handle.
+	 *
+	 * The flag is put back (if it was set beforehand) no matter how the callback
+	 * exits. Anything thrown by the callback still propagates to the caller.
+	 */
+	public function doUpdate() {
+		if ( !$this->callback ) {
+			return;
+		}
+
+		$autoTrx = $this->dbw->getFlag( DBO_TRX );
+		$this->dbw->clearFlag( DBO_TRX );
+		try {
+			call_user_func_array( $this->callback, [ $this->dbw, $this->fname ] );
+		} finally {
+			// A plain catch ( Exception ) would miss PHP 7 Error throwables and
+			// leave the handle without DBO_TRX; finally covers every exit path.
+			if ( $autoTrx ) {
+				$this->dbw->setFlag( DBO_TRX );
+			}
+		}
+	}
+
+	/**
+	 * Transaction resolution listener: drop the callback when the trigger is a rollback.
+	 *
+	 * @param int $trigger Compared against IDatabase::TRIGGER_ROLLBACK
+	 */
+	public function cancelOnRollback( $trigger ) {
+		if ( $trigger === IDatabase::TRIGGER_ROLLBACK ) {
+			$this->callback = null;
+		}
+	}
+
+	/**
+	 * @return string The caller name given to the constructor
+	 */
+	public function getOrigin() {
+		return $this->fname;
+	}
+}
diff --git a/www/wiki/includes/deferred/CdnCacheUpdate.php b/www/wiki/includes/deferred/CdnCacheUpdate.php
new file mode 100644
index 00000000..301c4f3b
--- /dev/null
+++ b/www/wiki/includes/deferred/CdnCacheUpdate.php
@@ -0,0 +1,295 @@
+<?php
+/**
+ * CDN cache purging.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+use Wikimedia\Assert\Assert;
+use MediaWiki\MediaWikiServices;
+
+/**
+ * Handles purging appropriate CDN URLs given a title (or titles)
+ * @ingroup Cache
+ */
+class CdnCacheUpdate implements DeferrableUpdate, MergeableUpdate {
+ /** @var string[] Collection of URLs to purge */
+ protected $urls = [];
+
+ /**
+ * Store the URLs that doUpdate() will later hand to purge().
+ *
+ * @param string[] $urlArr Collection of URLs to purge
+ */
+ public function __construct( array $urlArr ) {
+ $this->urls = $urlArr;
+ }
+
+	/**
+	 * Append the URLs held by another CdnCacheUpdate to this one.
+	 *
+	 * @param MergeableUpdate $update Checked with Assert::parameterType() against this class
+	 */
+	public function merge( MergeableUpdate $update ) {
+		/** @var CdnCacheUpdate $update */
+		Assert::parameterType( __CLASS__, $update, '$update' );
+
+		$incomingUrls = $update->urls;
+		$this->urls = array_merge( $this->urls, $incomingUrls );
+	}
+
+	/**
+	 * Create an update object from an array of Title objects, or a TitleArray object
+	 *
+	 * A LinkBatch over all titles is executed first; then the result of
+	 * getCdnUrls() for every title is appended to $urlArr.
+	 *
+	 * @param Traversable|Title[] $titles
+	 * @param string[] $urlArr Extra URLs placed in front of the per-title URLs
+	 * @return CdnCacheUpdate
+	 */
+	public static function newFromTitles( $titles, $urlArr = [] ) {
+		( new LinkBatch( $titles ) )->execute();
+
+		// Collect the per-title lists and merge them in a single call. Calling
+		// array_merge() inside the loop would re-copy the growing list each time.
+		$urlLists = [ $urlArr ];
+		/** @var Title $title */
+		foreach ( $titles as $title ) {
+			$urlLists[] = $title->getCdnUrls();
+		}
+
+		return new CdnCacheUpdate( call_user_func_array( 'array_merge', $urlLists ) );
+	}
+
+ /**
+ * Create an update object holding the CDN URLs of a single title, as
+ * returned by Title::getCdnUrls().
+ *
+ * @param Title $title
+ * @return CdnCacheUpdate
+ * @deprecated since 1.27
+ */
+ public static function newSimplePurge( Title $title ) {
+ return new CdnCacheUpdate( $title->getCdnUrls() );
+ }
+
+	/**
+	 * Purge the URLs held by this update and, when $wgCdnReboundPurgeDelay is
+	 * positive, lazily push a CdnPurgeJob for the same URLs whose release
+	 * timestamp lies that many seconds in the future.
+	 */
+	public function doUpdate() {
+		global $wgCdnReboundPurgeDelay;
+
+		self::purge( $this->urls );
+
+		if ( !( $wgCdnReboundPurgeDelay > 0 ) ) {
+			// Rebound purges are disabled
+			return;
+		}
+
+		$jobQueueGroup = JobQueueGroup::singleton();
+		$placeholderTitle = Title::makeTitle( NS_SPECIAL, 'Badtitle/' . __CLASS__ );
+		$jobParams = [
+			'urls' => $this->urls,
+			'jobReleaseTimestamp' => time() + $wgCdnReboundPurgeDelay
+		];
+
+		$jobQueueGroup->lazyPush( new CdnPurgeJob( $placeholderTitle, $jobParams ) );
+	}
+
+	/**
+	 * Purge a list of full URLs from the CDN layer.
+	 *
+	 * Three channels are used, in this order:
+	 *  - the 'cdn-url-purges' event relayer (always),
+	 *  - HTCP, when $wgHTCPRouting is set,
+	 *  - direct requests to every server in $wgSquidServers, when set.
+	 *
+	 * $urlArr should contain the full URLs to purge as values
+	 * (example: $urlArr[] = 'http://my.host/something')
+	 *
+	 * @param string[] $urlArr List of full URLs to purge
+	 */
+	public static function purge( array $urlArr ) {
+		global $wgSquidServers, $wgHTCPRouting;
+
+		if ( !$urlArr ) {
+			return;
+		}
+
+		// Each URL only needs to be purged once
+		$urlArr = array_unique( $urlArr );
+
+		wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urlArr ) );
+
+		// Reliably broadcast the purge to all edge nodes
+		$relayer = MediaWikiServices::getInstance()->getEventRelayerGroup()
+			->getRelayer( 'cdn-url-purges' );
+		$now = microtime( true );
+		$purgeEvents = [];
+		foreach ( $urlArr as $key => $url ) {
+			// Keys are carried over unchanged, as array_map() would do
+			$purgeEvents[$key] = [
+				'url' => $url,
+				'timestamp' => $now,
+			];
+		}
+		$relayer->notifyMulti( 'cdn-url-purges', $purgeEvents );
+
+		// Send lossy UDP broadcasting if enabled
+		if ( $wgHTCPRouting ) {
+			self::HTCPPurge( $urlArr );
+		}
+
+		// Direct server purges are optional (this does not scale very well)
+		if ( !$wgSquidServers ) {
+			return;
+		}
+
+		// Maximum number of parallel connections per squid
+		$maxSocketsPerSquid = 8;
+		// Number of requests to send per socket
+		// 400 seems to be a good tradeoff, opening a socket takes a while
+		$urlsPerSocket = 400;
+		$urlCount = count( $urlArr );
+		$socketsPerSquid = min( ceil( $urlCount / $urlsPerSocket ), $maxSocketsPerSquid );
+
+		$pool = new SquidPurgeClientPool;
+		$chunks = array_chunk( $urlArr, ceil( $urlCount / $socketsPerSquid ) );
+		foreach ( $wgSquidServers as $server ) {
+			// One client per chunk, so each server gets up to $socketsPerSquid clients
+			foreach ( $chunks as $chunk ) {
+				$client = new SquidPurgeClient( $server );
+				foreach ( $chunk as $url ) {
+					$client->queuePurge( $url );
+				}
+				$pool->addClient( $client );
+			}
+		}
+
+		$pool->run();
+	}
+
+	/**
+	 * Send Hyper Text Caching Protocol (HTCP) CLR requests.
+	 *
+	 * One UDP datagram is built per URL and sent to every host/port pair of the
+	 * $wgHTCPRouting rule matching that URL. URLs without a matching rule are
+	 * logged and skipped. The socket is closed before returning, including
+	 * when an exception is thrown part-way through the list.
+	 *
+	 * @throws MWException If a URL is not a string or a matched rule lacks host/port
+	 * @param string[] $urlArr Collection of URLs to purge
+	 */
+	private static function HTCPPurge( array $urlArr ) {
+		global $wgHTCPRouting, $wgHTCPMulticastTTL;
+
+		// HTCP CLR operation
+		$htcpOpCLR = 4;
+
+		// @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
+		// Each constant is guarded on its own, so a build that already provides
+		// only some of them does not trigger "already defined" notices.
+		$fallbackConstants = [
+			'IPPROTO_IP' => 0,
+			'IP_MULTICAST_LOOP' => 34,
+			'IP_MULTICAST_TTL' => 33,
+		];
+		foreach ( $fallbackConstants as $constName => $constValue ) {
+			if ( !defined( $constName ) ) {
+				define( $constName, $constValue );
+			}
+		}
+
+		// pfsockopen doesn't work because we need set_sock_opt
+		$conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
+		if ( !$conn ) {
+			$errstr = socket_strerror( socket_last_error() );
+			wfDebugLog( 'squid', __METHOD__ .
+				": Error opening UDP socket: $errstr" );
+
+			return;
+		}
+
+		try {
+			// Set socket options
+			socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
+			if ( $wgHTCPMulticastTTL != 1 ) {
+				// Set multicast time to live (hop count) option on socket
+				socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
+					$wgHTCPMulticastTTL );
+			}
+
+			// Get sequential trx IDs for packet loss counting
+			$ids = UIDGenerator::newSequentialPerNodeIDs(
+				'squidhtcppurge', 32, count( $urlArr ), UIDGenerator::QUICK_VOLATILE
+			);
+
+			foreach ( $urlArr as $url ) {
+				if ( !is_string( $url ) ) {
+					throw new MWException( 'Bad purge URL' );
+				}
+				$url = self::expand( $url );
+				$conf = self::getRuleForURL( $url, $wgHTCPRouting );
+				if ( !$conf ) {
+					wfDebugLog( 'squid', __METHOD__ .
+						": No HTCP rule configured for URL {$url}, skipping" );
+					continue;
+				}
+
+				if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
+					// Normalize single entries
+					$conf = [ $conf ];
+				}
+				foreach ( $conf as $subconf ) {
+					if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
+						throw new MWException( "Invalid HTCP rule for URL $url\n" );
+					}
+				}
+
+				// Construct a minimal HTCP request datagram
+				// as per RFC 2756
+				// Opcode 'CLR', no response desired, no auth
+				$htcpTransID = current( $ids );
+				next( $ids );
+
+				$htcpSpecifier = pack( 'na4na*na8n',
+					4, 'HEAD', strlen( $url ), $url,
+					8, 'HTTP/1.0', 0 );
+
+				$htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
+				$htcpLen = 4 + $htcpDataLen + 2;
+
+				// Note! Squid gets the bit order of the first
+				// word wrong, wrt the RFC. Apparently no other
+				// implementation exists, so adapt to Squid
+				$htcpPacket = pack( 'nxxnCxNxxa*n',
+					$htcpLen, $htcpDataLen, $htcpOpCLR,
+					$htcpTransID, $htcpSpecifier, 2 );
+
+				wfDebugLog( 'squid', __METHOD__ .
+					": Purging URL $url via HTCP" );
+				foreach ( $conf as $subconf ) {
+					// Return value deliberately ignored: HTCP over UDP is lossy by design
+					socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
+						$subconf['host'], $subconf['port'] );
+				}
+			}
+		} finally {
+			// Release the socket on every exit path, including the MWException ones
+			socket_close( $conn );
+		}
+	}
+
+ /**
+ * Expand local URLs to fully-qualified URLs using the internal protocol
+ * and host defined in $wgInternalServer. Input that's already fully-
+ * qualified will be passed through unchanged.
+ *
+ * This is used to generate purge URLs that may be either local to the
+ * main wiki or include a non-native host, such as images hosted on a
+ * second internal server.
+ *
+ * Client functions should not need to call this.
+ *
+ * Thin wrapper: the work is delegated to wfExpandUrl() with PROTO_INTERNAL.
+ *
+ * @param string $url
+ * @return string
+ */
+ public static function expand( $url ) {
+ return wfExpandUrl( $url, PROTO_INTERNAL );
+ }
+
+	/**
+	 * Pick the HTCP routing entry for a URL.
+	 *
+	 * Rules are tried in array order. The first key that is either the empty
+	 * string (catch-all) or a regex matching $url wins.
+	 *
+	 * @param string $url URL to match
+	 * @param array $rules Array of rules, see $wgHTCPRouting for format and behavior
+	 * @return mixed Element of $rules that matched, or false if nothing matched
+	 */
+	private static function getRuleForURL( $url, $rules ) {
+		foreach ( $rules as $pattern => $target ) {
+			$isCatchAll = ( $pattern === '' );
+			if ( $isCatchAll || preg_match( $pattern, $url ) ) {
+				return $target;
+			}
+		}
+
+		// No rule applied to this URL
+		return false;
+	}
+}
+
+/**
+ * Empty subclass of CdnCacheUpdate, kept so that code referring to the old
+ * class name keeps working. It adds no members of its own.
+ *
+ * @deprecated since 1.27
+ */
+class SquidUpdate extends CdnCacheUpdate {
+ // Keep class name for b/c
+}
diff --git a/www/wiki/includes/deferred/DataUpdate.php b/www/wiki/includes/deferred/DataUpdate.php
new file mode 100644
index 00000000..ed9a7462
--- /dev/null
+++ b/www/wiki/includes/deferred/DataUpdate.php
@@ -0,0 +1,83 @@
+<?php
+/**
+ * Base code for update jobs that do something with some secondary
+ * data extracted from article.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Abstract base class for update jobs that do something with some secondary
+ * data extracted from article.
+ */
+abstract class DataUpdate implements DeferrableUpdate {
+ /** @var mixed Result from LBFactory::getEmptyTransactionTicket() */
+ protected $ticket;
+ /** @var string Short update cause action description */
+ protected $causeAction = 'unknown';
+ /** @var string Short update cause user description */
+ protected $causeAgent = 'unknown';
+
+ /**
+ * Does nothing; the properties above keep their declared defaults.
+ */
+ public function __construct() {
+ // noop
+ }
+
+ /**
+ * Store a transaction ticket on this update for later use by subclasses.
+ *
+ * @param mixed $ticket Result of getEmptyTransactionTicket()
+ * @since 1.28
+ */
+ public function setTransactionTicket( $ticket ) {
+ $this->ticket = $ticket;
+ }
+
+ /**
+ * Record what triggered this update; the values are returned unchanged by
+ * getCauseAction() and getCauseAgent().
+ *
+ * @param string $action Action type
+ * @param string $user User name
+ */
+ public function setCause( $action, $user ) {
+ $this->causeAction = $action;
+ $this->causeAgent = $user;
+ }
+
+ /**
+ * @return string Cause action, 'unknown' unless one was set
+ */
+ public function getCauseAction() {
+ return $this->causeAction;
+ }
+
+ /**
+ * @return string Cause agent, 'unknown' unless one was set
+ */
+ public function getCauseAgent() {
+ return $this->causeAgent;
+ }
+
+ /**
+ * Convenience method, calls doUpdate() on every DataUpdate in the array.
+ *
+ * Updates run in array order; nothing is caught here, so the first
+ * exception stops the loop and propagates to the caller.
+ *
+ * @param DataUpdate[] $updates A list of DataUpdate instances
+ * @throws Exception
+ * @deprecated Since 1.28 Use DeferredUpdates::execute()
+ */
+ public static function runUpdates( array $updates ) {
+ foreach ( $updates as $update ) {
+ $update->doUpdate();
+ }
+ }
+}
diff --git a/www/wiki/includes/deferred/DeferrableCallback.php b/www/wiki/includes/deferred/DeferrableCallback.php
new file mode 100644
index 00000000..2eb0d5df
--- /dev/null
+++ b/www/wiki/includes/deferred/DeferrableCallback.php
@@ -0,0 +1,13 @@
+<?php
+
+/**
+ * Callback wrapper that has an originating method
+ *
+ * @since 1.28
+ */
+interface DeferrableCallback {
+	/**
+	 * Name of the method that created this callback wrapper.
+	 *
+	 * DeferredUpdates::execute() appends it to the class name when building
+	 * the stats key for the update.
+	 *
+	 * @return string Originating method name
+	 */
+	public function getOrigin();
+}
diff --git a/www/wiki/includes/deferred/DeferrableUpdate.php b/www/wiki/includes/deferred/DeferrableUpdate.php
new file mode 100644
index 00000000..5f4d8210
--- /dev/null
+++ b/www/wiki/includes/deferred/DeferrableUpdate.php
@@ -0,0 +1,14 @@
+<?php
+
+/**
+ * Interface that deferrable updates should implement. Basically required so we
+ * can validate input on DeferredUpdates::addUpdate()
+ *
+ * @since 1.19
+ */
+interface DeferrableUpdate {
+	/**
+	 * Perform the actual work
+	 *
+	 * Called by DeferredUpdates when the update is run rather than enqueued.
+	 */
+	public function doUpdate();
+}
diff --git a/www/wiki/includes/deferred/DeferredUpdates.php b/www/wiki/includes/deferred/DeferredUpdates.php
new file mode 100644
index 00000000..9b25d538
--- /dev/null
+++ b/www/wiki/includes/deferred/DeferredUpdates.php
@@ -0,0 +1,379 @@
+<?php
+/**
+ * Interface and manager for deferred updates.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+use Wikimedia\Rdbms\IDatabase;
+use MediaWiki\MediaWikiServices;
+use Wikimedia\Rdbms\LBFactory;
+use Wikimedia\Rdbms\LoadBalancer;
+
+/**
+ * Class for managing the deferred updates
+ *
+ * In web request mode, deferred updates can be run at the end of the request, either before or
+ * after the HTTP response has been sent. In either case, they run after the DB commit step. If
+ * an update runs after the response is sent, it will not block clients. If sent before, it will
+ * run synchronously. These two modes are defined via PRESEND and POSTSEND constants, the latter
+ * being the default for addUpdate() and addCallableUpdate().
+ *
+ * Updates that work through this system will be more likely to complete by the time the client
+ * makes their next request after this one than with the JobQueue system.
+ *
+ * In CLI mode, updates run immediately if no DB writes are pending. Otherwise, they run when:
+ * - a) Any waitForReplication() call if no writes are pending on any DB
+ * - b) A commit happens on Maintenance::getDB( DB_MASTER ) if no writes are pending on any DB
+ * - c) EnqueueableDataUpdate tasks may enqueue on commit of Maintenance::getDB( DB_MASTER )
+ * - d) At the completion of Maintenance::execute()
+ *
+ * When updates are deferred, they go into one of two FIFO "top-queues" (one for pre-send and one
+ * for post-send). Updates enqueued *during* doUpdate() of a "top" update go into the "sub-queue"
+ * for that update. After that method finishes, the sub-queue is run until drained. This continues
+ * for each top-queue job until the entire top queue is drained. This happens for the pre-send
+ * top-queue, and later on, the post-send top-queue, in execute().
+ *
+ * @since 1.19
+ */
+class DeferredUpdates {
+ /** @var DeferrableUpdate[] Updates to be deferred until before request end */
+ private static $preSendUpdates = [];
+ /** @var DeferrableUpdate[] Updates to be deferred until after request end */
+ private static $postSendUpdates = [];
+
+ const ALL = 0; // all updates; in web requests, use only after flushing the output buffer
+ const PRESEND = 1; // for updates that should run before flushing output buffer
+ const POSTSEND = 2; // for updates that should run after flushing output buffer
+
+ const BIG_QUEUE_SIZE = 100;
+
+ /** @var array|null Information about the current execute() call or null if not running */
+ private static $executeContext;
+
+	/**
+	 * Queue an update so that execute() runs it later.
+	 *
+	 * While execute() is running an update whose stage is at or past $stage,
+	 * the new update goes to that update's sub-queue instead of a top-queue.
+	 *
+	 * In CLI mode, callback magic will also be used to run updates when safe
+	 *
+	 * @param DeferrableUpdate $update Some object that implements doUpdate()
+	 * @param int $stage DeferredUpdates constant (PRESEND or POSTSEND) (since 1.27)
+	 */
+	public static function addUpdate( DeferrableUpdate $update, $stage = self::POSTSEND ) {
+		global $wgCommandLineMode;
+
+		$context = self::$executeContext;
+		$isSubUpdate = $context && $context['stage'] >= $stage;
+		if ( $isSubUpdate ) {
+			// This is a sub-DeferredUpdate; run it right after its parent update.
+			// Also, while post-send updates are running, push any "pre-send" jobs to the
+			// active post-send queue to make sure they get run this round (or at all).
+			self::$executeContext['subqueue'][] = $update;
+
+			return;
+		}
+
+		if ( $stage !== self::PRESEND ) {
+			self::push( self::$postSendUpdates, $update );
+		} else {
+			self::push( self::$preSendUpdates, $update );
+		}
+
+		if ( !$wgCommandLineMode ) {
+			return;
+		}
+
+		// Try to run the updates now if in CLI mode and no transaction is active.
+		// This covers scripts that don't/barely use the DB but make updates to other stores.
+		self::tryOpportunisticExecute( 'run' );
+	}
+
+	/**
+	 * Queue a plain callback as an update, for the common case where writing
+	 * a dedicated DeferrableUpdate class would be overkill.
+	 *
+	 * The callback is wrapped in an MWCallableUpdate whose origin is the
+	 * value of wfGetCaller() as seen from this method.
+	 *
+	 * @see MWCallableUpdate::__construct()
+	 *
+	 * @param callable $callable
+	 * @param int $stage DeferredUpdates constant (PRESEND or POSTSEND) (since 1.27)
+	 * @param IDatabase|IDatabase[]|null $dbw Abort if this DB is rolled back [optional] (since 1.28)
+	 */
+	public static function addCallableUpdate(
+		$callable, $stage = self::POSTSEND, $dbw = null
+	) {
+		// Must be called from this frame so the reported caller stays the same
+		$origin = wfGetCaller();
+		$wrapped = new MWCallableUpdate( $callable, $origin, $dbw );
+
+		self::addUpdate( $wrapped, $stage );
+	}
+
+	/**
+	 * Do any deferred updates and clear the list
+	 *
+	 * With ALL, the pre-send queue is run first and the post-send queue second,
+	 * and both are executed with POSTSEND as their effective stage.
+	 *
+	 * @param string $mode Use "enqueue" to use the job queue when possible [Default: "run"]
+	 * @param int $stage DeferredUpdates constant (PRESEND, POSTSEND, or ALL) (since 1.27)
+	 */
+	public static function doUpdates( $mode = 'run', $stage = self::ALL ) {
+		$stageEffective = ( $stage === self::ALL ) ? self::POSTSEND : $stage;
+
+		if ( $stage === self::ALL || $stage === self::PRESEND ) {
+			self::execute( self::$preSendUpdates, $mode, $stageEffective );
+		}
+
+		// Strict comparison, matching the PRESEND check above and getPendingUpdates()
+		if ( $stage === self::ALL || $stage === self::POSTSEND ) {
+			self::execute( self::$postSendUpdates, $mode, $stageEffective );
+		}
+	}
+
+ /**
+ * Deprecated no-op: the only thing this method does is emit a deprecation
+ * notice through wfDeprecated(); $value is not read.
+ *
+ * @param bool $value Whether to just immediately run updates in addUpdate()
+ * @since 1.28
+ * @deprecated 1.29 Causes issues in Web-executed jobs - see T165714 and T100085.
+ */
+ public static function setImmediateMode( $value ) {
+ wfDeprecated( __METHOD__, '1.29' );
+ }
+
+	/**
+	 * Add an update to a queue.
+	 *
+	 * Plain updates are appended. A MergeableUpdate is stored under its class
+	 * name; if that slot is already taken, the new update is merged into the
+	 * existing one instead of being queued separately.
+	 *
+	 * @param DeferrableUpdate[] &$queue
+	 * @param DeferrableUpdate $update
+	 */
+	private static function push( array &$queue, DeferrableUpdate $update ) {
+		if ( !( $update instanceof MergeableUpdate ) ) {
+			$queue[] = $update;
+
+			return;
+		}
+
+		$slot = get_class( $update ); // fully-qualified class
+		if ( !isset( $queue[$slot] ) ) {
+			$queue[$slot] = $update;
+
+			return;
+		}
+
+		/** @var MergeableUpdate $queued */
+		$queued = $queue[$slot];
+		$queued->merge( $update );
+	}
+
+ /**
+ * Immediately run/queue a list of updates
+ *
+ * The queue is processed in rounds: a snapshot is taken, the queue is
+ * emptied, every update in the snapshot is run (each followed by its own
+ * sub-queue), and the loop repeats while the queue has new entries.
+ * Errors returned by runUpdate() do not stop the loop; the first one is
+ * thrown once all rounds are finished.
+ *
+ * @param DeferrableUpdate[] &$queue List of DeferrableUpdate objects
+ * @param string $mode Use "enqueue" to use the job queue when possible
+ * @param int $stage Class constant (PRESEND, POSTSEND) (since 1.28)
+ * @throws ErrorPageError Happens on top-level calls
+ * @throws Exception Happens on second-level calls
+ */
+ protected static function execute( array &$queue, $mode, $stage ) {
+ $services = MediaWikiServices::getInstance();
+ $stats = $services->getStatsdDataFactory();
+ $lbFactory = $services->getDBLoadBalancerFactory();
+ // HTTP method of the main request; used only as part of the stats key below
+ $method = RequestContext::getMain()->getRequest()->getMethod();
+
+ // One ticket is shared by every DataUpdate run in this call
+ $ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );
+
+ /** @var ErrorPageError $reportableError */
+ $reportableError = null;
+ /** @var DeferrableUpdate[] $updates Snapshot of queue */
+ $updates = $queue;
+
+ // Keep doing rounds of updates until none get enqueued...
+ while ( $updates ) {
+ $queue = []; // clear the queue
+
+ // Order will be DataUpdate followed by generic DeferrableUpdate tasks
+ $updatesByType = [ 'data' => [], 'generic' => [] ];
+ foreach ( $updates as $du ) {
+ if ( $du instanceof DataUpdate ) {
+ $du->setTransactionTicket( $ticket );
+ $updatesByType['data'][] = $du;
+ } else {
+ $updatesByType['generic'][] = $du;
+ }
+
+ // Stats key: class name, plus the origin for callback-style updates
+ $name = ( $du instanceof DeferrableCallback )
+ ? get_class( $du ) . '-' . $du->getOrigin()
+ : get_class( $du );
+ $stats->increment( 'deferred_updates.' . $method . '.' . $name );
+ }
+
+ // Execute all remaining tasks...
+ foreach ( $updatesByType as $updatesForType ) {
+ foreach ( $updatesForType as $update ) {
+ // While this is set, addUpdate() may route new updates into 'subqueue'
+ self::$executeContext = [ 'stage' => $stage, 'subqueue' => [] ];
+ /** @var DeferrableUpdate $update */
+ $guiError = self::runUpdate( $update, $lbFactory, $mode, $stage );
+ // Only the first reportable error is kept
+ $reportableError = $reportableError ?: $guiError;
+ // Do the subqueue updates for $update until there are none
+ while ( self::$executeContext['subqueue'] ) {
+ // Take the oldest entry off the front; running it may append more
+ $subUpdate = reset( self::$executeContext['subqueue'] );
+ $firstKey = key( self::$executeContext['subqueue'] );
+ unset( self::$executeContext['subqueue'][$firstKey] );
+
+ if ( $subUpdate instanceof DataUpdate ) {
+ $subUpdate->setTransactionTicket( $ticket );
+ }
+
+ $guiError = self::runUpdate( $subUpdate, $lbFactory, $mode, $stage );
+ $reportableError = $reportableError ?: $guiError;
+ }
+ // Back to "not running" so addUpdate() uses the top-queues again
+ self::$executeContext = null;
+ }
+ }
+
+ $updates = $queue; // new snapshot of queue (check for new entries)
+ }
+
+ if ( $reportableError ) {
+ throw $reportableError; // throw the first of any GUI errors
+ }
+ }
+
+	/**
+	 * Run (or enqueue) a single update, containing any Exception it throws.
+	 *
+	 * A caught exception is handed to
+	 * MWExceptionHandler::rollbackMasterChangesAndLog(). It is returned only
+	 * if it is an ErrorPageError and the stage is PRESEND.
+	 *
+	 * @param DeferrableUpdate $update
+	 * @param LBFactory $lbFactory
+	 * @param string $mode
+	 * @param int $stage
+	 * @return ErrorPageError|null
+	 */
+	private static function runUpdate(
+		DeferrableUpdate $update, LBFactory $lbFactory, $mode, $stage
+	) {
+		try {
+			$enqueueOnly = ( $mode === 'enqueue' && $update instanceof EnqueueableDataUpdate );
+			if ( $enqueueOnly ) {
+				// Run only the job enqueue logic to complete the update later
+				$spec = $update->getAsJobSpecification();
+				JobQueueGroup::singleton( $spec['wiki'] )->push( $spec['job'] );
+			} elseif ( $update instanceof TransactionRoundDefiningUpdate ) {
+				// Run directly, without the begin/commit wrapper used below
+				$update->doUpdate();
+			} else {
+				// Run the bulk of the update now
+				$trxOwner = get_class( $update ) . '::doUpdate';
+				$lbFactory->beginMasterChanges( $trxOwner );
+				$update->doUpdate();
+				$lbFactory->commitMasterChanges( $trxOwner );
+			}
+		} catch ( Exception $e ) {
+			MWExceptionHandler::rollbackMasterChangesAndLog( $e );
+
+			// Reporting GUI exceptions does not work post-send
+			$isReportable = ( $e instanceof ErrorPageError && $stage === self::PRESEND );
+
+			return $isReportable ? $e : null;
+		}
+
+		return null;
+	}
+
+	/**
+	 * Run all deferred updates immediately if there are no DB writes active
+	 *
+	 * If databases are busy and at least BIG_QUEUE_SIZE updates are pending,
+	 * EnqueueableDataUpdate tasks are pushed to the job queue anyway (even
+	 * when $mode is 'run') for the sake of progress.
+	 *
+	 * @param string $mode Use "enqueue" to use the job queue when possible
+	 * @return bool Whether updates were allowed to run
+	 * @since 1.28
+	 */
+	public static function tryOpportunisticExecute( $mode = 'run' ) {
+		if ( self::$executeContext ) {
+			// execute() loop is already running
+			return false;
+		}
+
+		if ( self::areDatabaseTransactionsActive() ) {
+			if ( self::pendingUpdatesCount() >= self::BIG_QUEUE_SIZE ) {
+				// If we cannot run the updates with outer transaction context, try to
+				// at least enqueue all the updates that support queueing to job queue
+				self::$preSendUpdates = self::enqueueUpdates( self::$preSendUpdates );
+				self::$postSendUpdates = self::enqueueUpdates( self::$postSendUpdates );
+			}
+
+			return self::pendingUpdatesCount() === 0;
+		}
+
+		// Nothing is in flight, so the updates can run with their own outer scope
+		self::doUpdates( $mode );
+
+		return true;
+	}
+
+	/**
+	 * Push a job for every EnqueueableDataUpdate in the list and hand back
+	 * everything else.
+	 *
+	 * @param DeferrableUpdate[] $updates A list of deferred update instances
+	 * @return DeferrableUpdate[] Remaining updates that do not support being queued
+	 */
+	private static function enqueueUpdates( array $updates ) {
+		$notEnqueueable = [];
+
+		foreach ( $updates as $update ) {
+			if ( !( $update instanceof EnqueueableDataUpdate ) ) {
+				// Returned list is re-indexed; original keys are not kept
+				$notEnqueueable[] = $update;
+				continue;
+			}
+
+			$spec = $update->getAsJobSpecification();
+			JobQueueGroup::singleton( $spec['wiki'] )->push( $spec['job'] );
+		}
+
+		return $notEnqueueable;
+	}
+
+	/**
+	 * Count the entries of both top-queues (pre-send plus post-send).
+	 *
+	 * @return int Number of enqueued updates
+	 * @since 1.28
+	 */
+	public static function pendingUpdatesCount() {
+		$preSendCount = count( self::$preSendUpdates );
+		$postSendCount = count( self::$postSendUpdates );
+
+		return $preSendCount + $postSendCount;
+	}
+
+	/**
+	 * List the queued updates of one stage or of both, pre-send entries first.
+	 *
+	 * @param int $stage DeferredUpdates constant (PRESEND, POSTSEND, or ALL)
+	 * @return DeferrableUpdate[]
+	 * @since 1.29
+	 */
+	public static function getPendingUpdates( $stage = self::ALL ) {
+		$wantAll = ( $stage === self::ALL );
+
+		$preSend = ( $wantAll || $stage === self::PRESEND ) ? self::$preSendUpdates : [];
+		$postSend = ( $wantAll || $stage === self::POSTSEND ) ? self::$postSendUpdates : [];
+
+		return array_merge( $preSend, $postSend );
+	}
+
+	/**
+	 * Drop every queued update from both top-queues without running any of
+	 * them. Generally, you don't want or need to call this. Unit tests need
+	 * it though.
+	 */
+	public static function clearPendingUpdates() {
+		self::$preSendUpdates = self::$postSendUpdates = [];
+	}
+
+	/**
+	 * Check whether the database layer is in the middle of something.
+	 *
+	 * True when the LBFactory has a transaction round, or when any open master
+	 * connection reports pending writes/callbacks or an explicit transaction.
+	 *
+	 * @return bool If a transaction round is active or connection is not ready for commit()
+	 */
+	private static function areDatabaseTransactionsActive() {
+		$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
+		if ( $lbFactory->hasTransactionRound() ) {
+			return true;
+		}
+
+		$busy = false;
+		// Applied to every open master connection; there is no early exit
+		$inspectConn = function ( IDatabase $conn ) use ( &$busy ) {
+			if ( $conn->writesOrCallbacksPending() || $conn->explicitTrxActive() ) {
+				$busy = true;
+			}
+		};
+		$inspectLB = function ( LoadBalancer $lb ) use ( $inspectConn ) {
+			$lb->forEachOpenMasterConnection( $inspectConn );
+		};
+		$lbFactory->forEachLB( $inspectLB );
+
+		return $busy;
+	}
+}
diff --git a/www/wiki/includes/deferred/EnqueueableDataUpdate.php b/www/wiki/includes/deferred/EnqueueableDataUpdate.php
new file mode 100644
index 00000000..ffeb740d
--- /dev/null
+++ b/www/wiki/includes/deferred/EnqueueableDataUpdate.php
@@ -0,0 +1,15 @@
+<?php
+/**
+ * Interface that marks a DataUpdate as enqueuable via the JobQueue
+ *
+ * Such updates must be representable using IJobSpecification, so that
+ * they can be serialized into jobs and enqueued for later execution
+ *
+ * @since 1.27
+ */
+interface EnqueueableDataUpdate {
+ /**
+ * Describe this update as a job.
+ *
+ * DeferredUpdates passes the 'wiki' entry to JobQueueGroup::singleton()
+ * and pushes the 'job' entry onto the resulting queue group.
+ *
+ * @return array (wiki => wiki ID, job => IJobSpecification)
+ */
+ public function getAsJobSpecification();
+}
diff --git a/www/wiki/includes/deferred/HTMLCacheUpdate.php b/www/wiki/includes/deferred/HTMLCacheUpdate.php
new file mode 100644
index 00000000..29846bfb
--- /dev/null
+++ b/www/wiki/includes/deferred/HTMLCacheUpdate.php
@@ -0,0 +1,60 @@
+<?php
+/**
+ * HTML cache invalidation of all pages linking to a given title.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Cache
+ */
+
+/**
+ * Deferrable update that invalidates the HTML cache of all the pages linking
+ * to a given title.
+ *
+ * The update itself only builds an HTMLCacheUpdateJob for the backlinks and
+ * hands it to the job queue.
+ *
+ * @ingroup Cache
+ */
+class HTMLCacheUpdate extends DataUpdate {
+    /** @var Title Title whose backlinking pages are to be invalidated */
+    public $mTitle;
+
+    /** @var string Table name handed to HTMLCacheUpdateJob::newForBacklinks() */
+    public $mTable;
+
+    /**
+     * @param Title $titleTo Title that the affected pages link to
+     * @param string $table Table name handed to the backlinks job
+     * @param string $causeAction Triggering action
+     * @param string $causeAgent Triggering user
+     */
+    function __construct(
+        Title $titleTo, $table, $causeAction = 'unknown', $causeAgent = 'unknown'
+    ) {
+        $this->causeAction = $causeAction;
+        $this->causeAgent = $causeAgent;
+        $this->mTable = $table;
+        $this->mTitle = $titleTo;
+    }
+
+    /**
+     * Build the backlink invalidation job and lazily push it to the job queue.
+     */
+    public function doUpdate() {
+        $cause = [
+            'causeAction' => $this->getCauseAction(),
+            'causeAgent' => $this->getCauseAgent()
+        ];
+        $purgeJob = HTMLCacheUpdateJob::newForBacklinks( $this->mTitle, $this->mTable, $cause );
+
+        JobQueueGroup::singleton()->lazyPush( $purgeJob );
+    }
+}
diff --git a/www/wiki/includes/deferred/LinksDeletionUpdate.php b/www/wiki/includes/deferred/LinksDeletionUpdate.php
new file mode 100644
index 00000000..52e996a0
--- /dev/null
+++ b/www/wiki/includes/deferred/LinksDeletionUpdate.php
@@ -0,0 +1,242 @@
+<?php
+/**
+ * Updater for link tracking tables after a page deletion.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+use MediaWiki\MediaWikiServices;
+use Wikimedia\ScopedCallback;
+use Wikimedia\Rdbms\IDatabase;
+
+/**
+ * Update object handling the cleanup of links tables after a page was deleted.
+ */
+class LinksDeletionUpdate extends DataUpdate implements EnqueueableDataUpdate {
+ /** @var WikiPage */
+ protected $page;
+ /** @var int */
+ protected $pageId;
+ /** @var string */
+ protected $timestamp;
+
+ /** @var IDatabase */
+ private $db;
+
+ /**
+  * @param WikiPage $page Page we are updating
+  * @param int|null $pageId ID of the page at the time of deletion [optional]; when it is
+  *  not given, the ID is read from $page, which must then still exist
+  * @param string|null $timestamp TS_MW timestamp of deletion; defaults to the current time
+  * @throws InvalidArgumentException If no page ID was given and the page does not exist
+  */
+ function __construct( WikiPage $page, $pageId = null, $timestamp = null ) {
+     parent::__construct();
+
+     $this->page = $page;
+
+     // An explicitly given ID wins; otherwise fall back to the ID of the existing page
+     if ( !$pageId ) {
+         if ( !$page->exists() ) {
+             throw new InvalidArgumentException( "Page ID not known. Page doesn't exist?" );
+         }
+         $pageId = $page->getId();
+     }
+     $this->pageId = $pageId;
+
+     $this->timestamp = $timestamp ?: wfTimestampNow();
+ }
+
+ /**
+ * Clean up the rows that refer to the deleted page.
+ *
+ * In order, this deletes the page_restrictions rows, fixes the category
+ * counts, schedules a category count refresh when the page itself is an
+ * apparently empty category, deletes the rows of all link tables, the
+ * redirect and page_props rows, and finally the matching recentchanges rows.
+ *
+ * Work is split into batches of 'UpdateRowsPerQuery' rows; when more than
+ * one batch is needed, each batch is followed by a commit that waits for
+ * replication.
+ */
+ public function doUpdate() {
+ $services = MediaWikiServices::getInstance();
+ $config = $services->getMainConfig();
+ $lbFactory = $services->getDBLoadBalancerFactory();
+ $batchSize = $config->get( 'UpdateRowsPerQuery' );
+
+ // Page may already be deleted, so don't just getId()
+ $id = $this->pageId;
+
+ // NOTE(review): $scopedLock is only assigned when a ticket is set, yet it is
+ // passed to ScopedCallback::consume() unconditionally at the end. Presumably
+ // consume() takes its argument by reference and tolerates null -- confirm.
+ if ( $this->ticket ) {
+ // Make sure all links update threads see the changes of each other.
+ // This handles the case when updates have to be batched into several COMMITs.
+ $scopedLock = LinksUpdate::acquirePageLock( $this->getDB(), $id );
+ }
+
+ $title = $this->page->getTitle();
+ $dbw = $this->getDB(); // convenience
+
+ // Delete restrictions for it
+ $dbw->delete( 'page_restrictions', [ 'pr_page' => $id ], __METHOD__ );
+
+ // Fix category table counts
+ $cats = $dbw->selectFieldValues(
+ 'categorylinks',
+ 'cl_to',
+ [ 'cl_from' => $id ],
+ __METHOD__
+ );
+ $catBatches = array_chunk( $cats, $batchSize );
+ foreach ( $catBatches as $catBatch ) {
+ $this->page->updateCategoryCounts( [], $catBatch, $id );
+ // Only commit between batches when the work actually had to be split
+ if ( count( $catBatches ) > 1 ) {
+ $lbFactory->commitAndWaitForReplication(
+ __METHOD__, $this->ticket, [ 'domain' => $dbw->getDomainID() ]
+ );
+ }
+ }
+
+ // Refresh the category table entry if it seems to have no pages. Check
+ // master for the most up-to-date cat_pages count.
+ if ( $title->getNamespace() === NS_CATEGORY ) {
+ $row = $dbw->selectRow(
+ 'category',
+ [ 'cat_id', 'cat_title', 'cat_pages', 'cat_subcats', 'cat_files' ],
+ [ 'cat_title' => $title->getDBkey(), 'cat_pages <= 0' ],
+ __METHOD__
+ );
+ if ( $row ) {
+ $cat = Category::newFromRow( $row, $title );
+ // T166757: do the update after the main job DB commit
+ DeferredUpdates::addCallableUpdate( function () use ( $cat ) {
+ $cat->refreshCounts();
+ } );
+ }
+ }
+
+ // Delete the rows of every link table for this page, batched by the
+ // column list given as third argument
+ $this->batchDeleteByPK(
+ 'pagelinks',
+ [ 'pl_from' => $id ],
+ [ 'pl_from', 'pl_namespace', 'pl_title' ],
+ $batchSize
+ );
+ $this->batchDeleteByPK(
+ 'imagelinks',
+ [ 'il_from' => $id ],
+ [ 'il_from', 'il_to' ],
+ $batchSize
+ );
+ $this->batchDeleteByPK(
+ 'categorylinks',
+ [ 'cl_from' => $id ],
+ [ 'cl_from', 'cl_to' ],
+ $batchSize
+ );
+ $this->batchDeleteByPK(
+ 'templatelinks',
+ [ 'tl_from' => $id ],
+ [ 'tl_from', 'tl_namespace', 'tl_title' ],
+ $batchSize
+ );
+ $this->batchDeleteByPK(
+ 'externallinks',
+ [ 'el_from' => $id ],
+ [ 'el_id' ],
+ $batchSize
+ );
+ $this->batchDeleteByPK(
+ 'langlinks',
+ [ 'll_from' => $id ],
+ [ 'll_from', 'll_lang' ],
+ $batchSize
+ );
+ $this->batchDeleteByPK(
+ 'iwlinks',
+ [ 'iwl_from' => $id ],
+ [ 'iwl_from', 'iwl_prefix', 'iwl_title' ],
+ $batchSize
+ );
+
+ // Delete any redirect entry or page props entries
+ $dbw->delete( 'redirect', [ 'rd_from' => $id ], __METHOD__ );
+ $dbw->delete( 'page_props', [ 'pp_page' => $id ], __METHOD__ );
+
+ // Find recentchanges entries to clean up...
+ // ...first the non-log entries for this title that are older than the deletion
+ $rcIdsForTitle = $dbw->selectFieldValues(
+ 'recentchanges',
+ 'rc_id',
+ [
+ 'rc_type != ' . RC_LOG,
+ 'rc_namespace' => $title->getNamespace(),
+ 'rc_title' => $title->getDBkey(),
+ 'rc_timestamp < ' .
+ $dbw->addQuotes( $dbw->timestamp( $this->timestamp ) )
+ ],
+ __METHOD__
+ );
+ // ...then the non-log entries that reference the page ID directly
+ $rcIdsForPage = $dbw->selectFieldValues(
+ 'recentchanges',
+ 'rc_id',
+ [ 'rc_type != ' . RC_LOG, 'rc_cur_id' => $id ],
+ __METHOD__
+ );
+
+ // T98706: delete by PK to avoid lock contention with RC delete log insertions
+ $rcIdBatches = array_chunk( array_merge( $rcIdsForTitle, $rcIdsForPage ), $batchSize );
+ foreach ( $rcIdBatches as $rcIdBatch ) {
+ $dbw->delete( 'recentchanges', [ 'rc_id' => $rcIdBatch ], __METHOD__ );
+ if ( count( $rcIdBatches ) > 1 ) {
+ $lbFactory->commitAndWaitForReplication(
+ __METHOD__, $this->ticket, [ 'domain' => $dbw->getDomainID() ]
+ );
+ }
+ }
+
+ // Commit and release the lock (if set)
+ ScopedCallback::consume( $scopedLock );
+ }
+
+ /**
+  * Delete the rows of a table that match $conds, addressing them by the given key columns.
+  *
+  * The $pk columns of all matching rows are selected first. The rows are then deleted
+  * with OR-ed per-row conditions, $bSize rows per DELETE. Every full batch is followed
+  * by a commit that waits for replication; a trailing partial batch is deleted without
+  * a commit of its own.
+  *
+  * @param string $table Table name
+  * @param array $conds Conditions selecting the rows to delete
+  * @param array $pk Columns used to address each row
+  * @param int $bSize Number of rows per DELETE query
+  */
+ private function batchDeleteByPK( $table, array $conds, array $pk, $bSize ) {
+     $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
+     $dbw = $this->getDB();
+
+     $rowConds = [];
+     foreach ( $dbw->select( $table, $pk, $conds, __METHOD__ ) as $row ) {
+         $rowConds[] = $dbw->makeList( (array)$row, LIST_AND );
+         if ( count( $rowConds ) < $bSize ) {
+             continue;
+         }
+
+         // The batch is full: flush it and let the replicas catch up
+         $dbw->delete( $table, $dbw->makeList( $rowConds, LIST_OR ), __METHOD__ );
+         $lbFactory->commitAndWaitForReplication(
+             __METHOD__, $this->ticket, [ 'domain' => $dbw->getDomainID() ]
+         );
+         $rowConds = [];
+     }
+
+     // Flush whatever is left over from the last, incomplete batch
+     if ( $rowConds ) {
+         $dbw->delete( $table, $dbw->makeList( $rowConds, LIST_OR ), __METHOD__ );
+     }
+ }
+
+ /**
+  * Get the master DB handle, obtaining it through wfGetDB() on first use.
+  *
+  * @return IDatabase
+  */
+ protected function getDB() {
+     if ( $this->db ) {
+         return $this->db;
+     }
+
+     $this->db = wfGetDB( DB_MASTER );
+
+     return $this->db;
+ }
+
+ /**
+  * Describe this update as a 'deleteLinks' job for the wiki of the master DB domain.
+  *
+  * @return array (wiki => wiki ID, job => IJobSpecification)
+  */
+ public function getAsJobSpecification() {
+     $wikiId = WikiMap::getWikiIdFromDomain( $this->getDB()->getDomainID() );
+
+     $jobParams = [ 'pageId' => $this->pageId, 'timestamp' => $this->timestamp ];
+     $jobSpec = new JobSpecification(
+         'deleteLinks',
+         $jobParams,
+         [ 'removeDuplicates' => true ],
+         $this->page->getTitle()
+     );
+
+     return [ 'wiki' => $wikiId, 'job' => $jobSpec ];
+ }
+}
diff --git a/www/wiki/includes/deferred/LinksUpdate.php b/www/wiki/includes/deferred/LinksUpdate.php
new file mode 100644
index 00000000..89136428
--- /dev/null
+++ b/www/wiki/includes/deferred/LinksUpdate.php
@@ -0,0 +1,1182 @@
+<?php
+/**
+ * Updater for link tracking tables after a page edit.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+use Wikimedia\Rdbms\IDatabase;
+use MediaWiki\MediaWikiServices;
+use Wikimedia\ScopedCallback;
+
+/**
+ * Class that manages updates of *_link tables as well as similar extension-managed tables
+ *
+ * @note: LinksUpdate is managed by DeferredUpdates::execute(). Do not run this in a transaction.
+ *
+ * See docs/deferred.txt
+ */
+class LinksUpdate extends DataUpdate implements EnqueueableDataUpdate {
+ // @todo make members protected, but make sure extensions don't break
+
+ /** @var int Page ID of the article linked from */
+ public $mId;
+
+ /** @var Title Title object of the article linked from */
+ public $mTitle;
+
+ /** @var ParserOutput */
+ public $mParserOutput;
+
+ /** @var array Map of title strings to IDs for the links in the document */
+ public $mLinks;
+
+ /** @var array DB keys of the images used, in the array key only */
+ public $mImages;
+
+ /** @var array Map of title strings to IDs for the template references, including broken ones */
+ public $mTemplates;
+
+ /** @var array URLs of external links, array key only */
+ public $mExternals;
+
+ /** @var array Map of category names to sort keys */
+ public $mCategories;
+
+ /** @var array Map of language codes to titles */
+ public $mInterlangs;
+
+ /** @var array 2-D map of (prefix => DBK => 1) */
+ public $mInterwikis;
+
+ /** @var array Map of arbitrary name to value */
+ public $mProperties;
+
+ /** @var bool Whether to queue jobs for recursive updates */
+ public $mRecursive;
+
+ /** @var Revision Revision for which this update has been triggered */
+ private $mRevision;
+
+ /**
+ * @var null|array Added links if calculated.
+ */
+ private $linkInsertions = null;
+
+ /**
+ * @var null|array Deleted links if calculated.
+ */
+ private $linkDeletions = null;
+
+ /**
+ * @var null|array Added properties if calculated.
+ */
+ private $propertyInsertions = null;
+
+ /**
+ * @var null|array Deleted properties if calculated.
+ */
+ private $propertyDeletions = null;
+
+ /**
+ * @var User|null
+ */
+ private $user;
+
+ /** @var IDatabase */
+ private $db;
+
+ /**
+  * @param Title $title Title of the page we're updating
+  * @param ParserOutput $parserOutput Output from a full parse of this page
+  * @param bool $recursive Queue jobs for recursive updates?
+  * @throws InvalidArgumentException If the title yields no article ID
+  */
+ function __construct( Title $title, ParserOutput $parserOutput, $recursive = true ) {
+     parent::__construct();
+
+     $this->mTitle = $title;
+     $this->mId = $title->getArticleID( Title::GAID_FOR_UPDATE );
+
+     if ( !$this->mId ) {
+         throw new InvalidArgumentException(
+             "The Title object yields no ID. Perhaps the page doesn't exist?"
+         );
+     }
+
+     $this->mParserOutput = $parserOutput;
+
+     $this->mLinks = $parserOutput->getLinks();
+     $this->mImages = $parserOutput->getImages();
+     $this->mTemplates = $parserOutput->getTemplates();
+     $this->mExternals = $parserOutput->getExternalLinks();
+     $this->mCategories = $parserOutput->getCategories();
+     $this->mProperties = $parserOutput->getProperties();
+     $this->mInterwikis = $parserOutput->getInterwikiLinks();
+
+     # Convert the format of the interlanguage links
+     # I didn't want to change it in the ParserOutput, because that array is passed all
+     # the way back to the skin, so either a skin API break would be required, or an
+     # inefficient back-conversion.
+     $ill = $parserOutput->getLanguageLinks();
+     $this->mInterlangs = [];
+     foreach ( $ill as $link ) {
+         // Use dedicated locals so the $title parameter is not overwritten
+         list( $langCode, $langTitle ) = explode( ':', $link, 2 );
+         $this->mInterlangs[$langCode] = $langTitle;
+     }
+
+     foreach ( $this->mCategories as &$sortkey ) {
+         # If the sortkey is longer than 255 bytes, it is truncated by the DB
+         # and then doesn't get matched when comparing existing vs current
+         # categories, causing T27254.
+         # Also, substr behaves weirdly when given "".
+         if ( $sortkey !== '' ) {
+             $sortkey = substr( $sortkey, 0, 255 );
+         }
+     }
+     // Break the reference so that later use of $sortkey cannot silently
+     // modify the last element of $this->mCategories
+     unset( $sortkey );
+
+     $this->mRecursive = $recursive;
+
+     // Avoid PHP 7.1 warning from passing $this by reference
+     $linksUpdate = $this;
+     Hooks::run( 'LinksUpdateConstructed', [ &$linksUpdate ] );
+ }
+
+ /**
+  * Update link tables with outgoing links from an updated article
+  *
+  * Runs the 'LinksUpdate' hook, performs the incremental table update and
+  * schedules the 'LinksUpdateComplete' hook for when the transaction is idle.
+  *
+  * @note: this is managed by DeferredUpdates::execute(). Do not run this in a transaction.
+  */
+ public function doUpdate() {
+     // Defined up front so that the consume() call below never receives an
+     // unassigned variable when there is no ticket (and hence no lock).
+     $scopedLock = null;
+     if ( $this->ticket ) {
+         // Make sure all links update threads see the changes of each other.
+         // This handles the case when updates have to be batched into several COMMITs.
+         $scopedLock = self::acquirePageLock( $this->getDB(), $this->mId );
+     }
+
+     // Avoid PHP 7.1 warning from passing $this by reference
+     $linksUpdate = $this;
+     Hooks::run( 'LinksUpdate', [ &$linksUpdate ] );
+     $this->doIncrementalUpdate();
+
+     // Commit and release the lock (if set)
+     ScopedCallback::consume( $scopedLock );
+     // Run post-commit hooks without DBO_TRX
+     $this->getDB()->onTransactionIdle(
+         function () {
+             // Avoid PHP 7.1 warning from passing $this by reference
+             $linksUpdate = $this;
+             Hooks::run( 'LinksUpdateComplete', [ &$linksUpdate, $this->ticket ] );
+         },
+         __METHOD__
+     );
+ }
+
+ /**
+  * Acquire a lock for performing link table updates for a page on a DB
+  *
+  * The lock key is built from $why and $pageId. The value 15 passed to
+  * getScopedLockAndFlush() is presumably the lock timeout in seconds -- confirm
+  * against IDatabase.
+  *
+  * @param IDatabase $dbw
+  * @param int $pageId
+  * @param string $why One of (job, atomicity)
+  * @return ScopedCallback
+  * @throws RuntimeException If the lock could not be obtained
+  * @since 1.27
+  */
+ public static function acquirePageLock( IDatabase $dbw, $pageId, $why = 'atomicity' ) {
+     $key = "LinksUpdate:$why:pageid:$pageId";
+
+     $lock = $dbw->getScopedLockAndFlush( $key, __METHOD__, 15 );
+     if ( $lock ) {
+         return $lock;
+     }
+
+     throw new RuntimeException( "Could not acquire lock '$key'." );
+ }
+
+ /**
+ * Bring every link table in line with the current parser output.
+ *
+ * For each table the existing rows are loaded, the deletions and insertions
+ * are computed against the data taken from the ParserOutput, and both are
+ * applied through incrTableUpdate(). Afterwards the affected image
+ * description pages, categories and page property dependants are
+ * invalidated, category counts are updated, recursive jobs are queued if
+ * requested, and the links timestamp of the page is refreshed.
+ *
+ * As a side effect this fills in $linkInsertions, $linkDeletions,
+ * $propertyInsertions and $propertyDeletions.
+ */
+ protected function doIncrementalUpdate() {
+ # Page links
+ $existingPL = $this->getExistingLinks();
+ $this->linkDeletions = $this->getLinkDeletions( $existingPL );
+ $this->linkInsertions = $this->getLinkInsertions( $existingPL );
+ $this->incrTableUpdate( 'pagelinks', 'pl', $this->linkDeletions, $this->linkInsertions );
+
+ # Image links
+ $existingIL = $this->getExistingImages();
+ $imageDeletes = $this->getImageDeletions( $existingIL );
+ $this->incrTableUpdate(
+ 'imagelinks',
+ 'il',
+ $imageDeletes,
+ $this->getImageInsertions( $existingIL ) );
+
+ # Invalidate all image description pages which had links added or removed
+ $imageUpdates = $imageDeletes + array_diff_key( $this->mImages, $existingIL );
+ $this->invalidateImageDescriptions( $imageUpdates );
+
+ # External links
+ $existingEL = $this->getExistingExternals();
+ $this->incrTableUpdate(
+ 'externallinks',
+ 'el',
+ $this->getExternalDeletions( $existingEL ),
+ $this->getExternalInsertions( $existingEL ) );
+
+ # Language links
+ $existingLL = $this->getExistingInterlangs();
+ $this->incrTableUpdate(
+ 'langlinks',
+ 'll',
+ $this->getInterlangDeletions( $existingLL ),
+ $this->getInterlangInsertions( $existingLL ) );
+
+ # Inline interwiki links
+ $existingIW = $this->getExistingInterwikis();
+ $this->incrTableUpdate(
+ 'iwlinks',
+ 'iwl',
+ $this->getInterwikiDeletions( $existingIW ),
+ $this->getInterwikiInsertions( $existingIW ) );
+
+ # Template links
+ $existingTL = $this->getExistingTemplates();
+ $this->incrTableUpdate(
+ 'templatelinks',
+ 'tl',
+ $this->getTemplateDeletions( $existingTL ),
+ $this->getTemplateInsertions( $existingTL ) );
+
+ # Category links
+ $existingCL = $this->getExistingCategories();
+ $categoryDeletes = $this->getCategoryDeletions( $existingCL );
+ $this->incrTableUpdate(
+ 'categorylinks',
+ 'cl',
+ $categoryDeletes,
+ $this->getCategoryInsertions( $existingCL ) );
+ # Keep the (name => sort key) maps around; they are needed further down
+ # for invalidation and for the category counts
+ $categoryInserts = array_diff_assoc( $this->mCategories, $existingCL );
+ $categoryUpdates = $categoryInserts + $categoryDeletes;
+
+ # Page properties
+ $existingPP = $this->getExistingProperties();
+ $this->propertyDeletions = $this->getPropertyDeletions( $existingPP );
+ $this->incrTableUpdate(
+ 'page_props',
+ 'pp',
+ $this->propertyDeletions,
+ $this->getPropertyInsertions( $existingPP ) );
+
+ # Invalidate the necessary pages
+ $this->propertyInsertions = array_diff_assoc( $this->mProperties, $existingPP );
+ $changed = $this->propertyDeletions + $this->propertyInsertions;
+ $this->invalidateProperties( $changed );
+
+ # Invalidate all categories which were added, deleted or changed (set symmetric difference)
+ $this->invalidateCategories( $categoryUpdates );
+ $this->updateCategoryCounts( $categoryInserts, $categoryDeletes );
+
+ # Refresh links of all pages including this page
+ # This will be in a separate transaction
+ if ( $this->mRecursive ) {
+ $this->queueRecursiveJobs();
+ }
+
+ # Update the links table freshness for this title
+ $this->updateLinksTimestamp();
+ }
+
+ /**
+  * Queue recursive jobs for this page
+  *
+  * That is, schedule a LinksUpdate via the job queue for all pages that
+  * include the current page.
+  */
+ protected function queueRecursiveJobs() {
+     $causeAction = $this->getCauseAction();
+     $causeAgent = $this->getCauseAgent();
+
+     self::queueRecursiveJobsForTable(
+         $this->mTitle, 'templatelinks', $causeAction, $causeAgent
+     );
+     if ( $this->mTitle->getNamespace() == NS_FILE ) {
+         // Process imagelinks in case the title is or was a redirect
+         self::queueRecursiveJobsForTable(
+             $this->mTitle, 'imagelinks', $causeAction, $causeAgent
+         );
+     }
+
+     $backlinkCache = $this->mTitle->getBacklinkCache();
+     // Get jobs for cascade-protected backlinks for a high priority queue.
+     // If meta-templates change to using a new template, the new template
+     // should be implicitly protected as soon as possible, if applicable.
+     // These jobs duplicate a subset of the above ones, but can run sooner.
+     // Whichever runs first generally no-ops the other one.
+     $prioritizedJobs = [];
+     foreach ( $backlinkCache->getCascadeProtectedLinks() as $title ) {
+         $prioritizedJobs[] = RefreshLinksJob::newPrioritized(
+             $title,
+             [ 'causeAction' => $causeAction, 'causeAgent' => $causeAgent ]
+         );
+     }
+     JobQueueGroup::singleton()->push( $prioritizedJobs );
+ }
+
+ /**
+  * Queue a RefreshLinks job for any table.
+  *
+  * Nothing is queued when the title's backlink cache reports no links in $table.
+  *
+  * @param Title $title Title to do job for
+  * @param string $table Table to use (e.g. 'templatelinks')
+  * @param string $action Triggering action
+  * @param string $userName Triggering user name
+  */
+ public static function queueRecursiveJobsForTable(
+     Title $title, $table, $action = 'unknown', $userName = 'unknown'
+ ) {
+     if ( !$title->getBacklinkCache()->hasLinks( $table ) ) {
+         return;
+     }
+
+     // "overall" refresh links job info
+     $rootJobParams = Job::newRootJobParams(
+         "refreshlinks:{$table}:{$title->getPrefixedText()}"
+     );
+     $params = [ 'table' => $table, 'recursive' => true ]
+         + $rootJobParams
+         + [ 'causeAction' => $action, 'causeAgent' => $userName ];
+     $job = new RefreshLinksJob( $title, $params );
+
+     JobQueueGroup::singleton()->push( $job );
+ }
+
+ /**
+  * Invalidate the category pages named by the keys of $cats.
+  *
+  * @param array $cats Map whose keys are category names
+  */
+ private function invalidateCategories( $cats ) {
+     $dbw = $this->getDB();
+     $categoryNames = array_keys( $cats );
+
+     PurgeJobUtils::invalidatePages( $dbw, NS_CATEGORY, $categoryNames );
+ }
+
+ /**
+  * Update all the appropriate counts in the category table.
+  *
+  * The category names are processed in batches of 'UpdateRowsPerQuery' entries,
+  * read from the main config like the other batched writes of this class. Each
+  * batch is followed by a commit that waits for replication.
+  *
+  * @param array $added Associative array of category name => sort key
+  * @param array $deleted Associative array of category name => sort key
+  */
+ private function updateCategoryCounts( array $added, array $deleted ) {
+     if ( !$added && !$deleted ) {
+         return;
+     }
+
+     $services = MediaWikiServices::getInstance();
+     $batchSize = $services->getMainConfig()->get( 'UpdateRowsPerQuery' );
+     $lbf = $services->getDBLoadBalancerFactory();
+
+     $domainId = $this->getDB()->getDomainID();
+     $wp = WikiPage::factory( $this->mTitle );
+     // T163801: try to release any row locks to reduce contention
+     $lbf->commitAndWaitForReplication( __METHOD__, $this->ticket, [ 'domain' => $domainId ] );
+
+     foreach ( array_chunk( array_keys( $added ), $batchSize ) as $addBatch ) {
+         $wp->updateCategoryCounts( $addBatch, [], $this->mId );
+         $lbf->commitAndWaitForReplication(
+             __METHOD__, $this->ticket, [ 'domain' => $domainId ] );
+     }
+
+     foreach ( array_chunk( array_keys( $deleted ), $batchSize ) as $deleteBatch ) {
+         $wp->updateCategoryCounts( [], $deleteBatch, $this->mId );
+         $lbf->commitAndWaitForReplication(
+             __METHOD__, $this->ticket, [ 'domain' => $domainId ] );
+     }
+ }
+
+ /**
+  * Invalidate the file description pages named by the keys of $images.
+  *
+  * @param array $images Map whose keys are image names
+  */
+ private function invalidateImageDescriptions( $images ) {
+     $dbw = $this->getDB();
+     $imageNames = array_keys( $images );
+
+     PurgeJobUtils::invalidatePages( $dbw, NS_FILE, $imageNames );
+ }
+
+ /**
+  * Update a table by doing batched delete queries followed by batched insert queries
+  *
+  * Deletions and insertions are split into chunks of 'UpdateRowsPerQuery' entries and
+  * every query is followed by a commit that waits for replication. When there were
+  * insertions, the 'LinksUpdateAfterInsert' hook runs at the end.
+  *
+  * @param string $table Table name
+  * @param string $prefix Field name prefix
+  * @param array $deletions For pagelinks, templatelinks and iwlinks a 2-D map
+  *  (namespace or interwiki prefix => title => anything); for all other tables
+  *  a map whose keys are the values of the "to" field
+  * @param array $insertions Rows to insert
+  */
+ private function incrTableUpdate( $table, $prefix, $deletions, $insertions ) {
+     $services = MediaWikiServices::getInstance();
+     $bSize = $services->getMainConfig()->get( 'UpdateRowsPerQuery' );
+     $lbf = $services->getDBLoadBalancerFactory();
+
+     $fromField = ( $table === 'page_props' ) ? 'pp_page' : "{$prefix}_from";
+
+     // List of WHERE clause arrays, one per DB delete() call
+     $deleteWheres = [];
+     if ( in_array( $table, [ 'pagelinks', 'templatelinks', 'iwlinks' ], true ) ) {
+         $baseKey = ( $table === 'iwlinks' ) ? 'iwl_prefix' : "{$prefix}_namespace";
+
+         // Re-chunk the 2-D map so that no batch holds more than $bSize titles
+         $batches = [];
+         $batch = [];
+         $batchCount = 0;
+         foreach ( $deletions as $ns => $dbKeys ) {
+             foreach ( $dbKeys as $dbKey => $unused ) {
+                 $batch[$ns][$dbKey] = 1;
+                 $batchCount++;
+                 if ( $batchCount >= $bSize ) {
+                     $batches[] = $batch;
+                     $batch = [];
+                     $batchCount = 0;
+                 }
+             }
+         }
+         if ( $batch ) {
+             $batches[] = $batch;
+         }
+
+         foreach ( $batches as $batch ) {
+             $deleteWheres[] = [
+                 $fromField => $this->mId,
+                 $this->getDB()->makeWhereFrom2d( $batch, $baseKey, "{$prefix}_title" )
+             ];
+         }
+     } else {
+         // Tables whose "to" field does not follow the "{prefix}_to" pattern
+         $specialToFields = [ 'langlinks' => 'll_lang', 'page_props' => 'pp_propname' ];
+         $toField = isset( $specialToFields[$table] )
+             ? $specialToFields[$table]
+             : $prefix . '_to';
+
+         foreach ( array_chunk( array_keys( $deletions ), $bSize ) as $chunk ) {
+             $deleteWheres[] = [ $fromField => $this->mId, $toField => $chunk ];
+         }
+     }
+
+     $domainId = $this->getDB()->getDomainID();
+
+     foreach ( $deleteWheres as $where ) {
+         $this->getDB()->delete( $table, $where, __METHOD__ );
+         $lbf->commitAndWaitForReplication(
+             __METHOD__, $this->ticket, [ 'domain' => $domainId ]
+         );
+     }
+
+     foreach ( array_chunk( $insertions, $bSize ) as $rows ) {
+         $this->getDB()->insert( $table, $rows, __METHOD__, 'IGNORE' );
+         $lbf->commitAndWaitForReplication(
+             __METHOD__, $this->ticket, [ 'domain' => $domainId ]
+         );
+     }
+
+     if ( count( $insertions ) > 0 ) {
+         Hooks::run( 'LinksUpdateAfterInsert', [ $this, $table, $insertions ] );
+     }
+ }
+
+ /**
+  * Build the pagelinks rows that have to be inserted.
+  *
+  * Links already listed in the 2-D array $existing are skipped.
+  *
+  * @param array $existing 2-D map (namespace => DB key => anything) of existing links
+  * @return array List of rows for the DB insert
+  */
+ private function getLinkInsertions( $existing = [] ) {
+     $rows = [];
+     foreach ( $this->mLinks as $ns => $dbkeys ) {
+         if ( isset( $existing[$ns] ) ) {
+             // Only keep the titles that are not linked yet
+             $dbkeys = array_diff_key( $dbkeys, $existing[$ns] );
+         }
+         foreach ( $dbkeys as $dbk => $unused ) {
+             $rows[] = [
+                 'pl_from' => $this->mId,
+                 'pl_from_namespace' => $this->mTitle->getNamespace(),
+                 'pl_namespace' => $ns,
+                 'pl_title' => $dbk
+             ];
+         }
+     }
+
+     return $rows;
+ }
+
+ /**
+  * Build the templatelinks rows that have to be inserted. Like getLinkInsertions()
+  *
+  * @param array $existing 2-D map (namespace => DB key => anything) of existing templates
+  * @return array List of rows for the DB insert
+  */
+ private function getTemplateInsertions( $existing = [] ) {
+     $rows = [];
+     foreach ( $this->mTemplates as $ns => $dbkeys ) {
+         if ( isset( $existing[$ns] ) ) {
+             $dbkeys = array_diff_key( $dbkeys, $existing[$ns] );
+         }
+         foreach ( $dbkeys as $dbk => $unused ) {
+             $rows[] = [
+                 'tl_from' => $this->mId,
+                 'tl_from_namespace' => $this->mTitle->getNamespace(),
+                 'tl_namespace' => $ns,
+                 'tl_title' => $dbk
+             ];
+         }
+     }
+
+     return $rows;
+ }
+
+ /**
+  * Build the imagelinks rows that have to be inserted.
+  *
+  * Names that appear as keys of $existing are skipped.
+  *
+  * @param array $existing Map keyed by the names of the existing images
+  * @return array List of rows for the DB insert
+  */
+ private function getImageInsertions( $existing = [] ) {
+     $rows = [];
+     foreach ( array_diff_key( $this->mImages, $existing ) as $imageName => $unused ) {
+         $rows[] = [
+             'il_from' => $this->mId,
+             'il_from_namespace' => $this->mTitle->getNamespace(),
+             'il_to' => $imageName
+         ];
+     }
+
+     return $rows;
+ }
+
+ /**
+  * Build the externallinks rows that have to be inserted.
+  *
+  * URLs that appear as keys of $existing are skipped. One row is produced per
+  * index value returned by wfMakeUrlIndexes() for a URL.
+  *
+  * @param array $existing Map keyed by the existing URLs
+  * @return array List of rows for the DB insert
+  */
+ private function getExternalInsertions( $existing = [] ) {
+     $rows = [];
+     $newUrls = array_keys( array_diff_key( $this->mExternals, $existing ) );
+     foreach ( $newUrls as $url ) {
+         foreach ( wfMakeUrlIndexes( $url ) as $index ) {
+             $rows[] = [
+                 'el_from' => $this->mId,
+                 'el_to' => $url,
+                 'el_index' => $index,
+             ];
+         }
+     }
+
+     return $rows;
+ }
+
+ /**
+  * Get an array of category insertions
+  *
+  * @param array $existing Mapping existing category names to sort keys. If both
+  *  match a link in $this, the link will be omitted from the output
+  *
+  * @return array List of categorylinks rows for the DB insert
+  */
+ private function getCategoryInsertions( $existing = [] ) {
+     global $wgContLang, $wgCategoryCollation;
+
+     // The link type only depends on the namespace of the page being updated,
+     // so work it out once instead of once per category
+     if ( $this->mTitle->getNamespace() == NS_CATEGORY ) {
+         $type = 'subcat';
+     } elseif ( $this->mTitle->getNamespace() == NS_FILE ) {
+         $type = 'file';
+     } else {
+         $type = 'page';
+     }
+
+     $diffs = array_diff_assoc( $this->mCategories, $existing );
+     $arr = [];
+     foreach ( $diffs as $name => $prefix ) {
+         $nt = Title::makeTitleSafe( NS_CATEGORY, $name );
+         // NOTE(review): findVariantLink() presumably receives $name by reference
+         // and may replace it with a language variant -- confirm against Language.
+         $wgContLang->findVariantLink( $name, $nt, true );
+
+         # Treat custom sortkeys as a prefix, so that if multiple
+         # things are forced to sort as '*' or something, they'll
+         # sort properly in the category rather than in page_id
+         # order or such.
+         $sortkey = Collation::singleton()->getSortKey(
+             $this->mTitle->getCategorySortkey( $prefix ) );
+
+         $arr[] = [
+             'cl_from' => $this->mId,
+             'cl_to' => $name,
+             'cl_sortkey' => $sortkey,
+             'cl_timestamp' => $this->getDB()->timestamp(),
+             'cl_sortkey_prefix' => $prefix,
+             'cl_collation' => $wgCategoryCollation,
+             'cl_type' => $type,
+         ];
+     }
+
+     return $arr;
+ }
+
+ /**
+  * Build the langlinks rows that have to be inserted.
+  *
+  * @param array $existing Mapping existing language codes to titles
+  *
+  * @return array List of rows for the DB insert
+  */
+ private function getInterlangInsertions( $existing = [] ) {
+     $rows = [];
+     foreach ( array_diff_assoc( $this->mInterlangs, $existing ) as $lang => $title ) {
+         $rows[] = [
+             'll_from' => $this->mId,
+             'll_lang' => $lang,
+             'll_title' => $title
+         ];
+     }
+
+     return $rows;
+ }
+
+ /**
+  * Build the page_props rows that have to be inserted.
+  *
+  * A property is included when its name/value pair is not present in $existing.
+  *
+  * @param array $existing Map of existing property names to values
+  * @return array List of rows, one per property, as built by getPagePropRowData()
+  */
+ function getPropertyInsertions( $existing = [] ) {
+     $rows = [];
+     foreach ( array_diff_assoc( $this->mProperties, $existing ) as $name => $unused ) {
+         $rows[] = $this->getPagePropRowData( $name );
+     }
+
+     return $rows;
+ }
+
+ /**
+  * Build the associative array used to insert one row into the page_props table.
+  *
+  * Besides the given property name, the row carries the page ID from
+  * $this->mId and the property value from $this->mProperties.
+  *
+  * The pp_sortkey field is only added when $wgPagePropsHaveSortkey is set;
+  * its value comes from getPropertySortKeyValue().
+  *
+  * @note this assumes that $this->mProperties[$prop] is defined.
+  *
+  * @param string $prop The name of the property.
+  *
+  * @return array
+  */
+ private function getPagePropRowData( $prop ) {
+     global $wgPagePropsHaveSortkey;
+
+     $value = $this->mProperties[$prop];
+     $row = [ 'pp_page' => $this->mId, 'pp_propname' => $prop, 'pp_value' => $value ];
+
+     if ( !$wgPagePropsHaveSortkey ) {
+         return $row;
+     }
+
+     $row['pp_sortkey'] = $this->getPropertySortKeyValue( $value );
+
+     return $row;
+ }
+
+ /**
+  * Determine the sort key for the given property value.
+  *
+  * Ints, floats and bools are converted with floatval() (so a bool becomes
+  * 1.0 or 0.0); every other kind of value yields null.
+  *
+  * @note In the future, we may allow the sortkey to be specified explicitly
+  *  in ParserOutput::setProperty.
+  *
+  * @param mixed $value
+  *
+  * @return float|null
+  */
+ private function getPropertySortKeyValue( $value ) {
+     $isSortable = is_int( $value ) || is_float( $value ) || is_bool( $value );
+
+     return $isSortable ? floatval( $value ) : null;
+ }
+
+ /**
+  * Build the iwlinks rows that have to be inserted.
+  *
+  * Titles already listed in the 2-D array $existing are skipped.
+  *
+  * @param array $existing 2-D map (prefix => DB key => anything) of existing links
+  * @return array List of rows for the DB insert
+  */
+ private function getInterwikiInsertions( $existing = [] ) {
+     $rows = [];
+     foreach ( $this->mInterwikis as $prefix => $dbkeys ) {
+         if ( isset( $existing[$prefix] ) ) {
+             $dbkeys = array_diff_key( $dbkeys, $existing[$prefix] );
+         }
+
+         foreach ( $dbkeys as $dbk => $unused ) {
+             $rows[] = [
+                 'iwl_from' => $this->mId,
+                 'iwl_prefix' => $prefix,
+                 'iwl_title' => $dbk
+             ];
+         }
+     }
+
+     return $rows;
+ }
+
+ /**
+  * Work out which of the existing links are no longer in $this and thus
+  * should be deleted.
+  *
+  * @param array $existing 2-D map (namespace => DB key => anything) of existing links
+  * @return array 2-D map of the links to delete; every namespace of $existing is
+  *  kept as a key, possibly with an empty list
+  */
+ private function getLinkDeletions( $existing ) {
+     $del = [];
+     foreach ( $existing as $ns => $dbkeys ) {
+         $del[$ns] = isset( $this->mLinks[$ns] )
+             ? array_diff_key( $dbkeys, $this->mLinks[$ns] )
+             : $dbkeys;
+     }
+
+     return $del;
+ }
+
+ /**
+  * Work out which of the existing templates are no longer in $this and thus
+  * should be deleted.
+  *
+  * @param array $existing 2-D map (namespace => DB key => anything) of existing templates
+  * @return array 2-D map of the templates to delete; every namespace of $existing
+  *  is kept as a key, possibly with an empty list
+  */
+ private function getTemplateDeletions( $existing ) {
+     $del = [];
+     foreach ( $existing as $ns => $dbkeys ) {
+         $del[$ns] = isset( $this->mTemplates[$ns] )
+             ? array_diff_key( $dbkeys, $this->mTemplates[$ns] )
+             : $dbkeys;
+     }
+
+     return $del;
+ }
+
+ /**
+  * Work out which of the existing images are no longer in $this and thus
+  * should be deleted.
+  *
+  * @param array $existing Map keyed by the names of the existing images
+  * @return array Entries of $existing whose key is not a key of $this->mImages
+  */
+ private function getImageDeletions( $existing ) {
+     $removed = array_diff_key( $existing, $this->mImages );
+
+     return $removed;
+ }
+
+ /**
+ * Given an array of existing external links, returns those links which are not
+ * in $this and thus should be deleted.
+ * @param array $existing
+ * @return array
+ */
+ private function getExternalDeletions( $existing ) {
+ return array_diff_key( $existing, $this->mExternals );
+ }
+
+ /**
+ * Given an array of existing categories, returns those categories which are not in $this
+ * and thus should be deleted.
+ * @param array $existing
+ * @return array
+ */
+ private function getCategoryDeletions( $existing ) {
+ return array_diff_assoc( $existing, $this->mCategories );
+ }
+
+ /**
+ * Given an array of existing interlanguage links, returns those links which are not
+ * in $this and thus should be deleted.
+ * @param array $existing
+ * @return array
+ */
+ private function getInterlangDeletions( $existing ) {
+ return array_diff_assoc( $existing, $this->mInterlangs );
+ }
+
+ /**
+ * Get array of properties which should be deleted.
+ *
+ * The comparison uses array_diff_assoc(), so an existing property is returned both
+ * when its name is absent from $this->mProperties and when its value differs there.
+ *
+ * @param array $existing
+ * @return array
+ */
+ public function getPropertyDeletions( $existing ) {
+ return array_diff_assoc( $existing, $this->mProperties );
+ }
+
+ /**
+ * Given an array of existing interwiki links, returns those links which are not in $this
+ * and thus should be deleted.
+ * @param array $existing
+ * @return array
+ */
+ private function getInterwikiDeletions( $existing ) {
+ $del = [];
+ foreach ( $existing as $prefix => $dbkeys ) {
+ if ( isset( $this->mInterwikis[$prefix] ) ) {
+ $del[$prefix] = array_diff_key( $existing[$prefix], $this->mInterwikis[$prefix] );
+ } else {
+ $del[$prefix] = $existing[$prefix];
+ }
+ }
+
+ return $del;
+ }
+
+ /**
+ * Get an array of existing links, as a 2-D array
+ *
+ * @return array
+ */
+ private function getExistingLinks() {
+ $res = $this->getDB()->select( 'pagelinks', [ 'pl_namespace', 'pl_title' ],
+ [ 'pl_from' => $this->mId ], __METHOD__ );
+ $arr = [];
+ foreach ( $res as $row ) {
+ if ( !isset( $arr[$row->pl_namespace] ) ) {
+ $arr[$row->pl_namespace] = [];
+ }
+ $arr[$row->pl_namespace][$row->pl_title] = 1;
+ }
+
+ return $arr;
+ }
+
+ /**
+ * Get an array of existing templates, as a 2-D array
+ *
+ * @return array
+ */
+ private function getExistingTemplates() {
+ $res = $this->getDB()->select( 'templatelinks', [ 'tl_namespace', 'tl_title' ],
+ [ 'tl_from' => $this->mId ], __METHOD__ );
+ $arr = [];
+ foreach ( $res as $row ) {
+ if ( !isset( $arr[$row->tl_namespace] ) ) {
+ $arr[$row->tl_namespace] = [];
+ }
+ $arr[$row->tl_namespace][$row->tl_title] = 1;
+ }
+
+ return $arr;
+ }
+
+ /**
+ * Get an array of existing images, image names in the keys
+ *
+ * @return array
+ */
+ private function getExistingImages() {
+ $res = $this->getDB()->select( 'imagelinks', [ 'il_to' ],
+ [ 'il_from' => $this->mId ], __METHOD__ );
+ $arr = [];
+ foreach ( $res as $row ) {
+ $arr[$row->il_to] = 1;
+ }
+
+ return $arr;
+ }
+
+ /**
+ * Get an array of existing external links, URLs in the keys
+ *
+ * @return array
+ */
+ private function getExistingExternals() {
+ $res = $this->getDB()->select( 'externallinks', [ 'el_to' ],
+ [ 'el_from' => $this->mId ], __METHOD__ );
+ $arr = [];
+ foreach ( $res as $row ) {
+ $arr[$row->el_to] = 1;
+ }
+
+ return $arr;
+ }
+
+ /**
+ * Get an array of existing categories, with the name in the key and sort key in the value.
+ *
+ * @return array
+ */
+ private function getExistingCategories() {
+ $res = $this->getDB()->select( 'categorylinks', [ 'cl_to', 'cl_sortkey_prefix' ],
+ [ 'cl_from' => $this->mId ], __METHOD__ );
+ $arr = [];
+ foreach ( $res as $row ) {
+ $arr[$row->cl_to] = $row->cl_sortkey_prefix;
+ }
+
+ return $arr;
+ }
+
+ /**
+ * Get an array of existing interlanguage links, with the language code in the key and the
+ * title in the value.
+ *
+ * @return array
+ */
+ private function getExistingInterlangs() {
+ $res = $this->getDB()->select( 'langlinks', [ 'll_lang', 'll_title' ],
+ [ 'll_from' => $this->mId ], __METHOD__ );
+ $arr = [];
+ foreach ( $res as $row ) {
+ $arr[$row->ll_lang] = $row->ll_title;
+ }
+
+ return $arr;
+ }
+
+ /**
+ * Get an array of existing inline interwiki links, as a 2-D array
+ * @return array (prefix => array(dbkey => 1))
+ */
+ private function getExistingInterwikis() {
+ $res = $this->getDB()->select( 'iwlinks', [ 'iwl_prefix', 'iwl_title' ],
+ [ 'iwl_from' => $this->mId ], __METHOD__ );
+ $arr = [];
+ foreach ( $res as $row ) {
+ if ( !isset( $arr[$row->iwl_prefix] ) ) {
+ $arr[$row->iwl_prefix] = [];
+ }
+ $arr[$row->iwl_prefix][$row->iwl_title] = 1;
+ }
+
+ return $arr;
+ }
+
+ /**
+ * Get an array of existing page properties for this page, read from the page_props table,
+ * with the property name in the key and the property value in the value.
+ *
+ * @return array Array of property names and values
+ */
+ private function getExistingProperties() {
+ $res = $this->getDB()->select( 'page_props', [ 'pp_propname', 'pp_value' ],
+ [ 'pp_page' => $this->mId ], __METHOD__ );
+ $arr = [];
+ foreach ( $res as $row ) {
+ $arr[$row->pp_propname] = $row->pp_value;
+ }
+
+ return $arr;
+ }
+
+ /**
+ * Return the title object of the page being updated
+ * @return Title
+ */
+ public function getTitle() {
+ return $this->mTitle;
+ }
+
+ /**
+ * Returns parser output
+ * @since 1.19
+ * @return ParserOutput
+ */
+ public function getParserOutput() {
+ return $this->mParserOutput;
+ }
+
+ /**
+ * Return the list of images used as generated by the parser
+ * @return array
+ */
+ public function getImages() {
+ return $this->mImages;
+ }
+
+ /**
+ * Set the revision corresponding to this LinksUpdate
+ *
+ * @since 1.27
+ *
+ * @param Revision $revision
+ */
+ public function setRevision( Revision $revision ) {
+ $this->mRevision = $revision;
+ }
+
+ /**
+ * @since 1.28
+ * @return null|Revision
+ */
+ public function getRevision() {
+ return $this->mRevision;
+ }
+
+ /**
+ * Set the User who triggered this LinksUpdate
+ *
+ * @since 1.27
+ * @param User $user
+ */
+ public function setTriggeringUser( User $user ) {
+ $this->user = $user;
+ }
+
+ /**
+ * @since 1.27
+ * @return null|User
+ */
+ public function getTriggeringUser() {
+ return $this->user;
+ }
+
+ /**
+ * Invalidate any necessary link lists related to page property changes
+ * @param array $changed
+ */
+ private function invalidateProperties( $changed ) {
+ global $wgPagePropLinkInvalidations;
+
+ foreach ( $changed as $name => $value ) {
+ if ( isset( $wgPagePropLinkInvalidations[$name] ) ) {
+ $inv = $wgPagePropLinkInvalidations[$name];
+ if ( !is_array( $inv ) ) {
+ $inv = [ $inv ];
+ }
+ foreach ( $inv as $table ) {
+ DeferredUpdates::addUpdate(
+ new HTMLCacheUpdate( $this->mTitle, $table, 'page-props' )
+ );
+ }
+ }
+ }
+ }
+
+ /**
+ * Fetch page links added by this LinksUpdate. Only available after the update is complete.
+ * @since 1.22
+ * @return null|array Array of Titles
+ */
+ public function getAddedLinks() {
+ if ( $this->linkInsertions === null ) {
+ return null;
+ }
+ $result = [];
+ foreach ( $this->linkInsertions as $insertion ) {
+ $result[] = Title::makeTitle( $insertion['pl_namespace'], $insertion['pl_title'] );
+ }
+
+ return $result;
+ }
+
+ /**
+ * Fetch page links removed by this LinksUpdate. Only available after the update is complete.
+ * @since 1.22
+ * @return null|array Array of Titles
+ */
+ public function getRemovedLinks() {
+ if ( $this->linkDeletions === null ) {
+ return null;
+ }
+ $result = [];
+ foreach ( $this->linkDeletions as $ns => $titles ) {
+ foreach ( $titles as $title => $unused ) {
+ $result[] = Title::makeTitle( $ns, $title );
+ }
+ }
+
+ return $result;
+ }
+
+ /**
+ * Fetch page properties added by this LinksUpdate.
+ * Only available after the update is complete.
+ * @since 1.28
+ * @return null|array
+ */
+ public function getAddedProperties() {
+ return $this->propertyInsertions;
+ }
+
+ /**
+ * Fetch page properties removed by this LinksUpdate.
+ * Only available after the update is complete.
+ * @since 1.28
+ * @return null|array
+ */
+ public function getRemovedProperties() {
+ return $this->propertyDeletions;
+ }
+
+ /**
+ * Update links table freshness
+ */
+ private function updateLinksTimestamp() {
+ if ( $this->mId ) {
+ // The link updates made here only reflect the freshness of the parser output
+ $timestamp = $this->mParserOutput->getCacheTime();
+ $this->getDB()->update( 'page',
+ [ 'page_links_updated' => $this->getDB()->timestamp( $timestamp ) ],
+ [ 'page_id' => $this->mId ],
+ __METHOD__
+ );
+ }
+ }
+
+ /**
+ * @return IDatabase
+ */
+ private function getDB() {
+ if ( !$this->db ) {
+ $this->db = wfGetDB( DB_MASTER );
+ }
+
+ return $this->db;
+ }
+
+ /**
+ * Describe this update as a job that can be queued instead of being run directly.
+ *
+ * @return array Map with two entries: 'wiki', the wiki ID derived from the domain ID
+ * of this update's DB handle, and 'job', a JobSpecification of type
+ * 'refreshLinksPrioritized' for the title of the page being updated
+ */
+ public function getAsJobSpecification() {
+ // The triggering user is passed as plain id/name data, or false if none was set
+ if ( $this->user ) {
+ $userInfo = [
+ 'userId' => $this->user->getId(),
+ 'userName' => $this->user->getName(),
+ ];
+ } else {
+ $userInfo = false;
+ }
+
+ // Likewise the revision is passed by ID only, or false if none was set
+ if ( $this->mRevision ) {
+ $triggeringRevisionId = $this->mRevision->getId();
+ } else {
+ $triggeringRevisionId = false;
+ }
+
+ return [
+ 'wiki' => WikiMap::getWikiIdFromDomain( $this->getDB()->getDomainID() ),
+ 'job' => new JobSpecification(
+ 'refreshLinksPrioritized',
+ [
+ // Reuse the parser cache if it was saved
+ 'rootJobTimestamp' => $this->mParserOutput->getCacheTime(),
+ 'useRecursiveLinksUpdate' => $this->mRecursive,
+ 'triggeringUser' => $userInfo,
+ 'triggeringRevisionId' => $triggeringRevisionId,
+ 'causeAction' => $this->getCauseAction(),
+ 'causeAgent' => $this->getCauseAgent()
+ ],
+ [ 'removeDuplicates' => true ],
+ $this->getTitle()
+ )
+ ];
+ }
+}
diff --git a/www/wiki/includes/deferred/MWCallableUpdate.php b/www/wiki/includes/deferred/MWCallableUpdate.php
new file mode 100644
index 00000000..9803b7a4
--- /dev/null
+++ b/www/wiki/includes/deferred/MWCallableUpdate.php
@@ -0,0 +1,47 @@
+<?php
+
+use Wikimedia\Rdbms\IDatabase;
+
+/**
+ * Deferrable Update for closure/callback
+ */
+class MWCallableUpdate implements DeferrableUpdate, DeferrableCallback {
+ /** @var callable|null Callback run by doUpdate(); set to null by cancelOnRollback() */
+ private $callback;
+ /** @var string Calling method name, as returned by getOrigin() */
+ private $fname;
+
+ /**
+ * @param callable $callback
+ * @param string $fname Calling method
+ * @param IDatabase|IDatabase[]|null $dbws Abort if any of the specified DB handles have
+ * a currently pending transaction which later gets rolled back [optional] (since 1.28)
+ */
+ public function __construct( callable $callback, $fname = 'unknown', $dbws = [] ) {
+ $this->callback = $callback;
+ $this->fname = $fname;
+
+ // Accept a single handle (or null) as well as a list of handles
+ $dbws = is_array( $dbws ) ? $dbws : [ $dbws ];
+ foreach ( $dbws as $dbw ) {
+ // Only register on handles that report an open transaction
+ if ( $dbw && $dbw->trxLevel() ) {
+ $dbw->onTransactionResolution( [ $this, 'cancelOnRollback' ], $fname );
+ }
+ }
+ }
+
+ /**
+ * Run the callback, unless it has been cleared by cancelOnRollback()
+ */
+ public function doUpdate() {
+ if ( $this->callback ) {
+ call_user_func( $this->callback );
+ }
+ }
+
+ /**
+ * Transaction resolution handler registered by the constructor.
+ * Clears the callback when the trigger is a rollback, so doUpdate() becomes a no-op.
+ *
+ * @param mixed $trigger Value compared against IDatabase::TRIGGER_ROLLBACK
+ */
+ public function cancelOnRollback( $trigger ) {
+ if ( $trigger === IDatabase::TRIGGER_ROLLBACK ) {
+ $this->callback = null;
+ }
+ }
+
+ /**
+ * @return string The calling method name given to the constructor
+ */
+ public function getOrigin() {
+ return $this->fname;
+ }
+}
diff --git a/www/wiki/includes/deferred/MergeableUpdate.php b/www/wiki/includes/deferred/MergeableUpdate.php
new file mode 100644
index 00000000..8eeef13b
--- /dev/null
+++ b/www/wiki/includes/deferred/MergeableUpdate.php
@@ -0,0 +1,16 @@
+<?php
+
+/**
+ * Interface that deferrable updates can implement. DeferredUpdates uses this to merge
+ * all pending updates of the same PHP class into a single update by calling merge().
+ *
+ * @since 1.27
+ */
+interface MergeableUpdate extends DeferrableUpdate {
+ /**
+ * Merge this update with $update
+ *
+ * @param MergeableUpdate $update Update of the same class type
+ */
+ function merge( MergeableUpdate $update );
+}
diff --git a/www/wiki/includes/deferred/SearchUpdate.php b/www/wiki/includes/deferred/SearchUpdate.php
new file mode 100644
index 00000000..2766bcb1
--- /dev/null
+++ b/www/wiki/includes/deferred/SearchUpdate.php
@@ -0,0 +1,225 @@
+<?php
+/**
+ * Search index updater
+ *
+ * See deferred.txt
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Search
+ */
+
+use MediaWiki\MediaWikiServices;
+
+/**
+ * Database independent search index updater
+ *
+ * @ingroup Search
+ */
+class SearchUpdate implements DeferrableUpdate {
+ /** @var int Page id being updated */
+ private $id = 0;
+
+ /** @var Title Title we're updating */
+ private $title;
+
+ /** @var Content|bool Content of the page (not text) */
+ private $content;
+
+ /** @var WikiPage **/
+ private $page;
+
+ /**
+ * @param int $id Page id to update
+ * @param Title|string $title Title of page to update
+ * @param Content|string|bool $c Content of the page to update. Default: false.
+ * If a Content object, text will be gotten from it. String is for back-compat.
+ * Passing false tells the backend to just update the title, not the content
+ */
+ public function __construct( $id, $title, $c = false ) {
+ if ( is_string( $title ) ) {
+ $nt = Title::newFromText( $title );
+ } else {
+ $nt = $title;
+ }
+
+ if ( $nt ) {
+ $this->id = $id;
+ // is_string() check is back-compat for ApprovedRevs
+ if ( is_string( $c ) ) {
+ $this->content = new TextContent( $c );
+ } else {
+ $this->content = $c ?: false;
+ }
+ $this->title = $nt;
+ } else {
+ wfDebug( "SearchUpdate object created with invalid title '$title'\n" );
+ }
+ }
+
+ /**
+ * Perform actual update for the entry
+ */
+ public function doUpdate() {
+ $config = MediaWikiServices::getInstance()->getSearchEngineConfig();
+
+ if ( $config->getConfig()->get( 'DisableSearchUpdate' ) || !$this->id ) {
+ return;
+ }
+
+ $seFactory = MediaWikiServices::getInstance()->getSearchEngineFactory();
+ foreach ( $config->getSearchTypes() as $type ) {
+ $search = $seFactory->create( $type );
+ if ( !$search->supports( 'search-update' ) ) {
+ continue;
+ }
+
+ $normalTitle = $this->getNormalizedTitle( $search );
+
+ if ( $this->getLatestPage() === null ) {
+ $search->delete( $this->id, $normalTitle );
+ continue;
+ } elseif ( $this->content === false ) {
+ $search->updateTitle( $this->id, $normalTitle );
+ continue;
+ }
+
+ $text = $search->getTextFromContent( $this->title, $this->content );
+ if ( !$search->textAlreadyUpdatedForIndex() ) {
+ $text = $this->updateText( $text, $search );
+ }
+
+ # Perform the actual update
+ $search->update( $this->id, $normalTitle, $search->normalizeText( $text ) );
+ }
+ }
+
+ /**
+ * Clean text for indexing. Only really suitable for indexing in databases.
+ * If you're using a real search engine, you'll probably want to override
+ * this behavior and do something nicer with the original wikitext.
+ * @param string $text
+ * @param SearchEngine $se Search engine
+ * @return string
+ */
+ public function updateText( $text, SearchEngine $se = null ) {
+ global $wgContLang;
+
+ # Language-specific strip/conversion
+ $text = $wgContLang->normalizeForSearch( $text );
+ $se = $se ?: MediaWikiServices::getInstance()->newSearchEngine();
+ $lc = $se->legalSearchChars() . '&#;';
+
+ $text = preg_replace( "/<\\/?\\s*[A-Za-z][^>]*?>/",
+ ' ', $wgContLang->lc( " " . $text . " " ) ); # Strip HTML markup
+ $text = preg_replace( "/(^|\\n)==\\s*([^\\n]+)\\s*==(\\s)/sD",
+ "\\1\\2 \\2 \\2\\3", $text ); # Emphasize headings
+
+ # Strip external URLs
+ $uc = "A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\x80-\\xFF";
+ $protos = "http|https|ftp|mailto|news|gopher";
+ $pat = "/(^|[^\\[])({$protos}):[{$uc}]+([^{$uc}]|$)/";
+ $text = preg_replace( $pat, "\\1 \\3", $text );
+
+ $p1 = "/([^\\[])\\[({$protos}):[{$uc}]+]/";
+ $p2 = "/([^\\[])\\[({$protos}):[{$uc}]+\\s+([^\\]]+)]/";
+ $text = preg_replace( $p1, "\\1 ", $text );
+ $text = preg_replace( $p2, "\\1 \\3 ", $text );
+
+ # Internal image links
+ $pat2 = "/\\[\\[image:([{$uc}]+)\\.(gif|png|jpg|jpeg)([^{$uc}])/i";
+ $text = preg_replace( $pat2, " \\1 \\3", $text );
+
+ $text = preg_replace( "/([^{$lc}])([{$lc}]+)]]([a-z]+)/",
+ "\\1\\2 \\2\\3", $text ); # Handle [[game]]s
+
+ # Strip all remaining non-search characters
+ $text = preg_replace( "/[^{$lc}]+/", " ", $text );
+
+ /**
+ * Handle 's, s'
+ *
+ * $text = preg_replace( "/([{$lc}]+)'s /", "\\1 \\1's ", $text );
+ * $text = preg_replace( "/([{$lc}]+)s' /", "\\1s ", $text );
+ *
+ * These tail-anchored regexps are insanely slow. The worst case comes
+ * when Japanese or Chinese text (ie, no word spacing) is written on
+ * a wiki configured for Western UTF-8 mode. The Unicode characters are
+ * expanded to hex codes and the "words" are very long paragraph-length
+ * monstrosities. On a large page the above regexps may take over 20
+ * seconds *each* on a 1GHz-level processor.
+ *
+ * Following are reversed versions which are consistently fast
+ * (about 3 milliseconds on 1GHz-level processor).
+ */
+ $text = strrev( preg_replace( "/ s'([{$lc}]+)/", " s'\\1 \\1", strrev( $text ) ) );
+ $text = strrev( preg_replace( "/ 's([{$lc}]+)/", " s\\1", strrev( $text ) ) );
+
+ # Strip wiki '' and '''
+ $text = preg_replace( "/''[']*/", " ", $text );
+
+ return $text;
+ }
+
+ /**
+ * Get WikiPage for the SearchUpdate $id using WikiPage::READ_LATEST
+ * and ensure using the same WikiPage object if there are multiple
+ * SearchEngine types.
+ *
+ * The lookup result is memoized, including the "not found" case, so the
+ * page is loaded at most once per SearchUpdate instance.
+ *
+ * Returns null if a page has been deleted or is not found.
+ *
+ * @return WikiPage|null
+ */
+ private function getLatestPage() {
+ if ( $this->page === null ) {
+ // null means "not looked up yet"; a failed lookup is stored as false so that
+ // it is not repeated for every search type
+ $this->page = WikiPage::newFromID( $this->id, WikiPage::READ_LATEST ) ?: false;
+ }
+
+ return $this->page ?: null;
+ }
+
+ /**
+ * Get a normalized string representation of a title suitable for
+ * including in a search index
+ *
+ * @param SearchEngine $search
+ * @return string A stripped-down title string ready for the search index
+ */
+ private function getNormalizedTitle( SearchEngine $search ) {
+ global $wgContLang;
+
+ $ns = $this->title->getNamespace();
+ $title = $this->title->getText();
+
+ $lc = $search->legalSearchChars() . '&#;';
+ $t = $wgContLang->normalizeForSearch( $title );
+ $t = preg_replace( "/[^{$lc}]+/", ' ', $t );
+ $t = $wgContLang->lc( $t );
+
+ # Handle 's, s'
+ $t = preg_replace( "/([{$lc}]+)'s( |$)/", "\\1 \\1's ", $t );
+ $t = preg_replace( "/([{$lc}]+)s'( |$)/", "\\1s ", $t );
+
+ $t = preg_replace( "/\\s+/", ' ', $t );
+
+ if ( $ns == NS_FILE ) {
+ $t = preg_replace( "/ (png|gif|jpg|jpeg|ogg)$/", "", $t );
+ }
+
+ return $search->normalizeText( trim( $t ) );
+ }
+}
diff --git a/www/wiki/includes/deferred/SiteStatsUpdate.php b/www/wiki/includes/deferred/SiteStatsUpdate.php
new file mode 100644
index 00000000..7cb29509
--- /dev/null
+++ b/www/wiki/includes/deferred/SiteStatsUpdate.php
@@ -0,0 +1,286 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+use MediaWiki\MediaWikiServices;
+use Wikimedia\Assert\Assert;
+use Wikimedia\Rdbms\IDatabase;
+
+/**
+ * Class for handling updates to the site_stats table
+ */
+class SiteStatsUpdate implements DeferrableUpdate, MergeableUpdate {
+ /** @var BagOStuff */
+ protected $stash;
+ /** @var int */
+ protected $edits = 0;
+ /** @var int */
+ protected $pages = 0;
+ /** @var int */
+ protected $articles = 0;
+ /** @var int */
+ protected $users = 0;
+ /** @var int */
+ protected $images = 0;
+
+ private static $counters = [ 'edits', 'pages', 'articles', 'users', 'images' ];
+
+ /**
+ * @todo deprecate this constructor
+ *
+ * @param int $views Not referenced by the constructor body
+ * @param int $edits Delta stored in the edits counter
+ * @param int $good Delta stored in the articles counter
+ * @param int $pages Delta stored in the pages counter
+ * @param int $users Delta stored in the users counter
+ */
+ function __construct( $views, $edits, $good, $pages = 0, $users = 0 ) {
+ $this->edits = $edits;
+ $this->articles = $good;
+ $this->pages = $pages;
+ $this->users = $users;
+
+ $this->stash = MediaWikiServices::getInstance()->getMainObjectStash();
+ }
+
+ public function merge( MergeableUpdate $update ) {
+ /** @var SiteStatsUpdate $update */
+ Assert::parameterType( __CLASS__, $update, '$update' );
+
+ foreach ( self::$counters as $field ) {
+ $this->$field += $update->$field;
+ }
+ }
+
+ /**
+ * Build an update from a map of counter name => delta.
+ *
+ * @param array $deltas Map of counter names ('edits', 'pages', 'articles', 'users',
+ * 'images') to deltas; missing or falsy deltas leave the counter at 0
+ * @return SiteStatsUpdate
+ * @throws UnexpectedValueException If $deltas has a key that is not a known counter
+ */
+ public static function factory( array $deltas ) {
+ $update = new self( 0, 0, 0 );
+
+ foreach ( $deltas as $name => $unused ) {
+ // T187585: reject unknown fields. The comparison is strict because a loose
+ // in_array() on PHP < 8 treats the integer key 0 as equal to any counter name.
+ if ( !in_array( $name, self::$counters, true ) ) {
+ throw new UnexpectedValueException( __METHOD__ . ": no field called '$name'" );
+ }
+ }
+
+ foreach ( self::$counters as $field ) {
+ if ( isset( $deltas[$field] ) && $deltas[$field] ) {
+ $update->$field = $deltas[$field];
+ }
+ }
+
+ return $update;
+ }
+
+ /**
+ * Record the deltas via doUpdateContextStats(), then either add them to the
+ * pending delta counters or schedule the site_stats DB update.
+ *
+ * With a falsy 'SiteStatsAsyncFactor' the DB update is always scheduled. With a
+ * negative value the deltas always go to the pending counters. With a positive
+ * value the DB update is scheduled only when mt_rand( 0, $rate - 1 ) returns 0.
+ */
+ public function doUpdate() {
+ $this->doUpdateContextStats();
+
+ $rate = MediaWikiServices::getInstance()->getMainConfig()->get( 'SiteStatsAsyncFactor' );
+ // If set to do so, only do actual DB updates 1 every $rate times.
+ // The other times, just update "pending delta" values in memcached.
+ if ( $rate && ( $rate < 0 || mt_rand( 0, $rate - 1 ) != 0 ) ) {
+ $this->doUpdatePendingDeltas();
+ } else {
+ // Need a separate transaction because this is a global lock
+ DeferredUpdates::addCallableUpdate( [ $this, 'tryDBUpdateInternal' ] );
+ }
+ }
+
+ /**
+ * Apply this update's deltas to the site_stats table.
+ *
+ * When 'SiteStatsAsyncFactor' is set, a named lock is taken first; if it cannot be
+ * acquired, the deltas are added to the pending counters and nothing is written to
+ * the DB. Otherwise the pending deltas are folded into this update and removed from
+ * the pending counters once the DB update has been issued.
+ *
+ * Do not call this outside of SiteStatsUpdate
+ */
+ public function tryDBUpdateInternal() {
+ $services = MediaWikiServices::getInstance();
+ $config = $services->getMainConfig();
+
+ $dbw = $services->getDBLoadBalancer()->getConnection( DB_MASTER );
+ $lockKey = $dbw->getDomainID() . ':site_stats'; // prepend wiki ID
+ $pd = [];
+ if ( $config->get( 'SiteStatsAsyncFactor' ) ) {
+ // Lock the table so we don't have double DB/memcached updates
+ if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) {
+ $this->doUpdatePendingDeltas();
+
+ return;
+ }
+ $pd = $this->getPendingDeltas();
+ // Piggy-back the async deltas onto those of this stats update....
+ $this->edits += ( $pd['ss_total_edits']['+'] - $pd['ss_total_edits']['-'] );
+ $this->articles += ( $pd['ss_good_articles']['+'] - $pd['ss_good_articles']['-'] );
+ $this->pages += ( $pd['ss_total_pages']['+'] - $pd['ss_total_pages']['-'] );
+ $this->users += ( $pd['ss_users']['+'] - $pd['ss_users']['-'] );
+ $this->images += ( $pd['ss_images']['+'] - $pd['ss_images']['-'] );
+ }
+
+ // Build up an SQL query of deltas and apply them...
+ $updates = '';
+ $this->appendUpdate( $updates, 'ss_total_edits', $this->edits );
+ $this->appendUpdate( $updates, 'ss_good_articles', $this->articles );
+ $this->appendUpdate( $updates, 'ss_total_pages', $this->pages );
+ $this->appendUpdate( $updates, 'ss_users', $this->users );
+ $this->appendUpdate( $updates, 'ss_images', $this->images );
+ if ( $updates != '' ) {
+ $dbw->update( 'site_stats', [ $updates ], [], __METHOD__ );
+ }
+
+ if ( $config->get( 'SiteStatsAsyncFactor' ) ) {
+ // Decrement the async deltas now that we applied them
+ $this->removePendingDeltas( $pd );
+ // Commit the updates and unlock the table
+ $dbw->unlock( $lockKey, __METHOD__ );
+ }
+
+ // Invalidate the cache used by parser functions
+ SiteStats::unload();
+ }
+
+ /**
+ * Recount the active users from the recent changes query (run on a DB_REPLICA
+ * 'vslow' connection) and store the result in site_stats.ss_active_users.
+ *
+ * @param IDatabase $dbw Handle used to write the site_stats row
+ * @return bool|mixed The count as returned by selectField()
+ */
+ public static function cacheUpdate( IDatabase $dbw ) {
+ $services = MediaWikiServices::getInstance();
+ $config = $services->getMainConfig();
+
+ $dbr = $services->getDBLoadBalancer()->getConnection( DB_REPLICA, 'vslow' );
+ # Get non-bot users that did some recent action other than making accounts.
+ # If account creation is included, the number gets inflated ~20+ fold on enwiki.
+ $rcQuery = RecentChange::getQueryInfo();
+ $activeUsers = $dbr->selectField(
+ $rcQuery['tables'],
+ 'COUNT( DISTINCT ' . $rcQuery['fields']['rc_user_text'] . ' )',
+ [
+ 'rc_type != ' . $dbr->addQuotes( RC_EXTERNAL ), // Exclude external (Wikidata)
+ ActorMigration::newMigration()->isNotAnon( $rcQuery['fields']['rc_user'] ),
+ 'rc_bot' => 0,
+ 'rc_log_type != ' . $dbr->addQuotes( 'newusers' ) . ' OR rc_log_type IS NULL',
+ 'rc_timestamp >= ' . $dbr->addQuotes(
+ $dbr->timestamp( time() - $config->get( 'ActiveUserDays' ) * 24 * 3600 ) ),
+ ],
+ __METHOD__,
+ [],
+ $rcQuery['joins']
+ );
+ $dbw->update(
+ 'site_stats',
+ [ 'ss_active_users' => intval( $activeUsers ) ],
+ [ 'ss_row_id' => 1 ],
+ __METHOD__
+ );
+
+ // Invalidate the cache used by parser functions
+ SiteStats::unload();
+
+ return $activeUsers;
+ }
+
+ protected function doUpdateContextStats() {
+ $stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
+ foreach ( [ 'edits', 'articles', 'pages', 'users', 'images' ] as $type ) {
+ $delta = $this->$type;
+ if ( $delta !== 0 ) {
+ $stats->updateCount( "site.$type", $delta );
+ }
+ }
+ }
+
+ protected function doUpdatePendingDeltas() {
+ $this->adjustPending( 'ss_total_edits', $this->edits );
+ $this->adjustPending( 'ss_good_articles', $this->articles );
+ $this->adjustPending( 'ss_total_pages', $this->pages );
+ $this->adjustPending( 'ss_users', $this->users );
+ $this->adjustPending( 'ss_images', $this->images );
+ }
+
+ /**
+ * @param string &$sql
+ * @param string $field
+ * @param int $delta
+ */
+ protected function appendUpdate( &$sql, $field, $delta ) {
+ if ( $delta ) {
+ if ( $sql ) {
+ $sql .= ',';
+ }
+ if ( $delta < 0 ) {
+ $sql .= "$field=$field-" . abs( $delta );
+ } else {
+ $sql .= "$field=$field+" . abs( $delta );
+ }
+ }
+ }
+
+ /**
+ * @param BagOStuff $stash
+ * @param string $type
+ * @param string $sign ('+' or '-')
+ * @return string
+ */
+ private function getTypeCacheKey( BagOStuff $stash, $type, $sign ) {
+ return $stash->makeKey( 'sitestatsupdate', 'pendingdelta', $type, $sign );
+ }
+
+ /**
+ * Adjust the pending deltas for a stat type.
+ * Each stat type has two pending counters, one for increments and one for decrements.
+ * @param string $type
+ * @param int $delta Delta (positive or negative)
+ */
+ protected function adjustPending( $type, $delta ) {
+ // The sign selects the counter; a delta of 0 falls into the '+' branch
+ if ( $delta < 0 ) { // decrement
+ $key = $this->getTypeCacheKey( $this->stash, $type, '-' );
+ } else { // increment
+ $key = $this->getTypeCacheKey( $this->stash, $type, '+' );
+ }
+
+ // Both counters hold magnitudes; the sign is carried by the key only
+ $magnitude = abs( $delta );
+ $this->stash->incrWithInit( $key, 0, $magnitude, $magnitude );
+ }
+
+ /**
+ * Get pending delta counters for each stat type
+ * @return array Positive and negative deltas for each type
+ */
+ protected function getPendingDeltas() {
+ $pending = [];
+ foreach ( [ 'ss_total_edits',
+ 'ss_good_articles', 'ss_total_pages', 'ss_users', 'ss_images' ] as $type
+ ) {
+ // Get pending increments and pending decrements
+ $flg = BagOStuff::READ_LATEST;
+ $pending[$type]['+'] = (int)$this->stash->get(
+ $this->getTypeCacheKey( $this->stash, $type, '+' ),
+ $flg
+ );
+ $pending[$type]['-'] = (int)$this->stash->get(
+ $this->getTypeCacheKey( $this->stash, $type, '-' ),
+ $flg
+ );
+ }
+
+ return $pending;
+ }
+
+ /**
+ * Reduce pending delta counters after updates have been applied
+ * @param array $pd Result of getPendingDeltas(), used for DB update
+ */
+ protected function removePendingDeltas( array $pd ) {
+ foreach ( $pd as $type => $deltas ) {
+ foreach ( $deltas as $sign => $magnitude ) {
+ // Lower the pending counter now that we applied these changes
+ $key = $this->getTypeCacheKey( $this->stash, $type, $sign );
+ $this->stash->decr( $key, $magnitude );
+ }
+ }
+ }
+}
diff --git a/www/wiki/includes/deferred/SqlDataUpdate.php b/www/wiki/includes/deferred/SqlDataUpdate.php
new file mode 100644
index 00000000..2411beff
--- /dev/null
+++ b/www/wiki/includes/deferred/SqlDataUpdate.php
@@ -0,0 +1,40 @@
+<?php
+/**
+ * Base code for update jobs that put some secondary data extracted
+ * from article content into the database.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+use Wikimedia\Rdbms\IDatabase;
+
+/**
+ * @deprecated Since 1.28 Use DataUpdate directly, injecting the database
+ */
+abstract class SqlDataUpdate extends DataUpdate {
+ /** @var IDatabase Database connection reference */
+ protected $mDb;
+ /** @var array SELECT options to be used (array) */
+ protected $mOptions = [];
+
+ /**
+ * Calls the parent constructor and stores a lazy DB_MASTER connection
+ * reference, obtained from wfGetLB(), in $this->mDb.
+ */
+ public function __construct() {
+ parent::__construct();
+
+ $this->mDb = wfGetLB()->getLazyConnectionRef( DB_MASTER );
+ }
+}
diff --git a/www/wiki/includes/deferred/TransactionRoundDefiningUpdate.php b/www/wiki/includes/deferred/TransactionRoundDefiningUpdate.php
new file mode 100644
index 00000000..a32d4a07
--- /dev/null
+++ b/www/wiki/includes/deferred/TransactionRoundDefiningUpdate.php
@@ -0,0 +1,30 @@
+<?php
+
+/**
+ * Deferrable update that must run outside of any explicit LBFactory transaction round
+ *
+ * @since 1.31
+ */
+class TransactionRoundDefiningUpdate implements DeferrableUpdate, DeferrableCallback {
+ /** @var callable Callback run by doUpdate() */
+ private $callback;
+ /** @var string Calling method name, as returned by getOrigin() */
+ private $fname;
+
+ /**
+ * @param callable $callback
+ * @param string $fname Calling method
+ */
+ public function __construct( callable $callback, $fname = 'unknown' ) {
+ $this->callback = $callback;
+ $this->fname = $fname;
+ }
+
+ /**
+ * Run the callback given to the constructor
+ */
+ public function doUpdate() {
+ call_user_func( $this->callback );
+ }
+
+ /**
+ * @return string The calling method name given to the constructor
+ */
+ public function getOrigin() {
+ return $this->fname;
+ }
+}
diff --git a/www/wiki/includes/deferred/WANCacheReapUpdate.php b/www/wiki/includes/deferred/WANCacheReapUpdate.php
new file mode 100644
index 00000000..5ffc9388
--- /dev/null
+++ b/www/wiki/includes/deferred/WANCacheReapUpdate.php
@@ -0,0 +1,133 @@
+<?php
+
+use Psr\Log\LoggerInterface;
+use Wikimedia\Rdbms\IDatabase;
+
+/**
+ * Class for fixing stale WANObjectCache keys using a purge event source
+ *
+ * This is useful for expiring keys that missed fire-and-forget purges. This uses the
+ * recentchanges table as a reliable stream to make certain keys reach consistency
+ * as soon as the underlying replica database catches up. This means that critical
+ * keys will not escape getting purged simply due to brief hiccups in the network,
+ * which are more prone to happen across datacenters.
+ *
+ * ----
+ * "I was trying to cheat death. I was only trying to surmount for a little while the
+ * darkness that all my life I surely knew was going to come rolling in on me some day
+ * and obliterate me. I was only to stay alive a little brief while longer, after I was
+ * already gone. To stay in the light, to be with the living, a little while past my time."
+ * -- Notes for "Blues of a Lifetime", by [[Cornell Woolrich]]
+ *
+ * @since 1.28
+ */
+class WANCacheReapUpdate implements DeferrableUpdate {
+	/** @var IDatabase Connection used to read the recentchanges table */
+	private $db;
+	/** @var LoggerInterface Handed to the reaper and used for key debug messages */
+	private $logger;
+
+	/**
+	 * @param IDatabase $db
+	 * @param LoggerInterface $logger
+	 */
+	public function __construct( IDatabase $db, LoggerInterface $logger ) {
+		$this->db = $db;
+		$this->logger = $logger;
+	}
+
+	/** Run a WANObjectCacheReaper that pulls its events and keys from the two callbacks below */
+	public function doUpdate() {
+		$reaper = new WANObjectCacheReaper(
+			ObjectCache::getMainWANInstance(),
+			ObjectCache::getLocalClusterInstance(),
+			[ $this, 'getTitleChangeEvents' ],
+			[ $this, 'getEventAffectedKeys' ],
+			[
+				'channel' => 'table:recentchanges:' . $this->db->getDomainID(),
+				'logger' => $this->logger
+			]
+		);
+		$reaper->invoke( 100 );
+	}
+
+	/**
+	 * Select recentchanges rows positioned after ( $start, $id ) and before $end, oldest first
+	 * @see WANObjectCacheReaper
+	 * @param int $start Timestamp to resume from; rows at exactly $start must have rc_id > $id
+	 * @param int|null $id rc_id tie-breaker for rows whose rc_timestamp equals $start
+	 * @param int $end Timestamp that rc_timestamp must be strictly below
+	 * @param int $limit Maximum number of rows to fetch
+	 * @return array[] One map per row: 'id' (rc_id), 'pos' (UNIX time), 'item' (TitleValue)
+	 */
+	public function getTitleChangeEvents( $start, $id, $end, $limit ) {
+		$db = $this->db;
+		$encStart = $db->addQuotes( $db->timestamp( $start ) );
+		$encEnd = $db->addQuotes( $db->timestamp( $end ) );
+		$id = (int)$id; // cast NULL => 0 since rc_id is an integer
+
+		$res = $db->select(
+			'recentchanges',
+			[ 'rc_namespace', 'rc_title', 'rc_timestamp', 'rc_id' ],
+			[
+				$db->makeList( [
+					"rc_timestamp > $encStart",
+					"rc_timestamp = $encStart AND rc_id > " . $db->addQuotes( $id )
+				], LIST_OR ),
+				"rc_timestamp < $encEnd"
+			],
+			__METHOD__,
+			[ 'ORDER BY' => 'rc_timestamp ASC, rc_id ASC', 'LIMIT' => $limit ]
+		);
+
+		$events = [];
+		foreach ( $res as $row ) {
+			$events[] = [
+				'id' => (int)$row->rc_id,
+				'pos' => (int)wfTimestamp( TS_UNIX, $row->rc_timestamp ),
+				'item' => new TitleValue( (int)$row->rc_namespace, $row->rc_title )
+			];
+		}
+
+		return $events;
+	}
+
+	/**
+	 * Gets a list of important cache keys associated with a title
+	 *
+	 * @see WANObjectCacheReaper
+	 * @param WANObjectCache $cache Passed through to each entity's getMutableCacheKeys()
+	 * @param TitleValue $t Title whose page, file and/or user entities are inspected
+	 * @return string[]
+	 */
+	public function getEventAffectedKeys( WANObjectCache $cache, TitleValue $t ) {
+		/** @var WikiPage[]|LocalFile[]|User[] $entities */
+		$entities = [];
+
+		// You can't create a WikiPage for special pages (-1) or other virtual
+		// namespaces, but special pages do appear in RC sometimes, e.g. for logs
+		// of AbuseFilter filter changes.
+		if ( $t->getNamespace() >= 0 ) {
+			$entities[] = WikiPage::factory( Title::newFromTitleValue( $t ) );
+		}
+
+		if ( $t->inNamespace( NS_FILE ) ) {
+			$entities[] = wfLocalFile( $t->getText() );
+		}
+		if ( $t->inNamespace( NS_USER ) ) {
+			$entities[] = User::newFromName( $t->getText(), false );
+		}
+
+		$keys = [];
+		foreach ( $entities as $entity ) {
+			if ( $entity ) {
+				$keys = array_merge( $keys, $entity->getMutableCacheKeys( $cache ) );
+			}
+		}
+		if ( $keys ) {
+			$this->logger->debug( __CLASS__ . ': got key(s) ' . implode( ', ', $keys ) );
+		}
+
+		return $keys;
+	}
+}