path: root/www/wiki/extensions/SemanticMediaWiki/src/MediaWiki/Jobs/ParserCachePurgeJob.php
Diffstat (limited to 'www/wiki/extensions/SemanticMediaWiki/src/MediaWiki/Jobs/ParserCachePurgeJob.php')
-rw-r--r--  www/wiki/extensions/SemanticMediaWiki/src/MediaWiki/Jobs/ParserCachePurgeJob.php  264
1 file changed, 264 insertions, 0 deletions
diff --git a/www/wiki/extensions/SemanticMediaWiki/src/MediaWiki/Jobs/ParserCachePurgeJob.php b/www/wiki/extensions/SemanticMediaWiki/src/MediaWiki/Jobs/ParserCachePurgeJob.php
new file mode 100644
index 00000000..333c5b45
--- /dev/null
+++ b/www/wiki/extensions/SemanticMediaWiki/src/MediaWiki/Jobs/ParserCachePurgeJob.php
@@ -0,0 +1,264 @@
+<?php
+
+namespace SMW\MediaWiki\Jobs;
+
+use SMW\MediaWiki\Job;
+use Hooks;
+use SMW\ApplicationFactory;
+use SMW\HashBuilder;
+use SMW\RequestOptions;
+use SMW\SQLStore\QueryDependencyLinksStoreFactory;
+use SMW\Utils\Timer;
+use SMW\DIWikiPage;
+use SMWQuery as Query;
+use Title;
+
+/**
+ * @license GNU GPL v2+
+ * @since 2.3
+ *
+ * @author mwjames
+ */
+class ParserCachePurgeJob extends Job {
+
+ /**
+ * A balanced size that should be monitored carefully so that the initial
+ * update in online mode does not suffer a negative performance impact.
+ */
+ const CHUNK_SIZE = 300;
+
+ /**
+ * The DB update execution mode executes the purge immediately, which may
+ * cause a surge in DB inserts.
+ */
+ const EXEC_DB = 'exec.db';
+
+ /**
+ * The journal update execution mode pauses the execution and stores the
+ * update temporarily until an actual page is viewed.
+ */
+ const EXEC_JOURNAL = 'exec.journal';
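+
+ // A hedged usage sketch (the parameter values below are illustrative
+ // assumptions, not part of this patch): the execution mode is selected
+ // through the 'exec.mode' job parameter, e.g.
+ //
+ // $params = [ 'idlist' => '42|99', 'exec.mode' => self::EXEC_JOURNAL ];
+ //
+ // Any value other than EXEC_JOURNAL (including an omitted parameter)
+ // falls through to the immediate parser cache pool purge in run().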
+
+ /**
+ * @var ApplicationFactory
+ */
+ protected $applicationFactory;
+
+ /**
+ * @var integer
+ */
+ private $limit = self::CHUNK_SIZE;
+
+ /**
+ * @var integer
+ */
+ private $offset = 0;
+
+ /**
+ * @var PageUpdater
+ */
+ protected $pageUpdater;
+
+ /**
+ * @since 2.3
+ *
+ * @param Title $title
+ * @param array $params job parameters
+ */
+ public function __construct( Title $title, $params = [] ) {
+ parent::__construct( 'smw.parserCachePurge', $title, $params );
+ $this->removeDuplicates = true;
+ }
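+
+ /**
+ * A minimal usage sketch (the title and ID values are illustrative
+ * assumptions): construct the job with a pipe-separated ID list and
+ * queue it through the insert() method below.
+ *
+ * @code
+ * $job = new ParserCachePurgeJob(
+ * 	$title,
+ * 	[ 'idlist' => '42|99', 'limit' => 300, 'offset' => 0 ]
+ * );
+ *
+ * $job->insert();
+ * @endcode
+ */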
+
+ /**
+ * @see Job::insert
+ */
+ public function insert() {
+
+ if (
+ $this->hasParameter( 'is.enabled' ) &&
+ $this->getParameter( 'is.enabled' ) === false ) {
+ return;
+ }
+
+ parent::insert();
+ }
+
+ /**
+ * @see Job::run
+ *
+ * @since 2.3
+ */
+ public function run() {
+
+ Timer::start( __METHOD__ );
+ $this->applicationFactory = ApplicationFactory::getInstance();
+ $this->pageUpdater = $this->applicationFactory->newPageUpdater();
+
+ $count = 0;
+ $linksCount = 0;
+
+ if ( $this->hasParameter( 'limit' ) ) {
+ $this->limit = $this->getParameter( 'limit' );
+ }
+
+ if ( $this->hasParameter( 'offset' ) ) {
+ $this->offset = $this->getParameter( 'offset' );
+ }
+
+ if ( $this->hasParameter( 'idlist' ) ) {
+ $this->purgeTargetLinksFromList( $this->getParameter( 'idlist' ), $count, $linksCount );
+ }
+
+ if ( $this->getParameter( 'exec.mode' ) !== self::EXEC_JOURNAL ) {
+ $this->pageUpdater->addPage( $this->getTitle() );
+ $this->pageUpdater->setOrigin( __METHOD__ );
+ $this->pageUpdater->doPurgeParserCacheAsPool();
+ }
+
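+ // Notify registered subscribers that the parser cache purge for this
+ // job has completed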
+ Hooks::run( 'SMW::Job::AfterParserCachePurgeComplete', [ $this ] );
+
+ $this->applicationFactory->getMediaWikiLogger()->info(
+ [
+ 'Job',
+ "ParserCachePurgeJob",
+ "List count:{count}",
+ "Links count:{linksCount}",
+ "Limit:{limit}",
+ "Offset:{offset}",
+ "procTime in sec: {procTime}"
+ ],
+ [
+ 'method' => __METHOD__,
+ 'role' => 'user',
+ 'procTime' => Timer::getElapsedTime( __METHOD__, 7 ),
+ 'limit' => $this->limit,
+ 'offset' => $this->offset,
+ 'count' => $count,
+ 'linksCount' => $linksCount
+ ]
+ );
+
+ return true;
+ }
+
+ /**
+ * Based on the CHUNK_SIZE, target links are purged immediately when the
+ * number of selected entities is below CHUNK_SIZE, which should be enough
+ * for most common queries that share only a limited number of
+ * dependencies. For queries that expect a large subject/dependency pool,
+ * an online update of all entities at once is not feasible, hence the
+ * iterative process of creating batches that run through the job
+ * scheduler.
+ *
+ * @param array|string $idList
+ * @param integer &$listCount
+ * @param integer &$linksCount
+ */
+ private function purgeTargetLinksFromList( $idList, &$listCount, &$linksCount ) {
+
+ if ( is_string( $idList ) && strpos( $idList, '|' ) !== false ) {
+ $idList = explode( '|', $idList );
+ }
+
+ if ( $idList === [] ) {
+ return true;
+ }
+
+ $queryDependencyLinksStoreFactory = $this->applicationFactory->singleton(
+ 'QueryDependencyLinksStoreFactory'
+ );
+
+ $queryDependencyLinksStore = $queryDependencyLinksStoreFactory->newQueryDependencyLinksStore(
+ $this->applicationFactory->getStore()
+ );
+
+ $dependencyLinksUpdateJournal = $queryDependencyLinksStoreFactory->newDependencyLinksUpdateJournal();
+
+ $requestOptions = new RequestOptions();
+
+ // +1 to look ahead
+ $requestOptions->setLimit( $this->limit + 1 );
+ $requestOptions->setOffset( $this->offset );
+ $requestOptions->setOption( 'links.count', 0 );
+
+ $hashList = $queryDependencyLinksStore->findDependencyTargetLinks(
+ $idList,
+ $requestOptions
+ );
+
+ $linksCount = $requestOptions->getOption( 'links.count' );
+
+ // If more results are available, fetch the remaining updates
+ // iteratively by creating successive jobs
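+ // (illustrative, assuming the default CHUNK_SIZE of 300: the +1
+ // look-ahead limit of 301 signals that more rows exist; successor jobs
+ // then continue at offset 300, 600, ... until links.count no longer
+ // exceeds the limit)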
+ if ( $linksCount > $this->limit ) {
+ $job = new self(
+ $this->getTitle(),
+ [
+ 'idlist' => $idList,
+ 'limit' => $this->limit,
+ 'offset' => $this->offset + self::CHUNK_SIZE,
+ 'exec.mode' => $this->getParameter( 'exec.mode' )
+ ]
+ );
+
+ $job->run();
+ }
+
+ if ( $hashList === [] ) {
+ return true;
+ }
+
+ list( $hashList, $queryList ) = $this->splitList( $hashList );
+ $listCount = count( $hashList );
+
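+ // Evict cached query results for the collected query IDs so that they
+ // are rebuilt on the next request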
+ $cachedQueryResultPrefetcher = $this->applicationFactory->singleton(
+ 'CachedQueryResultPrefetcher'
+ );
+
+ $cachedQueryResultPrefetcher->resetCacheBy(
+ $queryList,
+ 'ParserCachePurgeJob'
+ );
+
+ if ( $this->getParameter( 'exec.mode' ) === self::EXEC_JOURNAL ) {
+ $dependencyLinksUpdateJournal->updateFromList( $hashList, $this->getTitle()->getLatestRevID() );
+ } else {
+ $this->addPagesToUpdater( $hashList );
+ }
+ }
+
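+ /**
+ * Splits a list of entity hashes into target link subjects and query
+ * IDs. As an illustrative example (the hash value is an assumption), a
+ * hash such as "Foo#0##_QUERYabc" contributes "_QUERYabc" to the query
+ * list and the base subject of "Foo" to the target links list.
+ *
+ * @param array $hashList
+ *
+ * @return array
+ */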
+ public function splitList( $hashList ) {
+
+ $targetLinksList = [];
+ $queryList = [];
+
+ foreach ( $hashList as $hash ) {
+
+ if ( $hash instanceof DIWikiPage ) {
+ $hash = $hash->getHash();
+ }
+
+ list( $title, $namespace, $iw, $subobjectname ) = explode( '#', $hash, 4 );
+
+ // The QueryResultCache stores queries with their queryID as $subobjectname
+ if ( strpos( $subobjectname, Query::ID_PREFIX ) !== false ) {
+ $queryList[$subobjectname] = true;
+ }
+
+ // To avoid querying the DB, we assume that a query is bound to its
+ // subject; simply remove the subobject identifier (_QUERY*) and create
+ // the base (or root) subject for the selected target (embedded query)
+ $targetLinksList[HashBuilder::createHashIdFromSegments( $title, $namespace, $iw )] = true;
+ }
+
+ return [ array_keys( $targetLinksList ), array_keys( $queryList ) ];
+ }
+
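+ /**
+ * Converts each hash back into a Title and registers it with the
+ * PageUpdater for the pooled parser cache purge.
+ *
+ * @param array $hashList
+ */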
+ private function addPagesToUpdater( array $hashList ) {
+ foreach ( $hashList as $hash ) {
+ $this->pageUpdater->addPage(
+ HashBuilder::newTitleFromHash( $hash )
+ );
+ }
+ }
+
+}