summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/SemanticMediaWiki/src/SQLStore/EntityRebuildDispatcher.php
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/extensions/SemanticMediaWiki/src/SQLStore/EntityRebuildDispatcher.php')
-rw-r--r--www/wiki/extensions/SemanticMediaWiki/src/SQLStore/EntityRebuildDispatcher.php512
1 files changed, 512 insertions, 0 deletions
diff --git a/www/wiki/extensions/SemanticMediaWiki/src/SQLStore/EntityRebuildDispatcher.php b/www/wiki/extensions/SemanticMediaWiki/src/SQLStore/EntityRebuildDispatcher.php
new file mode 100644
index 00000000..7ee18256
--- /dev/null
+++ b/www/wiki/extensions/SemanticMediaWiki/src/SQLStore/EntityRebuildDispatcher.php
@@ -0,0 +1,512 @@
+<?php
+
+namespace SMW\SQLStore;
+
+use Hooks;
+use SMW\ApplicationFactory;
+use SMW\DIWikiPage;
+use SMW\PropertyRegistry;
+use SMW\SemanticData;
+use SMW\Utils\Lru;
+use SMW\MediaWiki\TitleFactory;
+use Title;
+
+/**
+ * @private
+ *
+ * @license GNU GPL v2+
+ * @since 2.3
+ *
+ * @author Markus Krötzsch
+ * @author Jeroen De Dauw
+ * @author Nischay Nahata
+ * @author mwjames
+ */
+class EntityRebuildDispatcher {
+
+ /**
+ * @var SQLStore
+ */
+ private $store;
+
+ /**
+ * @var TitleFactory
+ */
+ private $titleFactory;
+
+ /**
+ * @var PropertyTableIdReferenceDisposer
+ */
+ private $propertyTableIdReferenceDisposer;
+
+ /**
+ * @var JobFactory
+ */
+ private $jobFactory;
+
+ /**
+ * @var NamespaceExaminer
+ */
+ private $namespaceExaminer;
+
+ /**
+ * @var array
+ */
+ private $options;
+
+ /**
+ * @var array|false
+ */
+ private $namespaces = false;
+
+ /**
+ * @var integer
+ */
+ private $iterationLimit = 1;
+
+ /**
+ * @var integer
+ */
+ private $progress = 1;
+
+ /**
+ * @var array
+ */
+ private $dispatchedEntities = [];
+
+ /**
+ * @var array
+ */
+ private $updateJobs = [];
+
+ /**
+ * @var Lru
+ */
+ private $lru;
+
+ /**
+ * @since 2.3
+ *
+ * @param SQLStore $store
+ * @param TitleFactory $titleFactory
+ */
+ public function __construct( SQLStore $store, TitleFactory $titleFactory ) {
+ $this->store = $store;
+ $this->titleFactory = $titleFactory;
+ $this->propertyTableIdReferenceDisposer = new PropertyTableIdReferenceDisposer( $store );
+ $this->jobFactory = ApplicationFactory::getInstance()->newJobFactory();
+ $this->namespaceExaminer = ApplicationFactory::getInstance()->getNamespaceExaminer();
+ $this->lru = new Lru( 10000 );
+ }
+
+ /**
+ * @since 2.3
+ *
+ * @param array $options
+ */
+ public function setOptions( array $options ) {
+ $this->options = $options;
+ }
+
+ /**
+ * @since 2.3
+ *
+ * @param array|false $namespaces
+ */
+ public function setRestrictionToNamespaces( $namespaces ) {
+ $this->namespaces = $namespaces;
+ }
+
+ /**
+ * @since 2.3
+ *
+ * @param integer $iterationLimit
+ */
+ public function setDispatchRangeLimit( $iterationLimit ) {
+ $this->iterationLimit = (int)$iterationLimit;
+ }
+
+ /**
+ * @since 2.3
+ *
+ * @return integer
+ */
+ public function getMaxId() {
+
+ $db = $this->store->getConnection( 'mw.db' );
+
+ $maxByPageId = (int)$db->selectField(
+ 'page',
+ 'MAX(page_id)',
+ '',
+ __METHOD__
+ );
+
+ $maxBySmwId = (int)$db->selectField(
+ \SMWSql3SmwIds::TABLE_NAME,
+ 'MAX(smw_id)',
+ '',
+ __METHOD__
+ );
+
+ return max( $maxByPageId, $maxBySmwId );
+ }
+
+ /**
+ * Decimal between 0 and 1 to indicate the overall progress of the rebuild
+ * process
+ *
+ * @since 2.3
+ *
+ * @return integer
+ */
+ public function getEstimatedProgress() {
+ return $this->progress;
+ }
+
+ /**
+ * @since 2.4
+ *
+ * @return array
+ */
+ public function getDispatchedEntities() {
+ return $this->dispatchedEntities;
+ }
+
+ /**
+ * Dispatching of a single or a chunk of ids in either online or batch mode
+ * using the JoblruScheduler
+ *
+ * @since 2.3
+ *
+ * @param integer &$id
+ */
+ public function rebuild( &$id ) {
+
+ $this->updateJobs = [];
+ $this->dispatchedEntities = [];
+
+ // was nothing done in this run?
+ $emptyRange = true;
+
+ $this->match_title( $id );
+
+ if ( $this->updateJobs !== [] ) {
+ $emptyRange = false;
+ }
+
+ $this->match_subject( $id, $emptyRange );
+
+ // Deprecated since 2.3, use 'SMW::SQLStore::BeforeDataRebuildJobInsert'
+ \Hooks::run('smwRefreshDataJobs', [ &$this->updateJobs ] );
+
+ Hooks::run( 'SMW::SQLStore::BeforeDataRebuildJobInsert', [ $this->store, &$this->updateJobs ] );
+
+ if ( isset( $this->options['use-job'] ) && $this->options['use-job'] ) {
+ $this->jobFactory->batchInsert( $this->updateJobs );
+ } else {
+ foreach ( $this->updateJobs as $job ) {
+ $job->run();
+ }
+ }
+
+ // -1 means that no next position is available
+ $this->next_position( $id, $emptyRange );
+
+ return $this->progress = $id > 0 ? $id / $this->getMaxId() : 1;
+ }
+
+ /**
+ * @param integer $id
+ * @param UpdateJob[] &$updateJobs
+ */
+ private function match_title( $id ) {
+
+ // Update by MediaWiki page id --> make sure we get all pages.
+ $tids = [];
+
+ // Array of ids
+ for ( $i = $id; $i < $id + $this->iterationLimit; $i++ ) {
+ $tids[] = $i;
+ }
+
+ $titles = $this->titleFactory->newFromIDs( $tids );
+
+ foreach ( $titles as $title ) {
+
+ if ( $this->lru->get( $title->getDBKey() . '#' . $title->getNamespace() ) !== null ) {
+ continue;
+ }
+
+ if ( ( $this->namespaces == false ) || ( in_array( $title->getNamespace(), $this->namespaces ) ) ) {
+ $this->add_update( $title );
+ }
+
+ $this->dispatchedEntities[] = [ 't' => $title->getPrefixedDBKey() ];
+ }
+ }
+
+ private function match_subject( $id, &$emptyRange ) {
+
+ // update by internal SMW id --> make sure we get all objects in SMW
+ $db = $this->store->getConnection( 'mw.db' );
+
+ // MW 1.29+ "Exception thrown with an uncommitted database transaction ...
+ // MWCallableUpdate::doUpdate: transaction round 'SMW\MediaWiki\Jobs\RefreshJob::run' already started"
+ $this->propertyTableIdReferenceDisposer->waitOnTransactionIdle();
+
+ $res = $db->select(
+ \SMWSql3SmwIds::TABLE_NAME,
+ [
+ 'smw_id',
+ 'smw_title',
+ 'smw_namespace',
+ 'smw_iw',
+ 'smw_subobject',
+ 'smw_sortkey',
+ 'smw_proptable_hash',
+ 'smw_rev'
+ ],
+ [
+ "smw_id >= $id ",
+ " smw_id < " . $db->addQuotes( $id + $this->iterationLimit )
+ ],
+ __METHOD__
+ );
+
+ foreach ( $res as $row ) {
+ $emptyRange = false; // note this even if no jobs were created
+
+ if ( $this->namespaces && !in_array( $row->smw_namespace, $this->namespaces ) ) {
+ continue;
+ }
+
+ // If the reference is for some reason created as part of a not
+ // supported namespace, check and clean it!
+ //
+ // The check is required to ensure that annotations let's say
+ // [[Foo::SomeNS:Bar]] (where SomeNS is not enabled for SMW) are not
+ // removed and is kept as long as a reference to `SomeNS:Bar` exists
+ if ( !$this->namespaceExaminer->isSemanticEnabled( (int)$row->smw_namespace ) ) {
+ $this->propertyTableIdReferenceDisposer->removeOutdatedEntityReferencesById( $row->smw_id );
+ continue;
+ }
+
+ // Find page to refresh, even for special properties:
+ if ( $row->smw_title != '' && $row->smw_title{0} != '_' ) {
+ $titleKey = $row->smw_title;
+ } elseif ( $row->smw_namespace == SMW_NS_PROPERTY && $row->smw_iw == '' && $row->smw_subobject == '' ) {
+ $titleKey = str_replace( ' ', '_', PropertyRegistry::getInstance()->findCanonicalPropertyLabelById( $row->smw_title ) );
+ } else {
+ $titleKey = '';
+ }
+
+ $hash = $titleKey . '#' . $row->smw_namespace;
+
+ if ( $row->smw_subobject !== '' && $row->smw_iw !== SMW_SQL3_SMWDELETEIW ) {
+
+ $title = $this->titleFactory->makeTitleSafe( $row->smw_namespace, $titleKey );
+
+ // Remove tangling subobjects without a real page (created by a
+ // page preview etc.) otherwise leave subobjects alone; they ought
+ // to be changed with their pages
+ if ( $title !== null && !$title->exists() ) {
+ $this->propertyTableIdReferenceDisposer->cleanUpTableEntriesById( $row->smw_id );
+ } else {
+ $this->dispatchedEntities[] = [ 's' => $row->smw_title . '#' . $row->smw_namespace . '#' .$row->smw_subobject ];
+ }
+ } elseif ( $this->isPlainObjectValue( $row ) ) {
+ $this->propertyTableIdReferenceDisposer->removeOutdatedEntityReferencesById( $row->smw_id );
+ } elseif ( $row->smw_iw === '' && $titleKey != '' ) {
+
+ if ( $this->lru->get( $hash ) !== null ) {
+ continue;
+ }
+
+ // objects representing pages
+ $title = $this->titleFactory->makeTitleSafe( $row->smw_namespace, $titleKey );
+
+ if ( $title !== null ) {
+ $this->dispatchedEntities[] = [ 's' => $title->getPrefixedDBKey() ];
+ $this->add_update( $title, $row );
+ }
+ } elseif ( $row->smw_iw == SMW_SQL3_SMWREDIIW && $titleKey != '' ) {
+
+ if ( $this->lru->get( $hash ) !== null ) {
+ continue;
+ }
+
+ // TODO: special treatment of redirects needed, since the store will
+ // not act on redirects that did not change according to its records
+ $title = $this->titleFactory->makeTitleSafe( $row->smw_namespace, $titleKey );
+
+ if ( $title !== null && !$title->exists() ) {
+ $this->dispatchedEntities[] = [ 's' => $title->getPrefixedDBKey() ];
+ $this->add_update( $title, $row );
+ }
+
+ $this->propertyTableIdReferenceDisposer->cleanUpTableEntriesById( $row->smw_id );
+ } elseif ( $row->smw_iw == SMW_SQL3_SMWIW_OUTDATED || $row->smw_iw == SMW_SQL3_SMWDELETEIW ) { // remove outdated internal object references
+ $this->propertyTableIdReferenceDisposer->cleanUpTableEntriesById( $row->smw_id );
+ } elseif ( $titleKey != '' ) { // "normal" interwiki pages or outdated internal objects -- delete
+
+ if ( $this->lru->get( $hash ) !== null ) {
+ continue;
+ }
+
+ $subject = new DIWikiPage( $titleKey, $row->smw_namespace, $row->smw_iw );
+ $this->store->updateData( new SemanticData( $subject ) );
+ $this->dispatchedEntities[] = [ 's' => $subject ];
+ }
+
+ if ( $row->smw_namespace == SMW_NS_PROPERTY && $row->smw_iw == '' && $row->smw_subobject == '' ) {
+ $this->findDuplicateProperties( $row );
+ }
+ }
+
+ $db->freeResult( $res );
+ }
+
+ private function isPlainObjectValue( $row ) {
+
+ // A rogue title should never happen
+ if ( $row->smw_title === '' && $row->smw_proptable_hash === null ) {
+ return true;
+ }
+
+ return $row->smw_iw != SMW_SQL3_SMWDELETEIW &&
+ $row->smw_iw != SMW_SQL3_SMWREDIIW &&
+ $row->smw_iw != SMW_SQL3_SMWIW_OUTDATED &&
+ // Leave any pre-defined property (_...) untouched
+ $row->smw_title != '' &&
+ $row->smw_title{0} != '_' &&
+ // smw_proptable_hash === null means it is not a subject but an object value
+ $row->smw_proptable_hash === null;
+ }
+
+ private function findDuplicateProperties( $row ) {
+
+ $db = $this->store->getConnection( 'mw.db' );
+
+ // Use the sortkey (comparing the label and not the "_..." key) in order
+ // to match possible duplicate properties by label (not by key)
+ $duplicates = $db->select(
+ \SMWSql3SmwIds::TABLE_NAME,
+ [
+ 'smw_id',
+ 'smw_title' ],
+ [
+ "smw_id !=" . $db->addQuotes( $row->smw_id ),
+ "smw_sortkey =" . $db->addQuotes( $row->smw_sortkey ),
+ "smw_namespace =" . $row->smw_namespace,
+ "smw_subobject =" . $db->addQuotes( $row->smw_subobject )
+ ],
+ __METHOD__,
+ [
+ 'ORDER BY' => "smw_id ASC"
+ ]
+ );
+
+ if ( $duplicates === false ) {
+ return;
+ }
+
+ // Instead of copying ID's across DB tables have the re-parse to ensure
+ // that all property value ID's are reassigned together while the duplicate
+ // is marked for removal until the next run
+ foreach ( $duplicates as $duplicate ) {
+
+ // If titles don't match then continue because it could be that
+ // Property:Foo with displaytitle foobar -> sortkey ->foobar
+ // Property:Bar with displaytitle foobar -> sortkey ->foobar
+ if ( $row->smw_title !== $duplicate->smw_title ) {
+ continue;
+ }
+
+ $this->store->getObjectIds()->updateInterwikiField(
+ $duplicate->smw_id,
+ new DIWikiPage( $row->smw_title, $row->smw_namespace, SMW_SQL3_SMWDELETEIW )
+ );
+ }
+ }
+
+ private function next_position( &$id, $emptyRange ) {
+
+ $nextPosition = $id + $this->iterationLimit;
+ $db = $this->store->getConnection( 'mw.db' );
+
+ // nothing found, check if there will be more pages later on
+ if ( $emptyRange && $nextPosition > \SMWSql3SmwIds::FXD_PROP_BORDER_ID ) {
+
+ $nextByPageId = (int)$db->selectField(
+ 'page',
+ 'page_id',
+ "page_id >= $nextPosition",
+ __METHOD__,
+ [
+ 'ORDER BY' => "page_id ASC"
+ ]
+ );
+
+ $nextBySmwId = (int)$db->selectField(
+ \SMWSql3SmwIds::TABLE_NAME,
+ 'smw_id',
+ "smw_id >= $nextPosition",
+ __METHOD__,
+ [
+ 'ORDER BY' => "smw_id ASC"
+ ]
+ );
+
+ // Next position is determined by the pool with the maxId
+ $nextPosition = $nextBySmwId != 0 && $nextBySmwId > $nextByPageId ? $nextBySmwId : $nextByPageId;
+ }
+
+ $id = $nextPosition ? $nextPosition : -1;
+ }
+
+ private function add_update( $title, $row = false ) {
+
+ $hash = $title->getDBKey() . '#' . $title->getNamespace();
+ $this->lru->set( $hash, true );
+
+ if ( isset( $this->options['revision-mode'] ) && $this->options['revision-mode'] && !$this->options['force-update'] && $this->matchesLatestRevID( $title, $row ) ) {
+ return $this->dispatchedEntities[] = [ 'skipped' => $title->getPrefixedDBKey() ];
+ }
+
+ $params = [
+ 'origin' => 'EntityRebuildDispatcher'
+ ];
+
+ if ( isset( $this->options['shallow-update'] ) && $this->options['shallow-update'] ) {
+ $params += [ 'shallowUpdate' => true ];
+ } elseif ( isset( $this->options['force-update'] ) && $this->options['force-update'] ) {
+ $params += [ 'forcedUpdate' => true ];
+ }
+
+ $updateJob = $this->jobFactory->newUpdateJob(
+ $title,
+ $params
+ );
+
+ $this->updateJobs[] = $updateJob;
+ }
+
+ private function matchesLatestRevID( $title, $row = false ) {
+
+ $latestRevID = $title->getLatestRevID( Title::GAID_FOR_UPDATE );
+
+ if ( $row !== false ) {
+ return $latestRevID == $row->smw_rev;
+ };
+
+ $rev = $this->store->getObjectIds()->findAssociatedRev(
+ $title->getDBKey(),
+ $title->getNamespace(),
+ $title->getInterwiki()
+ );
+
+ return $latestRevID == $rev;
+ }
+
+}