diff options
Diffstat (limited to 'www/wiki/extensions/SemanticMediaWiki/src/SQLStore/EntityStore/SemanticDataLookup.php')
-rw-r--r-- | www/wiki/extensions/SemanticMediaWiki/src/SQLStore/EntityStore/SemanticDataLookup.php | 478 |
1 files changed, 478 insertions, 0 deletions
<?php

namespace SMW\SQLStore\EntityStore;

use Psr\Log\LoggerAwareTrait;
use RuntimeException;
use SMW\DIProperty;
use SMW\DIWikiPage;
use SMW\RequestOptions;
use SMW\SemanticData;
use SMW\SQLStore\PropertyTableDefinition;
use SMW\SQLStore\SQLStore;
use SMW\SQLStore\TableBuilder\FieldType;
use SMWDataItem as DataItem;

/**
 * Reads rows from a single property table of the SQLStore and turns them
 * into stub values, either for a subject (all property/value pairs found in
 * the table) or for a property (all values found in the table).
 *
 * @license GNU GPL v2+
 * @since 3.0
 *
 * @author mwjames
 */
class SemanticDataLookup {

	use LoggerAwareTrait;

	/**
	 * @var SQLStore
	 */
	private $store;

	/**
	 * @since 3.0
	 *
	 * @param SQLStore $store
	 */
	public function __construct( SQLStore $store ) {
		$this->store = $store;
	}

	/**
	 * Derives a reduced RequestOptions instance (limit, offset and, for
	 * non-fixed property tables, a `p_id` extra condition) from the given
	 * options. Returns null unless the given options carry a
	 * `conditionConstraint` member.
	 *
	 * @since 3.0
	 *
	 * @param PropertyTableDefinition $propertyTableDef
	 * @param DIProperty $property
	 * @param RequestOptions|null $requestOptions
	 *
	 * @return RequestOptions|null
	 */
	public function newRequestOptions( PropertyTableDefinition $propertyTableDef, DIProperty $property, RequestOptions $requestOptions = null ) {

		if ( $requestOptions === null || !isset( $requestOptions->conditionConstraint ) ) {
			return null;
		}

		$ropts = new RequestOptions();

		$ropts->setLimit( $requestOptions->getLimit() );
		$ropts->setOffset( $requestOptions->getOffset() );

		// A fixed property table holds a single property only, hence no
		// `p_id` restriction is added
		if ( $propertyTableDef->isFixedPropertyTable() ) {
			return $ropts;
		}

		$pid = $this->store->getObjectIds()->getSMWPropertyID(
			$property
		);

		if ( $pid > 0 ) {
			$ropts->addExtraCondition( [ 'p_id' => $pid ] );
		}

		return $ropts;
	}

	/**
	 * @since 3.0
	 *
	 * @param DIWikiPage|SemanticData $object
	 *
	 * @return StubSemanticData
	 * @throws RuntimeException when $object is of neither supported type
	 */
	public function newStubSemanticData( $object ) {

		if ( $object instanceof DIWikiPage ) {
			return new StubSemanticData( $object, $this->store, false );
		}

		if ( $object instanceof SemanticData ) {
			return StubSemanticData::newFromSemanticData( $object, $this->store );
		}

		throw new RuntimeException( 'Expected either a DIWikiPage or SemanticData object!' );
	}

	/**
	 * Returns a map keyed by the ID of every property table that is used by
	 * a property of the given SemanticData; each value is `true`.
	 *
	 * @since 3.0
	 *
	 * @param SemanticData $semanticData
	 *
	 * @return array
	 */
	public function getTableUsageInfo( SemanticData $semanticData ) {
		$state = [];

		foreach ( $semanticData->getProperties() as $property ) {
			$state[$this->store->findPropertyTableID( $property )] = true;
		}

		return $state;
	}

	/**
	 * Fetches the data stored for a subject in the given property table and
	 * returns it as stub values attached to a new StubSemanticData.
	 *
	 * @since 3.0
	 *
	 * @param integer $id
	 * @param DataItem|null $dataItem
	 * @param PropertyTableDefinition $propTable
	 * @param RequestOptions|null $requestOptions
	 *
	 * @return SemanticData
	 * @throws RuntimeException when $dataItem is not a DIWikiPage
	 */
	public function getSemanticData( $id, DataItem $dataItem = null, PropertyTableDefinition $propTable, RequestOptions $requestOptions = null ) {

		if ( !$dataItem instanceof DIWikiPage ) {
			throw new RuntimeException( 'Expected a DIWikiPage instance' );
		}

		$stubSemanticData = $this->newStubSemanticData( $dataItem );

		$data = $this->fetchSemanticData(
			$id,
			$dataItem,
			$propTable,
			$requestOptions
		);

		// For a subject lookup each entry is a [ property key, value keys ]
		// pair
		foreach ( $data as $d ) {
			$stubSemanticData->addPropertyStubValue( reset( $d ), end( $d ) );
		}

		return $stubSemanticData;
	}

	/**
	 * Helper function for reading all data from a given property table
	 * (specified by a PropertyTableDefinition), based on certain
	 * restrictions. The function can filter data based on the subject (1)
	 * or on the property it belongs to (2) -- but one of those must be
	 * done. Case (1) applies when $dataItem is a DIWikiPage or null,
	 * case (2) applies otherwise.
	 *
	 * In case (1), the first two parameters are taken to refer to a
	 * subject; in case (2) they are taken to refer to a property. In any
	 * case, the retrieval is limited to the specified $propTable. The
	 * parameters are an internal $id (of a subject or property), and a
	 * $dataItem (being a DIWikiPage or DIProperty). Moreover, when
	 * filtering by property, it is assumed that the given $propTable
	 * belongs to the property: if it is a table with fixed property, it
	 * will not be checked that this is the same property as the one that
	 * was given in $dataItem.
	 *
	 * In case (1), the result in general is an array of pairs (arrays of
	 * size 2) consisting of a property key (string), and DB keys (array if
	 * many, string if one) from which a datavalue dataItem for this value
	 * can be built.
	 *
	 * In case (2), the result is simply an array of DB keys (array if many,
	 * string if one) without the property keys.
	 *
	 * Be aware that the passed $requestOptions instance is modified in
	 * case (2) (`DISTINCT` and `ORDER BY` options are set on it).
	 *
	 * @since 3.0
	 *
	 * @param integer $id
	 * @param DataItem|null $dataItem
	 * @param PropertyTableDefinition $propTable
	 * @param RequestOptions|null $requestOptions
	 *
	 * @return array
	 */
	public function fetchSemanticData( $id, DataItem $dataItem = null, PropertyTableDefinition $propTable, RequestOptions $requestOptions = null ) {

		$isSubject = $dataItem instanceof DIWikiPage || $dataItem === null;

		// stop if there is not enough data:
		// properties always need to be given as dataItem,
		// subjects at least if !$propTable->usesIdSubject()
		if ( ( $id == 0 ) ||
			( $dataItem === null && ( !$isSubject || !$propTable->usesIdSubject() ) ) ||
			( $propTable->getDIType() === null ) ) {
			return [];
		}

		$result = [];
		$connection = $this->store->getConnection( 'mw.db' );

		// Build something like:
		//
		// SELECT o_id AS id0,o0.smw_title AS v0,o0.smw_namespace AS v1,o0.smw_iw
		// AS v2,o0.smw_sortkey AS v3,o0.smw_subobject AS v4
		// FROM `smw_fpt_sobj`
		// INNER JOIN `smw_object_ids` AS o0 ON o_id=o0.smw_id
		// WHERE s_id='852'
		// LIMIT 4
		//
		// or
		//
		// SELECT p.smw_title as prop,o_blob AS v0,o_hash AS v1 FROM `smw_di_blob`
		// INNER JOIN `smw_object_ids` AS p ON p_id=p.smw_id
		// WHERE s_id='80' AND p.smw_iw!=':smw' AND p.smw_iw!=':smw-delete'

		$query = $this->newQuery(
			$propTable,
			$id,
			$isSubject,
			$dataItem
		);

		if ( $requestOptions !== null ) {
			// Only `p_id` extra conditions are recognized here
			foreach ( $requestOptions->getExtraConditions() as $extraCondition ) {
				if ( isset( $extraCondition['p_id'] ) ) {
					$query->condition( $query->eq( 'p_id', $extraCondition['p_id'] ) );
				}
			}
		} else {
			$requestOptions = new RequestOptions();
		}

		$valueCount = 0;
		$fieldname = '';

		$diHandler = $this->store->getDataItemHandlerForDIType(
			$propTable->getDIType()
		);

		$valueField = $diHandler->getIndexField();
		$labelField = $diHandler->getLabelField();

		$fields = $diHandler->getFetchFields();

		// NOTE(review): $valueField and $labelField are passed by value,
		// therefore a replacement by `oN.smw_sortkey` made within addFields
		// is not visible to the conditions/options built below -- confirm
		// that this is intended.
		$this->addFields(
			$query,
			$fields,
			$valueField,
			$labelField,
			$valueCount,
			$fieldname
		);

		if ( !$isSubject ) {
			// Don't use DISTINCT for subject related value match but make sure
			// (#3531) it is used when requesting other values in order to retrieve
			// all available unique values within the range of the limit
			$requestOptions->setOption( 'DISTINCT', true );

			// Don't sort, this avoids a SQL `filesort`/`temporary table` usage
			// in combination with DISTINCT, values will be listed as-is instead
			// of a lexical representation but can be compensated by selecting a
			// wider range in case this is used as retrieving "all" values
			// for a property

			// SELECT DISTINCT o_id AS id0, o0.smw_title AS v0, o0.smw_namespace
			// AS v1, o0.smw_iw AS v2, o0.smw_sortkey AS v3, o0.smw_subobject AS
			// v4 FROM `smw_di_wikipage` INNER JOIN `smw_object_ids` AS o0 ON
			// o_id=o0.smw_id WHERE (p_id='x') LIMIT 51
			//
			// 8.6281ms
			//
			// vs.
			//
			// SELECT DISTINCT o_id AS id0, o0.smw_title AS v0, o0.smw_namespace
			// AS v1, o0.smw_iw AS v2, o0.smw_sortkey AS v3, o0.smw_subobject AS
			// v4 FROM `smw_di_wikipage` INNER JOIN `smw_object_ids` AS o0 ON
			// o_id=o0.smw_id WHERE (p_id='x') ORDER BY o_id LIMIT 51
			//
			// 24189.0128ms
			//
			// PS: In case of a `TYPE_WIKIPAGE` entity, sorting by `o_id`
			// wouldn't make much sense as it does not guarantee any lexical order
			$requestOptions->setOption( 'ORDER BY', false );

			// Apply sorting/string matching; only with given property
			$conds = $this->store->getSQLConditions(
				$requestOptions,
				$valueField,
				$labelField,
				$query->hasCondition()
			);

			$query->condition( $conds );
		} else {
			$valueField = '';
		}

		$query->options(
			$this->store->getSQLOptions( $requestOptions, $valueField )
		);

		$res = $connection->query(
			$query,
			__METHOD__
		);

		foreach ( $res as $row ) {
			$propertykey = '';

			// use joined or predefined property name
			if ( $isSubject ) {
				$propertykey = $propTable->isFixedPropertyTable() ? $propTable->getFixedProperty() : $row->prop;
			}

			$this->resultFromRow(
				$result,
				$row,
				$fields,
				$fieldname,
				$valueCount,
				$isSubject,
				$propertykey
			);
		}

		$connection->freeResult( $res );

		// Sorting via PHP for an explicit disabled `ORDER BY` to ensure that
		// the result set has at least a lexical order applied for the range of
		// retrieved values
		if ( $requestOptions->getOption( 'ORDER BY' ) === false ) {
			sort( $result );
		}

		return $result;
	}

	/**
	 * Creates the base SELECT on the property table together with the
	 * subject/property restriction and, for a subject lookup on a non-fixed
	 * table, the join that selects the property key as `prop`.
	 *
	 * @param PropertyTableDefinition $propTable
	 * @param integer $id
	 * @param boolean $isSubject
	 * @param DataItem|null $dataItem
	 *
	 * @return mixed query object created by the connection
	 */
	private function newQuery( $propTable, $id, $isSubject, $dataItem ) {

		$connection = $this->store->getConnection( 'mw.db' );
		$query = $connection->newQuery();

		$query->type( 'select' );
		$query->table( $propTable->getName() );

		// Restrict property only
		if ( !$isSubject && !$propTable->isFixedPropertyTable() ) {
			$query->condition( $query->eq( 'p_id', $id ) );
		}

		// Restrict subject, select property
		if ( $isSubject && $propTable->usesIdSubject() ) {
			$query->condition( $query->eq( 's_id', $id ) );
		} elseif ( $isSubject ) {
			$query->condition( $query->eq( 's_title', $dataItem->getDBkey() ) );
			$query->condition( $query->eq( 's_namespace', $dataItem->getNamespace() ) );
		}

		// Select property name
		// In case of a fixed property, no select needed
		if ( $isSubject && !$propTable->isFixedPropertyTable() ) {
			$query->join(
				'INNER JOIN',
				[ SQLStore::ID_TABLE => 'p ON p_id=p.smw_id' ]
			);

			$query->field( 'p.smw_title', 'prop' );

			// Avoid displaying any property that has been marked deleted or outdated
			$query->condition( $query->neq( "p.smw_iw", SMW_SQL3_SMWIW_OUTDATED ) );
			$query->condition( $query->neq( "p.smw_iw", SMW_SQL3_SMWDELETEIW ) );
		}

		return $query;
	}

	/**
	 * Adds the value columns to the query as `v0`, `v1`, ... and counts
	 * them in $valueCount. A field of type FieldType::FIELD_ID is resolved
	 * via a join on the ID table and contributes five value columns (title,
	 * namespace, interwiki, sortkey, subobject) plus an `idN` column.
	 *
	 * @param mixed &$query
	 * @param array $fields field name => field type
	 * @param string $valueField
	 * @param string $labelField
	 * @param integer &$valueCount
	 * @param string &$fieldname receives the name of the last iterated field
	 */
	private function addFields( &$query, $fields, $valueField, $labelField, &$valueCount, &$fieldname ) {

		// Select dataItem column(s)
		foreach ( $fields as $fieldname => $fieldType ) {

			// Get data from ID table
			if ( $fieldType === FieldType::FIELD_ID ) {
				$query->join(
					'INNER JOIN',
					[ SQLStore::ID_TABLE => "o$valueCount ON $fieldname=o$valueCount.smw_id" ]
				);

				$query->field( "$fieldname AS id$valueCount" );
				$query->field( "o$valueCount.smw_title AS v$valueCount" );
				$query->field( "o$valueCount.smw_namespace AS v" . ( $valueCount + 1 ) );
				$query->field( "o$valueCount.smw_iw AS v" . ( $valueCount + 2 ) );
				$query->field( "o$valueCount.smw_sortkey AS v" . ( $valueCount + 3 ) );
				$query->field( "o$valueCount.smw_subobject AS v" . ( $valueCount + 4 ) );

				if ( $valueField == $fieldname ) {
					$valueField = "o$valueCount.smw_sortkey";
				}

				if ( $labelField == $fieldname ) {
					$labelField = "o$valueCount.smw_sortkey";
				}

				$valueCount += 4;
			} else {
				$query->field( $fieldname, "v$valueCount" );
			}

			$valueCount += 1;
		}

		// Postgres
		// Function: SMWSQLStore3Readers::fetchSemanticData
		// Error: 42P10 ERROR: for SELECT DISTINCT, ORDER BY expressions must appear in select list
		if ( !$query->hasField( $valueField ) ) {
			$query->field( $valueField, "v" . ( $valueCount + 1 ) );
		}
	}

	/**
	 * Reads the value columns (`v0` ... `vN`) of a row and stores them in
	 * $result under a hash built from the property key and the values, so
	 * that a row with identical content replaces the earlier entry instead
	 * of being added twice.
	 *
	 * @param array &$result
	 * @param object $row
	 * @param array $fields field name => field type
	 * @param string $fieldname not used by this method
	 * @param integer $valueCount
	 * @param boolean $isSubject
	 * @param string $propertykey
	 */
	private function resultFromRow( &$result, $row, $fields, $fieldname, $valueCount, $isSubject, $propertykey ) {

		$hash = '';

		if ( $isSubject ) { // use joined or predefined property name
			$hash = $propertykey;
		}

		// Use enclosing array only for results with many values:
		if ( $valueCount > 1 ) {
			$valueKeys = [];

			// read the value fields from the current row
			for ( $i = 0; $i < $valueCount; $i += 1 ) {
				$column = "v$i";
				$valueKeys[] = $row->$column;
			}
		} else {
			$valueKeys = $row->v0;
		}

		// #Issue 615
		// If the iw field contains a redirect marker then remove it
		if ( isset( $valueKeys[2] ) && ( $valueKeys[2] === SMW_SQL3_SMWREDIIW || $valueKeys[2] === SMW_SQL3_SMWDELETEIW ) ) {
			$valueKeys[2] = '';
		}

		// The hash prevents from inserting duplicate entries of the same content
		if ( $valueCount > 1 ) {
			$hash = md5( $hash . implode( '#', $valueKeys ) );
		} else {
			$hash = md5( $hash . $valueKeys );
		}

		// Filter out any accidentally retrieved internal things (interwiki starts with ":"):
		//
		// `[0]` is used for the string offset; the `{0}` form is deprecated
		// as of PHP 7.4 and a parse error as of PHP 8.0
		if ( $valueCount < 3 ||
			implode( '', $fields ) !== FieldType::FIELD_ID ||
			$valueKeys[2] === '' ||
			$valueKeys[2][0] !== ':' ) {

			if ( isset( $result[$hash] ) ) {
				$this->reportDuplicate( $propertykey, $valueKeys );
			}

			if ( $isSubject ) {
				$result[$hash] = [ $propertykey, $valueKeys ];
			} else {
				$result[$hash] = $valueKeys;
			}
		}
	}

	/**
	 * Logs that a row with the same property key and values was retrieved
	 * more than once.
	 *
	 * @param string $propertykey
	 * @param array|string $valueKeys
	 */
	private function reportDuplicate( $propertykey, $valueKeys ) {

		// LoggerAwareTrait leaves the logger unset until setLogger() has
		// been called; a missing logger must not break the lookup
		if ( $this->logger === null ) {
			return;
		}

		$this->logger->info(
			"Found duplicate entry for {propertykey} with {valueKeys}",
			[
				'method' => __METHOD__,
				'role' => 'user',
				'propertykey' => $propertykey,
				'valueKeys' => ( is_array( $valueKeys ) ? implode( ',', $valueKeys ) : $valueKeys )
			]
		);
	}

}