summary | refs | log | tree | commit | diff
path: root/www/wiki/extensions/SemanticMediaWiki/src/SQLStore/Lookup/PropertyLabelSimilarityLookup.php
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/extensions/SemanticMediaWiki/src/SQLStore/Lookup/PropertyLabelSimilarityLookup.php')
-rw-r--r-- www/wiki/extensions/SemanticMediaWiki/src/SQLStore/Lookup/PropertyLabelSimilarityLookup.php | 318
1 files changed, 318 insertions, 0 deletions
diff --git a/www/wiki/extensions/SemanticMediaWiki/src/SQLStore/Lookup/PropertyLabelSimilarityLookup.php b/www/wiki/extensions/SemanticMediaWiki/src/SQLStore/Lookup/PropertyLabelSimilarityLookup.php
new file mode 100644
index 00000000..16bef594
--- /dev/null
+++ b/www/wiki/extensions/SemanticMediaWiki/src/SQLStore/Lookup/PropertyLabelSimilarityLookup.php
@@ -0,0 +1,318 @@
+<?php
+
+namespace SMW\SQLStore\Lookup;
+
+use Exception;
+use SMW\ApplicationFactory;
+use SMW\DataValueFactory;
+use SMW\DIProperty;
+use SMW\PropertySpecificationLookup;
+use SMW\RequestOptions;
+use SMW\SQLStore\SQLStore;
+use SMW\Store;
+
+/**
+ * @license GNU GPL v2+
+ * @since 2.5
+ *
+ * @author mwjames
+ */
+class PropertyLabelSimilarityLookup {
+
+ /**
+ * @var Store
+ */
+ private $store;
+
+ /**
+ * @var PropertySpecificationLookup
+ */
+ private $propertySpecificationLookup;
+
+ /**
+ * @var integer|float
+ */
+ private $threshold = 50;
+
+ /**
+ * @var DIProperty|null
+ */
+ private $exemptionProperty;
+
+ /**
+ * @var integer
+ */
+ private $lookupCount = 0;
+
+	/**
+	 * Binds the lookup to a store and to the service used for reading
+	 * property specifications.
+	 *
+	 * @since 2.5
+	 *
+	 * @param Store $store
+	 * @param PropertySpecificationLookup|null $propertySpecificationLookup
+	 *  when omitted, the instance supplied by the ApplicationFactory is used
+	 */
+	public function __construct( Store $store, PropertySpecificationLookup $propertySpecificationLookup = null ) {
+
+		// Resolve the default collaborator first so the property is assigned once
+		if ( $propertySpecificationLookup === null ) {
+			$propertySpecificationLookup = ApplicationFactory::getInstance()->getPropertySpecificationLookup();
+		}
+
+		$this->store = $store;
+		$this->propertySpecificationLookup = $propertySpecificationLookup;
+	}
+
+ /**
+ * Sets the minimum similarity a pair of labels has to reach in order to be
+ * reported; the value is compared with `>=` against the percentage computed
+ * by similar_text.
+ *
+ * @since 2.5
+ *
+ * @param integer|float $threshold
+ */
+ public function setThreshold( $threshold ) {
+ $this->threshold = $threshold;
+ }
+
+	/**
+	 * @note A property that when annotated as part of a property specification
+	 * will be used as exemption marker during the similarity comparison.
+	 *
+	 * @since 2.5
+	 *
+	 * @param string $exemptionProperty property label; an empty string leaves
+	 *  the current setting untouched
+	 */
+	public function setExemptionProperty( $exemptionProperty ) {
+
+		if ( $exemptionProperty !== '' ) {
+			$propertyValue = DataValueFactory::getInstance()->newPropertyValueByLabel( $exemptionProperty );
+			$this->exemptionProperty = $propertyValue->getDataItem();
+		}
+	}
+
+ /**
+ * Returns the data item that was resolved by setExemptionProperty, or null
+ * when no exemption property has been set.
+ *
+ * @since 2.5
+ *
+ * @return DIProperty|null
+ */
+ public function getExemptionProperty() {
+ return $this->exemptionProperty;
+ }
+
+ /**
+ * Returns the number of properties that were fetched by the most recent
+ * compareAndFindLabels call (0 as long as no comparison has been run).
+ *
+ * @since 2.5
+ *
+ * @return integer
+ */
+ public function getLookupCount() {
+ return $this->lookupCount;
+ }
+
+	/**
+	 * Reads the `TOTALPROPS` entry from the store statistics.
+	 *
+	 * @since 3.0
+	 *
+	 * @return integer 0 when the statistics carry no `TOTALPROPS` entry
+	 */
+	public function getPropertyMaxCount() {
+		$statistics = $this->store->getStatistics();
+
+		return isset( $statistics['TOTALPROPS'] ) ? $statistics['TOTALPROPS'] : 0;
+	}
+
+	/**
+	 * Fetches the property list, compares the labels pairwise and returns
+	 * every pair whose similarity reaches the configured threshold.
+	 *
+	 * An extra condition with a `type` key in the request options switches
+	 * the per-property output from a plain label to a label/type array.
+	 *
+	 * @since 2.5
+	 *
+	 * @param RequestOptions|null $requestOptions
+	 *
+	 * @return array list of [ 'property' => [ ... ], 'similarity' => float ]
+	 *  entries ordered by similarity, highest first
+	 */
+	public function compareAndFindLabels( RequestOptions $requestOptions = null ) {
+
+		$withType = false;
+		$propertyList = $this->getPropertyList( $requestOptions );
+
+		if ( $requestOptions !== null ) {
+			foreach ( $requestOptions->getExtraConditions() as $extraCondition ) {
+				if ( isset( $extraCondition['type'] ) ) {
+					$withType = $extraCondition['type'];
+				}
+			}
+		}
+
+		$this->lookupCount = count( $propertyList );
+		$similarities = $this->matchLabels( $propertyList, $withType );
+
+		// usort requires an integer that is negative, zero or positive. A
+		// boolean `<` result can never be negative, which violates that
+		// contract (and is deprecated as of PHP 8), hence the explicit
+		// three-way comparison for a descending order.
+		usort( $similarities, function ( $a, $b ) {
+
+			if ( $a['similarity'] > $b['similarity'] ) {
+				return -1;
+			}
+
+			if ( $a['similarity'] < $b['similarity'] ) {
+				return 1;
+			}
+
+			return 0;
+		} );
+
+		return $similarities;
+	}
+
+	/**
+	 * Compares the labels of all user-defined properties pairwise and keeps
+	 * the pairs that reach the threshold and are not exempted.
+	 *
+	 * @param DIProperty[] $propertyList
+	 * @param mixed $withType passed through to getSummary
+	 *
+	 * @return array summaries indexed by the hash of the compared pair
+	 */
+	private function matchLabels( $propertyList, $withType ) {
+
+		$matches = [];
+		$done = [];
+
+		foreach ( $propertyList as $left ) {
+
+			if ( !$left->isUserDefined() ) {
+				continue;
+			}
+
+			foreach ( $propertyList as $right ) {
+
+				// Skip partners that already served as outer element, the
+				// property itself, and anything that is not user-defined
+				$skip = isset( $done[$right->getKey()] )
+					|| $left->getKey() === $right->getKey()
+					|| !$right->isUserDefined();
+
+				if ( $skip ) {
+					continue;
+				}
+
+				$pairId = $this->getHash( $left, $right );
+
+				if ( !$this->isExempted( $left, $right ) && !isset( $matches[$pairId] ) ) {
+
+					// Filled by reference with the similarity in percent
+					$score = '';
+
+					similar_text( $left->getLabel(), $right->getLabel(), $score );
+
+					if ( $score >= $this->threshold ) {
+						$matches[$pairId] = $this->getSummary( $left, $right, $score, $withType );
+					}
+				}
+			}
+
+			$done[$left->getKey()] = true;
+		}
+
+		return $matches;
+	}
+
+	/**
+	 * Checks whether one of the two properties names the other one as value
+	 * of the exemption property in its specification.
+	 *
+	 * @since 2.5
+	 *
+	 * @param DIProperty $first
+	 * @param DIProperty $second
+	 *
+	 * @return boolean always false when no exemption property is set
+	 */
+	private function isExempted( DIProperty $first, DIProperty $second ) {
+
+		if ( $this->exemptionProperty === null ) {
+			return false;
+		}
+
+		// Both directions are inspected, $first -> $second before
+		// $second -> $first; the second lookup only happens when the first
+		// one produced no match.
+		$directions = [
+			[ $first, $second ],
+			[ $second, $first ]
+		];
+
+		foreach ( $directions as $direction ) {
+
+			list( $subject, $candidate ) = $direction;
+
+			$definedBy = $this->propertySpecificationLookup->getSpecification(
+				$subject,
+				$this->exemptionProperty
+			);
+
+			foreach ( $definedBy as $dataItem ) {
+				if ( $dataItem->equals( $candidate->getCanonicalDiWikiPage() ) ) {
+					return true;
+				}
+			}
+		}
+
+		return false;
+	}
+
+	/**
+	 * Builds an order-independent identifier for a pair of properties.
+	 *
+	 * @param DIProperty $first
+	 * @param DIProperty $second
+	 *
+	 * @return string md5 hash that is the same for (first, second) and
+	 *  (second, first)
+	 */
+	private function getHash( DIProperty $first, DIProperty $second ) {
+
+		$hashing = [ $first->getKey(), $second->getKey() ];
+
+		// Sorting makes the result independent of the argument order
+		sort( $hashing );
+
+		// The keys are serialized instead of being concatenated because a
+		// plain concatenation is ambiguous: ("A", "bc") and ("Ab", "c") would
+		// both end up as "Abc" and hence share the same hash.
+		return md5( serialize( $hashing ) );
+	}
+
+	/**
+	 * Builds the result entry for a matched pair of properties.
+	 *
+	 * @param DIProperty $first
+	 * @param DIProperty $second
+	 * @param integer|float $percent similarity, rounded to two decimals in the output
+	 * @param mixed $withType when truthy each property is described by a
+	 *  label/type array, otherwise by its label only
+	 *
+	 * @return array [ 'property' => [ first, second ], 'similarity' => float ]
+	 */
+	private function getSummary( DIProperty $first, DIProperty $second, $percent, $withType ) {
+
+		$summary = [];
+
+		foreach ( [ $first, $second ] as $property ) {
+			if ( $withType ) {
+				$summary[] = [
+					'label' => $property->getLabel(),
+					'type' => $property->findPropertyTypeID()
+				];
+			} else {
+				$summary[] = $property->getLabel();
+			}
+		}
+
+		return [
+			'property' => $summary,
+			'similarity' => round( $percent, 2 )
+		];
+	}
+
+	/**
+	 * Selects the property rows from the ID table and turns each title into
+	 * a DIProperty instance.
+	 *
+	 * @param RequestOptions|null $requestOptions optional limit, offset and
+	 *  string conditions for the query
+	 *
+	 * @return DIProperty[] rows for which no DIProperty can be constructed
+	 *  are left out
+	 */
+	private function getPropertyList( RequestOptions $requestOptions = null ) {
+
+		$conditions = [
+			'smw_namespace' => SMW_NS_PROPERTY,
+			'smw_iw' => '',
+			'smw_subobject' => ''
+		];
+
+		// the query needs to do the filtering of internal properties, else LIMIT is wrong
+		$options = [ 'ORDER BY' => 'smw_sort' ];
+
+		if ( $requestOptions !== null ) {
+
+			// LIMIT/OFFSET are only applied for a positive limit
+			if ( $requestOptions->getLimit() > 0 ) {
+				$options['LIMIT'] = $requestOptions->getLimit();
+				$options['OFFSET'] = max( $requestOptions->getOffset(), 0 );
+			}
+
+			if ( $requestOptions->getStringConditions() ) {
+				$conditions[] = $this->store->getSQLConditions( $requestOptions, '', 'smw_sortkey', false );
+			}
+		}
+
+		$rows = $this->store->getConnection( 'mw.db' )->select(
+			SQLStore::ID_TABLE,
+			[ 'smw_id', 'smw_title' ],
+			$conditions,
+			__METHOD__,
+			$options
+		);
+
+		$properties = [];
+
+		foreach ( $rows as $row ) {
+			try {
+				$properties[] = new DIProperty( str_replace( ' ', '_', $row->smw_title ) );
+			} catch ( Exception $e ) {
+				// Deliberately ignored, the row is simply not part of the list
+			}
+		}
+
+		return $properties;
+	}
+
+}