summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/SemanticMediaWiki/src/SQLStore/EntityStore/DIHandlers/DIBlobHandler.php
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/extensions/SemanticMediaWiki/src/SQLStore/EntityStore/DIHandlers/DIBlobHandler.php')
-rw-r--r--www/wiki/extensions/SemanticMediaWiki/src/SQLStore/EntityStore/DIHandlers/DIBlobHandler.php252
1 files changed, 252 insertions, 0 deletions
diff --git a/www/wiki/extensions/SemanticMediaWiki/src/SQLStore/EntityStore/DIHandlers/DIBlobHandler.php b/www/wiki/extensions/SemanticMediaWiki/src/SQLStore/EntityStore/DIHandlers/DIBlobHandler.php
new file mode 100644
index 00000000..f88ed878
--- /dev/null
+++ b/www/wiki/extensions/SemanticMediaWiki/src/SQLStore/EntityStore/DIHandlers/DIBlobHandler.php
@@ -0,0 +1,252 @@
+<?php
+
+namespace SMW\SQLStore\EntityStore\DIHandlers;
+
+use SMW\SQLStore\EntityStore\DataItemHandler;
+use SMW\SQLStore\EntityStore\Exception\DataItemHandlerException;
+use SMW\SQLStore\TableBuilder\FieldType;
+use SMWDataItem as DataItem;
+use SMWDIBlob as DIBlob;
+
+/**
+ * Store access handler for blob (string) data items.
+ *
+ * A value is spread over two columns: `o_hash` holds a length-bounded
+ * representation that is used for matching and sorting, while `o_blob` holds
+ * the complete text whenever it does not fit into `o_hash` (and always for
+ * data items carrying the `is.keyword` option).
+ *
+ * @license GNU GPL v2+
+ * @since 1.8
+ *
+ * @author Nischay Nahata
+ */
+class DIBlobHandler extends DataItemHandler {
+
+	/**
+	 * Returns the value columns of the table together with their field types.
+	 *
+	 * @since 1.8
+	 *
+	 * {@inheritDoc}
+	 */
+	public function getTableFields() {
+
+		$fields = [];
+
+		$fields['o_blob'] = FieldType::TYPE_BLOB;
+		$fields['o_hash'] = $this->getCharFieldType();
+
+		return $fields;
+	}
+
+	/**
+	 * Returns the columns (and their field types) that are read when a value
+	 * is fetched; both columns are required by `dataItemFromDBKeys`.
+	 *
+	 * @since 1.8
+	 *
+	 * {@inheritDoc}
+	 */
+	public function getFetchFields() {
+
+		$fields = [];
+
+		$fields['o_blob'] = FieldType::TYPE_BLOB;
+		$fields['o_hash'] = $this->getCharFieldType();
+
+		return $fields;
+	}
+
+	/**
+	 * Returns the additional (composite) indexes declared for the table.
+	 *
+	 * @since 1.8
+	 *
+	 * {@inheritDoc}
+	 */
+	public function getTableIndexes() {
+
+		$indexes = [];
+
+		$indexes[] = 's_id,o_hash';
+
+		// pvalue select
+		// SELECT p_id,o_hash FROM `smw_di_blob` WHERE p_id = '310174' AND ( o_hash LIKE '%test%' ) LIMIT 11
+		$indexes[] = 'p_id,o_hash';
+
+		return $indexes;
+	}
+
+	/**
+	 * Returns the name of the index a query of the given kind should be
+	 * forced to use, or an empty string when no hint applies.
+	 *
+	 * @since 3.0
+	 *
+	 * {@inheritDoc}
+	 *
+	 * @param string $key identifies the kind of query asking for a hint
+	 *
+	 * @return string
+	 */
+	public function getIndexHint( $key ) {
+
+		// Store::getPropertySubjects has been seen to choose the wrong index
+		// on MySQL.
+		//
+		// With the hint:
+		//
+		// SELECT smw_id, smw_title, smw_namespace, smw_iw, smw_subobject, smw_sortkey, smw_sort
+		// FROM `smw_object_ids`
+		// INNER JOIN `smw_di_blob` AS t1 FORCE INDEX(s_id) ON t1.s_id=smw_id
+		// WHERE t1.p_id='310174' AND smw_iw!=':smw'
+		// AND smw_iw!=':smw-delete' AND smw_iw!=':smw-redi'
+		// GROUP BY smw_sort, smw_id LIMIT 26
+		//
+		// 137.4161ms SMWSQLStore3Readers::getPropertySubjects
+		//
+		// vs. without the hint:
+		//
+		// SELECT smw_id, smw_title, smw_namespace, smw_iw, smw_subobject, smw_sortkey, smw_sort
+		// FROM `smw_object_ids`
+		// INNER JOIN `smw_di_blob` AS t1 ON t1.s_id=smw_id
+		// WHERE t1.p_id='310174' AND smw_iw!=':smw' AND smw_iw!=':smw-delete'
+		// AND smw_iw!=':smw-redi'
+		// GROUP BY smw_sort, smw_id LIMIT 26
+		//
+		// 23482.1451ms SMWSQLStore3Readers::getPropertySubjects
+
+		// The key has to be compared explicitly: a bare, non-empty string
+		// literal is always truthy and would hand the hint to every query kind.
+		if ( $key === 'property.subjects' && $this->isDbType( 'mysql' ) ) {
+			return 's_id';
+		}
+
+		return '';
+	}
+
+	/**
+	 * Returns the condition used to select rows matching the data item.
+	 *
+	 * The match is made on `o_hash` only: keyword items are compared by the
+	 * value returned from the data item's normalize(), everything else by
+	 * the (possibly abbreviated) hash of its string.
+	 *
+	 * @since 1.8
+	 *
+	 * {@inheritDoc}
+	 */
+	public function getWhereConds( DataItem $dataItem ) {
+
+		$text = $dataItem->getString();
+
+		if ( $dataItem->getOption( 'is.keyword' ) ) {
+			$hash = $dataItem->normalize( $text );
+		} else {
+			$hash = $this->makeHash( $text );
+		}
+
+		return [ 'o_hash' => $hash ];
+	}
+
+	/**
+	 * Returns the column values to be written for the data item.
+	 *
+	 * The string is trimmed and HTML entities (including both quote styles)
+	 * are decoded before it is hashed and stored.
+	 *
+	 * @since 1.8
+	 *
+	 * {@inheritDoc}
+	 */
+	public function getInsertValues( DataItem $dataItem ) {
+
+		$isKeyword = $dataItem->getOption( 'is.keyword' );
+		$text = htmlspecialchars_decode( trim( $dataItem->getString() ), ENT_QUOTES );
+
+		if ( $isKeyword ) {
+			$hash = $dataItem->normalize( $text );
+		} else {
+			$hash = $this->makeHash( $text );
+		}
+
+		// Whether a blob is required is decided on the unescaped text. The
+		// bytea escaping can only lengthen a string, and measuring the
+		// escaped form would store a redundant blob for short values that
+		// are already contained in full in the hash field.
+		if ( !$isKeyword && mb_strlen( $text ) <= $this->getMaxLength() ) {
+			$blob = null;
+		} elseif ( $this->isDbType( 'postgres' ) ) {
+			$blob = pg_escape_bytea( $text );
+		} else {
+			$blob = $text;
+		}
+
+		return [
+			'o_blob' => $blob,
+			'o_hash' => $hash,
+		];
+	}
+
+	/**
+	 * Returns the name of the column used for indexed matching and sorting.
+	 *
+	 * @since 1.8
+	 *
+	 * {@inheritDoc}
+	 */
+	public function getIndexField() {
+
+		$indexField = 'o_hash';
+
+		return $indexField;
+	}
+
+	/**
+	 * Returns the name of the column used as label of a value.
+	 *
+	 * @since 1.8
+	 *
+	 * {@inheritDoc}
+	 */
+	public function getLabelField() {
+
+		$labelField = 'o_hash';
+
+		return $labelField;
+	}
+
+	/**
+	 * Creates a blob data item from a fetched row.
+	 *
+	 * @since 1.8
+	 *
+	 * {@inheritDoc}
+	 *
+	 * @param array $dbkeys two values: the blob (index 0) and the hash (index 1)
+	 *
+	 * @return DIBlob
+	 * @throws DataItemHandlerException if $dbkeys is not an array of two values
+	 */
+	public function dataItemFromDBKeys( $dbkeys ) {
+
+		if ( !is_array( $dbkeys ) || count( $dbkeys ) !== 2 ) {
+			throw new DataItemHandlerException( 'Failed to create data item from DB keys.' );
+		}
+
+		$blob = $dbkeys[0];
+
+		// Only a non-empty string can hold escaped bytea data. The blob is
+		// NULL for every value that fits into the hash field, and handing
+		// NULL to pg_unescape_bytea() is deprecated as of PHP 8.1.
+		if ( is_string( $blob ) && $blob !== '' && $this->isDbType( 'postgres' ) ) {
+			$blob = pg_unescape_bytea( $blob );
+		}
+
+		// empty blob: the "hash" column contains the complete string
+		if ( $blob === null || $blob === false || $blob === '' ) {
+			return new DIBlob( $dbkeys[1] );
+		}
+
+		return new DIBlob( $blob );
+	}
+
+	/**
+	 * Builds the representation of a string that is kept in the hash field
+	 * and used for selecting and sorting.
+	 *
+	 * Strings of up to DIBlobHandler::getMaxLength characters are returned
+	 * unchanged. Longer strings are cut so that their leading characters are
+	 * followed by the md5 digest of the complete string, which results in a
+	 * value of exactly the maximal length.
+	 *
+	 * @since 1.8
+	 * @param $string string
+	 *
+	 * @return string
+	 */
+	private function makeHash( $string ) {
+
+		$maxLength = $this->getMaxLength();
+
+		if ( mb_strlen( $string ) > $maxLength ) {
+			// 32 is the length of the hexadecimal md5 digest that is appended
+			return mb_substr( $string, 0, $maxLength - 32 ) . md5( $string );
+		}
+
+		return $string;
+	}
+
+	/**
+	 * Maximal number of characters to be stored in the hash field of the
+	 * table. Must not be bigger than the length of the character field in
+	 * the DB. Strings that are longer than this will be stored as a blob,
+	 * and the hash will only start with the original string while its last
+	 * 32 characters are used for a digest. The portion of such a string
+	 * that is stored literally in the hash is therefore 32 characters
+	 * shorter than the maximal length.
+	 *
+	 * The default of 72 was chosen since it leads to a smaller index size
+	 * at the cost of needing more blobs in cases where many strings are
+	 * of length 73 to 255. But keeping the index small seems more
+	 * important than saving disk space. Also, with 72 characters there are
+	 * 40 characters of content available for sorting and prefix matching,
+	 * which should be more than enough in most contexts.
+	 *
+	 * Using `SMW_FIELDT_CHAR_LONG` as option in `smwgFieldTypeFeatures`
+	 * switches to FieldType::CHAR_LONG_LENGTH instead, which extends the
+	 * matchable string length for LIKE/NLIKE queries accordingly (3.0+).
+	 *
+	 * @since 1.8
+	 *
+	 * @return integer
+	 */
+	private function getMaxLength() {
+
+		if ( $this->isEnabledFeature( SMW_FIELDT_CHAR_LONG ) ) {
+			return FieldType::CHAR_LONG_LENGTH;
+		}
+
+		return 72;
+	}
+
+	/**
+	 * Selects the field type of the hash column according to the enabled
+	 * `SMW_FIELDT_CHAR_LONG` and `SMW_FIELDT_CHAR_NOCASE` features.
+	 *
+	 * @return string|array NOTE(review): the concrete type of the FieldType
+	 * constants is not visible from this file
+	 */
+	private function getCharFieldType() {
+
+		$isLong = $this->isEnabledFeature( SMW_FIELDT_CHAR_LONG );
+		$isNoCase = $this->isEnabledFeature( SMW_FIELDT_CHAR_NOCASE );
+
+		if ( $isLong && $isNoCase ) {
+			return FieldType::TYPE_CHAR_LONG_NOCASE;
+		}
+
+		if ( $isLong ) {
+			return FieldType::TYPE_CHAR_LONG;
+		}
+
+		if ( $isNoCase ) {
+			return FieldType::TYPE_CHAR_NOCASE;
+		}
+
+		return FieldType::FIELD_TITLE;
+	}
+
+}