summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/SemanticMediaWiki/src/SQLStore/EntityStore/DIHandlers/DIBlobHandler.php
blob: f88ed878fc2513adb3d982d07dabca27b393e81b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
<?php

namespace SMW\SQLStore\EntityStore\DIHandlers;

use SMW\SQLStore\EntityStore\DataItemHandler;
use SMW\SQLStore\EntityStore\Exception\DataItemHandlerException;
use SMW\SQLStore\TableBuilder\FieldType;
use SMWDataItem as DataItem;
use SMWDIBlob as DIBlob;

/**
 * This class implements Store access to blob (string) data items.
 *
 * A value is represented by two table fields: `o_hash`, a length-limited
 * string used for selecting and sorting, and `o_blob`, which holds the
 * full text when it does not fit into `o_hash` (or when the value is
 * flagged as keyword).
 *
 * @license GNU GPL v2+
 * @since 1.8
 *
 * @author Nischay Nahata
 */
class DIBlobHandler extends DataItemHandler {

	/**
	 * Fields (name => field type) of the table storing blob values.
	 *
	 * @since 1.8
	 *
	 * {@inheritDoc}
	 */
	public function getTableFields() {
		return [
			'o_blob' => FieldType::TYPE_BLOB,
			'o_hash' => $this->getCharFieldType()
		];
	}

	/**
	 * Fields (name => field type) required to restore a data item; for
	 * blobs this is the same set as returned by getTableFields().
	 *
	 * @since 1.8
	 *
	 * {@inheritDoc}
	 */
	public function getFetchFields() {
		return [
			'o_blob' => FieldType::TYPE_BLOB,
			'o_hash' => $this->getCharFieldType()
		];
	}

	/**
	 * Additional indexes (comma separated field lists) for the table.
	 *
	 * @since 1.8
	 *
	 * {@inheritDoc}
	 */
	public function getTableIndexes() {
		return [

			's_id,o_hash',

			// pvalue select
			// SELECT p_id,o_hash FROM `smw_di_blob` WHERE p_id = '310174' AND ( o_hash LIKE '%test%' ) LIMIT 11
			'p_id,o_hash',
		];
	}

	/**
	 * Returns the name of an index to be enforced for a specific kind of
	 * query, or an empty string when no hint applies.
	 *
	 * @since 3.0
	 *
	 * {@inheritDoc}
	 *
	 * @param string $key identifies the query the hint is requested for
	 *
	 * @return string
	 */
	public function getIndexHint( $key ) {

		// Store::getPropertySubjects has been seen to choose the wrong index

		// SELECT smw_id, smw_title, smw_namespace, smw_iw, smw_subobject, smw_sortkey, smw_sort
		// FROM `smw_object_ids`
		// INNER JOIN `smw_di_blob` AS t1 FORCE INDEX(s_id) ON t1.s_id=smw_id
		// WHERE t1.p_id='310174' AND smw_iw!=':smw'
		// AND smw_iw!=':smw-delete' AND smw_iw!=':smw-redi'
		// GROUP BY smw_sort, smw_id LIMIT 26
		//
		// 137.4161ms SMWSQLStore3Readers::getPropertySubjects
		//
		// vs.
		//
		// SELECT smw_id, smw_title, smw_namespace, smw_iw, smw_subobject, smw_sortkey, smw_sort
		// FROM `smw_object_ids`
		// INNER JOIN `smw_di_blob` AS t1 ON t1.s_id=smw_id
		// WHERE t1.p_id='310174' AND smw_iw!=':smw' AND smw_iw!=':smw-delete'
		// AND smw_iw!=':smw-redi'
		// GROUP BY smw_sort, smw_id LIMIT 26
		//
		// 23482.1451ms SMWSQLStore3Readers::getPropertySubjects

		// The hint is only meant for the property subjects lookup; the key
		// has to be compared explicitly, a bare string literal in the
		// condition is always truthy and would match any key.
		if ( $key === 'property.subjects' && $this->isDbType( 'mysql' ) ) {
			return 's_id';
		}

		return '';
	}

	/**
	 * Returns the conditions (field => value) to match the given data item.
	 * Only `o_hash` is used for matching.
	 *
	 * @since 1.8
	 *
	 * {@inheritDoc}
	 */
	public function getWhereConds( DataItem $dataItem ) {

		$isKeyword = $dataItem->getOption( 'is.keyword' );
		$text = $dataItem->getString();

		// Keywords are matched on their normalized form, anything else on
		// the (possibly shortened and md5-suffixed) hash
		return [
			'o_hash' => $isKeyword ? $dataItem->normalize( $text ) : $this->makeHash( $text )
		];
	}

	/**
	 * Returns the values (field => value) to be inserted for the given
	 * data item.
	 *
	 * @since 1.8
	 *
	 * {@inheritDoc}
	 */
	public function getInsertValues( DataItem $dataItem ) {

		$isKeyword = $dataItem->getOption( 'is.keyword' );

		// Store the trimmed text with HTML entities (incl. quotes) decoded
		$text = htmlspecialchars_decode( trim( $dataItem->getString() ), ENT_QUOTES );
		$hash = $isKeyword ? $dataItem->normalize( $text ) : $this->makeHash( $text );

		if ( $this->isDbType( 'postgres' ) ) {
			$text = pg_escape_bytea( $text );
		}

		// A non-keyword text that fits into the hash field is fully
		// contained in `o_hash`, hence no blob is required. Note that on
		// postgres the length is checked against the escaped text.
		if ( mb_strlen( $text ) <= $this->getMaxLength() && !$isKeyword ) {
			$text = null;
		}

		return [
			'o_blob' => $text,
			'o_hash' => $hash,
		];
	}

	/**
	 * Name of the field used for sorting and selecting.
	 *
	 * @since 1.8
	 *
	 * {@inheritDoc}
	 */
	public function getIndexField() {
		return 'o_hash';
	}

	/**
	 * Name of the field used for string matching.
	 *
	 * @since 1.8
	 *
	 * {@inheritDoc}
	 */
	public function getLabelField() {
		return 'o_hash';
	}

	/**
	 * Creates a DIBlob from the fetched fields, expected in the order
	 * [ o_blob, o_hash ].
	 *
	 * @since 1.8
	 *
	 * {@inheritDoc}
	 *
	 * @throws DataItemHandlerException if $dbkeys is not an array with
	 *  exactly two elements
	 */
	public function dataItemFromDBKeys( $dbkeys ) {

		if ( !is_array( $dbkeys ) || count( $dbkeys ) != 2 ) {
			throw new DataItemHandlerException( 'Failed to create data item from DB keys.' );
		}

		// Reverses the escaping applied by getInsertValues
		if ( $this->isDbType( 'postgres' ) ) {
			$dbkeys[0] = pg_unescape_bytea( $dbkeys[0] );
		}

		// empty blob: use "hash" string
		if ( $dbkeys[0] == '' ) {
			return new DIBlob( $dbkeys[1] );
		}

		return new DIBlob( $dbkeys[0] );
	}

	/**
	* Method to make a hashed representation for strings of length greater
	* than DIBlobHandler::getMaxLength to be used for selecting and sorting.
	*
	* Strings that fit into the maximum length are returned unchanged;
	* longer strings are cut so that the literal prefix plus the 32
	* characters of the md5 hash add up to the maximum length.
	*
	* @since 1.8
	* @param $string string
	*
	* @return string
	*/
	private function makeHash( $string ) {

		$length = $this->getMaxLength();

		if( mb_strlen( $string ) <= $length ) {
			return $string;
		}

		return mb_substr( $string, 0, $length - 32 ) . md5( $string );
	}

	/**
	 * Maximal number of bytes (chars) to be stored in the hash field of
	 * the table. Must not be bigger than 255 (the length of our VARCHAR
	 * field in the DB). Strings that are longer than this will be stored
	 * as a blob, and the hash will only start with the original string
	 * but the last 32 bytes are used for a hash. So the minimal portion
	 * of the string that is stored literally in the hash is 32 chars
	 * less.
	 *
	 * The value of 72 was chosen since it leads to a smaller index size
	 * at the cost of needing more blobs in cases where many strings are
	 * of length 73 to 255. But keeping the index small seems more
	 * important than saving disk space. Also, with 72 bytes there are at
	 * least 40 bytes of content available for sorting and prefix matching,
	 * which should be more than enough in most contexts.
	 *
	 * @since 1.8
	 *
	 * Using `SMW_FIELDT_CHAR_LONG` as option in `smwgFieldTypeFeatures`
	 * will extend the field size to 300 and expand the maximum matchable
	 * string length to 300-32 for LIKE/NLIKE queries.
	 *
	 * @since 3.0
	 *
	 * @return integer
	 */
	private function getMaxLength() {

		$length = 72;

		if ( $this->isEnabledFeature( SMW_FIELDT_CHAR_LONG ) ) {
			$length = FieldType::CHAR_LONG_LENGTH;
		}

		return $length;
	}

	/**
	 * Selects the field type of `o_hash` depending on the enabled
	 * `SMW_FIELDT_CHAR_NOCASE` and `SMW_FIELDT_CHAR_LONG` features.
	 *
	 * @return mixed one of the FieldType constants
	 */
	private function getCharFieldType() {

		$fieldType = FieldType::FIELD_TITLE;

		if ( $this->isEnabledFeature( SMW_FIELDT_CHAR_NOCASE ) ) {
			$fieldType = FieldType::TYPE_CHAR_NOCASE;
		}

		if ( $this->isEnabledFeature( SMW_FIELDT_CHAR_LONG ) ) {
			$fieldType = FieldType::TYPE_CHAR_LONG;
		}

		// Both features enabled: the combined type takes precedence over
		// the individual assignments above
		if ( $this->isEnabledFeature( SMW_FIELDT_CHAR_LONG ) && $this->isEnabledFeature( SMW_FIELDT_CHAR_NOCASE ) ) {
			$fieldType = FieldType::TYPE_CHAR_LONG_NOCASE;
		}

		return $fieldType;
	}

}