summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/SemanticMediaWiki/src/SQLStore/QueryEngine/QueryEngine.php
blob: 9f9c7b8c321ab18609b0baea2cd6b064010f00f4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
<?php

namespace SMW\SQLStore\QueryEngine;

use Psr\Log\LoggerAwareInterface;
use Psr\Log\LoggerInterface;
use RuntimeException;
use SMW\DIWikiPage;
use SMW\Exception\PredefinedPropertyLabelMismatchException;
use SMW\Query\DebugFormatter;
use SMW\Query\Language\ThingDescription;
use SMW\QueryEngine as QueryEngineInterface;
use SMW\QueryFactory;
use SMWDataItem as DataItem;
use SMWQuery as Query;
use SMWQueryResult as QueryResult;
use SMWSQLStore3 as SQLStore;

/**
 * Class that implements query answering for SQLStore.
 *
 * @license GNU GPL v2+
 * @since 2.2
 *
 * @author Markus Krötzsch
 * @author Jeroen De Dauw
 * @author mwjames
 */
class QueryEngine implements QueryEngineInterface, LoggerAwareInterface {

	/**
	 * @var SQLStore
	 */
	private $store;

	/**
	 * @var LoggerInterface
	 */
	private $logger;

	/**
	 * Query mode copied from given query. Some submethods act differently when
	 * in Query::MODE_DEBUG.
	 *
	 * @var int
	 */
	private $queryMode;

	/**
	 * Array of generated QuerySegment query descriptions (index => object)
	 *
	 * @var QuerySegment[]
	 */
	private $querySegmentList = [];

	/**
	 * Array of sorting requests ("Property_name" => "ASC"/"DESC"). Used during
	 * query processing (where these property names are searched while compiling
	 * the query conditions).
	 *
	 * @var string[]
	 */
	private $sortKeys;

	/**
	 * Local collection of error strings, passed on to callers if possible.
	 *
	 * @var string[]
	 */
	private $errors = [];

	/**
	 * @var QuerySegmentListBuildManager
	 */
	private $querySegmentListBuildManager;

	/**
	 * @var QuerySegmentListProcessor
	 */
	private $querySegmentListProcessor;

	/**
	 * @var EngineOptions
	 */
	private $engineOptions;

	/**
	 * @var QueryFactory
	 */
	private $queryFactory;

	/**
	 * @since 2.2
	 *
	 * @param SQLStore $store
	 * @param QuerySegmentListBuildManager $querySegmentListBuildManager
	 * @param QuerySegmentListProcessor $querySegmentListProcessor
	 * @param EngineOptions $engineOptions
	 */
	public function __construct( SQLStore $store, QuerySegmentListBuildManager $querySegmentListBuildManager, QuerySegmentListProcessor $querySegmentListProcessor, EngineOptions $engineOptions ) {
		$this->store = $store;
		$this->querySegmentListBuildManager = $querySegmentListBuildManager;
		$this->querySegmentListProcessor = $querySegmentListProcessor;
		$this->engineOptions = $engineOptions;
		$this->queryFactory = new QueryFactory();
	}

	/**
	 * @see LoggerAwareInterface::setLogger
	 *
	 * @since 2.5
	 *
	 * @param LoggerInterface $logger
	 */
	public function setLogger( LoggerInterface $logger ) {
		$this->logger = $logger;
	}

	/**
	 * The new SQL store's implementation of query answering. This function
	 * works in two stages: First, the nested conditions of the given query
	 * object are preprocessed to compute an abstract representation of the
	 * SQL query that is to be executed. Since query conditions correspond to
	 * joins with property tables in most cases, this abstract representation
	 * is essentially graph-like description of how property tables are joined.
	 * Moreover, this graph is tree-shaped, since all query conditions are
	 * tree-shaped. Each part of this abstract query structure is represented
	 * by an QuerySegment object in the array querySegmentList.
	 *
	 * As a second stage of processing, the thus prepared SQL query is actually
	 * executed. Typically, this means that the joins are collapsed into one
	 * SQL query to retrieve results. In some cases, such as in dbug mode, the
	 * execution might be restricted and not actually perform the whole query.
	 *
	 * The two-stage process helps to separate tasks, and it also allows for
	 * better optimisations: it is left to the execution engine how exactly the
	 * query result is to be obtained. For example, one could pre-compute
	 * partial suib-results in temporary tables (or even cache them somewhere),
	 * instead of passing one large join query to the DB (of course, it might
	 * be large only if the configuration of SMW allows it). For some DBMS, a
	 * step-wise execution of the query might lead to better performance, since
	 * it exploits the tree-structure of the joins, which is important for fast
	 * processing -- not all DBMS might be able in seeing this by themselves.
	 *
	 * @param Query $query
	 *
	 * @return mixed depends on $query->querymode
	 */
	public function getQueryResult( Query $query ) {

		if ( ( !$this->engineOptions->get( 'smwgIgnoreQueryErrors' ) || $query->getDescription() instanceof ThingDescription ) &&
		     $query->querymode != Query::MODE_DEBUG &&
		     count( $query->getErrors() ) > 0 ) {
			return $this->queryFactory->newQueryResult( $this->store, $query, [], false );
			// NOTE: we check this here to prevent unnecessary work, but we check
			// it after query processing below again in case more errors occurred.
		} elseif ( $query->querymode == Query::MODE_NONE || $query->getLimit() < 1 ) {
			// don't query, but return something to printer
			return $this->queryFactory->newQueryResult( $this->store, $query, [], true );
		}

		$connection = $this->store->getConnection( 'mw.db.queryengine' );

		$this->queryMode = $query->querymode;
		$this->querySegmentList = [];

		$this->errors = [];
		QuerySegment::$qnum = 0;
		$this->sortKeys = $query->sortkeys;

		$rootid = $this->querySegmentListBuildManager->getQuerySegmentFrom(
			$query
		);

		$this->querySegmentList = $this->querySegmentListBuildManager->getQuerySegmentList();
		$this->sortKeys = $this->querySegmentListBuildManager->getSortKeys();
		$this->errors = $this->querySegmentListBuildManager->getErrors();

		// Possibly stop if new errors happened:
		if ( !$this->engineOptions->get( 'smwgIgnoreQueryErrors' ) &&
				$query->querymode != Query::MODE_DEBUG &&
				count( $this->errors ) > 0 ) {
			$query->addErrors( $this->errors );
			return $this->queryFactory->newQueryResult( $this->store, $query, [], false );
		}

		// *** Now execute the computed query ***//
		$this->querySegmentListProcessor->setQueryMode( $this->queryMode );
		$this->querySegmentListProcessor->setQuerySegmentList( $this->querySegmentList );

		// execute query tree, resolve all dependencies
		$this->querySegmentListProcessor->process(
			$rootid
		);

		$this->applyExtraWhereCondition(
			$connection,
			$rootid
		);

		// #835
		// SELECT DISTINCT and ORDER BY RANDOM causes an issue for postgres
		// Disable RANDOM support for postgres
		if ( $connection->isType( 'postgres' ) ) {
			$this->engineOptions->set(
				'smwgQSortFeatures',
				$this->engineOptions->get( 'smwgQSortFeatures' ) & ~SMW_QSORT_RANDOM
			);
		}

		switch ( $query->querymode ) {
			case Query::MODE_DEBUG:
				$result = $this->getDebugQueryResult( $query, $rootid );
			break;
			case Query::MODE_COUNT:
				$result = $this->getCountQueryResult( $query, $rootid );
			break;
			default:
				$result = $this->getInstanceQueryResult( $query, $rootid );
			break;
		}

		$this->querySegmentListProcessor->cleanUp();
		$query->addErrors( $this->errors );

		return $result;
	}

	/**
	 * Using a preprocessed internal query description referenced by $rootid, compute
	 * the proper debug output for the given query.
	 *
	 * @param Query $query
	 * @param integer $rootid
	 *
	 * @return string
	 */
	private function getDebugQueryResult( Query $query, $rootid ) {

		$qobj = $this->querySegmentList[$rootid];
		$entries = [];

		$sqlOptions = $this->getSQLOptions( $query, $rootid );

		$entries['SQL Query'] = '';
		$entries['SQL Explain'] = '';

		$this->doExecuteDebugQueryResult( $qobj, $sqlOptions, $entries );
		$auxtables = '';

		foreach ( $this->querySegmentListProcessor->getExecutedQueries() as $table => $log ) {
			$auxtables .= "<li>Temporary table $table";
			foreach ( $log as $q ) {
				$auxtables .= "<br />&#160;&#160;<tt>$q</tt>";
			}
			$auxtables .= '</li>';
		}

		if ( $auxtables ) {
			$entries['Auxilliary Tables'] = "<ul>$auxtables</ul>";
		} else {
			$entries['Auxilliary Tables'] = 'No auxilliary tables used.';
		}

		return DebugFormatter::getStringFrom( 'SQLStore', $entries, $query );
	}

	private function doExecuteDebugQueryResult( $qobj, $sqlOptions, &$entries ) {

		if ( !isset( $qobj->joinfield ) || $qobj->joinfield === '' ) {
			return $entries['SQL Query'] = 'Empty result, no SQL query created.';
		}

		$connection = $this->store->getConnection( 'mw.db.queryengine' );
		list( $startOpts, $useIndex, $tailOpts ) = $connection->makeSelectOptions( $sqlOptions );

		$sortfields = implode( $qobj->sortfields, ',' );
		$sortfields = $sortfields ? ', ' . $sortfields : '';

		$format = DebugFormatter::getFormat(
			$connection->getType()
		);

		$sql = "SELECT DISTINCT ".
			"$qobj->alias.smw_id AS id," .
			"$qobj->alias.smw_title AS t," .
			"$qobj->alias.smw_namespace AS ns," .
			"$qobj->alias.smw_iw AS iw," .
			"$qobj->alias.smw_subobject AS so," .
			"$qobj->alias.smw_sortkey AS sortkey" .
			"$sortfields " .
			"FROM " .
			$connection->tableName( $qobj->joinTable ) . " AS $qobj->alias" . $qobj->from .
			( $qobj->where === '' ? '':' WHERE ' ) . $qobj->where . "$tailOpts $startOpts $useIndex ".
			"LIMIT " . $sqlOptions['LIMIT'] . ' ' .
			"OFFSET " . $sqlOptions['OFFSET'];

		$res = $connection->query(
			"EXPLAIN $format $sql",
			__METHOD__
		);

		$entries['SQL Explain'] = DebugFormatter::prettifyExplain( $connection->getType(), $res );
		$entries['SQL Query'] = DebugFormatter::prettifySql( $sql, $qobj->alias );

		$connection->freeResult( $res );
	}

	/**
	 * Using a preprocessed internal query description referenced by $rootid, compute
	 * the proper counting output for the given query.
	 *
	 * @param Query $query
	 * @param integer $rootid
	 *
	 * @return integer
	 */
	private function getCountQueryResult( Query $query, $rootid ) {

		$queryResult = $this->queryFactory->newQueryResult(
			$this->store,
			$query,
			[],
			false
		);

		$queryResult->setCountValue( 0 );

		$qobj = $this->querySegmentList[$rootid];

		if ( $qobj->joinfield === '' ) { // empty result, no query needed
			return $queryResult;
		}

		$connection = $this->store->getConnection( 'mw.db.queryengine' );

		$sql_options = [ 'LIMIT' => $query->getLimit() + 1, 'OFFSET' => $query->getOffset() ];

		$res = $connection->select(
			$connection->tableName( $qobj->joinTable ) . " AS $qobj->alias" . $qobj->from,
			"COUNT(DISTINCT $qobj->alias.smw_id) AS count",
			$qobj->where,
			__METHOD__,
			$sql_options
		);

		$row = $connection->fetchObject( $res );
		$count = 0;

		if ( $row !== false ) {
			$count = $row->count;
		}

		$connection->freeResult( $res );

		$queryResult->setCountValue( $count );

		return $queryResult;
	}

	/**
	 * Using a preprocessed internal query description referenced by $rootid,
	 * compute the proper result instance output for the given query.
	 * @todo The SQL standard requires us to select all fields by which we sort, leading
	 * to wrong results regarding the given limit: the user expects limit to be applied to
	 * the number of distinct pages, but we can use DISTINCT only to whole rows. Thus, if
	 * rows contain sortfields, then pages with multiple values for that field are distinct
	 * and appear multiple times in the result. Filtering duplicates in post processing
	 * would still allow such duplicates to push aside wanted values, leading to less than
	 * "limit" results although there would have been "limit" really distinct results. For
	 * this reason, we select sortfields only for POSTGRES. MySQL is able to perform what
	 * we want here. It would be nice if we could eliminate the bug in POSTGRES as well.
	 *
	 * @param Query $query
	 * @param integer $rootid
	 *
	 * @return QueryResult
	 */
	private function getInstanceQueryResult( Query $query, $rootid ) {

		$connection = $this->store->getConnection( 'mw.db.queryengine' );
		$qobj = $this->querySegmentList[$rootid];

		// Empty result, no query needed
		if ( $qobj->joinfield === '' ) {
			return $this->queryFactory->newQueryResult(
				$this->store,
				$query,
				[],
				false
			);
		}

		$sql_options = $this->getSQLOptions( $query, $rootid );

		// Selecting those is required in standard SQL (but MySQL does not require it).
		$sortfields = implode( $qobj->sortfields, ',' );
		$sortfields = $sortfields ? ',' . $sortfields : '';

		$res = $connection->select(
			$connection->tableName( $qobj->joinTable ) . " AS $qobj->alias" . $qobj->from,
			"DISTINCT ".
			"$qobj->alias.smw_id AS id," .
			"$qobj->alias.smw_title AS t," .
			"$qobj->alias.smw_namespace AS ns," .
			"$qobj->alias.smw_iw AS iw," .
			"$qobj->alias.smw_subobject AS so," .
			"$qobj->alias.smw_sortkey AS sortkey" .
			"$sortfields",
			$qobj->where,
			__METHOD__,
			$sql_options
		);

		$results = [];
		$dataItemCache = [];

		$logToTable = [];
		$hasFurtherResults = false;

		 // Number of fetched results ( != number of valid results in
		 // array $results)
		$count = 0;
		$missedCount = 0;

		$diHandler = $this->store->getDataItemHandlerForDIType(
			DataItem::TYPE_WIKIPAGE
		);

		while ( ( $count < $query->getLimit() ) && ( $row = $connection->fetchObject( $res ) ) ) {
			if ( $row->iw === '' || $row->iw{0} != ':' )  {

				// Catch exception for non-existing predefined properties that
				// still registered within non-updated pages (@see bug 48711)
				try {
					$dataItem = $diHandler->dataItemFromDBKeys( [
						$row->t,
						intval( $row->ns ),
						$row->iw,
						'',
						$row->so
					] );
					
					// Register the ID in an event the post-proceesing
					// fails (namespace no longer valid etc.)
					$dataItem->setId( $row->id );
				} catch ( PredefinedPropertyLabelMismatchException $e ) {
					$logToTable[$row->t] = "issue creating a {$row->t} dataitem from a database row";
					$this->log( __METHOD__ . ' ' . $e->getMessage() );
					$dataItem = '';
				}

				if ( $dataItem instanceof DIWikiPage && !isset( $dataItemCache[$dataItem->getHash()] ) ) {
					$count++;
					$dataItemCache[$dataItem->getHash()] = true;
					$results[] = $dataItem;
					// These IDs are usually needed for displaying the page (esp. if more property values are displayed):
					$this->store->smwIds->setCache( $row->t, $row->ns, $row->iw, $row->so, $row->id, $row->sortkey );
				} else {
					$missedCount++;
					$logToTable[$row->t] = "skip result for {$row->t} existing cache entry / query " . $query->getHash();
				}
			} else {
				$missedCount++;
				$logToTable[$row->t] = "skip result for {$row->t} due to an internal `{$row->iw}` pointer / query " . $query->getHash();
			}
		}

		if ( $connection->fetchObject( $res ) ) {
			$count++;
		}

		if ( $logToTable !== [] ) {
			$this->log( __METHOD__ . ' ' . implode( ',', $logToTable ) );
		}

		if ( $count > $query->getLimit() || ( $count + $missedCount ) > $query->getLimit() ) {
			$hasFurtherResults = true;
		};

		$connection->freeResult( $res );

		$queryResult = $this->queryFactory->newQueryResult(
			$this->store,
			$query,
			$results,
			$hasFurtherResults
		);

		return $queryResult;
	}

	private function applyExtraWhereCondition( $connection, $qid ) {

		if ( !isset( $this->querySegmentList[$qid] ) ) {
			return null;
		}

		$qobj = $this->querySegmentList[$qid];

		// Filter elements that should never appear in a result set
		$extraWhereCondition = [
			'del'  => "$qobj->alias.smw_iw!=" . $connection->addQuotes( SMW_SQL3_SMWIW_OUTDATED ) . " AND $qobj->alias.smw_iw!=" . $connection->addQuotes( SMW_SQL3_SMWDELETEIW ),
			'redi' => "$qobj->alias.smw_iw!=" . $connection->addQuotes( SMW_SQL3_SMWREDIIW )
		];

		if ( strpos( $qobj->where, SMW_SQL3_SMWIW_OUTDATED ) === false ) {
			$qobj->where .= $qobj->where === '' ? $extraWhereCondition['del'] : " AND " . $extraWhereCondition['del'];
		}

		if ( strpos( $qobj->where, SMW_SQL3_SMWREDIIW ) === false ) {
			$qobj->where .= $qobj->where === '' ? $extraWhereCondition['redi'] : " AND " . $extraWhereCondition['redi'];
		}

		$this->querySegmentList[$qid] = $qobj;
	}

	/**
	 * Get a SQL option array for the given query and preprocessed query object at given id.
	 *
	 * @param Query $query
	 * @param integer $rootId
	 *
	 * @return array
	 */
	private function getSQLOptions( Query $query, $rootId ) {

		$result = [
			'LIMIT' => $query->getLimit() + 5,
			'OFFSET' => $query->getOffset()
		];

		if ( !$this->engineOptions->isFlagSet( 'smwgQSortFeatures', SMW_QSORT ) ) {
			return $result;
		}

		// Build ORDER BY options using discovered sorting fields.
		$qobj = $this->querySegmentList[$rootId];

		foreach ( $this->sortKeys as $propkey => $order ) {

			if ( !is_string( $propkey ) ) {
				throw new RuntimeException( "Expected a string value as sortkey" );
			}

			if ( ( $order != 'RANDOM' ) && array_key_exists( $propkey, $qobj->sortfields ) ) { // Field was successfully added.

				$list = $qobj->sortfields[$propkey];

				// Contains a compound list of sortfields without order?
				if ( strpos( $list, ',' ) !== false && strpos( $list, $order ) === false ) {
					$list = str_replace( ',', " $order,", $list );
				}

				$result['ORDER BY'] = ( array_key_exists( 'ORDER BY', $result ) ? $result['ORDER BY'] . ', ' : '' ) . $list . " $order ";
			} elseif ( ( $order == 'RANDOM' ) && $this->engineOptions->isFlagSet( 'smwgQSortFeatures', SMW_QSORT_RANDOM ) ) {
				$result['ORDER BY'] = ( array_key_exists( 'ORDER BY', $result ) ? $result['ORDER BY'] . ', ' : '' ) . ' RAND() ';
			}
		}

		return $result;
	}

	private function log( $message, $context = [] ) {

		if ( $this->logger === null ) {
			return;
		}

		$this->logger->info( $message, $context );
	}

}