summaryrefslogtreecommitdiff
path: root/www/wiki/includes/search
diff options
context:
space:
mode:
authorYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
committerYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
commitfc7369835258467bf97eb64f184b93691f9a9fd5 (patch)
treedaabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/includes/search
first commit
Diffstat (limited to 'www/wiki/includes/search')
-rw-r--r--www/wiki/includes/search/AugmentPageProps.php20
-rw-r--r--www/wiki/includes/search/DummySearchIndexFieldDefinition.php30
-rw-r--r--www/wiki/includes/search/NullIndexField.php52
-rw-r--r--www/wiki/includes/search/ParserOutputSearchDataExtractor.php96
-rw-r--r--www/wiki/includes/search/PerRowAugmentor.php37
-rw-r--r--www/wiki/includes/search/ResultAugmentor.php13
-rw-r--r--www/wiki/includes/search/ResultSetAugmentor.php13
-rw-r--r--www/wiki/includes/search/SearchDatabase.php61
-rw-r--r--www/wiki/includes/search/SearchEngine.php812
-rw-r--r--www/wiki/includes/search/SearchEngineConfig.php117
-rw-r--r--www/wiki/includes/search/SearchEngineFactory.php65
-rw-r--r--www/wiki/includes/search/SearchExactMatchRescorer.php144
-rw-r--r--www/wiki/includes/search/SearchHighlighter.php566
-rw-r--r--www/wiki/includes/search/SearchIndexField.php98
-rw-r--r--www/wiki/includes/search/SearchIndexFieldDefinition.php153
-rw-r--r--www/wiki/includes/search/SearchMssql.php210
-rw-r--r--www/wiki/includes/search/SearchMySQL.php458
-rw-r--r--www/wiki/includes/search/SearchNearMatchResultSet.php30
-rw-r--r--www/wiki/includes/search/SearchNearMatcher.php167
-rw-r--r--www/wiki/includes/search/SearchOracle.php276
-rw-r--r--www/wiki/includes/search/SearchPostgres.php192
-rw-r--r--www/wiki/includes/search/SearchResult.php283
-rw-r--r--www/wiki/includes/search/SearchResultSet.php279
-rw-r--r--www/wiki/includes/search/SearchSqlite.php312
-rw-r--r--www/wiki/includes/search/SearchSuggestion.php185
-rw-r--r--www/wiki/includes/search/SearchSuggestionSet.php212
-rw-r--r--www/wiki/includes/search/SqlSearchResultSet.php69
27 files changed, 4950 insertions, 0 deletions
diff --git a/www/wiki/includes/search/AugmentPageProps.php b/www/wiki/includes/search/AugmentPageProps.php
new file mode 100644
index 00000000..29bd463d
--- /dev/null
+++ b/www/wiki/includes/search/AugmentPageProps.php
@@ -0,0 +1,20 @@
+<?php
+
+/**
+ * Augment search result set with values of certain page props.
+ */
+class AugmentPageProps implements ResultSetAugmentor {
+    /**
+     * @var array Names of the page props to look up.
+     */
+    private $propnames;
+
+    /**
+     * @param array $propnames Page property names handed to PageProps
+     */
+    public function __construct( $propnames ) {
+        $this->propnames = $propnames;
+    }
+
+    /**
+     * Look up the configured page props for every title of the result set.
+     *
+     * @param SearchResultSet $resultSet
+     * @return array Whatever PageProps::getProperties() returns for the titles
+     */
+    public function augmentAll( SearchResultSet $resultSet ) {
+        $titles = $resultSet->extractTitles();
+        $pageProps = PageProps::getInstance();
+
+        return $pageProps->getProperties( $titles, $this->propnames );
+    }
+}
diff --git a/www/wiki/includes/search/DummySearchIndexFieldDefinition.php b/www/wiki/includes/search/DummySearchIndexFieldDefinition.php
new file mode 100644
index 00000000..3ee3ed5a
--- /dev/null
+++ b/www/wiki/includes/search/DummySearchIndexFieldDefinition.php
@@ -0,0 +1,30 @@
+<?php
+
+/**
+ * Dummy implementation of SearchIndexFieldDefinition for testing purposes.
+ *
+ * @since 1.28
+ */
+class DummySearchIndexFieldDefinition extends SearchIndexFieldDefinition {
+
+    /**
+     * Describe this field, and recursively its subfields, as a plain array.
+     *
+     * @param SearchEngine $engine Engine passed through to every subfield
+     *
+     * @return array Array with the keys name, type, flags and subfields
+     */
+    public function getMapping( SearchEngine $engine ) {
+        // Collect the subfield mappings as a plain list first
+        $subfieldMappings = [];
+        foreach ( $this->subfields as $child ) {
+            $subfieldMappings[] = $child->getMapping( $engine );
+        }
+
+        return [
+            'name' => $this->name,
+            'type' => $this->type,
+            'flags' => $this->flags,
+            'subfields' => $subfieldMappings,
+        ];
+    }
+
+}
diff --git a/www/wiki/includes/search/NullIndexField.php b/www/wiki/includes/search/NullIndexField.php
new file mode 100644
index 00000000..ff1e8cbf
--- /dev/null
+++ b/www/wiki/includes/search/NullIndexField.php
@@ -0,0 +1,52 @@
+<?php
+
+/**
+ * Null index field - means search engine does not implement this field.
+ */
+class NullIndexField implements SearchIndexField {
+
+    /**
+     * Get mapping for specific search engine
+     * @param SearchEngine $engine
+     * @return array|null Null means this field does not map to anything
+     */
+    public function getMapping( SearchEngine $engine ) {
+        return null;
+    }
+
+    /**
+     * Set global flag for this field.
+     *
+     * A null field stores no flags, so the call changes nothing; it still
+     * returns the instance so that fluent call chains keep working.
+     *
+     * @param int $flag Bit flag to set/unset
+     * @param bool $unset True if flag should be unset, false by default
+     * @return $this
+     */
+    public function setFlag( $flag, $unset = false ) {
+        return $this;
+    }
+
+    /**
+     * Check if flag is set.
+     * @param int $flag
+     * @return int Always 0, since no flag is ever stored
+     */
+    public function checkFlag( $flag ) {
+        return 0;
+    }
+
+    /**
+     * Merge two field definitions if possible.
+     *
+     * The null field carries no definition of its own, so the other
+     * definition is returned unchanged.
+     *
+     * @param SearchIndexField $that
+     * @return SearchIndexField|false New definition or false if not mergeable.
+     */
+    public function merge( SearchIndexField $that ) {
+        return $that;
+    }
+
+    /**
+     * @inheritDoc
+     */
+    public function getEngineHints( SearchEngine $engine ) {
+        return [];
+    }
+}
diff --git a/www/wiki/includes/search/ParserOutputSearchDataExtractor.php b/www/wiki/includes/search/ParserOutputSearchDataExtractor.php
new file mode 100644
index 00000000..4b60a0c5
--- /dev/null
+++ b/www/wiki/includes/search/ParserOutputSearchDataExtractor.php
@@ -0,0 +1,96 @@
+<?php
+
+namespace MediaWiki\Search;
+
+use Category;
+use ParserOutput;
+use Title;
+
+/**
+ * Extracts data from ParserOutput for indexing in the search engine.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @since 1.28
+ */
+class ParserOutputSearchDataExtractor {
+
+    /**
+     * Get a list of categories, as an array with title text strings.
+     *
+     * Category names that cannot be turned into a valid category title are
+     * skipped instead of triggering a fatal error.
+     *
+     * @param ParserOutput $parserOutput
+     * @return string[]
+     */
+    public function getCategories( ParserOutput $parserOutput ) {
+        $categories = [];
+
+        foreach ( $parserOutput->getCategoryLinks() as $key ) {
+            // Category::newFromName() and Category::getTitle() may both
+            // return false, so neither result can be dereferenced blindly.
+            $category = Category::newFromName( $key );
+            $title = $category ? $category->getTitle() : false;
+            if ( !$title ) {
+                continue;
+            }
+            $categories[] = $title->getText();
+        }
+
+        return $categories;
+    }
+
+    /**
+     * Get a list of external links from ParserOutput, as an array of strings.
+     *
+     * @param ParserOutput $parserOutput
+     * @return string[] The keys of ParserOutput::getExternalLinks()
+     */
+    public function getExternalLinks( ParserOutput $parserOutput ) {
+        return array_keys( $parserOutput->getExternalLinks() );
+    }
+
+    /**
+     * Get a list of outgoing wiki links (including interwiki links), as
+     * an array of prefixed title strings.
+     *
+     * @param ParserOutput $parserOutput
+     * @return string[] Prefixed DB keys, one per linked title
+     */
+    public function getOutgoingLinks( ParserOutput $parserOutput ) {
+        $outgoingLinks = [];
+
+        // getLinks() is iterated as namespace => [ dbKey => ... ]
+        foreach ( $parserOutput->getLinks() as $linkedNamespace => $namespaceLinks ) {
+            foreach ( array_keys( $namespaceLinks ) as $linkedDbKey ) {
+                $outgoingLinks[] =
+                    Title::makeTitle( $linkedNamespace, $linkedDbKey )->getPrefixedDBkey();
+            }
+        }
+
+        return $outgoingLinks;
+    }
+
+    /**
+     * Get a list of templates used in the ParserOutput content, as prefixed title strings
+     *
+     * @param ParserOutput $parserOutput
+     * @return string[] Prefixed title texts, one per template
+     */
+    public function getTemplates( ParserOutput $parserOutput ) {
+        $templates = [];
+
+        // getTemplates() is iterated as namespace => [ dbKey => ... ]
+        foreach ( $parserOutput->getTemplates() as $tNS => $templatesInNS ) {
+            foreach ( array_keys( $templatesInNS ) as $tDbKey ) {
+                $templateTitle = Title::makeTitle( $tNS, $tDbKey );
+                $templates[] = $templateTitle->getPrefixedText();
+            }
+        }
+
+        return $templates;
+    }
+
+}
diff --git a/www/wiki/includes/search/PerRowAugmentor.php b/www/wiki/includes/search/PerRowAugmentor.php
new file mode 100644
index 00000000..a3979f7b
--- /dev/null
+++ b/www/wiki/includes/search/PerRowAugmentor.php
@@ -0,0 +1,37 @@
+<?php
+
+/**
+ * Perform augmentation of each row and return composite result,
+ * indexed by ID.
+ */
+class PerRowAugmentor implements ResultSetAugmentor {
+
+    /**
+     * @var ResultAugmentor Augmentor applied to each individual result
+     */
+    private $rowAugmentor;
+
+    /**
+     * @param ResultAugmentor $augmentor Per-result augmentor to use.
+     */
+    public function __construct( ResultAugmentor $augmentor ) {
+        $this->rowAugmentor = $augmentor;
+    }
+
+    /**
+     * Run the per-result augmentor over every result of the set.
+     *
+     * @param SearchResultSet $resultSet
+     * @return array Augmentation data keyed by article ID
+     */
+    public function augmentAll( SearchResultSet $resultSet ) {
+        $augmented = [];
+        foreach ( $resultSet->extractResults() as $row ) {
+            $articleId = $row->getTitle()->getArticleID();
+            // Results whose title has no (non-zero) article ID are left out
+            if ( $articleId ) {
+                $augmented[$articleId] = $this->rowAugmentor->augment( $row );
+            }
+        }
+        return $augmented;
+    }
+}
diff --git a/www/wiki/includes/search/ResultAugmentor.php b/www/wiki/includes/search/ResultAugmentor.php
new file mode 100644
index 00000000..df58e71a
--- /dev/null
+++ b/www/wiki/includes/search/ResultAugmentor.php
@@ -0,0 +1,13 @@
+<?php
+
+/**
+ * Augment search results.
+ */
+interface ResultAugmentor {
+ /**
+ * Produce data to augment a single search result.
+ * @param SearchResult $result Result to produce data for
+ * @return mixed Data for this result
+ */
+ public function augment( SearchResult $result );
+}
diff --git a/www/wiki/includes/search/ResultSetAugmentor.php b/www/wiki/includes/search/ResultSetAugmentor.php
new file mode 100644
index 00000000..e2d79a9c
--- /dev/null
+++ b/www/wiki/includes/search/ResultSetAugmentor.php
@@ -0,0 +1,13 @@
+<?php
+
+/**
+ * Augment search results.
+ */
+interface ResultSetAugmentor {
+ /**
+ * Produce data to augment a whole search result set in one call.
+ * @param SearchResultSet $resultSet Result set to produce data for
+ * @return array Data for all results
+ */
+ public function augmentAll( SearchResultSet $resultSet );
+}
diff --git a/www/wiki/includes/search/SearchDatabase.php b/www/wiki/includes/search/SearchDatabase.php
new file mode 100644
index 00000000..643c2c13
--- /dev/null
+++ b/www/wiki/includes/search/SearchDatabase.php
@@ -0,0 +1,61 @@
+<?php
+/**
+ * Database search engine
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Search
+ */
+
+use Wikimedia\Rdbms\IDatabase;
+
+/**
+ * Base class for database-backed search engines
+ * @ingroup Search
+ * @since 1.23
+ */
+class SearchDatabase extends SearchEngine {
+    /**
+     * @var IDatabase Database connection that results are read from
+     *  (a DB_REPLICA connection unless one is injected)
+     */
+    protected $db;
+
+    /**
+     * @param IDatabase|null $db The database to search from; when omitted,
+     *  a DB_REPLICA connection is obtained through wfGetDB()
+     */
+    public function __construct( IDatabase $db = null ) {
+        $this->db = $db ?: wfGetDB( DB_REPLICA );
+    }
+
+    /**
+     * Return a 'cleaned up' search string: every character that is not
+     * legal in a search query is replaced by a space, then the result is
+     * trimmed.
+     *
+     * @param string $text
+     * @return string
+     */
+    protected function filter( $text ) {
+        // The allowed set must include the chars used by the search syntax,
+        // usually " (phrase) or * (wildcards) if supported by the engine.
+        $lc = $this->legalSearchChars( self::CHARS_ALL );
+        $cleaned = preg_replace( "/[^{$lc}]/", " ", $text );
+
+        return trim( $cleaned );
+    }
+}
diff --git a/www/wiki/includes/search/SearchEngine.php b/www/wiki/includes/search/SearchEngine.php
new file mode 100644
index 00000000..bfcfb598
--- /dev/null
+++ b/www/wiki/includes/search/SearchEngine.php
@@ -0,0 +1,812 @@
+<?php
+/**
+ * Basic search engine
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Search
+ */
+
+/**
+ * @defgroup Search Search
+ */
+
+use MediaWiki\MediaWikiServices;
+
+/**
+ * Base class for search engine backends
+ * @ingroup Search
+ */
+abstract class SearchEngine {
+ /** @var string */
+ public $prefix = '';
+
+ /** @var int[]|null */
+ public $namespaces = [ NS_MAIN ];
+
+ /** @var int */
+ protected $limit = 10;
+
+ /** @var int */
+ protected $offset = 0;
+
+ /** @var array|string */
+ protected $searchTerms = [];
+
+ /** @var bool */
+ protected $showSuggestion = true;
+ private $sort = 'relevance';
+
+ /** @var array Feature values */
+ protected $features = [];
+
+ /** @const string profile type for completionSearch */
+ const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';
+
+ /** @const string profile type for query independent ranking features */
+ const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';
+
+ /** @const int flag for legalSearchChars: includes all chars allowed in a search query */
+ const CHARS_ALL = 1;
+
+ /** @const int flag for legalSearchChars: includes all chars allowed in a search term */
+ const CHARS_NO_SYNTAX = 2;
+
+ /**
+ * Perform a full text search query and return a result set.
+ * If full text searches are not supported or disabled, return null.
+ * STUB
+ *
+ * @param string $term Raw search term
+ * @return SearchResultSet|Status|null
+ */
+ function searchText( $term ) {
+ return null;
+ }
+
+ /**
+ * Perform a title search in the article archive.
+ * NOTE: these results still should be filtered by
+ * matching against PageArchive, permissions checks etc
+ * The results returned by this method are only suggestions and
+ * may not end up being shown to the user.
+ *
+ * @param string $term Raw search term
+ * @return Status<Title[]>
+ * @since 1.29
+ */
+ function searchArchiveTitle( $term ) {
+ return Status::newGood( [] );
+ }
+
+ /**
+ * Perform a title-only search query and return a result set.
+ * If title searches are not supported or disabled, return null.
+ * STUB
+ *
+ * @param string $term Raw search term
+ * @return SearchResultSet|null
+ */
+ function searchTitle( $term ) {
+ return null;
+ }
+
+ /**
+ * @since 1.18
+ * @param string $feature
+ * @return bool
+ */
+ public function supports( $feature ) {
+     // The base engine only advertises index updates. Every other feature,
+     // 'title-suffix-filter' included, is reported as unsupported.
+     // The comparison is deliberately loose, like the switch it replaces.
+     return $feature == 'search-update';
+ }
+
+ /**
+ * Way to pass custom data for engines
+ * @since 1.18
+ * @param string $feature
+ * @param mixed $data
+ */
+ public function setFeatureData( $feature, $data ) {
+ $this->features[$feature] = $data;
+ }
+
+ /**
+ * Way to retrieve custom data set by setFeatureData
+ * or by the engine itself.
+ * @since 1.29
+ * @param string $feature feature name
+ * @return mixed the feature value or null if unset
+ */
+ public function getFeatureData( $feature ) {
+     // isset() also treats a stored null as "unset", which yields null anyway
+     return isset( $this->features[$feature] ) ? $this->features[$feature] : null;
+ }
+
+ /**
+ * When overridden in derived class, performs database-specific conversions
+ * on text to be used for searching or updating search index.
+ * The default implementation only applies the content language's word segmentation.
+ *
+ * @param string $string String to process
+ * @return string
+ */
+ public function normalizeText( $string ) {
+ global $wgContLang;
+
+ // Some languages such as Chinese require word segmentation
+ return $wgContLang->segmentByWord( $string );
+ }
+
+ /**
+ * Transform search term in cases when parts of the query came as different
+ * GET params (when supported), e.g. for prefix queries:
+ * search=test&prefix=Main_Page/Archive -> test prefix:Main Page/Archive
+ * @param string $term
+ * @return string
+ */
+ public function transformSearchTerm( $term ) {
+ return $term;
+ }
+
+ /**
+ * Get the service class for finding near matches.
+ * @param Config $config Configuration to use for the matcher.
+ * @return SearchNearMatcher
+ */
+ public function getNearMatcher( Config $config ) {
+ global $wgContLang;
+ return new SearchNearMatcher( $config, $wgContLang );
+ }
+
+ /**
+ * Get near matcher for default SearchEngine.
+ * @return SearchNearMatcher
+ */
+ protected static function defaultNearMatcher() {
+ $config = MediaWikiServices::getInstance()->getMainConfig();
+ return MediaWikiServices::getInstance()->newSearchEngine()->getNearMatcher( $config );
+ }
+
+ /**
+ * If an exact title match can be found, or a very slightly close match,
+ * return the title. If no match, returns NULL.
+ * @deprecated since 1.27; Use SearchEngine::getNearMatcher()
+ * @param string $searchterm
+ * @return Title
+ */
+ public static function getNearMatch( $searchterm ) {
+ return static::defaultNearMatcher()->getNearMatch( $searchterm );
+ }
+
+ /**
+ * Do a near match (see SearchEngine::getNearMatch) and wrap it into a
+ * SearchResultSet.
+ * @deprecated since 1.27; Use SearchEngine::getNearMatcher()
+ * @param string $searchterm
+ * @return SearchResultSet
+ */
+ public static function getNearMatchResultSet( $searchterm ) {
+ return static::defaultNearMatcher()->getNearMatchResultSet( $searchterm );
+ }
+
+ /**
+ * Get chars legal for search
+ * NOTE: usage as static is deprecated and preserved only as BC measure
+ * @param int $type type of search chars (see self::CHARS_ALL
+ * and self::CHARS_NO_SYNTAX). Defaults to CHARS_ALL
+ * @return string
+ */
+ public static function legalSearchChars( $type = self::CHARS_ALL ) {
+ return "A-Za-z_'.0-9\\x80-\\xFF\\-";
+ }
+
+ /**
+ * Set the maximum number of results to return
+ * and how many to skip before returning the first.
+ *
+ * @param int $limit
+ * @param int $offset
+ */
+ function setLimitOffset( $limit, $offset = 0 ) {
+ $this->limit = intval( $limit );
+ $this->offset = intval( $offset );
+ }
+
+ /**
+ * Set which namespaces the search should include.
+ * Give an array of namespace index numbers.
+ *
+ * @param int[]|null $namespaces
+ */
+ function setNamespaces( $namespaces ) {
+     if ( !$namespaces ) {
+         // null or an empty list: store an empty namespace list
+         $this->namespaces = [];
+         return;
+     }
+
+     // Keep negative namespace ids and the ids known to be searchable
+     $searchable = $this->searchableNamespaces();
+     $this->namespaces = array_filter( $namespaces, function ( $ns ) use( $searchable ) {
+         return $ns < 0 || isset( $searchable[$ns] );
+     } );
+ }
+
+ /**
+ * Set whether the searcher should try to build a suggestion. Note: some searchers
+ * don't support building a suggestion in the first place and others don't respect
+ * this flag.
+ *
+ * @param bool $showSuggestion Should the searcher try to build suggestions
+ */
+ function setShowSuggestion( $showSuggestion ) {
+ $this->showSuggestion = $showSuggestion;
+ }
+
+ /**
+ * Get the valid sort directions. All search engines support 'relevance' but others
+ * might support more. The default in all implementations should be 'relevance.'
+ *
+ * @since 1.25
+ * @return string[] the valid sort directions for setSort
+ */
+ public function getValidSorts() {
+ return [ 'relevance' ];
+ }
+
+ /**
+ * Set the sort direction of the search results. Must be one returned by
+ * SearchEngine::getValidSorts()
+ *
+ * @since 1.25
+ * @throws InvalidArgumentException
+ * @param string $sort sort direction for query result
+ */
+ public function setSort( $sort ) {
+     $validSorts = $this->getValidSorts();
+     // Compare strictly: a loose in_array() accepts values such as true
+     // (and 0 before PHP 8) because they compare equal to any sort name.
+     if ( !in_array( $sort, $validSorts, true ) ) {
+         throw new InvalidArgumentException( "Invalid sort: $sort. " .
+             "Must be one of: " . implode( ', ', $validSorts ) );
+     }
+     $this->sort = $sort;
+ }
+
+ /**
+ * Get the sort direction of the search results
+ *
+ * @since 1.25
+ * @return string
+ */
+ public function getSort() {
+ return $this->sort;
+ }
+
+ /**
+ * Parse some common prefixes: all (search everything)
+ * or namespace names and set the list of namespaces
+ * of this class accordingly.
+ *
+ * @param string $query
+ * @return string
+ */
+ function replacePrefixes( $query ) {
+     $parsed = self::parseNamespacePrefixes( $query );
+     if ( $parsed !== false ) {
+         // Index 1 holds the extracted namespaces, index 0 the remaining query
+         $this->namespaces = $parsed[1];
+         $query = $parsed[0];
+     }
+     return $query;
+ }
+
+ /**
+ * Parse some common prefixes: all (search everything)
+ * or namespace names
+ *
+ * @param string $query
+ * @return false|array false if no namespace was extracted, an array
+ * with the parsed query at index 0 and an array of namespaces at index
+ * 1 (or null for all namespaces).
+ */
+ public static function parseNamespacePrefixes( $query ) {
+     global $wgContLang;
+
+     $colonPos = strpos( $query, ':' );
+     if ( $colonPos === false ) {
+         // No prefix separator at all, nothing to do
+         return false;
+     }
+
+     $allkeyword = wfMessage( 'searchall' )->inContentLanguage()->text() . ":";
+     if ( strncmp( $query, $allkeyword, strlen( $allkeyword ) ) == 0 ) {
+         // Query starts with the localized "search all" keyword:
+         // null stands for all namespaces
+         $extractedNamespace = null;
+         $parsed = substr( $query, strlen( $allkeyword ) );
+     } else {
+         // TODO: should we unify with PrefixSearch::extractNamespace ?
+         $prefix = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
+         $index = $wgContLang->getNsIndex( $prefix );
+         if ( $index === false ) {
+             // Text before the colon is not a namespace name
+             return false;
+         }
+         $extractedNamespace = [ $index ];
+         $parsed = substr( $query, strlen( $prefix ) + 1 );
+     }
+
+     if ( trim( $parsed ) == '' ) {
+         // prefix was the whole query
+         $parsed = $query;
+     }
+
+     return [ $parsed, $extractedNamespace ];
+ }
+
+ /**
+ * Find snippet highlight settings for all users
+ * @return array Contextlines, contextchars
+ */
+ public static function userHighlightPrefs() {
+     // Both values are hard-coded: [ contextlines, contextchars ]
+     return [ 2, 75 ];
+ }
+
+ /**
+ * Create or update the search index record for the given page.
+ * Title and text should be pre-processed.
+ * STUB
+ *
+ * @param int $id
+ * @param string $title
+ * @param string $text
+ */
+ function update( $id, $title, $text ) {
+ // no-op
+ }
+
+ /**
+ * Update a search index record's title only.
+ * Title should be pre-processed.
+ * STUB
+ *
+ * @param int $id
+ * @param string $title
+ */
+ function updateTitle( $id, $title ) {
+ // no-op
+ }
+
+ /**
+ * Delete an indexed page
+ * Title should be pre-processed.
+ * STUB
+ *
+ * @param int $id Page id that was deleted
+ * @param string $title Title of page that was deleted
+ */
+ function delete( $id, $title ) {
+ // no-op
+ }
+
+ /**
+ * Get the raw text for updating the index from a content object
+ * Nicer search backends could possibly do something cooler than
+ * just returning raw text
+ *
+ * @todo This isn't ideal, we'd really like to have content-specific handling here
+ * @param Title $t Title we're indexing
+ * @param Content $c Content of the page to index
+ * @return string
+ */
+ public function getTextFromContent( Title $t, Content $c = null ) {
+ return $c ? $c->getTextForSearchIndex() : '';
+ }
+
+ /**
+ * If an implementation of SearchEngine handles all of its own text processing
+ * in getTextFromContent() and doesn't require SearchUpdate::updateText()'s
+ * rather silly handling, it should return true here instead.
+ *
+ * @return bool
+ */
+ public function textAlreadyUpdatedForIndex() {
+ return false;
+ }
+
+ /**
+ * Makes search simple string if it was namespaced.
+ * Sets namespaces of the search to namespaces extracted from string.
+ * @param string $search
+ * @return string Simplified search string
+ */
+ protected function normalizeNamespaces( $search ) {
+ // Try to read the whole search string as a local (non-interwiki) title
+ $title = Title::newFromText( $search );
+ $ns = $this->namespaces;
+ if ( $title && !$title->isExternal() ) {
+ $ns = [ $title->getNamespace() ];
+ if ( $title->getNamespace() !== NS_MAIN ) {
+ // Drop everything up to and including the first colon (the prefix)
+ $search = substr( $search, strpos( $search, ':' ) + 1 );
+ }
+ if ( $ns[0] == NS_MAIN ) {
+ $ns = $this->namespaces; // no explicit prefix, use default namespaces
+ // Hook handlers receive both values by reference and may change them
+ Hooks::run( 'PrefixSearchExtractNamespace', [ &$ns, &$search ] );
+ }
+ } else {
+ // The string alone did not yield a local title. Append a dummy page name
+ // to detect a search that consists only of a namespace prefix.
+ $title = Title::newFromText( $search . 'Dummy' );
+ if ( $title && $title->getText() == 'Dummy'
+ && $title->getNamespace() != NS_MAIN
+ && !$title->isExternal()
+ ) {
+ // Only a namespace was given: search it with an empty term
+ $ns = [ $title->getNamespace() ];
+ $search = '';
+ } else {
+ Hooks::run( 'PrefixSearchExtractNamespace', [ &$ns, &$search ] );
+ }
+ }
+
+ // Searches in NS_MEDIA are carried out in NS_FILE instead
+ $ns = array_map( function ( $space ) {
+ return $space == NS_MEDIA ? NS_FILE : $space;
+ }, $ns );
+
+ // setNamespaces() additionally filters the list down to valid namespaces
+ $this->setNamespaces( $ns );
+ return $search;
+ }
+
+ /**
+ * Perform a completion search.
+ * Does not resolve namespaces and does not check variants.
+ * Search engine implementations may want to override this function.
+ * @param string $search
+ * @return SearchSuggestionSet
+ */
+ protected function completionSearchBackend( $search ) {
+ // Filled in by a PrefixSearchBackend hook handler (passed by reference below)
+ $results = [];
+
+ $search = trim( $search );
+
+ if ( !in_array( NS_SPECIAL, $this->namespaces ) && // We do not run hook on Special: search
+ !Hooks::run( 'PrefixSearchBackend',
+ [ $this->namespaces, $search, $this->limit, &$results, $this->offset ]
+ ) ) {
+ // False means hook worked.
+ // FIXME: Yes, the API is weird. That's why it is going to be deprecated.
+
+ // Hook results are treated as strings, not Title objects
+ return SearchSuggestionSet::fromStrings( $results );
+ } else {
+ // Hook did not do the job, use default simple search
+ $results = $this->simplePrefixSearch( $search );
+ return SearchSuggestionSet::fromTitles( $results );
+ }
+ }
+
+ /**
+ * Perform a completion search.
+ * @param string $search
+ * @return SearchSuggestionSet
+ */
+ public function completionSearch( $search ) {
+ if ( trim( $search ) === '' ) {
+ return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
+ }
+ $search = $this->normalizeNamespaces( $search );
+ return $this->processCompletionResults( $search, $this->completionSearchBackend( $search ) );
+ }
+
+ /**
+ * Perform a completion search with variants.
+ * @param string $search
+ * @return SearchSuggestionSet
+ */
+ public function completionSearchWithVariants( $search ) {
+ if ( trim( $search ) === '' ) {
+ return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
+ }
+ $search = $this->normalizeNamespaces( $search );
+
+ $results = $this->completionSearchBackend( $search );
+ // Number of result slots still free after the primary search
+ $fallbackLimit = $this->limit - $results->getSize();
+ if ( $fallbackLimit > 0 ) {
+ global $wgContLang;
+
+ // Retry with every language variant of the query except the query itself
+ $fallbackSearches = $wgContLang->autoConvertToAllVariants( $search );
+ $fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );
+
+ foreach ( $fallbackSearches as $fbs ) {
+ // NOTE(review): setLimitOffset() overwrites $this->limit and resets
+ // $this->offset to 0, and neither is restored afterwards, so the
+ // processCompletionResults() call below sees the reduced limit.
+ // Confirm this is intended.
+ $this->setLimitOffset( $fallbackLimit );
+ $fallbackSearchResult = $this->completionSearch( $fbs );
+ $results->appendAll( $fallbackSearchResult );
+ $fallbackLimit -= $fallbackSearchResult->getSize();
+ if ( $fallbackLimit <= 0 ) {
+ // All free slots are used up, no need to try further variants
+ break;
+ }
+ }
+ }
+ return $this->processCompletionResults( $search, $results );
+ }
+
+ /**
+ * Extract titles from completion results
+ * @param SearchSuggestionSet $completionResults
+ * @return Title[]
+ */
+ public function extractTitles( SearchSuggestionSet $completionResults ) {
+ return $completionResults->map( function ( SearchSuggestion $sugg ) {
+ return $sugg->getSuggestedTitle();
+ } );
+ }
+
+ /**
+ * Process completion search results.
+ * Resolves the titles and rescores.
+ * @param string $search
+ * @param SearchSuggestionSet $suggestions
+ * @return SearchSuggestionSet
+ */
+ protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
+     $search = trim( $search );
+     // preload the titles with LinkBatch
+     $titles = $suggestions->map( function ( SearchSuggestion $sugg ) {
+         return $sugg->getSuggestedTitle();
+     } );
+     $lb = new LinkBatch( $titles );
+     $lb->setCaller( __METHOD__ );
+     $lb->execute();
+
+     // Prefixed title text of every suggestion, in suggestion order
+     $results = $suggestions->map( function ( SearchSuggestion $sugg ) {
+         return $sugg->getSuggestedTitle()->getPrefixedText();
+     } );
+
+     if ( $this->offset === 0 ) {
+         // Rescore results with an exact title match
+         // NOTE: in some cases like cross-namespace redirects
+         // (frequently used as shortcuts e.g. WP:WP on huwiki) some
+         // backends like Cirrus will return no results. We should still
+         // try an exact title match to workaround this limitation
+         $rescorer = new SearchExactMatchRescorer();
+         $rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );
+     } else {
+         // No need to rescore if offset is not 0
+         // The exact match must have been returned at position 0
+         // if it existed.
+         $rescoredResults = $results;
+     }
+
+     if ( count( $rescoredResults ) > 0 ) {
+         // Strict search: with loose comparison numeric-looking titles such
+         // as "1.0" and "1" are considered equal, which could report the
+         // position of a different title and skip the rescoring below.
+         $found = array_search( $rescoredResults[0], $results, true );
+         if ( $found === false ) {
+             // If the first result is not in the previous array it
+             // means that we found a new exact match
+             $exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $rescoredResults[0] ) );
+             $suggestions->prepend( $exactMatch );
+             $suggestions->shrink( $this->limit );
+         } elseif ( $found > 0 ) {
+             // The best result already exists but is not first: rescore
+             $suggestions->rescore( $found );
+         }
+     }
+
+     return $suggestions;
+ }
+
+ /**
+ * Simple prefix search for subpages.
+ * @param string $search
+ * @return Title[]
+ */
+ public function defaultPrefixSearch( $search ) {
+ if ( trim( $search ) === '' ) {
+ return [];
+ }
+
+ $search = $this->normalizeNamespaces( $search );
+ return $this->simplePrefixSearch( $search );
+ }
+
+ /**
+ * Call out to simple search backend.
+ * Defaults to TitlePrefixSearch.
+ * @param string $search
+ * @return Title[]
+ */
+ protected function simplePrefixSearch( $search ) {
+ // Use default database prefix search
+ $backend = new TitlePrefixSearch;
+ return $backend->defaultSearchBackend( $this->namespaces, $search, $this->limit, $this->offset );
+ }
+
+ /**
+ * Make a list of searchable namespaces and their canonical names.
+ * @deprecated since 1.27; use SearchEngineConfig::searchableNamespaces()
+ * @return array
+ */
+ public static function searchableNamespaces() {
+ return MediaWikiServices::getInstance()->getSearchEngineConfig()->searchableNamespaces();
+ }
+
+ /**
+ * Extract default namespaces to search from the given user's
+ * settings, returning a list of index numbers.
+ * @deprecated since 1.27; use SearchEngineConfig::userNamespaces()
+ * @param User $user
+ * @return array
+ */
+ public static function userNamespaces( $user ) {
+ return MediaWikiServices::getInstance()->getSearchEngineConfig()->userNamespaces( $user );
+ }
+
+ /**
+ * An array of namespaces indexes to be searched by default
+ * @deprecated since 1.27; use SearchEngineConfig::defaultNamespaces()
+ * @return array
+ */
+ public static function defaultNamespaces() {
+ return MediaWikiServices::getInstance()->getSearchEngineConfig()->defaultNamespaces();
+ }
+
+ /**
+  * Turn a list of namespaces into display names, e.g. for tooltips
+  * and preferences.
+  *
+  * Thin forwarder to the SearchEngineConfig service.
+  *
+  * @deprecated since 1.27; use SearchEngineConfig::namespacesAsText()
+  * @param array $namespaces
+  * @return array
+  */
+ public static function namespacesAsText( $namespaces ) {
+ 	$engineConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();
+
+ 	return $engineConfig->namespacesAsText( $namespaces );
+ }
+
+ /**
+  * Build a configured search engine instance, either of the requested type
+  * or of the default type for the active database backend.
+  *
+  * Thin forwarder to the SearchEngineFactory service.
+  *
+  * @deprecated since 1.27; Use SearchEngineFactory::create
+  * @param string $type Type of search backend, if not the default
+  * @return SearchEngine
+  */
+ public static function create( $type = null ) {
+ 	$factory = MediaWikiServices::getInstance()->getSearchEngineFactory();
+
+ 	return $factory->create( $type );
+ }
+
+ /**
+  * List the supported search engines. When only $wgSearchType is set,
+  * the list holds just that one item.
+  *
+  * Thin forwarder to the SearchEngineConfig service.
+  *
+  * @deprecated since 1.27; use SearchEngineConfig::getSearchTypes()
+  * @return array
+  */
+ public static function getSearchTypes() {
+ 	$engineConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();
+
+ 	return $engineConfig->getSearchTypes();
+ }
+
+ /**
+  * List the profiles available for a given profile type.
+  *
+  * Search engine implementations may expose profiles that fine-tune their
+  * behaviour. A profile is selected by passing it as feature data:
+  * setFeatureData( $profileType, $profileName ).
+  *
+  * Each entry of the returned list is an array with these keys:
+  * - name: the profile name to use with setFeatureData
+  * - desc-message: the i18n description
+  * - default: set to true if this profile is the default
+  *
+  * This base implementation exposes no profiles and always returns null.
+  *
+  * @since 1.28
+  * @param string $profileType the type of profiles
+  * @param User|null $user the user requesting the list of profiles
+  * @return array|null the list of profiles or null if none available
+  */
+ public function getProfiles( $profileType, User $user = null ) {
+ 	return null;
+ }
+
+ /**
+  * Create the definition of a search index field.
+  *
+  * This base implementation ignores both arguments and returns a
+  * NullIndexField; concrete search engines should override it to build
+  * real field definitions.
+  *
+  * @param string $name
+  * @param int $type One of the types in SearchIndexField::INDEX_TYPE_*
+  * @return SearchIndexField
+  * @since 1.28
+  */
+ public function makeSearchFieldMapping( $name, $type ) {
+ 	$field = new NullIndexField();
+
+ 	return $field;
+ }
+
+ /**
+  * Collect the search index field definitions of all content handlers.
+  *
+  * Each distinct handler is asked once for its fields. When two handlers
+  * define a field with the same name, the definitions are merged via
+  * SearchIndexField::merge(). Extensions can alter the final list through
+  * the 'SearchIndexFields' hook.
+  *
+  * @since 1.28
+  * @return SearchIndexField[] Index field definitions for all content handlers
+  * @throws InvalidArgumentException When two definitions of a field cannot be merged
+  */
+ public function getSearchIndexFields() {
+ 	$fields = [];
+ 	$processedHandlers = new SplObjectStorage();
+
+ 	foreach ( ContentHandler::getContentModels() as $model ) {
+ 		try {
+ 			$handler = ContentHandler::getForModelID( $model );
+ 		} catch ( MWUnknownContentModelException $e ) {
+ 			// No handler for this model: nothing to collect
+ 			continue;
+ 		}
+
+ 		// Several models can share one handler; handle each handler only once
+ 		if ( $processedHandlers->contains( $handler ) ) {
+ 			continue;
+ 		}
+ 		$processedHandlers->attach( $handler );
+
+ 		foreach ( $handler->getFieldsForSearchIndex( $this ) as $fieldName => $fieldData ) {
+ 			if ( !empty( $fields[$fieldName] ) ) {
+ 				// TODO: do we allow some clashes with the same type or reject all of them?
+ 				$merged = $fields[$fieldName]->merge( $fieldData );
+ 				if ( !$merged ) {
+ 					throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
+ 				}
+ 				$fieldData = $merged;
+ 			}
+ 			$fields[$fieldName] = $fieldData;
+ 		}
+ 	}
+
+ 	// Hook to allow extensions to produce search mapping fields
+ 	Hooks::run( 'SearchIndexFields', [ &$fields, $this ] );
+
+ 	return $fields;
+ }
+
+ /**
+  * Augment search results with extra data.
+  *
+  * Augmentors are gathered through the 'SearchResultsAugment' hook. Row
+  * augmentors are wrapped in a PerRowAugmentor so that everything can be run
+  * as a set augmentor; any non-empty result is stored on the result set under
+  * the augmentor's name.
+  *
+  * @param SearchResultSet $resultSet
+  * @throws InvalidArgumentException When a name is used by both a row and a set augmentor
+  */
+ public function augmentSearchResults( SearchResultSet $resultSet ) {
+ 	$setAugmentors = [];
+ 	$rowAugmentors = [];
+ 	Hooks::run( "SearchResultsAugment", [ &$setAugmentors, &$rowAugmentors ] );
+
+ 	if ( !( $setAugmentors || $rowAugmentors ) ) {
+ 		// Nothing registered, nothing to do
+ 		return;
+ 	}
+
+ 	// Wrap every row augmentor so it can run as a set augmentor
+ 	foreach ( $rowAugmentors as $name => $rowAugmentor ) {
+ 		if ( isset( $setAugmentors[$name] ) ) {
+ 			throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" );
+ 		}
+ 		$setAugmentors[$name] = new PerRowAugmentor( $rowAugmentor );
+ 	}
+
+ 	foreach ( $setAugmentors as $name => $augmentor ) {
+ 		$augmented = $augmentor->augmentAll( $resultSet );
+ 		if ( !$augmented ) {
+ 			continue;
+ 		}
+ 		$resultSet->setAugmentedData( $name, $augmented );
+ 	}
+ }
+}
+
+/**
+ * Dummy class to be used when non-supported Database engine is present.
+ *
+ * It adds nothing to SearchEngine: every method is inherited unchanged.
+ *
+ * @todo FIXME: Dummy class should probably try something at least mildly useful,
+ * such as a LIKE search through titles.
+ * @ingroup Search
+ */
+class SearchEngineDummy extends SearchEngine {
+ // no-op: intentionally empty, all behaviour comes from SearchEngine
+}
diff --git a/www/wiki/includes/search/SearchEngineConfig.php b/www/wiki/includes/search/SearchEngineConfig.php
new file mode 100644
index 00000000..90f85c3d
--- /dev/null
+++ b/www/wiki/includes/search/SearchEngineConfig.php
@@ -0,0 +1,117 @@
+<?php
+
+/**
+ * Configuration handling class for SearchEngine.
+ * Wraps a plain Config object and a Language and offers search-related
+ * convenience lookups on top of them.
+ *
+ * @since 1.27
+ */
+class SearchEngineConfig {
+
+	/**
+	 * Config object from which the settings will be derived.
+	 * @var Config
+	 */
+	private $config;
+
+	/**
+	 * Language used to resolve namespace names.
+	 * @var Language
+	 */
+	private $language;
+
+	/**
+	 * @param Config $config Source of the search settings
+	 * @param Language $lang Language used for namespace names
+	 */
+	public function __construct( Config $config, Language $lang ) {
+		$this->language = $lang;
+		$this->config = $config;
+	}
+
+	/**
+	 * Retrieve original config.
+	 * @return Config
+	 */
+	public function getConfig() {
+		return $this->config;
+	}
+
+	/**
+	 * Make a list of searchable namespaces and their canonical names.
+	 *
+	 * Namespaces with an ID below NS_MAIN are left out. The list can be
+	 * changed through the 'SearchableNamespaces' hook.
+	 *
+	 * @return array Namespace ID => name
+	 */
+	public function searchableNamespaces() {
+		$searchable = [];
+		foreach ( $this->language->getNamespaces() as $nsId => $nsName ) {
+			if ( $nsId < NS_MAIN ) {
+				continue;
+			}
+			$searchable[$nsId] = $nsName;
+		}
+
+		Hooks::run( 'SearchableNamespaces', [ &$searchable ] );
+
+		return $searchable;
+	}
+
+	/**
+	 * Extract default namespaces to search from the given user's
+	 * settings, returning a list of index numbers.
+	 *
+	 * A namespace is included when the user's 'searchNs<ID>' option is truthy.
+	 *
+	 * @param User $user
+	 * @return int[]
+	 */
+	public function userNamespaces( $user ) {
+		$selected = [];
+		foreach ( array_keys( $this->searchableNamespaces() ) as $nsId ) {
+			if ( $user->getOption( 'searchNs' . $nsId ) ) {
+				$selected[] = $nsId;
+			}
+		}
+
+		return $selected;
+	}
+
+	/**
+	 * An array of namespaces indexes to be searched by default
+	 *
+	 * @return int[] Namespace IDs
+	 */
+	public function defaultNamespaces() {
+		$searchedByDefault = $this->config->get( 'NamespacesToBeSearchedDefault' );
+
+		// Keep only the IDs whose setting compares (loosely) equal to true
+		return array_keys( $searchedByDefault, true );
+	}
+
+	/**
+	 * Return the search engines we support. If only $wgSearchType
+	 * is set, it'll be an array of just that one item.
+	 *
+	 * @return array Configured search type first, then the alternatives
+	 */
+	public function getSearchTypes() {
+		$alternatives = $this->config->get( 'SearchTypeAlternatives' ) ?: [];
+		$primary = $this->config->get( 'SearchType' );
+
+		return array_merge( [ $primary ], $alternatives );
+	}
+
+	/**
+	 * Return the search engine configured in $wgSearchType, etc.
+	 *
+	 * @return string|null
+	 */
+	public function getSearchType() {
+		return $this->config->get( 'SearchType' );
+	}
+
+	/**
+	 * Get a list of namespace names useful for showing in tooltips
+	 * and preferences.
+	 *
+	 * An empty formatted name is replaced by the 'blanknamespace' message.
+	 *
+	 * @param int[] $namespaces
+	 * @return string[] List of names
+	 */
+	public function namespacesAsText( $namespaces ) {
+		$language = $this->language;
+
+		return array_map(
+			function ( $nsId ) use ( $language ) {
+				$label = $language->getFormattedNsText( $nsId );
+				return empty( $label ) ? wfMessage( 'blanknamespace' )->text() : $label;
+			},
+			$namespaces
+		);
+	}
+}
diff --git a/www/wiki/includes/search/SearchEngineFactory.php b/www/wiki/includes/search/SearchEngineFactory.php
new file mode 100644
index 00000000..8cdca571
--- /dev/null
+++ b/www/wiki/includes/search/SearchEngineFactory.php
@@ -0,0 +1,65 @@
+<?php
+
+use Wikimedia\Rdbms\IDatabase;
+
+/**
+ * Factory class for SearchEngine.
+ * Creates an engine of a requested, configured or database-derived type.
+ */
+class SearchEngineFactory {
+	/**
+	 * Configuration for SearchEngine classes.
+	 * @var SearchEngineConfig
+	 */
+	private $config;
+
+	/**
+	 * @param SearchEngineConfig $config
+	 */
+	public function __construct( SearchEngineConfig $config ) {
+		$this->config = $config;
+	}
+
+	/**
+	 * Create SearchEngine of the given type.
+	 *
+	 * The class is chosen in this order: the requested $type when it is one of
+	 * the configured search types, else the configured search type, else the
+	 * class matching the replica database's type. Only in the last case is a
+	 * database handle passed to the engine constructor; otherwise it gets null.
+	 *
+	 * @param string|null $type
+	 * @return SearchEngine
+	 */
+	public function create( $type = null ) {
+		$configType = $this->config->getSearchType();
+		$alternatives = $this->config->getSearchTypes();
+
+		if ( $type && in_array( $type, $alternatives ) ) {
+			return new $type( null );
+		}
+
+		if ( $configType !== null ) {
+			return new $configType( null );
+		}
+
+		$dbr = wfGetDB( DB_REPLICA );
+		$class = self::getSearchEngineClass( $dbr );
+
+		return new $class( $dbr );
+	}
+
+	/**
+	 * Map a database connection to the search engine class for its type.
+	 *
+	 * @param IDatabase $db
+	 * @return string SearchEngine subclass name; SearchEngineDummy for unknown types
+	 * @since 1.28
+	 */
+	public static function getSearchEngineClass( IDatabase $db ) {
+		$classByDbType = [
+			'sqlite' => SearchSqlite::class,
+			'mysql' => SearchMySQL::class,
+			'postgres' => SearchPostgres::class,
+			'mssql' => SearchMssql::class,
+			'oracle' => SearchOracle::class,
+		];
+		$dbType = $db->getType();
+
+		return isset( $classByDbType[$dbType] )
+			? $classByDbType[$dbType]
+			: SearchEngineDummy::class;
+	}
+}
diff --git a/www/wiki/includes/search/SearchExactMatchRescorer.php b/www/wiki/includes/search/SearchExactMatchRescorer.php
new file mode 100644
index 00000000..354b3909
--- /dev/null
+++ b/www/wiki/includes/search/SearchExactMatchRescorer.php
@@ -0,0 +1,144 @@
+<?php
+/**
+ * Rescores results from a prefix search/opensearch to make sure the
+ * exact match is the first result.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * An utility class to rescore search results by looking for an exact match
+ * in the db and add the page found to the first position.
+ *
+ * NOTE: extracted from TitlePrefixSearch
+ * @ingroup Search
+ */
+class SearchExactMatchRescorer {
+	/**
+	 * Default search backend does proper prefix searching, but custom backends
+	 * may sort based on other algorithms that may cause the exact title match
+	 * to not be in the results or be lower down the list.
+	 *
+	 * Titles are compared strictly as strings, so numeric-looking titles such
+	 * as "1e3" and "1000" are never treated as the same page.
+	 *
+	 * @param string $search the query
+	 * @param int[] $namespaces
+	 * @param string[] $srchres results
+	 * @param int $limit the max number of results to return
+	 * @return string[] munged results
+	 */
+	public function rescore( $search, $namespaces, $srchres, $limit ) {
+		// Pick namespace (based on PrefixSearch::defaultSearchBackend)
+		$ns = in_array( NS_MAIN, $namespaces ) ? NS_MAIN : reset( $namespaces );
+		$t = Title::newFromText( $search, $ns );
+		if ( !$t || !$t->exists() ) {
+			// No exact match so just return the search results
+			return $srchres;
+		}
+		$string = $t->getPrefixedText();
+		// Strict search: a loose comparison would equate numeric strings ("1e3" == "1000")
+		$key = array_search( $string, $srchres, true );
+		if ( $key !== false ) {
+			// Exact match was in the results so just move it to the front
+			return $this->pullFront( $key, $srchres );
+		}
+		// Exact match not in the search results so check for some redirect handling cases
+		if ( $t->isRedirect() ) {
+			$target = $this->getRedirectTarget( $t );
+			// Without a resolvable target there is nothing to look up
+			if ( $target !== null ) {
+				$key = array_search( $target, $srchres, true );
+				if ( $key !== false ) {
+					// Exact match is a redirect to one of the returned matches so pull the
+					// returned match to the front. This might look odd but the alternative
+					// is to put the redirect in front and drop the match. The name of the
+					// found match is often more descriptive/better formed than the name of
+					// the redirect AND by definition they share a prefix. Hopefully this
+					// choice is less confusing and more helpful. But it might not be. But
+					// it is the choice we're going with for now.
+					return $this->pullFront( $key, $srchres );
+				}
+				$redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres );
+				if ( isset( $redirectTargetsToRedirect[$target] ) ) {
+					// The exact match and something in the results list are both redirects
+					// to the same thing! In this case we'll pull the returned match to the
+					// top following the same logic above. Again, it might not be a perfect
+					// choice but it'll do.
+					return $this->pullFront( $redirectTargetsToRedirect[$target], $srchres );
+				}
+			}
+		} else {
+			$redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres );
+			if ( isset( $redirectTargetsToRedirect[$string] ) ) {
+				// The exact match is the target of a redirect already in the results list so remove
+				// the redirect from the results list and push the exact match to the front
+				array_splice( $srchres, $redirectTargetsToRedirect[$string], 1 );
+				array_unshift( $srchres, $string );
+				return $srchres;
+			}
+		}
+
+		// Exact match is totally unique from the other results so just add it to the front
+		array_unshift( $srchres, $string );
+		// And roll one off the end if the results are too long
+		if ( count( $srchres ) > $limit ) {
+			array_pop( $srchres );
+		}
+		return $srchres;
+	}
+
+	/**
+	 * @param string[] $titles
+	 * @return array redirect target prefixedText to index of title in titles
+	 *   that is a redirect to it.
+	 */
+	private function redirectTargetsToRedirect( array $titles ) {
+		$result = [];
+		foreach ( $titles as $key => $titleText ) {
+			$title = Title::newFromText( $titleText );
+			if ( !$title || !$title->isRedirect() ) {
+				continue;
+			}
+			$target = $this->getRedirectTarget( $title );
+			if ( !$target ) {
+				continue;
+			}
+			$result[$target] = $key;
+		}
+		return $result;
+	}
+
+	/**
+	 * Returns an array where the element of $array at index $key becomes
+	 * the first element.
+	 * @param int $key key to pull to the front
+	 * @param array $array list to reorder
+	 * @return array $array with the item at $key pulled to the front
+	 */
+	private function pullFront( $key, array $array ) {
+		$cut = array_splice( $array, $key, 1 );
+		array_unshift( $array, $cut[0] );
+		return $array;
+	}
+
+	/**
+	 * Get a redirect's destination from a title
+	 * @param Title $title A title to redirect. It may not redirect or even exist
+	 * @return null|string If title exists and redirects, get the destination's prefixed name
+	 */
+	private function getRedirectTarget( $title ) {
+		$page = WikiPage::factory( $title );
+		if ( !$page->exists() ) {
+			return null;
+		}
+		$redir = $page->getRedirectTarget();
+		return $redir ? $redir->getPrefixedText() : null;
+	}
+}
diff --git a/www/wiki/includes/search/SearchHighlighter.php b/www/wiki/includes/search/SearchHighlighter.php
new file mode 100644
index 00000000..20462cf1
--- /dev/null
+++ b/www/wiki/includes/search/SearchHighlighter.php
@@ -0,0 +1,566 @@
+<?php
+/**
+ * Basic search engine highlighting
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Search
+ */
+
+/**
+ * Highlight bits of wikitext
+ *
+ * @ingroup Search
+ */
+class SearchHighlighter {
+ protected $mCleanWikitext = true;
+
+ /**
+  * @warning Passing false disables the wikitext cleanup step, and with it
+  *   the HTML escaping: the caller is then responsible for escaping.
+  * @param bool $cleanupWikitext Whether extracted text is run through removeWiki()
+  */
+ function __construct( $cleanupWikitext = true ) {
+ 	$this->mCleanWikitext = $cleanupWikitext;
+ }
+
+ /**
+  * Wikitext highlighting when $wgAdvancedSearchHighlighting = true
+  *
+  * The text is split into plain-text lines and "other" extracts (templates,
+  * file links, tables and, when the Cite class exists, references). Snippets
+  * are picked around matches of the terms, padded towards a common size,
+  * joined with '<b> ... </b>' separators, and matched terms are wrapped in
+  * <span class='searchmatch'>.
+  *
+  * An empty $terms list is accepted; nothing is highlighted in that case.
+  *
+  * @param string $text
+  * @param array $terms Terms to highlight (not html escaped but
+  *   regex escaped via SearchDatabase::regexTerm())
+  * @param int $contextlines
+  * @param int $contextchars
+  * @return string
+  */
+ public function highlightText( $text, $terms, $contextlines, $contextchars ) {
+ 	global $wgContLang, $wgSearchHighlightBoundaries;
+
+ 	if ( $text == '' ) {
+ 		return '';
+ 	}
+
+ 	// split text into text + templates/links/tables
+ 	$spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
+ 	// first capture group is for detecting nested templates/links/tables/references
+ 	$endPatterns = [
+ 		1 => '/(\{\{)|(\}\})/', // template
+ 		2 => '/(\[\[)|(\]\])/', // image
+ 		3 => "/(\n\\{\\|)|(\n\\|\\})/" ]; // table
+
+ 	// @todo FIXME: This should prolly be a hook or something
+ 	// instead of hardcoding a class name from the Cite extension
+ 	if ( class_exists( 'Cite' ) ) {
+ 		$spat .= '|(<ref>)'; // references via cite extension
+ 		$endPatterns[4] = '/(<ref>)|(<\/ref>)/';
+ 	}
+ 	$spat .= '/';
+ 	$textExt = []; // text extracts
+ 	$otherExt = []; // other extracts
+ 	$start = 0;
+ 	$textLen = strlen( $text );
+ 	$count = 0; // sequence number to maintain ordering
+ 	while ( $start < $textLen ) {
+ 		// find start of template/image/table
+ 		if ( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ) {
+ 			$epat = '';
+ 			foreach ( $matches as $key => $val ) {
+ 				if ( $key > 0 && $val[1] != -1 ) {
+ 					if ( $key == 2 ) {
+ 						// see if this is an image link
+ 						$ns = substr( $val[0], 2, -1 );
+ 						if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) {
+ 							break;
+ 						}
+
+ 					}
+ 					$epat = $endPatterns[$key];
+ 					$this->splitAndAdd( $textExt, $count, substr( $text, $start, $val[1] - $start ) );
+ 					$start = $val[1];
+ 					break;
+ 				}
+ 			}
+ 			if ( $epat ) {
+ 				// find end (and detect any nested elements)
+ 				$level = 0;
+ 				$offset = $start + 1;
+ 				$found = false;
+ 				while ( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ) {
+ 					if ( array_key_exists( 2, $endMatches ) ) {
+ 						// found end
+ 						if ( $level == 0 ) {
+ 							$len = strlen( $endMatches[2][0] );
+ 							$off = $endMatches[2][1];
+ 							$this->splitAndAdd( $otherExt, $count,
+ 								substr( $text, $start, $off + $len - $start ) );
+ 							$start = $off + $len;
+ 							$found = true;
+ 							break;
+ 						} else {
+ 							// end of nested element
+ 							$level -= 1;
+ 						}
+ 					} else {
+ 						// nested
+ 						$level += 1;
+ 					}
+ 					$offset = $endMatches[0][1] + strlen( $endMatches[0][0] );
+ 				}
+ 				if ( !$found ) {
+ 					// couldn't find appropriate closing tag, skip
+ 					$this->splitAndAdd( $textExt, $count, substr( $text, $start, strlen( $matches[0][0] ) ) );
+ 					$start += strlen( $matches[0][0] );
+ 				}
+ 				continue;
+ 			}
+ 		}
+ 		// else: add as text extract
+ 		$this->splitAndAdd( $textExt, $count, substr( $text, $start ) );
+ 		break;
+ 	}
+
+ 	$all = $textExt + $otherExt; // these have disjunct key sets
+
+ 	// prepare regexps
+ 	foreach ( $terms as $index => $term ) {
+ 		// manually do upper/lowercase stuff for utf-8 since PHP won't do it
+ 		if ( preg_match( '/[\x80-\xff]/', $term ) ) {
+ 			$terms[$index] = preg_replace_callback(
+ 				'/./us',
+ 				[ $this, 'caseCallback' ],
+ 				$terms[$index]
+ 			);
+ 		}
+ 	}
+ 	$anyterm = implode( '|', $terms );
+ 	$phrase = implode( "$wgSearchHighlightBoundaries+", $terms );
+ 	// @todo FIXME: A hack to scale contextchars, a correct solution
+ 	// would be to have contextchars actually be char and not byte
+ 	// length, and do proper utf-8 substrings and lengths everywhere,
+ 	// but PHP is making that very hard and unclean to implement :(
+ 	// With no terms $anyterm is empty: keep the scale at 1 instead of dividing by zero.
+ 	$anytermChars = mb_strlen( $anyterm );
+ 	$scale = $anytermChars > 0 ? strlen( $anyterm ) / $anytermChars : 1;
+ 	$contextchars = intval( $contextchars * $scale );
+
+ 	$patPre = "(^|$wgSearchHighlightBoundaries)";
+ 	$patPost = "($wgSearchHighlightBoundaries|$)";
+
+ 	$pat1 = "/(" . $phrase . ")/ui";
+ 	$pat2 = "/$patPre(" . $anyterm . ")$patPost/ui";
+
+ 	$left = $contextlines;
+
+ 	$snippets = [];
+ 	$offsets = [];
+
+ 	// show beginning only if it contains all words
+ 	$first = 0;
+ 	$firstText = '';
+ 	foreach ( $textExt as $index => $line ) {
+ 		if ( strlen( $line ) > 0 && $line[0] != ';' && $line[0] != ':' ) {
+ 			$firstText = $this->extract( $line, 0, $contextchars * $contextlines );
+ 			$first = $index;
+ 			break;
+ 		}
+ 	}
+ 	if ( $firstText ) {
+ 		$succ = true;
+ 		// check if first text contains all terms
+ 		foreach ( $terms as $term ) {
+ 			if ( !preg_match( "/$patPre" . $term . "$patPost/ui", $firstText ) ) {
+ 				$succ = false;
+ 				break;
+ 			}
+ 		}
+ 		if ( $succ ) {
+ 			$snippets[$first] = $firstText;
+ 			$offsets[$first] = 0;
+ 		}
+ 	}
+ 	if ( !$snippets ) {
+ 		// match whole query on text
+ 		$this->process( $pat1, $textExt, $left, $contextchars, $snippets, $offsets );
+ 		// match whole query on templates/tables/images
+ 		$this->process( $pat1, $otherExt, $left, $contextchars, $snippets, $offsets );
+ 		// match any words on text
+ 		$this->process( $pat2, $textExt, $left, $contextchars, $snippets, $offsets );
+ 		// match any words on templates/tables/images
+ 		$this->process( $pat2, $otherExt, $left, $contextchars, $snippets, $offsets );
+
+ 		ksort( $snippets );
+ 	}
+
+ 	// add extra chars to each snippet to make snippets constant size
+ 	$extended = [];
+ 	if ( count( $snippets ) == 0 ) {
+ 		// couldn't find the target words, just show beginning of article
+ 		if ( array_key_exists( $first, $all ) ) {
+ 			$targetchars = $contextchars * $contextlines;
+ 			$snippets[$first] = '';
+ 			$offsets[$first] = 0;
+ 		}
+ 	} else {
+ 		// if begin of the article contains the whole phrase, show only that !!
+ 		if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
+ 			&& $offsets[$first] < $contextchars * 2 ) {
+ 			$snippets = [ $first => $snippets[$first] ];
+ 		}
+
+ 		// calc by how much to extend existing snippets
+ 		$targetchars = intval( ( $contextchars * $contextlines ) / count( $snippets ) );
+ 	}
+
+ 	foreach ( $snippets as $index => $line ) {
+ 		$extended[$index] = $line;
+ 		$len = strlen( $line );
+ 		if ( $len < $targetchars - 20 ) {
+ 			// complete this line
+ 			if ( $len < strlen( $all[$index] ) ) {
+ 				$extended[$index] = $this->extract(
+ 					$all[$index],
+ 					$offsets[$index],
+ 					$offsets[$index] + $targetchars,
+ 					$offsets[$index]
+ 				);
+ 				$len = strlen( $extended[$index] );
+ 			}
+
+ 			// add more lines
+ 			$add = $index + 1;
+ 			while ( $len < $targetchars - 20
+ 				&& array_key_exists( $add, $all )
+ 				&& !array_key_exists( $add, $snippets ) ) {
+ 				$offsets[$add] = 0;
+ 				$tt = "\n" . $this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
+ 				$extended[$add] = $tt;
+ 				$len += strlen( $tt );
+ 				$add++;
+ 			}
+ 		}
+ 	}
+
+ 	// $snippets = array_map( 'htmlspecialchars', $extended );
+ 	$snippets = $extended;
+ 	$last = -1;
+ 	$extract = '';
+ 	foreach ( $snippets as $index => $line ) {
+ 		if ( $last == -1 ) {
+ 			$extract .= $line; // first line
+ 		} elseif ( $last + 1 == $index
+ 			&& $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] )
+ 		) {
+ 			$extract .= " " . $line; // continuous lines
+ 		} else {
+ 			$extract .= '<b> ... </b>' . $line;
+ 		}
+
+ 		$last = $index;
+ 	}
+ 	if ( $extract ) {
+ 		$extract .= '<b> ... </b>';
+ 	}
+
+ 	$processed = [];
+ 	foreach ( $terms as $term ) {
+ 		if ( !isset( $processed[$term] ) ) {
+ 			$pat3 = "/$patPre(" . $term . ")$patPost/ui"; // highlight word
+ 			$extract = preg_replace( $pat3,
+ 				"\\1<span class='searchmatch'>\\2</span>\\3", $extract );
+ 			$processed[$term] = true;
+ 		}
+ 	}
+
+ 	return $extract;
+ }
+
+ /**
+  * Split text into lines and add it to extracts array
+  *
+  * The text is first run through removeWiki() unless wikitext cleanup was
+  * disabled in the constructor. Each line is trimmed; lines that are empty
+  * after trimming are skipped. A line consisting only of "0" is kept.
+  *
+  * @param array &$extracts Index -> $line
+  * @param int &$count Next index to use; incremented once per added line
+  * @param string $text
+  */
+ function splitAndAdd( &$extracts, &$count, $text ) {
+ 	$split = explode( "\n", $this->mCleanWikitext ? $this->removeWiki( $text ) : $text );
+ 	foreach ( $split as $line ) {
+ 		$tt = trim( $line );
+ 		// Compare against '' explicitly: a plain truthiness test would also drop "0"
+ 		if ( $tt !== '' ) {
+ 			$extracts[$count++] = $tt;
+ 		}
+ 	}
+ }
+
+ /**
+  * Do manual case conversion for non-ascii chars
+  *
+  * A match longer than one byte is turned into a regex character class
+  * holding its lower- and upper-case forms; a single-byte match is returned
+  * unchanged.
+  *
+  * @param array $matches preg_replace_callback() match; only index 0 is used
+  * @return string
+  */
+ function caseCallback( $matches ) {
+ 	global $wgContLang;
+
+ 	$char = $matches[0];
+ 	if ( strlen( $char ) <= 1 ) {
+ 		return $char;
+ 	}
+
+ 	return '[' . $wgContLang->lc( $char ) . $wgContLang->uc( $char ) . ']';
+ }
+
+ /**
+  * Extract part of the text from start to end, but by
+  * not chopping up words
+  *
+  * Both ends are adjusted with position(), except that a start of 0 and an
+  * end at or beyond the text length are used as they are. The out parameters
+  * are only written when the caller passed a non-null variable.
+  *
+  * @param string $text
+  * @param int $start
+  * @param int $end
+  * @param int &$posStart (out) actual start position
+  * @param int &$posEnd (out) actual end position
+  * @return string Empty string when the adjusted end is not after the start
+  */
+ function extract( $text, $start, $end, &$posStart = null, &$posEnd = null ) {
+ 	$textLen = strlen( $text );
+
+ 	$start = $start != 0 ? $this->position( $text, $start, 1 ) : $start;
+ 	$end = $end >= $textLen ? $textLen : $this->position( $text, $end );
+
+ 	if ( $posStart !== null ) {
+ 		$posStart = $start;
+ 	}
+ 	if ( $posEnd !== null ) {
+ 		$posEnd = $end;
+ 	}
+
+ 	return $end > $start ? substr( $text, $start, $end - $start ) : '';
+ }
+
+ /**
+  * Find a nonletter near a point (index) in the text
+  *
+  * A window of up to 10 bytes on either side of $point is searched, and the
+  * first nonletter found in that window wins. If there is none, $point is
+  * moved forward past any UTF-8 continuation bytes.
+  *
+  * @param string $text
+  * @param int $point
+  * @param int $offset Offset to found index
+  * @return int Index of the nonletter found (plus $offset), or beginning of utf8 char if none
+  */
+ function position( $text, $point, $offset = 0 ) {
+ 	$tolerance = 10;
+ 	$textLen = strlen( $text );
+ 	$windowStart = max( 0, $point - $tolerance );
+ 	$windowLen = min( $textLen, $point + $tolerance ) - $windowStart;
+ 	$window = substr( $text, $windowStart, $windowLen );
+
+ 	$m = [];
+ 	$hit = preg_match(
+ 		'/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/',
+ 		$window,
+ 		$m,
+ 		PREG_OFFSET_CAPTURE
+ 	);
+ 	if ( $hit ) {
+ 		return $m[0][1] + $windowStart + $offset;
+ 	}
+
+ 	// No nonletter nearby: make sure point is on a valid first UTF8 char
+ 	for (
+ 		$byte = ord( $text[$point] );
+ 		$byte >= 0x80 && $byte < 0xc0;
+ 		$byte = ord( $text[$point] )
+ 	) {
+ 		// skip trailing bytes
+ 		$point++;
+ 		if ( $point >= $textLen ) {
+ 			return $textLen;
+ 		}
+ 	}
+
+ 	return $point;
+ }
+
+ /**
+  * Search extracts for a pattern, and return snippets
+  *
+  * Extracts that already have a snippet in $out are skipped. For every
+  * matching extract a snippet of about $contextchars bytes is cut via
+  * extract(), and $linesleft is decremented; processing stops once it
+  * reaches zero.
+  *
+  * @param string $pattern Regexp for matching lines
+  * @param array $extracts Extracts to search
+  * @param int &$linesleft Number of extracts to make
+  * @param int &$contextchars Length of snippet
+  * @param array &$out Map for highlighted snippets
+  * @param array &$offsets Map of starting points of snippets
+  * @protected
+  */
+ function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) {
+ 	if ( $linesleft == 0 ) {
+ 		return; // nothing to do
+ 	}
+
+ 	foreach ( $extracts as $index => $line ) {
+ 		$m = [];
+ 		if ( array_key_exists( $index, $out )
+ 			|| !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE )
+ 		) {
+ 			// already highlighted, or no match on this line
+ 			continue;
+ 		}
+
+ 		list( $matched, $offset ) = $m[0];
+ 		$len = strlen( $matched );
+
+ 		if ( $offset + $len < $contextchars ) {
+ 			$begin = 0;
+ 		} elseif ( $len > $contextchars ) {
+ 			$begin = $offset;
+ 		} else {
+ 			$begin = $offset + intval( ( $len - $contextchars ) / 2 );
+ 		}
+
+ 		// basic snippet from this line
+ 		$posBegin = $begin;
+ 		$out[$index] = $this->extract( $line, $begin, $begin + $contextchars, $posBegin );
+ 		$offsets[$index] = $posBegin;
+
+ 		if ( --$linesleft == 0 ) {
+ 			return;
+ 		}
+ 	}
+ }
+
+ /**
+  * Basic wikitext removal
+  *
+  * Strips or unwraps templates, links, HTML-like tags and quote markup, in
+  * that order, then escapes the result with
+  * Sanitizer::escapeHtmlAllowEntities().
+  *
+  * @protected
+  * @param string $text
+  * @return mixed
+  */
+ function removeWiki( $text ) {
+ 	// Templates and plain links
+ 	$beforeLinks = [
+ 		[ "/\\{\\{([^|]+?)\\}\\}/", "" ],
+ 		[ "/\\{\\{([^|]+\\|)(.*?)\\}\\}/", "\\2" ],
+ 		[ "/\\[\\[([^|]+?)\\]\\]/", "\\1" ],
+ 	];
+ 	foreach ( $beforeLinks as list( $pattern, $replacement ) ) {
+ 		$text = preg_replace( $pattern, $replacement, $text );
+ 	}
+
+ 	// Piped links are handled by linkReplace()
+ 	$text = preg_replace_callback(
+ 		"/\\[\\[([^|]+\\|)(.*?)\\]\\]/",
+ 		[ $this, 'linkReplace' ],
+ 		$text
+ 	);
+
+ 	// Tags and quote markup
+ 	$afterLinks = [
+ 		"/<\/?[^>]+>/",
+ 		"/'''''/",
+ 		"/('''|<\/?[iIuUbB]>)/",
+ 		"/''/",
+ 	];
+ 	foreach ( $afterLinks as $pattern ) {
+ 		$text = preg_replace( $pattern, "", $text );
+ 	}
+
+ 	// Note, the previous /<\/?[^>]+>/ is insufficient
+ 	// for XSS safety as the HTML tag can span multiple
+ 	// search results (T144845).
+ 	return Sanitizer::escapeHtmlAllowEntities( $text );
+ }
+
+ /**
+  * callback to replace [[target|caption]] kind of links, if
+  * the target is category or image, leave it
+  *
+  * @param array $matches 0: whole link, 1: target including the pipe, 2: caption
+  * @return string
+  */
+ function linkReplace( $matches ) {
+ 	global $wgContLang;
+
+ 	$colon = strpos( $matches[1], ':' );
+ 	if ( $colon !== false ) {
+ 		$index = $wgContLang->getNsIndex( substr( $matches[1], 0, $colon ) );
+ 		if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) ) {
+ 			return $matches[0]; // return the whole thing
+ 		}
+ 	}
+
+ 	return $matches[2]; // replace with caption
+ }
+
+ /**
+  * Simple & fast snippet extraction, but gives completely unrelevant
+  * snippets
+  *
+  * Used when $wgAdvancedSearchHighlighting is false.
+  *
+  * Lines are scanned in order; each line that matches one of the terms
+  * contributes one HTML-escaped snippet with the matches wrapped in
+  * <span class='searchmatch'>, until $contextlines snippets were produced.
+  *
+  * @param string $text
+  * @param array $terms Escaped for regex by SearchDatabase::regexTerm()
+  * @param int $contextlines
+  * @param int $contextchars
+  * @return string Snippets, each terminated by a newline; '' when nothing matched
+  */
+ public function highlightSimple( $text, $terms, $contextlines, $contextchars ) {
+ 	global $wgContLang;
+
+ 	$lines = explode( "\n", $text );
+
+ 	$terms = implode( '|', $terms );
+ 	$max = intval( $contextchars ) + 1;
+ 	$pat1 = "/(.*)($terms)(.{0,$max})/i";
+ 	// The highlight pattern does not depend on the line, so build it once
+ 	$pat2 = '/(' . $terms . ")/i";
+
+ 	$extract = "";
+ 	foreach ( $lines as $line ) {
+ 		if ( 0 == $contextlines ) {
+ 			break;
+ 		}
+ 		$m = [];
+ 		if ( !preg_match( $pat1, $line, $m ) ) {
+ 			continue;
+ 		}
+ 		--$contextlines;
+ 		// truncate function changes ... to relevant i18n message.
+ 		$pre = $wgContLang->truncate( $m[1], - $contextchars, '...', false );
+
+ 		// Guard the index that is actually read
+ 		$post = isset( $m[3] )
+ 			? $wgContLang->truncate( $m[3], $contextchars, '...', false )
+ 			: '';
+
+ 		$found = $m[2];
+
+ 		$line = htmlspecialchars( $pre . $found . $post );
+ 		$line = preg_replace( $pat2, "<span class='searchmatch'>\\1</span>", $line );
+
+ 		$extract .= "{$line}\n";
+ 	}
+
+ 	return $extract;
+ }
+
+ /**
+  * Returns the first few lines of the text
+  *
+  * The result is HTML-escaped and its line breaks are turned into <br>.
+  * It is limited to $contextlines * $contextchars bytes, cut at a UTF-8
+  * character boundary so that no multi-byte character is split.
+  *
+  * @param string $text
+  * @param int $contextlines Max number of returned lines
+  * @param int $contextchars Average number of characters per line
+  * @return string
+  */
+ public function highlightNone( $text, $contextlines, $contextchars ) {
+ 	$match = [];
+ 	$text = ltrim( $text ) . "\n"; // make sure the preg_match may find the last line
+ 	$text = str_replace( "\n\n", "\n", $text ); // collapse doubled line breaks
+ 	preg_match( "/^(.*\n){0,$contextlines}/", $text, $match );
+
+ 	// Trim and limit to max number of bytes. mb_strcut() never splits a
+ 	// character; a plain substr() could leave invalid UTF-8 behind, which
+ 	// htmlspecialchars() then rejects or replaces.
+ 	$limited = mb_strcut( trim( $match[0] ), 0, $contextlines * $contextchars, 'UTF-8' );
+ 	$text = htmlspecialchars( $limited );
+ 	return str_replace( "\n", '<br>', $text );
+ }
+}
diff --git a/www/wiki/includes/search/SearchIndexField.php b/www/wiki/includes/search/SearchIndexField.php
new file mode 100644
index 00000000..6f3b2078
--- /dev/null
+++ b/www/wiki/includes/search/SearchIndexField.php
@@ -0,0 +1,98 @@
+<?php
+/**
+ * Definition of a mapping for the search index field.
+ * @since 1.28
+ */
+interface SearchIndexField {
+ /**
+ * Field types. Exactly one of these describes the kind of data a field holds.
+ */
+ const INDEX_TYPE_TEXT = 0;
+ const INDEX_TYPE_KEYWORD = 1;
+ const INDEX_TYPE_INTEGER = 2;
+ const INDEX_TYPE_NUMBER = 3;
+ const INDEX_TYPE_DATETIME = 4;
+ const INDEX_TYPE_NESTED = 5;
+ const INDEX_TYPE_BOOL = 6;
+
+ /**
+ * SHORT_TEXT is meant to be used with short text made of mostly ASCII
+ * technical information. Generally a language-agnostic analysis chain
+ * is used, with aggressive splitting to increase recall.
+ * E.g. suited for MIME types.
+ */
+ const INDEX_TYPE_SHORT_TEXT = 7;
+
+ /**
+ * Generic field flags. Each flag is a distinct bit, so flags can be combined.
+ */
+ /**
+ * This field is case-insensitive.
+ */
+ const FLAG_CASEFOLD = 1;
+
+ /**
+ * This field contains secondary information, which is
+ * already present in other fields, but can be used for
+ * scoring.
+ */
+ const FLAG_SCORING = 2;
+
+ /**
+ * This field does not need highlight handling.
+ */
+ const FLAG_NO_HIGHLIGHT = 4;
+
+ /**
+ * Do not index this field, just store it.
+ */
+ const FLAG_NO_INDEX = 8;
+
+ /**
+ * Get the mapping of this field for a specific search engine.
+ * @param SearchEngine $engine
+ * @return array|null Null means this field does not map to anything
+ */
+ public function getMapping( SearchEngine $engine );
+
+ /**
+ * Set or unset a global flag for this field.
+ *
+ * @param int $flag Bit flag to set/unset (one of the FLAG_* constants)
+ * @param bool $unset True if flag should be unset, false by default
+ * @return $this
+ */
+ public function setFlag( $flag, $unset = false );
+
+ /**
+ * Check if a flag is set.
+ * @param int $flag
+ * @return int 0 if unset, !=0 if set
+ */
+ public function checkFlag( $flag );
+
+ /**
+ * Merge two field definitions if possible.
+ *
+ * @param SearchIndexField $that
+ * @return SearchIndexField|false New definition or false if not mergeable.
+ */
+ public function merge( SearchIndexField $that );
+
+ /**
+ * A list of search engine hints for this field.
+ * Hints are usually specific to a search engine implementation
+ * and allow fine control over how the search engine will handle this
+ * particular field.
+ *
+ * For example, some search engines permit optimizations
+ * at index time by ignoring an update if the updated value
+ * does not change by more than X% on a numeric value.
+ *
+ * @param SearchEngine $engine
+ * @return array An array of hints, generally indexed by hint name. The type of
+ * the values is search engine specific
+ * @since 1.30
+ */
+ public function getEngineHints( SearchEngine $engine );
+}
diff --git a/www/wiki/includes/search/SearchIndexFieldDefinition.php b/www/wiki/includes/search/SearchIndexFieldDefinition.php
new file mode 100644
index 00000000..a11dff9f
--- /dev/null
+++ b/www/wiki/includes/search/SearchIndexFieldDefinition.php
@@ -0,0 +1,153 @@
+<?php
+
+/**
+ * Basic infrastructure of the field definition.
+ *
+ * Specific engines should extend this class and, at the least,
+ * override the getMapping method, but can reuse other parts.
+ *
+ * @since 1.28
+ */
+abstract class SearchIndexFieldDefinition implements SearchIndexField {
+
+    /**
+     * Field name.
+     *
+     * @var string
+     */
+    protected $name;
+
+    /**
+     * Field type, one of the SearchIndexField::INDEX_TYPE_* constants.
+     *
+     * @var int
+     */
+    protected $type;
+
+    /**
+     * Bit mask of the flags currently set on the field.
+     *
+     * @var int
+     */
+    protected $flags = 0;
+
+    /**
+     * Subfields of this field.
+     * @var SearchIndexFieldDefinition[]
+     */
+    protected $subfields = [];
+
+    /**
+     * Optional field-specific merge strategy, see setMergeCallback().
+     * @var callable
+     */
+    private $mergeCallback;
+
+    /**
+     * @param string $name Field name
+     * @param int $type Index type
+     */
+    public function __construct( $name, $type ) {
+        $this->type = $type;
+        $this->name = $name;
+    }
+
+    /**
+     * @return string The field name
+     */
+    public function getName() {
+        return $this->name;
+    }
+
+    /**
+     * @return int The index type
+     */
+    public function getIndexType() {
+        return $this->type;
+    }
+
+    /**
+     * Switch a flag bit on or off.
+     *
+     * @param int $flag Bit flag to set/unset
+     * @param bool $unset True if flag should be unset, false by default
+     * @return $this
+     */
+    public function setFlag( $flag, $unset = false ) {
+        $this->flags = $unset
+            ? ( $this->flags & ~$flag )
+            : ( $this->flags | $flag );
+        return $this;
+    }
+
+    /**
+     * Test a flag bit.
+     * @param int $flag
+     * @return int 0 if unset, !=0 if set
+     */
+    public function checkFlag( $flag ) {
+        return $this->flags & $flag;
+    }
+
+    /**
+     * Merge this definition with another one, if possible.
+     *
+     * A merge callback, when set, decides alone. Otherwise two definitions
+     * merge only when they share the same type and flags and are not nested.
+     *
+     * @param SearchIndexField $that
+     * @return SearchIndexField|false New definition or false if not mergeable.
+     */
+    public function merge( SearchIndexField $that ) {
+        if ( !empty( $this->mergeCallback ) ) {
+            return call_user_func( $this->mergeCallback, $this, $that );
+        }
+        // TODO: which definitions may be compatible?
+        $compatible = ( $that instanceof self )
+            && $this->type === $that->type
+            && $this->flags === $that->flags
+            && $this->type !== self::INDEX_TYPE_NESTED;
+
+        return $compatible ? $that : false;
+    }
+
+    /**
+     * @return SearchIndexFieldDefinition[] The subfields
+     */
+    public function getSubfields() {
+        return $this->subfields;
+    }
+
+    /**
+     * Replace the subfields.
+     * @param SearchIndexFieldDefinition[] $subfields
+     * @return $this
+     */
+    public function setSubfields( array $subfields ) {
+        $this->subfields = $subfields;
+        return $this;
+    }
+
+    /**
+     * @param SearchEngine $engine
+     *
+     * @return array
+     */
+    abstract public function getMapping( SearchEngine $engine );
+
+    /**
+     * Install a field-specific merge strategy. merge() calls it with this
+     * definition and the other one and returns its result unchanged.
+     * @param callable $callback
+     */
+    public function setMergeCallback( $callback ) {
+        $this->mergeCallback = $callback;
+    }
+
+    /**
+     * This base implementation provides no engine hints.
+     * @inheritDoc
+     */
+    public function getEngineHints( SearchEngine $engine ) {
+        return [];
+    }
+}
diff --git a/www/wiki/includes/search/SearchMssql.php b/www/wiki/includes/search/SearchMssql.php
new file mode 100644
index 00000000..57ca06e3
--- /dev/null
+++ b/www/wiki/includes/search/SearchMssql.php
@@ -0,0 +1,210 @@
+<?php
+/**
+ * Mssql search engine
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Search
+ */
+
+/**
+ * Search engine hook base class for Mssql (ConText).
+ * @ingroup Search
+ */
+class SearchMssql extends SearchDatabase {
+ /**
+  * Run a full-text query for the term and wrap the rows in a result set.
+  *
+  * @param string $term Raw search term
+  * @return SqlSearchResultSet
+  */
+ function searchText( $term ) {
+     $sql = $this->getQuery( $this->filter( $term ), true );
+     $rows = $this->db->query( $sql );
+     return new SqlSearchResultSet( $rows, $this->searchTerms );
+ }
+
+ /**
+  * Run a title-only query for the term and wrap the rows in a result set.
+  *
+  * @param string $term Raw search term
+  * @return SqlSearchResultSet
+  */
+ function searchTitle( $term ) {
+     $sql = $this->getQuery( $this->filter( $term ), false );
+     $rows = $this->db->query( $sql );
+     return new SqlSearchResultSet( $rows, $this->searchTerms );
+ }
+
+ /**
+  * Return a partial WHERE clause to limit the search to the given namespaces.
+  *
+  * When no namespace list is set (not an array) no restriction is added, which
+  * matches SearchMySQL::queryNamespaces(). An empty list restricts the search
+  * to namespace 0.
+  *
+  * @return string
+  * @private
+  */
+ function queryNamespaces() {
+     if ( !is_array( $this->namespaces ) ) {
+         // implode() on a non-array would fail; treat it as "all namespaces".
+         return '';
+     }
+     // The list is interpolated into raw SQL, so force every entry to an integer.
+     $namespaces = implode( ',', array_map( 'intval', $this->namespaces ) );
+     if ( $namespaces === '' ) {
+         $namespaces = '0';
+     }
+     return 'AND page_namespace IN (' . $namespaces . ')';
+ }
+
+ /**
+  * Apply this engine's limit and offset to a query.
+  *
+  * @param string $sql Query to restrict
+  *
+  * @return string The query as returned by the database layer's limitResult()
+  */
+ function queryLimit( $sql ) {
+     $limit = $this->limit;
+     $offset = $this->offset;
+     return $this->db->limitResult( $sql, $limit, $offset );
+ }
+
+ /**
+ * Return the ORDER BY clause that sorts results by the RANK column of the
+ * "ftindex" full-text table alias, best match first.
+ *
+ * Both parameters are currently unused.
+ *
+ * @param string $filteredTerm
+ * @param bool $fulltext
+ * @return string
+ */
+ function queryRanking( $filteredTerm, $fulltext ) {
+ return ' ORDER BY ftindex.[RANK] DESC';
+ }
+
+ /**
+  * Construct the full SQL query to do the search.
+  * The guts should be constructed in queryMain().
+  *
+  * @param string $filteredTerm
+  * @param bool $fulltext
+  * @return string
+  */
+ function getQuery( $filteredTerm, $fulltext ) {
+     // Main query, namespace filter and ranking, each followed by one space.
+     $parts = [
+         $this->queryMain( $filteredTerm, $fulltext ),
+         $this->queryNamespaces(),
+         $this->queryRanking( $filteredTerm, $fulltext ),
+     ];
+     return $this->queryLimit( implode( ' ', $parts ) . ' ' );
+ }
+
+ /**
+  * Choose the search index column for the query type: the page text column
+  * for full-text queries, the title column otherwise.
+  *
+  * @param bool $fulltext
+  * @return string
+  */
+ function getIndexField( $fulltext ) {
+     if ( $fulltext ) {
+         return 'si_text';
+     }
+     return 'si_title';
+ }
+
+ /**
+  * Get the base part of the search query: a join of the page table with the
+  * FREETEXTTABLE() result for the parsed term, aliased as "ftindex".
+  *
+  * @param string $filteredTerm
+  * @param bool $fulltext
+  * @return string
+  * @private
+  */
+ function queryMain( $filteredTerm, $fulltext ) {
+     // parseQuery() yields "<column>, <quoted search string>".
+     $match = $this->parseQuery( $filteredTerm, $fulltext );
+     $page = $this->db->tableName( 'page' );
+     $searchindex = $this->db->tableName( 'searchindex' );
+
+     // Note the trailing space after the select list: without it the generated
+     // SQL reads "ftindex.[RANK]FROM ...".
+     return 'SELECT page_id, page_namespace, page_title, ftindex.[RANK] ' .
+         "FROM $page,FREETEXTTABLE($searchindex , $match, LANGUAGE 'English') as ftindex " .
+         'WHERE page_id=ftindex.[KEY] ';
+ }
+
+ /**
+  * Parse the filtered search text into the column and search-string arguments
+  * for FREETEXTTABLE(), and record one highlighting regex per term in
+  * $this->searchTerms.
+  *
+  * @param string $filteredText
+  * @param bool $fulltext
+  * @return string "<index column>, <quoted comma-separated terms>"
+  */
+ function parseQuery( $filteredText, $fulltext ) {
+     global $wgContLang;
+     $lc = $this->legalSearchChars( self::CHARS_NO_SYNTAX );
+     $this->searchTerms = [];
+
+     # @todo FIXME: This doesn't handle parenthetical expressions.
+     $m = [];
+     $q = [];
+
+     // Groups: 1 = modifier, 2 = whole term, 3 = bare word, 4 = trailing "*".
+     // Groups 3 and 4 are absent for quoted phrases.
+     if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
+         $filteredText, $m, PREG_SET_ORDER ) ) {
+         foreach ( $m as $terms ) {
+             $q[] = $terms[1] . $wgContLang->normalizeForSearch( $terms[2] );
+
+             // Compare against '' explicitly: empty() would treat the word "0"
+             // as missing and send it down the quoted-phrase branch.
+             if ( isset( $terms[3] ) && $terms[3] !== '' ) {
+                 $regexp = preg_quote( $terms[3], '/' );
+                 if ( !empty( $terms[4] ) ) {
+                     // Wildcard: the word may continue with further word characters.
+                     $regexp .= "[0-9A-Za-z_]+";
+                 }
+             } else {
+                 $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
+             }
+             $this->searchTerms[] = $regexp;
+         }
+     }
+
+     $searchon = $this->db->addQuotes( implode( ',', $q ) );
+     $field = $this->getIndexField( $fulltext );
+     return "$field, $searchon";
+ }
+
+ /**
+  * Create or update the search index record for the given page by deleting
+  * any existing row and inserting a fresh one.
+  * Title and text should be pre-processed.
+  *
+  * @param int $id
+  * @param string $title
+  * @param string $text
+  * @return bool|ResultWrapper
+  */
+ function update( $id, $title, $text ) {
+     // The column data is stored as a binary stream carrying a UTF-8 byte order
+     // mark, because the plain text IFilter is invoked on it and should decode
+     // the stream as UTF-8. SQL doesn't support UTF8 as a data type, but the
+     // indexer handles it correctly this way. Since the data is only passed to
+     // the indexer and never retrieved via PHP, this also saves space.
+     $utf8bom = '0xEFBBBF';
+     $titleHex = $utf8bom . bin2hex( $title );
+     $textHex = $utf8bom . bin2hex( $text );
+     $table = $this->db->tableName( 'searchindex' );
+
+     $statements = [
+         "DELETE FROM $table WHERE si_page = $id;",
+         "INSERT INTO $table (si_page, si_title, si_text) VALUES ($id, $titleHex, $textHex)",
+     ];
+     return $this->db->query( implode( '', $statements ), 'SearchMssql::update' );
+ }
+
+ /**
+  * Update a search index record's title only, leaving the indexed text alone.
+  * Title should be pre-processed.
+  *
+  * If the page has no index row yet, one is created with a placeholder text,
+  * so a row exists afterwards in either case.
+  *
+  * @param int $id
+  * @param string $title
+  * @return bool|ResultWrapper
+  */
+ function updateTitle( $id, $title ) {
+     $table = $this->db->tableName( 'searchindex' );
+
+     // The title is stored as a binary stream prefixed with the UTF-8 byte
+     // order mark, the same encoding update() uses.
+     $utf8bom = '0xEFBBBF';
+     $si_title = $utf8bom . bin2hex( $title );
+     // Deleting and re-inserting the row with si_text = 0x00 would throw away
+     // the indexed page text, so update the title in place and only insert
+     // when no row was touched.
+     $sql = "UPDATE $table SET si_title = $si_title WHERE si_page = $id; ";
+     $sql .= "IF @@ROWCOUNT = 0 " .
+         "INSERT INTO $table (si_page, si_title, si_text) VALUES ($id, $si_title, 0x00)";
+     return $this->db->query( $sql, 'SearchMssql::updateTitle' );
+ }
+}
diff --git a/www/wiki/includes/search/SearchMySQL.php b/www/wiki/includes/search/SearchMySQL.php
new file mode 100644
index 00000000..8e705c1f
--- /dev/null
+++ b/www/wiki/includes/search/SearchMySQL.php
@@ -0,0 +1,458 @@
+<?php
+/**
+ * MySQL search engine
+ *
+ * Copyright (C) 2004 Brion Vibber <brion@pobox.com>
+ * https://www.mediawiki.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Search
+ */
+
+/**
+ * Search engine hook for MySQL 4+
+ * @ingroup Search
+ */
+class SearchMySQL extends SearchDatabase {
+ /** @var bool When true, parseQuery() prefixes terms that carry no modifier with "+" */
+ protected $strictMatching = true;
+
+ /** @var int|null Value cached by minSearchLength(); null until first looked up */
+ private static $mMinSearchLength;
+
+ /**
+  * Parse the user's query and transform it into an SQL fragment which will
+  * become part of a WHERE clause. As a side effect, $this->searchTerms is
+  * replaced with one highlighting regex per parsed term.
+  *
+  * @param string $filteredText
+  * @param bool $fulltext True to match the page text column, false for the title
+  *
+  * @return string
+  */
+ function parseQuery( $filteredText, $fulltext ) {
+     global $wgContLang;
+
+     $lc = $this->legalSearchChars( self::CHARS_NO_SYNTAX ); // Minus syntax chars (" and *)
+     $searchon = '';
+     $this->searchTerms = [];
+
+     # @todo FIXME: This doesn't handle parenthetical expressions.
+     $m = [];
+     if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
+         $filteredText, $m, PREG_SET_ORDER ) ) {
+         foreach ( $m as $bits ) {
+             // Quoted phrases leave the last two groups unset; pad the match so
+             // it can be unpacked without suppressing warnings.
+             list( /* all */, $modifier, $term, $nonQuoted, $wildcard ) = array_pad( $bits, 5, '' );
+
+             if ( $nonQuoted != '' ) {
+                 $term = $nonQuoted;
+                 $quote = '';
+             } else {
+                 $term = str_replace( '"', '', $term );
+                 $quote = '"';
+             }
+
+             if ( $searchon !== '' ) {
+                 $searchon .= ' ';
+             }
+             if ( $this->strictMatching && ( $modifier == '' ) ) {
+                 // If we leave this out, boolean op defaults to OR which is rarely helpful.
+                 $modifier = '+';
+             }
+
+             // Some languages such as Serbian store the input form in the search index,
+             // so we may need to search for matches in multiple writing system variants.
+             $convertedVariants = $wgContLang->autoConvertToAllVariants( $term );
+             if ( is_array( $convertedVariants ) ) {
+                 $variants = array_unique( array_values( $convertedVariants ) );
+             } else {
+                 $variants = [ $term ];
+             }
+
+             // The low-level search index does some processing on input to work
+             // around problems with minimum lengths and encoding in MySQL's
+             // fulltext engine.
+             // For Chinese this also inserts spaces between adjacent Han characters.
+             $strippedVariants = array_map(
+                 [ $wgContLang, 'normalizeForSearch' ],
+                 $variants );
+
+             // Some languages such as Chinese force all variants to a canonical
+             // form when stripping to the low-level search index, so to be sure
+             // let's check our variants list for unique items after stripping.
+             $strippedVariants = array_unique( $strippedVariants );
+
+             $searchon .= $modifier;
+             if ( count( $strippedVariants ) > 1 ) {
+                 $searchon .= '(';
+             }
+             foreach ( $strippedVariants as $stripped ) {
+                 $stripped = $this->normalizeText( $stripped );
+                 if ( $nonQuoted && strpos( $stripped, ' ' ) !== false ) {
+                     // Hack for Chinese: we need to toss in quotes for
+                     // multiple-character phrases since normalizeForSearch()
+                     // added spaces between them to make word breaks.
+                     $stripped = '"' . trim( $stripped ) . '"';
+                 }
+                 $searchon .= "$quote$stripped$quote$wildcard ";
+             }
+             if ( count( $strippedVariants ) > 1 ) {
+                 $searchon .= ')';
+             }
+
+             // Match individual terms or quoted phrase in result highlighting...
+             // Note that variants will be introduced in a later stage for highlighting!
+             $regexp = $this->regexTerm( $term, $wildcard );
+             $this->searchTerms[] = $regexp;
+         }
+         wfDebug( __METHOD__ . ": Would search with '$searchon'\n" );
+         wfDebug( __METHOD__ . ': Match with /' . implode( '|', $this->searchTerms ) . "/\n" );
+     } else {
+         wfDebug( __METHOD__ . ": Can't understand search query '{$filteredText}'\n" );
+     }
+
+     $searchon = $this->db->addQuotes( $searchon );
+     $field = $this->getIndexField( $fulltext );
+     return " MATCH($field) AGAINST($searchon IN BOOLEAN MODE) ";
+ }
+
+ /**
+  * Build the highlighting regex fragment for one search term.
+  *
+  * @param string $string Term to match; it is quoted for use between "/" delimiters
+  * @param string $wildcard Non-empty when the term ended in a wildcard
+  * @return string
+  */
+ function regexTerm( $string, $wildcard ) {
+     global $wgContLang;
+
+     $quoted = preg_quote( $string, '/' );
+     if ( !$wgContLang->hasWordBreaks() ) {
+         // For Chinese, words may legitimately abut other words in the text literal.
+         // Don't add \b boundary checks... note this could cause false positives
+         // for latin chars.
+         return $quoted;
+     }
+
+     // With a wildcard only the start is anchored: don't cut off the final bit!
+     return $wildcard ? "\b$quoted" : "\b$quoted\b";
+ }
+
+ /**
+  * Extend the parent's legal search characters with the syntax characters
+  * when the full character set is requested.
+  *
+  * @param int $type One of the CHARS_* constants
+  * @return string
+  */
+ public static function legalSearchChars( $type = self::CHARS_ALL ) {
+     $base = parent::legalSearchChars( $type );
+     // " for phrase, * for wildcard
+     return $type === self::CHARS_ALL ? "\"*" . $base : $base;
+ }
+
+ /**
+  * Full-text search: delegates to searchInternal() in full-text mode.
+  *
+  * @param string $term Raw search term
+  * @return SqlSearchResultSet
+  */
+ function searchText( $term ) {
+     $fulltext = true;
+     return $this->searchInternal( $term, $fulltext );
+ }
+
+ /**
+  * Title-only search: delegates to searchInternal() in title mode.
+  *
+  * @param string $term Raw search term
+  * @return SqlSearchResultSet
+  */
+ function searchTitle( $term ) {
+     $fulltext = false;
+     return $this->searchInternal( $term, $fulltext );
+ }
+
+ /**
+  * Run the search query plus a matching COUNT(*) query and bundle both into
+  * a result set.
+  *
+  * @param string $term Raw search term
+  * @param bool $fulltext True for a full-text search, false for titles only
+  * @return SqlSearchResultSet|null Null when the term is blank
+  */
+ protected function searchInternal( $term, $fulltext ) {
+     // This seems out of place, why is this called with empty term?
+     if ( trim( $term ) === '' ) {
+         return null;
+     }
+     $filteredTerm = $this->filter( $term );
+
+     $main = $this->getQuery( $filteredTerm, $fulltext );
+     $resultSet = $this->db->select(
+         $main['tables'], $main['fields'], $main['conds'],
+         __METHOD__, $main['options'], $main['joins']
+     );
+
+     $counting = $this->getCountQuery( $filteredTerm, $fulltext );
+     $countResult = $this->db->select(
+         $counting['tables'], $counting['fields'], $counting['conds'],
+         __METHOD__, $counting['options'], $counting['joins']
+     );
+
+     // The total stays null when the count query yields no row.
+     $countRow = $countResult->fetchObject();
+     $total = $countRow ? intval( $countRow->c ) : null;
+     $countResult->free();
+
+     return new SqlSearchResultSet( $resultSet, $this->searchTerms, $total );
+ }
+
+ /**
+  * Report feature support: 'title-suffix-filter' is handled here, anything
+  * else is answered by the parent class.
+  *
+  * @param string $feature
+  * @return bool
+  */
+ public function supports( $feature ) {
+     // Loose comparison on purpose, mirroring switch semantics.
+     if ( $feature == 'title-suffix-filter' ) {
+         return true;
+     }
+     return parent::supports( $feature );
+ }
+
+ /**
+  * Add the conditions for enabled features to the query. Only a truthy
+  * 'title-suffix-filter' is acted on: it adds a LIKE condition requiring
+  * page_title to end with the given value.
+  * @param array &$query
+  * @since 1.18
+  */
+ protected function queryFeatures( &$query ) {
+     foreach ( $this->features as $name => $setting ) {
+         if ( $name !== 'title-suffix-filter' || !$setting ) {
+             continue;
+         }
+         $query['conds'][] = 'page_title' . $this->db->buildLike( $this->db->anyString(), $setting );
+     }
+ }
+
+ /**
+  * Add the namespace condition to the query. Nothing is added when the
+  * namespace list is not an array; an empty list is turned into [ '0' ]
+  * (which is also stored back on the object).
+  * @param array &$query
+  * @since 1.18 (changed)
+  */
+ function queryNamespaces( &$query ) {
+     if ( !is_array( $this->namespaces ) ) {
+         return;
+     }
+     if ( !$this->namespaces ) {
+         $this->namespaces[] = '0';
+     }
+     $query['conds']['page_namespace'] = $this->namespaces;
+ }
+
+ /**
+  * Copy this engine's limit and offset into the query options.
+  * @param array &$query
+  * @since 1.18
+  */
+ protected function limitResult( &$query ) {
+     $paging = [ 'LIMIT' => $this->limit, 'OFFSET' => $this->offset ];
+     foreach ( $paging as $option => $value ) {
+         $query['options'][$option] = $value;
+     }
+ }
+
+ /**
+  * Construct the SQL query to do the search, as the array of parts that
+  * searchInternal() passes on to the database layer's select().
+  * The guts should be constructed in queryMain().
+  * @param string $filteredTerm
+  * @param bool $fulltext
+  * @return array With keys 'tables', 'fields', 'conds', 'options' and 'joins'
+  * @since 1.18 (changed)
+  */
+ function getQuery( $filteredTerm, $fulltext ) {
+     // Every part starts out as an empty list.
+     $query = array_fill_keys( [ 'tables', 'fields', 'conds', 'options', 'joins' ], [] );
+
+     $this->queryMain( $query, $filteredTerm, $fulltext );
+     $this->queryFeatures( $query );
+     $this->queryNamespaces( $query );
+     $this->limitResult( $query );
+
+     return $query;
+ }
+
+ /**
+  * Choose the search index column for the query type: the page text column
+  * for full-text queries, the title column otherwise.
+  * @param bool $fulltext
+  * @return string
+  */
+ function getIndexField( $fulltext ) {
+     if ( $fulltext ) {
+         return 'si_text';
+     }
+     return 'si_title';
+ }
+
+ /**
+  * Add the base part of the search query: the page/searchindex join, the
+  * selected page columns and the full-text match condition.
+  *
+  * @param array &$query Search query array
+  * @param string $filteredTerm
+  * @param bool $fulltext
+  * @since 1.18 (changed)
+  */
+ function queryMain( &$query, $filteredTerm, $fulltext ) {
+     $match = $this->parseQuery( $filteredTerm, $fulltext );
+
+     foreach ( [ 'page', 'searchindex' ] as $table ) {
+         $query['tables'][] = $table;
+     }
+     foreach ( [ 'page_id', 'page_namespace', 'page_title' ] as $field ) {
+         $query['fields'][] = $field;
+     }
+     foreach ( [ 'page_id=si_page', $match ] as $cond ) {
+         $query['conds'][] = $cond;
+     }
+ }
+
+ /**
+  * Build the query that counts all matches. It carries the same join, match,
+  * feature and namespace conditions as getQuery(), but no limit or offset.
+  *
+  * @since 1.18 (changed)
+  * @param string $filteredTerm
+  * @param bool $fulltext
+  * @return array
+  */
+ function getCountQuery( $filteredTerm, $fulltext ) {
+     $match = $this->parseQuery( $filteredTerm, $fulltext );
+
+     $query = [];
+     $query['tables'] = [ 'page', 'searchindex' ];
+     $query['fields'] = [ 'COUNT(*) as c' ];
+     $query['conds'] = [ 'page_id=si_page', $match ];
+     $query['options'] = [];
+     $query['joins'] = [];
+
+     $this->queryFeatures( $query );
+     $this->queryNamespaces( $query );
+
+     return $query;
+ }
+
+ /**
+  * Create or update the search index record for the given page.
+  * Title and text should be pre-processed; both are passed through
+  * normalizeText() before being stored.
+  *
+  * @param int $id
+  * @param string $title
+  * @param string $text
+  */
+ function update( $id, $title, $text ) {
+     $dbw = wfGetDB( DB_MASTER );
+     $row = [
+         'si_page' => $id,
+         'si_title' => $this->normalizeText( $title ),
+         'si_text' => $this->normalizeText( $text )
+     ];
+     // si_page is the key that identifies the row to replace.
+     $dbw->replace( 'searchindex', [ 'si_page' ], $row, __METHOD__ );
+ }
+
+ /**
+  * Update a search index record's title only.
+  * Title should be pre-processed; it is passed through normalizeText()
+  * before being stored.
+  *
+  * @param int $id
+  * @param string $title
+  */
+ function updateTitle( $id, $title ) {
+     $dbw = wfGetDB( DB_MASTER );
+
+     $newValues = [ 'si_title' => $this->normalizeText( $title ) ];
+     $where = [ 'si_page' => $id ];
+     $options = [ $dbw->lowPriorityOption() ];
+
+     $dbw->update( 'searchindex', $newValues, $where, __METHOD__, $options );
+ }
+
+ /**
+  * Remove a page's row from the search index.
+  *
+  * @param int $id Page id that was deleted
+  * @param string $title Title of page that was deleted (not used here)
+  */
+ function delete( $id, $title ) {
+     $where = [ 'si_page' => $id ];
+     wfGetDB( DB_MASTER )->delete( 'searchindex', $where, __METHOD__ );
+ }
+
+ /**
+ * Converts some characters for MySQL's indexing to grok it correctly,
+ * and pads short words to overcome limitations.
+ *
+ * The steps run in this order: the parent's normalization, lower-casing,
+ * replacing each multi-byte sequence with "u8" plus its hex form, appending
+ * "u800" to words shorter than the server's minimum word length, and
+ * replacing a period between word characters with "u82e".
+ *
+ * @param string $string
+ * @return mixed|string
+ */
+ function normalizeText( $string ) {
+ global $wgContLang;
+
+ $out = parent::normalizeText( $string );
+
+ // MySQL fulltext index doesn't grok utf-8, so we
+ // need to fold cases and convert to hex
+ $out = preg_replace_callback(
+ "/([\\xc0-\\xff][\\x80-\\xbf]*)/",
+ [ $this, 'stripForSearchCallback' ],
+ $wgContLang->lc( $out ) );
+
+ // And to add insult to injury, the default indexing
+ // ignores short words... Pad them so we can pass them
+ // through without reconfiguring the server...
+ $minLength = $this->minSearchLength();
+ if ( $minLength > 1 ) {
+ $n = $minLength - 1;
+ $out = preg_replace(
+ "/\b(\w{1,$n})\b/",
+ "$1u800",
+ $out );
+ }
+
+ // Periods within things like hostnames and IP addresses
+ // are also important -- we want a search for "example.com"
+ // or "192.168.1.1" to work sanely.
+ // MySQL's search seems to ignore them, so you'd match on
+ // "example.wikipedia.com" and "192.168.83.1" as well.
+ $out = preg_replace(
+ "/(\w)\.(\w|\*)/u",
+ "$1u82e$2",
+ $out );
+
+ return $out;
+ }
+
+ /**
+  * preg_replace_callback() handler used by normalizeText(): armors one
+  * multi-byte sequence of a case-folded UTF-8 string as "u8" followed by its
+  * hex form, so it gets through MySQL's fulltext search without being mucked
+  * up by funny charset settings or anything else of the sort.
+  * @param array $matches Regex match; index 1 holds the byte sequence
+  * @return string
+  */
+ protected function stripForSearchCallback( $matches ) {
+     $hex = bin2hex( $matches[1] );
+     return "u8{$hex}";
+ }
+
+ /**
+  * Look up the MySQL server's ft_min_word_len setting so we know
+  * if we need to pad short words. The value is fetched once and then cached
+  * in a static property; 0 is used when the server does not report it.
+  *
+  * @return int
+  */
+ protected function minSearchLength() {
+     if ( self::$mMinSearchLength !== null ) {
+         return self::$mMinSearchLength;
+     }
+
+     $sql = "SHOW GLOBAL VARIABLES LIKE 'ft\\_min\\_word\\_len'";
+     $result = wfGetDB( DB_REPLICA )->query( $sql, __METHOD__ );
+     $row = $result->fetchObject();
+     $result->free();
+
+     $reported = $row && $row->Variable_name == 'ft_min_word_len';
+     self::$mMinSearchLength = $reported ? intval( $row->Value ) : 0;
+
+     return self::$mMinSearchLength;
+ }
+}
diff --git a/www/wiki/includes/search/SearchNearMatchResultSet.php b/www/wiki/includes/search/SearchNearMatchResultSet.php
new file mode 100644
index 00000000..31417974
--- /dev/null
+++ b/www/wiki/includes/search/SearchNearMatchResultSet.php
@@ -0,0 +1,30 @@
+<?php
+/**
+ * A SearchResultSet wrapper for SearchNearMatcher: holds at most one result,
+ * the near-match title.
+ */
+class SearchNearMatchResultSet extends SearchResultSet {
+    /** @var bool Whether next() has already handed out the single result */
+    private $fetched = false;
+
+    /**
+     * @param Title|null $match Title if matched, else null
+     */
+    public function __construct( $match ) {
+        $this->result = $match;
+    }
+
+    /**
+     * @return int 1 when a match is held, otherwise 0
+     */
+    public function numRows() {
+        return (int)(bool)$this->result;
+    }
+
+    /**
+     * Hand out the single result once.
+     *
+     * @return SearchResult|false False when there is no match or it was already fetched
+     */
+    public function next() {
+        if ( !$this->fetched && $this->result ) {
+            $this->fetched = true;
+            return SearchResult::newFromTitle( $this->result, $this );
+        }
+        return false;
+    }
+
+    /**
+     * Make the result available to next() again.
+     */
+    public function rewind() {
+        $this->fetched = false;
+    }
+}
diff --git a/www/wiki/includes/search/SearchNearMatcher.php b/www/wiki/includes/search/SearchNearMatcher.php
new file mode 100644
index 00000000..27046f31
--- /dev/null
+++ b/www/wiki/includes/search/SearchNearMatcher.php
@@ -0,0 +1,167 @@
+<?php
+
+/**
+ * Implementation of near match title search.
+ * TODO: split into service/implementation.
+ */
+class SearchNearMatcher {
+ /**
+ * @var Config
+ */
+ protected $config;
+
+ /**
+ * Current language
+ * @var Language
+ */
+ private $language;
+
+ /**
+  * @param Config $config Configuration read by the matcher
+  * @param Language $lang Language used for variants and case conversion
+  */
+ public function __construct( Config $config, Language $lang ) {
+     $this->language = $lang;
+     $this->config = $config;
+ }
+
+ /**
+  * If an exact title match can be found, or a very slightly close match,
+  * return the title. If no match, returns NULL.
+  *
+  * The 'SearchGetNearMatchComplete' hook runs on the outcome and receives
+  * the title by reference.
+  *
+  * @param string $searchterm
+  * @return Title
+  */
+ public function getNearMatch( $searchterm ) {
+     $match = $this->getNearMatchInternal( $searchterm );
+
+     Hooks::run( 'SearchGetNearMatchComplete', [ $searchterm, &$match ] );
+     return $match;
+ }
+
+ /**
+  * Do a near match (see getNearMatch()) and wrap the outcome into a
+  * SearchResultSet.
+  *
+  * @param string $searchterm
+  * @return SearchResultSet
+  */
+ public function getNearMatchResultSet( $searchterm ) {
+     $match = $this->getNearMatch( $searchterm );
+     return new SearchNearMatchResultSet( $match );
+ }
+
+ /**
+  * Really find the title match.
+  *
+  * Each candidate (the search term plus its language variants) is tried as an
+  * exact title and then in several letter-case variants, with hooks able to
+  * step in along the way. Afterwards a few special cases are handled for the
+  * original term: IP addresses, user pages, files, MediaWiki messages and
+  * quoted terms.
+  *
+  * @param string $searchterm
+  * @return null|Title
+  */
+ protected function getNearMatchInternal( $searchterm ) {
+     $lang = $this->language;
+
+     $candidates = [ $searchterm ];
+     if ( $lang->hasVariants() ) {
+         $candidates = array_unique( array_merge(
+             $candidates,
+             $lang->autoConvertToAllVariants( $searchterm )
+         ) );
+     }
+
+     $titleResult = null;
+     if ( !Hooks::run( 'SearchGetNearMatchBefore', [ $candidates, &$titleResult ] ) ) {
+         return $titleResult;
+     }
+
+     // Language methods for the case variants, in the order they are tried:
+     // all lower case (i.e. first letter capitalized), capitalized string,
+     // all upper case, Word-Caps-Breaking-At-Word-Breaks (hyphenated names etc).
+     $caseConverters = [ 'lc', 'ucwords', 'uc', 'ucwordbreaks' ];
+
+     foreach ( $candidates as $term ) {
+         # Exact match? No need to look further.
+         $title = Title::newFromText( $term );
+         if ( $title === null ) {
+             return null;
+         }
+
+         # Try files if searching in the Media: namespace
+         if ( $title->getNamespace() == NS_MEDIA ) {
+             $title = Title::makeTitle( NS_FILE, $title->getText() );
+         }
+
+         if ( $title->isSpecialPage() || $title->isExternal() || $title->exists() ) {
+             return $title;
+         }
+
+         # See if it still otherwise has content in some sane sense
+         if ( WikiPage::factory( $title )->hasViewableContent() ) {
+             return $title;
+         }
+
+         if ( !Hooks::run( 'SearchAfterNoDirectMatch', [ $term, &$title ] ) ) {
+             return $title;
+         }
+
+         foreach ( $caseConverters as $convert ) {
+             $title = Title::newFromText( $lang->$convert( $term ) );
+             if ( $title && $title->exists() ) {
+                 return $title;
+             }
+         }
+
+         // Give hooks a chance at better match variants
+         $title = null;
+         if ( !Hooks::run( 'SearchGetNearMatch', [ $term, &$title ] ) ) {
+             return $title;
+         }
+     }
+
+     $title = Title::newFromText( $searchterm );
+
+     # Entering an IP address goes to the contributions page
+     if ( $this->config->get( 'EnableSearchContributorsByIP' ) ) {
+         $isIpUserPage = $title->getNamespace() == NS_USER && User::isIP( $title->getText() );
+         if ( $isIpUserPage || User::isIP( trim( $searchterm ) ) ) {
+             return SpecialPage::getTitleFor( 'Contributions', $title->getDBkey() );
+         }
+     }
+
+     # Entering a user goes to the user page whether it's there or not
+     if ( $title->getNamespace() == NS_USER ) {
+         return $title;
+     }
+
+     # Go to images that exist even if there's no local page.
+     # There may have been a funny upload, or it may be on a shared
+     # file repository such as Wikimedia Commons.
+     if ( $title->getNamespace() == NS_FILE && wfFindFile( $title ) ) {
+         return $title;
+     }
+
+     # MediaWiki namespace? Page may be "implied" if not customized.
+     # Just return it, with caps forced as the message system likes it.
+     if ( $title->getNamespace() == NS_MEDIAWIKI ) {
+         return Title::makeTitle( NS_MEDIAWIKI, $lang->ucfirst( $title->getText() ) );
+     }
+
+     # Quoted term? Try without the quotes...
+     $unquoted = [];
+     if ( !preg_match( '/^"([^"]+)"$/', $searchterm, $unquoted ) ) {
+         return null;
+     }
+     return self::getNearMatch( $unquoted[1] );
+ }
+}
diff --git a/www/wiki/includes/search/SearchOracle.php b/www/wiki/includes/search/SearchOracle.php
new file mode 100644
index 00000000..8bcd78fa
--- /dev/null
+++ b/www/wiki/includes/search/SearchOracle.php
@@ -0,0 +1,276 @@
+<?php
+/**
+ * Oracle search engine
+ *
+ * Copyright © 2004 Brion Vibber <brion@pobox.com>
+ * https://www.mediawiki.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Search
+ */
+
+/**
+ * Search engine hook base class for Oracle (ConText).
+ * @ingroup Search
+ */
+class SearchOracle extends SearchDatabase {
+	/**
+	 * Words that escapeTerm() wraps in braces so that they are passed to
+	 * CONTAINS() as literal terms (presumably the Oracle Text reserved
+	 * words/operators -- confirm against the Oracle Text reference).
+	 * Keys are upper-case words; the values are unused, only key presence
+	 * is tested.
+	 *
+	 * @var int[]
+	 */
+	private $reservedWords = [
+		'ABOUT' => 1,
+		'ACCUM' => 1,
+		'AND' => 1,
+		'BT' => 1,
+		'BTG' => 1,
+		'BTI' => 1,
+		'BTP' => 1,
+		'FUZZY' => 1,
+		'HASPATH' => 1,
+		'INPATH' => 1,
+		'MINUS' => 1,
+		'NEAR' => 1,
+		'NOT' => 1,
+		'NT' => 1,
+		'NTG' => 1,
+		'NTI' => 1,
+		'NTP' => 1,
+		'OR' => 1,
+		'PT' => 1,
+		'RT' => 1,
+		'SQE' => 1,
+		'SYN' => 1,
+		'TR' => 1,
+		'TRSYN' => 1,
+		'TT' => 1,
+		'WITHIN' => 1,
+	];
+
+	/**
+	 * Perform a full text search query and return a result set.
+	 *
+	 * @param string $term Raw search term
+	 * @return SqlSearchResultSet
+	 */
+	public function searchText( $term ) {
+		return $this->runSearch( $term, true );
+	}
+
+	/**
+	 * Perform a title-only search query and return a result set.
+	 *
+	 * @param string $term Raw search term
+	 * @return SqlSearchResultSet
+	 */
+	public function searchTitle( $term ) {
+		return $this->runSearch( $term, false );
+	}
+
+	/**
+	 * Shared implementation of searchText() and searchTitle().
+	 *
+	 * @param string $term Raw search term
+	 * @param bool $fulltext True to search page text, false to search titles
+	 * @return SqlSearchResultSet An empty set when $term is empty
+	 */
+	private function runSearch( $term, $fulltext ) {
+		if ( $term == '' ) {
+			return new SqlSearchResultSet( false, '' );
+		}
+
+		// getQuery() fills $this->searchTerms as a side effect of parsing,
+		// so it has to run before the result set is constructed.
+		$sql = $this->getQuery( $this->filter( $term ), $fulltext );
+		$resultSet = $this->db->query( $sql );
+		return new SqlSearchResultSet( $resultSet, $this->searchTerms );
+	}
+
+	/**
+	 * Return a partial WHERE clause to limit the search to the given namespaces
+	 *
+	 * @return string Empty when no namespace restriction is set (null);
+	 *  restricts to namespace 0 when the namespace list is empty.
+	 */
+	public function queryNamespaces() {
+		if ( is_null( $this->namespaces ) ) {
+			return '';
+		}
+		if ( !count( $this->namespaces ) ) {
+			$namespaces = '0';
+		} else {
+			$namespaces = $this->db->makeList( $this->namespaces );
+		}
+		return 'AND page_namespace IN (' . $namespaces . ')';
+	}
+
+	/**
+	 * Apply the configured limit and offset to the query.
+	 *
+	 * @param string $sql
+	 *
+	 * @return string
+	 */
+	public function queryLimit( $sql ) {
+		return $this->db->limitResult( $sql, $this->limit, $this->offset );
+	}
+
+	/**
+	 * Return the ORDER BY clause ranking results by the score of the
+	 * CONTAINS() call labelled 1 in parseQuery().
+	 *
+	 * Results are sorted best-first: the query is subsequently limited by
+	 * queryLimit(), so an ascending sort would keep the weakest matches.
+	 *
+	 * @param string $filteredTerm Unused
+	 * @param bool $fulltext Unused
+	 * @return string
+	 */
+	public function queryRanking( $filteredTerm, $fulltext ) {
+		return ' ORDER BY score(1) DESC';
+	}
+
+	/**
+	 * Construct the full SQL query to do the search.
+	 * The guts should be constructed in queryMain()
+	 *
+	 * @param string $filteredTerm
+	 * @param bool $fulltext
+	 * @return string
+	 */
+	public function getQuery( $filteredTerm, $fulltext ) {
+		return $this->queryLimit( $this->queryMain( $filteredTerm, $fulltext ) . ' ' .
+			$this->queryNamespaces() . ' ' .
+			$this->queryRanking( $filteredTerm, $fulltext ) . ' ' );
+	}
+
+	/**
+	 * Picks which field to index on, depending on what type of query.
+	 *
+	 * @param bool $fulltext
+	 * @return string 'si_text' for full text queries, 'si_title' otherwise
+	 */
+	public function getIndexField( $fulltext ) {
+		return $fulltext ? 'si_text' : 'si_title';
+	}
+
+	/**
+	 * Get the base part of the search query: a join of the page and
+	 * searchindex tables filtered by the parsed match expression.
+	 *
+	 * @param string $filteredTerm
+	 * @param bool $fulltext
+	 * @return string
+	 */
+	public function queryMain( $filteredTerm, $fulltext ) {
+		$match = $this->parseQuery( $filteredTerm, $fulltext );
+		$page = $this->db->tableName( 'page' );
+		$searchindex = $this->db->tableName( 'searchindex' );
+		return 'SELECT page_id, page_namespace, page_title ' .
+			"FROM $page,$searchindex " .
+			'WHERE page_id=si_page AND ' . $match;
+	}
+
+	/**
+	 * Parse a user input search string, and return an SQL fragment to be used
+	 * as part of a WHERE clause.
+	 *
+	 * Also resets $this->searchTerms and fills it with one regular
+	 * expression fragment per matched term.
+	 *
+	 * @param string $filteredText
+	 * @param bool $fulltext
+	 * @return string
+	 */
+	public function parseQuery( $filteredText, $fulltext ) {
+		global $wgContLang;
+		$lc = $this->legalSearchChars( self::CHARS_NO_SYNTAX );
+		$this->searchTerms = [];
+
+		# @todo FIXME: This doesn't handle parenthetical expressions.
+		$m = [];
+		$searchon = '';
+		if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
+			$filteredText, $m, PREG_SET_ORDER ) ) {
+			foreach ( $m as $terms ) {
+				// A leading '-' negates the term; any other (or no) modifier
+				// joins it with '&'.
+				$operator = $terms[1] == '-' ? ' ~' : ' & ';
+
+				// Search terms in all variant forms, only
+				// apply on wiki with LanguageConverter
+				// NOTE(review): each variant is appended as its own operand
+				// with the same operator -- confirm that this is intended.
+				$temp_terms = $wgContLang->autoConvertToAllVariants( $terms[2] );
+				if ( is_array( $temp_terms ) ) {
+					$temp_terms = array_unique( array_values( $temp_terms ) );
+					foreach ( $temp_terms as $t ) {
+						$searchon .= $operator . $this->escapeTerm( $t );
+					}
+				} else {
+					$searchon .= $operator . $this->escapeTerm( $terms[2] );
+				}
+
+				// Build the highlighting regex for this term. Group 3 is only
+				// present for unquoted words; group 4 is their optional '*'.
+				if ( !empty( $terms[3] ) ) {
+					$regexp = preg_quote( $terms[3], '/' );
+					if ( $terms[4] ) {
+						$regexp .= "[0-9A-Za-z_]+";
+					}
+				} else {
+					$regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
+				}
+				$this->searchTerms[] = $regexp;
+			}
+		}
+
+		// Drop the leading operator left over from the first term.
+		$searchon = $this->db->addQuotes( ltrim( $searchon, ' &' ) );
+		$field = $this->getIndexField( $fulltext );
+		return " CONTAINS($field, $searchon, 1) > 0 ";
+	}
+
+	/**
+	 * Prepare a single term for use inside the CONTAINS() query string:
+	 * normalize it, wrap reserved words in braces, turn a surrounding pair
+	 * of double quotes into parentheses and backslash-escape '-', '&' and '|'.
+	 *
+	 * @param string $t
+	 * @return string
+	 */
+	private function escapeTerm( $t ) {
+		global $wgContLang;
+		$t = $wgContLang->normalizeForSearch( $t );
+		$t = isset( $this->reservedWords[strtoupper( $t )] ) ? '{' . $t . '}' : $t;
+		$t = preg_replace( '/^"(.*)"$/', '($1)', $t );
+		$t = preg_replace( '/([-&|])/', '\\\\$1', $t );
+		return $t;
+	}
+
+	/**
+	 * Create or update the search index record for the given page.
+	 * Title and text should be pre-processed.
+	 *
+	 * @param int $id
+	 * @param string $title
+	 * @param string $text
+	 */
+	public function update( $id, $title, $text ) {
+		$dbw = wfGetDB( DB_MASTER );
+		$dbw->replace( 'searchindex',
+			[ 'si_page' ],
+			[
+				'si_page' => $id,
+				'si_title' => $title,
+				'si_text' => $text
+			], 'SearchOracle::update' );
+
+		// Sync the index
+		// We need to specify the DB name (i.e. user/schema) here so that
+		// it can work from the installer, where
+		//     ALTER SESSION SET CURRENT_SCHEMA = ...
+		// was used.
+		$dbw->query( "CALL ctx_ddl.sync_index(" .
+			$dbw->addQuotes( $dbw->getDBname() . '.' . $dbw->tableName( 'si_text_idx', 'raw' ) ) . ")" );
+		$dbw->query( "CALL ctx_ddl.sync_index(" .
+			$dbw->addQuotes( $dbw->getDBname() . '.' . $dbw->tableName( 'si_title_idx', 'raw' ) ) . ")" );
+	}
+
+	/**
+	 * Update a search index record's title only.
+	 * Title should be pre-processed.
+	 *
+	 * @param int $id
+	 * @param string $title
+	 */
+	public function updateTitle( $id, $title ) {
+		$dbw = wfGetDB( DB_MASTER );
+
+		$dbw->update( 'searchindex',
+			[ 'si_title' => $title ],
+			[ 'si_page' => $id ],
+			'SearchOracle::updateTitle',
+			[] );
+	}
+
+	/**
+	 * Get the set of characters allowed in search terms. For CHARS_ALL the
+	 * double quote is added in front of the parent's set.
+	 *
+	 * @param int $type One of the CHARS_* constants
+	 * @return string
+	 */
+	public static function legalSearchChars( $type = self::CHARS_ALL ) {
+		$searchChars = parent::legalSearchChars( $type );
+		if ( $type === self::CHARS_ALL ) {
+			$searchChars = "\"" . $searchChars;
+		}
+		return $searchChars;
+	}
+}
diff --git a/www/wiki/includes/search/SearchPostgres.php b/www/wiki/includes/search/SearchPostgres.php
new file mode 100644
index 00000000..5a50b176
--- /dev/null
+++ b/www/wiki/includes/search/SearchPostgres.php
@@ -0,0 +1,192 @@
+<?php
+/**
+ * PostgreSQL search engine
+ *
+ * Copyright © 2006-2007 Greg Sabino Mullane <greg@turnstep.com>
+ * https://www.mediawiki.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Search
+ */
+
+/**
+ * Search engine hook base class for Postgres
+ * @ingroup Search
+ */
+class SearchPostgres extends SearchDatabase {
+	/**
+	 * Perform a title search query via tsearch2 and return a result set.
+	 * Matches against the 'titlevector' column.
+	 *
+	 * @param string $term Raw search term
+	 * @return SqlSearchResultSet
+	 */
+	public function searchTitle( $term ) {
+		return $this->runQuery( $this->searchQuery( $term, 'titlevector', 'page_title' ) );
+	}
+
+	/**
+	 * Perform a full text search query via tsearch2 and return a result set.
+	 * Matches against the 'textvector' column of the latest revision's text
+	 * row (see the join built in searchQuery()).
+	 *
+	 * @param string $term Raw search term
+	 * @return SqlSearchResultSet
+	 */
+	public function searchText( $term ) {
+		return $this->runQuery( $this->searchQuery( $term, 'textvector', 'old_text' ) );
+	}
+
+	/**
+	 * Run a search query with PHP error reporting temporarily lowered to
+	 * E_ERROR and wrap the outcome in a result set.
+	 *
+	 * @param string $q SQL built by searchQuery()
+	 * @return SqlSearchResultSet
+	 */
+	private function runQuery( $q ) {
+		$olderror = error_reporting( E_ERROR );
+		try {
+			$resultSet = $this->db->query( $q, 'SearchPostgres', true );
+		} finally {
+			// Always put the previous level back, even if query() throws,
+			// so the rest of the request is not left with reduced reporting.
+			error_reporting( $olderror );
+		}
+		return new SqlSearchResultSet( $resultSet, $this->searchTerms );
+	}
+
+	/**
+	 * Transform the user's search string into a better form for tsearch2
+	 * Returns an SQL fragment consisting of quoted text to search for.
+	 *
+	 * @param string $term
+	 *
+	 * @return string
+	 */
+	public function parseQuery( $term ) {
+		wfDebug( "parseQuery received: $term \n" );
+
+		# # No backslashes allowed
+		$term = preg_replace( '/\\\/', '', $term );
+
+		# # Collapse parens into nearby words:
+		$term = preg_replace( '/\s*\(\s*/', ' (', $term );
+		$term = preg_replace( '/\s*\)\s*/', ') ', $term );
+
+		# # Treat colons as word separators:
+		$term = preg_replace( '/:/', ' ', $term );
+
+		$searchstring = '';
+		$m = [];
+		if ( preg_match_all( '/([-!]?)(\S+)\s*/', $term, $m, PREG_SET_ORDER ) ) {
+			foreach ( $m as $terms ) {
+				# # A leading '-' or '!' negates the word that follows
+				if ( strlen( $terms[1] ) ) {
+					$searchstring .= ' & !';
+				}
+				# # Map the keywords and/or/not (any case) and '|' to operators;
+				# # everything else is ANDed in as a search word.
+				$word = strtolower( $terms[2] );
+				if ( $word === 'and' ) {
+					$searchstring .= ' & ';
+				} elseif ( $word === 'or' || $terms[2] === '|' ) {
+					$searchstring .= ' | ';
+				} elseif ( $word === 'not' ) {
+					$searchstring .= ' & !';
+				} else {
+					$searchstring .= " & $terms[2]";
+				}
+			}
+		}
+
+		# # Strip out leading junk
+		$searchstring = preg_replace( '/^[\s\&\|]+/', '', $searchstring );
+
+		# # Remove any doubled-up operators
+		$searchstring = preg_replace( '/([\!\&\|]) +(?:[\&\|] +)+/', "$1 ", $searchstring );
+
+		# # Remove any non-spaced operators (e.g. "Zounds!")
+		$searchstring = preg_replace( '/([^ ])[\!\&\|]/', "$1", $searchstring );
+
+		# # Remove any trailing whitespace or operators
+		$searchstring = preg_replace( '/[\s\!\&\|]+$/', '', $searchstring );
+
+		# # Remove unnecessary quotes around everything
+		$searchstring = preg_replace( '/^[\'"](.*)[\'"]$/', "$1", $searchstring );
+
+		# # Quote the whole thing
+		$searchstring = $this->db->addQuotes( $searchstring );
+
+		wfDebug( "parseQuery returned: $searchstring \n" );
+
+		return $searchstring;
+	}
+
+	/**
+	 * Construct the full SQL query to do the search.
+	 *
+	 * Also resets $this->searchTerms and fills it with the quoted lexemes
+	 * found in the parsed tsquery.
+	 *
+	 * @param string $term Raw search term
+	 * @param string $fulltext Name of the column matched against the tsquery
+	 *  ('titlevector' or 'textvector' for the callers in this class)
+	 * @param string $colname Not used by this method
+	 * @return string
+	 */
+	public function searchQuery( $term, $fulltext, $colname ) {
+		# Get the SQL fragment for the given term
+		$searchstring = $this->parseQuery( $term );
+
+		# # We need a separate query here so gin does not complain about empty searches
+		$sql = "SELECT to_tsquery($searchstring)";
+		$res = $this->db->query( $sql );
+		if ( !$res ) {
+			# # TODO: Better output (example to catch: one 'two)
+			die( "Sorry, that was not a valid search string. Please go back and try again" );
+		}
+		$top = $res->fetchRow()[0];
+
+		$this->searchTerms = [];
+		if ( $top === "" ) { # # e.g. if only stopwords are used XXX return something better
+			# # "AND 1=0" makes this query return no rows
+			$query = "SELECT page_id, page_namespace, page_title, 0 AS score " .
+				"FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
+				"AND r.rev_text_id = c.old_id AND 1=0";
+		} else {
+			$m = [];
+			if ( preg_match_all( "/'([^']+)'/", $top, $m, PREG_SET_ORDER ) ) {
+				foreach ( $m as $terms ) {
+					# # Keyed by the term itself, so duplicates collapse
+					$this->searchTerms[$terms[1]] = $terms[1];
+				}
+			}
+
+			$query = "SELECT page_id, page_namespace, page_title, " .
+				"ts_rank($fulltext, to_tsquery($searchstring), 5) AS score " .
+				"FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
+				"AND r.rev_text_id = c.old_id AND $fulltext @@ to_tsquery($searchstring)";
+		}
+
+		# # Namespaces - defaults to 0
+		if ( !is_null( $this->namespaces ) ) { // null -> search all
+			if ( count( $this->namespaces ) < 1 ) {
+				$query .= ' AND page_namespace = 0';
+			} else {
+				$namespaces = $this->db->makeList( $this->namespaces );
+				$query .= " AND page_namespace IN ($namespaces)";
+			}
+		}
+
+		$query .= " ORDER BY score DESC, page_id DESC";
+
+		$query .= $this->db->limitResult( '', $this->limit, $this->offset );
+
+		wfDebug( "searchQuery returned: $query \n" );
+
+		return $query;
+	}
+
+	# # Most of the work of these two functions are done automatically via triggers
+
+	/**
+	 * Clear the text vector of every text row of the page except the one
+	 * with the highest rev_text_id, so that older revisions are not indexed.
+	 *
+	 * @param int $pageid
+	 * @param string $title Unused
+	 * @param string $text Unused
+	 * @return bool Always true
+	 */
+	public function update( $pageid, $title, $text ) {
+		# # We don't want to index older revisions
+		$sql = "UPDATE pagecontent SET textvector = NULL WHERE textvector IS NOT NULL and old_id IN " .
+			"(SELECT DISTINCT rev_text_id FROM revision WHERE rev_page = " . intval( $pageid ) .
+			" ORDER BY rev_text_id DESC OFFSET 1)";
+		$this->db->query( $sql );
+		return true;
+	}
+
+	/**
+	 * No-op in this backend.
+	 *
+	 * @param int $id Unused
+	 * @param string $title Unused
+	 * @return bool Always true
+	 */
+	public function updateTitle( $id, $title ) {
+		return true;
+	}
+
+}
diff --git a/www/wiki/includes/search/SearchResult.php b/www/wiki/includes/search/SearchResult.php
new file mode 100644
index 00000000..dc294c32
--- /dev/null
+++ b/www/wiki/includes/search/SearchResult.php
@@ -0,0 +1,283 @@
+<?php
+/**
+ * Search engine result
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Search
+ */
+
+use MediaWiki\MediaWikiServices;
+
+/**
+ * @todo FIXME: This class is horribly factored. It would probably be better to
+ * have a useful base class to which you pass some standard information, then
+ * let the fancy self-highlighters extend that.
+ * @ingroup Search
+ */
+class SearchResult {
+
+	/**
+	 * Revision backing this result, null when none could be loaded
+	 * @var Revision|null
+	 */
+	protected $mRevision = null;
+
+	/**
+	 * File for results in the file namespace, if one was found
+	 * @var File|null
+	 */
+	protected $mImage = null;
+
+	/**
+	 * Title of the result; null marks a broken title (see isBrokenTitle())
+	 * @var Title|null
+	 */
+	protected $mTitle;
+
+	/**
+	 * Lazily loaded text, see initText()
+	 * @var string
+	 */
+	protected $mText;
+
+	/**
+	 * @var SearchEngine
+	 */
+	protected $searchEngine;
+
+	/**
+	 * A set of extension data, as augmentor name => data.
+	 * Starts out empty so getExtensionData() always returns an array.
+	 * @var array[]
+	 */
+	protected $extensionData = [];
+
+	/**
+	 * Return a new SearchResult and initializes it with a title.
+	 *
+	 * @param Title $title
+	 * @param SearchResultSet $parentSet Optional set whose augmented data
+	 *  is attached to the new result
+	 * @return SearchResult
+	 */
+	public static function newFromTitle( $title, SearchResultSet $parentSet = null ) {
+		$result = new static();
+		$result->initFromTitle( $title );
+		// Augmented data is looked up through the result's title, so there is
+		// nothing to attach (and nothing safe to call) for a broken title.
+		if ( $parentSet && !$result->isBrokenTitle() ) {
+			$parentSet->augmentResult( $result );
+		}
+		return $result;
+	}
+
+	/**
+	 * Initialize from a Title and if possible initializes a corresponding
+	 * Revision and File.
+	 *
+	 * @param Title $title
+	 */
+	protected function initFromTitle( $title ) {
+		$this->mTitle = $title;
+		if ( !is_null( $this->mTitle ) ) {
+			// Hook handlers may replace $id (passed by reference) before it
+			// is handed to Revision::newFromTitle().
+			$id = false;
+			Hooks::run( 'SearchResultInitFromTitle', [ $title, &$id ] );
+			$this->mRevision = Revision::newFromTitle(
+				$this->mTitle, $id, Revision::READ_NORMAL );
+			if ( $this->mTitle->getNamespace() === NS_FILE ) {
+				$this->mImage = wfFindFile( $this->mTitle );
+			}
+		}
+		$this->searchEngine = MediaWikiServices::getInstance()->newSearchEngine();
+	}
+
+	/**
+	 * Check if this result points to an invalid title
+	 *
+	 * @return bool
+	 */
+	public function isBrokenTitle() {
+		return is_null( $this->mTitle );
+	}
+
+	/**
+	 * Check if target page is missing, happens when index is out of date
+	 *
+	 * @return bool True when neither a revision nor a file was found
+	 */
+	public function isMissingRevision() {
+		return !$this->mRevision && !$this->mImage;
+	}
+
+	/**
+	 * @return Title
+	 */
+	public function getTitle() {
+		return $this->mTitle;
+	}
+
+	/**
+	 * Get the file for this page, if one exists
+	 * @return File|null
+	 */
+	public function getFile() {
+		return $this->mImage;
+	}
+
+	/**
+	 * Lazy initialization of article text from DB
+	 */
+	protected function initText() {
+		if ( !isset( $this->mText ) ) {
+			if ( $this->mRevision != null ) {
+				$this->mText = $this->searchEngine->getTextFromContent(
+					$this->mTitle, $this->mRevision->getContent() );
+			} else { // TODO: can we fetch raw wikitext for commons images?
+				$this->mText = '';
+			}
+		}
+	}
+
+	/**
+	 * @param array $terms Terms to highlight
+	 * @return string Highlighted text snippet, null (and not '') if not supported
+	 */
+	public function getTextSnippet( $terms ) {
+		global $wgAdvancedSearchHighlighting;
+		$this->initText();
+
+		// TODO: make highlighter take a content object. Make ContentHandler a factory for SearchHighlighter.
+		list( $contextlines, $contextchars ) = $this->searchEngine->userHighlightPrefs();
+
+		$h = new SearchHighlighter();
+		if ( count( $terms ) > 0 ) {
+			if ( $wgAdvancedSearchHighlighting ) {
+				return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars );
+			} else {
+				return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars );
+			}
+		} else {
+			// Nothing to highlight
+			return $h->highlightNone( $this->mText, $contextlines, $contextchars );
+		}
+	}
+
+	/**
+	 * @return string Highlighted title, '' if not supported
+	 */
+	public function getTitleSnippet() {
+		return '';
+	}
+
+	/**
+	 * @return string Highlighted redirect name (redirect to this page), '' if none or not supported
+	 */
+	public function getRedirectSnippet() {
+		return '';
+	}
+
+	/**
+	 * @return Title|null Title object for the redirect to this page, null if none or not supported
+	 */
+	public function getRedirectTitle() {
+		return null;
+	}
+
+	/**
+	 * @return string Highlighted relevant section name, '' if none or not supported
+	 */
+	public function getSectionSnippet() {
+		return '';
+	}
+
+	/**
+	 * @return Title|null Title object (pagename+fragment) for the section,
+	 *  null if none or not supported
+	 */
+	public function getSectionTitle() {
+		return null;
+	}
+
+	/**
+	 * @return string Highlighted relevant category name or '' if none or not supported
+	 */
+	public function getCategorySnippet() {
+		return '';
+	}
+
+	/**
+	 * @return string Timestamp of the revision, else of the file, else ''
+	 */
+	public function getTimestamp() {
+		if ( $this->mRevision ) {
+			return $this->mRevision->getTimestamp();
+		} elseif ( $this->mImage ) {
+			return $this->mImage->getTimestamp();
+		}
+		return '';
+	}
+
+	/**
+	 * @return int Number of words
+	 */
+	public function getWordCount() {
+		$this->initText();
+		return str_word_count( $this->mText );
+	}
+
+	/**
+	 * @return int Size in bytes
+	 */
+	public function getByteSize() {
+		$this->initText();
+		return strlen( $this->mText );
+	}
+
+	/**
+	 * @return string Interwiki prefix of the title (return iw even if title is broken)
+	 */
+	public function getInterwikiPrefix() {
+		return '';
+	}
+
+	/**
+	 * @return string Interwiki namespace of the title (since we likely can't resolve it locally)
+	 */
+	public function getInterwikiNamespaceText() {
+		return '';
+	}
+
+	/**
+	 * Did this match file contents (eg: PDF/DJVU)?
+	 * @return bool
+	 */
+	public function isFileMatch() {
+		return false;
+	}
+
+	/**
+	 * Get the extension data as:
+	 * augmentor name => data
+	 * @return array[] Empty array when no data was set
+	 */
+	public function getExtensionData() {
+		return $this->extensionData;
+	}
+
+	/**
+	 * Set extension data for this result.
+	 * The data is:
+	 * augmentor name => data
+	 * @param array[] $extensionData
+	 */
+	public function setExtensionData( array $extensionData ) {
+		$this->extensionData = $extensionData;
+	}
+
+}
diff --git a/www/wiki/includes/search/SearchResultSet.php b/www/wiki/includes/search/SearchResultSet.php
new file mode 100644
index 00000000..f25c7283
--- /dev/null
+++ b/www/wiki/includes/search/SearchResultSet.php
@@ -0,0 +1,279 @@
+<?php
+/**
+ * Search result sets
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Search
+ */
+
+/**
+ * @ingroup Search
+ */
+class SearchResultSet {
+
+	/**
+	 * Types of interwiki results
+	 */
+	/**
+	 * Results that are displayed only together with existing main wiki results
+	 * @var int
+	 */
+	const SECONDARY_RESULTS = 0;
+	/**
+	 * Results that can be displayed even if no existing main wiki results exist
+	 * @var int
+	 */
+	const INLINE_RESULTS = 1;
+
+	/**
+	 * Value reported by searchContainedSyntax()
+	 * @var bool
+	 */
+	protected $containedSyntax = false;
+
+	/**
+	 * Cache of titles.
+	 * Lists titles of the result set, in the same order as results.
+	 * Null until extractTitles() has run.
+	 * @var Title[]|null
+	 */
+	private $titles;
+
+	/**
+	 * Cache of results - serialization of the result iterator
+	 * as an array. Null until extractResults() has run.
+	 * @var SearchResult[]|null
+	 */
+	private $results;
+
+	/**
+	 * Set of result's extra data, indexed per result id
+	 * and then per data item name.
+	 * The structure is:
+	 * PAGE_ID => [ augmentor name => data, ... ]
+	 * @var array[]
+	 */
+	protected $extraData = [];
+
+	/**
+	 * @param bool $containedSyntax Whether the search contained search syntax
+	 */
+	public function __construct( $containedSyntax = false ) {
+		$this->containedSyntax = $containedSyntax;
+	}
+
+	/**
+	 * Fetch an array of regular expression fragments for matching
+	 * the search terms as parsed by this engine in a text extract.
+	 * STUB
+	 *
+	 * @return array
+	 */
+	public function termMatches() {
+		return [];
+	}
+
+	/**
+	 * Number of results in this set.
+	 * STUB
+	 *
+	 * @return int
+	 */
+	public function numRows() {
+		return 0;
+	}
+
+	/**
+	 * Some search modes return a total hit count for the query
+	 * in the entire article database. This may include pages
+	 * in namespaces that would not be matched on the given
+	 * settings.
+	 *
+	 * Return null if no total hits number is supported.
+	 *
+	 * @return int|null
+	 */
+	public function getTotalHits() {
+		return null;
+	}
+
+	/**
+	 * Some search modes will run an alternative query that it thinks gives
+	 * a better result than the provided search. Returns true if this has
+	 * occurred.
+	 *
+	 * @return bool
+	 */
+	public function hasRewrittenQuery() {
+		return false;
+	}
+
+	/**
+	 * @return string|null The search the query was internally rewritten to,
+	 *  or null when the result of the original query was returned.
+	 */
+	public function getQueryAfterRewrite() {
+		return null;
+	}
+
+	/**
+	 * @return string|null Same as self::getQueryAfterRewrite(), but in HTML
+	 *  and with changes highlighted. Null when the query was not rewritten.
+	 */
+	public function getQueryAfterRewriteSnippet() {
+		return null;
+	}
+
+	/**
+	 * Some search modes return a suggested alternate term if there are
+	 * no exact hits. Returns true if there is one on this set.
+	 *
+	 * @return bool
+	 */
+	public function hasSuggestion() {
+		return false;
+	}
+
+	/**
+	 * @return string|null Suggested query, null if none
+	 */
+	public function getSuggestionQuery() {
+		return null;
+	}
+
+	/**
+	 * @return string HTML highlighted suggested query, '' if none
+	 */
+	public function getSuggestionSnippet() {
+		return '';
+	}
+
+	/**
+	 * Return a result set of hits on other (multiple) wikis associated with this one
+	 *
+	 * @param int $type One of the *_RESULTS constants
+	 * @return SearchResultSet[]|null This base implementation returns null
+	 */
+	public function getInterwikiResults( $type = self::SECONDARY_RESULTS ) {
+		return null;
+	}
+
+	/**
+	 * Check if there are results on other wikis
+	 *
+	 * @param int $type One of the *_RESULTS constants
+	 * @return bool
+	 */
+	public function hasInterwikiResults( $type = self::SECONDARY_RESULTS ) {
+		return false;
+	}
+
+	/**
+	 * Fetches next search result, or false.
+	 * STUB
+	 * FIXME: refactor as iterator, so we could use nicer interfaces.
+	 * @return SearchResult|false
+	 */
+	public function next() {
+		return false;
+	}
+
+	/**
+	 * Rewind result set back to beginning
+	 */
+	public function rewind() {
+	}
+
+	/**
+	 * Frees the result set, if applicable.
+	 */
+	public function free() {
+		// ...
+	}
+
+	/**
+	 * Did the search contain search syntax? If so, Special:Search won't offer
+	 * the user a link to create a page named by the search string because the
+	 * name would contain the search syntax.
+	 * @return bool
+	 */
+	public function searchContainedSyntax() {
+		return $this->containedSyntax;
+	}
+
+	/**
+	 * Extract all the results in the result set as array.
+	 * The array is built once and cached; the set is rewound before and
+	 * after it is walked.
+	 * @return SearchResult[]
+	 */
+	public function extractResults() {
+		if ( is_null( $this->results ) ) {
+			$this->results = [];
+			if ( $this->numRows() == 0 ) {
+				// Don't bother if we've got empty result
+				return $this->results;
+			}
+			$this->rewind();
+			// Loose comparison on purpose: any falsy value from next()
+			// ends the walk.
+			while ( ( $result = $this->next() ) != false ) {
+				$this->results[] = $result;
+			}
+			$this->rewind();
+		}
+		return $this->results;
+	}
+
+	/**
+	 * Extract all the titles in the result set.
+	 * The array is built once and cached.
+	 * @return Title[]
+	 */
+	public function extractTitles() {
+		if ( is_null( $this->titles ) ) {
+			if ( $this->numRows() == 0 ) {
+				// Don't bother if we've got empty result
+				$this->titles = [];
+			} else {
+				$this->titles = array_map(
+					function ( SearchResult $result ) {
+						return $result->getTitle();
+					},
+					$this->extractResults() );
+			}
+		}
+		return $this->titles;
+	}
+
+	/**
+	 * Sets augmented data for result set.
+	 * @param string $name Extra data item name
+	 * @param array[] $data Extra data as PAGEID => data
+	 */
+	public function setAugmentedData( $name, $data ) {
+		foreach ( $data as $id => $resultData ) {
+			$this->extraData[$id][$name] = $resultData;
+		}
+	}
+
+	/**
+	 * Returns extra data for specific result and store it in SearchResult object.
+	 * @param SearchResult $result
+	 * @return array|null List of data as name => value or null if none present.
+	 */
+	public function augmentResult( SearchResult $result ) {
+		// A SearchResult may carry a null title (SearchResult::isBrokenTitle());
+		// such a result has no page id to look data up by.
+		$title = $result->getTitle();
+		if ( $title === null ) {
+			return null;
+		}
+		$id = $title->getArticleID();
+		if ( !$id || !isset( $this->extraData[$id] ) ) {
+			return null;
+		}
+		$result->setExtensionData( $this->extraData[$id] );
+		return $this->extraData[$id];
+	}
+
+	/**
+	 * @return int|null The offset the current page starts at. Typically
+	 *  this should be null to allow the UI to decide on its own, but in
+	 *  special cases like interleaved AB tests specifying explicitly is
+	 *  necessary.
+	 */
+	public function getOffset() {
+		return null;
+	}
+}
diff --git a/www/wiki/includes/search/SearchSqlite.php b/www/wiki/includes/search/SearchSqlite.php
new file mode 100644
index 00000000..af29212b
--- /dev/null
+++ b/www/wiki/includes/search/SearchSqlite.php
@@ -0,0 +1,312 @@
+<?php
+/**
+ * SQLite search backend, based upon SearchMysql
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Search
+ */
+
+/**
+ * Search engine hook for SQLite
+ * @ingroup Search
+ */
+class SearchSqlite extends SearchDatabase {
+	/**
+	 * Whether fulltext search is supported by the current schema.
+	 *
+	 * Forwards to the database handle's checkForEnabledSearch().
+	 *
+	 * @return bool
+	 */
+	public function fulltextSearchSupported() {
+		return $this->db->checkForEnabledSearch();
+	}
+
+	/**
+	 * Parse the user's query and transform it into an SQL fragment which will
+	 * become part of a WHERE clause.
+	 *
+	 * As a side effect $this->searchTerms is reset and refilled with one
+	 * highlighting regex fragment per recognised term.
+	 *
+	 * @param string $filteredText Query text as passed in by getQuery()/getCountQuery()
+	 * @param bool $fulltext True to match the text column, false for the title column
+	 * @return string Fragment of the form " <field> MATCH <quoted expression> "
+	 */
+	public function parseQuery( $filteredText, $fulltext ) {
+		global $wgContLang;
+		$lc = $this->legalSearchChars( self::CHARS_NO_SYNTAX ); // Minus syntax chars (" and *)
+		$searchon = '';
+		$this->searchTerms = [];
+
+		$m = [];
+		if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
+			$filteredText, $m, PREG_SET_ORDER ) ) {
+			foreach ( $m as $bits ) {
+				// A quoted phrase never reaches the last two capture groups and
+				// preg_match_all() leaves trailing unmatched groups out of the
+				// match, so pad it to a fixed width rather than silencing the
+				// undefined-offset notices list() would otherwise raise.
+				list( /* all */, $modifier, $term, $nonQuoted, $wildcard ) =
+					array_pad( $bits, 5, '' );
+
+				if ( $nonQuoted != '' ) {
+					$term = $nonQuoted;
+					$quote = '';
+				} else {
+					$term = str_replace( '"', '', $term );
+					$quote = '"';
+				}
+
+				if ( $searchon !== '' ) {
+					$searchon .= ' ';
+				}
+
+				// Some languages such as Serbian store the input form in the search index,
+				// so we may need to search for matches in multiple writing system variants.
+				$convertedVariants = $wgContLang->autoConvertToAllVariants( $term );
+				if ( is_array( $convertedVariants ) ) {
+					$variants = array_unique( array_values( $convertedVariants ) );
+				} else {
+					$variants = [ $term ];
+				}
+
+				// The low-level search index does some processing on input to work
+				// around problems with minimum lengths and encoding in MySQL's
+				// fulltext engine.
+				// For Chinese this also inserts spaces between adjacent Han characters.
+				$strippedVariants = array_map(
+					[ $wgContLang, 'normalizeForSearch' ],
+					$variants );
+
+				// Some languages such as Chinese force all variants to a canonical
+				// form when stripping to the low-level search index, so to be sure
+				// let's check our variants list for unique items after stripping.
+				$strippedVariants = array_unique( $strippedVariants );
+
+				// NOTE(review): the modifier character is copied verbatim into the
+				// MATCH expression - confirm the FTS query syntax in use gives
+				// each of [-+<>~] the intended meaning.
+				$searchon .= $modifier;
+				if ( count( $strippedVariants ) > 1 ) {
+					$searchon .= '(';
+				}
+				foreach ( $strippedVariants as $stripped ) {
+					if ( $nonQuoted && strpos( $stripped, ' ' ) !== false ) {
+						// Hack for Chinese: we need to toss in quotes for
+						// multiple-character phrases since normalizeForSearch()
+						// added spaces between them to make word breaks.
+						$stripped = '"' . trim( $stripped ) . '"';
+					}
+					$searchon .= "$quote$stripped$quote$wildcard ";
+				}
+				if ( count( $strippedVariants ) > 1 ) {
+					$searchon .= ')';
+				}
+
+				// Match individual terms or quoted phrase in result highlighting...
+				// Note that variants will be introduced in a later stage for highlighting!
+				$regexp = $this->regexTerm( $term, $wildcard );
+				$this->searchTerms[] = $regexp;
+			}
+
+		} else {
+			wfDebug( __METHOD__ . ": Can't understand search query '{$filteredText}'\n" );
+		}
+
+		$searchon = $this->db->addQuotes( $searchon );
+		$field = $this->getIndexField( $fulltext );
+		return " $field MATCH $searchon ";
+	}
+
+	/**
+	 * Build the highlighting regex fragment for one search term.
+	 *
+	 * @param string $string Term text; it is escaped with preg_quote() for the '/' delimiter
+	 * @param string $wildcard Non-empty when the term ended in a wildcard
+	 * @return string Regex fragment (without delimiters)
+	 */
+	public function regexTerm( $string, $wildcard ) {
+		global $wgContLang;
+
+		$regex = preg_quote( $string, '/' );
+		if ( !$wgContLang->hasWordBreaks() ) {
+			// For Chinese, words may legitimately abut other words in the text literal.
+			// Don't add \b boundary checks... note this could cause false positives
+			// for latin chars.
+			return $regex;
+		}
+
+		// Don't cut off the final bit of a wildcard term: anchor only its start.
+		return $wildcard ? "\b$regex" : "\b$regex\b";
+	}
+
+	/**
+	 * Characters accepted in a search query.
+	 *
+	 * Extends the parent's set with the phrase and wildcard syntax characters
+	 * when the full set (CHARS_ALL) is requested.
+	 *
+	 * @param int $type One of the CHARS_* constants
+	 * @return string
+	 */
+	public static function legalSearchChars( $type = self::CHARS_ALL ) {
+		$searchChars = parent::legalSearchChars( $type );
+		if ( $type === self::CHARS_ALL ) {
+			// " for phrase, * for wildcard
+			$searchChars = "\"*" . $searchChars;
+		}
+		return $searchChars;
+	}
+
+	/**
+	 * Perform a full text search query and return a result set.
+	 *
+	 * @param string $term Raw search term
+	 * @return SqlSearchResultSet|null Null when fulltext search is not supported
+	 */
+	public function searchText( $term ) {
+		return $this->searchInternal( $term, true );
+	}
+
+	/**
+	 * Perform a title-only search query and return a result set.
+	 *
+	 * @param string $term Raw search term
+	 * @return SqlSearchResultSet|null Null when fulltext search is not supported
+	 */
+	public function searchTitle( $term ) {
+		return $this->searchInternal( $term, false );
+	}
+
+	/**
+	 * Run the search query and the matching count query.
+	 *
+	 * The term is lower-cased with the content language and passed through
+	 * filter() before the queries are built.
+	 *
+	 * @param string $term Raw search term
+	 * @param bool $fulltext True to search page text, false for titles only
+	 * @return SqlSearchResultSet|null Null when fulltext search is not supported
+	 */
+	protected function searchInternal( $term, $fulltext ) {
+		global $wgContLang;
+
+		if ( !$this->fulltextSearchSupported() ) {
+			return null;
+		}
+
+		$filteredTerm = $this->filter( $wgContLang->lc( $term ) );
+		$resultSet = $this->db->query( $this->getQuery( $filteredTerm, $fulltext ) );
+
+		// The total stays null when the count query yields no row.
+		$total = null;
+		$totalResult = $this->db->query( $this->getCountQuery( $filteredTerm, $fulltext ) );
+		$row = $totalResult->fetchObject();
+		if ( $row ) {
+			$total = intval( $row->c );
+		}
+		$totalResult->free();
+
+		return new SqlSearchResultSet( $resultSet, $this->searchTerms, $total );
+	}
+
+	/**
+	 * Return a partial WHERE clause to limit the search to the given namespaces.
+	 *
+	 * @return string Empty when no namespace restriction is set
+	 */
+	public function queryNamespaces() {
+		if ( $this->namespaces === null ) {
+			return ''; # search all
+		}
+		if ( !count( $this->namespaces ) ) {
+			// An empty list falls back to namespace 0.
+			$namespaces = '0';
+		} else {
+			$namespaces = $this->db->makeList( $this->namespaces );
+		}
+		return 'AND page_namespace IN (' . $namespaces . ')';
+	}
+
+	/**
+	 * Returns a query with limit for number of results set.
+	 *
+	 * @param string $sql
+	 * @return string
+	 */
+	public function limitResult( $sql ) {
+		return $this->db->limitResult( $sql, $this->limit, $this->offset );
+	}
+
+	/**
+	 * Construct the full SQL query to do the search.
+	 * The guts should be constructed in queryMain().
+	 *
+	 * @param string $filteredTerm
+	 * @param bool $fulltext
+	 * @return string
+	 */
+	public function getQuery( $filteredTerm, $fulltext ) {
+		return $this->limitResult(
+			$this->queryMain( $filteredTerm, $fulltext ) . ' ' .
+			$this->queryNamespaces()
+		);
+	}
+
+	/**
+	 * Picks which field to index on, depending on what type of query.
+	 *
+	 * @param bool $fulltext
+	 * @return string 'si_text' for fulltext queries, 'si_title' otherwise
+	 */
+	public function getIndexField( $fulltext ) {
+		return $fulltext ? 'si_text' : 'si_title';
+	}
+
+	/**
+	 * Get the base part of the search query.
+	 *
+	 * Joins the page table to the search index on page_id = rowid and adds
+	 * the MATCH condition produced by parseQuery().
+	 *
+	 * @param string $filteredTerm
+	 * @param bool $fulltext
+	 * @return string
+	 */
+	public function queryMain( $filteredTerm, $fulltext ) {
+		$match = $this->parseQuery( $filteredTerm, $fulltext );
+		$page = $this->db->tableName( 'page' );
+		$searchindex = $this->db->tableName( 'searchindex' );
+		return "SELECT $searchindex.rowid, page_namespace, page_title " .
+			"FROM $page,$searchindex " .
+			"WHERE page_id=$searchindex.rowid AND $match";
+	}
+
+	/**
+	 * Build the query counting all matches (no limit applied); the count is
+	 * exposed as column "c".
+	 *
+	 * @param string $filteredTerm
+	 * @param bool $fulltext
+	 * @return string
+	 */
+	public function getCountQuery( $filteredTerm, $fulltext ) {
+		$match = $this->parseQuery( $filteredTerm, $fulltext );
+		$page = $this->db->tableName( 'page' );
+		$searchindex = $this->db->tableName( 'searchindex' );
+		return "SELECT COUNT(*) AS c " .
+			"FROM $page,$searchindex " .
+			"WHERE page_id=$searchindex.rowid AND $match " .
+			$this->queryNamespaces();
+	}
+
+	/**
+	 * Create or update the search index record for the given page.
+	 * Title and text should be pre-processed.
+	 *
+	 * Does nothing when fulltext search is not supported.
+	 *
+	 * @param int $id
+	 * @param string $title
+	 * @param string $text
+	 */
+	public function update( $id, $title, $text ) {
+		if ( !$this->fulltextSearchSupported() ) {
+			return;
+		}
+		// @todo find a method to do it in a single request,
+		// couldn't do it so far due to typelessness of FTS3 tables.
+		$dbw = wfGetDB( DB_MASTER );
+
+		// Replace the row: drop whatever is stored for this rowid, then insert anew.
+		$dbw->delete( 'searchindex', [ 'rowid' => $id ], __METHOD__ );
+
+		$dbw->insert( 'searchindex',
+			[
+				'rowid' => $id,
+				'si_title' => $title,
+				'si_text' => $text
+			], __METHOD__ );
+	}
+
+	/**
+	 * Update a search index record's title only.
+	 * Title should be pre-processed.
+	 *
+	 * Does nothing when fulltext search is not supported.
+	 *
+	 * @param int $id
+	 * @param string $title
+	 */
+	public function updateTitle( $id, $title ) {
+		if ( !$this->fulltextSearchSupported() ) {
+			return;
+		}
+		$dbw = wfGetDB( DB_MASTER );
+
+		$dbw->update( 'searchindex',
+			[ 'si_title' => $title ],
+			[ 'rowid' => $id ],
+			__METHOD__ );
+	}
+}
diff --git a/www/wiki/includes/search/SearchSuggestion.php b/www/wiki/includes/search/SearchSuggestion.php
new file mode 100644
index 00000000..7f433db4
--- /dev/null
+++ b/www/wiki/includes/search/SearchSuggestion.php
@@ -0,0 +1,185 @@
+<?php
+
+/**
+ * Search suggestion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+/**
+ * A search suggestion
+ */
+class SearchSuggestion {
+	/**
+	 * @var string|null the suggestion
+	 */
+	private $text;
+
+	/**
+	 * @var string|null the suggestion URL; only set once a title or an
+	 *  explicit URL has been provided
+	 */
+	private $url;
+
+	/**
+	 * @var Title|null the suggested title
+	 */
+	private $suggestedTitle;
+
+	/**
+	 * NOTE: even if suggestedTitle is a redirect suggestedTitleID
+	 * is the ID of the target page.
+	 * @var int|null the suggested title ID
+	 */
+	private $suggestedTitleID;
+
+	/**
+	 * @var float|null The suggestion score
+	 */
+	private $score;
+
+	/**
+	 * Construct a new suggestion
+	 * @param float $score the suggestion score
+	 * @param string|null $text the suggestion text
+	 * @param Title|null $suggestedTitle the suggested title; when given, the
+	 *  URL is derived from it
+	 * @param int|null $suggestedTitleID the suggested title ID
+	 */
+	public function __construct( $score, $text = null, Title $suggestedTitle = null,
+		$suggestedTitleID = null ) {
+		$this->score = $score;
+		$this->text = $text;
+		if ( $suggestedTitle ) {
+			$this->setSuggestedTitle( $suggestedTitle );
+		}
+		$this->suggestedTitleID = $suggestedTitleID;
+	}
+
+	/**
+	 * The suggestion text
+	 * @return string|null
+	 */
+	public function getText() {
+		return $this->text;
+	}
+
+	/**
+	 * Set the suggestion text.
+	 * @param string $text
+	 * @param bool $setTitle Should we also update the title? The title is
+	 *  only replaced when $text is neither null nor the empty string.
+	 */
+	public function setText( $text, $setTitle = true ) {
+		$this->text = $text;
+		if ( $setTitle && $text !== '' && $text !== null ) {
+			$this->setSuggestedTitle( Title::makeTitle( 0, $text ) );
+		}
+	}
+
+	/**
+	 * Title object in the case this suggestion is based on a title.
+	 * May return null if the suggestion is not a Title.
+	 * @return Title|null
+	 */
+	public function getSuggestedTitle() {
+		return $this->suggestedTitle;
+	}
+
+	/**
+	 * Set the suggested title.
+	 *
+	 * A non-null title also refreshes the URL; passing null clears the title
+	 * but leaves the current URL untouched.
+	 *
+	 * @param Title|null $title
+	 */
+	public function setSuggestedTitle( Title $title = null ) {
+		$this->suggestedTitle = $title;
+		if ( $title !== null ) {
+			$this->url = wfExpandUrl( $title->getFullURL(), PROTO_CURRENT );
+		}
+	}
+
+	/**
+	 * Title ID in the case this suggestion is based on a title.
+	 * May return null if the suggestion is not a Title.
+	 * @return int|null
+	 */
+	public function getSuggestedTitleID() {
+		return $this->suggestedTitleID;
+	}
+
+	/**
+	 * Set the suggested title ID
+	 * @param int|null $suggestedTitleID
+	 */
+	public function setSuggestedTitleID( $suggestedTitleID = null ) {
+		$this->suggestedTitleID = $suggestedTitleID;
+	}
+
+	/**
+	 * Suggestion score
+	 * @return float Suggestion score
+	 */
+	public function getScore() {
+		return $this->score;
+	}
+
+	/**
+	 * Set the suggestion score
+	 * @param float $score
+	 */
+	public function setScore( $score ) {
+		$this->score = $score;
+	}
+
+	/**
+	 * Suggestion URL, can be the link to the Title or maybe in the
+	 * future a link to the search results for this search suggestion.
+	 * @return string|null Suggestion URL, null when none has been set
+	 */
+	public function getURL() {
+		return $this->url;
+	}
+
+	/**
+	 * Set the suggestion URL
+	 * @param string $url
+	 */
+	public function setURL( $url ) {
+		$this->url = $url;
+	}
+
+	/**
+	 * Create suggestion from Title
+	 * @param float $score Suggestions score
+	 * @param Title $title
+	 * @return SearchSuggestion
+	 */
+	public static function fromTitle( $score, Title $title ) {
+		return new self( $score, $title->getPrefixedText(), $title, $title->getArticleID() );
+	}
+
+	/**
+	 * Create suggestion from text
+	 * Will also create a title if text is not empty.
+	 * @param float $score Suggestions score
+	 * @param string $text
+	 * @return SearchSuggestion
+	 */
+	public static function fromText( $score, $text ) {
+		$suggestion = new self( $score, $text );
+		// A plain truthiness test would skip the text "0", which setText()
+		// accepts as a title; treat it as non-empty here as well.
+		if ( $text || $text === '0' ) {
+			$suggestion->setSuggestedTitle( Title::makeTitle( 0, $text ) );
+		}
+		return $suggestion;
+	}
+
+}
diff --git a/www/wiki/includes/search/SearchSuggestionSet.php b/www/wiki/includes/search/SearchSuggestionSet.php
new file mode 100644
index 00000000..aced5e18
--- /dev/null
+++ b/www/wiki/includes/search/SearchSuggestionSet.php
@@ -0,0 +1,212 @@
+<?php
+
+/**
+ * Search suggestion sets
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+/**
+ * A set of search suggestions.
+ * The set is always ordered by score, with the best match first.
+ */
+class SearchSuggestionSet {
+	/**
+	 * @var SearchSuggestion[]
+	 */
+	private $suggestions = [];
+
+	/**
+	 * Page IDs already present in the set, as PAGEID => true.
+	 * Used by append() and prepend() to refuse duplicates.
+	 * @var array
+	 */
+	private $pageMap = [];
+
+	/**
+	 * Builds a new set of suggestions.
+	 *
+	 * NOTE: the array should be sorted by score (higher is better),
+	 * in descending order.
+	 * SearchSuggestionSet will not try to re-order this input array.
+	 * Providing an unsorted input array is a mistake and will lead to
+	 * unexpected behaviors.
+	 *
+	 * @param SearchSuggestion[] $suggestions (must be sorted by score)
+	 */
+	public function __construct( array $suggestions ) {
+		// Re-index so that position 0 is always the best suggestion.
+		$this->suggestions = array_values( $suggestions );
+		$this->pageMap = self::buildPageMap( $this->suggestions );
+	}
+
+	/**
+	 * Collect the page IDs of the given suggestions.
+	 * @param SearchSuggestion[] $suggestions
+	 * @return array PAGEID => true; suggestions without a page ID are ignored
+	 */
+	private static function buildPageMap( array $suggestions ) {
+		$map = [];
+		foreach ( $suggestions as $suggestion ) {
+			$pageID = $suggestion->getSuggestedTitleID();
+			if ( $pageID ) {
+				$map[$pageID] = true;
+			}
+		}
+		return $map;
+	}
+
+	/**
+	 * Get the list of suggestions.
+	 * @return SearchSuggestion[]
+	 */
+	public function getSuggestions() {
+		return $this->suggestions;
+	}
+
+	/**
+	 * Call array_map on the suggestions array
+	 * @param callback $callback
+	 * @return array
+	 */
+	public function map( $callback ) {
+		return array_map( $callback, $this->suggestions );
+	}
+
+	/**
+	 * Add a new suggestion at the end.
+	 * If the score of the new suggestion is greater than or equal to the
+	 * worst one, the new suggestion score will be updated (worst - 1).
+	 * A suggestion whose page ID is already in the set is ignored.
+	 *
+	 * @param SearchSuggestion $suggestion
+	 */
+	public function append( SearchSuggestion $suggestion ) {
+		$pageID = $suggestion->getSuggestedTitleID();
+		if ( $pageID && isset( $this->pageMap[$pageID] ) ) {
+			return;
+		}
+		if ( $this->getSize() > 0 && $suggestion->getScore() >= $this->getWorstScore() ) {
+			$suggestion->setScore( $this->getWorstScore() - 1 );
+		}
+		$this->suggestions[] = $suggestion;
+		if ( $pageID ) {
+			$this->pageMap[$pageID] = true;
+		}
+	}
+
+	/**
+	 * Add suggestion set to the end of the current one.
+	 * @param SearchSuggestionSet $set
+	 */
+	public function appendAll( SearchSuggestionSet $set ) {
+		foreach ( $set->getSuggestions() as $sugg ) {
+			$this->append( $sugg );
+		}
+	}
+
+	/**
+	 * Move the suggestion at index $key to the first position.
+	 * Does nothing when no suggestion exists at that index.
+	 * @param int $key
+	 */
+	public function rescore( $key ) {
+		$removed = array_splice( $this->suggestions, $key, 1 );
+		if ( !$removed ) {
+			// Nothing was removed, so there is nothing to move.
+			return;
+		}
+		// Forget the page ID first, otherwise prepend() would reject the
+		// suggestion as a duplicate of itself.
+		$pageID = $removed[0]->getSuggestedTitleID();
+		if ( $pageID ) {
+			unset( $this->pageMap[$pageID] );
+		}
+		$this->prepend( $removed[0] );
+	}
+
+	/**
+	 * Add a new suggestion at the top. If the new suggestion score
+	 * is lower than or equal to the best one its score will be updated
+	 * (best + 1). A suggestion whose page ID is already in the set is ignored.
+	 * @param SearchSuggestion $suggestion
+	 */
+	public function prepend( SearchSuggestion $suggestion ) {
+		$pageID = $suggestion->getSuggestedTitleID();
+		if ( $pageID && isset( $this->pageMap[$pageID] ) ) {
+			return;
+		}
+		if ( $this->getSize() > 0 && $suggestion->getScore() <= $this->getBestScore() ) {
+			$suggestion->setScore( $this->getBestScore() + 1 );
+		}
+		array_unshift( $this->suggestions, $suggestion );
+		if ( $pageID ) {
+			$this->pageMap[$pageID] = true;
+		}
+	}
+
+	/**
+	 * @return float the best score in this suggestion set (0 when empty)
+	 */
+	public function getBestScore() {
+		if ( empty( $this->suggestions ) ) {
+			return 0;
+		}
+		return $this->suggestions[0]->getScore();
+	}
+
+	/**
+	 * @return float the worst score in this set (0 when empty)
+	 */
+	public function getWorstScore() {
+		if ( empty( $this->suggestions ) ) {
+			return 0;
+		}
+		return end( $this->suggestions )->getScore();
+	}
+
+	/**
+	 * @return int the number of suggestion in this set
+	 */
+	public function getSize() {
+		return count( $this->suggestions );
+	}
+
+	/**
+	 * Remove any extra elements in the suggestions set
+	 * @param int $limit the max size of this set.
+	 */
+	public function shrink( $limit ) {
+		if ( count( $this->suggestions ) > $limit ) {
+			$this->suggestions = array_slice( $this->suggestions, 0, $limit );
+			// Drop the page IDs of the suggestions just cut off; otherwise
+			// append()/prepend() would keep rejecting those pages as
+			// duplicates although they are no longer in the set.
+			$this->pageMap = self::buildPageMap( $this->suggestions );
+		}
+	}
+
+	/**
+	 * Builds a new set of suggestion based on a title array.
+	 * Useful when using a backend that supports only Titles.
+	 *
+	 * NOTE: Suggestion scores will be generated.
+	 *
+	 * @param Title[] $titles
+	 * @return SearchSuggestionSet
+	 */
+	public static function fromTitles( array $titles ) {
+		// Scores count down from the number of titles, keeping the input order.
+		$score = count( $titles );
+		$suggestions = array_map( function ( $title ) use ( &$score ) {
+			return SearchSuggestion::fromTitle( $score--, $title );
+		}, $titles );
+		return new SearchSuggestionSet( $suggestions );
+	}
+
+	/**
+	 * Builds a new set of suggestion based on a string array.
+	 *
+	 * NOTE: Suggestion scores will be generated.
+	 *
+	 * @param string[] $titles
+	 * @return SearchSuggestionSet
+	 */
+	public static function fromStrings( array $titles ) {
+		// Scores count down from the number of strings, keeping the input order.
+		$score = count( $titles );
+		$suggestions = array_map( function ( $title ) use ( &$score ) {
+			return SearchSuggestion::fromText( $score--, $title );
+		}, $titles );
+		return new SearchSuggestionSet( $suggestions );
+	}
+
+	/**
+	 * @return SearchSuggestionSet an empty suggestion set
+	 */
+	public static function emptySuggestionSet() {
+		return new SearchSuggestionSet( [] );
+	}
+}
diff --git a/www/wiki/includes/search/SqlSearchResultSet.php b/www/wiki/includes/search/SqlSearchResultSet.php
new file mode 100644
index 00000000..53d09e82
--- /dev/null
+++ b/www/wiki/includes/search/SqlSearchResultSet.php
@@ -0,0 +1,69 @@
+<?php
+
+use Wikimedia\Rdbms\ResultWrapper;
+
+/**
+ * This class is used for different SQL-based search engines shipped with MediaWiki
+ * @ingroup Search
+ */
+class SqlSearchResultSet extends SearchResultSet {
+	/** @var ResultWrapper|bool Database result; the methods below also tolerate false */
+	protected $resultSet;
+	/** @var mixed Value handed back unchanged by termMatches() */
+	protected $terms;
+	/** @var int|null Total hit count, or null when it was not supplied */
+	protected $totalHits;
+
+	/**
+	 * @param ResultWrapper $resultSet Rows produced by the search query
+	 * @param mixed $terms Returned as-is by termMatches()
+	 * @param int|null $total Total number of hits, if known
+	 */
+	function __construct( ResultWrapper $resultSet, $terms, $total = null ) {
+		$this->totalHits = $total;
+		$this->terms = $terms;
+		$this->resultSet = $resultSet;
+	}
+
+	/**
+	 * @return mixed The terms given to the constructor
+	 */
+	function termMatches() {
+		return $this->terms;
+	}
+
+	/**
+	 * @return int|bool Row count of the wrapped result, false without a result
+	 */
+	function numRows() {
+		return $this->resultSet === false
+			? false
+			: $this->resultSet->numRows();
+	}
+
+	/**
+	 * Fetch the next row and wrap it in a SearchResult.
+	 * @return SearchResult|bool False without a result or once the rows are exhausted
+	 */
+	function next() {
+		if ( $this->resultSet !== false ) {
+			$row = $this->resultSet->fetchObject();
+			if ( $row !== false ) {
+				$title = Title::makeTitle( $row->page_namespace, $row->page_title );
+				return SearchResult::newFromTitle( $title, $this );
+			}
+		}
+
+		return false;
+	}
+
+	/**
+	 * Rewind the wrapped result, when there is one.
+	 */
+	function rewind() {
+		if ( !$this->resultSet ) {
+			return;
+		}
+		$this->resultSet->rewind();
+	}
+
+	/**
+	 * Free the wrapped result.
+	 * @return bool|null False without a result, otherwise nothing
+	 */
+	function free() {
+		if ( $this->resultSet !== false ) {
+			$this->resultSet->free();
+			return;
+		}
+
+		return false;
+	}
+
+	/**
+	 * @return int|bool The supplied total, else whatever numRows() reports
+	 */
+	function getTotalHits() {
+		// Special:Search expects a number here, hence the numRows() fallback.
+		return is_null( $this->totalHits )
+			? $this->numRows()
+			: $this->totalHits;
+	}
+}