summaryrefslogtreecommitdiff
path: root/www/wiki/includes/search/SearchExactMatchRescorer.php
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/includes/search/SearchExactMatchRescorer.php')
-rw-r--r--www/wiki/includes/search/SearchExactMatchRescorer.php144
1 files changed, 144 insertions, 0 deletions
diff --git a/www/wiki/includes/search/SearchExactMatchRescorer.php b/www/wiki/includes/search/SearchExactMatchRescorer.php
new file mode 100644
index 00000000..354b3909
--- /dev/null
+++ b/www/wiki/includes/search/SearchExactMatchRescorer.php
@@ -0,0 +1,144 @@
+<?php
+/**
+ * Rescores results from a prefix search/opensearch to make sure the
+ * exact match is the first result.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * An utility class to rescore search results by looking for an exact match
+ * in the db and add the page found to the first position.
+ *
+ * NOTE: extracted from TitlePrefixSearch
+ * @ingroup Search
+ */
+class SearchExactMatchRescorer {
+ /**
+ * Default search backend does proper prefix searching, but custom backends
+ * may sort based on other algorithms that may cause the exact title match
+ * to not be in the results or be lower down the list.
+ * @param string $search the query
+ * @param int[] $namespaces
+ * @param string[] $srchres results
+ * @param int $limit the max number of results to return
+ * @return string[] munged results
+ */
+ public function rescore( $search, $namespaces, $srchres, $limit ) {
+ // Pick namespace (based on PrefixSearch::defaultSearchBackend)
+ $ns = in_array( NS_MAIN, $namespaces ) ? NS_MAIN : reset( $namespaces );
+ $t = Title::newFromText( $search, $ns );
+ if ( !$t || !$t->exists() ) {
+ // No exact match so just return the search results
+ return $srchres;
+ }
+ $string = $t->getPrefixedText();
+ $key = array_search( $string, $srchres );
+ if ( $key !== false ) {
+ // Exact match was in the results so just move it to the front
+ return $this->pullFront( $key, $srchres );
+ }
+ // Exact match not in the search results so check for some redirect handling cases
+ if ( $t->isRedirect() ) {
+ $target = $this->getRedirectTarget( $t );
+ $key = array_search( $target, $srchres );
+ if ( $key !== false ) {
+ // Exact match is a redirect to one of the returned matches so pull the
+ // returned match to the front. This might look odd but the alternative
+ // is to put the redirect in front and drop the match. The name of the
+ // found match is often more descriptive/better formed than the name of
+ // the redirect AND by definition they share a prefix. Hopefully this
+ // choice is less confusing and more helpful. But it might not be. But
+ // it is the choice we're going with for now.
+ return $this->pullFront( $key, $srchres );
+ }
+ $redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres );
+ if ( isset( $redirectTargetsToRedirect[$target] ) ) {
+ // The exact match and something in the results list are both redirects
+ // to the same thing! In this case we'll pull the returned match to the
+ // top following the same logic above. Again, it might not be a perfect
+ // choice but it'll do.
+ return $this->pullFront( $redirectTargetsToRedirect[$target], $srchres );
+ }
+ } else {
+ $redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres );
+ if ( isset( $redirectTargetsToRedirect[$string] ) ) {
+ // The exact match is the target of a redirect already in the results list so remove
+ // the redirect from the results list and push the exact match to the front
+ array_splice( $srchres, $redirectTargetsToRedirect[$string], 1 );
+ array_unshift( $srchres, $string );
+ return $srchres;
+ }
+ }
+
+ // Exact match is totally unique from the other results so just add it to the front
+ array_unshift( $srchres, $string );
+ // And roll one off the end if the results are too long
+ if ( count( $srchres ) > $limit ) {
+ array_pop( $srchres );
+ }
+ return $srchres;
+ }
+
+ /**
+ * @param string[] $titles
+ * @return array redirect target prefixedText to index of title in titles
+ * that is a redirect to it.
+ */
+ private function redirectTargetsToRedirect( array $titles ) {
+ $result = [];
+ foreach ( $titles as $key => $titleText ) {
+ $title = Title::newFromText( $titleText );
+ if ( !$title || !$title->isRedirect() ) {
+ continue;
+ }
+ $target = $this->getRedirectTarget( $title );
+ if ( !$target ) {
+ continue;
+ }
+ $result[$target] = $key;
+ }
+ return $result;
+ }
+
+ /**
+ * Returns an array where the element of $array at index $key becomes
+ * the first element.
+ * @param int $key key to pull to the front
+ * @return array $array with the item at $key pulled to the front
+ */
+ private function pullFront( $key, array $array ) {
+ $cut = array_splice( $array, $key, 1 );
+ array_unshift( $array, $cut[0] );
+ return $array;
+ }
+
+ /**
+ * Get a redirect's destination from a title
+ * @param Title $title A title to redirect. It may not redirect or even exist
+ * @return null|string If title exists and redirects, get the destination's prefixed name
+ */
+ private function getRedirectTarget( $title ) {
+ $page = WikiPage::factory( $title );
+ if ( !$page->exists() ) {
+ return null;
+ }
+ $redir = $page->getRedirectTarget();
+ return $redir ? $redir->getPrefixedText() : null;
+ }
+}