summaryrefslogtreecommitdiff
path: root/www/wiki/includes/search/SearchExactMatchRescorer.php
blob: 354b3909dcd7911d451af09b810c1d399ea48704 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
<?php
/**
 * Rescores results from a prefix search/opensearch to make sure the
 * exact match is the first result.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

/**
 * A utility class that rescores search results by looking for an exact match
 * in the db and moving (or adding) the page found to the first position.
 *
 * NOTE: extracted from TitlePrefixSearch
 * @ingroup Search
 */
class SearchExactMatchRescorer {
	/**
	 * Default search backend does proper prefix searching, but custom backends
	 * may sort based on other algorithms that may cause the exact title match
	 * to not be in the results or be lower down the list.
	 *
	 * The outcome depends on how the exact match relates to the results:
	 *  - no existing page matches $search: $srchres is returned untouched;
	 *  - the exact match is already a result: it is moved to the front;
	 *  - the exact match is a redirect to a result, or shares its redirect
	 *    target with a result: that result is moved to the front;
	 *  - the exact match is the target of a redirect in the results: the
	 *    redirect is replaced by the exact match, placed at the front;
	 *  - otherwise the exact match is prepended and, if the list then exceeds
	 *    $limit, the last result is dropped.
	 *
	 * @param string $search the query
	 * @param int[] $namespaces
	 * @param string[] $srchres results
	 * @param int $limit the max number of results to return; only enforced
	 *   when the exact match is added as a brand new entry
	 * @return string[] munged results
	 */
	public function rescore( $search, $namespaces, $srchres, $limit ) {
		// Pick namespace (based on PrefixSearch::defaultSearchBackend)
		$ns = in_array( NS_MAIN, $namespaces ) ? NS_MAIN : reset( $namespaces );
		$t = Title::newFromText( $search, $ns );
		if ( !$t || !$t->exists() ) {
			// No exact match so just return the search results
			return $srchres;
		}

		// array_search() and foreach hand back *keys*, while array_splice()
		// expects positional *offsets*. Re-indexing makes both the same thing,
		// so a sparse or string-keyed result list cannot make us move the wrong
		// entry. Every path below returns a re-indexed list anyway.
		$srchres = array_values( $srchres );

		$string = $t->getPrefixedText();
		$key = $this->findTitle( $string, $srchres );
		if ( $key !== false ) {
			// Exact match was in the results so just move it to the front
			return $this->pullFront( $key, $srchres );
		}

		// Exact match not in the search results so check for some redirect handling cases
		if ( $t->isRedirect() ) {
			$target = $this->getRedirectTarget( $t );
			// A redirect whose target cannot be resolved has nothing to compare
			// against; fall through and treat the exact match as unique.
			if ( $target !== null ) {
				$key = $this->findTitle( $target, $srchres );
				if ( $key !== false ) {
					// Exact match is a redirect to one of the returned matches so pull the
					// returned match to the front.  This might look odd but the alternative
					// is to put the redirect in front and drop the match.  The name of the
					// found match is often more descriptive/better formed than the name of
					// the redirect AND by definition they share a prefix.  Hopefully this
					// choice is less confusing and more helpful.  But it might not be.  But
					// it is the choice we're going with for now.
					return $this->pullFront( $key, $srchres );
				}
				$redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres );
				if ( isset( $redirectTargetsToRedirect[$target] ) ) {
					// The exact match and something in the results list are both redirects
					// to the same thing!  In this case we'll pull the returned match to the
					// top following the same logic above.  Again, it might not be a perfect
					// choice but it'll do.
					return $this->pullFront( $redirectTargetsToRedirect[$target], $srchres );
				}
			}
		} else {
			$redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres );
			if ( isset( $redirectTargetsToRedirect[$string] ) ) {
				// The exact match is the target of a redirect already in the results list so remove
				// the redirect from the results list and push the exact match to the front
				array_splice( $srchres, $redirectTargetsToRedirect[$string], 1 );
				array_unshift( $srchres, $string );
				return $srchres;
			}
		}

		// Exact match is totally unique from the other results so just add it to the front
		array_unshift( $srchres, $string );
		// And roll one off the end if the results are too long
		if ( count( $srchres ) > $limit ) {
			array_pop( $srchres );
		}
		return $srchres;
	}

	/**
	 * Locate a title in a list of titles by exact string equality.
	 *
	 * A plain array_search() compares loosely, and PHP compares two
	 * numeric-looking strings as numbers, so "1e3" would "match" "1000".
	 * Casting each entry to string and comparing strictly avoids that while
	 * still tolerating entries that were turned into integers (e.g. by having
	 * been used as array keys).
	 *
	 * @param string $titleText prefixed title text to look for
	 * @param array $titles list of prefixed title texts
	 * @return int|string|bool key of the first matching entry, false if none
	 */
	private function findTitle( $titleText, array $titles ) {
		return array_search( (string)$titleText, array_map( 'strval', $titles ), true );
	}

	/**
	 * Build a lookup from redirect target to the result that redirects to it.
	 * Entries that are not valid titles, are not redirects, or whose target
	 * cannot be resolved are skipped. When several entries redirect to the
	 * same target the last one wins.
	 *
	 * @param string[] $titles
	 * @return array redirect target prefixedText to index of title in titles
	 *   that is a redirect to it.
	 */
	private function redirectTargetsToRedirect( array $titles ) {
		$result = [];
		foreach ( $titles as $key => $titleText ) {
			$title = Title::newFromText( $titleText );
			if ( !$title || !$title->isRedirect() ) {
				continue;
			}
			$target = $this->getRedirectTarget( $title );
			// Compare against null explicitly: the page name "0" is a
			// legitimate target but is falsy in PHP.
			if ( $target === null ) {
				continue;
			}
			$result[$target] = $key;
		}
		return $result;
	}

	/**
	 * Returns an array where the element of $array at index $key becomes
	 * the first element.
	 * @param int $key key to pull to the front; must be a valid offset into $array
	 * @param array $array list to reorder; the caller's copy is not modified
	 * @return array $array with the item at $key pulled to the front
	 */
	private function pullFront( $key, array $array ) {
		$cut = array_splice( $array, $key, 1 );
		array_unshift( $array, $cut[0] );
		return $array;
	}

	/**
	 * Get a redirect's destination from a title
	 * @param Title $title A title to redirect. It may not redirect or even exist
	 * @return null|string If title exists and redirects, get the destination's prefixed name
	 */
	private function getRedirectTarget( $title ) {
		$page = WikiPage::factory( $title );
		if ( !$page->exists() ) {
			return null;
		}
		$redir = $page->getRedirectTarget();
		return $redir ? $redir->getPrefixedText() : null;
	}
}