includes/search/SearchExactMatchRescorer.php
<?php
/**
* Rescores results from a prefix search/opensearch to make sure the
* exact match is the first result.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
use MediaWiki\MediaWikiServices;
use MediaWiki\Title\Title;
/**
* A utility class that rescores search results by looking for an exact match
* in the db and adding the page found to the first position.
*
* NOTE: extracted from TitlePrefixSearch
* @ingroup Search
*/
class SearchExactMatchRescorer {
	/**
	 * @var ?string set when a redirect returned from the engine is replaced by the exact match.
	 *  Initialised to null so that getReplacedRedirect() is safe to call before rescore().
	 */
	private ?string $replacedRedirect = null;

	/**
	 * Default search backend does proper prefix searching, but custom backends
	 * may sort based on other algorithms that may cause the exact title match
	 * to not be in the results or be lower down the list.
	 *
	 * The outcome is one of:
	 * - no existing exact match: the results are returned untouched;
	 * - exact match already in the results: it is moved to the front;
	 * - exact match is a redirect whose target is in the results: the target is moved to the front;
	 * - exact match and one of the results redirect to the same page: that result is replaced
	 *   by the exact match (see getReplacedRedirect());
	 * - exact match is the target of a redirect in the results: that redirect is replaced
	 *   by the exact match (see getReplacedRedirect());
	 * - otherwise: the exact match is prepended and the last result is dropped if the list
	 *   now exceeds $limit.
	 *
	 * @param string $search the query
	 * @param int[] $namespaces
	 * @param string[] $srchres results
	 * @param int $limit the max number of results to return
	 * @return string[] munged results
	 */
	public function rescore( $search, $namespaces, $srchres, $limit ) {
		// Forget whatever a previous call may have recorded.
		$this->replacedRedirect = null;

		// Pick namespace (based on PrefixSearch::defaultSearchBackend)
		$ns = in_array( NS_MAIN, $namespaces ) ? NS_MAIN : reset( $namespaces );
		$t = Title::newFromText( $search, $ns );
		if ( !$t || !$t->exists() ) {
			// No exact match so just return the search results
			return $srchres;
		}

		$string = $t->getPrefixedText();
		// Strict comparison: loose comparison treats numeric-looking titles such as
		// "1e3" and "1000" as equal, which would pull the wrong title to the front.
		$key = array_search( $string, $srchres, true );
		if ( $key !== false ) {
			// Exact match was in the results so just move it to the front
			return $this->pullFront( $key, $srchres );
		}

		// Exact match not in the search results so check for some redirect handling cases
		if ( $t->isRedirect() ) {
			$target = $this->getRedirectTarget( $t );
			// The target cannot always be resolved; only look it up when we have one.
			if ( $target !== null ) {
				$key = array_search( $target, $srchres, true );
				if ( $key !== false ) {
					// Exact match is a redirect to one of the returned matches so pull the
					// returned match to the front. This might look odd but the alternative
					// is to put the redirect in front and drop the match. The name of the
					// found match is often more descriptive/better formed than the name of
					// the redirect AND by definition they share a prefix. Hopefully this
					// choice is less confusing and more helpful. But it might not be. But
					// it is the choice we're going with for now.
					return $this->pullFront( $key, $srchres );
				}

				$redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres );
				if ( isset( $redirectTargetsToRedirect[$target] ) ) {
					// The exact match and something in the results list are both redirects
					// to the same thing! In this case we prefer the match the user typed.
					$this->replacedRedirect =
						array_splice( $srchres, $redirectTargetsToRedirect[$target], 1 )[0];
					array_unshift( $srchres, $string );
					return $srchres;
				}
			}
		} else {
			$redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres );
			if ( isset( $redirectTargetsToRedirect[$string] ) ) {
				// The exact match is the target of a redirect already in the results list so
				// remove the redirect from the results list, remember it as replaced, and push
				// the exact match to the front
				$this->replacedRedirect =
					array_splice( $srchres, $redirectTargetsToRedirect[$string], 1 )[0];
				array_unshift( $srchres, $string );
				return $srchres;
			}
		}

		// Exact match is totally unique from the other results so just add it to the front
		array_unshift( $srchres, $string );
		// And roll one off the end if the results are too long
		if ( count( $srchres ) > $limit ) {
			array_pop( $srchres );
		}
		return $srchres;
	}

	/**
	 * Redirect initially returned by the search engine that got replaced by a better match:
	 * - exact match to a redirect to the same page
	 * - exact match to the target page
	 *
	 * The value is reset at the start of every rescore() call and is null until
	 * rescore() has been called.
	 *
	 * @return string|null the replaced redirect or null if nothing was replaced
	 */
	public function getReplacedRedirect(): ?string {
		return $this->replacedRedirect;
	}

	/**
	 * Build a lookup from redirect target to the position of the redirect in $titles.
	 *
	 * Entries of $titles that cannot be parsed as a title, are not redirects, or whose
	 * target cannot be resolved are skipped. When several entries redirect to the same
	 * target, the last one wins.
	 *
	 * @param string[] $titles
	 * @return array redirect target prefixedText to index of title in titles
	 *   that is a redirect to it.
	 */
	private function redirectTargetsToRedirect( array $titles ): array {
		$result = [];
		foreach ( $titles as $key => $titleText ) {
			$title = Title::newFromText( $titleText );
			if ( !$title || !$title->isRedirect() ) {
				continue;
			}
			$target = $this->getRedirectTarget( $title );
			// Compare against null explicitly: a truthiness check would also
			// discard the perfectly valid target title "0".
			if ( $target === null ) {
				continue;
			}
			$result[$target] = $key;
		}
		return $result;
	}

	/**
	 * Returns an array where the element of $array at index $key becomes
	 * the first element.
	 *
	 * @param int $key key to pull to the front
	 * @param array $array
	 * @return array $array with the item at $key pulled to the front
	 */
	private function pullFront( $key, array $array ): array {
		// Cut the element out, then put it back at the head of the list.
		$cut = array_splice( $array, $key, 1 );
		array_unshift( $array, $cut[0] );
		return $array;
	}

	/**
	 * Get a redirect's destination from a title
	 *
	 * @param Title $title A title to redirect. It may not redirect or even exist
	 * @return null|string If title exists and redirects, get the destination's prefixed name
	 */
	private function getRedirectTarget( $title ): ?string {
		$redirectStore = MediaWikiServices::getInstance()->getRedirectStore();
		$redir = $redirectStore->getRedirectTarget( $title );
		// Needed to get the text needed for display.
		$redir = Title::castFromLinkTarget( $redir );
		return $redir ? $redir->getPrefixedText() : null;
	}
}