wikimedia/mediawiki-core

View on GitHub
includes/search/SearchExactMatchRescorer.php

Summary

Maintainability
B
4 hrs
Test Coverage
<?php
/**
 * Rescores results from a prefix search/opensearch to make sure the
 * exact match is the first result.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

use MediaWiki\MediaWikiServices;
use MediaWiki\Title\Title;

/**
 * A utility class to rescore search results by looking for an exact match
 * in the db and adding the page found to the first position.
 *
 * NOTE: extracted from TitlePrefixSearch
 * @ingroup Search
 */
class SearchExactMatchRescorer {
    /**
     * @var ?string set when a redirect returned from the engine is replaced by the exact match.
     *   Defaults to null so that getReplacedRedirect() is safe to call before rescore().
     */
    private ?string $replacedRedirect = null;

    /**
     * Default search backend does proper prefix searching, but custom backends
     * may sort based on other algorithms that may cause the exact title match
     * to not be in the results or be lower down the list.
     *
     * Each call resets the value reported by getReplacedRedirect().
     *
     * @param string $search the query
     * @param int[] $namespaces
     * @param string[] $srchres results (prefixed title texts)
     * @param int $limit the max number of results to return
     * @return string[] munged results
     */
    public function rescore( $search, $namespaces, $srchres, $limit ) {
        $this->replacedRedirect = null;
        // Pick namespace (based on PrefixSearch::defaultSearchBackend)
        $ns = in_array( NS_MAIN, $namespaces ) ? NS_MAIN : reset( $namespaces );
        $t = Title::newFromText( $search, $ns );
        if ( !$t || !$t->exists() ) {
            // No exact match so just return the search results
            return $srchres;
        }

        // array_search() and foreach yield *keys* while array_splice() expects
        // *offsets*; make the two coincide even if the caller passed a sparse list.
        $srchres = array_values( $srchres );

        $string = $t->getPrefixedText();
        // Strict comparison: with loose comparison numeric-looking titles such as
        // "1e3" and "1000" would be considered the same title.
        $key = array_search( $string, $srchres, true );
        if ( $key !== false ) {
            // Exact match was in the results so just move it to the front
            return $this->pullFront( $key, $srchres );
        }
        // Exact match not in the search results so check for some redirect handling cases
        if ( $t->isRedirect() ) {
            $target = $this->getRedirectTarget( $t );
            // The target may be unresolvable (null); in that case there is nothing
            // to compare against and the exact match is treated as unique below.
            if ( $target !== null ) {
                $key = array_search( $target, $srchres, true );
                if ( $key !== false ) {
                    // Exact match is a redirect to one of the returned matches so pull the
                    // returned match to the front.  This might look odd but the alternative
                    // is to put the redirect in front and drop the match.  The name of the
                    // found match is often more descriptive/better formed than the name of
                    // the redirect AND by definition they share a prefix.  Hopefully this
                    // choice is less confusing and more helpful.  But it might not be.  But
                    // it is the choice we're going with for now.
                    return $this->pullFront( $key, $srchres );
                }
                $redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres );
                if ( isset( $redirectTargetsToRedirect[$target] ) ) {
                    // The exact match and something in the results list are both redirects
                    // to the same thing! In this case we prefer the match the user typed.
                    $this->replacedRedirect =
                        array_splice( $srchres, $redirectTargetsToRedirect[$target], 1 )[0];
                    array_unshift( $srchres, $string );
                    return $srchres;
                }
            }
        } else {
            $redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres );
            if ( isset( $redirectTargetsToRedirect[$string] ) ) {
                // The exact match is the target of a redirect already in the results list so remove
                // the redirect from the results list and push the exact match to the front.
                // Remember the removed redirect so getReplacedRedirect() can report it.
                $this->replacedRedirect =
                    array_splice( $srchres, $redirectTargetsToRedirect[$string], 1 )[0];
                array_unshift( $srchres, $string );
                return $srchres;
            }
        }

        // Exact match is totally unique from the other results so just add it to the front
        array_unshift( $srchres, $string );
        // And roll one off the end if the results are too long
        if ( count( $srchres ) > $limit ) {
            array_pop( $srchres );
        }
        return $srchres;
    }

    /**
     * Redirect initially returned by the search engine that got replaced by a better match:
     * - exact match to a redirect to the same page
     * - exact match to the target page
     * @return string|null the replaced redirect or null if nothing was replaced
     *   (also null when rescore() has not been called yet)
     */
    public function getReplacedRedirect(): ?string {
        return $this->replacedRedirect;
    }

    /**
     * Maps the redirect targets of the given titles back to the position of the
     * redirect in the list. Titles that cannot be parsed, are not redirects, or
     * whose target cannot be resolved are skipped. If several titles redirect to
     * the same target, the last one in the list wins.
     *
     * @param string[] $titles
     * @return array redirect target prefixedText to index of title in titles
     *   that is a redirect to it.
     */
    private function redirectTargetsToRedirect( array $titles ): array {
        $result = [];
        foreach ( $titles as $key => $titleText ) {
            $title = Title::newFromText( $titleText );
            if ( !$title || !$title->isRedirect() ) {
                continue;
            }
            $target = $this->getRedirectTarget( $title );
            // Explicit checks rather than a truthiness test: "0" is falsy in PHP
            // but is a perfectly usable page title.
            if ( $target === null || $target === '' ) {
                continue;
            }
            $result[$target] = $key;
        }
        return $result;
    }

    /**
     * Returns an array where the element of $array at index $key becomes
     * the first element.
     * @param int $key offset of the element to pull to the front
     * @param array $array
     * @return array $array with the item at $key pulled to the front
     */
    private function pullFront( $key, array $array ): array {
        $cut = array_splice( $array, $key, 1 );
        array_unshift( $array, $cut[0] );
        return $array;
    }

    /**
     * Get a redirect's destination from a title
     * @param Title $title A title to redirect. It may not redirect or even exist
     * @return null|string If title exists and redirects, get the destination's prefixed name
     */
    private function getRedirectTarget( $title ): ?string {
        $redirectStore = MediaWikiServices::getInstance()->getRedirectStore();
        $redir = $redirectStore->getRedirectTarget( $title );

        // Needed to get the text needed for display.
        $redir = Title::castFromLinkTarget( $redir );
        return $redir ? $redir->getPrefixedText() : null;
    }
}