wikimedia/mediawiki-extensions-CirrusSearch

View on GitHub
includes/Search/Fetch/HighlightingTrait.php

Summary

Maintainability
A
55 mins
Test Coverage
<?php

namespace CirrusSearch\Search\Fetch;

use CirrusSearch\Search\TitleHelper;
use CirrusSearch\Searcher;
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\Title\Title;

/**
 * Helpers for post-processing highlighted snippets returned by the search
 * backend: HTML-escaping them while keeping the highlight tags, stripping the
 * highlight markers, and turning highlighted redirect / section text into
 * Title objects.
 *
 * Classes using this trait must implement getTitleHelper().
 */
trait HighlightingTrait {
    /**
     * Escape highlighted text coming back from Elasticsearch.
     *
     * The highlight markers are first nudged so that they never split a base
     * character from its combining characters, then the text is HTML-escaped
     * and the markers are replaced by the real highlighting tags.
     *
     * @param string $snippet highlighted snippet returned from elasticsearch
     * @return string $snippet with html escaped _except_ highlighting pre and post tags
     */
    protected function escapeHighlightedText( $snippet ) {
        // Quote the markers so they are always matched literally, whatever
        // characters the constants happen to contain.
        $preMarker = preg_quote( Searcher::HIGHLIGHT_PRE_MARKER, '/' );
        $postMarker = preg_quote( Searcher::HIGHLIGHT_POST_MARKER, '/' );

        /**
         * \p{M} matches any combining Unicode character
         * \P{M} matches any non-combining Unicode character
         *
         * For HIGHLIGHT_PRE_MARKER, move the marker earlier if it occurs before a
         * combining character, and there is a non-combining character (and zero
         * or more combining characters) directly before it.
         *
         * For HIGHLIGHT_POST_MARKER, move the marker later if it occurs before
         * one or more combining characters.
         *
         * The patterns are applied in order, each on the output of the previous one.
         */
        $adjusted = preg_replace(
            [
                '/(\P{M}\p{M}*)(' . $preMarker . ')(\p{M}+)/u',
                '/(' . $postMarker . ')(\p{M}+)/u',
            ],
            [
                '$2$1$3',
                '$2$1',
            ],
            $snippet
        );
        if ( $adjusted === null ) {
            // preg_replace() returns null on a PCRE error (e.g. malformed UTF-8
            // with the /u modifier). Moving the markers is only cosmetic, so keep
            // the snippet as received instead of handing null to htmlspecialchars().
            $adjusted = $snippet;
        }

        // Escape everything first: the markers contain no HTML-special characters
        // as far as this code relies on, so they survive and can be swapped for
        // the real tags afterwards.
        return strtr( htmlspecialchars( $adjusted ), [
            Searcher::HIGHLIGHT_PRE_MARKER => Searcher::HIGHLIGHT_PRE,
            Searcher::HIGHLIGHT_POST_MARKER => Searcher::HIGHLIGHT_POST
        ] );
    }

    /**
     * Build the redirect title from the highlighted redirect snippet.
     *
     * The snippet is stripped of its highlight markers and compared against the
     * 'title' of every entry in the result's redirect field. Among the entries
     * that match, the one with the lowest 'namespace' wins. That is pretty
     * arbitrary but it prioritizes namespace 0 over others. A warning is logged
     * when no entry matches.
     *
     * @param \Elastica\Result $result
     * @param string $snippet Highlighted redirect snippet
     * @return Title|null object representing the redirect, null if the highlighted
     *  redirect is not among the redirects carried by $result
     */
    protected function findRedirectTitle( \Elastica\Result $result, $snippet ) {
        $title = $this->stripHighlighting( $snippet );
        $redirects = $result->redirect;
        $best = null;
        foreach ( $redirects ?? [] as $redirect ) {
            if ( $redirect['title'] !== $title ) {
                continue;
            }
            // Strict comparison keeps the first entry seen when namespaces tie.
            if ( $best === null || $redirect['namespace'] < $best['namespace'] ) {
                $best = $redirect;
            }
        }
        if ( $best === null ) {
            LoggerFactory::getInstance( 'CirrusSearch' )->warning(
                "Search backend highlighted a redirect ({title}) but didn't return it.",
                [ 'title' => $title ]
            );
            return null;
        }
        return $this->getTitleHelper()->makeRedirectTitle( $result, $best['title'], $best['namespace'] );
    }

    /**
     * Checks if a snippet contains matches by looking for HIGHLIGHT_PRE_MARKER,
     * i.e. the raw marker present before escapeHighlightedText() has replaced it.
     *
     * @param string $snippet highlighted snippet returned from elasticsearch
     * @return bool true if $snippet contains matches, false otherwise
     */
    protected function containsMatches( $snippet ) {
        return strpos( $snippet, Searcher::HIGHLIGHT_PRE_MARKER ) !== false;
    }

    /**
     * Remove every highlight pre and post marker from a snippet, leaving the
     * rest of the text untouched.
     *
     * @param string $highlighted snippet possibly containing highlight markers
     * @return string the snippet without any highlight marker
     */
    protected function stripHighlighting( $highlighted ) {
        $markers = [ Searcher::HIGHLIGHT_PRE_MARKER, Searcher::HIGHLIGHT_POST_MARKER ];
        return str_replace( $markers, '', $highlighted );
    }

    /**
     * Build a title pointing at a section of $title from a highlighted heading.
     *
     * The highlight markers are stripped, the remaining text is passed through
     * TitleHelper::sanitizeSectionFragment() and the outcome is used as the
     * fragment given to $title->createFragmentTarget().
     *
     * @param string $highlighted highlighted section heading
     * @param Title $title title of the page the section belongs to
     * @return Title
     */
    protected function findSectionTitle( $highlighted, Title $title ) {
        $heading = $this->stripHighlighting( $highlighted );
        $fragment = $this->getTitleHelper()->sanitizeSectionFragment( $heading );
        return $title->createFragmentTarget( $fragment );
    }

    /**
     * Provide the TitleHelper used to build redirect titles and to sanitize
     * section fragments.
     *
     * @return TitleHelper
     */
    abstract protected function getTitleHelper(): TitleHelper;
}