wikimedia/mediawiki-extensions-CirrusSearch

View on GitHub
includes/Query/MoreLikeTrait.php

Summary

Maintainability
B
4 hrs
Test Coverage
<?php

namespace CirrusSearch\Query;

use CirrusSearch\Hooks;
use CirrusSearch\SearchConfig;
use CirrusSearch\WarningCollector;
use Elastica\Query\MoreLikeThis;
use MediaWiki\MediaWikiServices;
use MediaWiki\Title\Title;

/**
 * Helpers for turning a pipe-separated list of page titles into Title
 * objects and for building an Elastica MoreLikeThis query out of them.
 *
 * Users of this trait must implement getConfig().
 */
trait MoreLikeTrait {
    /**
     * Resolve the titles named in $term, reporting problems through
     * $warningCollector instead of failing.
     *
     * @param string $key name passed as parameter of the emitted warnings
     * @param string $term pipe-separated list of page titles
     * @param WarningCollector $warningCollector
     * @return Title[] the resolved titles; empty when more-like-this is not
     *  configured or when none of the titles could be resolved
     */
    protected function doExpand( $key, $term, WarningCollector $warningCollector ) {
        // If no fields have been set we return no results. This can happen if
        // the user overrides this setting with field names that are not allowed
        // in $this->getConfig()->get( 'CirrusSearchMoreLikeThisAllowedFields' )
        // (see Hooks.php)
        if ( !$this->getConfig()->get( 'CirrusSearchMoreLikeThisFields' ) ) {
            $warningCollector->addWarning( "cirrussearch-mlt-not-configured", $key );
            return [];
        }
        $titles = $this->collectTitles( $term );
        if ( $titles === [] ) {
            $warningCollector->addWarning( "cirrussearch-mlt-feature-no-valid-titles", $key );
        }
        return $titles;
    }

    /**
     * Resolve titles either through the near match hook or through the
     * database, depending on the 'morelike_collect_titles_from_elastic'
     * entry of CirrusSearchDevelOptions.
     *
     * @param string $term pipe-separated list of page titles
     * @return Title[]
     */
    private function collectTitles( $term ) {
        if ( $this->getConfig()->getElement( 'CirrusSearchDevelOptions',
            'morelike_collect_titles_from_elastic' )
        ) {
            return $this->collectTitlesFromElastic( $term );
        } else {
            return $this->collectTitlesFromDB( $term );
        }
    }

    /**
     * Use for devel purpose only
     *
     * Each pipe-separated term is handed to Hooks::handleSearchGetNearMatch();
     * terms for which the hook does not provide a Title are dropped.
     *
     * @param string $terms pipe-separated list of page titles
     * @return Title[]
     */
    private function collectTitlesFromElastic( $terms ) {
        $titles = [];
        foreach ( explode( '|', $terms ) as $term ) {
            $title = null;
            // $title is filled in by reference when a near match is found.
            Hooks::handleSearchGetNearMatch( $term, $title );
            // Only accept real Title objects so that the Title[] contract
            // holds whatever the hook left in $title.
            if ( $title instanceof Title ) {
                $titles[] = $title;
            }
        }
        return $titles;
    }

    /**
     * Resolve each pipe-separated title to an existing, non-redirect page.
     *
     * Invalid, missing and already seen titles are skipped. Redirects are
     * followed until a non-redirect page is reached.
     *
     * @param string $term pipe-separated list of page titles
     * @return Title[]
     */
    private function collectTitlesFromDB( $term ) {
        $titles = [];
        // Full text of every title visited so far, including redirects that
        // were followed. Used both to drop duplicates and to stop when a
        // chain of redirects comes back to a title already seen.
        $found = [];
        $services = MediaWikiServices::getInstance();
        $titleFactory = $services->getTitleFactory();
        $wikiPageFactory = $services->getWikiPageFactory();
        foreach ( explode( '|', $term ) as $title ) {
            $title = $titleFactory->newFromText( trim( $title ) );
            while ( true ) {
                // Unparsable input, or a redirect without a usable target.
                if ( !$title ) {
                    continue 2;
                }
                $titleText = $title->getFullText();
                if ( isset( $found[$titleText] ) ) {
                    continue 2;
                }
                $found[$titleText] = true;
                if ( !$title->exists() ) {
                    continue 2;
                }
                if ( !$title->isRedirect() ) {
                    // Reached an existing, non-redirect page: keep it.
                    break;
                }
                // The page is a redirect: replace $title with its target and
                // run the checks above again on that target.
                $page = $wikiPageFactory->newFromTitle( $title );
                if ( !$page->exists() ) {
                    continue 2;
                }
                $title = $page->getRedirectTarget();
            }
            $titles[] = $title;
        }

        return $titles;
    }

    /**
     * Builds a more like this query for the specified titles. Take care that
     * this outputs a stable result, regardless of order of configuration
     * parameters and input titles. The result of this is hashed to generate an
     * application side cache key. If the result is unstable we will see a
     * reduced hit rate, and waste cache storage space.
     *
     * @param Title[] $titles
     * @return MoreLikeThis
     */
    protected function buildMoreLikeQuery( array $titles ) {
        // Sort the titles by their string form so that the order of the
        // documents below does not depend on the order of the input.
        sort( $titles, SORT_STRING );
        $likeDocs = [];
        foreach ( $titles as $title ) {
            $likeDocs[] = [ '_id' => $this->getConfig()->makeId( $title->getArticleID() ) ];
        }

        // Same reasoning for the configured fields.
        $moreLikeThisFields = $this->getConfig()->get( 'CirrusSearchMoreLikeThisFields' );
        sort( $moreLikeThisFields );
        $query = new MoreLikeThis();
        $query->setParams( $this->getConfig()->get( 'CirrusSearchMoreLikeThisConfig' ) );
        $query->setFields( $moreLikeThisFields );

        /** @phan-suppress-next-line PhanTypeMismatchArgumentProbablyReal library is mis-annotated */
        $query->setLike( $likeDocs );

        return $query;
    }

    /**
     * @return SearchConfig
     */
    abstract public function getConfig(): SearchConfig;
}