includes/CirrusSearch.php
<?php
namespace CirrusSearch;
use CirrusSearch\Extra\MultiList\MultiListBuilder;
use CirrusSearch\Parser\NamespacePrefixParser;
use CirrusSearch\Parser\QueryStringRegex\SearchQueryParseException;
use CirrusSearch\Profile\ContextualProfileOverride;
use CirrusSearch\Profile\SearchProfileService;
use CirrusSearch\Search\ArrayCirrusSearchResult;
use CirrusSearch\Search\CirrusSearchIndexFieldFactory;
use CirrusSearch\Search\CirrusSearchResultSet;
use CirrusSearch\Search\FancyTitleResultsType;
use CirrusSearch\Search\SearchMetricsProvider;
use CirrusSearch\Search\SearchQuery;
use CirrusSearch\Search\SearchQueryBuilder;
use CirrusSearch\Search\TitleHelper;
use CirrusSearch\Search\TitleResultsType;
use ISearchResultSet;
use MediaWiki\Context\RequestContext;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\ProperPageIdentity;
use MediaWiki\Parser\Sanitizer;
use MediaWiki\Request\WebRequest;
use MediaWiki\Status\Status;
use MediaWiki\Title\Title;
use MediaWiki\User\User;
use MediaWiki\WikiMap\WikiMap;
use SearchEngine;
use SearchIndexField;
use SearchSuggestionSet;
use Wikimedia\Assert\Assert;
/**
* SearchEngine implementation for CirrusSearch. Delegates to
* Searcher for searches and Updater for updates. Note
* that lots of search behavior is hooked in CirrusSearchHooks rather than
* overridden here.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*/
class CirrusSearch extends SearchEngine {
/**
* Special profile to instruct this class to use profile
* selection mechanism.
* This allows to defer profile selection to when we actually perform
* the search. The reason is that the list of possible profiles
* is returned by self::getProfiles so instead of assigning a default
* profile at this point we use this special profile.
*/
public const AUTOSELECT_PROFILE = 'engine_autoselect';
/** @const string name of the prefixsearch fallback profile */
public const COMPLETION_PREFIX_FALLBACK_PROFILE = 'classic';
/**
* @const int Maximum title length that we'll check in prefix and keyword searches.
* Since titles can be 255 bytes in length we're setting this to 255
* characters.
*/
public const MAX_TITLE_SEARCH = 255;
/**
* Name of the feature to extract more fields during a fulltext search request.
* Expected value is a list of strings identifying the fields to extract out
* of the source document.
* @see SearchEngine::supports() and SearchEngine::setFeatureData()
*/
public const EXTRA_FIELDS_TO_EXTRACT = 'extra-fields-to-extract';
/**
* Name of the entry in the extension data array holding the extracted field
* requested using the EXTRA_FIELDS_TO_EXTRACT feature.
* @see \SearchResult::getExtensionData()
*/
private const EXTRA_FIELDS = ArrayCirrusSearchResult::EXTRA_FIELDS;
/**
* @var array metrics about the last thing we searched sourced from the
* Searcher instance
*/
private $lastSearchMetrics = [];
/**
* @var array additional metrics about the search sourced within this class
*/
private $extraSearchMetrics = [];
/**
* @var Connection
*/
private $connection;
/**
* Search configuration.
* @var SearchConfig immutable
*/
private $config;
/**
* Current request.
* @var WebRequest
*/
private $request;
/**
* @var RequestContext
*/
private $requestContext;
/**
* @var CirrusSearchIndexFieldFactory
*/
private $searchIndexFieldFactory;
/**
* @var CirrusDebugOptions
*/
private $debugOptions;
/**
* @var NamespacePrefixParser
*/
private $namespacePrefixParser;
/**
* @var InterwikiResolver
*/
private $interwikiResolver;
/**
* @var TitleHelper
*/
private $titleHelper;
/**
* @var CirrusSearchHookRunner|null
*/
private $cirrusSearchHookRunner;
/**
 * Build the engine, falling back to defaults for every collaborator that
 * is not injected.
 *
 * @param SearchConfig|null $config search configuration; when null the
 *  'CirrusSearch' config is created through the ConfigFactory service
 * @param CirrusDebugOptions|null $debugOptions when null the options are
 *  derived from the main request
 * @param NamespacePrefixParser|null $namespacePrefixParser when null a parser
 *  delegating to CirrusSearch::parseNamespacePrefixes() is used
 * @param InterwikiResolver|null $interwikiResolver when null the
 *  InterwikiResolver::SERVICE service is used
 * @param TitleHelper|null $titleHelper when null a helper for the current
 *  wiki is created, sharing this engine's interwiki resolver
 */
public function __construct(
	?SearchConfig $config = null,
	?CirrusDebugOptions $debugOptions = null,
	?NamespacePrefixParser $namespacePrefixParser = null,
	?InterwikiResolver $interwikiResolver = null,
	?TitleHelper $titleHelper = null
) {
	// Initialize UserTesting before we create a Connection
	// This is useful to do tests across multiple clusters
	UserTestingStatus::getInstance();

	$this->config = $config ?? MediaWikiServices::getInstance()
		->getConfigFactory()
		->makeConfig( 'CirrusSearch' );
	$this->connection = new Connection( $this->config );
	$this->requestContext = RequestContext::getMain();
	$this->request = $this->requestContext->getRequest();
	$this->searchIndexFieldFactory = new CirrusSearchIndexFieldFactory( $this->config );

	$this->namespacePrefixParser = $namespacePrefixParser ?? new class() implements NamespacePrefixParser {
		public function parse( $query ) {
			return CirrusSearch::parseNamespacePrefixes( $query, true, true );
		}
	};
	$this->interwikiResolver = $interwikiResolver
		?? MediaWikiServices::getInstance()->getService( InterwikiResolver::SERVICE );

	// enable interwiki by default
	$this->features['interwiki'] = true;
	$this->features['show-multimedia-search-results'] =
		$this->config->get( 'CirrusSearchCrossProjectShowMultimedia' ) == true;

	$this->debugOptions = $debugOptions ?? CirrusDebugOptions::fromRequest( $this->request );

	// Hand the *resolved* resolver to the default TitleHelper: the raw
	// constructor argument may be null, which would leave the helper
	// without the resolver the rest of this engine uses.
	$this->titleHelper = $titleHelper ?? new TitleHelper(
		WikiMap::getCurrentWikiId(),
		$this->interwikiResolver,
		static function ( $v ) {
			return Sanitizer::escapeIdForLink( $v );
		}
	);

	// Configured extra fields are exposed through the feature data so that
	// full text searches extract them.
	$extraFieldsInSearchResults = $this->config->get( 'CirrusSearchExtraFieldsInSearchResults' );
	if ( $extraFieldsInSearchResults ) {
		$this->features[ self::EXTRA_FIELDS_TO_EXTRACT ] = $extraFieldsInSearchResults;
	}
}
/**
 * Replace the connection used by this engine.
 *
 * @param Connection $connection
 */
public function setConnection( Connection $connection ) {
	$this->connection = $connection;
}
/**
 * Get the connection currently used by this engine.
 *
 * @return Connection
 */
public function getConnection() {
	return $this->connection;
}
/**
 * Get the search configuration this engine was built with.
 *
 * @return SearchConfig
 */
public function getConfig() {
	return $this->config;
}
/**
 * Report whether a SearchEngine feature is supported.
 *
 * Updates to Cirrus via the SearchEngine infrastructure are shut off: page
 * updates and additions are chained on the end of the links update job, and
 * deletes are noticed via the ArticleDeleteComplete hook.
 *
 * @param string $feature feature name
 * @return bool is this feature supported?
 */
public function supports( $feature ) {
	// Loose (non-strict) membership checks are intentional here.
	$unsupported = [ 'search-update', 'list-redirects' ];
	if ( in_array( $feature, $unsupported ) ) {
		return false;
	}

	$alwaysSupported = [ self::FT_QUERY_INDEP_PROFILE_TYPE, self::EXTRA_FIELDS_TO_EXTRACT ];
	if ( in_array( $feature, $alwaysSupported ) ) {
		return true;
	}

	// Anything else is decided by the base SearchEngine.
	return parent::supports( $feature );
}
/**
 * Overridden to run full text searches through the Searcher.
 *
 * Builds a SearchQuery from the term and the state of this engine
 * (namespaces, limit, offset, sort, features, prefix), runs it and records
 * the metrics exposed by the result set.
 *
 * @param string $term text to search
 * @return Status Value is either SearchResultSet, or null on error.
 */
protected function doSearchText( $term ) {
	try {
		$queryBuilder = SearchQueryBuilder::newFTSearchQueryBuilder(
			$this->config,
			$term,
			$this->namespacePrefixParser,
			$this->getCirrusSearchHookRunner()
		);
	} catch ( SearchQueryParseException $parseError ) {
		// The term could not be parsed: report it as a status
		return $parseError->asStatus();
	}

	$extraFields = $this->features[self::EXTRA_FIELDS_TO_EXTRACT] ?? [];
	$allSnippets = !empty( $this->features['snippets'] );

	$queryBuilder->setDebugOptions( $this->debugOptions )
		->setInitialNamespaces( $this->namespaces )
		->setLimit( $this->limit )
		->setOffset( $this->offset )
		->setSort( $this->getSort() )
		->setRandomSeed( $this->getFeatureData( 'random_seed' ) )
		->setExtraIndicesSearch( true )
		->setCrossProjectSearch( $this->isFeatureEnabled( 'interwiki' ) )
		->setWithDYMSuggestion( $this->showSuggestion )
		->setAllowRewrite( $this->isFeatureEnabled( 'rewrite' ) )
		->addProfileContextParameter(
			ContextualProfileOverride::LANGUAGE,
			$this->requestContext->getLanguage()->getCode()
		)
		->setExtraFieldsToExtract( $extraFields )
		->setProvideAllSnippets( $allSnippets );

	if ( $this->prefix !== '' ) {
		$prefixFilter = \CirrusSearch\Query\PrefixFeature::asContextualFilter( $this->prefix );
		$queryBuilder->addContextualFilter( 'prefix', $prefixFilter );
	}

	// A rescore profile explicitly selected through the feature data wins
	$rescoreProfile = $this->extractProfileFromFeatureData( SearchEngine::FT_QUERY_INDEP_PROFILE_TYPE );
	if ( $rescoreProfile !== null ) {
		$queryBuilder->addForcedProfile( SearchProfileService::RESCORE, $rescoreProfile );
	}

	$status = $this->searchTextReal( $queryBuilder->build() );

	$resultSet = $status->getValue();
	if ( $resultSet instanceof CirrusSearchResultSet ) {
		ElasticsearchIntermediary::setResultPages( [ $resultSet ] );
	}
	if ( $resultSet instanceof SearchMetricsProvider ) {
		// Union: metrics already collected are kept on key collision
		$this->extraSearchMetrics += $resultSet->getMetrics();
	}

	return $status;
}
/**
 * Tell whether a feature is present in the feature data with a truthy value.
 *
 * @param string $feature
 * @return bool
 */
private function isFeatureEnabled( $feature ) {
	return !empty( $this->features[$feature] );
}
/**
* Do the hard part of the searching - actual Searcher invocation
*
* Runs the query through a Searcher built with the query's SearchConfig and
* stores the searcher's metrics in $this->lastSearchMetrics. When the main
* search succeeds and cross-project search applies, interwiki results are
* fetched with an InterwikiSearcher and merged into the main result. When
* the debug options request raw output, the status value is replaced by
* the output of Searcher::processRawReturn().
*
* @param SearchQuery $query
* @return Status the status returned by the Searcher; returned untouched
*  when it is not OK
*/
protected function searchTextReal( SearchQuery $query ) {
$searcher = $this->makeSearcher( $query->getSearchConfig() );
$status = $searcher->search( $query );
// Metrics are recorded even when the search failed
$this->lastSearchMetrics = $searcher->getSearchMetrics();
if ( !$status->isOK() ) {
return $status;
}
$result = $status->getValue();
// Add interwiki results, if we have a sane result
// Note that we have no way of sending warning back to the user. In this case all warnings
// are logged when they are added to the status object so we just ignore them here....
// TODO: move this to the Searcher class and get rid of InterwikiSearcher
// there are no reasons we can't do this in a single msearch request.
if ( $query->getCrossSearchStrategy()->isCrossProjectSearchSupported() &&
$searcher->getSearchContext()->areResultsPossible() &&
( $this->debugOptions->isReturnRaw() || method_exists( $result, 'addInterwikiResults' ) )
) {
$iwSearch = new InterwikiSearcher( $this->connection, $query->getSearchConfig(), $this->namespaces, null,
$this->debugOptions, $this->namespacePrefixParser, $this->interwikiResolver, $this->titleHelper,
$this->getCirrusSearchHookRunner() );
$interwikiResults = $iwSearch->getInterwikiResults( $query );
if ( $interwikiResults->isOK() && $interwikiResults->getValue() !== [] ) {
foreach ( $interwikiResults->getValue() as $interwiki => $interwikiResult ) {
if ( $this->debugOptions->isReturnRaw() ) {
// Raw mode: $result is indexed directly by the interwiki key
$result[$interwiki] = $interwikiResult;
} elseif ( $interwikiResult && $interwikiResult->numRows() > 0 ) {
// Only non-empty result sets are attached, as secondary results
$result->addInterwikiResults(
$interwikiResult, ISearchResultSet::SECONDARY_RESULTS, $interwiki
);
}
}
}
}
if ( $this->debugOptions->isReturnRaw() ) {
// Replace the status value with the processed raw output
$status->setResult( true,
$searcher->processRawReturn( $result, $this->request ) );
}
return $status;
}
/**
 * Look for suggestions using ES completion suggester.
 *
 * The suggester runs against the cluster named by the 'completion' entry of
 * CirrusSearchClusterOverrides when one is configured, otherwise against
 * the connection of this engine.
 *
 * @param string $search Search string
 * @param string[]|null $variants Search term variants
 * @param SearchConfig $config search configuration
 * @return SearchSuggestionSet Set of suggested names, empty when the
 *  suggester did not return an OK status
 */
protected function getSuggestions( $search, $variants, SearchConfig $config ) {
	// Inspect features to check if the user selected a specific profile
	$profile = $this->extractProfileFromFeatureData( SearchEngine::COMPLETION_PROFILE_TYPE );

	$overrideCluster = $config->getElement( 'CirrusSearchClusterOverrides', 'completion' );
	$connection = $overrideCluster === null
		? $this->connection
		: Connection::getPool( $config, $overrideCluster );

	$suggester = new CompletionSuggester(
		$connection,
		$this->limit,
		$this->offset,
		$config,
		$this->namespaces,
		null,
		false,
		$profile,
		$this->debugOptions
	);

	$response = $suggester->suggest( $search, $variants );
	if ( $response->isOK() ) {
		$suggestions = $response->getValue();
		if ( $this->debugOptions->isReturnRaw() ) {
			Util::processSearchRawReturn( $suggestions, $this->request, $this->debugOptions );
		}
		return $suggestions;
	}

	// Nothing usable came back: hand out an empty set
	return SearchSuggestionSet::emptySuggestionSet();
}
/**
 * List the sort orders accepted by this engine.
 *
 * The natural title sorts are only offered when the 'use' entry of
 * CirrusSearchNaturalTitleSort is enabled.
 *
 * @return string[]
 */
public function getValidSorts() {
	$naturalTitleSortEnabled = $this->config->getElement( 'CirrusSearchNaturalTitleSort', 'use' );

	$sorts = [
		'relevance',
		'just_match',
		'none',
		'incoming_links_asc',
		'incoming_links_desc',
		'last_edit_asc',
		'last_edit_desc',
		'create_timestamp_asc',
		'create_timestamp_desc',
		'random',
		'user_random',
	];
	if ( !$naturalTitleSortEnabled ) {
		return $sorts;
	}

	foreach ( [ 'title_natural_asc', 'title_natural_desc' ] as $naturalSort ) {
		$sorts[] = $naturalSort;
	}
	return $sorts;
}
/**
 * Get the metrics for the last search we performed.
 *
 * The metrics sourced from the Searcher take precedence over the extra
 * metrics collected by this class when both define the same key. An empty
 * array is returned if nothing was recorded.
 *
 * @return array
 */
public function getLastSearchMetrics() {
	$metrics = $this->lastSearchMetrics;
	$metrics += $this->extraSearchMetrics;
	return $metrics;
}
/**
 * Perform a completion search.
 * Does not resolve namespaces and does not check variants.
 * We use parent search for:
 *   - Special: namespace
 * We use old prefix search for:
 *   - Suggester not enabled
 *   - NS_MAIN is not among the requested namespaces
 *   - The selected completion profile is the prefix fallback profile
 *
 * @param string $search
 * @return SearchSuggestionSet
 */
protected function completionSearchBackend( $search ) {
	if ( in_array( NS_SPECIAL, $this->namespaces ) ) {
		// delegate special search to parent
		return parent::completionSearchBackend( $search );
	}

	// Variants from the debug options: not really useful, mostly for testing purpose
	$variants = $this->debugOptions->getCirrusCompletionVariant();
	if ( $variants ) {
		if ( count( $variants ) > 3 ) {
			// We should not allow too many variants
			$variants = array_slice( $variants, 0, 3 );
		}
	} else {
		$converter = MediaWikiServices::getInstance()
			->getLanguageConverterFactory()
			->getLanguageConverter();
		$variants = $converter->autoConvertToAllVariants( $search );
	}

	// Fallback to the default implementation when the completion suggester
	// is not enabled; it is also only worth a try if NS_MAIN is requested
	if ( !$this->config->isCompletionSuggesterEnabled() || !in_array( NS_MAIN, $this->namespaces ) ) {
		return $this->prefixSearch( $search, $variants );
	}

	// Need to fetch the name to fallback to prefix (not ideal)
	// We should probably refactor this to have a single code path for prefix and completion suggester.
	$profile = $this->extractProfileFromFeatureData( SearchEngine::COMPLETION_PROFILE_TYPE )
		?? $this->config->getProfileService()
			->getProfileName( SearchProfileService::COMPLETION, SearchProfileService::CONTEXT_DEFAULT );

	// Fallback to prefixsearch if the classic profile was selected.
	return $profile === self::COMPLETION_PREFIX_FALLBACK_PROFILE
		? $this->prefixSearch( $search, $variants )
		: $this->getSuggestions( $search, $variants, $this->config );
}
/**
 * Variants are always handled in the backend, so the variant-aware entry
 * point simply forwards to the regular completion search.
 *
 * @see SearchEngine::completionSearchWithVariants()
 * @param string $search
 * @return SearchSuggestionSet
 */
public function completionSearchWithVariants( $search ) {
	$suggestions = $this->completionSearch( $search );
	return $suggestions;
}
/**
 * Older prefix search.
 *
 * @param string $search search text
 * @param string[] $variants
 * @return SearchSuggestionSet the matching titles, or an empty set when the
 *  search status is not OK
 */
protected function prefixSearch( $search, $variants ) {
	$searcher = $this->makeSearcher();
	// Empty searches always find the title, so they get the plain results type.
	$searcher->setResultsType(
		$search ? new FancyTitleResultsType( 'prefix' ) : new TitleResultsType()
	);

	$status = $searcher->prefixSearch( $search, $variants );

	// There is no way to send errors or warnings back to the caller here so we have to make do with
	// only sending results back if there are results and relying on the logging done at the status
	// construction site to log errors.
	if ( !$status->isOK() ) {
		return SearchSuggestionSet::emptySuggestionSet();
	}

	if ( $this->debugOptions->isReturnRaw() ) {
		Util::processSearchRawReturn( $status->getValue(), $this->request, $this->debugOptions );
	}

	$titles = $status->getValue();
	if ( $search ) {
		// Fancy results must be unpacked; entries without a usable title are dropped.
		// No unpacking is needed for the simple title matches of the non-fancy TitleResultsType.
		$titles = array_filter( array_map(
			[ FancyTitleResultsType::class, 'chooseBestTitleOrRedirect' ],
			$titles
		) );
	}
	return SearchSuggestionSet::fromTitles( $titles );
}
/**
 * List the profiles exposed for a SearchEngine profile type.
 *
 * Completion profiles are only listed when the completion suggester is
 * enabled. When at least one profile is exposed, the special
 * self::AUTOSELECT_PROFILE entry is appended and flagged as the default.
 *
 * @param string $profileType
 * @param User|null $user not used by this implementation
 * @return array|null list of profile descriptions ('name', 'desc-message'
 *  and optionally 'default'), or null when the profile type is not handled
 * @see SearchEngine::getProfiles()
 */
public function getProfiles( $profileType, ?User $user = null ) {
	$profileService = $this->config->getProfileService();

	// Map the SearchEngine profile type to its profile service counterpart
	$serviceProfileType = null;
	switch ( $profileType ) {
		case SearchEngine::COMPLETION_PROFILE_TYPE:
			if ( $this->config->isCompletionSuggesterEnabled() ) {
				$serviceProfileType = SearchProfileService::COMPLETION;
			}
			break;
		case SearchEngine::FT_QUERY_INDEP_PROFILE_TYPE:
			$serviceProfileType = SearchProfileService::RESCORE;
			break;
	}
	if ( $serviceProfileType === null ) {
		return null;
	}

	$profiles = [];
	foreach ( $profileService->listExposedProfiles( $serviceProfileType ) as $name => $profile ) {
		// @todo: decide what to with profiles we declare
		// in wmf-config with no i18n messages.
		// Do we want to expose them anyway, or simply
		// hide them but still allow Api to pass them to us.
		// It may require a change in core since ApiBase is
		// strict and won't allow unknown values to be set
		// here.
		$profiles[] = [
			'name' => $name,
			'desc-message' => $profile['i18n_msg'] ?? null,
		];
	}

	if ( $profiles !== [] ) {
		// Offer deferred profile selection as the default choice
		$profiles[] = [
			'name' => self::AUTOSELECT_PROFILE,
			'desc-message' => 'cirrussearch-autoselect-profile',
			'default' => true,
		];
	}
	return $profiles;
}
/**
 * Read the profile explicitly selected for a profile type.
 * (public for testing purposes)
 *
 * @param string $profileType
 * @return string|null the profile name set in SearchEngine::features
 *  null if none present or equal to self::AUTOSELECT_PROFILE
 */
public function extractProfileFromFeatureData( $profileType ) {
	$selected = $this->features[$profileType] ?? null;
	// The autoselect marker means "no explicit choice"
	return $selected === self::AUTOSELECT_PROFILE ? null : $selected;
}
/**
 * Create a search field definition by delegating to the
 * CirrusSearchIndexFieldFactory of this engine.
 *
 * @param string $name
 * @param string $type
 * @return SearchIndexField
 */
public function makeSearchFieldMapping( $name, $type ): SearchIndexField {
	$fieldFactory = $this->searchIndexFieldFactory;
	return $fieldFactory->makeSearchFieldMapping( $name, $type );
}
/**
 * Perform a title search in the article archive.
 *
 * Yields a good status holding an empty list when CirrusSearchEnableArchive
 * is off or when the trimmed term is empty.
 *
 * @param string $term Raw search term
 * @return Status<Title[]>
 */
public function searchArchiveTitle( $term ) {
	$archiveEnabled = $this->config->get( 'CirrusSearchEnableArchive' );
	if ( !$archiveEnabled ) {
		return Status::newGood( [] );
	}

	$needle = trim( $term );
	if ( $needle === '' ) {
		return Status::newGood( [] );
	}

	$searcher = $this->makeSearcher();
	$status = $searcher->searchArchive( $needle );
	if ( !$status->isOK() || !$searcher->isReturnRaw() ) {
		return $status;
	}

	// Raw output requested: swap the value for the processed raw return
	$rawReturn = $searcher->processRawReturn( $status->getValue(), $this->request );
	$status->setResult( true, $rawReturn );
	return $status;
}
/**
 * Forward a weighted tags update to the Updater.
 *
 * The tag prefix must not contain a slash; this is asserted as a
 * precondition. The legacy $tagNames / $tagWeights pair is converted with
 * MultiListBuilder::buildTagWeightsFromLegacyParameters().
 *
 * @deprecated update via {@link WeightedTagsUpdater} service
 * @param ProperPageIdentity $page
 * @param string $tagPrefix
 * @param mixed $tagNames legacy tag names parameter
 * @param mixed $tagWeights legacy tag weights parameter
 */
public function updateWeightedTags( ProperPageIdentity $page, string $tagPrefix, $tagNames = null, $tagWeights = null ): void {
	$prefixHasNoSlash = strpos( $tagPrefix, '/' ) === false;
	Assert::precondition( $prefixHasNoSlash, "invalid tag prefix $tagPrefix: must not contain /" );

	$updater = $this->getUpdater();
	$tagWeightsList = MultiListBuilder::buildTagWeightsFromLegacyParameters( $tagNames, $tagWeights );
	$updater->updateWeightedTags( $page, $tagPrefix, $tagWeightsList );
}
/**
 * Forward a weighted tags reset for a single tag prefix to the Updater.
 *
 * @deprecated update via {@link WeightedTagsUpdater} service
 * @param ProperPageIdentity $page
 * @param string $tagPrefix
 */
public function resetWeightedTags( ProperPageIdentity $page, string $tagPrefix ): void {
	$updater = $this->getUpdater();
	$updater->resetWeightedTags( $page, [ $tagPrefix ] );
}
/**
 * Create an Updater bound to the connection of this engine.
 * Kept as a separate method to facilitate mocking during tests.
 *
 * @return Updater
 */
protected function getUpdater(): Updater {
	$updater = new Updater( $this->connection );
	return $updater;
}
/**
 * Count the content words of the wiki.
 *
 * Note that this sets the limit of this engine to 1 before building the
 * Searcher.
 *
 * @return Status Contains a single integer indicating the number
 *  of content words in the wiki
 */
public function countContentWords() {
	$this->limit = 1;
	$searcher = $this->makeSearcher();
	$status = $searcher->countContentWords();

	if ( !$status->isOK() || !$searcher->isReturnRaw() ) {
		return $status;
	}

	// Raw output requested: swap the value for the processed raw return
	$rawReturn = $searcher->processRawReturn( $status->getValue(), $this->request );
	$status->setResult( true, $rawReturn );
	return $status;
}
/**
 * Build a Searcher from the current state of this engine (connection,
 * offset, limit, namespaces, debug options and helpers).
 *
 * @param SearchConfig|null $config configuration to search with; the
 *  configuration of this engine is used when null
 * @return Searcher
 */
private function makeSearcher( ?SearchConfig $config = null ) {
	return new Searcher(
		$this->connection,
		$this->offset,
		$this->limit,
		$config ?? $this->config,
		$this->namespaces,
		null,
		false,
		$this->debugOptions,
		$this->namespacePrefixParser,
		$this->interwikiResolver,
		$this->titleHelper,
		$this->getCirrusSearchHookRunner()
	);
}
/**
 * Get the hook runner, creating it from the hook container on first use.
 *
 * @return CirrusSearchHookRunner
 */
private function getCirrusSearchHookRunner(): CirrusSearchHookRunner {
	if ( $this->cirrusSearchHookRunner !== null ) {
		return $this->cirrusSearchHookRunner;
	}
	$this->cirrusSearchHookRunner = new CirrusSearchHookRunner( $this->getHookContainer() );
	return $this->cirrusSearchHookRunner;
}
}