includes/Profile/SearchProfileService.php
<?php
namespace CirrusSearch\Profile;
use CirrusSearch\BuildDocument\DocumentSizeLimiter;
use CirrusSearch\Dispatch\BasicSearchQueryRoute;
use CirrusSearch\Dispatch\CirrusDefaultSearchQueryRoute;
use CirrusSearch\Dispatch\DefaultSearchQueryDispatchService;
use CirrusSearch\Dispatch\SearchQueryDispatchService;
use CirrusSearch\Dispatch\SearchQueryRoute;
use CirrusSearch\Search\SearchQuery;
use MediaWiki\Config\Config;
use MediaWiki\Context\RequestContext;
use MediaWiki\Request\WebRequest;
use MediaWiki\User\Options\UserOptionsLookup;
use MediaWiki\User\UserIdentity;
use Wikimedia\Assert\Assert;
/**
* Service to manage and access search profiles.
* Search profiles are arranged by type identified by a string constant:
* - COMPLETION: profiles used for autocomplete search when running the completion suggester
* - CROSS_PROJECT_BLOCK_SCORER: used when reordering blocks of crossproject search results
* - FT_QUERY_BUILDER: used when building fulltext search queries
* - PHRASE_SUGGESTER: Controls the behavior of the phrase suggester (did you mean suggestions)
* - INDEX_LOOKUP_FALLBACK: Controls the behavior of the index lookup fallback method (did you mean suggestions)
* - RESCORE: Controls how elasticsearch rescore queries are built
* - RESCORE_FUNCTION_CHAINS: Controls the list of functions used by a rescore profile
* - SANEITIZER: Controls the saneitizer
* - SIMILARITY: Defines similarity profiles used when building the index
*
* Multiple repository per type can be declared, in general we have:
* - the cirrus_base repository holding the default profiles contained in cirrus code
* - the cirrus_config repository holding the profiles customized using $wgCirrusSearch config vars.
*
* The service is bound to a SearchConfig instance which means that the profiles may vary depending
* on the SearchConfig being used. The cirrus_base repository will always hold the same set of
* profiles but the cirrus_config may change according to SearchConfig content.
*
* The service is also responsible for determining the name of the default profile for a given context.
* The profile context is a notion introduced to allow using the same profile for multiple purposes.
* For example the rescore profiles may be used for different kind of queries (fulltext vs prefixsearch).
* While they share the same set of profiles we may prefer to use different defaults depending on the
* type of the query. The profile context allows to distinguish between these use cases.
*
* Then in order to customize the default profile the service allows to define a list of "overriders":
* - ConfigSearchProfileOverride: overrides the default profile by reading a config var
* - UriParamSearchProfileOverride: overrides the default profile by inspecting the URI params
* - UserPrefSearchProfileOverride: overrides the default profile by inspecting the user prefs
*/
class SearchProfileService {
/**
* Profile type for ordering crossproject result blocks
*/
public const CROSS_PROJECT_BLOCK_SCORER = 'crossproject_block_scorer';
/**
* Profile type for similarity configuration
* Used when building the indices
*/
public const SIMILARITY = 'similarity';
/**
* Profile type for rescoring components
* Used at query when building elastic queries
* @see \CirrusSearch\Search\Rescore\RescoreBuilder
*/
public const RESCORE = 'rescore';
/**
* Profile type used to build function chains
* Used at query time by rescore builders
* @see \CirrusSearch\Search\Rescore\RescoreBuilder
*/
public const RESCORE_FUNCTION_CHAINS = 'rescore_function_chains';
/**
* Profile type used by the completion suggester
* @see \CirrusSearch\CompletionSuggester
*/
public const COMPLETION = 'completion';
/**
* Profile type used by the phrase suggester (fulltext search only)
* @see \CirrusSearch\Fallbacks\PhraseSuggestFallbackMethod
*/
public const PHRASE_SUGGESTER = 'phrase_suggester';
/**
* Profile type used by the index lookup fallback method method
* @see \CirrusSearch\Fallbacks\IndexLookupFallbackMethod
*/
public const INDEX_LOOKUP_FALLBACK = 'index_lookup_fallback';
/**
* Profile type used by saneitizer
* @see \CirrusSearch\Maintenance\SaneitizeJobs
*/
public const SANEITIZER = 'saneitizer';
/**
* Profile type used by the document size limiter
* @see DocumentSizeLimiter
*/
public const DOCUMENT_SIZE_LIMITER = 'document_size_limiter';
/**
* Profiles used for building fulltext search queries
* @see \CirrusSearch\Search\SearchContext::getFulltextQueryBuilderProfile()
*/
public const FT_QUERY_BUILDER = 'ft_query_builder';
/**
* Profile type used by FallbackRunner.
* @see \CirrusSearch\Fallbacks\FallbackRunner::create()
*/
public const FALLBACKS = 'fallbacks';
/**
* Profile context used for prefix search queries
*/
public const CONTEXT_PREFIXSEARCH = 'prefixsearch';
/**
* Default profile context (used by fulltext queries)
*/
public const CONTEXT_DEFAULT = 'default';
/**
* List of profile repositories, grouped by type and then by repository name.
* @var SearchProfileRepository[][]
*/
private $repositories = [];
/**
* List of default profile names to use for a given type in a given context
* Key path is [type][context]
* @var string[][]
*/
private $defaultProfiles = [];
/**
* list of overriders, $this->overriders[$type][$context] is an array of SearchProfileOverride
* Key path is [type][context]
* @var SearchProfileOverride[][][]
*/
private $overriders = [];
/**
* @var UserIdentity
*/
private $user;
/**
* @var WebRequest
*/
private $request;
/**
* @var bool
*/
private $frozen;
/**
* @var SearchQueryDispatchService|null (lazy loaded)
*/
private $dispatchService;
/**
* @var SearchQueryRoute[][]
*/
private $routes;
/**
* @var UserOptionsLookup
*/
private $userOptionsLookup;
/**
* @param UserOptionsLookup $userOptionsLookup
* @param WebRequest|null $request obtained from \RequestContext::getMain()->getRequest() if null
* @param UserIdentity|null $user obtained from \RequestContext::getMain()->getUser() if null
*/
public function __construct(
UserOptionsLookup $userOptionsLookup,
WebRequest $request = null,
UserIdentity $user = null
) {
$this->userOptionsLookup = $userOptionsLookup;
$this->request = $request ?? RequestContext::getMain()->getRequest();
$this->user = $user ?? RequestContext::getMain()->getUser();
$this->routes = [
'searchText' => [ CirrusDefaultSearchQueryRoute::searchTextDefaultRoute() ]
];
}
/**
* @param string $type
* @param string $name
* @return bool
*/
public function hasProfile( $type, $name ) {
if ( isset( $this->repositories[$type] ) ) {
foreach ( $this->repositories[$type] as $repo ) {
if ( $repo->hasProfile( $name ) ) {
return true;
}
}
}
return false;
}
/**
* @param string $type
* @param string $context
* @return bool
*/
public function supportsContext( $type, $context ) {
return isset( $this->defaultProfiles[$type][$context] );
}
/**
* Load a profile by its name.
* It's better to use self::loadProfile and let the service
* determine the proper profile to use in a given context.
*
* @param string $type the type of the profile (see class doc)
* @param string $name
* @param bool $failIfMissing when true will throw SearchProfileException
* @return array|null
*/
public function loadProfileByName( $type, $name, $failIfMissing = true ) {
if ( isset( $this->repositories[$type] ) ) {
$repos = $this->repositories[$type];
foreach ( $repos as $repo ) {
$prof = $repo->getProfile( $name );
if ( $prof !== null ) {
return $prof;
}
}
}
if ( $failIfMissing ) {
throw new SearchProfileException( "Cannot load a profile type $type: $name not found" );
}
return null;
}
/**
* Load a profile for the context or by its name if name is provided
*
* @param string $type
* @param string $context used to determine the name of the profile if $name is not provided
* @param string|null $name force the name of the profile to use
* @param string[] $contextParams Parameters of the context, for determining the profile
* name. Some overriders use these to decide if an override is appropriate.
* @return array
* @see self::getProfileName()
*/
public function loadProfile( $type, $context = self::CONTEXT_DEFAULT, $name = null, $contextParams = [] ) {
if ( $name === null && $context === null ) {
throw new SearchProfileException( '$name and $context cannot be both null' );
}
if ( $name === null ) {
$name = $this->getProfileName( $type, $context, $contextParams );
}
return $this->loadProfileByName( $type, $name );
}
/**
* @param string $type the type of the profile (see class doc)
* @param string $context
* @param string[] $contextParams Parameters of the context, for determining the profile
* name. Some overriders use these to decide if an override is appropriate.
* @return string
*/
public function getProfileName( $type, $context = self::CONTEXT_DEFAULT, array $contextParams = [] ) {
$minPrio = PHP_INT_MAX;
if ( !isset( $this->defaultProfiles[$type][$context] ) ) {
throw new SearchProfileException( "No default profile found for $type in context $context" );
}
$profile = $this->defaultProfiles[$type][$context];
if ( !$this->hasProfile( $type, $profile ) ) {
throw new SearchProfileException( "The default profile $profile does not exist in profile repositories of type $type" );
}
if ( !isset( $this->overriders[$type][$context] ) ) {
return $profile;
}
foreach ( $this->overriders[$type][$context] as $overrider ) {
if ( $overrider->priority() < $minPrio ) {
$name = $overrider->getOverriddenName( $contextParams );
if ( $name !== null && $this->hasProfile( $type, $name ) ) {
$minPrio = $overrider->priority();
$profile = $name;
}
}
}
return $profile;
}
/**
* Register a new profile repository
* @param SearchProfileRepository $repository
*/
public function registerRepository( SearchProfileRepository $repository ) {
$this->checkFrozen();
if ( isset( $this->repositories[$repository->repositoryType()][$repository->repositoryName()] ) ) {
throw new SearchProfileException( "A profile repository type {$repository->repositoryType()} " .
"named {$repository->repositoryName()} is already registered." );
}
$this->repositories[$repository->repositoryType()][$repository->repositoryName()] = $repository;
}
/**
* Register a new repository backed by a simple array
* @param string $repoType
* @param string $repoName
* @param array $profiles
*/
public function registerArrayRepository( $repoType, $repoName, array $profiles ) {
$this->registerRepository( ArrayProfileRepository::fromArray( $repoType, $repoName, $profiles ) );
}
/**
* Register a new repository backed by a PHP file returning an array.
*
* <b>NOTE:</b> $phpFile is loaded with PHP's require keyword.
*
* @param string $type
* @param string $name
* @param string $phpFile
* @see FileProfileRepository
*/
public function registerFileRepository( $type, $name, $phpFile ) {
$this->registerRepository( ArrayProfileRepository::fromFile( $type, $name, $phpFile ) );
}
/**
* List profiles under type $type that are suited
* to be exposed to the users.
*
* This method is provided for convenience and to help
* users to discover existing profile.
* It's possible that an existing profile may not be listed here
* so this method must not be used to verify the existence of a given
* profile. Use hasProfile instead.
*
* @param string $type
* @return array
*/
public function listExposedProfiles( $type ) {
$profiles = [];
if ( isset( $this->repositories[$type] ) ) {
foreach ( $this->repositories[$type] as $repo ) {
foreach ( $repo->listExposedProfiles() as $name => $profile ) {
if ( !isset( $profiles[$name] ) ) {
$profiles[$name] = $profile;
}
}
}
}
return $profiles;
}
/**
* Register a default profile named $profileName for $type in context $profileContext
* It must be an existing profile otherwise it will always fail when trying to determine
* the profile name.
* @param string $type
* @param string $profileContext
* @param string $profileName
*/
public function registerDefaultProfile( $type, $profileContext, $profileName ) {
if ( isset( $this->defaultProfiles[$type][$profileContext] ) ) {
throw new SearchProfileException( "A default profile already exists for $type in context $profileContext" );
}
$this->defaultProfiles[$type][$profileContext] = $profileName;
}
/**
* Register a new profile overrider.
* It allows to override the default profile based on the implementation of SearchProfileOverride.
* @param string $type
* @param string|string[] $profileContext one or multiple contexts
* @param SearchProfileOverride $override
*/
public function registerProfileOverride( $type, $profileContext, SearchProfileOverride $override ) {
$this->checkFrozen();
if ( !is_array( $profileContext ) ) {
$profileContext = [ $profileContext ];
}
foreach ( $profileContext as $context ) {
$this->overriders[$type][$context][] = $override;
}
}
/**
* Register a new overrider using the ConfigSearchProfileOverride implementation
* @param string $type
* @param string|string[] $profileContext one or multiple contexts
* @param Config $config
* @param string $configEntry
* @see ConfigSearchProfileOverride
*/
public function registerConfigOverride( $type, $profileContext, Config $config, $configEntry ) {
$this->registerProfileOverride( $type, $profileContext, new ConfigSearchProfileOverride( $config, $configEntry ) );
}
/**
* @param string $type
* @param string|string[] $profileContext one or multiple contexts
* @param string $uriParam
*/
public function registerUriParamOverride( $type, $profileContext, $uriParam ) {
$this->registerProfileOverride( $type, $profileContext, new UriParamSearchProfileOverride( $this->request, $uriParam ) );
}
/**
* @param string $type
* @param string|string[] $profileContext one or multiple contexts
* @param string $userPref the name of the key used to store this user preference
*/
public function registerUserPrefOverride( $type, $profileContext, $userPref ) {
$this->registerProfileOverride(
$type,
$profileContext,
new UserPrefSearchProfileOverride( $this->user, $this->userOptionsLookup, $userPref ) );
}
/**
* @param string $type
* @param string|string[] $profileContext one or multiple contexts
* @param string $template A templated profile name
* @param string[] $params Map from string in $template to context parameter to
* replace with. All params must be available in the context parameters or
* no override will be applied.
*/
public function registerContextualOverride( $type, $profileContext, $template, array $params ) {
$this->registerProfileOverride( $type, $profileContext, new ContextualProfileOverride( $template, $params ) );
}
/**
* Register a new route to be used by the SearchQueryDispatchService
*
* @param SearchQueryRoute $route
* @see SearchQueryDispatchService
* @see SearchProfileService::getDispatchService()
*/
public function registerSearchQueryRoute( SearchQueryRoute $route ) {
$this->checkFrozen();
if ( !isset( $this->routes[$route->getSearchEngineEntryPoint()] ) ) {
throw new SearchProfileException( "Unsupported search engine entry point {$route->getSearchEngineEntryPoint()}" );
}
$this->routes[$route->getSearchEngineEntryPoint()][] = $route;
}
/**
* Register a new static route for fulltext search queries.
*
* @param string $profileContext
* @param float $score score of the route
* @param int[] $supportedNamespaces
* @param string[] $acceptableQueryClasses
* @see SearchProfileService::getDispatchService()
* @see SearchQueryDispatchService::CIRRUS_DEFAULTS_SCORE
*/
public function registerFTSearchQueryRoute(
$profileContext,
$score,
array $supportedNamespaces,
array $acceptableQueryClasses = []
) {
Assert::parameter( $score > SearchQueryDispatchService::CIRRUS_DEFAULTS_SCORE, '$score',
"This route will never be selected it must " .
"be greater than " . SearchQueryDispatchService::CIRRUS_DEFAULTS_SCORE
);
$this->registerSearchQueryRoute( new BasicSearchQueryRoute( SearchQuery::SEARCH_TEXT,
$supportedNamespaces, $acceptableQueryClasses, $profileContext, $score ) );
}
/**
* Return the service responsible for dispatching a SearchQuery
* to its preferred profile context.
*
* @return SearchQueryDispatchService
*/
public function getDispatchService(): SearchQueryDispatchService {
if ( $this->dispatchService === null ) {
Assert::precondition( $this->frozen,
"Must be frozen when accessing the SearchQuery dispatch service." );
$this->dispatchService = new DefaultSearchQueryDispatchService( $this->routes );
}
return $this->dispatchService;
}
/**
* Freeze the service, any attempt to declare a new repository
* will fail.
*/
public function freeze() {
$this->frozen = true;
}
private function checkFrozen() {
if ( $this->frozen ) {
throw new SearchProfileException( self::class . " is frozen, you cannot register new repositories/overriders." );
}
}
/**
* @return string[]
*/
public function listProfileTypes() {
return array_keys( $this->repositories );
}
/**
* List default profile per context
* @param string $type
* @return string[] context is the key, the default profile name
*/
public function listProfileContexts( $type ) {
return $this->defaultProfiles[$type] ?? [];
}
/**
* List profile repositories
* @param string $type
* @return SearchProfileRepository[]
*/
public function listProfileRepositories( $type ) {
return $this->repositories[$type] ?? [];
}
/**
* L
* @param string $type
* @param string $context
* @return SearchProfileOverride[]
*/
public function listProfileOverrides( $type, $context ) {
return $this->overriders[$type][$context] ?? [];
}
}