includes/Hooks.php
<?php
namespace CirrusSearch;
use CirrusSearch\Search\FancyTitleResultsType;
use HtmlArmor;
use ISearchResultSet;
use MediaWiki\Actions\ActionEntryPoint;
use MediaWiki\Api\ApiBase;
use MediaWiki\Api\ApiMain;
use MediaWiki\Api\ApiOpenSearch;
use MediaWiki\Api\Hook\APIAfterExecuteHook;
use MediaWiki\Api\Hook\APIQuerySiteInfoStatisticsInfoHook;
use MediaWiki\Config\Config;
use MediaWiki\Config\ConfigFactory;
use MediaWiki\Context\RequestContext;
use MediaWiki\Hook\ApiBeforeMainHook;
use MediaWiki\Hook\BeforeInitializeHook;
use MediaWiki\Hook\SoftwareInfoHook;
use MediaWiki\Hook\SpecialSearchResultsAppendHook;
use MediaWiki\Hook\SpecialSearchResultsHook;
use MediaWiki\Hook\SpecialStatsAddExtraHook;
use MediaWiki\Html\Html;
use MediaWiki\MediaWikiServices;
use MediaWiki\Output\OutputPage;
use MediaWiki\Preferences\Hook\GetPreferencesHook;
use MediaWiki\Request\WebRequest;
use MediaWiki\ResourceLoader\Hook\ResourceLoaderGetConfigVarsHook;
use MediaWiki\Search\Hook\PrefixSearchExtractNamespaceHook;
use MediaWiki\Search\Hook\SearchGetNearMatchHook;
use MediaWiki\Search\Hook\ShowSearchHitTitleHook;
use MediaWiki\Specials\SpecialSearch;
use MediaWiki\Title\Title;
use MediaWiki\User\Hook\UserGetDefaultOptionsHook;
use MediaWiki\User\User;
use SearchResult;
/**
* All CirrusSearch's external hooks.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*/
class Hooks implements
UserGetDefaultOptionsHook,
GetPreferencesHook,
APIAfterExecuteHook,
ApiBeforeMainHook,
APIQuerySiteInfoStatisticsInfoHook,
BeforeInitializeHook,
PrefixSearchExtractNamespaceHook,
ResourceLoaderGetConfigVarsHook,
SearchGetNearMatchHook,
ShowSearchHitTitleHook,
SoftwareInfoHook,
SpecialSearchResultsHook,
SpecialSearchResultsAppendHook,
SpecialStatsAddExtraHook
{
/**
 * Factory used to build the 'CirrusSearch' config when this handler needs it.
 *
 * @var ConfigFactory
 */
private $configFactory;
/**
 * @param ConfigFactory $configFactory Stored on the instance for later config lookups
 */
public function __construct( ConfigFactory $configFactory ) {
$this->configFactory = $configFactory;
}
/**
 * BeforeInitialize hook handler. Hooked to call initialize after the user is set up.
 *
 * Only $request is used here; it is forwarded to initializeForRequest(). The other
 * parameters are accepted but not read by this method.
 *
 * @param Title $title
 * @param null $unused
 * @param OutputPage $outputPage
 * @param User $user
 * @param WebRequest $request
 * @param ActionEntryPoint $mediaWiki
 */
public function onBeforeInitialize( $title, $unused, $outputPage, $user, $request, $mediaWiki ) {
self::initializeForRequest( $request );
}
/**
 * ApiBeforeMain hook handler: runs the request-based Cirrus initialization
 * for API requests, using the request attached to the ApiMain instance.
 *
 * @param ApiMain &$apiMain The ApiMain instance being used
 */
public function onApiBeforeMain( &$apiMain ) {
	$apiRequest = $apiMain->getRequest();
	self::initializeForRequest( $apiRequest );
}
/**
 * Initializes the portions of Cirrus that require the $request to be fully initialized.
 *
 * Overwrites several $wgCirrusSearch* globals in place, first from the on-wiki
 * more-like-this settings message and then from request parameters.
 *
 * @param WebRequest $request Request whose 'cirrus*' parameters are consulted
 */
public static function initializeForRequest( WebRequest $request ) {
global $wgCirrusSearchPhraseRescoreWindowSize,
$wgCirrusSearchFunctionRescoreWindowSize,
$wgCirrusSearchFragmentSize,
$wgCirrusSearchPhraseRescoreBoost,
$wgCirrusSearchPhraseSlop,
$wgCirrusSearchLogElasticRequests,
$wgCirrusSearchLogElasticRequestsSecret,
$wgCirrusSearchEnableAltLanguage,
$wgCirrusSearchUseCompletionSuggester;
// Message-based more-like-this settings are applied first; the request-based
// overrideMoreLikeThisOptions() call further down runs later on the same globals.
self::overrideMoreLikeThisOptionsFromMessage();
// The trailing integer argument, where given, is the $limit passed to overrideNumeric().
self::overrideNumeric( $wgCirrusSearchPhraseRescoreWindowSize,
$request, 'cirrusPhraseWindow', 10000 );
self::overrideNumeric( $wgCirrusSearchPhraseRescoreBoost,
$request, 'cirrusPhraseBoost' );
// Only the 'boost' entry of the phrase slop array is overridable from the request.
self::overrideNumeric( $wgCirrusSearchPhraseSlop[ 'boost' ],
$request, 'cirrusPhraseSlop', 10 );
self::overrideNumeric( $wgCirrusSearchFunctionRescoreWindowSize,
$request, 'cirrusFunctionWindow', 10000 );
self::overrideNumeric( $wgCirrusSearchFragmentSize,
$request, 'cirrusFragmentSize', 1000 );
if ( $wgCirrusSearchUseCompletionSuggester === 'yes' || $wgCirrusSearchUseCompletionSuggester === true ) {
// Only allow disabling the completion suggester, enabling it from request params might cause failures
// as the index might not be present.
self::overrideYesNo( $wgCirrusSearchUseCompletionSuggester,
$request, 'cirrusUseCompletionSuggester' );
}
self::overrideMoreLikeThisOptions( $request );
// When the request supplies the configured secret, request logging is set to false.
self::overrideSecret( $wgCirrusSearchLogElasticRequests,
$wgCirrusSearchLogElasticRequestsSecret, $request, 'cirrusLogElasticRequests', false );
self::overrideYesNo( $wgCirrusSearchEnableAltLanguage,
$request, 'cirrusAltLanguage' );
}
/**
 * Set $dest to the numeric value from $request->getVal( $name ) if it is <= $limit,
 * or >= $limit if $upperLimit is false.
 *
 * Thin wrapper: all arguments are forwarded unchanged to Util::overrideNumeric(),
 * which performs the actual check and assignment.
 *
 * @param mixed &$dest Variable to overwrite
 * @param WebRequest $request
 * @param string $name Request parameter name
 * @param int|null $limit
 * @param bool $upperLimit
 */
private static function overrideNumeric(
&$dest,
WebRequest $request,
$name,
$limit = null,
$upperLimit = true
) {
Util::overrideNumeric( $dest, $request, $name, $limit, $upperLimit );
}
/**
 * Set $dest to the raw value of request parameter $name, but only when the
 * parameter is present and passes isMinimumShouldMatch(). Otherwise $dest is
 * left untouched.
 *
 * @param mixed &$dest Variable to overwrite
 * @param WebRequest $request
 * @param string $name Request parameter name
 */
private static function overrideMinimumShouldMatch( &$dest, WebRequest $request, $name ) {
	$candidate = $request->getVal( $name );
	if ( $candidate === null ) {
		// Parameter not supplied.
		return;
	}
	if ( !self::isMinimumShouldMatch( $candidate ) ) {
		// Supplied, but not an accepted minimum_should_match value.
		return;
	}
	$dest = $candidate;
}
/**
 * Set $dest to $value when $request->getVal( $name ) equals $secret.
 *
 * Nothing happens when $secret is empty/falsy or not a string, or when the
 * request parameter is absent. The comparison uses hash_equals() so that the
 * time taken does not reveal how much of a guessed secret was correct.
 *
 * @param mixed &$dest Variable to overwrite on a match
 * @param string $secret Configured secret; a falsy value disables the override
 * @param WebRequest $request
 * @param string $name Request parameter name carrying the secret
 * @param mixed $value Value assigned to $dest on a match
 */
private static function overrideSecret( &$dest, $secret, WebRequest $request, $name, $value = true ) {
	// A non-string secret could never be identical to a request string, and a
	// falsy secret means the feature is not configured.
	if ( !$secret || !is_string( $secret ) ) {
		return;
	}
	$provided = $request->getVal( $name );
	// hash_equals() requires two strings; a missing parameter yields null.
	if ( is_string( $provided ) && hash_equals( $secret, $provided ) ) {
		$dest = $value;
	}
}
/**
 * Set $dest to the true/false from $request->getVal( $name ) if yes/no.
 *
 * Thin wrapper: forwards all arguments unchanged to Util::overrideYesNo().
 *
 * @param mixed &$dest Variable to overwrite
 * @param WebRequest $request
 * @param string $name Request parameter name
 */
private static function overrideYesNo( &$dest, WebRequest $request, $name ) {
Util::overrideYesNo( $dest, $request, $name );
}
/**
 * Extract more like this settings from the i18n message cirrussearch-morelikethis-settings.
 *
 * The parsed message lines are cached in the local server object cache for 600
 * seconds. Each line of the form "key:value" may update an entry of
 * $wgCirrusSearchMoreLikeThisConfig (numeric settings and minimum_should_match)
 * or replace $wgCirrusSearchMoreLikeThisFields ('fields'); the literal value
 * 'null' removes a config entry. Lines without a colon and unknown keys are ignored.
 * After each processed line, max_query_terms is capped at
 * $wgCirrusSearchMoreLikeThisMaxQueryTermsLimit.
 */
private static function overrideMoreLikeThisOptionsFromMessage() {
	global $wgCirrusSearchMoreLikeThisConfig,
		$wgCirrusSearchMoreLikeThisAllowedFields,
		$wgCirrusSearchMoreLikeThisMaxQueryTermsLimit,
		$wgCirrusSearchMoreLikeThisFields;

	$cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
	$lines = $cache->getWithSetCallback(
		$cache->makeKey( 'cirrussearch-morelikethis-settings' ),
		600,
		static function () {
			$source = wfMessage( 'cirrussearch-morelikethis-settings' )->inContentLanguage();
			if ( $source->isDisabled() ) {
				return [];
			}
			return Util::parseSettingsInMessage( $source->plain() );
		}
	);

	foreach ( $lines as $line ) {
		if ( strpos( $line, ':' ) === false ) {
			continue;
		}
		[ $k, $v ] = explode( ':', $line, 2 );
		switch ( $k ) {
			case 'min_doc_freq':
			case 'max_doc_freq':
			case 'max_query_terms':
			case 'min_term_freq':
			case 'min_word_length':
			case 'max_word_length':
				if ( is_numeric( $v ) && $v >= 0 ) {
					$wgCirrusSearchMoreLikeThisConfig[$k] = intval( $v );
				} elseif ( $v === 'null' ) {
					unset( $wgCirrusSearchMoreLikeThisConfig[$k] );
				}
				break;
			case 'percent_terms_to_match':
				// @deprecated Use minimum_should_match now
				$k = 'minimum_should_match';
				if ( is_numeric( $v ) && $v > 0 && $v <= 1 ) {
					// Round rather than truncate: 0.29 * 100 is 28.999999999999996 in
					// floating point and a bare (int) cast would turn it into 28%.
					$v = ( (int)round( (float)$v * 100 ) ) . '%';
				} else {
					break;
				}
				// intentional fall-through
			case 'minimum_should_match':
				if ( self::isMinimumShouldMatch( $v ) ) {
					$wgCirrusSearchMoreLikeThisConfig[$k] = $v;
				} elseif ( $v === 'null' ) {
					unset( $wgCirrusSearchMoreLikeThisConfig[$k] );
				}
				break;
			case 'fields':
				$wgCirrusSearchMoreLikeThisFields = array_intersect(
					array_map( 'trim', explode( ',', $v ) ),
					$wgCirrusSearchMoreLikeThisAllowedFields );
				break;
		}
		// max_query_terms may have just been unset by a 'null' value (or may never
		// have been configured), so only cap it when it is actually present.
		if ( isset( $wgCirrusSearchMoreLikeThisConfig['max_query_terms'] )
			&& $wgCirrusSearchMoreLikeThisConfig['max_query_terms'] > $wgCirrusSearchMoreLikeThisMaxQueryTermsLimit
		) {
			$wgCirrusSearchMoreLikeThisConfig['max_query_terms'] = $wgCirrusSearchMoreLikeThisMaxQueryTermsLimit;
		}
	}
}
/**
 * Check whether a string is an accepted minimum_should_match value.
 *
 * Accepted forms are a non-zero count written with digits only (e.g. "3"), or a
 * percentage with an optional leading minus sign whose magnitude is greater than
 * 0 and at most 100 (e.g. "75%", "-25%").
 *
 * @param string $v The value to check
 * @return bool True if $v has one of the accepted forms
 * @todo minimum_should_match also supports combinations (3<90%) and multiple combinations
 */
private static function isMinimumShouldMatch( string $v ) {
	// Digits only: valid exactly when the count is non-zero. A digit-only
	// string can never end in '%', so no percentage check is needed for it.
	if ( ctype_digit( $v ) ) {
		return $v != 0;
	}
	// Optional '-', one or more digits, then '%' as the very last character.
	$parts = [];
	if ( preg_match( '/^-?(\d+)%$/D', $v, $parts ) !== 1 ) {
		return false;
	}
	$magnitude = $parts[1];
	return $magnitude > 0 && $magnitude <= 100;
}
/**
 * Override more like this settings from request URI parameters.
 *
 * Each entry of $wgCirrusSearchMoreLikeThisConfig is handed by reference to the
 * matching override helper, which decides whether to overwrite it. The
 * 'cirrusMltFields' parameter, when present, replaces
 * $wgCirrusSearchMoreLikeThisFields with the requested fields that also appear in
 * $wgCirrusSearchMoreLikeThisAllowedFields.
 *
 * @param WebRequest $request
 */
private static function overrideMoreLikeThisOptions( WebRequest $request ) {
global $wgCirrusSearchMoreLikeThisConfig,
$wgCirrusSearchMoreLikeThisAllowedFields,
$wgCirrusSearchMoreLikeThisMaxQueryTermsLimit,
$wgCirrusSearchMoreLikeThisFields;
self::overrideNumeric( $wgCirrusSearchMoreLikeThisConfig['min_doc_freq'],
$request, 'cirrusMltMinDocFreq' );
self::overrideNumeric( $wgCirrusSearchMoreLikeThisConfig['max_doc_freq'],
$request, 'cirrusMltMaxDocFreq' );
// max_query_terms is the only numeric setting here that is passed a limit.
// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
self::overrideNumeric( $wgCirrusSearchMoreLikeThisConfig['max_query_terms'],
$request, 'cirrusMltMaxQueryTerms', $wgCirrusSearchMoreLikeThisMaxQueryTermsLimit );
self::overrideNumeric( $wgCirrusSearchMoreLikeThisConfig['min_term_freq'],
$request, 'cirrusMltMinTermFreq' );
self::overrideMinimumShouldMatch( $wgCirrusSearchMoreLikeThisConfig['minimum_should_match'],
$request, 'cirrusMltMinimumShouldMatch' );
self::overrideNumeric( $wgCirrusSearchMoreLikeThisConfig['min_word_length'],
$request, 'cirrusMltMinWordLength' );
self::overrideNumeric( $wgCirrusSearchMoreLikeThisConfig['max_word_length'],
$request, 'cirrusMltMaxWordLength' );
$fields = $request->getVal( 'cirrusMltFields' );
if ( $fields !== null ) {
// Comma-separated list; each name is trimmed, then filtered against the allowed fields.
$wgCirrusSearchMoreLikeThisFields = array_intersect(
array_map( 'trim', explode( ',', $fields ) ),
$wgCirrusSearchMoreLikeThisAllowedFields );
}
}
/**
 * SoftwareInfo hook handler: adds an Elasticsearch entry to the software list
 * shown on Special:Version when the version lookup succeeds.
 *
 * @param array &$software Array of wikitext and version numbers
 */
public function onSoftwareInfo( &$software ) {
	$lookup = ( new Version( self::getConnection() ) )->get();
	if ( !$lookup->isOK() ) {
		// We've already logged if this isn't ok and there is no need to warn the user on this page.
		return;
	}
	$software[ '[https://www.elastic.co/elasticsearch Elasticsearch]' ] = $lookup->getValue();
}
/**
 * SpecialSearchResultsAppend hook handler.
 *
 * Appends the feedback link when 'CirrusSearchFeedbackLink' is configured, and
 * exports the last search metrics as JS config vars when the search engine in
 * use is CirrusSearch.
 *
 * @param SpecialSearch $specialSearch
 * @param OutputPage $out
 * @param string $term
 */
public function onSpecialSearchResultsAppend( $specialSearch, $out, $term ) {
	$link = $out->getConfig()->get( 'CirrusSearchFeedbackLink' );
	if ( $link ) {
		self::addSearchFeedbackLink( $link, $specialSearch, $out );
	}

	$searchEngine = $specialSearch->getSearchEngine();
	if ( !( $searchEngine instanceof CirrusSearch ) ) {
		// Not a Cirrus page: there are no metrics to embed.
		return;
	}
	$out->addJsConfigVars( $searchEngine->getLastSearchMetrics() );
}
/**
 * Append a <div> holding a single feedback anchor to the page output.
 *
 * @param string $link Target URL of the anchor
 * @param SpecialSearch $specialSearch Supplies the 'cirrussearch-give-feedback' label message
 * @param OutputPage $out Page the HTML is added to
 */
private static function addSearchFeedbackLink( $link, SpecialSearch $specialSearch, OutputPage $out ) {
	$label = $specialSearch->msg( 'cirrussearch-give-feedback' )->text();
	$out->addHTML(
		Html::rawElement(
			'div',
			[],
			// Html::element() is given the label as text content, not as raw HTML.
			Html::element( 'a', [ 'href' => $link ], $label )
		)
	);
}
/**
 * PrefixSearchExtractNamespace hook handler: extract namespaces from the query string.
 *
 * Does nothing (and returns true) unless $wgSearchType is 'CirrusSearch';
 * otherwise delegates to prefixSearchExtractNamespaceWithConnection().
 *
 * @param array &$namespaces
 * @param string &$search
 * @return bool
 */
public function onPrefixSearchExtractNamespace( &$namespaces, &$search ) {
	global $wgSearchType;

	if ( $wgSearchType === 'CirrusSearch' ) {
		$connection = self::getConnection();
		return self::prefixSearchExtractNamespaceWithConnection( $connection, $namespaces, $search );
	}
	return true;
}
/**
 * Resolve a namespace prefix in $search using the configured
 * 'CirrusSearchNamespaceResolutionMethod'.
 *
 * With the 'elastic' method a Searcher updates the namespaces from the query.
 * With any other method the text before the first colon is passed to
 * Util::identifyNamespace(); on success $namespaces becomes that single
 * namespace and $search is reduced to the text after the colon.
 *
 * @param Connection $connection
 * @param array &$namespaces
 * @param string &$search
 * @return false Always false
 */
public static function prefixSearchExtractNamespaceWithConnection(
	Connection $connection,
	&$namespaces,
	&$search
) {
	$method = $connection->getConfig()->get( 'CirrusSearchNamespaceResolutionMethod' );

	if ( $method === 'elastic' ) {
		$searcher = new Searcher( $connection, 0, 1, $connection->getConfig(), $namespaces );
		$searcher->updateNamespacesFromQuery( $search );
		$namespaces = $searcher->getSearchContext()->getNamespaces();
		return false;
	}

	// Split once on the first colon: [ prefix, remainder ]. A single piece
	// means the search text contains no colon at all.
	$pieces = explode( ':', $search, 2 );
	if ( count( $pieces ) === 2 ) {
		[ $prefix, $remainder ] = $pieces;
		$ns = Util::identifyNamespace( $prefix, $method );
		if ( $ns !== false ) {
			$namespaces = [ $ns ];
			$search = $remainder;
		}
	}
	return false;
}
/**
 * SearchGetNearMatch hook handler; forwards to the static handleSearchGetNearMatch().
 *
 * @param string $term
 * @param null|Title &$titleResult
 * @return bool Whatever handleSearchGetNearMatch() returns
 */
public function onSearchGetNearMatch( $term, &$titleResult ) {
return self::handleSearchGetNearMatch( $term, $titleResult );
}
/**
 * Let Elasticsearch take a crack at getting near matches once mediawiki has tried all kinds of variants.
 * @param string $term the original search term and all language variants
 * @param null|Title &$titleResult resulting match. A Title if we found something, unchanged otherwise.
 * @return bool return false if we find something, true otherwise so mediawiki can try its default behavior
 */
public static function handleSearchGetNearMatch( $term, &$titleResult ) {
global $wgSearchType;
// Only act when CirrusSearch is the configured search backend.
if ( $wgSearchType !== 'CirrusSearch' ) {
return true;
}
$title = Title::newFromText( $term );
if ( $title === null ) {
// NOTE(review): this returns false without setting $titleResult, which differs from
// the documented "false if we find something" contract — confirm this is intended.
return false;
}
$user = RequestContext::getMain()->getUser();
// Ask for the first 50 results we see. If there are more than that too bad.
$searcher = new Searcher(
self::getConnection(), 0, 50, self::getConfig(), [ $title->getNamespace() ], $user );
if ( $title->getNamespace() === NS_MAIN ) {
// Main-namespace title: let the searcher derive namespaces from the raw term.
$searcher->updateNamespacesFromQuery( $term );
} else {
// Otherwise search with the title text only; the namespace was given to the Searcher above.
$term = $title->getText();
}
$searcher->setResultsType( new FancyTitleResultsType( 'near_match' ) );
$status = $searcher->nearMatchTitleSearch( $term );
// There is no way to send errors or warnings back to the caller here so we have to make do with
// only sending results back if there are results and relying on the logging done at the status
// construction site to log errors.
if ( !$status->isOK() ) {
return true;
}
$contLang = MediaWikiServices::getInstance()->getContentLanguage();
$picker = new NearMatchPicker( $contLang, $term, $status->getValue() );
$best = $picker->pickBest();
if ( $best ) {
$titleResult = $best;
return false;
}
// Didn't find a result so let MediaWiki have a crack at it.
return true;
}
/**
 * ResourceLoaderGetConfigVars hook handler.
 *
 * Exposes the configured 'CirrusSearchFeedbackLink' to JavaScript as
 * 'wgCirrusSearchFeedbackLink'.
 * @see https://www.mediawiki.org/wiki/Manual:Hooks/ResourceLoaderGetConfigVars
 *
 * @param array &$vars
 * @param string $skin
 * @param Config $config
 */
public function onResourceLoaderGetConfigVars( array &$vars, $skin, Config $config ): void {
	$feedbackLink = $config->get( 'CirrusSearchFeedbackLink' );
	// Array union: a value already present in $vars under this key is kept.
	$vars += [ 'wgCirrusSearchFeedbackLink' => $feedbackLink ];
}
/**
 * Fetch the 'CirrusSearch' config from the global service container's config factory.
 *
 * @return SearchConfig
 */
private static function getConfig() {
	$factory = MediaWikiServices::getInstance()->getConfigFactory();
	// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
	return $factory->makeConfig( 'CirrusSearch' );
}
/**
 * Create a new Connection built from the 'CirrusSearch' config.
 *
 * @return Connection A fresh instance on every call
 */
private static function getConnection() {
	$searchConfig = self::getConfig();
	return new Connection( $searchConfig );
}
/**
 * ShowSearchHitTitle hook handler: add $wgCirrusSearchInterwikiProv to external results.
 *
 * When the global is set and the title is external, its value is stored in
 * the link query under the 'wprov' key.
 *
 * @param Title &$title
 * @param string|HtmlArmor|null &$text
 * @param SearchResult $result
 * @param array $terms
 * @param SpecialSearch $page
 * @param string[] &$query
 * @param string[] &$attributes
 */
public function onShowSearchHitTitle( &$title, &$text, $result, $terms, $page, &$query, &$attributes ) {
	global $wgCirrusSearchInterwikiProv;

	if ( !$wgCirrusSearchInterwikiProv ) {
		return;
	}
	if ( !$title->isExternal() ) {
		return;
	}
	$query["wprov"] = $wgCirrusSearchInterwikiProv;
}
/**
 * APIAfterExecute hook handler: when query logs exist, emit an X-Search-ID
 * response header, plus an X-OpenSearch-Type header for ApiOpenSearch modules
 * that used at least one query type.
 *
 * @param ApiBase $module
 */
public function onAPIAfterExecute( $module ) {
	if ( !ElasticsearchIntermediary::hasQueryLogs() ) {
		return;
	}

	$response = $module->getContext()->getRequest()->response();
	$response->header( 'X-Search-ID: ' . Util::getRequestSetToken() );

	if ( !( $module instanceof ApiOpenSearch ) ) {
		return;
	}
	$queryTypes = ElasticsearchIntermediary::getQueryTypesUsed();
	if ( !$queryTypes ) {
		return;
	}
	$response->header( 'X-OpenSearch-Type: ' . implode( ',', $queryTypes ) );
}
/**
 * SpecialSearchResults hook handler.
 *
 * Loads the 'ext.cirrus.serp' module, exports the request set token (and the
 * active user test, if any) as JS config vars, and registers the title and
 * text result sets with ElasticsearchIntermediary.
 *
 * @param string $term
 * @param ISearchResultSet|null &$titleMatches
 * @param ISearchResultSet|null &$textMatches
 */
public function onSpecialSearchResults( $term, &$titleMatches, &$textMatches ) {
	$out = RequestContext::getMain()->getOutput();
	$out->addModules( 'ext.cirrus.serp' );

	$jsVars = [ 'wgCirrusSearchRequestSetToken' => Util::getRequestSetToken() ];

	// In theory UserTesting should always have been activated by now, but if
	// somehow it wasn't we don't want to activate it now at the end of the request
	// and report incorrect data.
	$userTesting = UserTestingStatus::hasInstance() ? UserTestingStatus::getInstance() : null;
	if ( $userTesting !== null && $userTesting->isActive() ) {
		$trigger = $userTesting->getTrigger();
		$legacyTests = [];
		if ( $trigger ) {
			$legacyTests[] = $trigger;
		}
		$jsVars['wgCirrusSearchActiveUserTest'] = $trigger;
		// bc for first deployment, some users will still have old js.
		// Should be removed in following deployment.
		$jsVars['wgCirrusSearchBackendUserTests'] = $legacyTests;
	}
	$out->addJsConfigVars( $jsVars );

	// This ignores interwiki results for now...not sure what to do with those
	$resultPages = [ $titleMatches, $textMatches ];
	ElasticsearchIntermediary::setResultPages( $resultPages );
}
/**
 * Add the content word count to $extraStats under 'cirrussearch-article-words'.
 *
 * Nothing is added when the count request fails or yields null.
 *
 * @param array &$extraStats
 * @return void
 */
private static function addWordCount( array &$extraStats ): void {
	$status = ( new CirrusSearch() )->countContentWords();
	if ( $status->isOK() ) {
		$count = $status->getValue();
		if ( $count !== null ) {
			$extraStats['cirrussearch-article-words'] = $count;
		}
	}
}
/**
 * GetPreferences hook handler: registers the completion profile radio
 * preference when completion profiles exist and yield a usable option list.
 *
 * @inheritDoc
 */
public function onGetPreferences( $user, &$prefs ) {
	$engine = new CirrusSearch();
	$profiles = $engine->getProfiles( \SearchEngine::COMPLETION_PROFILE_TYPE, $user );

	// No profiles means there is nothing to build options from.
	$options = $profiles ? self::autoCompleteOptionsForPreferences( $profiles ) : [];
	if ( $options ) {
		$prefs['cirrussearch-pref-completion-profile'] = [
			'type' => 'radio',
			'section' => 'searchoptions/completion',
			'options' => $options,
			'label-message' => 'cirrussearch-pref-completion-profile-help',
		];
	}
}
/**
 * Build the option list for the completion profile preference.
 *
 * Known profile names are offered in a fixed preferred order; names in
 * $profiles that are not in that list are not offered. Each option label is the
 * escaped "-pref-name" message followed by an inline-help LabelWidget holding
 * the "-pref-desc" message.
 *
 * @param array[] $profiles Profiles, each expected to carry a 'name' entry
 * @return string[] Map of display HTML => profile name, or an empty array when
 *  fewer than two profiles would be offered
 */
private static function autoCompleteOptionsForPreferences( array $profiles ): array {
	$available = array_column( $profiles, 'name' );
	// Order in which we propose comp suggest profiles
	$preferredOrder = [
		'fuzzy',
		'fuzzy-subphrases',
		'strict',
		'normal',
		'normal-subphrases',
		'classic'
	];
	$messages = [];
	foreach ( $preferredOrder as $name ) {
		// Strict comparison so that a non-string entry among the available names
		// (e.g. 0 or true) can never loosely match a profile name.
		if ( !in_array( $name, $available, true ) ) {
			continue;
		}
		$display = wfMessage( "cirrussearch-completion-profile-$name-pref-name" )->escaped() .
			new \OOUI\LabelWidget( [
				'classes' => [ 'oo-ui-inline-help' ],
				'label' => wfMessage( "cirrussearch-completion-profile-$name-pref-desc" )->text()
			] );
		$messages[$display] = $name;
	}
	// At least 2 choices are required to provide the user a choice
	return count( $messages ) >= 2 ? $messages : [];
}
/**
 * UserGetDefaultOptions hook handler: sets the default completion profile
 * preference from the 'CirrusSearchCompletionSettings' config value.
 *
 * @inheritDoc
 */
public function onUserGetDefaultOptions( &$defaultOptions ) {
	$cirrusConfig = $this->configFactory->makeConfig( 'CirrusSearch' );
	$defaultOptions['cirrussearch-pref-completion-profile'] =
		$cirrusConfig->get( 'CirrusSearchCompletionSettings' );
}
/**
 * SpecialStatsAddExtra hook handler: adds the content word count to the extra
 * statistics via addWordCount(). $context is not used.
 *
 * @inheritDoc
 */
public function onSpecialStatsAddExtra( &$extraStats, $context ) {
self::addWordCount( $extraStats );
}
/**
 * APIQuerySiteInfoStatisticsInfo hook handler: adds the content word count to
 * the statistics via addWordCount().
 *
 * @inheritDoc
 */
public function onAPIQuerySiteInfoStatisticsInfo( &$extraStats ) {
self::addWordCount( $extraStats );
}
}