maintenance/UpdateOneSearchIndexConfig.php
<?php
namespace CirrusSearch\Maintenance;
use CirrusSearch\Connection;
use CirrusSearch\ElasticaErrorHandler;
use CirrusSearch\Maintenance\Validators\MappingValidator;
use CirrusSearch\SearchConfig;
use CirrusSearch\Util;
use MediaWiki\Config\ConfigException;
/**
* Update the search configuration on the search backend.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*/
// Locate the MediaWiki installation: use the MW_INSTALL_PATH environment
// variable when it is set, otherwise fall back to three directories above
// this script.
$IP = getenv( 'MW_INSTALL_PATH' );
if ( $IP === false ) {
$IP = __DIR__ . '/../../..';
}
// Load the core maintenance framework first, then this extension's own
// Maintenance.php (presumably the base class extended below).
require_once "$IP/maintenance/Maintenance.php";
require_once __DIR__ . '/../includes/Maintenance/Maintenance.php';
/**
* Update the elasticsearch configuration for this index.
*/
class UpdateOneSearchIndexConfig extends Maintenance {
/**
* @var string Suffix of the index being updated, taken from --indexSuffix
* (or the backwards compatible --indexType)
*/
private $indexSuffix;
/**
* @var bool Are we going to blow the index away and start from scratch?
*/
private $startOver;
/**
* @var int Documents per shard to reindex in a batch (--reindexChunkSize, defaults to 100)
*/
private $reindexChunkSize;
/**
* @var string Base name used for all indexes; --baseName or the configured
* SearchConfig::INDEX_BASE_NAME
*/
private $indexBaseName;
/**
* @var string Identifier of the specific index to work on, resolved from --indexIdentifier
*/
private $indexIdentifier;
/**
* @var bool Value of --reindexAndRemoveOk, handed to the specific alias validator
*/
private $reindexAndRemoveOk;
/**
* @var int number of scan slices to use when reindexing
*/
private $reindexSlices;
/**
* @var string language code we're building for
*/
private $langCode;
/**
* @var bool prefix search on any term
*/
private $prefixSearchStartsWithAny;
/**
* @var bool use suggestions on text fields
*/
private $phraseSuggestUseText;
/**
* @var bool print config as it is being checked
*/
private $printDebugCheckConfig;
/**
* @var float how much the reindexed copy of an index is allowed to deviate from the current
* copy without triggering a reindex failure
*/
private $reindexAcceptableCountDeviation;
/**
* @var array filtered analysis config
*/
private $analysisConfig;
/**
* @var array list of available plugins
*/
private $availablePlugins;
/**
* @var array Plugins to exclude when scanning for available plugins
* (CirrusSearchBannedPlugins)
*/
protected $bannedPlugins;
/**
* @var bool Value of CirrusSearchOptimizeIndexForExperimentalHighlighter, passed to
* the mapping config builders and the mapping validator
*/
protected $optimizeIndexForExperimentalHighlighter;
/**
* @var int Refresh interval used when creating the index (CirrusSearchRefreshInterval)
*/
protected $refreshInterval;
/**
* @var string Master timeout handed to the mapping validator (CirrusSearchMasterTimeout)
*/
protected $masterTimeout;
/**
* @var array Mapping built by the mapping config builder; may be rewritten when
* the analysis config is optimized
*/
private $mapping = [];
/**
* @var array Similarity config produced by the analysis config builder
*/
private $similarityConfig;
/**
* @var bool true if the analysis config can be optimized
*/
private $safeToOptimizeAnalysisConfig;
/**
* @var bool State flag indicating if we should attempt deleting the index we created.
* Raised when this run creates an index and lowered again once the specific alias
* has been validated.
*/
private $canCleanupCreatedIndex = false;
/**
 * Register the description and the command line options of this script.
 */
public function __construct() {
	parent::__construct();

	$description = "Update the configuration or contents of one search index. "
		. "This always operates on a single cluster.";
	$this->addDescription( $description );

	// --indexType is kept as a backwards compatible spelling of --indexSuffix.
	$this->addOption( 'indexSuffix', 'Index to update. Either content or general.', false, true );
	$this->addOption( 'indexType', 'BC form of --indexSuffix', false, true );

	// Remaining options are registered through the shared static helper.
	self::addSharedOptions( $this );
}
/**
 * Register the index update options on a maintenance script.
 *
 * Kept static and public so the options can be added to any script object
 * passed in, not only to this class.
 *
 * @param Maintenance $maintenance Script to register the options on
 */
public static function addSharedOptions( $maintenance ) {
	$maintenance->addOption( 'startOver', 'Blow away the identified index and rebuild it with ' .
		'no data.' );
	$maintenance->addOption( 'indexIdentifier', "Set the identifier of the index to work on. " .
		"You'll need this if you have an index in production serving queries and you have " .
		"to alter some portion of its configuration that cannot safely be done without " .
		"rebuilding it. Once you specify a new indexIdentifier for this wiki you'll have to " .
		"run this script with the same identifier each time. Defaults to 'current' which " .
		"infers the currently in use identifier. You can also use 'now' to set the identifier " .
		"to the current time in seconds which should give you a unique identifier.", false, true );
	// The fragment ending in "while this " needs its trailing space, otherwise
	// the help text renders as "thisoperation".
	$maintenance->addOption( 'reindexAndRemoveOk', "If the alias is held by another index then " .
		"reindex all documents from that index (via the alias) to this one, swing the " .
		"alias to this index, and then remove other index. Updates performed while this " .
		"operation is in progress will be queued up in the job queue. Defaults to false." );
	$maintenance->addOption( 'reindexSlices', 'Number of slices to use in reindex. Roughly '
		. 'equivalent to the level of indexing parallelism. Defaults to number of shards.', false, true );
	$maintenance->addOption( 'reindexAcceptableCountDeviation', 'How much the reindexed ' .
		'copy of an index is allowed to deviate from the current copy without triggering a ' .
		'reindex failure. Defaults to 5%.', false, true );
	$maintenance->addOption( 'reindexChunkSize', 'Documents per shard to reindex in a batch. ' .
		'Note when changing the number of shards that the old shard size is used, not the new ' .
		'one. If you see many errors submitting documents in bulk but the automatic retry as ' .
		'singles works then lower this number. Defaults to 100.', false, true );
	$maintenance->addOption( 'baseName', 'What basename to use for all indexes, ' .
		'defaults to wiki id', false, true );
	$maintenance->addOption( 'debugCheckConfig', 'Print the configuration as it is checked ' .
		'to help debug unexpected configuration mismatches.' );
	$maintenance->addOption( 'justAllocation', 'Just validate the shard allocation settings. Use ' .
		"when you need to apply new cache warmers but want to be sure that you won't apply any other " .
		'changes at an inopportune time.' );
	$maintenance->addOption( 'fieldsToDelete', 'List of comma separated field names to delete ' .
		'while reindexing documents (defaults to empty)', false, true );
	$maintenance->addOption( 'justMapping', 'Just try to update the mapping.' );
	$maintenance->addOption( 'ignoreIndexChanged', 'Skip checking if the new index is different ' .
		'from the old index.', false, false );
}
/**
 * Entry point: read options and configuration, then validate (and where
 * needed create, reindex and promote) the selected index.
 *
 * Elastica failures raised during the cluster work are reported through
 * fatalError().
 *
 * @return bool
 */
public function execute() {
	$this->disablePoolCountersAndLogging();

	$utils = new ConfigUtils( $this->getConnection()->getClient(), $this );

	// Command line options.
	$this->indexSuffix = $this->getBackCompatOption( 'indexSuffix', 'indexType' );
	$this->startOver = $this->getOption( 'startOver', false );
	$config = $this->getSearchConfig();
	$this->indexBaseName = $this->getOption( 'baseName', $config->get( SearchConfig::INDEX_BASE_NAME ) );
	$this->reindexAndRemoveOk = $this->getOption( 'reindexAndRemoveOk', false );
	$this->reindexSlices = $this->getOption( 'reindexSlices', null );
	$deviationOption = $this->getOption( 'reindexAcceptableCountDeviation', '5%' );
	$this->reindexAcceptableCountDeviation = Util::parsePotentialPercent( $deviationOption );
	$this->reindexChunkSize = $this->getOption( 'reindexChunkSize', 100 );
	$this->printDebugCheckConfig = $this->getOption( 'debugCheckConfig', false );

	// Wiki configuration.
	$this->langCode = $config->get( "LanguageCode" );
	$this->prefixSearchStartsWithAny = $config->get( "CirrusSearchPrefixSearchStartsWithAnyWord" );
	$this->phraseSuggestUseText = $config->get( "CirrusSearchPhraseSuggestUseText" );
	$this->bannedPlugins = $config->get( "CirrusSearchBannedPlugins" );
	$this->optimizeIndexForExperimentalHighlighter =
		$config->get( "CirrusSearchOptimizeIndexForExperimentalHighlighter" );
	$this->masterTimeout = $config->get( "CirrusSearchMasterTimeout" );
	$this->refreshInterval = $config->get( "CirrusSearchRefreshInterval" );

	// The archive index is only handled when archives are enabled and the
	// cluster is private; otherwise warn and stop without touching anything.
	if ( $this->indexSuffix === Connection::ARCHIVE_INDEX_SUFFIX ) {
		if ( !$config->get( 'CirrusSearchEnableArchive' ) ) {
			$this->output( "Warning: Not allowing {$this->indexSuffix}, archives are disabled\n" );
			return true;
		}
		if ( !$this->getConnection()->getSettings()->isPrivateCluster() ) {
			$this->output( "Warning: Not allowing {$this->indexSuffix} on a non-private cluster\n" );
			return true;
		}
	}

	$this->initMappingConfigBuilder();

	try {
		$allowedSuffixes = $this->getConnection()->getAllIndexSuffixes( null );
		if ( !in_array( $this->indexSuffix, $allowedSuffixes ) ) {
			$this->fatalError( 'indexSuffix option must be one of ' .
				implode( ', ', $allowedSuffixes ) );
		}

		$this->unwrap( $utils->checkElasticsearchVersion() );
		$this->availablePlugins = $this->unwrap( $utils->scanAvailablePlugins( $this->bannedPlugins ) );

		// Restricted modes: run a single validation and stop.
		if ( $this->getOption( 'justAllocation', false ) ) {
			$this->validateShardAllocation();
			return true;
		}
		if ( $this->getOption( 'justMapping', false ) ) {
			$this->validateMapping();
			return true;
		}

		$this->initAnalysisConfig();

		$identifierOption = $this->getOption( 'indexIdentifier', 'current' );
		$this->indexIdentifier = $this->unwrap(
			$utils->pickIndexIdentifierFromOption( $identifierOption, $this->getIndexAliasName() )
		);

		// From here on everything is initialized and the cluster gets mutated.
		// The index is created if needed, e.g. when --indexIdentifier=now is used.
		$this->validateIndex();

		// Check analyzers against what we expect. A freshly created index
		// should pass untouched; an existing one may fail the build when it
		// has to be recreated.
		$this->validateAnalyzers();

		// Check the mapping against what we expect. Behaves like the analyzer
		// check, except that some mapping changes can be applied to a live index.
		$this->validateMapping();

		// When a replacement index was built, verify that it really differs
		// from the live one. If it does not, there is nothing to do.
		if ( !$this->validateIndexHasChanged() ) {
			$this->cleanupCreatedIndex( "Cleaning up unnecessary index" );
			// Orchestration needs a way to learn that we refused to create the
			// index; an arbitrary dedicated exit code is the simplest signal.
			$this->fatalError( "Use --ignoreIndexChanged to do it anyways", 10 );
		}

		// Ensure the index is part of the production aliases. This reindexes
		// into the new index if necessary, promotes it, and deletes the old one.
		$this->validateAlias();

		// Record the index version information in metadata.
		$this->updateVersions();
	} catch ( \Elastica\Exception\Connection\HttpException $httpFailure ) {
		$message = $httpFailure->getMessage();
		$this->output( "\nUnexpected Elasticsearch failure.\n" );
		$this->fatalError( "Http error communicating with Elasticsearch: $message.\n" );
	} catch ( \Elastica\Exception\ExceptionInterface $failure ) {
		$type = get_class( $failure );
		$message = ElasticaErrorHandler::extractMessage( $failure );
		/** @suppress PhanUndeclaredMethod ExceptionInterface has no methods */
		$trace = $failure->getTraceAsString();
		$this->output( "\nUnexpected Elasticsearch failure.\n" );
		$this->fatalError( "Elasticsearch failed in an unexpected way. " .
			"This is always a bug in CirrusSearch.\n" .
			"Error type: $type\n" .
			"Message: $message\n" .
			"Trace:\n" . $trace );
	}

	return true;
}
/**
 * Run the Metastore child script with this script's options plus the
 * flags asking it to update the index version for our base name.
 *
 * @suppress PhanUndeclaredMethod runChild technically returns a
 *  \Maintenance instance but only \CirrusSearch\Maintenance\Maintenance
 *  classes have the done method. Just allow it since we know what type of
 *  maint class is being created
 */
private function updateVersions() {
	$metastore = $this->runChild( Metastore::class );
	$metastore->done();

	// Inherit our own options and add the version update flags on top.
	$childOptions = array_merge( $this->parameters->getOptions(), [
		'index-version-basename' => $this->indexBaseName,
		'update-index-version' => true,
	] );
	$metastore->loadParamsAndArgs( null, $childOptions, $this->parameters->getArgs() );

	$metastore->execute();
	$metastore->done();
}
/**
 * Create the index when starting over or when it does not exist yet,
 * then validate the index settings.
 */
private function validateIndex() {
	if ( $this->startOver ) {
		// Rebuild requested explicitly.
		$this->createIndex( true, "Blowing away index to start over...\n" );
	} else {
		$missing = !$this->getIndex()->exists();
		if ( $missing ) {
			$this->createIndex( false, "Creating index...\n" );
		}
	}

	$this->validateIndexSettings();
}
/**
 * Create the index through IndexCreator, announcing progress on the output.
 *
 * Raises the cleanup flag first, so the index becomes eligible for
 * deletion by cleanupCreatedIndex().
 *
 * @param bool $rebuild Passed through to IndexCreator::createIndex()
 * @param string $msg Message printed before the creation starts
 */
private function createIndex( $rebuild, $msg ) {
	$this->canCleanupCreatedIndex = true;

	$target = $this->getIndex();
	$creator = new IndexCreator(
		$target,
		new ConfigUtils( $target->getClient(), $this ),
		$this->analysisConfig,
		$this->mapping,
		$this->similarityConfig
	);

	$this->outputIndented( $msg );
	$status = $creator->createIndex(
		$rebuild,
		$this->getMaxShardsPerNode(),
		$this->getShardCount(),
		$this->getReplicaCount(),
		$this->refreshInterval,
		$this->getMergeSettings(),
		$this->getSearchConfig()->get( "CirrusSearchExtraIndexSettings" )
	);
	$this->unwrap( $status );
	$this->outputIndented( "Index created.\n" );
}
/**
 * Build the validators for the index settings: shard count, replica
 * range, shard allocation and max shards per node, in that order.
 *
 * @return \CirrusSearch\Maintenance\Validators\Validator[]
 */
private function getIndexSettingsValidators() {
	return [
		new Validators\NumberOfShardsValidator(
			$this->getIndex(), $this->getShardCount(), $this ),
		new Validators\ReplicaRangeValidator(
			$this->getIndex(), $this->getReplicaCount(), $this ),
		$this->getShardAllocationValidator(),
		new Validators\MaxShardsPerNodeValidator(
			$this->getIndex(), $this->getMaxShardsPerNode(), $this ),
	];
}
/**
 * Run every index settings validator in turn and unwrap each result.
 */
private function validateIndexSettings() {
	foreach ( $this->getIndexSettingsValidators() as $settingsValidator ) {
		$result = $settingsValidator->validate();
		$this->unwrap( $result );
	}
}
/**
 * Validate the analyzers of the index against the analysis config.
 */
private function validateAnalyzers() {
	$analyzersValidator = new Validators\AnalyzersValidator(
		$this->getIndex(),
		$this->analysisConfig,
		$this
	);
	$analyzersValidator->printDebugCheckConfig( $this->printDebugCheckConfig );

	$result = $analyzersValidator->validate();
	$this->unwrap( $result );
}
/**
 * Validate the mapping of the index against the mapping built by this script.
 */
private function validateMapping() {
	$mappingValidator = new MappingValidator(
		$this->getIndex(),
		$this->masterTimeout,
		$this->optimizeIndexForExperimentalHighlighter,
		$this->availablePlugins,
		$this->mapping,
		$this
	);
	$mappingValidator->printDebugCheckConfig( $this->printDebugCheckConfig );

	$result = $mappingValidator->validate();
	$this->unwrap( $result );
}
/**
 * Validate the aliases pointing at this index: the specific alias first,
 * then the all alias (skipped for the archive index).
 */
private function validateAlias() {
	$this->outputIndented( "Validating aliases...\n" );

	// The specific alias goes first: validating it can trigger a reindex,
	// and the all alias should stay on the old index while that runs.
	$this->validateSpecificAlias();

	// The index is live from this point on, so it must never be deleted
	// automatically anymore.
	$this->canCleanupCreatedIndex = false;

	// The archive index is deliberately kept out of the global alias.
	if ( $this->indexSuffix === Connection::ARCHIVE_INDEX_SUFFIX ) {
		return;
	}
	$this->validateAllAlias();
}
/**
 * Validate the alias that is just for this index's type.
 *
 * The validator is given a Reindexer plus the reindex parameters. Fields
 * to drop while reindexing come from --fieldsToDelete merged with the
 * CirrusSearchIndexFieldsToCleanup configuration.
 */
private function validateSpecificAlias() {
	$connection = $this->getConnection();

	// Empty entries from the comma separated option are filtered out.
	$requestedFields = array_filter( explode( ',', $this->getOption( 'fieldsToDelete', '' ) ) );
	$fieldsToCleanup = array_merge(
		$requestedFields,
		$this->getSearchConfig()->get( "CirrusSearchIndexFieldsToCleanup" )
	);

	// The same connection is passed twice to the Reindexer.
	$reindexer = new Reindexer(
		$this->getSearchConfig(),
		$connection,
		$connection,
		$this->getIndex(),
		$this->getOldIndex(),
		$this,
		$fieldsToCleanup
	);

	$reindexParams = [
		$this->reindexSlices,
		$this->reindexChunkSize,
		$this->reindexAcceptableCountDeviation
	];

	$aliasValidator = new Validators\SpecificAliasValidator(
		$this->getConnection()->getClient(),
		$this->getIndexAliasName(),
		$this->getSpecificIndexName(),
		$this->startOver,
		$reindexer,
		$reindexParams,
		$this->getIndexSettingsValidators(),
		$this->reindexAndRemoveOk,
		$this
	);
	$this->unwrap( $aliasValidator->validate() );
}
/**
 * Validate the alias named after the base name only, using
 * IndexAllAliasValidator.
 */
public function validateAllAlias() {
	$client = $this->getConnection()->getClient();
	$allAliasValidator = new Validators\IndexAllAliasValidator(
		$client,
		$this->getIndexName(),
		$this->getSpecificIndexName(),
		$this->startOver,
		$this->getIndexAliasName(),
		$this
	);

	$result = $allAliasValidator->validate();
	$this->unwrap( $result );
}
/**
 * Check whether the index differs from the old index.
 *
 * @return bool Always true when --ignoreIndexChanged is set, otherwise the
 *  unwrapped result of IndexHasChangedValidator
 */
public function validateIndexHasChanged(): bool {
	$skipCheck = $this->getOption( 'ignoreIndexChanged' );
	if ( $skipCheck ) {
		return true;
	}

	$client = $this->getConnection()->getClient();
	$changeValidator = new Validators\IndexHasChangedValidator(
		$client,
		$this->getOldIndex(),
		$this->getIndex(),
		$this
	);
	return $this->unwrap( $changeValidator->validate() );
}
/**
 * Build the validator for the shard allocation settings of the index,
 * based on the CirrusSearchIndexAllocation configuration.
 *
 * @return \CirrusSearch\Maintenance\Validators\Validator
 */
private function getShardAllocationValidator() {
	$index = $this->getIndex();
	$allocation = $this->getSearchConfig()->get( "CirrusSearchIndexAllocation" );

	return new Validators\ShardAllocationValidator( $index, $allocation, $this );
}
/**
 * Run the shard allocation validator and unwrap its result.
 */
protected function validateShardAllocation() {
	$allocationValidator = $this->getShardAllocationValidator();
	$this->unwrap( $allocationValidator->validate() );
}
/**
 * Create the analysis config builder for a language and print the default
 * text analyzer type it picks.
 *
 * @param string $langCode
 * @param array $availablePlugins
 * @return AnalysisConfigBuilder
 */
private function pickAnalyzer( $langCode, array $availablePlugins = [] ) {
	$builder = new AnalysisConfigBuilder( $langCode, $availablePlugins );

	$analyzerType = $builder->getDefaultTextAnalyzerType( $langCode );
	$this->outputIndented( 'Picking analyzer...' . $analyzerType . "\n" );

	return $builder;
}
/**
 * Build the mapping for the selected index and record whether the
 * analysis config may be optimized.
 *
 * The archive index gets its mapping from ArchiveMappingConfigBuilder,
 * every other index from MappingConfigBuilder.
 *
 * @throws ConfigException
 */
protected function initMappingConfigBuilder() {
	$configFlags = 0;
	if ( $this->prefixSearchStartsWithAny ) {
		$configFlags |= MappingConfigBuilder::PREFIX_START_WITH_ANY;
	}
	if ( $this->phraseSuggestUseText ) {
		$configFlags |= MappingConfigBuilder::PHRASE_SUGGEST_USE_TEXT;
	}

	// $this->indexSuffix is an index suffix, so compare it against the
	// suffix constant like every other archive check in this class, rather
	// than against the document type constant.
	if ( $this->indexSuffix === Connection::ARCHIVE_INDEX_SUFFIX ) {
		$mappingConfigBuilder = new ArchiveMappingConfigBuilder(
			$this->optimizeIndexForExperimentalHighlighter, $configFlags );
	} else {
		$mappingConfigBuilder = new MappingConfigBuilder(
			$this->optimizeIndexForExperimentalHighlighter, $configFlags );
	}

	$this->mapping = $mappingConfigBuilder->buildConfig();
	$this->safeToOptimizeAnalysisConfig = $mappingConfigBuilder->canOptimizeAnalysisConfig();
}
/**
 * Fetch the index identified by base name, suffix and identifier.
 *
 * @return \Elastica\Index being updated
 */
public function getIndex() {
	$connection = $this->getConnection();

	return $connection->getIndex( $this->indexBaseName, $this->indexSuffix, $this->indexIdentifier );
}
/**
 * Name built from base name, suffix and identifier.
 *
 * @return string name of the index being updated
 */
protected function getSpecificIndexName() {
	$connection = $this->getConnection();

	return $connection->getIndexName( $this->indexBaseName, $this->indexSuffix, $this->indexIdentifier );
}
/**
 * Name built from base name and suffix, without an identifier.
 *
 * @return string name of the index type being updated
 */
protected function getIndexAliasName() {
	$connection = $this->getConnection();

	return $connection->getIndexName( $this->indexBaseName, $this->indexSuffix );
}
/**
 * Name built from the base name alone.
 *
 * @return string
 */
protected function getIndexName() {
	$connection = $this->getConnection();

	return $connection->getIndexName( $this->indexBaseName );
}
/**
 * Fetch the index addressed by base name and suffix only, i.e. without
 * an identifier.
 *
 * @return \Elastica\Index
 */
protected function getOldIndex() {
	$connection = $this->getConnection();

	return $connection->getIndex( $this->indexBaseName, $this->indexSuffix );
}
/**
 * Get the merge settings for this index.
 *
 * @return array Settings configured for this suffix, else those of the
 *  content suffix, else an empty array
 */
private function getMergeSettings() {
	$configured = $this->getSearchConfig()->get( "CirrusSearchMergeSettings" );

	if ( isset( $configured[$this->indexSuffix] ) ) {
		return $configured[$this->indexSuffix];
	}
	// No merge settings for this index type: fall back to the content type.
	if ( isset( $configured['content'] ) ) {
		return $configured['content'];
	}
	// Not specifying merge settings at all is fine too.
	return [];
}
/**
 * Read the shard count for this suffix from the connection settings.
 *
 * @return int Number of shards this index should have
 */
private function getShardCount() {
	$settings = $this->getConnection()->getSettings();

	return $settings->getShardCount( $this->indexSuffix );
}
/**
 * Read the replica count for this suffix from the connection settings.
 *
 * @return string Number of replicas this index should have. May be a range such as '0-2'
 */
private function getReplicaCount() {
	$settings = $this->getConnection()->getSettings();

	return $settings->getReplicaCount( $this->indexSuffix );
}
/**
 * Read the per-node shard limit for this suffix from the connection settings.
 *
 * @return int Maximum number of shards that can be allocated on a single elasticsearch
 *  node. -1 for unlimited.
 */
private function getMaxShardsPerNode() {
	$settings = $this->getConnection()->getSettings();

	return $settings->getMaxShardsPerNode( $this->indexSuffix );
}
/**
 * Build the analysis and similarity configs, filtering the analysis
 * config (and the mapping along with it) when that is flagged as safe.
 */
private function initAnalysisConfig() {
	$builder = $this->pickAnalyzer( $this->langCode, $this->availablePlugins );
	$this->analysisConfig = $builder->buildConfig();

	if ( $this->safeToOptimizeAnalysisConfig ) {
		$analysisFilter = new AnalysisFilter();
		$deduplicate = $this->getSearchConfig()->get( 'CirrusSearchDeduplicateAnalysis' );
		// Somewhat ad hoc: analyzers listed here must keep their name
		// because they are referenced at query time.
		$protected = [ 'token_reverse' ];
		$filtered = $analysisFilter->filterAnalysis(
			$this->analysisConfig,
			$this->mapping,
			$deduplicate,
			$protected
		);
		[ $this->analysisConfig, $this->mapping ] = $filtered;
	}

	$this->similarityConfig = $builder->buildSimilarityConfig();
}
/**
 * Delete the index created by this run, provided cleanup is still
 * allowed, the index exists, and it is not reported as live.
 *
 * @param string $msg Prefix of the message printed before deleting
 */
private function cleanupCreatedIndex( $msg ) {
	// Nothing to do unless we created an index that still exists.
	if ( !$this->canCleanupCreatedIndex || !$this->getIndex()->exists() ) {
		return;
	}

	$configUtils = new ConfigUtils( $this->getConnection()->getClient(), $this );
	$indexName = $this->getSpecificIndexName();
	$liveStatus = $configUtils->isIndexLive( $indexName );

	if ( !$liveStatus->isGood() ) {
		// Could not determine liveness: report it and leave the index alone.
		$this->output( (string)$liveStatus );
		return;
	}

	if ( $liveStatus->getValue() === false ) {
		$this->output( "$msg $indexName\n" );
		$this->getIndex()->delete();
	}
}
/**
* Output a message and terminate the current script.
*
* Before handing over to the parent implementation, this tries to delete an
* index created by this run via cleanupCreatedIndex(). Elastica exceptions
* raised by that cleanup are printed rather than propagated.
*
* @param string $msg Error Message
* @param int $exitCode PHP exit status. Should be in range 1-254
* @return never
*/
protected function fatalError( $msg, $exitCode = 1 ) {
try {
$this->cleanupCreatedIndex( "Cleaning up incomplete index" );
} catch ( \Elastica\Exception\ExceptionInterface $e ) {
// Cleanup is best effort: report the failure and carry on to the fatal error.
$this->output( "Exception thrown while cleaning up created index: $e\n" );
} finally {
// In the finally block so the parent fatalError() is invoked whether or
// not the cleanup above succeeded.
parent::fatalError( $msg, $exitCode );
}
}
}
// Name the maintenance class defined in this file, then include the file named
// by the RUN_MAINTENANCE_IF_MAIN constant (presumably defined by the core
// Maintenance.php required above; it is not defined in this file).
$maintClass = UpdateOneSearchIndexConfig::class;
require_once RUN_MAINTENANCE_IF_MAIN;