profiles/FullTextQueryBuilderProfiles.config.php
<?php
namespace CirrusSearch;
/**
* CirrusSearch - List of FullTextQueryBuilderProfiles used to generate an elasticsearch
* query by parsing user input.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*/
// Full-text query builder profiles, keyed by profile name. Each profile
// carries a 'builder_class' and the 'settings' array for that builder.
// Class names are written fully qualified so that they resolve the same
// way regardless of the namespace declared by the enclosing file.
return [
	// Default query builder, based on top of QueryString.
	'default' => [
		'builder_class' => \CirrusSearch\Query\FullTextQueryStringQueryBuilder::class,
		'settings' => [],
	],

	// Full-text query based on simple match queries, suited to the browser tests.
	// Not necessarily good for real world wikis.
	'browser_tests' => [
		'builder_class' => \CirrusSearch\Query\FullTextSimpleMatchQueryBuilder::class,
		// Adjusted according to tests/browser/features/relevancy_api.feature
		// and a fresh index (no deletes) and bm25 defaults for all fields
		// title > redirects > category > heading > opening > text > aux
		// These settings might not be ideal with a real index and real word norms.
		'settings' => [
			'default_min_should_match' => '1',
			'default_query_type' => 'most_fields',
			// NOTE(review): both perfield profiles below use 3.0 for this
			// setting; confirm that 0.3 is the intended value here.
			'default_stem_weight' => 0.3,
			'fields' => [
				// Very high title weight for features/create_new_page.feature:23:
				// make sure that Catapult wins over Catapult/adsf despite not
				// having Catapult in the content.
				'title' => 2.3,
				'redirect.title' => [ 'boost' => 2.0, 'in_dismax' => 'redirects_or_shingles' ],
				// Shingles on title+redirect. suggest is currently analyzed
				// only with plain, so it is included in a dismax with redirects.
				'suggest' => [
					'is_plain' => true,
					'boost' => 1.05,
					'in_dismax' => 'redirects_or_shingles',
				],
				// category should win over heading/opening
				'category' => 1.8,
				'heading' => 1.3,
				// Pack text and opening_text in a dismax query to avoid
				// scoring the same words twice.
				'text' => [ 'boost' => 0.4, 'in_dismax' => 'text_and_opening_text' ],
				'opening_text' => [ 'boost' => 0.5, 'in_dismax' => 'text_and_opening_text' ],
				'auxiliary_text' => 0.2,
				'file_text' => 0.2,
			],
			'phrase_rescore_fields' => [
				// Low boost to counter the high phrase rescore boost
				'text' => 0.14,
				// higher on text.plain for tests/browser/features/relevancy_api.feature:106
				'text.plain' => 0.2,
			],
			'dismax_settings' => [
				// Use a tie breaker: avg field length is so low for
				// opening_text that an insanely high boost would be needed
				// to make sure it wins over text in the dismax. Instead the
				// tie breaker adds 20% of the score of the opening_text
				// clauses.
				'text_and_opening_text' => [ 'tie_breaker' => 0.2 ],
			],
		],
	],

	// Per field builder tuned for en.wikipedia.org
	'perfield_builder' => [
		'builder_class' => \CirrusSearch\Query\FullTextSimpleMatchQueryBuilder::class,
		'settings' => [
			'default_min_should_match' => '1',
			'default_query_type' => 'most_fields',
			'default_stem_weight' => 3.0,
			'fields' => [
				'title' => 0.3,
				'redirect.title' => [ 'boost' => 0.27, 'in_dismax' => 'redirects_or_shingles' ],
				'suggest' => [
					'is_plain' => true,
					'boost' => 0.2,
					'in_dismax' => 'redirects_or_shingles',
				],
				'category' => 0.05,
				'heading' => 0.05,
				'text' => [ 'boost' => 0.6, 'in_dismax' => 'text_and_opening_text' ],
				'opening_text' => [ 'boost' => 0.5, 'in_dismax' => 'text_and_opening_text' ],
				'auxiliary_text' => 0.05,
				'file_text' => 0.5,
			],
			'phrase_rescore_fields' => [
				// Very low (don't forget it's multiplied by 10 by default).
				// Use the all field to avoid loading positions on another
				// field; the score is roughly the same as when used on text.
				'all' => 0.06,
				'all.plain' => 0.1,
			],
		],
	],

	// Per field builder tuned for searching crossproject where a strong
	// title match is required
	'perfield_builder_title_filter' => [
		'builder_class' => \CirrusSearch\Query\FullTextSimpleMatchQueryBuilder::class,
		'settings' => [
			'default_min_should_match' => '1',
			'default_query_type' => 'most_fields',
			'default_stem_weight' => 3.0,
			'filter' => [
				// Similar to the default filter (all terms must match in
				// the content) plus an additional constraint on
				// title/redirect, which can be relaxed with
				// minimum_should_match (defaults to 3<80%).
				'type' => 'constrain_title',
				'settings' => [ 'minimum_should_match' => '3<80%' ],
			],
			'fields' => [
				'title' => 0.3,
				'redirect.title' => [ 'boost' => 0.27, 'in_dismax' => 'redirects_or_shingles' ],
				'suggest' => [
					'is_plain' => true,
					'boost' => 0.2,
					'in_dismax' => 'redirects_or_shingles',
				],
				'category' => 0.05,
				'heading' => 0.05,
				'text' => [ 'boost' => 0.6, 'in_dismax' => 'text_and_opening_text' ],
				'opening_text' => [ 'boost' => 0.5, 'in_dismax' => 'text_and_opening_text' ],
				'auxiliary_text' => 0.05,
				'file_text' => 0.5,
			],
			'phrase_rescore_fields' => [
				// Very low (don't forget it's multiplied by 10 by default).
				// Use the all field to avoid loading positions on another
				// field; the score is roughly the same as when used on text.
				'all' => 0.06,
				'all.plain' => 0.1,
			],
		],
	],
];