Smile-SA/elasticsuite

View on GitHub
src/module-elasticsuite-core/Search/Request/Query/Fulltext/QueryBuilder.php

Summary

Maintainability
C
7 hrs
Test Coverage
<?php
/**
 * DISCLAIMER
 *
 * Do not edit or add to this file if you wish to upgrade Smile ElasticSuite to newer
 * versions in the future.
 *
 * @category  Smile
 * @package   Smile\ElasticsuiteCore
 * @author    Aurelien FOUCRET <aurelien.foucret@smile.fr>
 * @copyright 2020 Smile
 * @license   Open Software License ("OSL") v. 3.0
 */

namespace Smile\ElasticsuiteCore\Search\Request\Query\Fulltext;

use Smile\ElasticsuiteCore\Model\Search\Request\RelevanceConfig\Reader\Container;
use Smile\ElasticsuiteCore\Search\Request\Query\SpanQueryInterface;
use Smile\ElasticsuiteCore\Search\Request\QueryInterface;
use Smile\ElasticsuiteCore\Api\Index\MappingInterface;
use Smile\ElasticsuiteCore\Api\Index\Mapping\FieldInterface;
use Smile\ElasticsuiteCore\Search\Request\Query\QueryFactory;
use Smile\ElasticsuiteCore\Api\Search\Request\ContainerConfigurationInterface;
use Smile\ElasticsuiteCore\Api\Search\SpellcheckerInterface;
use Smile\ElasticsuiteCore\Api\Index\Mapping\FieldFilterInterface;
use Smile\ElasticsuiteCore\Api\Search\Request\Container\RelevanceConfiguration\FuzzinessConfigurationInterface;

/**
 * Prepare a fulltext search query.
 *
 * Depending on the spelling type, the builder produces (through the query factory) one of:
 *  - an "EXACT" query: a weighted multi match query filtered by a cutoff frequency multi match query,
 *    optionally combined with a "SPAN" query boosting documents whose fields begin with the searched terms;
 *  - a "PURE_STOPWORDS" query when the searched text only contains stopwords;
 *  - a "SPELLCHECK" query made of fuzzy and/or phonetic clauses.
 *
 * @category Smile
 * @package  Smile\ElasticsuiteCore
 * @author   Aurelien FOUCRET <aurelien.foucret@smile.fr>
 */
class QueryBuilder
{
    /**
     * @var QueryFactory
     */
    private $queryFactory;

    /**
     * Field filters indexed by name (e.g. 'searchableFieldFilter', 'fuzzyFieldFilter', 'spannableFieldFilter').
     *
     * @var FieldFilterInterface[]
     */
    private $fieldFilters;

    /**
     * Constructor.
     *
     * @param QueryFactory           $queryFactory Query factory (used to build subqueries).
     * @param FieldFilterInterface[] $fieldFilters Field filters models, indexed by filter name.
     */
    public function __construct(QueryFactory $queryFactory, array $fieldFilters = [])
    {
        $this->queryFactory = $queryFactory;
        $this->fieldFilters = $fieldFilters;
    }

    /**
     * Create the fulltext search query.
     *
     * When the query text is an array, one query is built per entry (recursively) and all of them
     * are combined as "should" clauses of a bool query.
     *
     * @param ContainerConfigurationInterface $containerConfig Search request container configuration.
     * @param string|array                    $queryText       The text query (or a list of text queries).
     * @param string                          $spellingType    The type of spellcheck applied
     *                                                         (compared loosely against SpellcheckerInterface::SPELLING_TYPE_*).
     * @param float                           $boost           Boost of the created query.
     * @param int                             $depth           Call depth of the create method. Can be used to avoid/prevent cycles.
     *                                                         Only incremented here, never inspected by this class.
     *
     * @return QueryInterface
     */
    public function create(ContainerConfigurationInterface $containerConfig, $queryText, $spellingType, $boost = 1, $depth = 0)
    {
        $query = null;

        $fuzzySpellingTypes = [SpellcheckerInterface::SPELLING_TYPE_FUZZY, SpellcheckerInterface::SPELLING_TYPE_MOST_FUZZY];

        if (is_array($queryText)) {
            $queries = [];
            foreach ($queryText as $currentQueryText) {
                $queries[] = $this->create($containerConfig, $currentQueryText, $spellingType, $boost, $depth + 1);
            }
            // NOTE(review): the boost is given to every sub-query AND to the wrapping bool query - confirm this is intended.
            $query = $this->queryFactory->create(QueryInterface::TYPE_BOOL, ['should' => $queries, 'boost' => $boost]);
        } elseif ($spellingType == SpellcheckerInterface::SPELLING_TYPE_PURE_STOPWORDS) {
            $query = $this->getPureStopwordsQuery($containerConfig, $queryText, $boost);
            $query->setName('PURE_STOPWORDS');
        } elseif (in_array($spellingType, $fuzzySpellingTypes)) {
            // Loose comparison kept on purpose: callers may provide the spelling type as a numeric string.
            $query = $this->getSpellcheckedQuery($containerConfig, $queryText, $spellingType, $boost);
            if ($query !== null) {
                $query->setName('SPELLCHECK');
            }
        }

        // Fallback for every other spelling type, and for fuzzy types when neither fuzziness nor phonetic search is enabled.
        if ($query === null) {
            $query = $this->getExactMatchQuery($containerConfig, $queryText, $boost);
        }

        return $query;
    }

    /**
     * Build the exact match query: a weighted search query filtered by the cutoff frequency query.
     * When a span match boost is configured and a span query can be built, the exact query becomes the "must"
     * clause of a bool query whose optional "should" clause is the span query.
     *
     * @param ContainerConfigurationInterface $containerConfig Search request container configuration.
     * @param string                          $queryText       The text query.
     * @param float                           $boost           Boost of the created query.
     *
     * @return QueryInterface
     */
    private function getExactMatchQuery(ContainerConfigurationInterface $containerConfig, $queryText, $boost)
    {
        $queryParams = [
            'query'  => $this->getWeightedSearchQuery($containerConfig, $queryText),
            'filter' => $this->getCutoffFrequencyQuery($containerConfig, $queryText),
            'boost'  => $boost,
        ];
        $query = $this->queryFactory->create(QueryInterface::TYPE_FILTER, $queryParams);
        $query->setName('EXACT');

        $spanMatchBoost = $containerConfig->getRelevanceConfig()->getSpanMatchBoost();
        if (!$spanMatchBoost) {
            return $query;
        }

        $spanQuery = $this->getSpanQuery($containerConfig, $queryText, $spanMatchBoost);
        if ($spanQuery === null) {
            return $query;
        }

        $spanQuery->setName('SPAN');

        // The span query only contributes to the score: it must never be required to match.
        return $this->queryFactory->create(
            QueryInterface::TYPE_BOOL,
            [
                'must'               => [$query],
                'should'             => [$spanQuery],
                'minimumShouldMatch' => 0,
            ]
        );
    }

    /**
     * Provides a common search query for the searched text.
     *
     * The query always targets the default search field. Depending on the relevance configuration, it also targets
     * the fields selected by the 'nonStandardSearchableFieldFilter' filter and/or the reference sub-field
     * of the default reference field. All fields are searched with a weight of 1.
     *
     * @param ContainerConfigurationInterface $containerConfig Search request container configuration.
     * @param string                          $queryText       The text query.
     *
     * @return QueryInterface
     */
    private function getCutoffFrequencyQuery(ContainerConfigurationInterface $containerConfig, $queryText)
    {
        $relevanceConfig = $containerConfig->getRelevanceConfig();
        $fields          = [MappingInterface::DEFAULT_SEARCH_FIELD => 1];

        if ($relevanceConfig->isUsingDefaultAnalyzerInExactMatchFilter()) {
            $fields += $this->getWeightedFields(
                $containerConfig,
                null,
                $this->getFieldFilter('nonStandardSearchableFieldFilter'),
                MappingInterface::DEFAULT_SEARCH_FIELD
            );
        }

        if ($relevanceConfig->isUsingReferenceInExactMatchFilter()) {
            $fields += array_fill_keys(
                [MappingInterface::DEFAULT_SEARCH_FIELD, MappingInterface::DEFAULT_REFERENCE_FIELD . '.reference'],
                1
            );
        }

        $queryParams = [
            // Field weights are irrelevant in the filter: only the field names are kept.
            'fields'             => array_fill_keys(array_keys($fields), 1),
            'queryText'          => $queryText,
            'cutoffFrequency'    => $relevanceConfig->getCutOffFrequency(),
            'minimumShouldMatch' => $relevanceConfig->getMinimumShouldMatch(),
        ];

        return $this->queryFactory->create(QueryInterface::TYPE_MULTIMATCH, $queryParams);
    }

    /**
     * Provides a weighted search query (multi match) using mapping field configuration.
     *
     * Three sets of weighted fields are merged: fields with their default analyzer, fields with the phrase analyzer
     * (boosted by the phrase match boost) and fields with the sortable analyzer (boosted by twice the phrase match
     * boost, unless single term boosts are customized and the query is not detected as a multi word one).
     *
     * @param ContainerConfigurationInterface $containerConfig Search request container configuration.
     * @param string                          $queryText       The text query.
     *
     * @return QueryInterface
     */
    private function getWeightedSearchQuery(ContainerConfigurationInterface $containerConfig, $queryText)
    {
        $relevanceConfig       = $containerConfig->getRelevanceConfig();
        $phraseMatchBoost      = $relevanceConfig->getPhraseMatchBoost();
        $defaultSearchField    = MappingInterface::DEFAULT_SEARCH_FIELD;
        $searchableFieldFilter = $this->getFieldFilter('searchableFieldFilter');
        $sortableAnalyzer      = FieldInterface::ANALYZER_SORTABLE;
        $phraseAnalyzer        = FieldInterface::ANALYZER_WHITESPACE;
        $sortableMatchBoost    = 2 * $phraseMatchBoost;

        if ($this->isMultiWordQuery($queryText)) {
            $phraseAnalyzer = FieldInterface::ANALYZER_SHINGLE;
        } elseif ($relevanceConfig->areExactMatchSingleTermBoostsCustomized()) {
            $phraseMatchBoost   = $relevanceConfig->getExactMatchSingleTermPhraseMatchBoost();
            $sortableMatchBoost = $relevanceConfig->getExactMatchSingleTermSortableBoost();
        }

        $searchFields = array_merge(
            $this->getWeightedFields($containerConfig, null, $searchableFieldFilter, $defaultSearchField),
            $this->getWeightedFields($containerConfig, $phraseAnalyzer, $searchableFieldFilter, $defaultSearchField, $phraseMatchBoost),
            $this->getWeightedFields($containerConfig, $sortableAnalyzer, $searchableFieldFilter, null, $sortableMatchBoost)
        );

        $queryParams = [
            'fields'             => $searchFields,
            'queryText'          => $queryText,
            'minimumShouldMatch' => 1,
            'cutoffFrequency'    => $relevanceConfig->getCutOffFrequency(),
            'tieBreaker'         => $relevanceConfig->getTieBreaker(),
        ];

        return $this->queryFactory->create(QueryInterface::TYPE_MULTIMATCH, $queryParams);
    }

    /**
     * Build a query when the fulltext search query contains only stopwords.
     *
     * All terms are required to match (minimum should match is "100%").
     *
     * @param ContainerConfigurationInterface $containerConfig Search request container configuration.
     * @param string                          $queryText       The text query.
     * @param float                           $boost           Boost of the created query.
     *
     * @return QueryInterface
     */
    private function getPureStopwordsQuery(ContainerConfigurationInterface $containerConfig, $queryText, $boost)
    {
        $relevanceConfig = $containerConfig->getRelevanceConfig();

        $analyzer = $this->isMultiWordQuery($queryText) ? FieldInterface::ANALYZER_SHINGLE : FieldInterface::ANALYZER_WHITESPACE;

        $searchFields = $this->getWeightedFields(
            $containerConfig,
            $analyzer,
            $this->getFieldFilter('searchableFieldFilter'),
            MappingInterface::DEFAULT_SEARCH_FIELD
        );

        $queryParams = [
            'fields'             => $searchFields,
            'queryText'          => $queryText,
            'minimumShouldMatch' => "100%",
            'tieBreaker'         => $relevanceConfig->getTieBreaker(),
            'boost'              => $boost,
        ];

        return $this->queryFactory->create(QueryInterface::TYPE_MULTIMATCH, $queryParams);
    }

    /**
     * Spellchecked query building.
     *
     * Combines the fuzzy and phonetic queries (each one only if enabled in the relevance configuration) as "should"
     * clauses of a bool query. For the "most fuzzy" spelling type, the weighted search query is added as a "must" clause.
     *
     * @param ContainerConfigurationInterface $containerConfig Search request container configuration.
     * @param string                          $queryText       The text query.
     * @param string                          $spellingType    The type of spellcheck applied.
     * @param float                           $boost           Boost of the created query.
     *
     * @return QueryInterface|null Null when neither fuzziness nor phonetic search is enabled.
     */
    private function getSpellcheckedQuery(ContainerConfigurationInterface $containerConfig, $queryText, $spellingType, $boost)
    {
        $relevanceConfig = $containerConfig->getRelevanceConfig();
        $queryClauses    = [];

        // setName() is called as a statement (like in create()) so that the clauses never depend on its return value.
        if ($relevanceConfig->isFuzzinessEnabled()) {
            $fuzzyQuery = $this->getFuzzyQuery($containerConfig, $queryText);
            $fuzzyQuery->setName('FUZZY');
            $queryClauses[] = $fuzzyQuery;
        }

        if ($relevanceConfig->isPhoneticSearchEnabled()) {
            $phoneticQuery = $this->getPhoneticQuery($containerConfig, $queryText);
            $phoneticQuery->setName('PHONETIC');
            $queryClauses[] = $phoneticQuery;
        }

        if (empty($queryClauses)) {
            return null;
        }

        $queryParams = ['should' => $queryClauses, 'boost' => $boost];

        if ($spellingType == SpellcheckerInterface::SPELLING_TYPE_MOST_FUZZY) {
            $queryParams['must'] = [$this->getWeightedSearchQuery($containerConfig, $queryText)];
        }

        return $this->queryFactory->create(QueryInterface::TYPE_BOOL, $queryParams);
    }

    /**
     * Fuzzy query part.
     *
     * @param ContainerConfigurationInterface $containerConfig Search request container configuration.
     * @param string                          $queryText       The text query.
     *
     * @return QueryInterface
     */
    private function getFuzzyQuery(ContainerConfigurationInterface $containerConfig, $queryText)
    {
        $relevanceConfig  = $containerConfig->getRelevanceConfig();
        $phraseMatchBoost = $relevanceConfig->getPhraseMatchBoost();

        $defaultSearchField = MappingInterface::DEFAULT_SPELLING_FIELD;

        $standardAnalyzer = FieldInterface::ANALYZER_WHITESPACE;
        $phraseAnalyzer   = FieldInterface::ANALYZER_WHITESPACE;
        if ($this->isMultiWordQuery($queryText)) {
            $phraseAnalyzer = FieldInterface::ANALYZER_SHINGLE;
        }

        $fuzzyFieldFilter            = $this->getFieldFilter('fuzzyFieldFilter');
        $nonStandardFuzzyFieldFilter = $this->getFieldFilter('nonStandardFuzzyFieldFilter');

        $searchFields = array_merge(
            $this->getWeightedFields($containerConfig, $standardAnalyzer, $fuzzyFieldFilter, $defaultSearchField),
            $this->getWeightedFields($containerConfig, $phraseAnalyzer, $fuzzyFieldFilter, $defaultSearchField, $phraseMatchBoost),
            // Allow fuzzy query to contain fields using for fuzzy search with their default analyzer.
            // Same logic as defined in getWeightedSearchQuery().
            // This will automatically include sku.reference and any other fields having defaultSearchAnalyzer.
            $this->getWeightedFields($containerConfig, null, $nonStandardFuzzyFieldFilter, $defaultSearchField)
        );

        $queryParams = [
            'fields'             => $searchFields,
            'queryText'          => $queryText,
            'minimumShouldMatch' => $relevanceConfig->getMinimumShouldMatch(),
            'tieBreaker'         => $relevanceConfig->getTieBreaker(),
            'fuzzinessConfig'    => $relevanceConfig->getFuzzinessConfiguration(),
            'cutoffFrequency'    => $relevanceConfig->getCutOffFrequency(),
        ];

        return $this->queryFactory->create(QueryInterface::TYPE_MULTIMATCH, $queryParams);
    }

    /**
     * Phonetic query part.
     *
     * The minimum should match of the fuzziness configuration is used when such a configuration is available,
     * the one of the relevance configuration otherwise.
     *
     * @param ContainerConfigurationInterface $containerConfig Search request container configuration.
     * @param string                          $queryText       The text query.
     *
     * @return QueryInterface
     */
    private function getPhoneticQuery(ContainerConfigurationInterface $containerConfig, $queryText)
    {
        $relevanceConfig    = $containerConfig->getRelevanceConfig();
        $minimumShouldMatch = $relevanceConfig->getMinimumShouldMatch();

        $fuzzinessConfig = $relevanceConfig->getFuzzinessConfiguration();
        if ($fuzzinessConfig instanceof FuzzinessConfigurationInterface) {
            $minimumShouldMatch = $fuzzinessConfig->getMinimumShouldMatch();
        }

        $searchFields = $this->getWeightedFields(
            $containerConfig,
            FieldInterface::ANALYZER_PHONETIC,
            $this->getFieldFilter('fuzzyFieldFilter'),
            MappingInterface::DEFAULT_SPELLING_FIELD
        );

        $queryParams = [
            'fields'             => $searchFields,
            'queryText'          => $queryText,
            'minimumShouldMatch' => $minimumShouldMatch,
            'tieBreaker'         => $relevanceConfig->getTieBreaker(),
            'cutoffFrequency'    => $relevanceConfig->getCutOffFrequency(),
        ];

        return $this->queryFactory->create(QueryInterface::TYPE_MULTIMATCH, $queryParams);
    }

    /**
     * Build an array of weighted fields to be searched with the ability to apply a filter callback method and a default field.
     *
     * @param ContainerConfigurationInterface $containerConfig Search request container config.
     * @param string|null                     $analyzer        Target analyzer.
     * @param FieldFilterInterface|null       $fieldFilter     Field filter.
     * @param string|null                     $defaultField    Default search field.
     * @param int|float                       $boost           Additional boost applied to the fields (multiplicative).
     *
     * @return array
     */
    private function getWeightedFields(
        ContainerConfigurationInterface $containerConfig,
        $analyzer = null,
        ?FieldFilterInterface $fieldFilter = null,
        $defaultField = null,
        $boost = 1
    ) {
        $mapping = $containerConfig->getMapping();

        return $mapping->getWeightedSearchProperties($analyzer, $defaultField, $boost, $fieldFilter);
    }

    /**
     * Build a span query to raise score of fields beginning by the query text.
     *
     * The query text is split on spaces, empty terms are dropped and only the first terms (up to the configured
     * span size) are kept. For every field accepted by the 'spannableFieldFilter' filter, a "span first" query
     * wrapping an ordered, zero-slop "span near" query of the terms is built.
     *
     * @param ContainerConfigurationInterface $containerConfig The container configuration
     * @param string                          $queryText       The query text
     * @param int                             $boost           The boost applied to the span query
     *
     * @return QueryInterface|null Null when the span size is 0, when no spannable field filter is configured,
     *                             when the query text contains no term or when no field is spannable.
     */
    private function getSpanQuery(ContainerConfigurationInterface $containerConfig, $queryText, $boost)
    {
        $spanSize = (int) $containerConfig->getRelevanceConfig()->getSpanSize();
        if ($spanSize === 0) {
            return null;
        }

        $spanFieldsFilter = $this->getFieldFilter('spannableFieldFilter');
        if ($spanFieldsFilter === null) {
            return null;
        }

        // Consecutive, leading or trailing spaces produce empty terms that could never match: drop them.
        $terms = array_filter(
            explode(' ', (string) $queryText),
            static function ($term) {
                return $term !== '';
            }
        );
        $terms = array_slice($terms, 0, $spanSize);
        if (empty($terms)) {
            return null;
        }

        $spanFields = array_filter($containerConfig->getMapping()->getFields(), [$spanFieldsFilter, 'filterField']);
        if (count($spanFields) === 0) {
            return null;
        }

        $spanQueryParams = ['boost' => $boost, 'end' => count($terms)];
        $queries         = [];

        foreach ($spanFields as $field) {
            // Target the whitespace analyzed property of the field when it exists, the field itself otherwise.
            $fieldName = $field->getMappingProperty(FieldInterface::ANALYZER_WHITESPACE) ?? $field->getName();

            $clauses = [];
            foreach ($terms as $term) {
                $clauses[] = $this->queryFactory->create(
                    SpanQueryInterface::TYPE_SPAN_TERM,
                    ['field' => $fieldName, 'value' => $this->lowercaseTerm($term)]
                );
            }

            $spanQueryParams['match'] = $this->queryFactory->create(
                SpanQueryInterface::TYPE_SPAN_NEAR,
                [
                    'clauses' => $clauses,
                    'slop'    => 0,
                    'inOrder' => true,
                ]
            );

            $queries[] = $this->queryFactory->create(SpanQueryInterface::TYPE_SPAN_FIRST, $spanQueryParams);
        }

        if (count($queries) > 1) {
            return $this->queryFactory->create(QueryInterface::TYPE_BOOL, ['should' => $queries]);
        }

        return $queries[0];
    }

    /**
     * Retrieve a field filter by its name.
     *
     * @param string $name Name of the field filter.
     *
     * @return FieldFilterInterface|null Null when no filter has been configured under this name.
     */
    private function getFieldFilter($name)
    {
        return $this->fieldFilters[$name] ?? null;
    }

    /**
     * Indicates if the query text is a string for which str_word_count() counts more than one word.
     *
     * NOTE(review): str_word_count() is locale dependent and does not support multibyte locales; digits are not
     * counted as words either. Queries such as "iphone 12" or non latin texts may not be detected as multi word.
     * Behavior intentionally kept as is since changing it would alter relevance.
     *
     * @param mixed $queryText The text query.
     *
     * @return bool
     */
    private function isMultiWordQuery($queryText)
    {
        return is_string($queryText) && str_word_count($queryText) > 1;
    }

    /**
     * Lowercase a search term, using the multibyte implementation when the mbstring extension is available
     * so that non ASCII uppercase characters are converted too.
     *
     * @param string $term The term to lowercase.
     *
     * @return string
     */
    private function lowercaseTerm($term)
    {
        if (function_exists('mb_strtolower')) {
            return mb_strtolower($term, 'UTF-8');
        }

        return strtolower($term);
    }
}