Smile-SA/elasticsuite

View on GitHub
src/module-elasticsuite-thesaurus/Plugin/QueryRewrite.php

Summary

Maintainability
A
1 hr
Test Coverage
<?php
/**
 * DISCLAIMER
 *
 * Do not edit or add to this file if you wish to upgrade Smile ElasticSuite to newer
 * versions in the future.
 *
 * @category  Smile
 * @package   Smile\ElasticsuiteThesaurus
 * @author    Aurelien FOUCRET <aurelien.foucret@smile.fr>
 * @copyright 2020 Smile
 * @license   Open Software License ("OSL") v. 3.0
 */

namespace Smile\ElasticsuiteThesaurus\Plugin;

use Smile\ElasticsuiteCore\Search\Request\Query\Fulltext\QueryBuilder;
use Smile\ElasticsuiteCore\Api\Search\Request\ContainerConfigurationInterface;
use Smile\ElasticsuiteCore\Search\Request\Query\QueryFactory;
use Smile\ElasticsuiteThesaurus\Config\ThesaurusConfig;
use Smile\ElasticsuiteThesaurus\Config\ThesaurusConfigFactory;
use Smile\ElasticsuiteThesaurus\Model\Index;
use Smile\ElasticsuiteCore\Api\Search\SpellcheckerInterface;
use Smile\ElasticsuiteCore\Search\Request\QueryInterface;

/**
 * Plugin that handle query rewriting (synonym substitution) during fulltext query building phase.
 *
 * @category Smile
 * @package  Smile\ElasticsuiteThesaurus
 * @author   Aurelien FOUCRET <aurelien.foucret@smile.fr>
 */
class QueryRewrite
{
    /**
     * @var QueryFactory
     */
    private $queryFactory;

    /**
     * @var ThesaurusConfigFactory
     */
    private $thesaurusConfigFactory;

    /**
     * @var Index
     */
    private $index;

    /**
     * @var array
     */
    private $rewritesCache = [];

    /**
     * Constructor.
     *
     * @param QueryFactory           $queryFactory           Search request query factory.
     * @param ThesaurusConfigFactory $thesaurusConfigFactory Thesaurus configuration factory.
     * @param Index                  $index                  Synonym index.
     */
    public function __construct(
        QueryFactory $queryFactory,
        ThesaurusConfigFactory $thesaurusConfigFactory,
        Index $index
    ) {
        $this->queryFactory           = $queryFactory;
        $this->thesaurusConfigFactory = $thesaurusConfigFactory;
        $this->index                  = $index;
    }

    /**
     * Rewrite the query.
     *
     * @SuppressWarnings(PHPMD.UnusedFormalParameter)
     *
     * @param QueryBuilder                    $subject         Original query builder.
     * @param \Closure                        $proceed         Original create func.
     * @param ContainerConfigurationInterface $containerConfig Search request container config.
     * @param string                          $queryText       Current query text.
     * @param string                          $spellingType    Spelling type of the query.
     * @param float                           $boost           Original query boost.
     * @param int                             $depth           Call depth of the create method. Can be used to avoid/prevent cycles.
     *
     * @return QueryInterface
     */
    public function aroundCreate(
        QueryBuilder $subject,
        \Closure $proceed,
        ContainerConfigurationInterface $containerConfig,
        $queryText,
        $spellingType,
        $boost = 1,
        $depth = 0
    ) {
        $storeId         = $containerConfig->getStoreId();
        $requestName     = $containerConfig->getName();
        $rewriteCacheKey = $requestName . '|' . $storeId . '|' . $depth . '|' . md5(json_encode($queryText));

        if (!isset($this->rewritesCache[$rewriteCacheKey])) {
            $rewrites = [];
            /*
             * Prevents multiple and excessive rewriting when calling the fulltext query builder 'create' method
             * with an array of query text
             * - ALL queries will be rewritten here on the first pass in this plugin
             * - but no longer on the consecutive "atomic" calls from the 'create' method to itself
             * Also prevents rewriting a query text that has been provided by the rewriting process.
             */
            if ($depth === 0) {
                $rewrites = $this->getWeightedRewrites($queryText, $containerConfig, $boost);
            }
            // Set base query as SPELLING_TYPE_EXACT if synonyms/expansions are found.
            $spellingType = empty($rewrites) ? $spellingType : SpellcheckerInterface::SPELLING_TYPE_EXACT;
            $query        = $proceed($containerConfig, $queryText, $spellingType, $boost, $depth);

            if (!empty($rewrites)) {
                $synonymQueries           = [$query];
                $synonymQueriesSpellcheck = SpellcheckerInterface::SPELLING_TYPE_EXACT;

                foreach ($rewrites as $rewrittenQuery => $weight) {
                    $synonymQueries[] = $proceed($containerConfig, $rewrittenQuery, $synonymQueriesSpellcheck, $weight, $depth + 1);
                }

                $query = $this->queryFactory->create(QueryInterface::TYPE_BOOL, ['should' => $synonymQueries]);
            }

            $this->rewritesCache[$rewriteCacheKey] = $query;
        }

        return $this->rewritesCache[$rewriteCacheKey];
    }

    /**
     * Get weighted rewrites for a given query text.
     * Returns an associative array of ['rewritten query' => weight] if any matches are found.
     *
     * @param string|array                    $queryText       The query text
     * @param ContainerConfigurationInterface $containerConfig Container Configuration
     * @param float                           $originalBoost   Original boost of the query
     *
     * @return array
     */
    private function getWeightedRewrites($queryText, $containerConfig, $originalBoost)
    {
        $rewrites = [];

        if (!is_array($queryText)) {
            $queryText = [$queryText];
        }

        foreach ($queryText as $currentQueryText) {
            // Use + instead of array_merge because $queryText can be purely numeric and would be casted to 0 by array_merge.
            $rewrites = $rewrites + $this->index->getQueryRewrites($containerConfig, $currentQueryText, $originalBoost);
        }

        $maxRewrittenQueries = $this->getThesaurusConfig($containerConfig)->getMaxRewrittenQueries();
        if ($maxRewrittenQueries > 0) {
            $rewrites = array_slice($rewrites, 0, $maxRewrittenQueries, true);
        }

        return $rewrites;
    }

    /**
     * Return thesaurus/relevance configuration.
     *
     * @param ContainerConfigurationInterface $containerConfig Container configuration.
     *
     * @return ThesaurusConfig
     */
    private function getThesaurusConfig(ContainerConfigurationInterface $containerConfig)
    {
        $storeId       = $containerConfig->getStoreId();
        $containerName = $containerConfig->getName();

        return $this->thesaurusConfigFactory->create($storeId, $containerName);
    }
}