Classes/Search/MysqlQueryBuilder.php
<?php
declare(strict_types=1);
namespace Flowpack\SimpleSearch\Search;
use Flowpack\SimpleSearch\Domain\Service\IndexInterface;
/**
* Query Builder for searches
*
* Note: some signatures are not as strict as in the interfaces, because two query builder interfaces are "mixed"
* in classes implementing this one
*/
class MysqlQueryBuilder implements QueryBuilderInterface
{
/**
* @var IndexInterface
*/
protected $indexClient;
/**
* @var int
*/
protected $limit;
/**
* @var int
*/
protected $from;
/**
* Sorting strings
*
* @var array<string>
*/
protected $sorting = [];
/**
* where clauses
*
* @var array
*/
protected $where = [];
/**
* Map of query parameters to bind to the final statement.
*
* @var array
*/
protected $parameterMap = [];
/**
* Injection method used by Flow dependency injection
*
* @param IndexInterface $indexClient
*/
public function injectIndexClient(IndexInterface $indexClient): void
{
$this->indexClient = $indexClient;
}
/**
* Sort descending by $propertyName
*
* @param string $propertyName the property name to sort by
* @return QueryBuilderInterface
*/
public function sortDesc(string $propertyName): QueryBuilderInterface
{
$this->sorting[] = '"fulltext_objects"."' . $propertyName . '" DESC';
return $this;
}
/**
* Sort ascending by $propertyName
*
* @param string $propertyName the property name to sort by
* @return QueryBuilderInterface
*/
public function sortAsc(string $propertyName): QueryBuilderInterface
{
$this->sorting[] = '"fulltext_objects"."' . $propertyName . '" ASC';
return $this;
}
/**
* output only $limit records
*
* @param int|null $limit
* @return QueryBuilderInterface
*/
public function limit($limit): QueryBuilderInterface
{
$this->limit = $limit === null ? $limit : (int)$limit;
return $this;
}
/**
* Start returned results $from number results.
*
* @param int|null $from
* @return QueryBuilderInterface
*/
public function from($from): QueryBuilderInterface
{
$this->from = $from === null ? $from : (int)$from;
return $this;
}
/**
* add an exact-match query for a given property
*
* @param string $propertyName
* @param string $propertyValue
* @return QueryBuilderInterface
*/
public function exactMatch(string $propertyName, $propertyValue): QueryBuilderInterface
{
$this->compare($propertyName, $propertyValue, '=');
return $this;
}
/**
* add an like query for a given property
*
* @param string $propertyName
* @param $propertyValue
* @return QueryBuilderInterface
*/
public function like(string $propertyName, $propertyValue): QueryBuilderInterface
{
$parameterName = ':' . md5($propertyName . '#' . count($this->where));
$this->where[] = '("' . $propertyName . '" LIKE ' . $parameterName . ')';
$this->parameterMap[$parameterName] = '%' . $propertyValue . '%';
return $this;
}
/**
* Add a custom condition
*
* @param string $conditon
* @return QueryBuilderInterface
*/
public function customCondition(string $conditon): QueryBuilderInterface
{
$this->where[] = $conditon;
return $this;
}
/**
* @param string $searchword
* @return QueryBuilderInterface
*/
public function fulltext(string $searchword): QueryBuilderInterface
{
$parameterName = ':' . md5('FULLTEXT#' . count($this->where));
$this->where[] = '("__identifier__" IN (SELECT "__identifier__" FROM "fulltext_index" WHERE MATCH ("h1", "h2", "h3", "h4", "h5", "h6", "text") AGAINST (' . $parameterName . ')))';
$this->parameterMap[$parameterName] = $searchword;
return $this;
}
/**
* add a greater than query for a given property
*
* @param string $propertyName
* @param string $propertyValue
* @return QueryBuilderInterface
*/
public function greaterThan($propertyName, $propertyValue): QueryBuilderInterface
{
return $this->compare($propertyName, $propertyValue, '>');
}
/**
* add a greater than or equal query for a given property
*
* @param string $propertyName
* @param string $propertyValue
* @return QueryBuilderInterface
*/
public function greaterThanOrEqual($propertyName, $propertyValue): QueryBuilderInterface
{
return $this->compare($propertyName, $propertyValue, '>=');
}
/**
* add a less than query for a given property
*
* @param $propertyName
* @param $propertyValue
* @return QueryBuilderInterface
*/
public function lessThan($propertyName, $propertyValue): QueryBuilderInterface
{
return $this->compare($propertyName, $propertyValue, '<');
}
/**
* add a less than or equal query for a given property
*
* @param $propertyName
* @param $propertyValue
* @return QueryBuilderInterface
*/
public function lessThanOrEqual($propertyName, $propertyValue): QueryBuilderInterface
{
return $this->compare($propertyName, $propertyValue, '<=');
}
/**
* Execute the query and return the list of results
*
* @return array
*/
public function execute(): array
{
$query = $this->buildQueryString();
$result = $this->indexClient->executeStatement($query, $this->parameterMap);
if (empty($result)) {
return [];
}
return array_values($result);
}
/**
* Return the total number of hits for the query.
*
* @return int
*/
public function count(): int
{
$result = $this->execute();
return count($result);
}
/**
* Produces a snippet with the first match result for the search term.
*
* @param string $searchword The search word
* @param int $resultTokens The amount of characters to get surrounding the match hit. (defaults to 200)
* @param string $ellipsis added to the end of the string if the text was longer than the snippet produced. (defaults to "...")
* @param string $beginModifier added immediately before the searchword in the snippet (defaults to <b>)
* @param string $endModifier added immediately after the searchword in the snippet (defaults to </b>)
* @return string
* @see https://github.com/boyter/php-excerpt
*/
public function fulltextMatchResult($searchword, $resultTokens = 200, $ellipsis = '...', $beginModifier = '<b>', $endModifier = '</b>'): string
{
$searchword = trim($searchword);
$query = $this->buildQueryString();
$results = $this->indexClient->executeStatement($query, $this->parameterMap);
if ($results === []) {
return '';
}
$matches = [];
foreach ($results[0] as $indexedFieldName => $indexedFieldContent) {
if (!empty($indexedFieldContent) && strpos($indexedFieldName, '_') !== 0) {
$matches[] = trim(strip_tags((string)$indexedFieldContent));
}
}
$matchContent = implode(' ', $matches);
$searchWordParts = explode(' ', $searchword);
$matchContent = preg_replace(
array_map(static function (string $searchWordPart) {
return sprintf('/(%s)/iu', preg_quote($searchWordPart, '/'));
}, $searchWordParts),
array_fill(0, count($searchWordParts), sprintf('%s$1%s', $beginModifier, $endModifier)),
$matchContent
);
$matchLength = strlen($matchContent);
if ($matchLength <= $resultTokens) {
return $matchContent;
}
$locations = $this->extractLocations($searchWordParts, $matchContent);
$snippetLocation = $this->determineSnippetLocation($locations, (int)($resultTokens / 3));
return $this->extractSnippet($resultTokens, $ellipsis, $matchLength, $snippetLocation, $matchContent);
}
/**
* find the locations of each of the words
* Nothing exciting here. The array_unique is required
* unless you decide to make the words unique before passing in
*
* @param array $words
* @param string $fulltext
* @return array
* @see https://github.com/boyter/php-excerpt
*/
private function extractLocations(array $words, string $fulltext): array
{
$locations = [];
foreach ($words as $word) {
$loc = stripos($fulltext, $word);
while ($loc !== false) {
$locations[0] = $loc;
$loc = stripos($fulltext, $word, $loc + strlen($word));
}
}
$locations = array_unique($locations);
sort($locations);
return $locations;
}
/**
* Work out which is the most relevant portion to display
*
* This is done by looping over each match and finding the smallest distance between two found
* strings. The idea being that the closer the terms are the better match the snippet would be.
* When checking for matches we only change the location if there is a better match.
* The only exception is where we have only two matches in which case we just take the
* first as will be equally distant.
*
* @param array $locations
* @param int $relativePosition
* @return int
* @see https://github.com/boyter/php-excerpt
*/
private function determineSnippetLocation(array $locations, int $relativePosition): int
{
$locationsCount = count($locations);
$smallestDiff = PHP_INT_MAX;
if ($locationsCount === 0) {
return 0;
}
$startPosition = $locations[0];
if ($locationsCount > 2) {
// skip the first as we check 1 behind
for ($i = 1; $i < $locationsCount; $i++) {
if ($i === $locationsCount - 1) { // at the end
$diff = $locations[$i] - $locations[$i - 1];
} else {
$diff = $locations[$i + 1] - $locations[$i];
}
if ($smallestDiff > $diff) {
$smallestDiff = $diff;
$startPosition = $locations[$i];
}
}
}
return $startPosition > $relativePosition ? $startPosition - $relativePosition : 0;
}
/**
* @param int $resultTokens
* @param string $ellipsis
* @param int $matchLength
* @param int $snippetLocation
* @param string $matchContent
* @return string
*/
private function extractSnippet(int $resultTokens, string $ellipsis, int $matchLength, int $snippetLocation, string $matchContent): string
{
if ($matchLength - $snippetLocation < $resultTokens) {
$snippetLocation = (int)($snippetLocation - ($matchLength - $snippetLocation) / 2);
}
$snippet = substr($matchContent, $snippetLocation, $resultTokens);
if ($snippetLocation + $resultTokens < $matchLength) {
$snippet = substr($snippet, 0, strrpos($snippet, ' ')) . $ellipsis;
}
if ($snippetLocation !== 0) {
$snippet = $ellipsis . substr($snippet, strpos($snippet, ' ') + 1);
}
return $snippet;
}
/**
* Match any value in the given array for the property
*
* @param string $propertyName
* @param array $propertyValues
* @return QueryBuilderInterface
*/
public function anyMatch(string $propertyName, array $propertyValues): QueryBuilderInterface
{
if ($propertyValues === null || empty($propertyValues) || $propertyValues[0] === null) {
return $this;
}
$queryString = null;
$lastElemtentKey = count($propertyValues) - 1;
foreach ($propertyValues as $key => $propertyValue) {
$parameterName = ':' . md5($propertyName . '#' . count($this->where) . $key);
$this->parameterMap[$parameterName] = $propertyValue;
if ($key === 0) {
$queryString .= '(';
}
if ($key !== $lastElemtentKey) {
$queryString .= sprintf('("%s") = %s OR ', $propertyName, $parameterName);
} else {
$queryString .= sprintf('("%s") = %s )', $propertyName, $parameterName);
}
}
$this->where[] = $queryString;
return $this;
}
/**
* Match any value which is like in the given array for the property
*
* @param string $propertyName
* @param array $propertyValues
* @return QueryBuilderInterface
*/
public function likeAnyMatch(string $propertyName, array $propertyValues): QueryBuilderInterface
{
if ($propertyValues === null || empty($propertyValues) || $propertyValues[0] === null) {
return $this;
}
$queryString = null;
$lastElementKey = count($propertyValues) - 1;
foreach ($propertyValues as $key => $propertyValue) {
$parameterName = ':' . md5($propertyName . '#' . count($this->where) . $key);
$this->parameterMap[$parameterName] = '%' . $propertyValue . '%';
if ($key === 0) {
$queryString .= '(';
}
if ($key !== $lastElementKey) {
$queryString .= sprintf('("%s") LIKE %s OR ', $propertyName, $parameterName);
} else {
$queryString .= sprintf('("%s") LIKE %s)', $propertyName, $parameterName);
}
}
$this->where[] = $queryString;
return $this;
}
/**
* @return string
*/
protected function buildQueryString(): string
{
$whereString = implode(' AND ', $this->where);
$orderString = implode(', ', $this->sorting);
$queryString = 'SELECT * FROM "fulltext_objects" WHERE ' . $whereString;
if (count($this->sorting)) {
$queryString .= ' ORDER BY ' . $orderString;
}
if ($this->limit !== null) {
$queryString .= ' LIMIT ' . $this->limit;
}
if ($this->from !== null) {
$queryString .= ' OFFSET ' . $this->from;
}
return $queryString;
}
/**
* @param string $propertyName
* @param mixed $propertyValue
* @param string $comparator Comparator sign i.e. '>' or '<='
* @return QueryBuilderInterface
*/
protected function compare($propertyName, $propertyValue, $comparator): QueryBuilderInterface
{
if ($propertyValue instanceof \DateTime) {
$this->where[] = sprintf("datetime(`%s`) %s strftime('%s', '%s')", $propertyName, $comparator, '%Y-%m-%d %H:%M:%S', $propertyValue->format('Y-m-d H:i:s'));
} else {
$parameterName = ':' . md5($propertyName . '#' . count($this->where));
$this->parameterMap[$parameterName] = $propertyValue;
$this->where[] = sprintf('("%s") %s %s', $propertyName, $comparator, $parameterName);
}
return $this;
}
}