lib/es-query-builder/parser.rb
class EsQueryBuilder
# Public: The class which has a responsibility for creatign a query.
#
# Note that the term "query" has two different meanings in the terminology of
# Elasticsearch. One represents how to retrieve documents from Elasticsearch
# and it consists of query and filter, so that is to say the other is a
# part of previous one. In this file, "query" and "query hash" represents the
# former and the latter respectively:
#
# "query" = "query hash" + "filter hash"
#
class Parser
# Public: Construct the parser object.
#
# all_query_fields - A String or an Array of Strings for searching usual
# query terms (default: '_all').
# hierarchy_fields - An Array of Strings which treats the trailing slash
# character as a hierarchy (default: []).
#
# Returns nothing.
def initialize(all_query_fields: '_all', hierarchy_fields: [],
nested_fields: {}, child_fields: {})
@all_query_fields = all_query_fields
@hierarchy_fields = hierarchy_fields
@nested_fields = nested_fields
@child_fields = child_fields
end
# Public: Parse the given tokens and build a query hash.
#
# tokens - An Array of Tokens.
#
# Returns a Hash for Elasticsearch client or nil.
def parse(tokens)
connect_queries(build_queries(tokens))
end
private
# Internal: Convert the given tokens into sequence of queries.
#
# tokens - An Array of Tokens.
#
# Returns an Array of Hashes. Each hash represents a query.
def build_queries(tokens)
split_by_or_token(tokens).map do |or_less_tokens|
query_hash = build_query_hash(or_less_tokens.select(&:query?))
filter_hash = build_filter_hash(or_less_tokens.select(&:filter?))
create_query(query_hash, filter_hash)
end
end
# Internal: Merge sequence of queries into a single query.
#
# queries - An Array of Hashes. Eash hash represents a query.
#
# Returns a Hash or nil.
def connect_queries(queries)
case queries.size
when 0
nil
when 1
queries.first
else
{
bool: {
should: queries
}
}
end
end
# Internal: Divide the given tokens array into sub arrays by 'or' token.
#
# tokens - An Array of Search::QueryBuilder::Token.
#
# Examples
#
# split_by_or_token([<Query>, <OR>, <Query>, <Filter>])
# #=> [[<Query>], [<Query>, <Filter>]]
#
# Returns an Array of Arrays of Tokens.
def split_by_or_token(tokens)
expressions = [[]]
tokens.each do |token|
if token.or?
expressions << []
else
expressions.last << token
end
end
expressions.select { |e| e.size > 0 }
end
# Internal: Connect given query hash and filter hash objects.
#
# query_hash - A Hash represents a query hash.
# filter_hash - A Hash represents a filter hash.
#
# Returns a Hash represents a query.
def create_query(query_hash, filter_hash)
if filter_hash.size > 0
{
filtered: {
query: query_hash,
filter: filter_hash
}
}
else
query_hash
end
end
# Internal: Build a query hash by query tokens
#
# query_tokens - An Array of query Tokens.
#
# Returns a Hash represents a query hash.
def build_query_hash(query_tokens)
return { match_all: {} } if query_tokens.empty?
must, must_not = create_bool_queries(query_tokens)
if must.size == 1 && must_not.empty?
must.first
else
bool = {}
bool[:must] = must if must.size > 0
bool[:must_not] = must_not if must_not.size > 0
{ bool: bool }
end
end
# Internal: Build a filter parameter hash by query tokens
#
# filter_tokens - An Array of filter Tokens.
#
# Returns a Hash represents a filter hash.
def build_filter_hash(filter_tokens)
return {} if filter_tokens.empty?
must, must_not = create_bool_filters(filter_tokens)
if must.size == 1 && must_not.empty?
# Term filter is cached by default.
must.first
else
bool = {}
bool[:must] = must if must.size > 0
bool[:must_not] = must_not if must_not.size > 0
# Bool filter is not cached by default.
{ bool: bool.merge(_cache: true) }
end
end
# Internal: Create boolean query based with the given query tokens.
#
# query_tokens - An Array of query Tokens.
#
# Returns an Array consists of must and must_not query arrays.
def create_bool_queries(query_tokens)
must, must_not = [], []
query_tokens.each do |token|
queries = token.minus? ? must_not : must
queries <<
# When the field is not given or invalid one, search by all fields.
if token.field.nil?
should = []
should << create_match_query(@all_query_fields, token.term)
@nested_fields.each do |nested_path, nested_field|
should << create_nested_match_query(nested_path, nested_field, token.term)
end
@child_fields.each do |child_type, child_field|
should << create_has_child_match_query(child_type, child_field, token.term)
end
connect_queries(should)
# When the specify nested field
elsif nested_field = @nested_fields[token.field_namespace]
create_nested_match_query(token.field_namespace, nested_field, token.term)
# When the specify child field
elsif child_field = @child_fields[token.field_namespace]
create_has_child_match_query(token.field_namespace, child_field, token.term)
# When the specify standard field
else
create_match_query(token.field, token.term)
end
end
[must, must_not]
end
def create_match_query(field, term)
if field.is_a?(String)
{
match: {
field => term
}
}
else
{
multi_match: {
fields: field,
query: term
}
}
end
end
def create_nested_match_query(path, field, term)
{
nested: {
path: path.to_s,
query: create_match_query(field, term)
}
}
end
def create_has_child_match_query(child_type, field, term)
{
has_child: {
type: child_type.to_s,
query: create_match_query(field, term)
}
}
end
# Internal: Create boolean filter based on the filter matches.
# If a field query in hierarchy fields ends with '/', it matches to all
# descendant terms.
#
# query_tokens - An Array of filter Tokens.
#
# Examples
#
# # When 'tag:foo'
# create_bool_filters([...])
# # => [[ { bool: { should: [{ term: { tag: 'foo' } }, { prefix: { tag: 'foo/' } }] } }], []]
#
# # When '-tag:foo'
# create_bool_filters([...])
# # => [[], [], [{ term: { tag: 'foo' } }, { prefix: { tag: 'foo/' } }]]
#
# Returns an Array consists of must, should and must_not filters arrays.
def create_bool_filters(filter_tokens)
must, must_not = [], []
filter_tokens.each do |token|
token.term.split.each do |term|
if @hierarchy_fields.include?(token.field)
if token.minus?
must_not << { prefix: { token.field => term.downcase + '/' } }
must_not << { term: { token.field => term.downcase } }
else
must << {
bool: {
should: [
{ prefix: { token.field => term.downcase + '/' } },
# Exactly matches to the tag.
{ term: { token.field => term.downcase } },
]
}
}
end
else
cond = token.minus? ? must_not : must
cond << { term: { token.field => term.downcase } }
end
end
end
[must, must_not]
end
end
end