spec/classes/document_search_spec.rb
# frozen_string_literal: true
require 'rails_helper'
describe DocumentSearch do
# Default search inputs. Nested example groups override these one at a time.
let(:query) { 'common' }
let(:handles) { %w[agency_blogs] }
let(:lang) { :en }
let(:size) { 10 }
let(:offset) { 0 }

# Options handed to the search under test, assembled from the defaults above.
let(:search_options) do
  { handles: handles, language: lang, query: query, size: size, offset: offset }
end

# Baseline attributes for an indexed document; examples merge their overrides
# into this hash.
let(:common_params) do
  {
    language: 'en',
    created: DateTime.now,
    path: 'http://www.agency.gov/page1.html',
    title: 'title common',
    description: 'description common',
    content: 'content'
  }
end

let(:document_search) { described_class.new(search_options) }
let(:document_search_results) { document_search.search }

# Name of the "v1" index inside the agency_blogs namespace.
let(:documents_index_name) do
  namespace = DocumentRepository.index_namespace('agency_blogs')
  [namespace, 'v1'].join('-')
end

# Using a single shard prevents intermittent relevancy issues in tests
# https://www.elastic.co/guide/en/elasticsearch/guide/current/relevance-is-broken.html
let(:document_repository) do
  shard_settings = { index: { number_of_shards: 1 } }
  DocumentRepository.new(index_name: documents_index_name, settings: shard_settings)
end
# Creates one document per entry by handing each entry, together with the
# shared `document_repository`, to the `create_document` helper (defined
# outside this file).
#
# @param document_hashes [Array<Hash>] attributes of the documents to create
# @return [Array<Hash>] the same collection that was passed in
def create_documents(document_hashes)
  document_hashes.each do |attributes|
    create_document(attributes, document_repository)
  end
end
# Give every example a fresh index: drop all agency_blogs indices, recreate
# the v1 index, and point the namespace alias at it.
before do
  namespace = DocumentRepository.index_namespace('agency_blogs')

  ES.client.indices.delete(index: [namespace, '*'].join('-'))
  document_repository.create_index!(include_type_name: true)
  ES.client.indices.put_alias(index: documents_index_name, name: namespace)
end
context 'when searching across a single index collection' do
context 'when matching documents exist' do
# Index a single document that matches the default 'common' query.
before do
  create_documents([common_params])
end

it 'returns results' do
  expect(document_search_results.total).to eq(1)
end

it 'returns non-nil aggregations' do
  expect(document_search_results.aggregations).not_to be_nil
end
context 'when those documents contain a text type aggregation field' do
  # Buckets of the :tags aggregation, taken from the first aggregation hash
  # that has a :tags entry.
  let(:tags_arr) { document_search_results.aggregations.find { |a| a[:tags] }[:tags] }

  # Adds a second matching document whose tags are given as a comma-separated string.
  before do
    create_documents([common_params.merge(tags: 'just, some, tags')])
  end

  it 'returns a hash with doc_count and agg_key keys' do
    expect(tags_arr.first.keys).to match(array_including(:agg_key, :doc_count))
  end

  it 'returns a hash of doc_count and agg_key values matching the document' do
    expect(tags_arr).to match(array_including({ agg_key: 'just', doc_count: 1 },
                                              { agg_key: 'some', doc_count: 1 },
                                              { agg_key: 'tags', doc_count: 1 }))
  end

  # No indexed document has an audience value, so no :audience aggregation is expected.
  it 'does not return an aggregation hash for fields not present in any result documents' do
    audience_arr = document_search_results.aggregations.find { |a| a[:audience] }
    expect(audience_arr).to be_nil
  end
end
context 'when those documents contain a date type aggregation field' do
  let(:query) { 'date agg' }
  # Buckets of the :changed aggregation, taken from the first aggregation
  # hash that has a :changed entry.
  let(:changed_arr) { document_search_results.aggregations.find { |a| a[:changed] }[:changed] }

  # Adds a document changed six months ago whose title matches the query.
  before do
    dated_document = common_params.merge(title: 'title with date agg',
                                         changed: 6.months.ago.to_s)
    create_documents([dated_document])
  end

  it 'returns a hash with doc_count, agg_key, and date keys' do
    expect(changed_arr.first.keys).to match(
      array_including(:agg_key, :doc_count, :to, :from, :to_as_string, :from_as_string)
    )
  end

  it 'returns a hash with doc_count, agg_key, and date values matching the document' do
    expect(changed_arr.first).to match(
      hash_including(
        agg_key: 'Last Year',
        doc_count: 1,
        to_as_string: DateTime.now.strftime('%-m/%-d/%Y'),
        from_as_string: 1.year.ago.strftime('%-m/%-d/%Y')
      )
    )
  end

  it 'does not return keys with zero corresponding documents' do
    bucket_keys = changed_arr.pluck(:agg_key)
    expect(bucket_keys).not_to include('Last Week', 'Last Month')
  end

  it 'does return keys with at least one corresponding document' do
    bucket_keys = changed_arr.pluck(:agg_key)
    expect(bucket_keys).to include('Last Year')
  end
end
context 'when searching without a query' do
  # Same options as the default search, minus the :query key.
  let(:document_search) do
    described_class.new(search_options.except(:query))
  end

  it 'returns results' do
    expect(document_search_results.total).to eq(1)
  end

  it 'returns nil aggregations' do
    expect(document_search_results.aggregations).to be_nil
  end
end
context 'when searching without a language' do
  # Same options as the default search, minus the :language key.
  let(:document_search) do
    described_class.new(search_options.except(:language))
  end

  it 'defaults to English' do
    first_result = document_search_results.results.first
    expect(first_result['language']).to eq 'en'
  end

  it 'returns results' do
    expect(document_search_results.total).to eq(1)
  end
end
describe 'included source fields' do
  # NOTE: 'path', 'created', 'changed', and 'language' all represent the corresponding value
  # in each result's '_source' hash. 'title' and 'description' are populated with the
  # highlighted values of those fields during hit extraction; in search results those
  # fields do NOT represent the original value stored in the document's source.
  it 'returns the default fields' do
    returned_keys = document_search.search.results.first.keys
    expect(returned_keys).to match_array %w[title path created changed language description thumbnail_url]
  end

  context 'when specifying included fields' do
    let(:document_search) do
      described_class.new(search_options.merge(include: ['promote']))
    end

    it 'returns the specified fields' do
      returned_keys = document_search.search.results.first.keys
      expect(returned_keys).to include 'promote'
    end
  end
end
end
# No documents are created in this group, so the index is empty when searched.
context 'when no matching documents exist' do
  it 'returns no results' do
    expect(document_search_results.total).to eq(0)
  end

  it 'returns non-nil aggregations' do
    expect(document_search_results.aggregations).not_to be_nil
  end
end
context 'when something terrible happens during the search' do
  let(:query) { 'uh oh' }
  let(:error) { StandardError.new('something went wrong') }

  # Make any access to the ES client raise, and stub the logger and NewRelic
  # so the examples can verify what was reported.
  before do
    allow(ES).to receive(:client).and_raise(error)
    allow(Rails.logger).to receive(:error)
    allow(NewRelic::Agent).to receive(:notice_error).and_return(nil)
  end

  it 'returns a no results response' do
    response = document_search_results
    expect(response.total).to eq(0)
    expect(response.results).to eq([])
  end

  it 'logs details about the query' do
    document_search.search
    expect(Rails.logger).to have_received(:error).with(/"query":"uh oh"/)
  end

  it 'sends the error to NewRelic' do
    document_search.search
    expect(NewRelic::Agent).to have_received(:notice_error)
  end
end
end
describe 'pagination' do
  # Eleven matching documents: one whose title starts with 'most relevant'
  # and ten whose titles start with 'title'.
  before do
    most_relevant = common_params.merge(title: 'most relevant title common content',
                                        description: 'other content')
    others = Array.new(10) do |n|
      common_params.merge(title: "title #{n}", description: "common content #{n}")
    end
    create_documents([most_relevant, *others])
  end

  context 'when size is 3' do
    let(:size) { 3 }

    it 'returns "size" results' do
      expect(document_search_results.total).to eq(11)
      expect(document_search_results.results.size).to eq(3)
    end
  end

  context 'when offset is 1' do
    let(:offset) { 1 }

    # Every remaining result is expected to have a title starting with 'title'.
    it 'obeys the offset' do
      expect(document_search_results.total).to eq(11)
      expect(document_search_results.results.size).to eq(10)
      document_search_results.results.each do |result|
        expect(result['title']).to start_with('title')
      end
    end
  end
end
context 'when searching across multiple indexes' do
  let(:handles) { %w[agency_blogs other_agency_blogs] }

  # Creates one document in the default agency_blogs index and one in a second
  # index that is aliased to the other_agency_blogs namespace.
  before do
    create_document(common_params, document_repository)

    other_namespace = DocumentRepository.index_namespace('other_agency_blogs')
    other_index_name = [other_namespace, 'v1'].join('-')

    # The top-level hook in this file only clears the agency_blogs indices.
    # Clear this group's extra indices as well so documents left behind by an
    # earlier run cannot inflate the expected total.
    # NOTE(review): redundant if a global hook already wipes all test indices.
    ES.client.indices.delete(index: [other_namespace, '*'].join('-'))

    other_repository = DocumentRepository.new(index_name: other_index_name)
    other_repository.create_index!(include_type_name: true)
    ES.client.indices.put_alias(index: other_index_name, name: other_namespace)
    create_document(common_params, other_repository)
  end

  it 'returns results from all indexes' do
    expect(document_search_results.total).to eq(2)
  end
end
describe 'recall' do
  context 'when all query terms are in URL basename' do
    let(:query) { 'obama hud' }

    before do
      create_documents([
        common_params.merge(path: 'http://www.agency.gov/obama-visits-hud.html')
      ])
    end

    it 'matches' do
      expect(document_search_results.total).to eq(1)
    end
  end

  context 'when enough low frequency and high frequency words are found' do
    # Two documents with rare title terms, plus two sets of 80 documents
    # that each repeat the same title and description.
    before do
      high_occurrence = common_params.merge(title: 'high occurrence tokens',
                                            description: 'these are like stopwords')
      everywhere = common_params.merge(title: 'showing up everywhere',
                                       description: 'these are like stopwords')
      create_documents([
        common_params.merge(title: 'low frequency term'),
        common_params.merge(title: 'very rare words'),
        *Array.new(80, high_occurrence),
        *Array.new(80, everywhere)
      ])
    end

    context 'with low freq terms' do
      let(:query) { 'very low frequency term' }

      it 'matches 3 out of 4 low freq terms' do
        expect(document_search_results.total).to eq(1)
      end
    end

    context 'with missing terms' do
      let(:query) { 'MISSING low frequency term' }

      it 'matches 3 out of 4 low missing terms' do
        expect(document_search_results.total).to eq(1)
      end
    end

    context 'with high freq terms' do
      let(:query) { 'high occurrence everywhere' }

      it 'matches 2 out of 3 high freq terms' do
        expect(document_search_results.total).to eq(80)
      end
    end
  end
end
describe 'overall relevancy' do
context 'when exact phrase matches' do
  let(:query) { 'jefferson Memorial' }

  # Both titles contain the query terms; only the second contains them as an
  # adjacent phrase.
  before do
    scattered_terms = common_params.merge(title: 'jefferson township Petitions and Memorials')
    adjacent_terms = common_params.merge(title: 'jefferson Memorial and township Petitions')
    create_documents([scattered_terms, adjacent_terms])
  end

  it 'ranks those higher' do
    top_result = document_search_results.results.first
    expect(top_result['title']).to match(/jefferson Memorial/)
  end
end
context 'when a search term appears in varying fields' do
  let(:query) { 'rutabaga' }

  # Three documents, each carrying 'Rutabagas' in a different field.
  before do
    in_content = common_params.merge(title: 'other', description: 'other', content: 'Rutabagas')
    in_description = common_params.merge(title: 'other', description: 'Rutabagas', content: 'other')
    in_title = common_params.merge(title: 'Rutabagas', description: 'other', content: 'other')
    create_documents([in_content, in_description, in_title])
  end

  it 'prioritizes matches in the title, then description, then content' do
    expect(document_search_results.results.first['title']).to match(/Rutabagas/)
    expect(document_search_results.results[1]['description']).to match(/Rutabagas/)
    expect(document_search_results.results[2]['content']).to match(/Rutabagas/)
  end
end
# One example group per demoted file extension. The extension is interpolated
# into the group description so the generated groups have distinct names.
%w[doc docx pdf ppt pptx xls xlsx].each do |ext|
  context "when the results contain demoted .#{ext} and non-demoted file types" do
    # Four documents that differ only in the path extension.
    before do
      create_documents([
        common_params.merge(path: "http://www.agency.gov/dir1/page1.#{ext}"),
        common_params.merge(path: 'http://www.agency.gov/dir1/page1.html'),
        common_params.merge(path: 'http://www.agency.gov/dir1/page1'),
        common_params.merge(path: 'http://www.agency.gov/dir1/page1.txt')
      ])
    end

    # Index 3 is the last of the four results.
    it "docs ending in .#{ext} appear after non-demoted docs" do
      expect(document_search_results.results[3]['path']).to eq("http://www.agency.gov/dir1/page1.#{ext}")
    end
  end
end
context 'when exact word form matches' do
  let(:query) { 'news memorials' }

  # The first description uses different word forms ('Memorial', 'new');
  # the second contains the query's exact forms.
  before do
    other_forms = common_params.merge(description: 'jefferson township Memorial new')
    exact_forms = common_params.merge(description: 'jefferson township memorials news')
    create_documents([other_forms, exact_forms])
  end

  it 'ranks those higher' do
    top_result = document_search_results.results.first
    expect(top_result['description']).to match(/memorials news/)
  end
end
context 'when exact match on a document tag' do
  # Searches for 'Stats' and asks for the tags field to be included in results.
  let(:document_search) do
    described_class.new(search_options.merge(query: 'Stats', include: ['tags']))
  end

  # Identical titles; the documents differ only in their tags.
  before do
    shared_title = 'This mentions stats in the title'
    create_documents([
      common_params.merge(title: shared_title),
      common_params.merge(title: shared_title, tags: 'stats'),
      common_params.merge(title: shared_title, tags: 'unimportant stats')
    ])
  end

  it 'ranks those higher' do
    expect(document_search_results.total).to eq(3)
    expect(document_search_results.results.first['tags']).to match_array(['stats'])
  end
end
context 'when documents include click counts' do
  # Three otherwise identical documents with no, high, and medium click counts.
  before do
    create_documents([
      common_params.merge(path: 'http://agency.gov/popular'),
      common_params.merge(path: 'http://agency.gov/most_popular', click_count: 10),
      common_params.merge(path: 'http://agency.gov/more_popular', click_count: 5)
    ])
  end

  it 'ranks documents with higher click counts higher' do
    # Result fields are read with string keys everywhere else in this file,
    # so pluck with a string key here as well.
    paths = document_search_results.results.pluck('path')
    expect(paths).to eq(
      %w[http://agency.gov/most_popular
         http://agency.gov/more_popular
         http://agency.gov/popular]
    )
  end
end
end
describe 'sorting by date' do
  # Five documents with different changed timestamps; one has neither a
  # changed nor a created date.
  before do
    create_documents([
      common_params.merge(changed: 2.months.ago, path: 'http://www.agency.gov/2months.html'),
      common_params.merge(changed: nil, created: nil, path: 'http://www.agency.gov/nodate.html'),
      common_params.merge(changed: 6.months.ago, path: 'http://www.agency.gov/6months.html'),
      common_params.merge(changed: 1.minute.ago, path: 'http://www.agency.gov/1minute.html'),
      common_params.merge(changed: 3.years.ago, path: 'http://www.agency.gov/3years.html')
    ])
  end

  context 'when using default sort' do
    let(:document_search) do
      described_class.new(search_options.merge(sort_by_date: false))
    end

    # NOTE(review): this description is identical to the one in the
    # date-sorted group below, but here the undated document is expected
    # first. Confirm whether the description should differ.
    it 'returns results in reverse chronological order based on changed timestamp' do
      paths = document_search_results.results.pluck('path')
      expect(paths).to eq(
        %w[
          http://www.agency.gov/nodate.html
          http://www.agency.gov/1minute.html
          http://www.agency.gov/2months.html
          http://www.agency.gov/6months.html
          http://www.agency.gov/3years.html
        ]
      )
    end
  end

  context 'when sorting by date' do
    let(:document_search) do
      described_class.new(search_options.merge(sort_by_date: true))
    end

    # The undated document is expected last.
    it 'returns results in reverse chronological order based on changed timestamp' do
      paths = document_search_results.results.pluck('path')
      expect(paths).to eq(
        %w[
          http://www.agency.gov/1minute.html
          http://www.agency.gov/2months.html
          http://www.agency.gov/6months.html
          http://www.agency.gov/3years.html
          http://www.agency.gov/nodate.html
        ]
      )
    end
  end
end
context 'when documents contain text type facet field data' do
  # The options are the same for every facet field, so they are declared once
  # here rather than once per loop iteration.
  let(:search_options) do
    {
      handles: handles,
      language: :en,
      query: query,
      size: 10,
      offset: 0,
      include: %w[audience content_type mime_type]
    }
  end

  {
    'audience' => 'everyone',
    'content_type' => 'article',
    'mime_type' => 'text/html'
  }.each do |field, content|
    # Each field gets its own example group. Hooks declared directly inside a
    # loop all attach to the same enclosing group, so every example would run
    # every field's setup; nesting keeps each `before` scoped to its own field.
    context "when documents contain #{field} data" do
      # One document without the field, one with the expected value, and one
      # with a different value ('application/pdf').
      before do
        create_documents([
          common_params,
          common_params.merge(field.to_sym => content),
          common_params.merge(field.to_sym => 'application/pdf')
        ])
      end

      context "when filtering by a single #{field}" do
        let(:document_search) { described_class.new(search_options.merge(field.to_sym => [content])) }

        it 'returns matches' do
          expect(document_search_results.total).to eq(1)
          field_values = document_search_results.results.pluck(field)
          expect(field_values).to all include(content)
        end
      end

      context "when filtering by multiple #{field}s and at least one matches" do
        let(:document_search) do
          described_class.new(search_options.merge(field.to_sym => [content, 'missing value']))
        end

        it 'returns matches' do
          expect(document_search_results.total).to eq(1)
          field_values = document_search_results.results.pluck(field)
          expect(field_values).to all include(content)
        end
      end

      context "when filtering by multiple #{field}s and both have matches" do
        let(:document_search) do
          described_class.new(search_options.merge(field.to_sym => [content, 'application/pdf']))
        end

        it 'returns all matches' do
          expect(document_search_results.total).to eq(2)
          field_values = document_search_results.results.pluck(field)
          expect(field_values).to all include(content).or include('application/pdf')
        end
      end

      context "when filtering by a partial #{field} term" do
        # `chop` drops the last character, producing a prefix of the real value.
        let(:document_search) { described_class.new(search_options.merge(field.to_sym => [content.chop])) }

        it 'does not return partially matching results' do
          expect(document_search_results.total).to eq(0)
        end
      end
    end
  end

  # These examples are specific to the audience field, so they are declared
  # once here (with their own audience documents) instead of once per field.
  context 'when querying on audience content' do
    before do
      create_documents([
        common_params,
        common_params.merge(audience: 'everyone'),
        common_params.merge(audience: 'application/pdf')
      ])
    end

    context 'when the query matches audience' do
      let(:document_search) do
        described_class.new(search_options.merge(query: 'everyone'))
      end

      it 'returns matching results' do
        expect(document_search_results.total).to eq(1)
        expect(document_search_results.results.first['audience']).to eq('everyone')
      end
    end

    context 'when the query partially matches audience' do
      let(:document_search) { described_class.new(search_options.merge(query: 'one')) }

      it 'does not return partially matching results' do
        expect(document_search_results.total).to eq(0)
      end
    end
  end
end
context 'when documents contain array type facet field data' do
  # The options are the same for every facet field, so they are declared once
  # here rather than once per loop iteration.
  let(:search_options) do
    {
      handles: handles,
      language: :en,
      query: query,
      size: 10,
      offset: 0,
      include: %w[searchgov_custom1 searchgov_custom2 searchgov_custom3 tags]
    }
  end

  {
    'searchgov_custom1' => 'something, like, this',
    'searchgov_custom2' => 'this or that',
    'searchgov_custom3' => '100',
    'tags' => 'some, tags'
  }.each do |field, content|
    # Each field gets its own example group. Hooks declared directly inside a
    # loop all attach to the same enclosing group, so every example would run
    # every field's setup; nesting keeps each `before` scoped to its own field.
    context "when documents contain #{field} data" do
      # One document without the field, one with the comma-separated content,
      # and one with the single value 'extra item'.
      before do
        create_documents([
          common_params,
          common_params.merge(field.to_sym => content),
          common_params.merge(field.to_sym => 'extra item')
        ])
      end

      context "when filtering by one #{field} term" do
        # A one-element array holding a randomly chosen term from the content.
        let(:filter_value) { content.split(', ').sample(1) }
        let(:document_search) do
          described_class.new(search_options.merge(field.to_sym => filter_value))
        end

        it 'returns results matching that single term' do
          expect(document_search_results.total).to eq(1)
          expect(document_search_results.results.first[field]).to match(array_including(filter_value))
        end
      end

      context "when filtering by multiple #{field} terms that both have matches" do
        let(:sampled_value) { content.split(', ').sample(1) }
        let(:filter_value) { sampled_value + ['extra item'] }
        let(:document_search) do
          described_class.new(search_options.merge(field.to_sym => filter_value))
        end

        it 'returns all matches' do
          expect(document_search_results.total).to eq(2)
          field_values = document_search_results.results.pluck(field)
          expect(field_values).to all match(array_including(sampled_value)).or match(array_including('extra item'))
        end
      end

      context "when filtering by a partial #{field} term" do
        # `chop` drops the last character, producing a prefix of a real term.
        let(:filter_value) { [content.split(', ').sample(1).first.chop] }
        let(:document_search) { described_class.new(search_options.merge(field.to_sym => filter_value)) }

        it 'does not return partially matching results' do
          expect(document_search_results.total).to eq(0)
        end
      end

      context "when filtering by the entire #{field} array" do
        let(:filter_value) { content.split(', ') }
        let(:document_search) do
          described_class.new(search_options.merge(field.to_sym => filter_value))
        end

        it 'returns results matching the entire array' do
          expect(document_search_results.total).to eq(1)
          expect(document_search_results.results.first[field]).to eq(filter_value)
        end
      end

      context "when the query matches a single #{field} term" do
        let(:query) { content.split(', ').sample(1).first }

        it 'returns results matching that field' do
          expect(document_search_results.total).to eq(1)
          expect(document_search_results.results.first[field]).to match(array_including(query))
        end
      end

      context "when the query partially matches a #{field} term" do
        let(:query) { content.split(', ').sample(1).first.chop }

        it 'does not return partially matching results' do
          expect(document_search_results.total).to eq(0)
        end
      end
    end
  end
end
context 'when filtering on language' do
  let(:query) { 'america' }
  let(:lang) { :fr }

  # The same title indexed once as English and once as French.
  before do
    english = common_params.merge(language: 'en',
                                  title: 'america',
                                  path: 'http://www.agency.gov/page1.html')
    french = common_params.merge(language: 'fr',
                                 title: 'america',
                                 path: 'http://fr.agency.gov/page1.html')
    create_documents([english, french])
  end

  it 'returns results from only that language' do
    expect(document_search_results.total).to eq(1)
    expect(document_search_results.results.first['language']).to eq('fr')
  end
end
# SRCH-3477: The tests below can likely be removed and/or pared down once the MVP faceted search work is complete,
# as they are now largely replicated above. However, tag filtering still provides some functionality
# not yet covered by the facet work (e.g. exclusive filtering, querying on tag content, etc.), so they are being
# kept for the time being.
describe 'filtering on tags' do
  # Default options for this group, with the tags field included in results.
  let(:search_options) do
    {
      handles: handles,
      language: :en,
      query: query,
      size: 10,
      offset: 0,
      include: ['tags']
    }
  end

  # Four documents that differ only in their comma-separated tags.
  before do
    tagged_documents = ['usa', 'york, usa', 'new york, usa', 'random tag'].map do |tags|
      common_params.merge(tags: tags)
    end
    create_documents(tagged_documents)
  end

  context 'with inclusive filtering' do
    context 'when searching by one tag' do
      let(:document_search) do
        described_class.new(search_options.merge(query: 'title', tags: %w[york]))
      end

      # Only the 'york, usa' document is expected; 'new york' does not count.
      it 'returns results matching the exact tag' do
        expect(document_search_results.total).to eq(1)
        expect(document_search_results.results.first['tags']).to match_array(%w[york usa])
      end
    end

    context 'when searching by multiple tags' do
      let(:document_search) do
        described_class.new(search_options.merge(query: 'title', tags: %w[york usa]))
      end

      it 'returns results matching either of those exact tags' do
        expect(document_search_results.total).to eq(3)
        tags = document_search_results.results.pluck('tags')
        expect(tags).to all include('york').or include('usa')
      end
    end

    context 'when the query matches a tag' do
      let(:document_search) do
        described_class.new(search_options.merge(query: 'random tag'))
      end

      it 'returns results matching that tag' do
        expect(document_search_results.total).to eq(1)
        expect(document_search_results.results.first['tags']).to match_array(['random tag'])
      end
    end

    context 'when searching by a tag with a partial match' do
      let(:document_search) do
        described_class.new(search_options.merge(query: 'random'))
      end

      it 'does not return partially matching results' do
        expect(document_search_results.total).to eq(0)
      end
    end
  end

  context 'with exclusive filtering' do
    context 'when multiple tags' do
      let(:document_search) do
        described_class.new(search_options.merge(ignore_tags: %w[york usa]))
      end

      it 'returns results without those exact tags' do
        expect(document_search_results.total).to eq(1)
      end
    end

    context 'when single tag' do
      let(:document_search) do
        described_class.new(search_options.merge(ignore_tags: %w[york]))
      end

      it 'returns results without that exact tag' do
        expect(document_search_results.total).to eq(3)
      end
    end
  end
end
context 'when filtering on dates' do
  # Each nested group supplies its own `date_filtered_options`.
  let(:document_search) { described_class.new(date_filtered_options) }

  # Four documents with different combinations of changed/created dates:
  #   page1: changed 1 month ago, no created date
  #   page2: changed 1 week ago,  created now
  #   page3: changed now,         created 1 week ago
  #   page4: no changed date,     created 1 month ago
  before do
    create_documents([
      common_params.merge(changed: 1.month.ago,
                          created: nil,
                          path: 'http://www.agency.gov/dir1/page1.html'),
      common_params.merge(changed: 1.week.ago,
                          created: DateTime.now,
                          path: 'http://www.agency.gov/dir1/page2.html'),
      common_params.merge(changed: DateTime.now,
                          created: 1.week.ago,
                          path: 'http://www.agency.gov/dir1/page3.html'),
      common_params.merge(changed: nil,
                          created: 1.month.ago,
                          path: 'http://www.agency.gov/dir1/page4.html')
    ])
  end

  context 'when filtering on changed date range' do
    let(:date_filtered_options) do
      search_options.merge(min_timestamp: 2.weeks.ago,
                           max_timestamp: 1.day.ago)
    end

    it 'returns results from only that date range' do
      expect(document_search_results.total).to eq(1)
      expect(document_search_results.results.first['path']).
        to eq('http://www.agency.gov/dir1/page2.html')
    end
  end

  context 'when filtering on minimum changed date' do
    let(:date_filtered_options) { search_options.merge(min_timestamp: 2.weeks.ago) }

    it 'returns results from only after that minimum date' do
      expect(document_search_results.total).to eq(2)
      expect(document_search_results.results.pluck('path')).
        to match_array(
          %w[
            http://www.agency.gov/dir1/page2.html
            http://www.agency.gov/dir1/page3.html
          ]
        )
    end
  end

  context 'when filtering on maximum changed date' do
    let(:date_filtered_options) { search_options.merge(max_timestamp: 1.day.ago) }

    it 'returns results from only before that maximum date' do
      expect(document_search_results.total).to eq(2)
      expect(document_search_results.results.pluck('path')).
        to match_array(
          %w[
            http://www.agency.gov/dir1/page2.html
            http://www.agency.gov/dir1/page1.html
          ]
        )
    end
  end

  context 'when filtering on created date range' do
    let(:date_filtered_options) do
      search_options.merge(min_timestamp_created: 2.weeks.ago,
                           max_timestamp_created: 1.day.ago)
    end

    it 'returns results from only that date range' do
      expect(document_search_results.total).to eq(1)
      expect(document_search_results.results.first['path']).
        to eq('http://www.agency.gov/dir1/page3.html')
    end
  end

  context 'when filtering on minimum created date' do
    let(:date_filtered_options) { search_options.merge(min_timestamp_created: 2.weeks.ago) }

    it 'returns results from only after that minimum date' do
      expect(document_search_results.total).to eq(2)
      expect(document_search_results.results.pluck('path')).
        to match_array(
          %w[
            http://www.agency.gov/dir1/page2.html
            http://www.agency.gov/dir1/page3.html
          ]
        )
    end
  end

  context 'when filtering on maximum created date' do
    let(:date_filtered_options) { search_options.merge(max_timestamp_created: 1.day.ago) }

    it 'returns results from only before that maximum date' do
      expect(document_search_results.total).to eq(2)
      expect(document_search_results.results.pluck('path')).
        to match_array(
          %w[
            http://www.agency.gov/dir1/page3.html
            http://www.agency.gov/dir1/page4.html
          ]
        )
    end
  end
end
context 'when filtering on site:' do
  # Four 'america' documents spread over different hosts and directory depths.
  before do
    site_paths = %w[
      http://www.agency.gov/dir1/page1.html
      http://www.agency.gov/dir1/dir2/page1.html
      http://www.other.gov/dir2/dir3/page1.html
      http://agency.gov/page1.html
    ]
    create_documents(site_paths.map { |path| common_params.merge(title: 'america', path: path) })
  end

  context 'when two-deep path provided' do
    let(:query) { '(site:www.agency.gov/dir1/dir2) america' }

    it { expect(document_search_results.total).to eq(1) }
  end

  context 'when one-deep path provided' do
    let(:query) { '(site:www.agency.gov/dir1) america' }

    it { expect(document_search_results.total).to eq(2) }
  end

  context 'when base path provided' do
    let(:query) { '(site:agency.gov/) america' }

    it { expect(document_search_results.total).to eq(3) }
  end

  context 'when domain provided' do
    let(:query) { '(site:agency.gov) america' }

    it { expect(document_search_results.total).to eq(3) }
  end

  context 'when multiple domains and a missing path provided' do
    let(:query) { '(site:agency.gov site:other.gov site:missing.gov/not_there) america' }

    it { expect(document_search_results.total).to eq(4) }
  end

  context 'when multiple incomplete paths provided' do
    let(:query) { '(site:agency.gov/dir2 site:other.gov/dir1) america' }

    it { expect(document_search_results.total).to be_zero }
  end

  context 'when single incomplete path provided' do
    let(:query) { '(site:www.agency.gov/dir2) america' }

    it { expect(document_search_results.total).to be_zero }
  end

  context 'when single www domain but no query provided' do
    let(:query) { '(site:www.other.gov)' }

    it { expect(document_search_results.total).to eq(1) }
  end

  context 'when domain but no query provided' do
    let(:query) { 'site:agency.gov' }

    it { expect(document_search_results.total).to eq(3) }
  end

  context 'when excluding domains' do
    let(:query) { '-site:agency.gov america' }
    # All returned paths joined into one string so a single regex can check them.
    let(:document_paths) { document_search_results.results.pluck('path').join(' ') }

    it { expect(document_search_results.results.count).to eq(1) }
    it { expect(document_paths).not_to match(/agency.gov/) }

    context 'when excluding a path' do
      let(:query) { '-site:www.agency.gov/dir1 america' }

      it { expect(document_search_results.results.count).to eq(2) }
      it { expect(document_paths).not_to match(%r{agency.gov/dir1}) }

      context 'when the path includes a trailing slash' do
        let(:query) { '-site:www.agency.gov/dir1/ america' }

        it { expect(document_search_results.results.count).to eq(2) }
        it { expect(document_paths).not_to match(%r{agency.gov/dir1}) }
      end

      context 'when excluding sub-subdirectories' do
        let(:query) { '-site:www.agency.gov/dir1/dir2 america' }

        it { expect(document_search_results.results.count).to eq(3) }
        it { expect(document_paths).not_to match(%r{agency.gov/dir1/dir2}) }
      end
    end

    context 'when excluding a path that is a partial match' do
      let(:query) { '-site:www.agency.gov/di america' }

      it 'does not exclude those results' do
        expect(document_search_results.results.count).to eq(4)
      end
    end
  end
end
context 'when search term yields no results but a similar spelling does have results' do
  let(:query) { '99 problemz' }

  # One English and one Spanish document; neither contains the misspelled
  # query term.
  before do
    english_document = {
      language: 'en',
      title: '99 problems',
      description: 'but speling aint one of the 99 problems',
      path: 'http://en.agency.gov/page1.html',
      content: 'Will I have to pay more if I have employees with health problems'
    }
    spanish_document = {
      language: 'es',
      title: '99 problemas',
      description: 'pero la ortografía no es uno dello las 99 problemas',
      path: 'http://es.agency.gov/page1.html',
      content: '¿Tendré que pagar más si tengo empleados con problemas de la salud?'
    }
    create_documents([english_document, spanish_document])
  end

  context 'when searching in English' do
    let(:lang) { :en }

    it 'returns results for the close spelling for English' do
      expect(document_search_results.total).to eq(1)
      expect(document_search_results.suggestion['text']).to eq('99 problems')
      expect(document_search_results.suggestion['highlighted']).to eq('99 problems')
    end
  end

  context 'when searching in Spanish' do
    let(:lang) { :es }

    it 'returns results for the close spelling for Spanish' do
      expect(document_search_results.total).to eq(1)
      expect(document_search_results.suggestion['text']).to eq('99 problemas')
      expect(document_search_results.suggestion['highlighted']).to eq('99 problemas')
    end
  end

  context 'when searching in English with an excluded site' do
    let(:query) { '99 problemz -site:agency.gov' }
    let(:lang) { :en }

    it 'does not return results from excluded sites' do
      expect(document_search_results.total).to eq(0)
    end
  end
end
context 'when a search term yields results as well as a suggestion' do
  let(:query) { 'fsands' }

  # Three documents that differ only in their content.
  before do
    create_documents(%w[FSAND fund fraud].map { |text| common_params.merge(content: text) })
  end

  it 'does not return a suggestion' do
    expect(document_search_results.suggestion).to be_nil
  end
end
describe 'searching by exact phrase' do
  # The double quotes are part of the query string itself.
  let(:query) { '"amazing spiderman"' }

  # The same two words, once as an adjacent phrase and once separated.
  before do
    exact = common_params.merge(content: 'amazing spiderman')
    inexact = common_params.merge(content: 'spiderman is amazing')
    create_documents([exact, inexact])
  end

  it 'returns exact matches only' do
    expect(document_search_results.total).to eq 1
    expect(document_search_results.results.first['content']).to eq 'amazing spiderman'
  end

  context 'when a result contains both exact and inexact matches' do
    let(:query) { '"exact phrase"' }

    # The content has the words in reverse order first and as the exact
    # phrase second.
    before do
      create_documents([
        common_params.merge(
          content: 'This phrase match is not exact. This is an exact phrase match'
        )
      ])
    end

    it 'only highlights exact matches' do
      returned_content = document_search_results.results.first['content']
      expect(returned_content).to eq 'match is not exact. This is an exact phrase match'
    end

    context 'when searching by exact and inexact phrases' do
      let(:query) { 'this "exact phrase"' }

      it 'only highlights exact matches' do
        returned_content = document_search_results.results.first['content']
        expect(returned_content).to eq 'This phrase match is not exact. This is an exact phrase match'
      end
    end
  end
end
context 'when a document has been promoted' do
  # The promoted document sits between two non-promoted ones in creation order.
  before do
    not_promoted = common_params.merge(title: 'no', promote: false)
    promoted = common_params.merge(title: 'yes', promote: true)
    create_documents([not_promoted, promoted, common_params.merge(title: 'no', promote: false)])
  end

  it 'prioritizes promoted documents' do
    expect(document_search_results.total).to eq 3
    expect(document_search_results.results.first['title']).to eq 'yes'
  end
end
describe 'stemming' do
  let(:query) { 'renew' }

  # Two documents sharing the 'renew' stem and one unrelated document.
  before do
    create_documents([
      common_params.merge(content: 'passport renewal'),
      common_params.merge(content: 'renew passport'),
      common_params.merge(content: 'something unrelated')
    ])
  end

  # Both related documents are expected; the one containing the literal
  # query term is expected first.
  it 'finds similar words by word stem' do
    expect(document_search_results.total).to eq 2
    expect(document_search_results.results.first['content']).to eq 'renew passport'
  end
end
describe 'language support' do
  # One fixture per supported language: the language code, a sentence to
  # index, and a term from that sentence to search for.
  languages = [
    {
      lang_code: 'en',
      content: 'Select your state or territory from the dropdown menu to find the rules that apply to you.',
      query: 'territory'
    },
    {
      lang_code: 'es',
      content: 'Seleccione su estado o territorio en el menú desplegable y encontrará las normas a seguir.',
      query: 'territorio'
    },
    {
      lang_code: 'hi',
      content: 'आप पर लागू होने वाले नियमों को जानने के लिए ड्रॉपडाउन मेनू से अपना राज्य या क्षेत्र चुनें।',
      query: 'क्षेत्र'
    },
    {
      lang_code: 'bn',
      content: 'আপনার ক্ষেত্রে প্রযোজ্য নিয়মগুলি খুঁজে পেতে ড্রপডাউন মেনু থেকে আপনার রাজ্য বা অঞ্চল নির্বাচন করুন৷',
      query: 'অঞ্চল'
    }
  ]

  languages.each do |language|
    # Locals are named so they do not shadow the `lang` and `query` helpers
    # defined with `let` at the top of this file.
    lang_code, content, search_term = language.values_at(:lang_code, :content, :query)

    # Each language gets its own example group. Hooks declared directly inside
    # a loop all attach to the same enclosing group, so every example would
    # index every language's document; nesting keeps each `before` scoped to
    # its own language.
    context "when the language is #{lang_code}" do
      before do
        create_documents([
          {
            language: lang_code,
            path: "https://vote.gov/#{lang_code}",
            content: content
          }
        ])
      end

      it "gets results for #{lang_code}" do
        search = described_class.new(search_options.merge(query: search_term, language: lang_code))
        expect(search.search.results.first['content']).to match(/#{search_term}/)
      end
    end
  end
end
end