lib/autocomplete_source.rb
module AutocompleteSource
AUTOCOMPLETE_DELIMITER = ": ".freeze
AUTOCOMPLETE_COMPLETION_KEY = "completion".freeze
AUTOCOMPLETE_SCORE_KEY = "score".freeze
AUTOCOMPLETE_CACHE_KEY = "cache".freeze
AUTOCOMPLETE_RANGE_LENGTH = 50 # not random
AUTOCOMPLETE_BOOST = 1000 # amt by which we boost results that have all the words
# this marks a completed word in the completion set -- we use double commas because
# commas are not allowed in pseud and tag names, and double-commas have been disallowed
# from collection titles
AUTOCOMPLETE_WORD_TERMINATOR = ",,".freeze
def transliterate(input)
input = input.to_s.mb_chars.unicode_normalize(:nfkd).gsub(/[\u0300-\u036F]/, "")
result = ""
input.each_char do |char|
tl = ActiveSupport::Inflector.transliterate(char)
# If transliterate returns "?", the original character is either unsupported
# (e.g. a non-Latin character) or was actually a question mark.
# In both cases, we should keep the original.
result << if tl == "?"
char
else
tl
end
end
result
end
# override to define any autocomplete prefix spaces where this object should live
def autocomplete_prefixes
[self.transliterate("autocomplete_#{self.class.name.downcase}")]
end
def autocomplete_search_string
self.transliterate(name)
end
def autocomplete_search_string_was
self.transliterate(name_was)
end
def autocomplete_search_string_before_last_save
self.transliterate(name_before_last_save)
end
def autocomplete_value
"#{id}#{AUTOCOMPLETE_DELIMITER}#{name}" + (self.respond_to?(:title) ? "#{AUTOCOMPLETE_DELIMITER}#{title}" : "")
end
def autocomplete_value_was
"#{id}#{AUTOCOMPLETE_DELIMITER}#{name_was}" + (self.respond_to?(:title) ? "#{AUTOCOMPLETE_DELIMITER}#{title_was}" : "")
end
def autocomplete_value_before_last_save
"#{id}#{AUTOCOMPLETE_DELIMITER}#{name_before_last_save}" + (self.respond_to?(:title) ? "#{AUTOCOMPLETE_DELIMITER}#{title_before_last_save}" : "")
end
def autocomplete_score
0
end
def add_to_autocomplete(score = nil)
score = autocomplete_score unless score
self.class.autocomplete_pieces(autocomplete_search_string).each do |word_piece|
# each prefix represents an autocompletion space -- eg, "autocomplete_collection_all"
autocomplete_prefixes.each do |prefix|
# We put each prefix and the word + completion token into the set of all completions,
# with score 0 so they just get sorted lexicographically --
# this will be used to quickly find all possible completions in this space
REDIS_AUTOCOMPLETE.zadd(self.transliterate(self.class.autocomplete_completion_key(prefix)), 0, word_piece)
# We put each complete search string into a separate set indexed by word with specified score
if self.class.is_complete_word?(word_piece)
REDIS_AUTOCOMPLETE.zadd(self.transliterate(self.class.autocomplete_score_key(prefix, word_piece)), score, autocomplete_value)
end
end
end
end
def remove_from_autocomplete
self.class.remove_from_autocomplete(self.autocomplete_search_string, self.autocomplete_prefixes, self.autocomplete_value)
end
def remove_stale_from_autocomplete
self.class.remove_from_autocomplete(self.autocomplete_search_string_before_last_save, self.autocomplete_prefixes, self.autocomplete_value_before_last_save)
end
module ClassMethods
include AutocompleteSource
# returns a properly escaped and case-insensitive regexp for a more manual search
def get_search_regex(search_param)
Regexp.new(Regexp.escape(search_param), Regexp::IGNORECASE)
end
# takes either an array or string of search terms (typically extra values passed in through live params, like fandom)
# and returns an array of stripped and lowercase words for actual searching or use in keys
def get_search_terms(search_term)
terms = if search_term.is_a?(Array)
search_term.map { |term| term.split(",") }.flatten
else
search_term.blank? ? [] : search_term.split(",")
end
terms.map { |term| self.transliterate(term.strip.downcase) }
end
def parse_autocomplete_value(current_autocomplete_value)
current_autocomplete_value.split(AUTOCOMPLETE_DELIMITER, 3)
end
def fullname_from_autocomplete(current_autocomplete_value)
current_autocomplete_value.split(AUTOCOMPLETE_DELIMITER, 2)[1]
end
def id_from_autocomplete(current_autocomplete_value)
parse_autocomplete_value(current_autocomplete_value)[0]
end
def name_from_autocomplete(current_autocomplete_value)
parse_autocomplete_value(current_autocomplete_value)[1]
end
def title_from_autocomplete(current_autocomplete_value)
parse_autocomplete_value(current_autocomplete_value)[2]
end
def autocomplete_lookup(options = {})
options.reverse_merge!({search_param: "", autocomplete_prefix: "", sort: "down"})
search_param = options[:search_param]
autocomplete_prefix = options[:autocomplete_prefix]
if REDIS_AUTOCOMPLETE.exists(autocomplete_cache_key(autocomplete_prefix, search_param))
return REDIS_AUTOCOMPLETE.zrange(autocomplete_cache_key(autocomplete_prefix, search_param), 0, -1)
end
# we assume that if the user is typing in a phrase, any words they have
# entered are the exact word they want, so we only get the prefixes for
# the very last word they have entered so far
search_pieces = autocomplete_phrase_split(search_param).map { |w| w + AUTOCOMPLETE_WORD_TERMINATOR }
# for each complete word, we look up the phrases in that word's set
# along with their scores and add up the scores
scored_results = {}
count = {}
exact = {}
search_regex = Regexp.new(Regexp.escape(search_param), Regexp::IGNORECASE)
search_pieces.each_with_index do |search_piece, index|
lastpiece = false
if index == search_pieces.size - 1
lastpiece = true
search_piece.gsub!(/#{Tag::AUTOCOMPLETE_WORD_TERMINATOR}$/, '')
break if search_pieces.size > 1 && search_piece.length == 1
end
# Get all the complete words which could match this search term
completions = autocomplete_word_completions(search_piece, autocomplete_prefix)
completions.each do |word|
# O(logN + M) where M is number of items returned -- we could speed up even more by putting in a limit
phrases_with_scores = []
if lastpiece && search_piece.length < 3
# use a limit
phrases_with_scores = REDIS_AUTOCOMPLETE.zrevrangebyscore(autocomplete_score_key(autocomplete_prefix, word),
'inf', 0, withscores: true, limit: [0, 50])
else
phrases_with_scores = REDIS_AUTOCOMPLETE.zrevrangebyscore(autocomplete_score_key(autocomplete_prefix, word),
'inf', 0, withscores: true)
end
phrases_with_scores.each do |phrase, score|
score = score.to_i
if options[:constraint_sets]
# phrases must be in these sets or else no go
# O(logN) complexity
next unless options[:constraint_sets].all {|set| REDIS_AUTOCOMPLETE.zrank(set, phrase)}
end
if count[phrase]
# if we've already seen this phrase, increase the score
scored_results[phrase] += score
count[phrase] += 1
else
# initialize the score and check if it exactly matches our regexp
scored_results[phrase] = score
if lastpiece
# don't count if it only matches the last search piece
count[phrase] = 0
else
count[phrase] = 1
end
if phrase.match(search_regex)
exact[phrase] = true
else
exact[phrase] = false
end
end
end
end
end
# final sort is O(NlogN) but N is only the number of complete phrase results which should be relatively small
results = scored_results.keys.sort do |k1, k2|
exact[k1] && !exact[k2] ? -1 : (exact[k2] && !exact[k1] ? 1 :
count[k1] > count[k2] ? -1 : (count[k2] > count[k1] ? 1 :
scored_results[options[:sort] == "down" ? k2 : k1].to_i <=> scored_results[options[:sort] == "down" ? k1 : k2].to_i))
end
limit = options[:limit] || 15
if search_param.length <= 2
# cache the result for really quick response when only 1-2 letters entered
# adds only a little bit to memory and saves doing a lot of processing of many phrases
results[0..limit].each_with_index {|res, index| REDIS_AUTOCOMPLETE.zadd(autocomplete_cache_key(autocomplete_prefix, search_param), index, res)}
# expire every 24 hours so new entries get added if appropriate
REDIS_AUTOCOMPLETE.expire(autocomplete_cache_key(autocomplete_prefix, search_param), 24*60*60)
end
results[0..limit]
end
def is_complete_word?(word_piece)
word_piece.match(/#{AUTOCOMPLETE_WORD_TERMINATOR}$/)
end
def get_word(word_piece)
word_piece.gsub(/#{AUTOCOMPLETE_WORD_TERMINATOR}$/, '')
end
def autocomplete_score_key(autocomplete_prefix, word)
self.transliterate(autocomplete_prefix + "_" + AUTOCOMPLETE_SCORE_KEY + "_" + get_word(word))
end
def autocomplete_completion_key(autocomplete_prefix)
self.transliterate(autocomplete_prefix + "_" + AUTOCOMPLETE_COMPLETION_KEY)
end
def autocomplete_cache_key(autocomplete_prefix, search_param)
self.transliterate(autocomplete_prefix + "_" + AUTOCOMPLETE_CACHE_KEY + "_" + search_param)
end
# Split a string into words.
def autocomplete_phrase_split(string)
# Use the ActiveSupport::Multibyte::Chars class to handle downcasing
# instead of the basic string class, because it can handle downcasing
# letters with accents or other diacritics.
normalized = self.transliterate(string).downcase.to_s
# Split on one or more spaces, ampersand, slash, double quotation mark,
# opening parenthesis, closing parenthesis (just in case), tilde, hyphen
normalized.split(/(?:\s+|\&|\/|"|\(|\)|\~|-)/).reject(&:blank?)
end
def autocomplete_pieces(string)
# prefixes for autocomplete
prefixes = []
words = autocomplete_phrase_split(string)
words.each do |word|
prefixes << self.transliterate(word) + AUTOCOMPLETE_WORD_TERMINATOR
word.length.downto(1).each do |last_index|
prefixes << word.slice(0, last_index)
end
end
prefixes
end
# overall time complexity: O(log N)
def autocomplete_word_completions(word_piece, autocomplete_prefix)
get_exact = is_complete_word?(word_piece)
# the rank of the word piece tells us where to start looking
# in the completion set for possible completions
# O(logN) N = number of things in the completion set (ie all the possible prefixes for all the words)
start_position = REDIS_AUTOCOMPLETE.zrank(autocomplete_completion_key(autocomplete_prefix), word_piece)
return [] unless start_position
results = []
# start from that position and go for the specified range length
# O(logN + M) M is the range length, so reduces to logN
REDIS_AUTOCOMPLETE.zrange(autocomplete_completion_key(autocomplete_prefix), start_position, start_position + AUTOCOMPLETE_RANGE_LENGTH - 1).each do |entry|
minlen = [entry.length, word_piece.length].min
# if the entry stops matching the prefix then we've passed out of
# the completions that could belong to this word -- return
return results if entry.slice(0, minlen) != word_piece.slice(0, minlen)
# otherwise if we've hit a complete word add it to the results
if is_complete_word?(entry)
results << entry
return results if get_exact
end
end
results
end
# generic method to remove pieces for given search string and value from the given autocomplete prefixes
def remove_from_autocomplete(search_string, prefixes, value)
autocomplete_pieces(search_string).each do |word_piece|
prefixes.each do |prefix|
# we leave the word pieces in the completion set so we don't accidentally trash
# parts of other completions -- doing a weekly reload for cleanup is good enough
if is_complete_word?(word_piece)
word = get_word(word_piece)
phrases = REDIS_AUTOCOMPLETE.zrevrangebyscore(autocomplete_score_key(prefix, word), 'inf', 0)
if phrases.count == 1 && phrases.first == value
# there's only one phrase for this word and we're removing it, remove the completed word from the completion set
REDIS_AUTOCOMPLETE.zrem(autocomplete_completion_key(prefix), word_piece)
end
# remove the phrase we're deleting from the score set
REDIS_AUTOCOMPLETE.zrem(autocomplete_score_key(prefix, word_piece), value)
end
end
end
end
end
def self.included(base)
base.extend(ClassMethods)
end
end