app/service_adaptors/hip_holding_search.rb
class HipHoldingSearch < Hip3Service
required_config_params :base_path, :display_name
attr_reader :base_path
include MarcHelper
def initialize(config)
# Default preemption by any holding
@bib_limit = 4
@preempted_by = { "existing_type" => "holding" }
@keyword_exact_match = true
# If you are sending an OpenURL from a library service, you may
# have the HIP bibnum, and include it in the OpenURL as, eg.
# rft_id=http://catalog.library.jhu.edu/bib/343434 (except URL-encoded)
# Then you'd set rft_id_bibnum_prefix to http://catalog.library.jhu.edu/bib/
@rft_id_bibnum_prefix = nil
@profile = "general"
super(config)
# Trim question-mark from base_url, if given
@base_path.chop! if (@base_path.rindex('?') == @base_path.length)
end
def service_types_generated
# Add one more to whatever the Hip3Service does.
return super.push(ServiceTypeValue['holding_search'])
end
def handle(request)
# Only do anything if we have no holdings results from someone else.
holdings = request.service_types.where(:service_type_value_name => "holding")
if (holdings.length > 0)
return request.dispatched(self, true)
end
ref_metadata = request.referent.metadata
bib_searcher = Hip3::BibSearcher.new(@base_path)
search_hash = {}
if ( request.referent.format != "book" &&
(! ref_metadata['jtitle'].blank?) &&
ref_metadata['bititle'].blank? )
hip_title_index = Hip3::BibSearcher::SERIAL_TITLE_KW_INDEX
else
hip_title_index = Hip3::BibSearcher::TITLE_KW_INDEX
end
title = ref_metadata['jtitle']
title = ref_metadata['btitle'] if title.blank?
title = ref_metadata['title'] if title.blank?
#title_terms = search_terms_for_title_tokenized(title)
# tokenized was too much recall, not enough precision. Try phrase
# search.
title_terms = search_terms_for_title_phrase(title)
unless ( title_terms )
Rails.logger.debug("#{self.service_id} is missing title, can not search.")
return request.dispatched(self, true)
end
search_hash[hip_title_index] = title_terms
# Do we have the bibnum?
bibnum = get_bibnum(request.referent)
bib_searcher.bibnum = bibnum if bibnum
# If it's a non-journal thing, add the author if we have an aulast (preferred) or au.
# But wait--if it's a book _part_, don't include the author name, since
# it _might_ just be the author of the part, not of the book.
unless (request.referent.format == "journal" ||
( request.referent.format == "book" && ! ref_metadata['atitle'].blank?))
# prefer aulast
if (! ref_metadata['aulast'].blank?)
search_hash[ Hip3::BibSearcher::AUTHOR_KW_INDEX ] = [ref_metadata['aulast']]
elsif (! ref_metadata['au'].blank?)
search_hash[ Hip3::BibSearcher::AUTHOR_KW_INDEX ] = [ref_metadata['au']]
end
end
bib_searcher.search_hash = search_hash
unless bib_searcher.insufficient_query
timing_debug("start search")
bibs = bib_searcher.search
timing_debug("bib searching")
# Ssee if any our matches are exact title matches. 'exact' after normalizing a bit, including removing subtitles.
matches = [];
# Various variant normalized forms of the title from the OpenURL
# request. #compact removes nil values.
request_titles = [title,
normalize_title( title ),
normalize_title( title, :remove_subtitle => true) ].compact
if ( @keyword_exact_match )
bibs.each do |bib|
# various variant normalized forms of the title from the bib
# #compact removes nil values.
bib_titles = [ bib.title,
normalize_title(bib.title, :remove_subtitle => true),
normalize_title(bib.title) ].compact
# Do any of the various forms match? Set intersection on our
# two sets.
if ( bib_titles & request_titles ).length > 0
matches.push( bib )
end
end
end
responses_added = Hash.new
timing_debug("Finding matches")
if (matches.length > 0 )
# process as exact matches with method from Hip3Service
# Add copies
# Add 856 urls.
responses_added = {}
unless preempted_by(request, "fulltext")
# Let's do some analysis of our results. If it's got a matching
# bibnum, then include it as an EXACT match.
req_bibnum = get_bibnum(request.referent)
if ( req_bibnum )
matches.each do |bib|
if (req_bibnum == bib.bibNum)
responses_added.merge!( add_856_links(request, [bib.marc_xml]) )
responses_added.merge!( add_copies( request, [bib] ))
matches.delete(bib)
end
end
end
timing_debug("Identified matches")
# Otherwise, sort records with matching dates FIRST.
# Some link generators use an illegal 'year' parameter, bah.
if ( date = (request.referent['date'] || request.referent['year']))
req_year = date[0,4]
matches = matches.partition {|bib| get_years(bib.marc_xml).include?( req_year )}.flatten
end
timing_debug("Date sorted")
responses_added.merge!( add_856_links(request, matches.collect{|b| b.marc_xml}, :match_reliability => ServiceResponse::MatchUnsure ) )
timing_debug("added 856's")
end
responses_added.merge!( add_copies(request, matches, :match_reliability => ServiceResponse::MatchUnsure ) )
timing_debug("added copies")
end
if (bibs.length > 0 && (! responses_added['holding']))
# process as holdings_search
request.add_service_response(
:service => self,
:source_name => @display_name,
:count => bibs.length,
:display_text => "#{bibs.length} possible #{case; when bibs.length > 1 ; 'matches' ; else; 'match' ; end} in #{display_name}",
:url => bib_searcher.search_url,
:service_type_value => :holding_search)
end
end
return request.dispatched(self, true)
end
# One algorithm for turning a title into HIP search terms.
# Tokenizes the title into individual words, eliminates stop-words,
# and combines each word with 'AND'. We started with this for maximum
# recall, but after some experimentation seems to have too low precision
# without sufficient enough increase in recall.
# Returns an array of keywords.
def search_terms_for_title_tokenized(title)
title_cleaned = normalize_title(title)
if title_cleaned.blank?
# Not enough metadata to search.
return nil
end
# plus remove some obvious stop words, cause HIP is going to choke on em
title_cleaned.gsub!(/\bthe\b|\band\b|\bor\b|\bof\b|\ba\b/i,'')
title_kws = title_cleaned.split
# limit to 12 keywords
title_kws = title_kws.slice( (0..11) )
return title_kws
end
# Another algorithm for turning a title into HIP search terms.
# This one doesn't tokenize, but keeps the whole title as a phrase
# search. Does eliminate punctuation. Does not remove things that
# look like a sub-title.
# Returns an array with one item.
def search_terms_for_title_phrase(title)
title_cleaned = normalize_title(title)
if title_cleaned.blank?
# Not enough metadata to search.
return nil
end
return [title_cleaned]
end
def timing_debug(waypoint = "Waypoint")
@last_timed ||= Time.now
before = @last_timed
@last_timed = Time.now
interval = @last_timed - before
Rails.logger.debug("#{service_id}: #{waypoint}: #{interval}")
end
end