app/mixin_logic/marc_helper.rb
module MarcHelper
# Takes an array of ruby MARC objects, adds ServiceResponses
# for the 856 links contained.
# Returns a hash of arrays of ServiceResponse objects added, keyed
# by service type value string.
def add_856_links(request, marc_records, options = {})
options[:default_service_type] ||= "fulltext"
options[:match_reliability] ||= ServiceResponse::MatchExact
responses_added = Hash.new
# Keep track of urls to avoid putting the exact same url in twice
urls_seen = Array.new
marc_records.each do |marc_xml|
marc_xml.find_all {|f| '856' === f.tag}.each do |field|
# Might have more than one $u, in which case we want to
# possibly add each of them. Might have 0 $u in which case
# we skip.
field.subfields.find_all {|sf| sf.code == 'u'}.each do |sf|
url = sf.value
# Already got it from another catalog record?
next if urls_seen.include?(url)
# Trying to avoid duplicates with SFX/link resolver.
skip = should_skip_856_link?(request, marc_xml, url)
next if skip
urls_seen.push(url)
display_name = nil
if field['y']
display_name = field['y']
else
# okay let's try taking just the domain from the url
begin
u_obj = URI::parse( url )
display_name = u_obj.host
rescue Exception
end
# Okay, can't parse out a domain, whole url then.
display_name = url if display_name.nil?
end
# But if we've got a $3, the closest MARC comes to a field
# that explains what this actually IS, use that too please.
display_name = field['3'] + ' from ' + display_name if field['3']
# Build the response.
response_params = {:service=>self, :display_text=>display_name, :url=>url}
# get all those $z subfields and put em in notes.
response_params[:url] = url
# subfield 3 is being used for OCA records loaded in our catalog.
response_params[:notes] =
field.subfields.collect {|f| f.value if (f.code == 'z') }.compact.join('; ')
is_journal = (marc_xml.leader[7,1] == 's')
unless ( field['3'] || ! is_journal ) # subfield 3 is in fact some kind of coverage note, usually
response_params[:notes] += "; " unless response_params[:notes].blank?
response_params[:notes] += "Dates of coverage unknown."
end
unless ( options[:match_reliability] == ServiceResponse::MatchExact )
response_params[:match_reliability] = options[:match_reliability]
response_params[:edition_str] = edition_statement(marc_xml)
end
# Figure out the right service type value for this, fulltext, ToC,
# whatever.
response_params[:service_type_value] = service_type_for_856( field, options )
# fulltext urls from MARC are always marked as specially stupid.
response_params[:coverage_checked] = false
response_params[:can_link_to_article] = false
# Some debugging info, add the 001 bibID if we have one.
response_params[:debug_info] = "BibID: #{marc_xml['001'].value}" if marc_xml['001']
# Add the response
response = request.add_service_response(response_params)
responses_added[response_params[:service_type_value]] ||= Array.new
responses_added[response_params[:service_type_value]].push(response)
end
end
end
return responses_added
end
# Used by #add_856_links. Complicated logic to try and avoid
# presenting a URL from the catalog that duplicates what SFX does,
# but present a URL from the catalog when it's really needed.
#
# One reason not to include Catalog links for an article-level
# citation, even if SFX provided no targets, is maybe SFX
# provided no targets because SFX _knew_ that the _particular date_
# requested is not available. The catalog doesn't know that, but
# we don't want to show a link from the catalog that SFX really
# already knew wasn't going to be available.
#
# So:
#
# If this is a journal, skip the URL if it matches in our
# SFXUrl finder, because that means we think it's an SFX controlled
# URL. But if it's not a journal, use it anyway, because it's probably
# an e-book that is not in SFX, even if it's from a vendor who is in
# SFX. We use MARC leader byte 7 to tell if it's a journal. Confusing enough?
# Not yet! Even if it is a journal, if this isn't an article-level
# cite and there are no other full text already provided, we
# still include.
def should_skip_856_link?(request, marc_record, url)
is_journal = (marc_record.leader[7,1] == 's')
sfx_controlled = SfxUrl.sfx_controls_url?(url)
# Do NOT skip if it's a title-level citation with no
# existing full text entries.
not_title_level_empty = !( request.title_level_citation? &&
request.get_service_type("fulltext").length == 0
)
result = ( is_journal && sfx_controlled && not_title_level_empty )
return result
end
# Take a ruby Marc Field object representing an 856 field,
# decide what umlaut service type value to map it to. Fulltext, ToC, etc.
# This is neccesarily a heuristic guess, Marc doesn't have enough granularity
# to really let us know for sure --
# although if indicator2 is '2' for 'related resource', we decide it is
# NOT fulltext.
def service_type_for_856(field, options)
options[:default_service_type] ||= "fulltext_title_level"
# LC records here at hopkins have "Table of contents only" in the 856$3
# Think that's a convention from LC?
if (field['3'] && field['3'].downcase =~ /table of contents( only)?/)
return "table_of_contents"
elsif (field['3'] && field['3'].downcase =~ /description/)
# If it contains the word 'description', it's probably an abstract.
# That's the best we can do, sadly.
return "abstract"
elsif (field['3'] && field['3'].downcase == 'sample text')
# LC records often include these links.
return "excerpts"
elsif ( field['u'] =~ /www\.loc\.gov/ )
# Any other loc.gov link, we know it's not full text, don't put
# it in full text field, put it as "see also".
return "highlighted_link"
elsif field.indicator2 == '2' # 'related resource'
return "highlighted_link"
else
return options[:default_service_type]
end
end
# A MARC record has two dates in it, date1 and date2. Exactly
# what they represent is something of an esoteric mystery.
# But this will return them both, in an array.
def get_years(marc)
array = []
# no marc 008? Weird, but okay.
return array unless marc['008']
date1 = marc['008'].value[7,4]
date1.strip! if date1
array.push(date1) unless date1.blank?
date2 = marc['008'].value[11,4]
date2.strip! if date2
array.push(date2) unless date2.blank?
return array
end
# Take the title out of a marc record
def get_title(marc)
marc['245'].find_all {|sf| sf.code == "a" || sf.code == "b" || sf.code == "k"}.collect {|sf| sf.text}.join(" ").sub(/\s*[;:\/.,]\s*$/)
end
# From a marc record, get a string useful to display for identifying
# which edition/version of a work this represents.
def edition_statement(marc, options = {})
options[:include_repro_info] ||= true
options[:exclude_533_fields] = ['7','f','b', 'e']
parts = Array.new
return "" unless marc
#245$h GMD
unless ( marc['245'].blank? || marc['245']['h'].blank? )
parts.push('(' + marc['245']['h'].gsub(/[^\w\s]/, '').strip.titlecase + ')')
end
#250
if ( marc['250'])
parts.push( marc['250']['a'] ) unless marc['250']['a'].blank?
parts.push( marc['250']['b'] ) unless marc['250']['b'].blank?
end
# 260
if ( marc['260'])
if (marc['260']['b'] =~ /s\.n\./)
parts.push(marc['260']['a']) unless marc['260']['a'].blank?
else
parts.push(marc['260']['b']) unless marc['260']['b'].blank?
end
parts.push( marc['260']['c'] ) unless marc['260']['c'].blank?
end
# 533
if options[:include_repro_info] && marc['533']
marc['533'].subfields.each do |s|
if ( s.code == 'a' )
parts.push(s.value.gsub(/[^\w\s]/, '') + ':' )
elsif (! options[:exclude_533_fields].include?( s.code ))
parts.push(s.value)
end
end
end
return nil if parts.length == 0
return parts.join(' ')
end
# AACR2 "General Material Designation" . While these are (I think?)
# controlled, it's actually really hard to find the list. Maybe they're
# only semi-controlled.
# ONE list can be found here: http://www.oclc.org/bibformats/en/onlinecataloging/default.shtm#BCGFECEG
def gmd_values
# 'computer file' is an old one that may still be found in data.
return ['activity card',
'art original','art reproduction','braille','chart','diorama','electronic resource','computer file', 'filmstrip','flash card','game','globe','kit','manuscript','map','microform','microscope slides','model','motion picture','music','picture','realia','slide','sound recording','technical drawing','text','toy','transparency','videorecording']
end
# removes something that looks like an AACR2 GMD in square brackets from
# the string. Pretty kludgey.
def strip_gmd(arg_string, options = {})
options[:replacement] ||= ':'
gmd_values.each do |gmd_val|
arg_string = arg_string.sub(/\[#{gmd_val}( \((tactile|braile|large print)\))?\]/, options[:replacement])
end
return arg_string
end
end