app/service_adaptors/gpo.rb
# Still in progress. Uses the illegal info:sudoc and info:gpo identifiers to
# get a SuDoc or a GPO Item Number for a given referent, and finds online
# availability and/or links to the GPO lookup for a local depository holding
# the item.
class Gpo < Service
  include MetadataHelper
  require 'cgi'
  require 'nokogiri'
  require 'open-uri'
  require 'uri'

  # Matches an item string ending in a parenthesized format marker, such as
  # "123-A (MF)". Capture 1 is the bare item number, capture 2 the marker.
  ITEM_WITH_FORMAT = /^(.*)\(([^\)]+)\)\s*$/

  # Sets the display name and enables both lookups before handing config to
  # the superclass.
  # NOTE(review): the defaults are assigned before super(config), presumably
  # so that Service#initialize can override them from config -- confirm.
  def initialize(config)
    @display_name = "U.S. Government Printing Office"
    @gpo_item_find = true
    @sudoc_url_lookup = true
    super(config)
  end

  # Returns the service type values this adaptor can generate:
  # "highlighted_link" when GPO item lookup is enabled, "fulltext" when
  # SuDoc URL lookup is enabled.
  def service_types_generated
    types = []
    types.push(ServiceTypeValue["highlighted_link"]) if @gpo_item_find
    types.push(ServiceTypeValue["fulltext"]) if @sudoc_url_lookup
    types
  end

  # Adds responses to the request: one depository-lookup link per GPO Item
  # Number (when @gpo_item_find is set), plus fulltext links scraped from the
  # GPO catalog when the referent has a SuDoc and @sudoc_url_lookup is set.
  # Finishes by calling request.dispatched(self, true).
  # get_gpo_item_nums and get_sudoc are not defined in this class; presumably
  # they come from MetadataHelper.
  def handle(request)
    if @gpo_item_find
      items = analyze_gpo_items(get_gpo_item_nums(request.referent))
      items.each do |item, formats|
        # Generate URL to GPO Item Number lookup for finding
        # it in a repository near you.
        request.add_service_response(:service => self,
          :display_text => "Find in a Federal Depository Library",
          :url => gpo_item_lookup_url(item),
          :notes => "In " + formats.join(" or "),
          :service_type_value => "highlighted_link"
        )
      end
    end

    sudoc = get_sudoc(request.referent)
    add_links_from_sudoc(request, sudoc) if sudoc && @sudoc_url_lookup

    request.dispatched(self, true)
  end

  # Takes an array of GPO Item strings, each optionally followed by a format
  # marker in parens, and groups them by bare Item Number.
  #
  # Returns a hash of bare item number => array of distinct format labels,
  # in first-seen order. An item with no marker counts as 'paper'; the
  # marker "MF" is reported as "microform". A nil list yields an empty hash.
  def analyze_gpo_items(items)
    item_hash = {}
    Array(items).each do |raw|
      bare_item = raw
      format_str = 'paper'
      # Separate the format marker from the base item number, if present.
      # If it's not present, that means paper.
      if (m = ITEM_WITH_FORMAT.match(raw))
        bare_item = m[1].strip
        format_str = m[2].strip
        format_str = "microform" if format_str == "MF"
      end
      formats = (item_hash[bare_item] ||= [])
      # Avoid repeating a label in the "In x or y" note shown to the user.
      formats.push(format_str) unless formats.include?(format_str)
    end
    item_hash
  end

  # Returns the GPO depository-locator URL for the given Item Number.
  def gpo_item_lookup_url(item)
    "http://catalog.gpo.gov/fdlpdir/locate.jsp?ItemNumber=" + CGI.escape(item)
  end

  # Screen scrapes the GPO catalog search results for the given SuDoc and
  # adds one "fulltext" response per distinct URL found. When the page holds
  # more than one link, each response gets a "via <host>" note if a host can
  # be parsed from the URL. Network errors from the fetch are not rescued
  # here and propagate to the caller.
  def add_links_from_sudoc(request, sudoc)
    # Kernel#open no longer opens URLs as of Ruby 3.0; URI.open is the
    # open-uri entry point. The block form closes the IO once it is read.
    response = URI.open(gpo_sudoc_find_url(sudoc)) { |io| io.read }
    response_dom = Nokogiri::HTML(response)

    # Find each tr with class tr1, holding a td => the seventh td in there
    # (XPath positions are 1-based) => one or more 'a' tags in there. These
    # are links to fulltext.
    links = response_dom.search('//tr[@class = "tr1"][td]/td[7]/a')
    multiple_links = links.length > 1

    urls_seen = []
    links.each do |link|
      # The href is an internally pointing ILS link. But the text inside
      # the a is what we want; it's actually a URL, fortunately.
      url = link.inner_text.strip
      next if url.empty? || urls_seen.include?(url)
      urls_seen.push(url)

      notes = nil
      if multiple_links
        host = host_of(url)
        notes = "via " + host if host
      end

      request.add_service_response(:service => self,
        :display_text => @display_name,
        :url => url,
        :notes => notes,
        :service_type_value => "fulltext"
      )
    end
  end

  # Returns the GPO catalog search URL for the given SuDoc; the SuDoc is
  # wrapped in double quotes and CGI-escaped into the request parameter.
  def gpo_sudoc_find_url(sudoc)
    "http://catalog.gpo.gov/F/?func=find-a&find_code=GVD&request=#{CGI.escape('"' + sudoc + '"')}&local_base=GPO01PUB"
  end

  private

  # Returns the host part of url, or nil when the text is not a parseable
  # URI or has no host. The link text is scraped, so it is not guaranteed
  # to be a well-formed URL.
  def host_of(url)
    URI.parse(url).host
  rescue URI::InvalidURIError
    nil
  end
end