app/service_adaptors/worldcat.rb
# Link to worldcat.org that relies on screen scraping to see if it's gotten
# a hit.
#
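# Warning: WorldCat can be awfully slow to respond.
#
# Optional configuration params:
# search_zip_code:: zip code appended to the generated link as a "loc" value.
# base_url:: defaults to http://www.worldcat.org/ ; there is rarely a reason to change it.
# display_text:: text of the generated link, defaults to 'Find in other libraries'.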
require 'uri'
require 'net/http'
# Service adaptor that adds a "highlighted link" response pointing at
# worldcat.org for the requested citation. The link is keyed on the first
# identifier available, in this order: OCLC number, ISSN, ISBN.
#
# Optionally (see @suppress_precheck) the WorldCat page is fetched first and
# screen-scraped, so a link is only added when WorldCat reports a hit.
class Worldcat < Service
  include MetadataHelper
  include UmlautHttp

  # Sets default values and then hands config to Service#initialize.
  # NOTE(review): presumably super(config) overrides these instance variables
  # from the supplied config -- confirm against Service.
  def initialize(config)
    # defaults

    # It seems unnecessary to pre-check worldcat, it's mostly ALWAYS a
    # positive hit. And pre-checking against worldcat is running into
    # Worldcat's rate limiting defenses. So the pre-check is suppressed by
    # default; if necessary, you can turn this off. Really, we should be
    # using the Worldcat API anyway.
    @suppress_precheck = true
    @base_url = 'http://www.worldcat.org/'
    @display_text = 'Find in other libraries'
    @display_text_i18n = 'display_text'
    @display_name = 'OCLC WorldCat.org'
    @credits = {
      "OCLC WorldCat.org" => "http://www.worldcat.org/"
    }
    super(config)
  end

  # Returns the list of service type values this adaptor can produce: only
  # 'highlighted_link'.
  def service_types_generated
    [ServiceTypeValue['highlighted_link']]
  end

  # Builds the WorldCat URL for the request's referent and registers it as a
  # highlighted_link service response.
  #
  # Returns the result of request.dispatched in every case:
  # * status true when there is no usable identifier, when the pre-check
  #   says WorldCat has no such record, or after the link has been added;
  # * DispatchedService::FailedFatal when the constructed URL cannot be parsed;
  # * DispatchedService::FailedTemporary when the pre-check times out or
  #   WorldCat answers with a non-200 status.
  def handle(request)
    isxn_key, isxn_value = worldcat_identifier(request.referent)

    # We have no useful identifiers (or nothing survived the cleanup).
    return request.dispatched(self, true) if isxn_value.blank?

    # No URL escaping is needed for the path pieces: the key is one of our
    # own literals and the value has been reduced to digits and 'X'.
    uri_str = @base_url + isxn_key + '/' + isxn_value
    if @search_zip_code
      # NOTE(review): the '&' (rather than '?') separator is kept exactly as
      # it was; it looks like WorldCat's own URL style -- confirm before
      # changing. URI::DEFAULT_PARSER.escape escapes the same way the removed
      # URI.escape did.
      uri_str += "&loc=#{URI::DEFAULT_PARSER.escape(@search_zip_code.to_s)}"
    end

    begin
      worldcat_uri = URI.parse(uri_str)
    rescue URI::Error => e
      Rails.logger.error("Bad worldcat uri string constructed?")
      Rails.logger.error(e)
      return request.dispatched(self, DispatchedService::FailedFatal)
    end

    # We do a pre-emptive lookup to worldcat to try and see if worldcat
    # has a hit or not, before adding the link.
    unless @suppress_precheck
      # Honour the port and scheme of a customised base_url instead of always
      # talking plain HTTP to the default port.
      http = Net::HTTP.new(worldcat_uri.host, worldcat_uri.port)
      http.use_ssl = (worldcat_uri.scheme == 'https')
      http.open_timeout = 7
      http.read_timeout = 7

      begin
        # Fake being a proxy to send info on actual end-user client to worldcat,
        # to lessen chance of worldcat traffic limiters.
        headers = proxy_like_headers(request, worldcat_uri.host)
        wc_response = http.get(worldcat_uri.path, headers)
      rescue Timeout::Error => exception
        return request.dispatched(self, DispatchedService::FailedTemporary, exception)
      end

      # Bad response code?
      unless wc_response.code == "200"
        # Could be temporary, could be fatal. Let's say temporary.
        return request.dispatched(self, DispatchedService::FailedTemporary, Exception.new("oclc returned error http status code: #{wc_response.code}"))
      end

      # Sadly, worldcat returns a 200 even if there are no matches.
      # We need to screen-scrape to discover if there are matches.
      if wc_response.body =~ /The page you tried was not found\./
        # Not found in worldcat, we won't add a link.
        return request.dispatched(self, true)
      end
    end

    request.add_service_response(
      :service => self,
      :url => worldcat_uri.to_s,
      :display_text => @display_text,
      :display_text_i18n => @display_text_i18n,
      :service_type_value => :highlighted_link
    )
    request.dispatched(self, true)
  end

  private

  # Picks the identifier used to build the WorldCat URL.
  #
  # Returns a two element array [key, value], where key is 'oclc', 'issn' or
  # 'isbn' and value is the cleaned-up identifier, or nil when the referent
  # carries none of the three identifiers.
  def worldcat_identifier(referent)
    isbn = get_identifier(:urn, "isbn", referent)
    issn = get_identifier(:urn, "issn", referent)
    oclcnum = get_identifier(:info, "oclcnum", referent)

    key, raw_value =
      if !oclcnum.blank?
        ['oclc', oclcnum]
      elsif !issn.blank?
        #isxn_value = ref_metadata['issn'] + '+dt:ser'
        ['issn', issn]
      elsif !isbn.blank?
        ['isbn', isbn]
      end
    return nil if raw_value.nil?

    # Do some cleanup of the value. Sometimes spaces or other weird chars get
    # in there, so strip out everything that isn't a number or X. Upcase
    # first so a lowercase 'x' check digit is kept rather than stripped.
    [key, raw_value.to_s.upcase.gsub(/[^\dX]/, '')]
  end
end