team-umlaut/umlaut

View on GitHub
app/service_adaptors/worldcat.rb

Summary

Maintainability
B
5 hrs
Test Coverage
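# Link to worldcat.org that relies on screen scraping to see if it's gotten
# a hit.
#
# Warning: worldcat can be awfully slow to respond.
#
# Configuration params (all optional):
# search_zip_code:: zip code added to the generated link as a `loc` parameter.
# base_url:: defaults to 'http://www.worldcat.org/'; I don't know why you'd
#            want to change it.
# display_text:: text for the generated link; defaults to
#                'Find in other libraries'.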
require 'uri'
require 'net/http'
# Service adaptor that adds a 'highlighted_link' response pointing at the
# worldcat.org page for the request's OCLC number, ISSN or ISBN. Unless the
# pre-check is suppressed, the worldcat page is fetched first and the link is
# only added when the page does not contain worldcat's "not found" message.
class Worldcat < Service
  include MetadataHelper
  include UmlautHttp

  # Sets the defaults below and then hands +config+ to Service#initialize.
  # NOTE(review): presumably the superclass lets config keys override these
  # instance variables (search_zip_code is only ever read here) -- confirm
  # against Service.
  def initialize(config)
    # When true, the screen-scraping pre-check is skipped and the link is
    # always added. Pre-checking seems mostly unnecessary (worldcat is almost
    # ALWAYS a positive hit) and it runs into Worldcat's rate limiting
    # defenses, so consider setting this to true. Really, we should be using
    # the Worldcat API anyway.
    @suppress_precheck = false
    @base_url = 'http://www.worldcat.org/'
    @display_text = 'Find in other libraries'
    @display_text_i18n = 'display_text'
    @display_name = 'OCLC WorldCat.org'

    @credits = {
      "OCLC WorldCat.org" => "http://www.worldcat.org/"
    }

    super(config)
  end

  # The only kind of response this service generates.
  def service_types_generated
    [ServiceTypeValue['highlighted_link']]
  end

  # Adds a worldcat link to +request+ when a usable identifier is present
  # (and, unless pre-checking is suppressed, worldcat reports a hit).
  #
  # Returns whatever request.dispatched returns. The request is dispatched as:
  # * true -- link added, or nothing to link to (no identifier / no hit)
  # * DispatchedService::FailedFatal -- the worldcat URL could not be parsed
  # * DispatchedService::FailedTemporary -- pre-check timed out or returned a
  #   non-200 status
  def handle(request)
    isxn_key, isxn_value = worldcat_identifier(request.referent)

    # We have no useful identifiers
    return request.dispatched(self, true) if isxn_value.nil?

    # The key is one of our own literals and the value has been reduced to
    # digits and 'X', so neither needs URL escaping. Only the zip code, which
    # comes from configuration, is escaped.
    uri_str = "#{@base_url}#{isxn_key}/#{isxn_value}"
    # NOTE(review): 'loc' is appended with '&' although no '?' precedes it, so
    # it ends up in the URI path rather than the query string. Kept as-is;
    # confirm which form worldcat expects before changing it.
    uri_str += "&loc=#{URI.encode_www_form_component(@search_zip_code.to_s)}" if @search_zip_code

    begin
      worldcat_uri = URI.parse(uri_str)
    rescue StandardError => e
      Rails.logger.error("Bad worldcat uri string constructed?")
      Rails.logger.error(e)
      return request.dispatched(self, DispatchedService::FailedFatal)
    end

    # We do a pre-emptive lookup to worldcat to try and see if worldcat
    # has a hit or not, before adding the link.
    unless @suppress_precheck
      # Honor the port and scheme of a configured base_url.
      http = Net::HTTP.new(worldcat_uri.host, worldcat_uri.port)
      http.use_ssl = true if worldcat_uri.scheme == 'https'
      http.open_timeout = 7
      http.read_timeout = 7

      begin
        # Fake being a proxy to send info on actual end-user client to worldcat,
        # to lessen chance of worldcat traffic limiters.
        headers = proxy_like_headers(request, worldcat_uri.host)
        wc_response = http.get(worldcat_uri.path, headers)
      rescue Timeout::Error => exception
        return request.dispatched(self, DispatchedService::FailedTemporary, exception)
      end

      # Bad response code?
      unless wc_response.code == "200"
        # Could be temporary, could be fatal. Let's say temporary.
        return request.dispatched(self, DispatchedService::FailedTemporary, Exception.new("oclc returned error http status code: #{wc_response.code}"))
      end

      # Sadly, worldcat returns a 200 even if there are no matches.
      # We need to screen-scrape to discover if there are matches.
      if wc_response.body =~ /The page you tried was not found\./
        # Not found in worldcat, we won't add a link.
        return request.dispatched(self, true)
      end
    end

    request.add_service_response(
      :service => self,
      :url => worldcat_uri.to_s,
      :display_text => @display_text,
      :display_text_i18n => @display_text_i18n,
      :service_type_value => :highlighted_link
    )

    request.dispatched(self, true)
  end

  private

  # Picks the identifier to link on, in order of preference: OCLC number,
  # ISSN, ISBN.
  #
  # Sometimes spaces or other weird chars get into the value, so everything
  # that isn't a digit or X is stripped. The value is upcased first so that a
  # lowercase 'x' check digit is kept rather than silently dropped. A
  # candidate that is blank, or has nothing left after cleanup, is skipped.
  #
  # Returns [url_key, cleaned_value], or nil when no usable identifier exists.
  def worldcat_identifier(referent)
    candidates = [
      ['oclc', get_identifier(:info, "oclcnum", referent)],
      ['issn', get_identifier(:urn, "issn", referent)],
      ['isbn', get_identifier(:urn, "isbn", referent)]
    ]

    candidates.each do |key, raw|
      next if raw.blank?

      cleaned = raw.to_s.upcase.gsub(/[^\dX]/, '')
      return [key, cleaned] unless cleaned.empty?
    end

    nil
  end
end