team-umlaut/umlaut

View on GitHub
app/service_adaptors/gpo.rb

Summary

Maintainability
A
50 mins
Test Coverage
# Still in progress. Uses illegal info:sudoc and info:gpo to get
# a sudoc or a GPO Item Number for a given referent, and finds online
# availability, and/or links to GPO lookup for a local depository with the
# item.
class Gpo < Service
  include MetadataHelper
  require 'nokogiri'
  require 'open-uri' 
  
  
  # Sets this adaptor's defaults and then hands the config to the superclass.
  #
  # config - passed through untouched to super.
  #
  # @gpo_item_find gates the "highlighted_link" depository lookup and
  # @sudoc_url_lookup gates the "fulltext" catalog scrape (see #handle and
  # #service_types_generated).
  def initialize(config)
    # Both features are switched on by default.
    @gpo_item_find = @sudoc_url_lookup = true
    @display_name = "U.S. Government Printing Office"

    # NOTE(review): super runs last, presumably so values in config can
    # override the defaults above -- confirm against Service#initialize.
    super(config)
  end

  # Lists the service types this adaptor can generate with its current
  # settings: "highlighted_link" when @gpo_item_find is on, "fulltext" when
  # @sudoc_url_lookup is on, in that order.
  #
  # Returns an Array of whatever ServiceTypeValue[] yields for those names.
  def service_types_generated
    switches = [
      ["highlighted_link", @gpo_item_find],
      ["fulltext", @sudoc_url_lookup]
    ]

    # ServiceTypeValue is only consulted for the features that are enabled.
    switches.select { |_name, enabled| enabled }.map { |name, _enabled| ServiceTypeValue[name] }
  end

  # Adds this adaptor's responses to the request and marks it dispatched.
  #
  # request - object providing #referent, #add_service_response and
  #           #dispatched.
  #
  # When @gpo_item_find is on, one "highlighted_link" response is added per
  # distinct GPO Item Number. When @sudoc_url_lookup is on and the referent
  # has a sudoc, fulltext links are added via #add_links_from_sudoc.
  #
  # NOTE(review): get_gpo_item_nums and get_sudoc are not defined in this
  # class; presumably they come from MetadataHelper -- confirm.
  #
  # Returns whatever request.dispatched(self, true) returns.
  def handle(request)
    if @gpo_item_find
      formats_by_item = analyze_gpo_items(get_gpo_item_nums(request.referent))

      # One link per Item Number, pointing at the GPO lookup page for finding
      # it in a depository library; the note lists the formats it comes in.
      formats_by_item.each_pair do |item_number, format_names|
        request.add_service_response(
          :service => self,
          :display_text => "Find in a Federal Depository Library",
          :url => gpo_item_lookup_url(item_number),
          :notes => "In #{format_names.join(' or ')}",
          :service_type_value => "highlighted_link"
        )
      end
    end

    sudoc = get_sudoc(request.referent)
    add_links_from_sudoc(request, sudoc) if sudoc && @sudoc_url_lookup

    request.dispatched(self, true)
  end

  # Takes an array of strings, each a GPO Item Number optionally followed by
  # a format in parens, and groups the formats by bare Item Number.
  #
  # items - Array of Strings such as "0431-I-01" or "0431-I-01 (MF)".
  #         nil is treated as an empty list; nil/blank entries are skipped.
  #
  # Returns a Hash mapping each bare Item Number (whitespace-trimmed) to an
  # Array of its distinct format names, in first-seen order. An entry with no
  # parenthesized marker counts as 'paper'; the marker "MF" is spelled out as
  # "microform"; any other marker is kept verbatim.
  def analyze_gpo_items(items)
    item_hash = {}

    Array(items).each do |raw|
      # Trim before matching so "0431-I-01 " and "0431-I-01 (MF)" end up
      # under the same key, whether or not a format marker is present.
      entry = raw.to_s.strip
      next if entry.empty?

      bare_item = entry
      format_str = 'paper'

      # Separate the format marker from the base item number, if present.
      # If it's not present, that means paper.
      if (marker = entry.match(/^(.*)\(([^\)]+)\)\s*$/))
        bare_item = marker[1].strip
        format_str = marker[2].strip
        format_str = "microform" if format_str == "MF"
      end

      # A marker with no item number in front of it gives nothing to look up.
      next if bare_item.empty?

      formats = (item_hash[bare_item] ||= [])
      # Repeated entries must not yield notes like "In paper or paper".
      formats.push(format_str) unless formats.include?(format_str)
    end

    item_hash
  end

  # Builds the GPO "locate a depository library" URL for one Item Number.
  #
  # item - String GPO Item Number; it is CGI-escaped into the ItemNumber
  #        query parameter.
  #
  # Returns the URL as a String.
  def gpo_item_lookup_url(item)
    "http://catalog.gpo.gov/fdlpdir/locate.jsp?ItemNumber=#{CGI.escape(item)}"
  end

  # Screen scrapes the GPO catalog search results for a SuDoc number and adds
  # one "fulltext" service response to the request per distinct URL found.
  #
  # request - object receiving #add_service_response calls.
  # sudoc   - String SuDoc number; see #gpo_sudoc_find_url for how it is
  #           turned into a catalog query.
  #
  # Errors raised while fetching the catalog page are not rescued here and
  # propagate to the caller.
  def add_links_from_sudoc(request, sudoc)
    # Fetch with URI#read (provided by open-uri) instead of Kernel#open:
    # Kernel#open no longer accepts URLs as of Ruby 3.0, and #read does not
    # leave an open IO behind.
    response = URI.parse(gpo_sudoc_find_url(sudoc)).read

    response_dom = Nokogiri::HTML(response)

    # Find each tr with class tr1 holding a td => the seventh td in there =>
    # one or more 'a' tags in there. These are links to fulltext.
    links = response_dom.search('//tr[@class = "tr1"][td]/td[7]/a')

    # The href is an internally pointing ILS link. But the text inside the
    # 'a' is what we want: it's actually a URL, fortunately. Surrounding
    # whitespace is trimmed, blank texts dropped, and repeats collapsed.
    urls = links.map { |link| link.inner_text.strip }.reject { |text| text.empty? }.uniq

    urls.each do |url|
      # When several distinct links are offered, label each with its host so
      # they can be told apart. Link text that is not a parseable URI, or
      # that has no host, simply gets no note instead of raising.
      notes = nil
      if urls.length > 1
        host = begin
          URI.parse(url).host
        rescue URI::InvalidURIError
          nil
        end
        notes = "via " + host if host
      end

      request.add_service_response(
        :service => self,
        :display_text => @display_name,
        :url => url,
        :notes => notes,
        :service_type_value => "fulltext"
      )
    end
  end

  # Builds the GPO catalog search URL for a SuDoc number.
  #
  # sudoc - String SuDoc number. It is wrapped in double quotes and then
  #         CGI-escaped into the "request" query parameter.
  #
  # Returns the URL as a String.
  def gpo_sudoc_find_url(sudoc)
    quoted_sudoc = '"' + sudoc + '"'

    "http://catalog.gpo.gov/F/?func=find-a&find_code=GVD&request=" +
      CGI.escape(quoted_sudoc) +
      "&local_base=GPO01PUB"
  end
  
end