team-umlaut/umlaut

View on GitHub
app/models/hip3/bib.rb

Summary

Maintainability
A
3 hrs
Test Coverage
#!/usr/bin/ruby

require 'marc'
require 'stringio'

module Hip3
    # Bib record/object from HIP. Fetches in its own data via the
    # HIP XML interface, which may make it sensitive, sorry.
    # If not fetched in yet, for an item with 'copy' (serial) control, we
    # have to do TWO fetches to get complete information: one for summary
    # holdings statements, plus another for items. Sigh, sorry.
    class Bib 
        attr_accessor :httpSession, :hip_base_url
        # should have copies or items, not both
        attr_accessor :bibNum, :copies, :items
    attr_writer :title
    
        # CustomFieldLookup objects
        attr_accessor :item_field_lookup, :copy_field_lookup, :bib_field_lookup # We cache HIP's XML representation of bib with and without items serial items. 
        attr_accessor :bib_xml, :xml_with_items 
        # and the beautiful MARC xml object for the bib, ruby-marc object. 
        attr_accessor :marx_xml
        
    # First arg: Horizon BibNo this thing represents.
    # Second arg: Net::URI representing HIP base path.
    # labelled args:
        # bib_xml_doc => is optional Hpricot representing bib from HIP. If you have
    # it already, give it to us and we won't have to fetch it. 
    # http_session => optional already initialized http session. You are
    #                 advised to use a Hip3::HTTPSession for it's error 
    #                 handling.
        def initialize(argBibNum, a_hip_base_path, params)
      @title_label = 'Title'
      
      self.bibNum = argBibNum
      raise ArgumentException.new("Nil 1st argument: You must supply a bib number to first arg of Bib.new") unless self.bibNum

      self.hip_base_url = a_hip_base_path
      raise ArgumentException.new("Nil 2nd arg: You must supply the HIP instance base URL as a Net::URI object in 2nd arg to Bib.new") unless self.hip_base_url
     
            
            self.httpSession = params[:http_session]
      self.httpSession ||= Hip3::HTTPSession.create(hip_base_url.host() )

      self.title = params[:title]
      
            self.copies = nil

            @bib_xml = params[:bib_xml_doc]
        end

        
        
        def hip_http_path
            # 1000 items and copies per page, we want em all
            return hip_base_url.path + "?index=BIB&term=#{bibNum}&ipp=1000&cpp=1000" 
        end
        def hip_http_xml_path
            return hip_http_path + "&GetXML=1"
        end
    
        # Fetch in our data from HIP. Called lazily by accessor methods. 
        def load_from_store
                
            # If we have serial copies, load those. We never should have both, but oh well. 
            serialElements = bib_xml.search('searchresponse/subscriptionsummary/serial')
            self.copies = serialElements.collect do |serialElement|
                holding = Hip3::SerialCopy.new( self, serialElement )                
            
                holding # collect holdings
            end
            @copies_loaded = true
                        
            # If we didn't have copies, we might have items directly in this bib.  
            if (self.copies.length == 0 &&
                !bib_xml.search('searchresponse/items/searchresults/results/row').nil?)
                self.xml_with_items = bib_xml
                load_items_from_store
            end
            @items_loaded = true

        end
    
        # This method gets the XML from HIP and puts it in an ivar. 
        def xml_with_items
            # first try to load our bib record, that might give us items.
            unless (@xml_with_items)
                load_from_store
            end
            # If we still don't have it, have to load the item xml
            unless (@xml_with_items)
                # Got to make another request
                # give us up to 1000 item records!
                bibWithItemsRequestPath = self.hip_http_xml_path + "&view=items&ipp=1000"
        
        
                resp = Hip3::HTTPSession.safe_get(httpSession, bibWithItemsRequestPath)
                @xml_with_items = Hpricot.XML( resp.body )
            end
        
            return @xml_with_items
        end
        
        def bib_xml
            unless (@bib_xml)
                summaryRequestPath = hip_http_xml_path
        
                resp = Hip3::HTTPSession.safe_get(httpSession, summaryRequestPath )
                @bib_xml = Hpricot.XML( resp.body )                        
            end
            
            return @bib_xml
        end



        def marc_xml
            # Sadly, loading this takes ANOTHER request. At least this
            # request, unlike the HIP requests, is speedy. We need this
      # for getting 856 urls with sub-fields. Depends on Casey
      # Durfee's package to provide marcxml from hip being installed. 
            unless (@marc_xml)
                # should have copies or items, not both
                path = "/mods/?format=marcxml&bib=#{bibNum}"
        #host = hip_base_url.host
        host = hip_base_url.host
        port = hip_base_url.port

        # If it's https, rejigger it to be http, that's just the way we roll.      
        port = 80 if port == 443;
        
        # put in a rescue on bad http, reword error. 
        resp = Hip3::HTTPSession.start(host, port) {|http| http.get(path) }
        
        
                
                reader = MARC::XMLReader.new( StringIO.new(resp.body.to_s) )
                # there should only be one record in there, just grap one. Since
        # this is an odd object, we can't just do reader[0], but instead:        
                xml = reader.find { true }
        
        # HIP marc doesn't have the bibID in it. We want the bibID in there.
        # Set it ourselves.
        xml.fields.push( MARC::ControlField.new('001', bibNum()))
        
                @marc_xml = xml 
            end
            return @marc_xml
        end
            
        # Load _serial_ Item data from HIP belonging to this BIB. Called lazily by accessors.
        # Will also work for mono item data, but no reason to call it, can get mono item data
        # without this extra fetch. 
        def load_items_from_store
                    
            itemRowElements =  xml_with_items.search('searchresponse/items/searchresults/results/row');            
            itemRowElements.each do | el |
                # constructor will take care of registering the thing with us
                Hip3::Item.new(el, self)
            end
            # Tell all our copies they're loaded, so they won't try and load again
            copies.each { |c| c.items_loaded = true }
            
        end
    
        def item_field_lookup
            
            # This guy loads our lookup obj
            unless @item_field_lookup
                xml = self.xml_with_items
                itemLabels = xml.search('searchresponse/items/searchresults/header/col/label').collect {|e| e.inner_text}                
                @item_field_lookup = CustomFieldLookup.new(itemLabels)
            end
            
            return @item_field_lookup
        end
        
        # Lookup custom HIP admin assigned fields in copy summary record
        # => CustomFieldLookup object. 
        def copy_field_lookup
            unless @copy_field_lookup
                labels = self.bib_xml.search('searchresponse/subscriptionsummary/header/col/label').collect {|e| e.inner_text}
                @copy_field_lookup = CustomFieldLookup.new(labels)
            end
            
            return @copy_field_lookup
        end

        def bib_field_lookup
            unless ( @bib_field_lookup)
                labels = self.bib_xml.search('searchresponse/fullnonmarc/header/col/label').collect {|e| e.inner_text}
                @bib_field_lookup = CustomFieldLookup.new(labels)
            end
            return @bib_field_lookup
        end

        # Look up a field added in HIP Admin screen, by label given in that screen.
        def custom_field_for_label(label)
            dataRows = self.bib_xml.search('searchresponse/fullnonmarc/results/row[1]/cell');
            return bib_field_lookup.text_value_for(dataRows, label)
        end

        # copy records, in case of serial bib
        def copies
            self.load_from_store unless @copies_loaded
            return @copies || [] # never return nil, always return an array
        end
        
        # Items directly held by this bib, only in case of mono bib
        def items
            self.load_from_store unless @items_loaded
            
            return @items || []
        end
        
        # Returns items OR copies, depending on whether it's a serial
        # or a mono bib. HIP doesn't generally allow a mixture of of both.
        def holdings
            return items + copies
        end

    def title
      unless (@title)
        # title may already have been lazily loaded, or loaded by the
        # bibsearcher. Otherwise, we need to extract it from the bib_xml, 
        # which is kind of a pain.

        # first find the index number of the title.
        labels = bib_xml().search('//fullnonmarc/searchresults/header/col/label')
        title_index = nil
        (0..(labels.length-1)).each do |i|
          if (labels[i].inner_text == @title_label)
            title_index = i
            break
          end
        end

        if title_index
          raw_title = bib_xml().at("//fullnonmarc/searchresults/results/row/cell[#{title_index + 1}]/data/text").inner_text
          # remove possible author on there, after a '/' char. That's how HIP rolls. 
          @title = raw_title.sub(/\/.*$/, '')
        else
          @title = 'unknown'
        end
        
      end
      
    
      return @title
    end
        
        # We could try to store the copies in a hash for efficiency, but
        # we're only going to have a handful, it's not worth it. 
        def copy_for_id(copy_id)
            copies.find { |c|  c.id == copy_id }
        end
                
        
        # Will add this to an appropriate Copy record if one exists,
        # otherwise directly to our items array. According to HIP's
        # constriants, a bib should never have copies _and_ directly
        # included items, just one or the other. 
        def register_item(item)            
            copy = copy_for_id( item.copy_id )
            if ( copy.nil? )
                # can't find a copy record, guess it's a directly registered item
                register_direct_item( item )
            else
                copy.register_item( item )
            end
            
        end
        
        # An item that does not have a copy parent, but is directly
        # related to the big. 
        def register_direct_item( item )
            @items ||= []
            @items.push( item ) if ! @items.include?(item)    
        end

        def http_url
        
            return "#{hip_base_url.scheme}://#{hip_base_url.host}:#{hip_base_url.port}#{hip_http_path}"
        end
        
        def to_s
            return "#<Hip3::Bib bibnum=#{bibNum} #{http_url}>"
        end
    end
    
                    
end