npolar/api.npolar.no

View on GitHub
lib/metadata/oai_dumb_couchdb_model.rb

Summary

Maintainability
C
7 hrs
Test Coverage
require "oai/provider"
require "date"
module Metadata
  # A dumb/unscalable CouchDB OAI-PMH provider model
  #
  # To avoid depending on views, and to get deleted documents, the provider depends
  # on CouchDB's _changes feed. However, this has no information on when the document was deleted. Oh my.
  # Likewise, to support sets and from/until queries (without views), the filtering
  # is done on the client side (meaning that all documents are retrieved first). Ouch.
  class OaiDumbCouchDbModel < ::OAI::Provider::Model

    attr_accessor :storage

    def initialize(storage)
      @storage = storage
    end

    def earliest
      DateTime.parse("2008-01-01T00:00:00Z").to_time.utc.xmlschema
    end

    def latest
      Date.parse("9999-12-31T23:59:59.999Z").to_time.utc.xmlschema
    end

    def timestamp_field
      "updated_time"
    end

    def sets
      sets = OaiDatasetProvider.oai_sets.map {|set|
        OAI::Set.new(set)
      }
      sets
    end

    # find is the core method of a model, it returns records from the model
    # bases on the parameters passed in.
    #
    # <tt>selector</tt> can be a singular id, or the symbol :all
    # <tt>options</tt> is a hash of options to be used to constrain the query.
    #
    # Valid options:
    # * :from => earliest timestamp to be included in the results
    # * :until => latest timestamp to be included in the results
    # * :set => the set from which to retrieve the results
    # * :metadata_prefix => type of metadata requested (this may be useful if
    #                       not all records are available in all formats)
    # [:all, {:metadata_prefix=>"oai_dc", :from=>-4712-01-01 00:43:00 +0043, :until=>-4712-01-01 00:43:00 +0043}]
    def find(selector, options={})

      if :all == selector

        storage.param = {"include_docs" => true}
        changes = JSON.parse(storage.get("_changes").body)["results"]

        docs = changes.reject {|change|
          change["id"] =~ /^_design\//
          }.map {|change|


          if change["deleted"] != true
             if change["doc"]["updated"].nil?
              raise change.to_json
            end

            updated = DateTime.parse(change["doc"]["updated"]).to_time.utc.xmlschema
            dataset = Metadata::Dataset.new(change["doc"])

            # Merge topics into sets
            unless dataset.sets?
              dataset.sets = []
            end
            
            if dataset.topics? and dataset.topics.any?
              dataset[:sets] = (dataset.sets+dataset.topics).uniq
            end

            # Only advertise known sets
            dataset[:sets] = (dataset.sets||[]).select {|set|
              OaiDatasetProvider.oai_sets.map {|o|
                o[:spec]}.include? set }.map {|set| { spec: set } }

          else
            updated = Time.now.utc.xmlschema
            dataset = Metadata::Dataset.new({"id" => change["id"], "deleted" => true, "updated" => updated})
          end

          dataset

        }

        # Exclude drafts
        docs = docs.select {|d|
          d.draft != "yes"
        }

        if options.key? :set
          set = options[:set]

          docs = docs.select {|d|
            sets = d.sets||[]
            zets = sets.map {|s|s[:spec]}
            zets.include? set
          }
        end

        if options.key? :from
          docs = docs.select {|d|
            updated = DateTime.parse(d["updated"]).to_time.utc
            updated >= options[:from].utc
          }
        end

        if options.key? :until
          docs = docs.select {|d|
            updated = DateTime.parse(d["updated"]).to_time.utc
            updated <= options[:until].utc
          }
        end

        docs

      else

        response = storage.get(selector)

        if 200 == response.code

          hash = JSON.parse(response.body)
          dataset = ::Metadata::Dataset.new(hash)
          dataset.sets = (dataset.sets||[]).map {|set|
            OAI::Set.new( {:spec => set, :name => ""})
          }
          dataset

        elsif 404 == response.code
          raise OAI::IdException
        else
          raise OAI::Exception.new("OAI provider crashed")
        end
      end
    end
  end

end