sul-dlss/moab-versioning

View on GitHub
lib/stanford/storage_services.rb

Summary

Maintainability
A
0 mins
Test Coverage
A
100%
# frozen_string_literal: true

module Stanford
  # An interface class to support access to SDR storage via a RESTful server
  class StorageServices < Moab::StorageServices
    # The single StorageRepository instance used by this service class. It is created once, when this
    # class body is evaluated, and is handed out by .repository.
    #
    # @note After some discussion, consensus was that this is a thread safe use of a
    # class variable, as 1) it's never mutated after the class is initialized, and 2) the
    # value of the StorageRepository instance is determined from configuration that
    # rarely changes and is loaded once at app start time (at least in Stanford's
    # consumers; see Moab::Config.configure calls in preservation_robots, preservation_catalog,
    # and technical-metadata-service).
    # Sidekiq requires thread safe code, so please preserve thread safety for multiple
    # concurrent callers of this service if refactoring, so Sidekiq remains an option for
    # ActiveJob backend for moab-versioning consumers.
    #
    # NOTE(review): Ruby class variables are shared between a class and its superclass chain, so if
    # Moab::StorageServices declares its own @@repository, this assignment presumably replaces that
    # value for the inherited class methods as well. Confirm whether that is relied upon before
    # converting this to a class-level instance variable.
    @@repository = Stanford::StorageRepository.new

    # Class-level reader for the repository object held in the @@repository class variable.
    # @return [StorageRepository] an instance of the interface to SDR storage
    def self.repository
      @@repository
    end

    # Compare the file inventory derived from new contentMetadata against the inventory derived from the
    # contentMetadata.xml stored for a version of the object. Any 'metadata' group difference is removed
    # from the report before it is returned.
    #
    # When the object does not exist in storage, or exists but has no contentMetadata file, the basis of
    # the comparison is the 'version' file inventory of a placeholder version 0 (a skeletal inventory with
    # no file entries).
    #
    # NOTE: `ShelvingService.content_diff` in dor-services-app implements similar logic, but in a way that
    # uses XML retrieved via preservation-client instead of reading the XML from disk.  This allows DSA to
    # perform the potentially time expensive diff without requiring access to preservation disk mounts.
    # See https://github.com/sul-dlss/dor-services-app/pull/4492 and https://github.com/sul-dlss/dor-services-app/issues/4359
    # @param new_content_metadata [String] The content metadata to be compared to the base
    # @param object_id [String] The digital object identifier of the object whose version inventory is the basis of the
    #  comparison
    # @param subset [String] Specifies which subset of files to list in the inventories extracted from the
    #  contentMetadata (all|preserve|publish|shelve)
    # @param base_version [Integer] The ID of the version whose inventory is the basis of the comparison;
    #  if nil, the object's current version is used
    # @return [FileInventoryDifference] The report of differences between the content metadata and the specified version
    def self.compare_cm_to_version(new_content_metadata, object_id, subset, base_version = nil)
      incoming = Stanford::ContentInventory.new.inventory_from_cm(new_content_metadata, object_id, subset)
      baseline =
        begin
          # current_version raises ObjectNotFoundException if the object does not exist in storage
          base_version ||= current_version(object_id)
          # retrieve_file raises FileNotFoundException if the object exists but has no contentMetadata file
          stored_cm = retrieve_file('metadata', 'contentMetadata.xml', object_id, base_version)
          Stanford::ContentInventory.new.inventory_from_cm(stored_cm.read, object_id, subset, base_version)
        rescue Moab::ObjectNotFoundException, Moab::FileNotFoundException
          # Nothing usable in storage: fall back to the inventory of a placeholder version 0
          placeholder = Moab::StorageObject.new(object_id, 'dummy')
          Moab::StorageObjectVersion.new(placeholder, 0).file_inventory('version')
        end
      Moab::FileInventoryDifference.new.compare(baseline, incoming).tap do |report|
        # The 'metadata' group is not part of the reported differences
        metadata_group = report.group_difference('metadata')
        report.group_differences.delete(metadata_group) if metadata_group
      end
    end

    # Report which files in new contentMetadata are additions relative to a version's signature catalog.
    # The new inventory is always built from the 'preserve' subset. When the object does not exist in
    # storage, the signature catalog of a placeholder version 0 is used instead.
    # @param new_content_metadata [String] The content metadata to be compared to the current signature catalog
    # @param object_id [String] The digital object identifier of the object whose signature catalog is to be used
    # @param version_id [Integer] The ID of the version whose signature catalog is to be used; if nil, the
    #  object's current version is used
    # @return [FileInventory] The versionAdditions report showing which files are new or modified in the content metadata
    def self.cm_version_additions(new_content_metadata, object_id, version_id = nil)
      incoming = Stanford::ContentInventory.new.inventory_from_cm(new_content_metadata, object_id, 'preserve')
      catalog =
        begin
          # ObjectNotFoundException is raised if the object does not exist in storage
          version_id ||= current_version(object_id)
          repository.storage_object(object_id).find_object_version(version_id).signature_catalog
        rescue Moab::ObjectNotFoundException
          # No stored object: use the signature catalog of a placeholder version 0
          placeholder = Moab::StorageObject.new(object_id, 'dummy')
          Moab::StorageObjectVersion.new(placeholder, 0).signature_catalog
        end
      catalog.version_additions(incoming)
    end

    # Look up an object's stored contentMetadata.xml and its 'content' file group, and pass both to
    # Stanford::ContentInventory#remediate_content_metadata.
    #
    # NOTE(review): the value returned by retrieve_file is handed over as-is, whereas
    # compare_cm_to_version calls #read on the result of the same lookup -- confirm that
    # remediate_content_metadata accepts that object.
    # @param object_id [String] The digital object identifier of the object whose contentMetadata is to be remediated
    # @param version_id [Integer] The ID of the version whose file data is to be used, if nil use latest version
    # @return [String] Returns a remediated copy of the contentMetadata with fixity data filled in
    def self.cm_remediate(object_id, version_id = nil)
      stored_cm = retrieve_file('metadata', 'contentMetadata.xml', object_id, version_id)
      content_files = retrieve_file_group('content', object_id, version_id)
      remediator = Stanford::ContentInventory.new
      remediator.remediate_content_metadata(stored_cm, content_files)
    end
  end
end