lib/moab/storage_object_version.rb
# frozen_string_literal: true
module Moab
# A class to represent a version subdirectory within an object's home directory in preservation storage
# ====Data Model
# * {StorageRepository} = represents a digital object repository storage node
# * {StorageServices} = supports application layer access to the repository's objects, data, and metadata
# * {StorageObject} = represents a digital object's repository storage location and ingest/dissemination methods
# * <b>{StorageObjectVersion} [1..*] = represents a version subdirectory within an object's home directory</b>
# * {Bagger} [1] = utility for creating bagit packages for ingest or dissemination
#
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
# All rights reserved. See {file:LICENSE.rdoc} for details.
class StorageObjectVersion
# @return [Integer] The ordinal version number
attr_accessor :version_id
# @return [String] The "v0001" directory name derived from the version id
attr_accessor :version_name
# @return [Pathname] The location of the version inside the home directory
attr_accessor :version_pathname
# @return [StorageObject] The object representing the digital object's storage location
attr_accessor :storage_object
# @return [Hash{String => FileInventory}] Inventories already read for this version,
#   keyed by inventory type (version|additions|manifests)
attr_accessor :inventory_cache
# Builds a handle for one version directory of a stored digital object.
#
# @param storage_object [StorageObject] The object representing the digital object's storage location
# @param version_id [Integer,String] The ordinal version number or a string like 'v0003'
# @raise [MoabRuntimeError] if version_id is neither an Integer nor a String made of 'v' followed by digits
def initialize(storage_object, version_id)
  if version_id.is_a?(Integer)
    @version_id = version_id
  elsif version_id.is_a?(String) && (digits = version_id[/\Av(\d+)\z/, 1])
    # \A and \z anchor the entire string. The line anchors ^ and $ would accept a
    # multi-line value as soon as any single line of it looks like 'v0003'.
    @version_id = digits.to_i
  else
    raise(MoabRuntimeError, "version_id (#{version_id}) is not in a recognized format")
  end
  @version_name = StorageObject.version_dirname(@version_id)
  @version_pathname = storage_object.object_pathname.join(@version_name)
  @storage_object = storage_object
  @inventory_cache = {}
end
# Joins the digital object id and the version directory name with a hyphen.
#
# @return [String] The unique identifier concatenating digital object id with version id
def composite_key
  object_key = @storage_object.digital_object_id
  version_key = StorageObject.version_dirname(@version_id)
  "#{object_key}-#{version_key}"
end
# Checks the file system for this version's directory.
#
# @return [Boolean] true if the object version directory exists
def exist?
  version_dir = @version_pathname
  version_dir.exist?
end
# Looks a file's signature up in the inventory that covers its category:
# the 'manifests' inventory for manifest files, the 'version' inventory otherwise.
#
# @param [String] file_category The category of file ('content', 'metadata', or 'manifest')
# @param [String] file_id The name of the file (path relative to base directory)
# @return [FileSignature] signature of the specified file
def find_signature(file_category, file_id)
  inventory_type, group_id =
    if file_category =~ /manifest/
      %w[manifests manifests]
    else
      ['version', file_category]
    end
  file_inventory(inventory_type).file_signature(group_id, file_id)
end
# Locates a file for this version: first inside this version's own directory and,
# for non-manifest files only, through the signature catalog.
#
# @param [String] file_category The category of file ('content', 'metadata', or 'manifest')
# @param [String] file_id The name of the file (path relative to base directory)
# @return [Pathname] Pathname object containing the full path for the specified file
# @raise [FileNotFoundException] if a manifest file is not present in this version's manifests directory
def find_filepath(file_category, file_id)
  this_version_filepath = file_pathname(file_category, file_id)
  return this_version_filepath if this_version_filepath.exist?

  # Recognize manifests with the same pattern #file_category_pathname and #find_signature use,
  # so that 'manifests' is reported here too instead of being looked up in the version inventory.
  if file_category =~ /manifest/
    msg = "manifest file #{file_id} not found for #{@storage_object.digital_object_id} - #{@version_id}"
    raise FileNotFoundException, msg
  end
  # Not stored in this version's own directory: resolve the path from the file's signature.
  file_signature = file_inventory('version').file_signature(file_category, file_id)
  catalog_filepath = signature_catalog.catalog_filepath(file_signature)
  @storage_object.storage_filepath(catalog_filepath)
end
# Resolves a file's storage path from its signature by way of the signature catalog.
#
# @param _file_category (unused; kept here for backwards compatibility)
# @param [FileSignature] file_signature The signature of the file
# @return [Pathname] Pathname object containing the full path for the specified file
def find_filepath_using_signature(_file_category, file_signature)
  @storage_object.storage_filepath(signature_catalog.catalog_filepath(file_signature))
end
# Appends the file id to this version's directory for the given category.
#
# @param [String] file_category The category of file ('content', 'metadata', or 'manifest')
# @param [String] file_id The name of the file (path relative to base directory)
# @return [Pathname] Pathname object containing this version's storage path for the specified file
def file_pathname(file_category, file_id)
  category_home = file_category_pathname(file_category)
  category_home.join(file_id)
end
# Maps a file category to its directory inside this version: 'manifests' for any
# category matching /manifest/, otherwise 'data/<category>'.
#
# @param [String] file_category The category of file ('content', 'metadata', or 'manifest')
# @return [Pathname] Pathname object containing this version's storage home for the specified file category
def file_category_pathname(file_category)
  return @version_pathname.join('manifests') if file_category =~ /manifest/

  @version_pathname.join('data', file_category)
end
# Returns an inventory for this version. For a positive version id the inventory is
# read from the version's manifests directory and cached by type. Otherwise a new,
# uncached inventory of type 'version' with 'content' and 'metadata' groups is built,
# whatever type was requested.
#
# @api external
# @param type [String] The type of inventory to return (version|additions|manifests)
# @return [FileInventory] The file inventory of the specified type for this version
# @see FileInventory#read_xml_file
def file_inventory(type)
  unless version_id > 0
    placeholder_groups = %w[content metadata].map { |id| FileGroup.new(group_id: id) }
    return FileInventory.new(
      type: 'version',
      digital_object_id: @storage_object.digital_object_id,
      version_id: @version_id,
      groups: placeholder_groups
    )
  end
  return @inventory_cache[type] if @inventory_cache.key?(type)

  manifests_dir = @version_pathname.join('manifests')
  @inventory_cache[type] = FileInventory.read_xml_file(manifests_dir, type)
end
# Memoized signature catalog: read from this version's manifests directory when the
# version id is positive, otherwise a new catalog carrying only the digital object id.
#
# @api external
# @return [SignatureCatalog] The signature catalog of the digital object as of this version
def signature_catalog
  return @signature_catalog if @signature_catalog

  @signature_catalog =
    if version_id > 0
      SignatureCatalog.read_xml_file(@version_pathname.join('manifests'))
    else
      SignatureCatalog.new(digital_object_id: @storage_object.digital_object_id)
    end
end
# Creates this version's directory tree and ingests the bag's 'data' directory plus its
# 'version' and 'additions' inventory files (the latter two go into 'manifests').
#
# @api internal
# @param bag_dir [Pathname,String] The location of the bag to be ingested
# @param use_links [Boolean] If true, use hard links; if false, make copies
# @return [void] Create the version subdirectory and move files into it
# @raise [MoabRuntimeError] if the version directory already exists
def ingest_bag_data(bag_dir, use_links: true)
  raise(MoabRuntimeError, "Version already exists: #{@version_pathname}") if @version_pathname.exist?

  manifests_dir = @version_pathname.join('manifests')
  manifests_dir.mkpath
  bag_root = Pathname(bag_dir)
  ingest_dir(bag_root.join('data'), @version_pathname.join('data'), use_links)
  version_file, additions_file = %w[version additions].map do |inventory_type|
    bag_root.join(FileInventory.xml_filename(inventory_type))
  end
  ingest_file(version_file, manifests_dir, use_links)
  ingest_file(additions_file, manifests_dir, use_links)
end
# Mirrors a directory tree: creates the target directory, recurses into subdirectories
# and hands every other entry to #ingest_file.
#
# @api internal
# @param source_dir [Pathname] The source location of the directory whose contents are to be ingested
# @param target_dir [Pathname] The target location of the directory into which files are ingested
# @param use_links [Boolean] If true, use hard links; if false, make copies
# @return [void] recursively link or copy the source directory contents to the target directory
# @raise [MoabRuntimeError] if the target directory already exists
def ingest_dir(source_dir, target_dir, use_links = true)
  raise(MoabRuntimeError, "cannot copy - target already exists: #{target_dir.expand_path}") if target_dir.exist?

  target_dir.mkpath
  source_dir.children.each do |entry|
    unless entry.directory?
      ingest_file(entry, target_dir, use_links)
      next
    end
    ingest_dir(entry, target_dir.join(entry.basename), use_links)
  end
end
# Places one file into the target directory through FileUtils.link or FileUtils.copy.
#
# @api internal
# @param source_file [Pathname] The source location of the file to be ingested
# @param target_dir [Pathname] The location of the directory in which to place the file
# @param use_links [Boolean] If true, use hard links; if false, make copies
# @return [void] link or copy the specified file from source location to the version directory
def ingest_file(source_file, target_dir, use_links = true)
  transfer = use_links ? :link : :copy
  FileUtils.public_send(transfer, source_file.to_s, target_dir.to_s)
end
# Feeds the new inventory and this version's 'data' directory to the catalog's update,
# then writes the catalog into this version's 'manifests' directory.
#
# @api internal
# @param signature_catalog [SignatureCatalog] The current version's catalog
# @param new_inventory [FileInventory] The new version's inventory
# @return [void] Updates the catalog to include newly added files, then saves it to disk
# @see SignatureCatalog#update
def update_catalog(signature_catalog, new_inventory)
  data_dir = @version_pathname.join('data')
  manifests_dir = @version_pathname.join('manifests')
  signature_catalog.update(new_inventory, data_dir)
  signature_catalog.write_xml_file(manifests_dir)
end
# Compares two inventories and writes the comparison into this version's 'manifests' directory.
#
# @api internal
# @param old_inventory [FileInventory] The old version's inventory
# @param new_inventory [FileInventory] The new version's inventory
# @return [void] generate a file inventory differences report and save to disk
def generate_differences_report(old_inventory, new_inventory)
  manifests_dir = @version_pathname.join('manifests')
  FileInventoryDifference.new.compare(old_inventory, new_inventory).write_xml_file(manifests_dir)
end
# Builds a 'manifests' inventory whose single group is populated from this version's
# 'manifests' directory, then writes the inventory into that same directory.
#
# @api internal
# @return [void] examine the version's directory and create/serialize a {FileInventory} containing the manifest files
def generate_manifest_inventory
  inventory = FileInventory.new(
    type: 'manifests',
    digital_object_id: @storage_object.digital_object_id,
    version_id: @version_id
  )
  manifests_dir = @version_pathname.join('manifests')
  manifests_group = FileGroup.new(group_id: 'manifests').group_from_directory(manifests_dir, false)
  inventory.groups << manifests_group
  inventory.write_xml_file(manifests_dir)
end
# Runs the manifest-inventory, version-inventory and version-additions checks and
# rolls them up; the overall result is verified only if every check is.
#
# @return [VerificationResult] return result of testing correctness of version manifests
def verify_version_storage
  result = VerificationResult.new(composite_key)
  [verify_manifest_inventory, verify_version_inventory, verify_version_additions].each do |check|
    result.subentities << check
  end
  result.verified = result.subentities.all?(&:verified)
  result
end
# Checks the manifest inventory three ways: its composite key matches this version,
# its 'manifests' group is not empty, and it shows no differences against an inventory
# measured from the files in the 'manifests' directory (manifestInventory.xml excluded).
#
# @return [VerificationResult] verified only if all three checks pass
def verify_manifest_inventory
  result = VerificationResult.new('manifest_inventory')
  # the inventory as recorded for this version
  recorded = file_inventory('manifests')
  result.subentities << VerificationResult.verify_value('composite_key', composite_key, recorded.composite_key)
  manifests_listed = !recorded.group_empty?('manifests')
  result.subentities << VerificationResult.verify_truth('manifests_group', manifests_listed)
  # the inventory as measured from the directory; the inventory file itself is removed from the measured group
  measured = FileInventory.new.inventory_from_directory(@version_pathname.join('manifests'), 'manifests')
  measured.digital_object_id = storage_object.digital_object_id
  measured.group('manifests').remove_file_having_path('manifestInventory.xml')
  # recorded versus measured
  diff = FileInventoryDifference.new
  diff.compare(recorded, measured)
  comparison = VerificationResult.new('file_differences')
  comparison.verified = (diff.difference_count == 0)
  comparison.details = diff.differences_detail
  result.subentities << comparison
  result.verified = result.subentities.all?(&:verified)
  result
end
# Checks that the signature catalog's composite key matches this version and that every
# catalog entry's storage path exists under the object's home directory.
#
# @return [VerificationResult] carries a 'storage_location' sub-result whose details give the
#   expected and found counts plus, when any are absent, the missing paths
def verify_signature_catalog
  result = VerificationResult.new('signature_catalog')
  catalog = signature_catalog
  result.subentities << VerificationResult.verify_value('signature_key', composite_key, catalog.composite_key)
  home = storage_object.object_pathname
  locations = []
  catalog.entries.each { |entry| locations << home.join(entry.storage_path) }
  present, absent = locations.partition(&:exist?)
  expected_count = catalog.file_count
  details = { 'expected' => expected_count, 'found' => present.size }
  details['missing'] = absent.map(&:to_s) unless absent.empty?
  file_result = VerificationResult.new('storage_location')
  file_result.verified = (present.size == expected_count)
  file_result.details = details
  result.subentities << file_result
  result.verified = result.subentities.all?(&:verified)
  result
end
# Checks that the version inventory and the signature catalog both carry this version's
# composite key, and that the signature of every file instance listed in the version
# inventory has an entry in the signature catalog.
#
# @return [VerificationResult] carries a 'catalog_entry' sub-result whose details give the
#   expected and found counts plus, when any are absent, the missing 'group/path' values
def verify_version_inventory
  result = VerificationResult.new('version_inventory')
  inventory = file_inventory('version')
  result.subentities << VerificationResult.verify_value('inventory_key', composite_key, inventory.composite_key)
  catalog = signature_catalog
  result.subentities << VerificationResult.verify_value('signature_key', composite_key, catalog.composite_key)
  instance_total = 0
  missing = []
  inventory.groups.each do |group|
    group.files.each do |file|
      file.instances.each do |instance|
        instance_total += 1
        relative_path = File.join(group.group_id, instance.path)
        missing << relative_path if catalog.signature_hash[file.signature].nil?
      end
    end
  end
  found = instance_total - missing.size
  expected_count = inventory.file_count
  details = { 'expected' => expected_count, 'found' => found }
  details['missing'] = missing unless missing.empty?
  file_result = VerificationResult.new('catalog_entry')
  file_result.verified = (found == expected_count)
  file_result.details = details
  result.subentities << file_result
  result.verified = result.subentities.all?(&:verified)
  result
end
# Checks that the version additions inventory carries this version's composite key and
# shows no differences against an inventory measured from this version's 'data' directory.
#
# @return [VerificationResult] verified only if both checks pass
def verify_version_additions
  result = VerificationResult.new('version_additions')
  recorded = file_inventory('additions')
  result.subentities << VerificationResult.verify_value('composite_key', composite_key, recorded.composite_key)
  data_dir = @version_pathname.join('data')
  measured = FileInventory.new(type: 'directory').inventory_from_directory(data_dir)
  diff = FileInventoryDifference.new
  diff.compare(recorded, measured)
  comparison = VerificationResult.new('file_differences')
  comparison.verified = (diff.difference_count == 0)
  comparison.details = diff.differences_detail
  result.subentities << comparison
  result.verified = result.subentities.all?(&:verified)
  result
end
# Deactivates this object version by moving its directory into a sibling directory
# named after the timestamp. (Used by restore operation)
#
# @param timestamp [Time] The time at which the deactivation was initiated. Used to name the inactive directory
# @return [void] Does nothing when the version directory does not exist
def deactivate(timestamp)
  return unless @version_pathname.exist?

  # getutc returns a new Time. Time#utc would switch the caller's object to UTC in place
  # (and raise on a frozen Time).
  inactive_dirname = timestamp.getutc.iso8601.gsub(/[-:]/, '')
  timestamp_pathname = @version_pathname.parent.join(inactive_dirname)
  timestamp_pathname.mkpath
  demote_pathname = timestamp_pathname.join(@version_pathname.basename)
  @version_pathname.rename(demote_pathname)
end
end
end