app/models/import_dataset/darwin_core/occurrences.rb
class ImportDataset::DarwinCore::Occurrences < ImportDataset::DarwinCore
is_origin_for Person::Unvetted.to_s
has_many :core_records, foreign_key: 'import_dataset_id', class_name: 'DatasetRecord::DarwinCore::Occurrence'
has_many :extension_records, foreign_key: 'import_dataset_id', class_name: 'DatasetRecord::DarwinCore::Extension'
# TODO: Can occurrenceID requirement be dropped? Should other fields be added here?
MINIMUM_FIELD_SET = ["occurrenceID", "scientificName", "basisOfRecord"]
validate :source, :check_field_set
def core_records_class
DatasetRecord::DarwinCore::Occurrence
end
def core_records_identifier_name
'occurrenceID'
end
def get_event_id_namespace
id = metadata.dig("namespaces", "eventID")
if id.nil? || (@event_id_identifier_namespace ||= Namespace.find_by(id: id)).nil?
random = SecureRandom.hex(4)
project_name = Project.find(Current.project_id).name
namespace_name = "eventID namespace for \"#{description}\" dataset in \"#{project_name}\" project [#{random}]"
@event_id_identifier_namespace = Namespace.create!(
name: namespace_name,
short_name: "eventID-#{random}",
verbatim_short_name: "eventID",
delimiter: ':'
)
metadata["namespaces"]["eventID"] = @event_id_identifier_namespace.id
save!
end
@event_id_identifier_namespace
end
# Stages core (Occurrence) records and all extension records.
def perform_staging
records, headers = get_records(source.path)
update!(metadata:
metadata.merge({
core_headers: headers[:core],
extensions_headers: headers[:extensions],
catalog_numbers_namespaces: []
})
)
core_records = records[:core].map do |record|
{
src_data: record,
basisOfRecord: record["basisOfRecord"]
}
end
catalog_numbers_namespaces = Set[]
catalog_numbers_collection_code_namespaces = Set[]
core_records.each do |record|
dwc_occurrence = DatasetRecord::DarwinCore::Occurrence.new(import_dataset: self)
dwc_occurrence.initialize_data_fields(record[:src_data].map { |k, v| v })
catalog_numbers_namespaces << [
[
dwc_occurrence.get_field_value(:institutionCode),
dwc_occurrence.get_field_value(:collectionCode)
],
nil # User will select namespace through UI. TODO: Should we attempt guessing here?
]
catalog_numbers_collection_code_namespaces << [dwc_occurrence.get_field_value(:collectionCode), nil]
if dwc_occurrence.get_field_value(:catalogNumber).blank?
dwc_occurrence.status = "Ready"
else
dwc_occurrence.status = "NotReady"
record["error_data"] = { messages: { catalogNumber: ["Record cannot be imported until namespace is set."] } }
end
record.delete(:src_data)
dwc_occurrence.metadata = record
dwc_occurrence.save!
end
records[:extensions].each do |extension_type, records|
records.each do |record|
dwc_extension = DatasetRecord::DarwinCore::Extension.new(import_dataset: self)
dwc_extension.initialize_data_fields(record.map { |k, v| v })
dwc_extension.status = "Unsupported"
dwc_extension.metadata = { "type" => extension_type }
dwc_extension.save!
end
end
self.metadata.merge!(
catalog_numbers_namespaces: catalog_numbers_namespaces.sort { |a, b| a[0].map(&:to_s) <=> b[0].map(&:to_s) }
)
self.metadata.merge!(
catalog_numbers_collection_code_namespaces: catalog_numbers_collection_code_namespaces.sort { |a, b| a[0].to_s <=> b[0].to_s }
)
save!
end
def get_catalog_number_namespace(institution_code, collection_code)
get_catalog_number_namespace_mapping(institution_code, collection_code)&.at(1) ||
get_catalog_number_collection_code_namespace_mapping(collection_code)&.at(1)
end
def update_catalog_number_namespace(institution_code, collection_code, namespace_id)
transaction do
mapping = get_catalog_number_namespace_mapping(institution_code, collection_code)
mapping[1] = namespace_id
ready = namespace_id.to_i > 0
save!
query = ready ? core_records.where(status: 'NotReady') : core_records.where.not(status: ['NotReady', 'Imported', 'Unsupported'])
# TODO: Add scopes/methods in DatasetRecord to handle nil fields values transparently
unless institution_code.nil?
query = query.where(
id: core_records_fields.at(get_field_mapping(:institutionCode)).having_value(institution_code).select(:dataset_record_id)
)
else
query = query.where.not(
id: core_records_fields.at(get_field_mapping(:institutionCode)).select(:dataset_record_id)
)
end
unless collection_code.nil?
query = query.where(
id: core_records_fields.at(get_field_mapping(:collectionCode)).having_value(collection_code).select(:dataset_record_id)
)
else
query = query.where.not(
id: core_records_fields.at(get_field_mapping(:collectionCode)).select(:dataset_record_id)
)
end
query.update_all(ready ?
"status = 'Ready', metadata = metadata - 'error_data'" :
"status = 'NotReady', metadata = jsonb_set(metadata, '{error_data}', '{ \"messages\": { \"catalogNumber\": [\"Record cannot be imported until namespace is set, see \\\"Settings\\\".\"] } }')"
)
end
end
def update_catalog_number_collection_code_namespace(collection_code, namespace_id)
return if collection_code.nil? # No support for mapping blank data at this time
transaction do
mapping = get_catalog_number_collection_code_namespace_mapping(collection_code)
mapping[1] = namespace_id
ready = namespace_id.to_i > 0
save!
query = ready ? core_records.where(status: 'NotReady') : core_records.where.not(status: ['NotReady', 'Imported', 'Unsupported'])
if ready
query.where(
id: core_records_fields.at(get_field_mapping(:collectionCode)).having_value(collection_code).select(:dataset_record_id)
).update_all(
"status = 'Ready', metadata = metadata - 'error_data'"
)
else
institution_codes = self.metadata["catalog_numbers_namespaces"]&.select { |m| m[0][1] == collection_code && m[1] }&.map { |m| m[0][0] } || []
query.where(
id: core_records_fields.at(get_field_mapping(:collectionCode)).having_value(collection_code).select(:dataset_record_id)
).where.not(
id: core_records_fields.at(get_field_mapping(:institutionCode)).having_values(institution_codes).select(:dataset_record_id)
).update_all(
"status = 'NotReady', metadata = jsonb_set(metadata, '{error_data}', '{ \"messages\": { \"catalogNumber\": [\"Record cannot be imported until namespace is set, see \\\"Settings\\\".\"] } }')"
)
end
end
end
def add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil)
unless get_catalog_number_namespace_mapping(institution_code, collection_code)
self.metadata["catalog_numbers_namespaces"] << [[institution_code, collection_code], namespace_id]
self.metadata["catalog_numbers_namespaces"].sort! { |a, b| a[0].map(&:to_s) <=> b[0].map(&:to_s) }
end
save!
end
def add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil)
unless collection_code.nil? || get_catalog_number_collection_code_namespace_mapping(collection_code)
self.metadata["catalog_numbers_collection_code_namespaces"] << [collection_code, namespace_id]
self.metadata["catalog_numbers_collection_code_namespaces"].sort! { |a, b| a[0].to_s <=> b[0].to_s }
end
save!
end
def containerize_dup_cat_no?
!!self.metadata.dig("import_settings", "containerize_dup_cat_no")
end
def restrict_to_existing_nomenclature?
!!self.metadata.dig("import_settings", "restrict_to_existing_nomenclature")
end
def require_type_material_success?
!!self.metadata.dig("import_settings", "require_type_material_success")
end
def require_tripcode_match_verbatim?
!!self.metadata.dig("import_settings", "require_tripcode_match_verbatim")
end
def require_catalog_number_match_verbatim?
!!self.metadata.dig("import_settings", "require_catalog_number_match_verbatim")
end
def enable_organization_determiners?
!!self.metadata.dig("import_settings", "enable_organization_determiners")
end
def enable_organization_determiners_alt_name?
!!self.metadata.dig("import_settings", "enable_organization_determiners_alt_name")
end
private
def get_catalog_number_namespace_mapping(institution_code, collection_code)
self.metadata["catalog_numbers_namespaces"]&.detect { |m| m[0] == [institution_code, collection_code] }
end
def get_catalog_number_collection_code_namespace_mapping(collection_code)
self.metadata["catalog_numbers_collection_code_namespaces"]&.detect { |m| m[0] == collection_code }
end
end