archivesspace/archivesspace

View on GitHub
backend/app/lib/bulk_import/import_archival_objects.rb

Summary

Maintainability
F
5 days
Test Coverage
require_relative "bulk_import_parser"

class ImportArchivalObjects < BulkImportParser
  START_MARKER = /ArchivesSpace field code/.freeze

  # Sets up the importer state and the controlled-value lists used to validate
  # row values.
  #
  # All arguments are handed unchanged to the superclass constructor.
  # NOTE(review): @current_user is read here but not assigned in this method;
  # presumably the superclass constructor sets it - confirm.
  def initialize(input_file, content_type, current_user, opts, log_method = nil)
    # bare super forwards every parameter, including the log_method default
    super
    @start_marker = START_MARKER
    @parents = ParentTracker.new
    @first_level_aos = []
    user = @current_user
    @archival_levels = CvList.new("archival_record_level", user)
    @container_types = CvList.new("container_type", user)
    @date_types = CvList.new("date_type", user)
    @date_labels = CvList.new("date_label", user)
    @date_certainty = CvList.new("date_certainty", user)
    @extent_types = CvList.new("extent_extent_type", user)
    @extent_portions = CvList.new("extent_portion", user)
    # only built when nothing has populated @instance_types beforehand
    @instance_types ||= CvList.new("instance_instance_type", user)
  end

  # Instantiates the helper objects used while building an archival object:
  # container instances (@cih), digital objects (@doh), subjects (@sh),
  # agents (@ah) and languages (@lh).
  def initialize_handler_enums
    shared_args = [@current_user, @validate_only]
    @cih = ContainerInstanceHandler.new(*shared_args)
    @doh = DigitalObjectHandler.new(*shared_args)
    @sh = SubjectHandler.new(*shared_args)
    @ah = AgentHandler.new(*shared_args)
    # the language handler only takes the user: it doesn't need validation
    @lh = LangHandler.new(@current_user)
  end

  # look for all the required fields to make sure they are legit
  # strip all the strings and turn publish and restrictions_flag into true/false
  #
  # Reads and mutates @row_hash, and updates @hier to the row's hierarchy level.
  #
  # Returns the collected error messages joined with "; " (an empty string
  # when the row is acceptable). Returns early, with a single message and
  # without normalising the row, when the row sits below a level that
  # previously failed (@error_level).
  #
  # Raises StopBulkImportException when the first row skips a hierarchy level
  # and we are not in validate-only mode.
  def check_row
    err_arr = []
    begin
      # we'll check hierarchical level first, in case there was a parent that didn't get created
      hier = @row_hash["hierarchy"]
      if !hier
        err_arr.push I18n.t("bulk_import.error.hier_miss")
      else
        hier = hier.to_i
        # we bail if the parent wasn't created!
        return I18n.t("bulk_import.error.hier_below_error_level") if (@error_level && hier > @error_level)
        err_arr.push I18n.t("bulk_import.error.hier_zero") if hier < 1
        # going from a 1 to a 3, for example
        if (hier - 1) > @hier
          err_arr.push I18n.t("bulk_import.error.hier_wrong")
          if @hier == 0
            if @validate_only
              err_arr.push I18n.t("bulk_import.error.hier_wrong_resource_validation")
              @hier = 1
            else
              err_arr.push I18n.t("bulk_import.error.hier_wrong_resource")
              raise StopBulkImportException.new(err_arr.join(";"))
            end
          end
        end
        @hier = hier
      end
      missing_title = @row_hash["title"].nil?
      # date stuff: if already missing the title, we have to make sure the date label is valid
      missing_date = [@row_hash["begin"], @row_hash["end"], @row_hash["expression"]].reject(&:nil?).empty?
      if !missing_date
        begin
          # only called for its validation side effect: it raises on an unknown label
          @date_labels.value((@row_hash["dates_label"] || "creation"))
        rescue Exception => e
          # NOTE(review): Exception (not StandardError) is kept on purpose; the
          # list lookup is not visible here and may raise a bare Exception.
          err_arr.push I18n.t("bulk_import.error.invalid_date_label", :what => e.message) if missing_title
          missing_date = true
        end
      end
      err_arr.push I18n.t("bulk_import.error.title_and_date") if (missing_title && missing_date)
      # tree hierarchy
      level = value_check(@archival_levels, @row_hash["level"], err_arr)
      err_arr.push I18n.t("bulk_import.error.level") if level.nil?
    rescue StopBulkImportException
      raise
    rescue Exception => e
      Log.error(["UNEXPLAINED EXCEPTION on check row", e.message, e.backtrace, @row_hash].pretty_inspect)
    end
    if err_arr.empty? || @validate_only
      @row_hash.each do |k, v|
        stripped = v.nil? ? nil : v.strip
        # The boolean columns are compared AFTER stripping, so " 1 " counts as
        # true; comparing the raw value would silently turn it into false.
        @row_hash[k] = if k == "publish" || k == "restrictions_flag"
                         stripped == "1"
                       else
                         stripped
                       end
      end
    end
    err_arr.join("; ")
  end

  # Validates the current row (@row_hash), builds and saves its archival
  # object, records the outcome in @report, and updates the hierarchy
  # bookkeeping (@parents, @created_refs, @first_level_aos, @need_to_move).
  #
  # row_hash - accepted for interface compatibility; this method reads
  #            @row_hash and does not use the parameter.
  #
  # Raises BulkImportException when the row fails validation outside
  # validate-only mode, or when building/saving fails with anything other
  # than a JSONModel::ValidationException.
  def process_row(row_hash = nil)
    ao = nil
    ret_str = ""
    begin
      resource_match(@resource, @row_hash["ead"], @row_hash["res_uri"])
    rescue Exception => e
      ret_str = e.message
    end
    # mismatch of resource stops all other processing
    ret_str = check_row if ret_str.empty?
    unless ret_str.empty?
      if @validate_only
        @report.add_errors(ret_str)
      else
        raise BulkImportException.new(I18n.t("bulk_import.row_error", :row => @counter, :errs => ret_str))
      end
    end
    parent_uri = @parents.parent_for(@row_hash["hierarchy"].to_i)
    begin
      ao = create_archival_object(parent_uri)
      ao = ao_save(ao)
    rescue JSONModel::ValidationException => ve
      # ao is still nil when the exception came out of create_archival_object,
      # so it must not be dereferenced unconditionally here.
      msg = I18n.t("bulk_import.error.second_save_error",
                   :what => ve.errors,
                   :title => (ao ? ao.title : nil),
                   :pos => (ao ? ao.position : nil))
      @report.add_errors(msg)
    rescue Exception => e
      Log.error("UNEXPECTED ON SECOND SAVE#{e.message}")
      Log.error(e.backtrace.pretty_inspect)
      Log.error(ASUtils.jsonmodels_to_hashes(ao).pretty_inspect)
      raise BulkImportException.new(e.message)
    end
    unless ao.nil?
      # an object lacking both title and dates, or lacking a level, is reported without a uri
      fail_test = (ao.title.nil? && ao.dates.empty?) || ao.level.nil?
      ao.uri = nil if fail_test
      @report.add_archival_object(ao)
    end

    # A row that produced no object is tracked like one that failed the check
    # above: with a nil uri at its hierarchy level.
    ao_uri = ao ? ao.uri : nil
    @parents.set_uri(@hier, ao_uri)
    @created_refs << ao_uri if ao_uri && !@validate_only
    if @hier == 1
      @first_level_aos.push ao_uri
      if @first_one && @start_position
        @need_to_move = (ao.position - @start_position) > 1 if ao && !@validate_only
        @first_one = false
      end
    end
  end

  # Writes the outcome of one processed row through @log_method: first a
  # created / not-created line, then one line per info message, then one line
  # per error. The "_be" message keys are used in validate-only mode.
  #
  # row - report row responding to row, archival_object_id,
  #       archival_object_display, ref_id, info and errors.
  def log_row(row)
    if row.archival_object_id.nil?
      missing_key = @validate_only ? "bulk_import.error.no_ao_be" : "bulk_import.error.no_ao"
      @log_method.call(I18n.t("bulk_import.log_error", :row => row.row, :what => I18n.t(missing_key)))
    else
      object_key = @validate_only ? "bulk_import.log_obj_be" : "bulk_import.log_obj"
      created_key = @validate_only ? "bulk_import.log_created_be" : "bulk_import.log_created"
      description = I18n.t(object_key,
                           :what => I18n.t("bulk_import.ao"),
                           :nm => row.archival_object_display,
                           :id => row.archival_object_id,
                           :ref_id => row.ref_id)
      @log_method.call(I18n.t(created_key, :row => row.row, :what => description))
    end
    row.info.each { |note| @log_method.call(I18n.t("bulk_import.log_info", :row => row.row, :what => note)) } unless row.info.empty?
    row.errors.each { |problem| @log_method.call(I18n.t("bulk_import.log_error", :row => row.row, :what => problem)) } unless row.errors.empty?
  end

  private

  # create an archival_object
  #
  # Builds an archival object from the current row (@row_hash), saves it once
  # part-way through, then attaches container and digital-object instances,
  # subjects and agent links to the object that is returned.
  #
  # parent_uri - uri of the parent record, or nil; when nil no parent ref is set.
  #
  # Returns the archival object after the intermediate save, with instances,
  # subjects and linked agents assigned (this method does not save again
  # after assigning them).
  #
  # Outside validate-only mode it raises BulkImportException when the row has
  # neither a title nor any usable date, or when the intermediate save fails,
  # and raises the first level-validation message as a plain RuntimeError.
  # In validate-only mode the title/date and level problems are added to
  # @report instead.
  def create_archival_object(parent_uri)
    errs = []
    ao = JSONModel(:archival_object).new._always_valid!
    ao.title = @row_hash["title"] if @row_hash["title"]
    ao.dates = create_dates
    # because the date may have been invalid, we should check if there's a title, otherwise bail
    if ao.title.nil? && ao.dates.empty?
      error_msg = I18n.t("bulk_import.error.title_and_date")
      if @validate_only
        # skip the message if the report already contains it
        @report.add_errors(error_msg) if !@report.in_errors(error_msg)
      else
        raise BulkImportException.new(error_msg)
      end
    end
    # value_check appends to errs; only the first message is reported/raised below
    ao.level = value_check(@archival_levels, @row_hash["level"], errs) if @row_hash["level"]
    if !errs.empty?
      if @validate_only
        @report.add_errors(errs[0])
      else
        # raising a String produces a RuntimeError carrying that message
        raise errs[0]
      end
    end
    ao.resource = { "ref" => @resource["uri"] }
    ao.ref_id = @row_hash['ref_id'] if @row_hash['ref_id']
    ao.component_id = @row_hash["unit_id"] if @row_hash["unit_id"]
    ao.repository_processing_note = @row_hash["processing_note"] if @row_hash["processing_note"]

    # other_level is only filled in for the "otherlevel" level, defaulting to "unspecified"
    ao.other_level = @row_hash["other_level"] || "unspecified" if ao.level == "otherlevel"
    ao.publish = @row_hash["publish"]
    ao.restrictions_apply = @row_hash["restrictions_flag"]
    ao.parent = { "ref" => parent_uri } unless parent_uri.nil?
    # handle language issues; the object's publish flag is passed on to process_langs
    langs = process_langs(ao.publish)
    ao.lang_materials = langs if !langs.empty?
    # an extent failure is reported but does not stop the row
    begin
      ao.extents = process_extents
    rescue Exception => e
      @report.add_errors(e.message)
    end
    # errs is reused here for whatever handle_notes returns
    errs = handle_notes(ao, @row_hash)
    @report.add_errors(errs) if !errs.empty?
    # we have to save the ao for the display_string
    begin
      ao = ao_save(ao)
    rescue Exception => e
      msg = I18n.t("bulk_import.error.initial_save_error", :title => ao.title, :msg => e.message)
      raise BulkImportException.new(msg)
    end

    ao.instances = create_top_container_instances
    dig_instance = nil
    # a digital object is only attempted when at least one of these columns has a value
    unless [@row_hash["digital_object_title"], @row_hash["rep_file_uri"], @row_hash["nonrep_file_uri"],
            @row_hash["digital_object_id"]].reject(&:nil?).empty?

      begin
        normalize_boolean_column(@row_hash, 'digital_object_publish')
        normalize_boolean_column(@row_hash, 'nonrep_publish')
        dig_instance = @doh.create(
          @row_hash["digital_object_title"],
          @row_hash["digital_object_id"],
          @row_hash["digital_object_publish"],
          nil, # level
          nil, # digital_object_type
          nil, # restrictions
          [],  # dates
          [],  # notes
          [],  # extents
          [],  # subjects
          [],  # linked_agents
          ao,
          @report,
          representative_file_version,
          non_representative_file_version)
      rescue Exception => e
        # reported only; dig_instance stays nil and is handled just below
        @report.add_errors(e.message)
      end
      if dig_instance
        ao.instances ||= []
        ao.instances << dig_instance
      elsif @validate_only
        @report.add_errors(I18n.t("bulk_import.object_not_created_be", :what => I18n.t("bulk_import.dig")))
      else
        @report.add_errors(I18n.t("bulk_import.error.dig_validation", :err => ""))
      end

    end
    subjs = process_subjects
    subjs.each { |subj| ao.subjects.push({ "ref" => subj.uri }) } unless subjs.empty?
    links = process_agents
    ao.linked_agents = links
    ao
  end

  # Collects the dates described by the current row.
  #
  # Date columns come in numbered groups: the first group has no suffix, later
  # ones are suffixed "_2", "_3", ... Scanning stops at the first group whose
  # begin, end and expression columns are all nil.
  #
  # Returns an Array of the truthy results of create_date (possibly empty).
  def create_dates
    collected = []
    index = 1
    suffix = ""
    while %w(begin end expression).any? { |field| !@row_hash["#{field}#{suffix}"].nil? }
      # same order as the create_date parameters: label, begin, end, type, expression, certainty
      fields = %w(dates_label begin end date_type expression date_certainty).map do |name|
        @row_hash["#{name}#{suffix}"]
      end
      built = create_date(*fields)
      collected.push(built) if built
      index += 1
      suffix = "_#{index}"
    end
    collected
  end

  # Builds one extent from the row columns carrying the given suffix.
  #
  # substr - column suffix for the extent group ("" for the first group,
  #          "_2", "_3", ... for later ones).
  #
  # Returns the JSONModel extent when the portion and extent type pass
  # value_check and test_exceptions returns a truthy value; otherwise nil.
  # Failures are added to @report rather than raised.
  def create_extent(substr)
    # portion defaults to "whole" when the column is empty
    portion_value = @row_hash["portion#{substr}"] || "whole"
    ext_str = "Extent: #{portion_value} #{@row_hash["number#{substr}"]} #{@row_hash["extent_type#{substr}"]} #{@row_hash["container_summary#{substr}"]} #{@row_hash["physical_details#{substr}"]} #{@row_hash["dimensions#{substr}"]}"
    errs = []
    portion = value_check(@extent_portions, portion_value, errs)
    type = value_check(@extent_types, @row_hash["extent_type#{substr}"], errs)

    unless errs.empty?
      # messages are separated with ", " (the separator used to be " ,")
      @report.add_errors(I18n.t("bulk_import.error.extent_validation", :msg => errs.join(", "), :ext => ext_str))
      return nil
    end

    extent = { "portion" => portion, "extent_type" => type }
    %w(number container_summary physical_details dimensions).each do |field|
      extent[field] = @row_hash["#{field}#{substr}"]
    end
    begin
      ex = JSONModel(:extent).new(extent)
      return ex if test_exceptions(ex, "Extent")
    rescue Exception => e
      @report.add_errors(I18n.t("bulk_import.error.extent_validation", :msg => e.message, :ext => ext_str))
    end
    nil
  end

  # Resolves the agent columns of the current row into agent links.
  #
  # For each agent type (people, corporate_entities, families) the numbered
  # column groups are read starting at 1, stopping at the first number whose
  # record id and header are both nil.
  #
  # Returns an Array of links. Links are only collected outside validate-only
  # mode. A BulkImportException from the agent handler is added to @report
  # and the scan continues with the next number.
  def process_agents
    collected = []
    %w(people corporate_entities families).each do |type|
      num = 1
      until @row_hash["#{type}_agent_record_id_#{num}"].nil? && @row_hash["#{type}_agent_header_#{num}"].nil?
        begin
          found = @ah.get_or_create(type,
                                    @row_hash["#{type}_agent_record_id_#{num}"],
                                    @row_hash["#{type}_agent_header_#{num}"],
                                    @row_hash["#{type}_agent_relator_#{num}"],
                                    @row_hash["#{type}_agent_role_#{num}"],
                                    @report)
          collected.push(found) if found && !@validate_only
        rescue BulkImportException => e
          @report.add_errors(I18n.t("bulk_import.error.process_error", :type => "#{type} Agent", :num => num, :why => e.message))
        end
        num += 1
      end
    end
    collected
  end

  # Prepares the per-import state after the superclass has done its own setup.
  #
  # When @opts[:aoid] is missing or blank, the import hangs off the resource:
  # level 0 of the parent tracker is nil and @hier starts at 0. Otherwise the
  # identified archival object is loaded, its position is remembered in
  # @start_position, and the tracker is seeded with its parent's ref (level 0)
  # and its own uri (level 1).
  def initialize_info
    super
    @ao = nil
    requested_id = @opts[:aoid] || nil
    @resource_level = requested_id.nil? || requested_id.strip.empty?
    # to determine whether we need to worry about positioning
    @first_one = false
    if @resource_level
      @parents.set_uri(0, nil)
      @hier = 0
    else
      @ao = ArchivalObject.to_jsonmodel(Integer(requested_id))
      @start_position = @ao.position
      # we need the parent for sibling/child disambiguation later on
      parent = @ao.parent
      parent_ref = nil
      parent_ref = ASUtils.jsonmodels_to_hashes(parent)["ref"] if parent
      @parents.set_uri(0, parent_ref)
      @parents.set_uri(1, @ao.uri)
      @first_one = true
    end
  end

  # Builds the container instances described by the current row.
  #
  # Container columns come in numbered groups (no suffix for the first, then
  # "_2", "_3", ...). Scanning stops at the first group whose
  # cont_instance_type, type_1 and barcode columns are all nil.
  #
  # Returns an Array of the truthy results from the container handler. A group
  # whose creation raises is added to @report (with its group number) and
  # skipped.
  def create_top_container_instances
    built = []
    position = 1
    suffix = ""
    while %w(cont_instance_type type_1 barcode).any? { |field| !@row_hash["#{field}#{suffix}"].nil? }
      container = begin
        sub_container = %w(type_2 indicator_2 type_3 indicator_3).each_with_object({}) do |field, acc|
          acc[field] = @row_hash["#{field}#{suffix}"]
        end
        @cih.create_container_instance(@row_hash["cont_instance_type#{suffix}"],
                                       @row_hash["type_1#{suffix}"],
                                       @row_hash["indicator_1#{suffix}"],
                                       @row_hash["barcode#{suffix}"],
                                       @resource["uri"],
                                       @report,
                                       sub_container)
      rescue Exception => e
        @report.add_errors(I18n.t("bulk_import.error.no_container_instance", number: position.to_s, why: e.message))
        nil
      end
      built << container if container
      position += 1
      suffix = "_#{position}"
    end
    built
  end

  # Collects the extents described by the current row.
  #
  # Extent columns come in numbered groups (no suffix for the first, then
  # "_2", "_3", ...). Scanning stops at the first group whose number and
  # extent_type columns are both nil.
  #
  # Returns an Array of the truthy results of create_extent (possibly empty).
  def process_extents
    found = []
    index = 1
    suffix = ""
    while %w(number extent_type).any? { |field| !@row_hash["#{field}#{suffix}"].nil? }
      candidate = create_extent(suffix)
      found.push(candidate) if candidate
      index += 1
      suffix = "_#{index}"
    end
    found
  end

  # Builds the language entries described by the current row.
  #
  # Language columns come in numbered groups (no suffix for the first, then
  # "_2", "_3", ...). Scanning stops at the first group whose l_lang,
  # l_langscript and n_langmaterial columns are all nil.
  #
  # publish - fallback publish flag, used for a group whose p_langmaterial
  #           column is nil; otherwise the flag is true only for "1".
  #
  # Returns the concatenation of every non-empty result from the language
  # handler. Side effect: the n_langmaterial column of each scanned group is
  # set to nil in @row_hash.
  def process_langs(publish)
    collected = []
    index = 1
    suffix = ""
    while %w(l_lang l_langscript n_langmaterial).any? { |field| !@row_hash["#{field}#{suffix}"].nil? }
      flag = @row_hash["p_langmaterial#{suffix}"]
      publish_note = flag.nil? ? publish : (flag == "1")
      created = @lh.create_language(@row_hash["l_lang#{suffix}"],
                                    @row_hash["l_langscript#{suffix}"],
                                    @row_hash["n_langmaterial#{suffix}"],
                                    publish_note,
                                    @report)
      collected.concat(created) unless created.empty?
      # blank the note column once it has been handed to the language handler
      @row_hash["n_langmaterial#{suffix}"] = nil
      index += 1
      suffix = "_#{index}"
    end
    collected
  end

  # Resolves the subject columns of the current row.
  #
  # Subject slots 1 through 10 are each examined; a slot is skipped when both
  # its record id and its term are nil (gaps do not stop the scan).
  #
  # Returns an Array of the truthy results from the subject handler. A slot
  # whose lookup raises is added to @report and skipped.
  def process_subjects
    # third "/"-separated segment of @repository
    # NOTE(review): presumably the numeric id of a "/repositories/N" uri - confirm
    repo_id = @repository.split("/")[2]
    (1..10).each_with_object([]) do |num, found|
      record_id = @row_hash["subject_#{num}_record_id"]
      term = @row_hash["subject_#{num}_term"]
      next if record_id.nil? && term.nil?
      begin
        match = @sh.get_or_create(record_id, term,
                                  @row_hash["subject_#{num}_type"],
                                  @row_hash["subject_#{num}_source"],
                                  repo_id, @report)
        found.push(match) if match
      rescue Exception => e
        @report.add_errors(I18n.t("bulk_import.error.process_error", :type => "Subject", :num => num, :why => e.message))
      end
    end
  end

end