UNC-Libraries/hy-c

View on GitHub
app/services/tasks/ingest_service.rb

Summary

Maintainability
A
0 mins
Test Coverage
A
95%
# frozen_string_literal: true
module Tasks
  require 'tasks/migrate/services/progress_tracker'
  # Shared scaffolding for ingesting zipped packages from a directory: it
  # resolves the target admin set and depositor, creates a DepositRecord for
  # the run, iterates over the packages while reporting per-package status,
  # and unzips package contents into a working directory.
  #
  # Several methods are called here but are not defined in this class, so a
  # subclass (or mixin) has to supply them:
  #   ingest_source, deposit_package_type, deposit_package_subtype,
  #   process_package(package_path), valid_extract?(extracted_files)
  class IngestService
    attr_reader :temp, :admin_set, :depositor, :package_dir

    # @param config [Hash] ingest settings; the keys read here are
    #   'unzip_dir', 'admin_set', 'depositor_onyen' and 'package_dir'
    # @param status_service [Object] receives initialize_statuses,
    #   status_in_progress, status_complete and status_failed calls
    # @raise [ActiveRecord::RecordNotFound] when no AdminSet has the configured
    #   title, or no User has the configured onyen
    def initialize(config, status_service)
      logger.info("Beginning #{ingest_source} ingest")

      @config = config
      # Create temp directory for unzipped contents
      @temp = @config['unzip_dir']
      FileUtils.mkdir_p @temp unless File.exist?(@temp)

      @status_service = status_service

      # Should deposit works into an admin set
      admin_set_title = @config['admin_set']
      @admin_set = ::AdminSet.where(title: admin_set_title)&.first
      raise(ActiveRecord::RecordNotFound, "Could not find AdminSet with title #{admin_set_title}") unless @admin_set.present?

      @depositor = User.find_by(uid: @config['depositor_onyen'])
      raise(ActiveRecord::RecordNotFound, "Could not find User with onyen #{@config['depositor_onyen']}") unless @depositor.present?

      @package_dir = @config['package_dir']

      # Persist the deposit record up front so it exists for the whole run
      deposit_record
    end

    # Attributes used to build the DepositRecord for this ingest run.
    # Memoized, so the timestamp in the title is fixed at first call.
    #
    # @return [Hash]
    def deposit_record_hash
      @deposit_record_hash ||= { title: "#{ingest_source} Ingest #{Time.new.strftime('%F %T')}",
                                 deposit_method: "Hy-C #{BRANCH}, #{self.class}",
                                 deposit_package_type: deposit_package_type,
                                 deposit_package_subtype: deposit_package_subtype,
                                 deposited_by: @depositor.uid }
    end

    # Creates and saves the DepositRecord for this run on first call and
    # returns the same record afterwards.
    #
    # @return [DepositRecord]
    def deposit_record
      @deposit_record ||= begin
        # Go through the memoizing method rather than the raw ivar: the ivar is
        # still nil if deposit_record_hash has not been called yet, which would
        # save a record with none of the attributes populated.
        record = DepositRecord.new(deposit_record_hash)
        record[:manifest] = nil
        record[:premis] = nil
        record.save!

        record
      end
    end

    # @param package_path [String] path to a package zip file
    # @return [String] the file name without its directory or '.zip' suffix
    def orig_file_name(package_path)
      File.basename(package_path, '.zip')
    end

    # Directory under the temp directory that the given package is unzipped
    # into; it is created if it does not exist yet.
    #
    # @param package_path [String] path to a package zip file
    # @return [String] path of the per-package unzip directory
    def unzip_dir(package_path)
      fname = orig_file_name(package_path)
      dirname = "#{@temp}/#{fname}"
      FileUtils.mkdir_p(dirname) unless File.exist?(dirname)
      dirname
    end

    # Processes every zip package found in the package directory.
    def process_all_packages
      process_packages(package_paths)
    end

    # Processes each package in turn, reporting progress to the status service.
    # A failure in one package is logged and recorded as failed, then the loop
    # moves on to the next package.
    #
    # @param file_paths [Array<String>] paths of the packages to ingest
    def process_packages(file_paths)
      logger.info("Beginning ingest of #{file_paths.count} #{ingest_source} packages")
      @status_service.initialize_statuses(file_paths)

      file_paths.each do |package_path|
        begin
          @status_service.status_in_progress(package_path)
          process_package(package_path)
          @status_service.status_complete(package_path)
        rescue => error
          # Double quotes are required for a real newline; '\n' in single quotes
          # is a literal backslash followed by "n". Array() guards against an
          # error whose backtrace was never populated.
          stacktrace = Array(error.backtrace).join("\n")
          logger.error("Failed to process package #{package_path}, #{error.message}: #{stacktrace}")
          @status_service.status_failed(package_path, error)
        end
      end
      logger.info("Completing ingest of #{ingest_source} packages.")
    end

    # The paths of all the zip packages in the ingest directory (memoized).
    #
    # @return [Array<String>]
    def package_paths
      # Sorted so the order is deterministic (the tests rely on it)
      @package_paths ||= Dir.glob("#{@package_dir}/*.zip").sort
    end

    # @return [Integer] number of zip packages in the ingest directory
    def count
      @count ||= package_paths.count
    end

    # Logger writing to "<ingest source, downcased>_ingest.log" in the
    # configured Rails log directory.
    #
    # @return [Logger]
    def logger
      @logger ||= begin
        log_path = File.join(Rails.configuration.log_directory, "#{ingest_source.downcase}_ingest.log")
        Logger.new(log_path, progname: "#{ingest_source} ingest")
      end
    end

    # Progress tracker backed by "<ingest source, downcased>_progress.log" in
    # the configured Rails log directory.
    def ingest_progress_log
      @ingest_progress_log ||= begin
        log_path = File.join(Rails.configuration.log_directory, "#{ingest_source.downcase}_progress.log")
        Migrate::Services::ProgressTracker.new(log_path)
      end
    end

    # Whether the CLEANUP_FTP_PACKAGES environment variable is set to "true"
    # (case-insensitive).
    #
    # NOTE: because of ||=, only a true result is cached; a false result is
    # re-read from ENV on every call.
    #
    # @return [Boolean]
    def cleanup_enabled?
      @cleanup_enabled ||= ENV['CLEANUP_FTP_PACKAGES'].to_s.downcase == 'true'
    end

    # Unzips the package into its per-package directory, skipping entries whose
    # destination file already exists. An error is logged (but not raised) when
    # valid_extract? rejects the extracted contents.
    #
    # @param package_path [String] path to a package zip file
    # @return the value returned by iterating the zip file's entries
    # @raise [Zip::Error] when the archive cannot be read, or when an entry
    #   would be written outside of the unzip directory
    def extract_files(package_path)
      dirname = unzip_dir(package_path)
      logger.info("Extracting files from #{package_path} to #{dirname}")
      extract_root = File.expand_path(dirname)
      extracted_files = Zip::File.open(package_path) do |zip_file|
        zip_file.each do |file|
          file_path = File.join(dirname, file.name)
          # Entry names come from the archive itself, so refuse any entry (e.g.
          # one containing "../") that resolves outside the unzip directory.
          destination = File.expand_path(file_path)
          unless destination == extract_root || destination.start_with?("#{extract_root}#{File::SEPARATOR}")
            raise Zip::Error, "Entry #{file.name} would be extracted outside of #{dirname}"
          end
          zip_file.extract(file, file_path) unless File.exist?(file_path)
        end
      end
      logger.error("Unexpected package contents - #{extracted_files.count} files extracted from #{package_path}") unless valid_extract?(extracted_files)
      extracted_files
    rescue Zip::Error => e
      logger.info("#{package_path}, zip file error: #{e.message}")
      raise
    end
  end
end