QutBioacoustics/baw-workers

View on GitHub
lib/baw-workers/audio_check/work_helper.rb

Summary

Maintainability
D
3 days
Test Coverage
module BawWorkers
  module AudioCheck
    class WorkHelper

      def initialize(logger, file_info, api_comm)
        # collaborators: log sink, audio file inspector (#audio_info) and api client
        @logger, @file_info, @api_communicator = logger, file_info, api_comm

        # passed as the progname of every log entry this helper writes
        @class_name = self.class.name
      end

      # Check existing files and modify the file name and/or details via api if necessary.
      # @param [Hash] audio_params
      # @param [Boolean] is_real_run
      # @return [Array<Hash>] array of hashes representing operations performed
      def run(audio_params, is_real_run)
        # symbolize keys and confirm that every required property is present
        params = BawWorkers::AudioCheck::WorkHelper.validate(audio_params)

        if is_real_run
          @logger.info(@class_name) { 'Starting...' }
        else
          @logger.warn(@class_name) { 'Starting dry run...' }
        end

        # only :recorded_date values ending in 'Z' (UTC) are accepted; the value is
        # then replaced by the result of Time.zone.parse
        recorded_date = params[:recorded_date]
        unless recorded_date.end_with?('Z')
          fail ArgumentError, ":recorded_date must be a UTC time (i.e. end with Z), given #{recorded_date}"
        end
        params[:recorded_date] = Time.zone.parse(recorded_date)

        # possible and existing paths, plus the old and new style file names
        paths = original_paths(params)

        # HIGH LEVEL PROBLEM: raises when none of the possible files exist
        check_exists(paths, params)

        # api outcomes that count as the attributes being in a good state
        good_api_results = [:dry_run, :notrequired, :success]

        # one result hash (and one csv log line) per existing file
        results = paths[:existing].map do |existing_file|
          # attribute problems are dealt with first, the rename always comes last
          checked = run_single(existing_file, params, is_real_run)

          # LOW LEVEL PROBLEM: rename old file names to new file names
          move_info = rename_file(existing_file, paths[:name_utc], is_real_run)

          moved = move_info[:moved]
          changed = !checked[:api_result_hash].empty?
          update_ok = good_api_results.include?(checked[:api_result])

          # a failed attribute update outranks a plain move; every other
          # combination needs no review
          review_level =
              if changed && !update_ok
                moved ? :medium_file_moved_and_failed_updating_attributes : :medium_failed_updating_attributes
              elsif moved && update_ok
                changed ? :low_file_moved_and_attributes_updated : :low_file_moved
              else
                :none_all_good
              end

          entry = {
              file_path: existing_file,
              exists: true,
              moved_path: moved ? move_info[:new_file] : nil,
              compare_hash: checked[:compare_hash],
              api_result_hash: checked[:api_result_hash],
              api_response: checked[:api_result],
              review_level: review_level
          }

          # the csv line mirrors the entry, with the audio recording id appended
          log_csv_line(
              entry[:file_path], entry[:exists], entry[:moved_path],
              entry[:compare_hash], entry[:api_result_hash],
              entry[:api_response], entry[:review_level],
              params[:id]
          )

          entry
        end

        @logger.info(@class_name) { '...finished.' }

        results
      end

      # Check an existing file and modify the file name and/or details on website if necessary.
      # @param [String] existing_file
      # @param [Hash] audio_params
      # @param [Boolean] is_real_run
      # @return [Hash] comparison and api results
      def run_single(existing_file, audio_params, is_real_run)
        # Flow: read the actual file info, compare it with the expected values, raise on
        # problems that are not repaired here (hash mismatch), then send any remaining
        # differences through the api (real run) or only report them (dry run).
        existing_file_info = @file_info.audio_info(existing_file)

        @logger.debug(@class_name) {
          "Actual file info: #{existing_file_info}"
        }

        compare_hash = compare_info(existing_file, existing_file_info, audio_params)
        checks = compare_hash[:checks]

        base_msg = "for #{compare_hash}"

        @logger.info(@class_name) {
          "Compared expected and actual info #{base_msg}"
        }

        # MID LEVEL PROBLEM: is the file valid?
        # usually will not log 'File integrity uncertain', since the info check will raise an error
        # for most things that would present as 'File integrity uncertain'.
        if checks[:file_errors] == :pass
          @logger.debug(@class_name) { "File integrity ok #{base_msg}" }
        else
          @logger.warn(@class_name) { "File integrity uncertain #{base_msg}" }
        end

        # MID LEVEL PROBLEM: extensions do not match
        # (this is impossible, since if the extension/media_type doesn't match,
        # can't find the file in the first place)
        if checks[:extension] == :pass
          @logger.debug(@class_name) { "File extensions match #{base_msg}" }
        else
          @logger.warn(@class_name) { "File extensions do not match #{base_msg}" }
        end

        # HIGH LEVEL PROBLEM: do the hashes match?
        # An expected hash of 'SHA256::' is a placeholder: it is not an error by itself and
        # is filled in further down, provided every other property matches.
        is_expected_file_hash_default = compare_hash[:expected][:file_hash] == 'SHA256::'
        if checks[:file_hash] == :pass
          @logger.debug(@class_name) { "File hashes match #{base_msg}" }
        elsif !is_expected_file_hash_default
          msg = "File hashes DO NOT match #{base_msg}"

          # log error
          @logger.error(@class_name) { msg }

          # write row of csv into log file
          log_csv_line(
              existing_file, true, nil,
              compare_hash, nil, nil,
              :high_file_hashes_do_not_match,
              audio_params[:id])

          fail BawAudioTools::Exceptions::FileCorruptError, msg
        end

        # LOW LEVEL PROBLEM: every property whose check did not pass is queued for an
        # update using the value read from the file (insertion order is kept as before)
        updatable_properties = [:media_type, :sample_rate_hertz, :channels,
                                :bit_rate_bps, :data_length_bytes, :duration_seconds]

        changed_metadata = {}
        updatable_properties.each do |property|
          next if checks[property] == :pass
          changed_metadata[property] = compare_hash[:actual][property]
        end

        # placeholder hash: store the real hash only if everything else matches,
        # otherwise the file cannot be trusted to be the expected recording
        if is_expected_file_hash_default
          unless changed_metadata.empty?
            msg = "File hash and other properties DO NOT match #{changed_metadata} #{base_msg}"

            # log error
            @logger.error(@class_name) { msg }

            # write row of csv into log file
            log_csv_line(existing_file, true, nil,
                         compare_hash, nil, nil,
                         :medium_multiple_properties_do_not_match,
                         audio_params[:id])

            fail BawAudioTools::Exceptions::FileCorruptError, msg
          end

          changed_metadata[:file_hash] = compare_hash[:actual][:file_hash]
        end

        # use api for any changes/updates for low level problems
        update_result = nil
        if changed_metadata.empty?
          @logger.info(@class_name) { "No updates required #{base_msg}" }
        else
          @logger.warn(@class_name) { "Update required #{changed_metadata} #{base_msg}" }

          if is_real_run
            @logger.info(@class_name) { 'Updating properties.' }

            # get auth token
            security_info = @api_communicator.request_login

            # update audio recording metadata
            update_result = @api_communicator.update_audio_recording_details(
                'mismatch between file and database',
                existing_file,
                audio_params[:id],
                changed_metadata,
                security_info
            )
          else
            @logger.info(@class_name) { 'Dry Run: Would have updated properties.' }
          end
        end

        # summarise what happened; a nil update_result on a real run means the request
        # was made but nothing usable came back
        api_result_value =
            if changed_metadata.empty?
              :notrequired
            elsif !is_real_run
              :dry_run
            elsif update_result.nil?
              :sent_with_unknown_response
            else
              update_result ? :success : :error
            end

        {
            compare_hash: compare_hash,
            api_result_hash: changed_metadata,
            api_result: api_result_value
        }
      end

      # Validate audio params hash
      # @param [Hash] audio_params
      # @return [Hash] audio params hash with keys converted to symbols
      def self.validate(audio_params)
        # check_hash is expected to reject anything that is not a usable hash
        BawWorkers::Validation.check_hash(audio_params)
        symbolized = BawWorkers::Validation.deep_symbolize_keys(audio_params)

        required = [:id, :uuid, :recorded_date,
                    :duration_seconds, :sample_rate_hertz, :channels,
                    :bit_rate_bps, :media_type, :data_length_bytes,
                    :file_hash, :original_format]

        # only key presence is checked (values may be nil); the first missing key is reported
        missing = required.find { |prop| !symbolized.include?(prop) }
        fail ArgumentError, "Audio params must include #{missing}." if missing

        symbolized
      end

      private

      # Get expected and actual file paths.
      # @param [Hash] audio_params
      # @return [Hash] info about possible and existing files.
      def original_paths(audio_params)
        helper = BawWorkers::Config.original_audio_helper

        # the same lookup options are used for both the path and the file name queries
        lookup = {
            uuid: audio_params[:uuid],
            datetime_with_offset: audio_params[:recorded_date],
            original_format: audio_params[:original_format]
        }

        existing = helper.existing_paths(lookup)
        possible = helper.possible_paths(lookup)

        old_style_name = helper.file_name_10(lookup)
        utc_style_name = helper.file_name_utc(lookup)

        # paths are reported in absolute form
        absolute = lambda { |paths| paths.map { |path| File.expand_path(path) } }

        {
            possible: absolute.call(possible),
            existing: absolute.call(existing),
            name_utc: utc_style_name,
            name_old: old_style_name
        }
      end

      # Compare expected and actual audio file information.
      # @param [String] existing_file
      # @param [Hash] existing_file_info
      # @param [Hash] audio_params
      # @return [Hash] information about comparison between expected and actual audio file info.
      def compare_info(existing_file, existing_file_info, audio_params)
        # tolerances: asf files differ by 1300-2000 bps (set large to catch only very
        # different bit rates); mp3 durations are estimates, so allow 200 ms
        bit_rate_bps_delta = 10000
        duration_seconds_delta = 0.200

        verdict = lambda { |matches| matches ? :pass : :fail }

        # exact comparisons, after coercing both sides to the same type
        same_text = lambda do |actual_key, expected_key|
          existing_file_info[actual_key].to_s == audio_params[expected_key].to_s
        end
        same_integer = lambda do |key|
          existing_file_info[key].to_i == audio_params[key].to_i
        end

        # media types are compared after lookup rather than as raw strings
        actual_mime = Mime::Type.lookup(existing_file_info[:media_type])
        expected_mime = Mime::Type.lookup(audio_params[:media_type])

        bit_rate_gap = (existing_file_info[:bit_rate_bps].to_i - audio_params[:bit_rate_bps].to_i).abs
        duration_gap = (existing_file_info[:duration_seconds].to_f - audio_params[:duration_seconds].to_f).abs

        error_free = existing_file_info[:errors].size < 1

        # new style file names end with 'Z' just before the extension
        name_without_ext = File.basename(existing_file, File.extname(existing_file))

        {
            actual: existing_file_info,
            expected: audio_params,
            checks: {
                file_hash: verdict.call(same_text.call(:file_hash, :file_hash)),
                extension: verdict.call(same_text.call(:extension, :original_format)),
                media_type: verdict.call(actual_mime == expected_mime),
                sample_rate_hertz: verdict.call(same_integer.call(:sample_rate_hertz)),
                channels: verdict.call(same_integer.call(:channels)),
                bit_rate_bps: verdict.call(bit_rate_gap <= bit_rate_bps_delta),
                data_length_bytes: verdict.call(same_integer.call(:data_length_bytes)),
                duration_seconds: verdict.call(duration_gap <= duration_seconds_delta),
                file_errors: verdict.call(error_free),
                new_file_name: verdict.call(name_without_ext.end_with?('Z'))
            },
            bit_rate_bps_delta: bit_rate_bps_delta,
            duration_seconds_delta: duration_seconds_delta
        }
      end

      # Check that at least one original file exists.
      # @param [Hash] original_paths
      # @param [Hash] audio_params
      # @return [void]
      def check_exists(original_paths, audio_params)
        # failure first: without any existing file there is nothing left to check
        if original_paths[:existing].empty?
          msg = "No existing files for #{original_paths} given #{audio_params}"

          # log error
          @logger.error(@class_name) { msg }

          # write row of csv into log file, using the first possible path as the file path
          log_csv_line(original_paths[:possible].first, false, nil,
                       nil, nil, nil,
                       :high_original_file_does_not_exist,
                       audio_params[:id])

          fail BawAudioTools::Exceptions::FileNotFoundError, msg
        end

        @logger.debug(@class_name) {
          "Existing files #{original_paths} given #{audio_params}"
        }
      end

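      # Create and log a single line of CSV (a header line and a data line) from
      # source file, expected vs actual comparisons, api request & response.
      # @param [String] file_path
      # @param [Boolean] exists
      # @param [String, nil] moved_path
      # @param [Hash, nil] compare_hash
      # @param [Hash, nil] api_result_hash
      # @param [Symbol, nil] api_response
      # @param [Symbol] review_level
      # @param [Object] audio_recording_id id of the audio recording (callers pass audio_params[:id])
      # @return [void]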
      def log_csv_line(file_path, exists, moved_path = nil,
                       compare_hash = nil, api_result_hash = nil,
                       api_response = nil, review_level = :none_all_good,
                       audio_recording_id)

        # CsvHelper returns a hash holding the :headers and the :values for this row
        csv_row = BawWorkers::AudioCheck::CsvHelper.logged_csv_line(
            file_path, exists, moved_path,
            compare_hash, api_result_hash,
            api_response, review_level,
            audio_recording_id)

        csv_options = {col_sep: ',', force_quotes: true}

        # each line is tagged with a prefix and logged at fatal level
        # (presumably so it is written whatever log level is configured - confirm)
        write_csv = lambda do |prefix, cells|
          line = cells.to_csv(csv_options).strip
          @logger.fatal(@class_name) { "#{prefix}, #{line}" }
        end

        write_csv.call('[CSV-header]', csv_row[:headers])
        write_csv.call('[CSV-data]', csv_row[:values])
      end

      # Rename file with old file name to new file name.
      # @param [String] existing_file
      # @param [String] file_name_utc
      # @param [Boolean] is_real_run
      # @return [Hash] action applied to existing file
      def rename_file(existing_file, file_name_utc, is_real_run)
        # the renamed file always stays in the directory of the existing file
        new_path = File.join(File.dirname(existing_file), file_name_utc)
        new_path_exists = File.exist?(new_path)

        # new style file names end with 'Z' just before the extension
        existing_name_without_ext = File.basename(existing_file, File.extname(existing_file))
        existing_is_new = existing_name_without_ext.end_with?('Z')

        if existing_is_new && new_path_exists
          # existing file is already new format, nothing to change
          return {new_file: existing_file, moved: false}
        end

        if !existing_is_new && new_path_exists && File.exist?(existing_file)
          # both new and old formats exist, do nothing
          @logger.info(@class_name) {
            "Found equivalent old and new file names, no action performed. Old: #{existing_file} New: #{new_path}."
          }

          return {new_file: new_path, moved: false}
        end

        # the target name does not exist yet (typically an old format file name):
        # move it on a real run, only report the move on a dry run
        if is_real_run
          @logger.info(@class_name) { "Moving #{existing_file} to #{new_path}." }
          FileUtils.move(existing_file, new_path)
        else
          @logger.info(@class_name) { "Dry Run: Would have moved #{existing_file} to #{new_path}." }
        end

        # NOTE: :moved is also true for a dry run, where it means "would be moved"
        {new_file: new_path, moved: true}
      end

    end
  end
end