QutBioacoustics/baw-audio-tools

View on GitHub
lib/baw-audio-tools/audio_ffmpeg.rb

Summary

Maintainability
D
1 day
Test Coverage
module BawAudioTools
  class AudioFfmpeg

    ERROR_FRAME_SIZE_1 = "Could not find codec parameters for stream [0-9]+ \\(Audio\\: [a-zA-Z0-9]+\\, [0-9]+ channels\\, [a-zA-Z0-9]+\\)\\: unspecified frame size"
    ERROR_FRAME_SIZE_2 = "Failed to read frame size: Could not seek to [0-9]+\\."
    ERROR_END_OF_FILE = 'End of file'

    # @param [String] ffmpeg_executable
    # @param [String] ffprobe_executable
    # @param [Logger] logger
    # @param [String] temp_dir
    def initialize(ffmpeg_executable, ffprobe_executable, logger, temp_dir)
      @ffmpeg_executable = ffmpeg_executable
      @ffprobe_executable = ffprobe_executable
      @logger = logger
      @temp_dir = temp_dir

      @class_name = self.class.name
    end

    public

    def info_command(source)
      "#{@ffprobe_executable} -sexagesimal -print_format default -show_error -show_streams -show_format \"#{source}\""
    end

    def integrity_command(source)
      "#{@ffmpeg_executable} -loglevel repeat+verbose -nostdin -i \"#{source}\" -codec copy -f null -"
    end

    def modify_command(source, source_info, target, start_offset = nil, end_offset = nil, channel = nil, sample_rate = nil)
      fail Exceptions::FileNotFoundError, "Source does not exist: #{source}" unless File.exists? source
      fail Exceptions::FileAlreadyExistsError, "Target exists: #{target}" if File.exists? target
      fail ArgumentError, "Source and Target are the same file: #{target}" if source == target

      cmd_offsets = arg_offsets(start_offset, end_offset)
      cmd_sample_rate = arg_sample_rate(sample_rate)

      if sample_rate.blank? && source_info.include?(:sample_rate)
        cmd_sample_rate = arg_sample_rate(source_info[:sample_rate])
      else
        cmd_sample_rate = arg_sample_rate(sample_rate)
      end

      cmd_channel = arg_channel(channel)
      codec_info = codec_calc(target)

      audio_cmd = "#{@ffmpeg_executable} -i \"#{source}\" #{cmd_offsets} #{cmd_sample_rate} #{cmd_channel} #{codec_info[:codec]} \"#{codec_info[:target]}\""
      cmd = ''

      if codec_info[:target] == codec_info[:old_target]
        cmd = audio_cmd
      else
        partial_cmd = "\"#{codec_info[:target]}\" \"#{codec_info[:old_target]}\""
        separator_move = OS.windows? ? '&& move' : '; mv'
        cmd = "#{audio_cmd} #{separator_move} #{partial_cmd}"
      end

      cmd
    end

    def check_for_errors(execute_msg)
      #stdout = execute_msg[:stdout]
      stderr = execute_msg[:stderr]

      if !stderr.blank? && /#{ERROR_FRAME_SIZE_1}/.match(stderr)
        fail Exceptions::FileCorruptError, "Ffmpeg could not get frame size (msg type 1).\n\t#{execute_msg[:execute_msg]}"
      end
      if !stderr.blank? && /#{ERROR_FRAME_SIZE_2}/.match(stderr)
        fail Exceptions::FileCorruptError, "Ffmpeg could not get frame size (msg type 2).\n\t#{execute_msg[:execute_msg]}"
      end
      if !stderr.blank? && /#{ERROR_END_OF_FILE}/i.match(stderr)
        fail Exceptions::FileCorruptError, "Ffmpeg encountered unexpected end of file.\n\t#{execute_msg[:execute_msg]}"
      end
    end

    def check_integrity_output(execute_msg)
      #stdout = execute_msg[:stdout]
      stderr = execute_msg[:stderr]

      result = {
          errors: [],
          warnings: [],
          info: {
              read: {
                  packets: 0, bytes: 0, frames: 0, samples: 0
              },
              write: {
                  packets: 0, bytes: 0, frames: 0, samples: 0
              }
          }
      }

      return result if stderr.blank?

      stderr.each_line do |line|
        match_result = /\A\[(.+?) @ 0x.+?\] (.*)/.match(line)
        item = nil
        item = check_integrity_item({id: match_result[1], description: match_result[2]}) unless match_result.blank?
        result[:warnings].push(item) unless item.blank?

        read_packets_match = /Total: (\d+) packets \((\d+) bytes\) demuxed/.match(line)
        unless read_packets_match.blank?
          result[:info][:read][:packets] = read_packets_match[1]
          result[:info][:read][:bytes] = read_packets_match[2]
        end

        read_frames_match = /(\d+) frames decoded \((\d+) samples\);/.match(line)
        unless read_frames_match.blank?
          result[:info][:read][:frames] = read_frames_match[1]
          result[:info][:read][:samples] = read_frames_match[2]
        end

        write_packets_match = /Total: (\d+) packets \((\d+) bytes\) muxed/.match(line)
        unless write_packets_match.blank?
          result[:info][:write][:packets] = write_packets_match[1]
          result[:info][:write][:bytes] = write_packets_match[2]
        end

        write_frames_match = /(\d+) frames encoded \((\d+) samples\);/.match(line)
        unless write_frames_match.blank?
          result[:info][:write][:frames] = write_frames_match[1]
          result[:info][:write][:samples] = write_frames_match[2]
        end
      end

      result
    end

    def check_integrity_item(hash)
      return nil if hash.blank?

      return nil if hash[:id].starts_with?('graph')
      return nil if hash[:id].starts_with?('audio format')
      return nil if hash[:id].starts_with?('auto-inserted')

      return nil if hash[:description].starts_with?('parser not found for')
      return nil if hash[:description].starts_with?('max_analyze_duration')
      return nil if hash[:description].starts_with?('Application provided invalid, non monotonically increasing dts to muxer in stream')

      hash
    end

    def find_remove_warning(mod_stderr, match_regex)
      match_info = mod_stderr.match(match_regex)
      mod_stderr = mod_stderr.gsub(match_regex, '')

      unless match_info.blank?
        @logger.warn(@class_name) {
          "Found and removed '#{match_info}' in ffmpeg output."
        }
      end

      mod_stderr
    end

    def has_regex?(string, regex)
      !!(string =~ regex)
    end

    # returns the duration in seconds (and fractions if present)
    def parse_duration(duration_string)
      duration_match = /(?<hour>\d+):(?<minute>\d+):(?<second>[\d+\.]+)/i.match(duration_string)
      duration = 0
      if !duration_match.nil? && duration_match.size == 4
        duration = (duration_match[:hour].to_f * 60 * 60) + (duration_match[:minute].to_f * 60) + duration_match[:second].to_f
      end
      duration
    end

    def parse_ffprobe_output(source, execute_msg)
      # ffprobe std err contains info (separate on first equals(=))

      result = {}
      ffprobe_current_block_name = ''
      execute_msg[:stdout].strip.split(/\r?\n|\r/).each do |line|
        line.strip!
        if line[0] == '['
          # this chomp reverse stuff is due to the lack of a proper 'trim'
          ffprobe_current_block_name = line.chomp(']').reverse.chomp('[').reverse
        else
          current_key = line[0, line.index('=')].strip
          current_value = line[line.index('=')+1, line.length].strip
          result[ffprobe_current_block_name + ' ' + current_key] = current_value
        end
      end

      unless File.exists?(source)
        fail Exceptions::AudioFileNotFoundError, "Could not locate #{source}\n\t#{execute_msg[:execute_msg]}"
      end

      actual_stream_codec_type = result['STREAM codec_type']
      expected_stream_codec_type = 'audio'
      if actual_stream_codec_type != expected_stream_codec_type
        msg = "Not an audio file #{source} ('#{actual_stream_codec_type}' is not '#{expected_stream_codec_type}'): #{result.to_json}\n\t#{execute_msg[:execute_msg]}"
        fail Exceptions::NotAnAudioFileError, msg
      end

      result
    end

    def arg_channel(channel)
      cmd_arg = ''
      unless channel.blank?
        channel_number = channel.to_i
        if channel_number < 1
          # mix down to mono
          cmd_arg = ' -ac 1 '
        else
          # select the channel (0 index based)
          cmd_arg = " -map_channel 0.0.#{channel_number - 1} "
        end
      end
      cmd_arg
    end

    def arg_sample_rate(sample_rate)
      cmd_arg = ''
      unless sample_rate.blank?
        # -ar Set the audio sampling frequency (default = 44100 Hz).
        # -ab Set the audio bitrate in bit/s (default = 64k).
        cmd_arg = " -ar #{sample_rate} "
      end
      cmd_arg
    end

    def arg_offsets(start_offset, end_offset)
      cmd_arg = ''

      # start offset
      # -ss Seek to given time position in seconds. hh:mm:ss[.xxx] syntax is also supported.
      unless start_offset.blank?
        start_offset_formatted = Time.at(start_offset.to_f).utc.strftime('%H:%M:%S.%3N')
        cmd_arg = " -ss #{start_offset_formatted}"
      end

      # end offset
      # -t Restrict the transcoded/captured video sequence to the duration specified in seconds. hh:mm:ss[.xxx] syntax is also supported.
      unless end_offset.blank?
        #end_offset_formatted = Time.at(modify_parameters[:end_offset]).utc.strftime('%H:%M:%S.%3N')
        end_offset_raw = end_offset.to_f
        end_offset_time = Time.at(end_offset_raw).utc
        if start_offset.blank?
          # if start offset was not included, include audio from the start of the file.
          cmd_arg += " -t #{end_offset_time.strftime('%H:%M:%S.%3N')}"
        else
          start_offset_raw = start_offset.to_f
          #start_offset_time = Time.at(start_offset_raw).utc
          cmd_arg += " -t #{Time.at(end_offset_raw - start_offset_raw).utc.strftime('%H:%M:%S.%3N')}"
        end
      end

      cmd_arg
    end

    def codec_calc(target)

      # high quality codec settings
      # https://trac.ffmpeg.org/wiki/GuidelinesHighQualityAudio

      # http://trac.ffmpeg.org/wiki/TheoraVorbisEncodingGuide
      # http://en.wikipedia.org/wiki/Vorbis#Technical_details
      # http://wiki.hydrogenaudio.org/index.php?title=Recommended_Ogg_Vorbis#Recommended_Encoder_Settings
      # -aq 6 will be approx 192kbit/s
      codec_high_vorbis = 'libvorbis -aq 6'

      # pcm signed 16-bit little endian - compatible with CDDA
      codec_high_wav = 'pcm_s16le'

      # http://lame.cvs.sourceforge.net/viewvc/lame/lame/USAGE
      # 0 = slowest algorithms, but potentially highest quality
      # 9 = faster algorithms, very poor quality
      # http://trac.ffmpeg.org/wiki/Encoding%20VBR%20(Variable%20Bit%20Rate)%20mp3%20audio
      # -aq 2 recommended, but still cuts off at 10khz at 22.05khz
      # using CBR: -b:a 192k
      codec_high_mp3 = 'libmp3lame -aq 0'

      codec_high_wavpack = 'wavpack'

      # The flac reference implementation may not keep the .wav metadata
      # https://xiph.org/flac/faq.html#general__no_wave_metadata
      # The flag --keep-foreign-metadata can be included to store the WAVE non-audio data as well
      # Note that when a flac file is decoded to WAVE/AIFF the flag must be used again to keep metadata
      # The ffmpeg implementation of flac encoding / decoding can handle some metadata formats, this is sufficient until
      # there is something concrete that requires using additional ffmpeg flags or using the reference flac implementation
      # instead.
      # The relevant ffmpeg flag is `-map_metadata`. "By default, global metadata is copied from the first input file,
      # per-stream and per-chapter metadata is copied along with streams/chapters."
      codec_high_flac = 'flac'

      # output file. extension used to determine filetype.
      old_target = target

      # set the right codec if we know it
      extension = File.extname(target).upcase!.reverse.chomp('.').reverse
      case extension
        when 'WAV'
          codec = codec_high_wav
        when 'MP3'
          codec = codec_high_mp3
        when 'OGG'
          codec = codec_high_vorbis
        when 'OGA'
          codec = codec_high_vorbis
          target = target.chomp(File.extname(target))+'.ogg'
        when 'WEBM'
          codec = codec_high_vorbis
        when 'WEBMA'
          codec = codec_high_vorbis
          target = target.chomp(File.extname(target))+'.webm'
        when 'WV'
          codec = codec_high_wavpack
        when 'FLAC'
          codec = codec_high_flac
        else
          # don't specify codec for any other extension
          # Alternative: Use the 'copy' special value to specify that the raw codec data must be copied as is.
          codec = ''
      end

      # -acodec Force audio codec to codec.
      {
          codec: codec.blank? ? '' : " -acodec #{codec}",
          target: target,
          old_target: old_target
      }

    end

    # mime type to ffmpeg string identifier conversions
    def get_mime_type(ffmpeg_info)

      #[:info][:ffmpeg]['STREAM codec_type']+'/'+file_info[:info][:ffmpeg]['STREAM codec_name']

      case ffmpeg_info['FORMAT format_long_name']
        when 'WAV / WAVE (Waveform Audio)'
          # :codec_name => 'pcm_s16le',
          # :codec_long_name => 'PCM signed 16-bit little-endian',
          'audio/wav'
        when 'MP2/3 (MPEG audio layer 2/3)', 'MP3 (MPEG audio layer 3)'
          # :codec_name => 'mp3',
          # :codec_long_name => 'MP3 (MPEG audio layer 3)',
          'audio/mp3'
        when 'Matroska / WebM', 'WebM'
          # :codec_name => 'vorbis',
          # :codec_long_name => 'Vorbis',
          # :format_name => 'matroska,webm',
          'audio/webm'
        when 'Ogg'
          # :codec_name => 'vorbis',
          # :codec_long_name => 'Vorbis',
          'audio/ogg'
        when 'ASF (Advanced / Active Streaming Format)'
          # :codec_name => 'wmav2',
          # :codec_long_name => 'Windows Media Audio 2',
          'audio/asf'
        when 'WavPack', 'raw WavPack'
          # :codec_name => 'wavpack',
          # :codec_long_name => 'WavPack',
          'audio/wavpack'
        when 'QuickTime / MOV', 'MP4 (MPEG-4 Part 14)', 'PSP MP4 (MPEG-4 Part 14)', 'iPod H.264 MP4 (MPEG-4 Part 14)'
          # codec_name=alac
          # codec_long_name=ALAC (Apple Lossless Audio Codec)
          # format_name=mov,mp4,m4a,3gp,3g2,mj2
          'audio/mp4'
        when 'AAC (Advanced Audio Coding)', 'AAC LATM (Advanced Audio Coding LATM syntax)',
            'ADTS AAC (Advanced Audio Coding)', 'raw ADTS AAC (Advanced Audio Coding)'
          # codec_name=aac
          # codec_long_name=AAC (Advanced Audio Coding)
          # format_name=aac
          'audio/aac'
        when 'FLAC (Free Lossless Audio Codec)', 'raw FLAC', 'flac'
          # codec_name=flac
          # codec_long_name=raw FLAC
          # format_name=flac
          'audio/x-flac'
        else
          'application/octet-stream'
      end
    end

  end
end