SpeciesFileGroup/taxonworks

View on GitHub
lib/tasks/batch_load/sqed_depictions.rake

Summary

Maintainability
Test Coverage
namespace :tw do
  namespace :batch_load do
    namespace :sqed_depiction do

      # Usage:
      #   rake tw:batch_load:sqed_depiction:preprocess total=10
      #
      # Calls SqedDepiction.preprocess_empty(1) up to `total` times (default 10),
      # stopping early the first time that call does not return 1. A running count
      # is printed in place on one console line, followed by a final summary.
      desc 'preprocess the first <total> sqed depictions that do not have both OCR and boundaries populated in cache, does not care what project the data are in'
      task preprocess: [:environment] do |_t|
        # ENV values are Strings; the Integer default passes through #to_i unchanged.
        limit = ENV.fetch('total', 10).to_i
        processed = 0

        puts Rainbow('Processing empty sqed depictions').yellow

        limit.times do
          # "\r" rewinds to the start of the line so the counter overwrites itself.
          print "\r #{processed}"
          break unless SqedDepiction.preprocess_empty(1) == 1
          processed += 1
        end
        puts

        puts Rainbow("Processed #{processed} records.").yellow
      end


      # Import Sqed formatted images.
      #
      # Every regular file matching `**/*.*` under `data_directory` is handled in sorted
      # order. For each one a new Image, CollectionObject and SqedDepiction are built and
      # saved. A file is skipped when its MD5 fingerprint already belongs to the image of
      # a SqedDepiction in the current project (Current.project_id). Files are processed
      # in groups of `transaction_total`, one database transaction per group.
      #
      # By default it assumes :cross pattern and layout.  If you want to provide anything else
      # you should provide both a `pattern` and a `layout`.
      # NOTE(review): this task reads `layout` but no `pattern` variable; presumably
      # `metadata_map` and `boundary_finder` stand in for the pattern - confirm.
      #
      # Environment variables read here (all optional):
      #   transaction_total  - files per database transaction (default 20)
      #   layout             - default :cross
      #   boundary_finder    - default 'Sqed::BoundaryFinder::ColorLineFinder'
      #   metadata_map       - JSON object; keys are converted to Integers and values to
      #                        Symbols (default shown in the code below)
      #   has_border         - 'true' enables it, any other value means false (default 'false')
      #   boundary_color     - converted to a Symbol (default :green)
      #   total              - `total` given to each new CollectionObject (default '1')
      #   preprocess_result  - set to 'false' to skip calling `preprocess` on each saved record
      # `data_directory`, `project_id` and `user_id` are handled by the prerequisite tasks
      # of the same names, which are not defined in this file.
      #
      # Basic format:
      #   rake tw:batch_load:sqed_depiction:import total=1 data_directory=/Users/matt/Desktop/images/ project_id=1 user_id=1
      # Extended format (the JSON is single-quoted so the shell keeps its double quotes):
      #   rake tw:batch_load:sqed_depiction:import total=1 layout=cross metadata_map='{"0": "curator_metadata", "1": "identifier", "2": "image_registration", "3": "annotated_specimen"}' boundary_finder='Sqed::BoundaryFinder::ColorLineFinder' data_directory=/Users/matt/Desktop/images/ preprocess_result=false project_id=1 user_id=1
      #
      # Raises ArgumentError when `transaction_total` is not a positive integer, and a
      # RuntimeError when a save raises ActiveRecord::RecordInvalid inside a group.
      desc 'import sqed formated collection object depictions'
      task import: [:environment, :project_id, :user_id, :data_directory] do |t|

        # ENV values are Strings; `in_groups_of` needs an Integer, so coerce up front.
        transaction_total = (ENV['transaction_total'] || 20).to_i
        raise ArgumentError, 'transaction_total must be a positive integer' unless transaction_total > 0

        # `@args` is expected to have been created by the prerequisite tasks
        # (`@args[:data_directory]` is read below).
        @args.merge!(
          transaction_total: transaction_total,

          # These match sqed and sqed_depiction extraction_metadata patterns
          layout: (ENV['layout'] || :cross),
          boundary_finder: (ENV['boundary_finder'] || 'Sqed::BoundaryFinder::ColorLineFinder'),
          metadata_map: (ENV['metadata_map'] || '{"0": "curator_metadata", "1": "identifier", "2": "image_registration", "3": "annotated_specimen"}'),

          has_border: (ENV['has_border'] || 'false'),
          boundary_color: (ENV['boundary_color'] || :green),

          # Stored in/defines the CollectionObject instance
          total: (ENV['total'] || '1')
        )

        # coerce some types from text
        @args[:metadata_map] = JSON.parse(@args[:metadata_map]).each_with_object({}) do |(key, value), map|
          map[key.to_i] = value.to_sym
        end
        @args[:has_border] = (@args[:has_border] == 'true')
        @args[:boundary_color] = @args[:boundary_color].to_sym

        puts Rainbow('Using attributes:').yellow
        print @args

        puts Rainbow("\nProcessing images: \n").yellow

        # File.join copes with a data_directory given with or without a trailing slash.
        # The `*.*` pattern can also match directories whose name contains a dot, which
        # cannot be fingerprinted or imported, so keep regular files only.
        files = Dir.glob(File.join(@args[:data_directory], '**', '*.*')).select { |path| File.file?(path) }.sort

        begin
          files.in_groups_of(transaction_total, false) do |group|
            # Handles opened for this group. They are closed only after the transaction
            # has finished, so any callback that still reads from them can do so.
            handles = []

            begin
              ApplicationRecord.transaction do
                group.each do |f|
                  print Rainbow(f).blue + ': '

                  fingerprint = Digest::MD5.file(f).hexdigest
                  if SqedDepiction.joins(:image).where(images: {image_file_fingerprint: fingerprint}, sqed_depictions: {project_id: Current.project_id}).any?
                    print Rainbow("exists as depiction, skipping\n").red
                    next
                  end

                  handle = File.open(f)
                  handles << handle

                  image = Image.new(image_file: handle)
                  collection_object = CollectionObject.new(total: @args[:total])

                  sqed_depiction = SqedDepiction.new(
                    boundary_color: @args[:boundary_color],
                    boundary_finder: @args[:boundary_finder],
                    has_border: @args[:has_border],
                    layout: @args[:layout],
                    metadata_map: @args[:metadata_map],

                    depiction_attributes: {
                      image: image,
                      depiction_object: collection_object
                    }
                  )

                  if sqed_depiction.valid?
                    sqed_depiction.save!
                    print "success\n"

                    sqed_depiction.preprocess unless ENV['preprocess_result'] == 'false'
                  else
                    # An invalid record is reported and skipped; it does not abort the group.
                    print(' failed, skipping - ' + Rainbow(sqed_depiction.errors.full_messages.join('; ')).red + "\n")
                  end
                end

                puts Rainbow('group handled').yellow
              end # end transaction
            ensure
              handles.each { |handle| handle.close unless handle.closed? }
            end
          end

        rescue ActiveRecord::RecordInvalid
          # Ruby keeps the rescued exception as `cause` of the error raised here.
          raise 'transaction aborted, this groups records not stored.'
        end
      end

    end
  end
end