fairplaysk/datacamp

View on GitHub
lib/etl/extraction.rb

Summary

Maintainability
A
0 mins
Test Coverage
module Etl
  # One unit of extraction work: downloads the document identified by +id+,
  # digests it and saves the result, then (in +after+) decides whether a
  # follow-up batch has to be scheduled.
  #
  # Struct members:
  #   start_id          - first id of the batch this job belongs to
  #   batch_limit       - offset from +start_id+ of the batch's last id
  #   id                - id of the document handled by this job
  #   single_extraction - truthy when the job runs on its own (no batch chaining)
  #
  # The methods +document_url+, +config+, +is_acceptable?+, +digest+, +save+ and
  # +enque_job+ are called here but not defined in this class body; they are
  # expected to be supplied elsewhere (presumably by subclasses - confirm).
  class Extraction < Struct.new(:start_id, :batch_limit, :id, :single_extraction)

    # Fetches the document for +id+ and parses it as HTML.
    #
    # The response body is transcoded from cp1250 to UTF-8 before parsing.
    #
    # @param id the document id handed to +document_url+
    # @return the document produced by Nokogiri::HTML
    def download(id)
      response = Typhoeus::Request.get(document_url(id))
      Nokogiri::HTML(response.body.encode('utf-8', 'cp1250'))
    end

    # Stores +id+ as the configuration's +last_processed_id+ when nothing has
    # been recorded yet or when +id+ is greater than the recorded value.
    def update_last_processed
      last_processed = config.last_processed_id
      config.update_attribute(:last_processed_id, id) if last_processed.nil? || id > last_processed
    end

    # Downloads the document; when it is acceptable, digests it, saves the
    # digest and records the progress. Unacceptable documents are skipped.
    def perform
      document = download(id)
      return unless is_acceptable?(document)

      save(digest(document))
      update_last_processed
    end

    # Post-run hook (presumably invoked by the job runner after +perform+ -
    # confirm). Only the job holding the last id of a batch
    # (+start_id + batch_limit+) acts:
    #
    # * if +last_processed_id+ moved past +start_id+, the ids following this one
    #   are enqueued;
    # * otherwise +start_id+ in the configuration is moved to
    #   +last_processed_id + 1+;
    # * if +last_processed_id+ is still nil (nothing has been recorded), nothing
    #   is done. Comparing or incrementing nil used to raise NoMethodError here,
    #   although +update_last_processed+ treats nil as a legitimate state.
    #
    # @param job the finished job (unused)
    def after(job)
      return if single_extraction
      return unless id == start_id + batch_limit

      last_processed = config.last_processed_id
      # Nothing recorded yet: there is no progress to continue from and no
      # meaningful value to move start_id to.
      return if last_processed.nil?

      if last_processed > start_id
        # NOTE(review): the range is inclusive on both ends, so it yields
        # config.batch_limit + 1 ids, and it reads batch_limit from config
        # rather than from this struct - kept as-is, confirm intent.
        ((id + 1)..(id + 1 + config.batch_limit)).each do |next_id|
          enque_job(next_id)
        end
      else
        config.update_attribute(:start_id, last_processed + 1)
      end
    end

  end
end