# zooniverse/Talk-Api
#
# View on GitHub
# lib/tasks/update_search_tsvectors.rake
#
# Summary
#
# Maintainability
# Test Coverage
namespace :search do

  # Walks every record of one resource type in batches and calls
  # `searchable.set_content` on each record that has a searchable.
  # After every batch the id of the last record is checkpointed in Redis so
  # that an interrupted run can pick up again from that id.
  class VectorUpdater
    attr_reader :redis, :start, :batch_size, :resource

    # args - any object answering #[] with the keys:
    #        :resource   - lower-case model name (e.g. 'comment'); it is
    #                      capitalized and constantized in #process
    #        :batch_size - records per batch, coerced with #to_i
    #        :start      - id to begin at when no checkpoint is stored,
    #                      coerced with #to_i
    def initialize(args)
      @redis = Redis.new(url: ENV['REDIS_URL'], db: 15)
      @resource = args[:resource]
      @batch_size = args[:batch_size].to_i
      @start = args[:start].to_i
    end

    # Returns the id to begin processing from: the checkpoint stored in Redis
    # when there is one, otherwise the configured start id.
    def start_id
      # Read the key once and branch on the value instead of asking
      # `redis.exists` first. Newer redis-rb releases return an Integer from
      # #exists, and 0 is truthy in Ruby, so `if redis.exists(rkey)` would
      # always pick the checkpoint branch and turn a missing key into id 0.
      checkpoint = redis.get(rkey)
      checkpoint ? checkpoint.to_i : start
    end

    # Redis key under which the checkpoint for this resource is stored.
    def rkey
      "rake-task-update-#{resource}-tsvectors"
    end

    # Runs the update over all batches, then removes the checkpoint key.
    #
    # NOTE(review): if find_in_batches treats `start` as inclusive, a resumed
    # run processes the checkpointed record a second time - confirm that
    # set_content is safe to repeat.
    def process
      # Test redis connection to raise error if unavailable before processing begins
      redis.ping

      klass = resource.capitalize.constantize
      puts "Processing #{klass}s..."
      klass.find_in_batches(start: start_id, batch_size: batch_size).with_index do |resources, batch|
        puts "Processing batch ##{batch}, starting on id #{resources.first.id}"
        # `each`, not `map`: only the side effect matters here.
        resources.each do |res|
          searchable = res.searchable
          searchable.set_content if searchable
        end

        # Checkpoint the id of the last resource in the batch in Redis.
        # The key expires after 2_629_746 seconds (roughly one month) so an
        # abandoned run does not leave a checkpoint behind forever.
        checkpoint_record_id = resources.last.id
        redis.set(rkey, checkpoint_record_id, ex: 2_629_746)
        puts "Checkpoint: id #{checkpoint_record_id}"
        sleep(0.01) # throttle
      end
      puts "Processing finished! Deleting redis key #{rkey}, with value #{redis.get(rkey)}"
      redis.del(rkey)
    end
  end

  desc 'Update comment tsvectors'
  task :update_comment_tsvectors, %i[start batch_size] => :environment do |_task, args|
    # Fill in anything the caller left out so VectorUpdater never sees nil.
    # :resource is not one of the declared task arguments; it is supplied
    # here to select the model to process.
    args.with_defaults(start: 0, batch_size: 1000, resource: 'comment')
    VectorUpdater.new(args).process
  end

  desc 'Update discussion tsvectors'
  task :update_discussion_tsvectors, %i[start batch_size] => :environment do |_task, args|
    # Fill in anything the caller left out so VectorUpdater never sees nil.
    # :resource is not one of the declared task arguments; it is supplied
    # here to select the model to process.
    args.with_defaults(start: 0, batch_size: 1000, resource: 'discussion')
    VectorUpdater.new(args).process
  end

  desc 'Update board tsvectors'
  task :update_board_tsvectors, %i[start batch_size] => :environment do |_task, args|
    # Fill in anything the caller left out so VectorUpdater never sees nil.
    # :resource is not one of the declared task arguments; it is supplied
    # here to select the model to process.
    args.with_defaults(start: 0, batch_size: 1000, resource: 'board')
    VectorUpdater.new(args).process
  end
end