18F/identity-idp

View on GitHub
lib/pwned_password_downloader.rb

Summary

Maintainability
A
0 mins
Test Coverage
#!/usr/bin/env ruby
# frozen_string_literal: true

require 'fileutils'
require 'net/http/persistent'
require 'retries'
require 'ruby-progressbar'
require 'ruby-progressbar/outputs/null'

# Downloads hash-range files from the pwnedpasswords.com range API into a
# local directory, one file per hash prefix, keeping only the entries whose
# occurrence count is at least +keep_threshold+.
class PwnedPasswordDownloader
  attr_reader :destination,
              :num_threads,
              :keep_threshold,
              :output_progress

  alias_method :output_progress?, :output_progress

  RANGE_API_ROOT = 'https://api.pwnedpasswords.com/range/'
  SHA1_LENGTH = 40
  HASH_PREFIX_LENGTH = 5
  # Index in a response line where the occurrence count starts: the line is
  # expected to be the hash suffix (SHA1 minus prefix), a ':' and the count.
  OCCURRENCE_OFFSET = SHA1_LENGTH - HASH_PREFIX_LENGTH + ':'.length

  # @param destination [String] directory the per-prefix files are written to
  # @param num_threads [Integer] maximum number of concurrent download threads
  # @param keep_threshold [Integer] minimum occurrence count for a hash to be kept
  # @param output_progress [Boolean] whether to render the progress bar on $stdout
  def initialize(
    destination: 'tmp/pwned',
    num_threads: 64,
    keep_threshold: 30,
    output_progress: true
  )
    @destination = destination
    @num_threads = num_threads
    @keep_threshold = keep_threshold
    @output_progress = output_progress
  end

  # Downloads every prefix from +start+ to +finish+ (inclusive) that does not
  # already have a file in +destination+. Returns once all worker threads have
  # finished and their connections are shut down.
  #
  # @param start [String] first hexadecimal prefix
  # @param finish [String] last hexadecimal prefix
  # @return [nil]
  # @raise [RuntimeError] when any prefix fails to download or write; the
  #   underlying error is available as the exception's +cause+
  def run!(start: '00000', finish: 'FFFFF')
    enqueue_prefixes(start:, finish:)
    FileUtils.mkdir_p(destination)

    progress_bar = ProgressBar.create(
      title: 'Downloading...',
      total: queue.size,
      output: output_progress? ? $stdout : ProgressBar::Outputs::Null,
      format: '[ %t ] %p%% %B %a (%E)',
    )

    failures = Queue.new
    workers = spawn_workers(progress_bar:, failures:)

    # The liveness check prevents waiting forever if every worker died
    # without completing the queue or recording a failure.
    until progress_bar.finished? || !failures.empty? || workers.none?(&:alive?)
      wait_for_progress
    end

    unless failures.empty?
      prefix, error = failures.pop
      raise "Error: Failed to download prefix #{prefix}", cause: error
    end
  ensure
    # Drop prefixes that were never started so workers exit after their
    # current item and a later run! does not see stale entries.
    queue.clear
    begin
      workers&.each(&:join)
    ensure
      # progress_bar is nil when setup failed before it was created.
      progress_bar&.stop
    end
  end

  # @return [Queue] memoized queue of prefixes still waiting to be downloaded
  def queue
    @queue ||= Queue.new
  end

  # Pause between checks of the download progress.
  def wait_for_progress
    sleep 3
  end

  # @param prefix [String]
  # @return [Boolean] whether a file for +prefix+ already exists in +destination+
  def already_downloaded?(prefix)
    File.exist?(File.join(destination, prefix))
  end

  # Fetches one range and rebuilds full-hash lines for the entries to keep.
  #
  # @param prefix [String] hash prefix to request
  # @param net_http [#request] HTTP client used to perform the request
  # @param keep [Integer] minimum occurrence count for a line to be kept
  # @return [String] kept lines, each prefixed with +prefix+ and newline-terminated
  # @raise [StandardError] when the response is not a 2xx success
  def download_one(prefix:, net_http: Net::HTTP::Persistent.new, keep: keep_threshold)
    response = net_http.request(URI.join(RANGE_API_ROOT, prefix))
    # Net::HTTPResponse#value raises unless the status is 2xx. Without this an
    # error body would be filtered down to nothing and stored as an empty file
    # that already_downloaded? then treats as complete.
    response.value

    response.body.each_line(chomp: true).filter_map do |line|
      "#{prefix}#{line}\n" if line[OCCURRENCE_OFFSET..].to_i >= keep
    end.join
  end

  # Writes +content+ to the file named +prefix+ inside +destination+.
  #
  # @param prefix [String]
  # @param content [String]
  # @return [Integer] number of bytes written
  def write_one(prefix:, content:)
    File.write(File.join(destination, prefix), content)
  end

  private

  # Pushes every zero-padded, upper-case hex prefix in the range onto the queue.
  def enqueue_prefixes(start:, finish:)
    (start.to_i(16)..finish.to_i(16)).each do |prefix_num|
      queue << prefix_num.to_s(16).upcase.rjust(HASH_PREFIX_LENGTH, '0')
    end
  end

  # Starts one worker thread per slot, each with its own numeric id.
  #
  # @return [Array<Thread>]
  def spawn_workers(progress_bar:, failures:)
    Array.new([num_threads, queue.size].min) do |thread_id|
      Thread.new { download_worker(thread_id:, progress_bar:, failures:) }
    end
  end

  # Drains the queue on the current thread: downloads and writes each prefix,
  # recording [prefix, error] pairs in +failures+ instead of aborting.
  def download_worker(thread_id:, progress_bar:, failures:)
    net_http = Net::HTTP::Persistent.new(name: "thread_id_#{thread_id}")

    while (prefix = next_prefix)
      begin
        unless already_downloaded?(prefix)
          content = with_retries(max_tries: 5, rescue: Socket::ResolutionError) do
            download_one(prefix:, net_http:)
          end
          write_one(prefix:, content:)
        end
      rescue => error
        failures << [prefix, error]
      else
        progress_bar.increment
      end
    end
  ensure
    # net_http is nil when the client itself could not be constructed.
    net_http&.shutdown
  end

  # Non-blocking pop so a worker finishes once the queue is drained instead of
  # blocking forever on an empty queue.
  #
  # @return [String, nil] next prefix, or nil when the queue is empty
  def next_prefix
    queue.pop(true)
  rescue ThreadError
    nil
  end
end

# Run a full download with default settings when executed directly as a script.
PwnedPasswordDownloader.new.run! if __FILE__ == $PROGRAM_NAME