disclosed/disclosed_app

View on GitHub
lib/scrapers/dl_extractor.rb

Summary

Maintainability
A
25 mins
Test Coverage
# Extracts contract information from <dl> HTML structures.
#
# For each row mapping, the text of the first <dd> sibling following a <dt>
# that matches the mapping's label is looked up, cleaned, and stored in
# +result+ under the mapping's field. The source URL is stored under +:url+.
class Scrapers::DlExtractor
  include Scrapers::TextHelpers
  attr_reader :page, :result

  # Fetches and parses the document at +url+, then runs the extraction right
  # away so that +result+ is populated as soon as construction finishes.
  #
  # @param url [String] location of the page to scrape
  # @param row_mappings [Enumerable] objects responding to +field+ (the key
  #   written into +result+) and +label+ (text spliced into the XPath
  #   +regex()+ call that selects the <dt>)
  def initialize(url, row_mappings)
    # URI.open is the supported open-uri entry point (Kernel#open stopped
    # handling URLs in Ruby 3.0). The block form closes the underlying
    # handle once Nokogiri has finished reading it, instead of leaking it.
    @page = URI.open(url) { |io| Nokogiri::HTML(io) }
    @row_mappings = row_mappings
    @result = { url: url }
    extract
  end

  # Populates +result+ with one entry per row mapping.
  #
  # @return [Hash] the populated +result+ hash
  def extract
    @row_mappings.each do |mapping|
      field = mapping.field
      # `regex` is a custom XPath function supplied by NokogiriXpathExtensions.
      # NOTE(review): the label is embedded inside a double-quoted XPath
      # string literal, so a label containing `"` would break the query.
      query = "//dl/dt[regex(., \"#{mapping.label}\")]/following-sibling::dd[position()=1]"

      value = @page.xpath(query, NokogiriXpathExtensions.new).text.strip
      value = clean_dash(clean_nbsp(value))
      value = parse_date(value) if field == :effective_date
      value = parse_value(value) if field == :value
      # The parse_* helpers may return a non-String, so only strip Strings.
      value = value.strip if value.is_a?(String)

      @result[field] = value
    end
    @result
  end
end