stevenocchipinti/dwi-visualiser

View on GitHub
lib/DwiScraper.rb

Summary

Maintainability
A
0 mins
Test Coverage
require 'nokogiri'
require 'open-uri'
require 'json'
require './lib/Lens'

class DwiScraper

  attr_accessor :lenses
  DWI_URL = 'http://www.dwidigitalcameras.com.au'


  def initialize(url)
    @lenses = get_lenses(url)
  end


  def to_json(*opts)
    @lenses.to_json(*opts)
  end


  def get_lenses(url)

    # Find the element with the pricetag
    # Asend up the DOM until the containing table element is found
    # Descend down the DOM until the link with the 'highlight' class is found
    #   This element contains the description (with focal length and aperture)

    doc = Nokogiri::HTML(open(url))

    lenses = []
    @stats = { parsed: 0, failed: 0 }
    doc.xpath('//div[contains(text(), "$")]').each do |price_element|

      begin
        containing_element = price_element.xpath("ancestor::table[1]")
        name_element = containing_element.css('.highlight')
        image_element = containing_element.xpath("ancestor::table[1]//img")

        lenses << Lens.new(
          name:  name_element.text,
          link:  DWI_URL + name_element.first.attribute('href').text,
          image: DWI_URL + image_element.first.attribute('src').text,
          price: price_element.text
        )
        @stats[:parsed] += 1
      rescue
        @stats[:failed] += 1
        next
      end

    end

    print_report
    lenses
  end


  def print_report
    $stderr.puts "Report:"
    $stderr.puts "  Parsed lenses: #{@stats[:parsed]}"
    $stderr.puts "  Failed lenses: #{@stats[:failed]}"
  end

end