lib/ar_book_finder/search_results_parser.rb
module ARBookFinder
class SearchResultsParser
SEARCH_ROOT_XPATH = '//*[@id="ctl00_ContentPlaceHolder1_pnlSearchFormPanel"]'
SEARCH_PAGE_COUNT_XPATH = '//*[@id="ctl00_ContentPlaceHolder1_ucSeachResults_lblResultsSummaryTop"]'
SEARCH_RESULTS_COUNT = '//*[@id="ctl00_ContentPlaceHolder1_ucSearchResultsHeader_lblResultSummary"]'
SEARCH_RESULTS_XPATH = '//*[@id="ctl00_ContentPlaceHolder1_ucSeachResults_lblQuizzes"]/table'
COLLECTION_ROOT_XPATH = '//*[@id="ctl00_ContentPlaceHolder1_ucCollection_pnlSearchFormPanel"]'
COLLECTION_PAGE_COUNT_XPATH = '//*[@id="ctl00_ContentPlaceHolder1_ucCollection_ucSeachResults_lblResultsSummaryTop"]'
COLLECTION_RESULTS_COUNT = '//*[@id="ctl00_ContentPlaceHolder1_ucCollection_ucSearchResultsHeader_lblResultSummary"]'
COLLECTION_RESULTS_XPATH = '//*[@id="ctl00_ContentPlaceHolder1_ucCollection_ucSeachResults_lblQuizzes"]/table'
BOOK_XPATH = 'tbody/tr/td[2]'
BOOK_DETAIL_XPATH = 'table/tbody/tr/td[2]'
BOOK_URL_XPATH = 'a'
attr_reader :current_page, :page_count, :total_books, :books
def initialize(html, collection = false)
@doc = Nokogiri::HTML.parse(html)
@xpath_const = collection ? :COLLECTION : :SEARCH
@root = @doc.xpath(self.class.const_get(:"#{@xpath_const}_ROOT_XPATH"))
@books = []
end
def parse
@current_page = parse_current_page.to_i
@page_count = parse_page_count.to_i
@total_books = parse_total_books.to_i
@books = parse_results
self
end
private
def parse_current_page
@root.xpath(self.class.const_get(:"#{@xpath_const}_PAGE_COUNT_XPATH")).text.gsub(/Page /, '').gsub(/ of \d+/, '')
end
def parse_page_count
@root.xpath(self.class.const_get(:"#{@xpath_const}_PAGE_COUNT_XPATH")).text.gsub(/Page \d+ of /, '')
end
def parse_total_books
@root.xpath(self.class.const_get(:"#{@xpath_const}_RESULTS_COUNT")).text.gsub(/Titles \d+ - \d+ of /, '')
end
def parse_results
books = []
@root.xpath(self.class.const_get(:"#{@xpath_const}_RESULTS_XPATH")).each_with_index do |result, i|
next if i.odd?
book = result.xpath(BOOK_XPATH)
book_detail = book.xpath(BOOK_DETAIL_XPATH)
books << Book.new("#{ARBookFinder::BASE_URL}/#{book_detail.xpath(BOOK_URL_XPATH).attribute('href').content}")
end
books
end
end
end