blackxored/whos_dated_who

View on GitHub
lib/whos_dated_who/parser.rb

Summary

Maintainability
A
35 mins
Test Coverage
module WhosDatedWho
  # Parses the HTML body of a profile page into a Hashie::Mash containing
  # the biography, the current relationship and a relationship status.
  #
  # A parser instance may be reused: every call to #parse starts from a
  # clean state.
  class Parser
    # Selector of the box the biography facts are read from by default.
    BIO_SELECTOR = '#rcol .cbox:nth-child(3)'.freeze

    # Selector used instead when the default box holds the '#wikitext'
    # description rather than the facts.
    BIO_FALLBACK_SELECTOR = '#rcol .cbox:nth-child(4)'.freeze

    # Normalized biography keys whose raw text is converted by a dedicated
    # method. An explicit whitelist is used (rather than looking up
    # "parse_<key>" dynamically) because keys come from the scraped page and
    # could otherwise collide with unrelated private helpers such as
    # parse_list or parse_bio.
    VALUE_PARSERS = {
      'height' => :parse_height,
      'weight' => :parse_weight
    }.freeze

    # The Nokogiri document built by the most recent #parse call.
    attr_reader :doc

    # Parses +body+ and returns the populated result.
    #
    # @param body [String] HTML source of the page
    # @return [Hashie::Mash] with :biography, :current_relationship, :status
    def parse(body)
      # Drop anything memoized by an earlier #parse (or an early #result
      # call); otherwise the previous biography would be returned again.
      @result = nil
      @biography = nil

      @doc = Nokogiri::HTML(body)
      extract_bio
      extract_current_relationship
      # TODO: extract_past_relationships
      result
    end

    # Lazily built result container. Status defaults to :unknown until a
    # relationship has been extracted.
    #
    # @return [Hashie::Mash]
    def result
      @result ||= Hashie::Mash.new(biography: @biography, status: :unknown)
    end

    private

    # Builds @biography from the facts box plus the '#wikitext' description.
    def extract_bio
      bio = @doc.css(BIO_SELECTOR)

      # When the default box contains the '#wikitext' node (whose text is
      # used as the description below), read the facts from the next box.
      bio = @doc.css(BIO_FALLBACK_SELECTOR) unless bio.css('#wikitext').empty?

      attributes = parse_bio(bio)
      attributes[:description] = @doc.css('#wikitext').text

      @biography = Biography.new(attributes.symbolize_keys)
    end

    # Reads the '.pbox.datebox' node, stores the relationship in the result
    # and derives the status from its human-readable text.
    #
    # @return [Hash] with :human (String) and :dates (Array<String>)
    def extract_current_relationship
      current = @doc.css('.pbox.datebox')
      relationship = {
        human: current.css('div.pb10:first').text,
        dates: current.css('ul li').map(&:content)
      }

      result[:current_relationship] = relationship
      result[:status] = parse_relationship_status(relationship)
      relationship
    end

    # A value element is treated as a list when it contains nested divs.
    def list?(el)
      !el.css('div').empty?
    end

    # Walks the '.posl' / '.posr' elements of +bio+ in document order. Each
    # '.posl' supplies a label; every other matched element is stored as the
    # value of the most recent label.
    #
    # @return [Hash{String => Object}] normalized label => parsed value
    def parse_bio(bio)
      label = nil # local on purpose: must not leak between calls

      bio.css('.posl, .posr').each_with_object({}) do |el, attributes|
        if el.matches?('.posl')
          label = el.content
          next
        end

        # A value that appears before any label cannot be attributed.
        next if label.nil?

        key = normalize_bio_key(label)
        attributes[key] = list?(el) ? parse_list(el) : parse_content(el, key)
      end
    end

    # Collects the text of each child node, dropping empty strings and
    # entries with a line starting with one whitespace character and "(".
    #
    # @return [Array<String>]
    def parse_list(el)
      el.children.map(&:content).reject do |text|
        text.empty? || text =~ /^\s\(/
      end
    end

    # Returns the right-stripped text of +el+, converted by the parser
    # registered for +key+ in VALUE_PARSERS when there is one.
    def parse_content(el, key)
      value = el.content.rstrip
      converter = VALUE_PARSERS[key]
      converter ? send(converter, value) : value
    end

    # Maps the human-readable relationship text to a status symbol.
    # Matching is case-sensitive and the first matching pattern wins.
    #
    # @return [Symbol] :married, :engaged, :dating, :single or :unknown
    def parse_relationship_status(relationship)
      case relationship[:human]
      when /married/ then :married
      when /engaged/ then :engaged
      when /dating/  then :dating
      when /in a long-term relationship/ then :dating
      when /single/  then :single
      else
        :unknown
      end
    end

    # "Hair Color (natural)" => "hair_color_natural"
    def normalize_bio_key(key)
      key.gsub(/\s/, '_').gsub(/[()]/, '').downcase
    end

    # Extracts the "<digits> cm" figure and divides it by 100.
    #
    # @return [Float, nil] nil when no "cm" figure is present
    def parse_height(value)
      (Regexp.last_match(1).to_i / 100.0) if value =~ /(\d+) cm.*$/
    end

    # Extracts the figure between "(" and " kg" as a float.
    #
    # @return [Float, nil] nil when no such figure is present
    def parse_weight(value)
      Regexp.last_match(1).to_f if value =~ /\((.+) kg/
    end
  end
end