socialchorus/link_oracle

View on GitHub
lib/link_oracle/extractor/body.rb

Summary

Maintainability
A
0 mins
Test Coverage
class LinkOracle
  module Extractor
    class Body < Base
      def type
        :body
      end

      def perform
        link_data.assign({
          titles: titles,
          image_urls: images,
          descriptions: descriptions
        })
      end

      def titles
        @titles ||= parsed_body.xpath(
          "//h1/text() | //h2/text() | //h3/text()"
        ).first(3).compact.map{ |text| text.content }
      end

      def images
        @images ||= first_valid_size_image ? [first_valid_size_image] : []
      end

      def parsed_images
        @parsed_images ||= parsed_body.xpath(
          "//img[@src[(contains(.,'://') or contains(., '/')) and not(contains(.,'ads.') or contains(.,'ad.') or contains(.,'?') or contains(.,'.gif'))]]"
        ).map{ |node| node['src'] }
      end

      def formatted_images
        @formatted_images ||= parsed_images.map { |image_url| ::Utils::ImageUrlFormatter.new(url, image_url).perform }
      end

      def first_valid_size_image
        @first_valid_size_image ||= formatted_images.find do |image|
          size = image_size(image)
          size[0] >= 100 && size[1] >= 100 if size
        end
      end

      def image_size(image)
        ::FastImage.size(image)
      rescue ::URI::InvalidURIError
        [0, 0]
      end

      def descriptions
        @description ||= parsed_body.xpath("//p/text()").first(3).compact.map{ |text| text.content }
      end
    end
  end
end