jaimeiniesta/metainspector

View on GitHub
lib/meta_inspector/parsers/links.rb

Summary

Maintainability
A
0 mins
Test Coverage
module MetaInspector
  module Parsers
    # Collects the +href+ values of the document's <a> tags and exposes them
    # raw, absolutified, and split into HTTP / non-HTTP and internal / external.
    #
    # +parsed+, +url+, +scheme+ and +host+ are forwarded to the main parser.
    # +cleanup+ is not defined in this class; it is presumably inherited from
    # Base.
    class LinksParser < Base
      delegate [:parsed, :url, :scheme, :host] => :@main_parser

      # Matches a link whose scheme is http or https (case-insensitive).
      # Anchored with \A rather than ^ so the scheme has to open the string
      # itself, not merely a later line of a multi-line value.
      HTTP_SCHEME = %r{\Ahttps?://}i.freeze

      # Returns this parser itself.
      def links
        self
      end

      # Returns all links found, unprocessed (only cleaned up, with nils and
      # duplicates removed). Memoized.
      def raw
        @raw ||= cleanup(parsed.search('//a/@href')).compact.uniq
      end

      # Returns all links found, unrelativized and absolutified against
      # #base_url. Links for which URL.absolutify returns nil are dropped.
      # Memoized.
      def all
        @all ||= begin
          # The base does not depend on the link, so resolve it lazily and at
          # most once instead of searching the document again for every link.
          base = nil
          raw.map { |link| URL.absolutify(link, base ||= base_url) }.compact.uniq
        end
      end

      # Returns all HTTP(S) links found. Memoized.
      def http
        @http ||= all.select { |link| link =~ HTTP_SCHEME }
      end

      # Returns all non-HTTP links found. Memoized.
      def non_http
        @non_http ||= all.select { |link| link !~ HTTP_SCHEME }
      end

      # Returns all internal HTTP links found, i.e. those whose host equals
      # the host reported by the main parser. Memoized.
      def internal
        @internal ||= http.select { |link| URL.new(link).host == host }
      end

      # Returns all external HTTP links found, i.e. those whose host differs
      # from the host reported by the main parser. Memoized.
      def external
        @external ||= http.select { |link| URL.new(link).host != host }
      end

      # Returns the classified links as a Hash with the String keys
      # 'internal', 'external' and 'non_http'.
      def to_hash
        { 'internal' => internal,
          'external' => external,
          'non_http' => non_http }
      end

      # Returns the base url to absolutify relative links.
      # This can be the one set on a <base> tag,
      # or the url of the document if no <base> tag was found.
      # The document url is also used when the <base> href is blank or when
      # URL.absolutify returns nil for it.
      def base_url
        href = base_href # read once; every call searches the document
        return url if href.to_s.strip.empty?

        URL.absolutify(href, URL.new(url).root_url) || url
      end

      # Returns the value of the href attribute on the first <base /> tag,
      # or nil when there is no such tag or it has no href attribute.
      def base_href
        base = parsed.search('base').first
        href = base && base.attributes['href']
        href && href.value
      end
    end
  end
end