rapid7/metasploit-framework

View on GitHub
lib/anemone/page_store.rb

Summary

Maintainability
B
5 hrs
Test Coverage
require 'forwardable'

module Anemone
  class PageStore
    extend Forwardable

    def_delegators :@storage, :keys, :values, :size, :each

    def initialize(storage = {})
      @storage = storage
    end

    # We typically index the hash with a URI,
    # but convert it to a String for easier retrieval
    def [](index)
      @storage[index.to_s]
    end

    def []=(index, other)
      @storage[index.to_s] = other
    end

    def delete(key)
      @storage.delete key.to_s
    end

    def has_key?(key)
      @storage.has_key? key.to_s
    end

    def each_value
      each { |key, value| yield value }
    end

    def values
      result = []
      each { |key, value| result << value }
      result
    end

    def touch_key(key)
      self[key] = Page.new(key)
    end

    def touch_keys(keys)
      @storage.merge! keys.inject({}) { |h, k| h[k.to_s] = Page.new(k); h }
    end

    # Does this PageStore contain the specified URL?
    # HTTP and HTTPS versions of a URL are considered to be the same page.
    def has_page?(url)
      schemes = %w(http https)
      if schemes.include? url.scheme
        u = url.dup
        return schemes.any? { |s| u.scheme = s; has_key?(u) }
      end

      has_key? url
    end

    #
    # Use a breadth-first search to calculate the single-source
    # shortest paths from *root* to all pages in the PageStore
    #
    def shortest_paths!(root)
      root = URI(root) if root.is_a?(String)
      raise "Root node not found" if !has_key?(root)

      q = Queue.new

      q.enq root
      root_page = self[root]
      root_page.depth = 0
      root_page.visited = true
      self[root] = root_page
      while !q.empty?
        page = self[q.deq]
        page.links.each do |u|
          begin
            link = self[u]
            next if link.nil? || !link.fetched? || link.visited

            q << u unless link.redirect?
            link.visited = true
            link.depth = page.depth + 1
            self[u] = link

            if link.redirect?
              u = link.redirect_to
              redo
            end
          end
        end
      end

      self
    end

    #
    # Removes all Pages from storage where redirect? is true
    #
    def uniq!
      each_value { |page| delete page.url if page.redirect? }
      self
    end

    #
    # If given a single URL (as a String or URI), returns an Array of Pages which link to that URL
    # If given an Array of URLs, returns a Hash (URI => [Page, Page...]) of Pages linking to those URLs
    #
    def pages_linking_to(urls)
      unless urls.is_a?(Array)
        urls = [urls]
        single = true
      end

      urls.map! do |url|
        unless url.is_a?(URI)
          URI(url) rescue nil
        else
          url
        end
      end
      urls.compact

      links = {}
      urls.each { |url| links[url] = [] }
      values.each do |page|
        urls.each { |url| links[url] << page if page.links.include?(url) }
      end

      if single and !links.empty?
        return links[urls.first]
      else
        return links
      end
    end

    #
    # If given a single URL (as a String or URI), returns an Array of URLs which link to that URL
    # If given an Array of URLs, returns a Hash (URI => [URI, URI...]) of URLs linking to those URLs
    #
    def urls_linking_to(urls)
      unless urls.is_a?(Array)
        urls = [urls] unless urls.is_a?(Array)
        single = true
      end

      links = pages_linking_to(urls)
      links.each { |url, pages| links[url] = pages.map{|p| p.url} }

      if single and !links.empty?
        return links[urls.first]
      else
        return links
      end
    end

  end
end