rapid7/metasploit-framework

View on GitHub
lib/rex/parser/graphml.rb

Summary

Maintainability
C
1 day
Test Coverage
# -*- coding: binary -*-

module Rex
  module Parser
    #
    # A partial implementation of the GraphML specification for loading structured data from an XML file. Notable
    # missing components include GraphML parse meta-data (XML attributes with the "parse" prefix), hyperedges and ports.
    # See: http://graphml.graphdrawing.org/
    #
    module GraphML
      #
      # Load the contents of a GraphML file by parsing it with Nokogiri and returning
      # the top level GraphML structure.
      #
      # @param file_path [String] The file path to load the data from.
      # @return [Rex::Parser::GraphML::Element::GraphML]
      def self.from_file(file_path)
        parser = Nokogiri::XML::SAX::Parser.new(Document.new)
        parser.parse(File.read(file_path, mode: 'rb'))
        parser.document.graphml
      end

      #
      # Convert a GraphML value string into a Ruby value depending on the specified type. Values of int and long will be
      # converted to Ruby integer, while float and double values will be converted to floats. For booleans, values that are
      # either blank or "false" (case-insensitive) will evaluate to Ruby's false, while everything else will be true.
      #
      # @param attr_type [Symbol] The type of the attribute, one of either boolean, int, long, float, double or string.
      # @param value [String] The value to convert into a native Ruby data type.
      def self.convert_attribute(attr_type, value)
        case attr_type
        when :boolean
          value.strip!
          if value.blank?
            value = false
          else
            value = value.downcase != 'false'
          end
        when :int, :long
          value = Integer(value)
        when :float, :double
          value = Float(value)
        when :string # rubocop:disable Lint/EmptyWhen
        else
          raise ArgumentError, 'Unsupported attribute type: ' + attr_type.to_s
        end

        value
      end

      #
      # Define a GraphML attribute including its name, data type, default value and where it can be applied.
      #
      class MetaAttribute
        # @param id [String] The attribute's document identifier.
        # @param name [String] The attribute's name as used by applications.
        # @param type [Symbol] The data type of the attribute, one of either boolean, int, long, float, double or string.
        # @param domain [Symbol] What elements this attribute is valid for, one of either edge, node, graph or all.
        # @param default An optional default value for this attribute.
        def initialize(id, name, type, domain: :all, default: nil)
          @id = id
          @name = name
          @type = type
          @domain = domain
          @default = default
        end

        #
        # Create a new instance from a Key element.
        #
        # @param key [Rex::Parser::GraphML::Element::Key] The key to create a new instance from.
        def self.from_key(key)
          new(key.id, key.attr_name, key.attr_type, domain: key.domain, default: key.default&.value)
        end

        #
        # Convert a value to the type specified by this attribute.
        #
        # @param value The value to convert.
        def convert(value)
          GraphML.convert_attribute(@type, value)
        end

        #
        # Whether or not the attribute is valid for the specified element.
        #
        # @param element [Rex::Parser::GraphML::AttributeContainer] The element to check.
        def valid_for?(element)
          @domain == :all || @domain == element.class::ELEMENT_NAME.to_sym
        end

        # @!attribute id
        #   @return [String] The attribute's document identifier.
        attr_reader :id
        # @!attribute name
        #   @return [String] The attribute's name as used by applications.
        attr_reader :name
        # @!attribute type
        #   @return [Symbol] The data type of the attribute.
        attr_reader :type
        # @!attribute domain
        #   @return [Symbol] What elements this attribute is valid for.
        attr_reader :domain
        # @!attribute default
        #   @return An optional default value for this attribute.
        attr_reader :default
      end

      #
      # A base class for GraphML elements that are capable of storing attributes.
      #
      class AttributeContainer
        def initialize
          @attributes = {}
        end

        # @!attribute attributes
        #   @return [Hash] The defined attributes for the element.
        attr_reader :attributes
      end

      #
      # A module for organizing GraphML elements that define the data structure. Each provides a from_xml_attributes
      # function to create an instance from a hash of XML attributes.
      #
      module Element
        #
        # A data element defines the value of an attribute for the parent XML node.
        # See: http://graphml.graphdrawing.org/specification/xsd.html#element-data
        #
        class Data
          ELEMENT_NAME = 'data'.freeze
          # @param key [String] The identifier of the attribute that this object contains a value for.
          def initialize(key)
            @key = key
            @value = nil
          end

          def self.from_xml_attributes(xml_attrs)
            key = xml_attrs['key']
            raise Error::InvalidAttributeError.new('data', 'key') if key.nil?

            new(key)
          end

          # @!attribute key
          #   @return [String] The identifier of the attribute that this object contains a value for.
          attr_reader :key
          # @!attribute value
          #   @return The value of the attribute.
          attr_reader :value
        end

        #
        # A default element defines the optional default value of an attribute. If not default is specified, per the GraphML
        # specification, the attribute is undefined.
        # See: http://graphml.graphdrawing.org/specification/xsd.html#element-default
        #
        class Default
          ELEMENT_NAME = 'default'.freeze
          # @param value The default attribute value.
          def initialize(value: nil)
            @value = value
          end

          def self.from_xml_attributes(_xml_attrs)
            new # no attributes for this element
          end

          # @!attribute value
          #   @return The default attribute value.
          attr_reader :value
        end

        #
        # An edge element defines a connection between two nodes. Connections are optionally directional.
        # See: http://graphml.graphdrawing.org/specification/xsd.html#element-edge
        #
        class Edge < AttributeContainer
          ELEMENT_NAME = 'edge'.freeze
          # @param source [String] The id of the node that this edge originated from.
          # @param target [String] The id of the node that this edge is destined for.
          # @param directed [Boolean] Whether or not this edge only connects in one direction.
          # @param id [String] The optional, unique identifier of this edge.
          def initialize(source, target, directed, id: nil)
            @source = source
            @target = target
            @directed = directed
            @id = id
            super()
          end

          def self.from_xml_attributes(xml_attrs, edgedefault)
            source = xml_attrs['source']
            raise Error::InvalidAttributeError.new('edge', 'source') if source.nil?

            target = xml_attrs['target']
            raise Error::InvalidAttributeError.new('edge', 'target') if target.nil?

            directed = xml_attrs['directed']
            if directed.nil?
              directed = edgedefault == :directed
            elsif %w[true false].include? directed
              directed = directed == 'true'
            else
              raise Error::InvalidAttributeError.new('edge', 'directed', details: 'must be either true or false when specified', missing: false)
            end

            new(source, target, directed, id: xml_attrs['id'])
          end

          # !@attribute source
          #   @return [String] The id of the node that this edge originated from.
          attr_reader :source
          # !@attribute target
          #   @return [String] The id of the node that this edge is destined for.
          attr_reader :target
          # !@attribute directed
          #   @return [Boolean] Whether or not this edge only connects in one direction.
          attr_reader :directed
          # !@attribute id
          #   @return [String] The optional, unique identifier of this edge.
          attr_reader :id
        end

        #
        # A graph element defines a collection of nodes and edges.
        # See: http://graphml.graphdrawing.org/specification/xsd.html#element-graph
        #
        class Graph < AttributeContainer
          ELEMENT_NAME = 'graph'.freeze
          # @param edgedefault [Boolean] Whether or not edges within this graph should be directional by default.
          # @param id [String] The optional, unique identifier of this graph.
          def initialize(edgedefault, id: nil)
            @edgedefault = edgedefault
            @id = id

            @nodes = {}
            @edges = []
            super()
          end

          def self.from_xml_attributes(xml_attrs)
            edgedefault = xml_attrs['edgedefault']
            unless %w[directed undirected].include? edgedefault
              # see: http://graphml.graphdrawing.org/primer/graphml-primer.html section 2.3.1
              raise Error::InvalidAttributeError.new('graph', 'edgedefault', missing: edgedefault.nil?)
            end

            edgedefault = edgedefault.to_sym

            new(edgedefault, id: xml_attrs['id'])
          end

          # @!attribute edgedefault
          #   @return [Boolean] Whether or not edges within this graph should be directional by default.
          attr_reader :edgedefault
          # @!attribute id
          #   @return [String] The optional, unique identifier of this graph.
          attr_reader :id
          # @!attribute edges
          #   @return [Array] An array of edge elements within this graph.
          attr_reader :edges
          # @!attribute nodes
          #   @return [Hash] A hash of node elements, keyed by their string identifier.
          attr_reader :nodes
        end

        #
        # A graphml element is the root of a GraphML document.
        # See: http://graphml.graphdrawing.org/specification/xsd.html#element-graphml
        #
        class GraphML
          ELEMENT_NAME = 'graphml'.freeze
          def initialize
            @nodes = {}
            @edges = []
            @graphs = []
          end

          # @!attribute nodes
          #   @return [Hash] A hash of all node elements within this GraphML document, keyed by their string identifier.
          attr_reader :nodes
          # @!attribute edges
          #   @return [Array] An array of all edge elements within this GraphML document.
          attr_reader :edges
          # @!attribute graphs
          #   @return [Array] An array of all graph elements within this GraphML document.
          attr_reader :graphs
        end

        #
        # A key element defines the attributes that may be present in a document.
        # See: http://graphml.graphdrawing.org/specification/xsd.html#element-key
        #
        class Key
          ELEMENT_NAME = 'key'.freeze
          # @param id [String] The document identifier of the attribute described by this element.
          # @param name [String] The name (as used by applications) of the attribute described by this element.
          # @param type [Symbol] The data type of the attribute described by this element, one of either boolean, int, long, float, double or string.
          # @param domain [Symbol] What elements the attribute described by this element is valid for, one of either edge, node, graph or all.
          def initialize(id, name, type, domain)
            @id = id
            @attr_name = name
            @attr_type = type
            @domain = domain # using 'for' would cause an awkward keyword conflict
            @default = nil
          end

          def self.from_xml_attributes(xml_attrs)
            id = xml_attrs['id']
            raise Error::InvalidAttributeError.new('key', 'id') if id.nil?

            name = xml_attrs['attr.name']
            raise Error::InvalidAttributeError.new('key', 'attr.name') if name.nil?

            type = xml_attrs['attr.type']
            unless %w[boolean int long float double string].include? type
              raise Error::InvalidAttributeError.new('key', 'attr.type', details: 'must be boolean int long float double or string', missing: type.nil?)
            end

            type = type.to_sym

            domain = xml_attrs['for']
            unless %w[graph node edge all].include? domain
              raise Error::InvalidAttributeError.new('key', 'for', details: 'must be graph node edge or all', missing: domain.nil?)
            end

            domain = domain.to_sym

            new(id, name, type, domain)
          end

          def default=(value)
            @default = GraphML.convert_attribute(@attr_type, value)
          end

          # @!attribute id
          #   @return [String] The document identifier of the attribute described by this element.
          attr_reader :id
          # @!attribute attr_name
          #   @return [String] The name (as used by applications) of the attribute described by this element.
          attr_reader :attr_name
          # @!attribute attr_type
          #   @return [Symbol] The data type of the attribute described by this element.
          attr_reader :attr_type
          # @!attribute domain
          #   @return [Symbol] What elements the attribute described by this element is valid for.
          attr_reader :domain
          # @!attribute default
          #   @return The default value of the attribute described by this element.
          attr_reader :default
        end

        #
        # A node element defines an object within the graph that can have zero or more edges connecting it to other nodes. A
        # node element may contain a graph element.
        #
        class Node < AttributeContainer
          ELEMENT_NAME = 'node'.freeze
          # @param id [String] The unique identifier for this node element.
          def initialize(id)
            @id = id
            @edges = []
            @subgraph = nil
            super()
          end

          def self.from_xml_attributes(xml_attrs)
            id = xml_attrs['id']
            raise Error::InvalidAttributeError.new('node', 'id') if id.nil?

            new(id)
          end

          # @return [Array] An array of all edges for which this node is the target.
          def source_edges
            # edges connected to this node
            @edges.select { |edge| edge.target == @id || !edge.directed }
          end

          # @return [Array] An array of all edges for which this node is the source.
          def target_edges
            # edges connecting this to other nodes
            @edges.select { |edge| edge.source == @id || !edge.directed }
          end

          # @!attribute id
          #   @return [String] The unique identifier for this node.
          attr_reader :id
          # @!attribute edges
          #   @return [Array] An array of all edges for which this node is either the source or the target.
          attr_reader :edges
          # @!attribute subgraph
          #   @return [Graph,nil] A subgraph contained within this node.
          attr_accessor :subgraph
        end
      end

      #
      # A module collecting the errors raised by this parser.
      #
      module Error
        #
        # The base error class for errors raised by this parser.
        #
        class GraphMLError < StandardError
        end

        #
        # An error describing an issue that occurred while parsing the data structure.
        #
        class ParserError < GraphMLError
        end

        #
        # An error describing an XML attribute that is invalid either because the value is missing or otherwise invalid.
        #
        class InvalidAttributeError < ParserError
          def initialize(element, attribute, details: nil, missing: true)
            @element = element
            @attribute = attribute
            # whether or not the attribute is invalid because it is absent
            @missing = missing

            message = "Element '#{element}' contains an invalid attribute: '#{attribute}'"
            message << " (#{details})" unless details.nil?

            super(message)
          end
        end
      end

      #
      # The top-level document parser.
      #
      class Document < Nokogiri::XML::SAX::Document
        def initialize
          @stack = []
          @nodes = {}
          @meta_attributes = {}
          @graphml = nil
          super
        end

        def start_element(name, attrs = [])
          attrs = attrs.to_h

          case name
          when 'data'
            raise Error::ParserError, 'The \'data\' element must be a direct child of an attribute container' unless @stack[-1].is_a? AttributeContainer

            element = Element::Data.from_xml_attributes(attrs)

          when 'default'
            raise Error::ParserError, 'The \'default\' element must be a direct child of a \'key\' element' unless @stack[-1].is_a? Element::Key

            element = Element::Default.from_xml_attributes(attrs)

          when 'edge'
            raise Error::ParserError, 'The \'edge\' element must be a direct child of a \'graph\' element' unless @stack[-1].is_a? Element::Graph

            element = Element::Edge.from_xml_attributes(attrs, @stack[-1].edgedefault)
            @graphml.edges << element

          when 'graph'
            element = Element::Graph.from_xml_attributes(attrs)
            @stack[-1].subgraph = element if @stack[-1].is_a? Element::Node
            @graphml.graphs << element

          when 'graphml'
            element = Element::GraphML.new
            raise Error::ParserError, 'The \'graphml\' element must be a top-level element' unless @stack.empty?

            @graphml = element

          when 'key'
            raise Error::ParserError, 'The \'key\' element must be a direct child of a \'graphml\' element' unless @stack[-1].is_a? Element::GraphML

            element = Element::Key.from_xml_attributes(attrs)
            raise Error::InvalidAttributeError.new('key', 'id', details: 'duplicate key id') if @meta_attributes.key? element.id
            if @meta_attributes.values.any? { |attr| attr.name == element.attr_name }
              raise Error::InvalidAttributeError.new('key', 'attr.name', details: 'duplicate key attr.name')
            end

          when 'node'
            raise Error::ParserError, 'The \'node\' element must be a direct child of a \'graph\' element' unless @stack[-1].is_a? Element::Graph

            element = Element::Node.from_xml_attributes(attrs)
            raise Error::InvalidAttributeError.new('node', 'id', details: 'duplicate node id') if @nodes.key? element.id

            @nodes[element.id] = element
            @graphml.nodes[element.id] = element

          else
            raise Error::ParserError, 'Unknown element: ' + name

          end

          @stack.push element
        end

        def characters(string)
          element = @stack[-1]
          case element
          when Element::Data
            parent = @stack[-2]
            meta_attribute = @meta_attributes[element.key]
            unless meta_attribute.valid_for? parent
              raise Error::ParserError, "The #{meta_attribute.name} attribute is invalid for #{parent.class::ELEMENT_NAME} elements"
            end

            if meta_attribute.type == :string && !parent.attributes[meta_attribute.name].nil?
              # this may be run multiple times if there is an XML escape sequence in the string to concat the parts together
              parent.attributes[meta_attribute.name] << meta_attribute.convert(string)
            else
              parent.attributes[meta_attribute.name] = meta_attribute.convert(string)
            end

          when Element::Default
            @stack[-1] = Element::Default.new(value: string)

          end
        end

        def end_element(name)
          element = @stack.pop

          populate_element_default_attributes(element) if element.is_a? AttributeContainer

          case name
          when 'default'
            key = @stack[-1]
            key.default = element

          when 'edge'
            graph = @stack[-1]
            graph.edges << element

          when 'graph'
            element.edges.each do |edge|
              source_node = element.nodes[edge.source]
              raise Error::InvalidAttributeError.new('edge', 'source', details: "undefined source: '#{edge.source}'", missing: false) if source_node.nil?

              target_node = element.nodes[edge.target]
              raise Error::InvalidAttributeError.new('edge', 'target', details: "undefined target: '#{edge.target}'", missing: false) if target_node.nil?

              source_node.edges << edge
              target_node.edges << edge
            end

          when 'key'
            meta_attribute = MetaAttribute.from_key(element)
            @meta_attributes[meta_attribute.id] = meta_attribute

          when 'node'
            graph = @stack[-1]
            graph.nodes[element.id] = element

          end
        end

        # @!attribute graphml
        #   @return [Rex::Parser::GraphML::Element::GraphML] The root of the parsed document.
        attr_reader :graphml

        private

        def populate_element_default_attributes(element)
          @meta_attributes.values.each do |meta_attribute|
            next unless meta_attribute.valid_for? element
            next if element.attributes.key? meta_attribute.name
            next if meta_attribute.default.nil?

            element.attributes[meta_attribute.name] = meta_attribute.default
          end
        end
      end
    end
  end
end