rpanachi/core_ext

View on GitHub
lib/core_ext/xml_mini/jdom.rb

Summary

Maintainability
A
2 hrs
Test Coverage
raise "JRuby is required to use the JDOM backend for XmlMini" unless RUBY_PLATFORM =~ /java/

require 'jruby'
include Java

require 'core_ext/object/blank'

java_import javax.xml.parsers.DocumentBuilder unless defined? DocumentBuilder
java_import javax.xml.parsers.DocumentBuilderFactory unless defined? DocumentBuilderFactory
java_import java.io.StringReader unless defined? StringReader
java_import org.xml.sax.InputSource unless defined? InputSource
java_import org.xml.sax.Attributes unless defined? Attributes
java_import org.w3c.dom.Node unless defined? Node

module CoreExt
  module XmlMini_JDOM #:nodoc:
    extend self

    # Hash key under which an element's text content is stored.
    CONTENT_KEY = '__content__'.freeze

    # Names of the node-type constants declared by org.w3c.dom.Node.
    # Frozen so the shared list cannot be mutated at runtime.
    NODE_TYPE_NAMES = %w[
      ATTRIBUTE_NODE CDATA_SECTION_NODE COMMENT_NODE DOCUMENT_FRAGMENT_NODE
      DOCUMENT_NODE DOCUMENT_TYPE_NODE ELEMENT_NODE ENTITY_NODE
      ENTITY_REFERENCE_NODE NOTATION_NODE PROCESSING_INSTRUCTION_NODE TEXT_NODE
    ].freeze

    # Parse an XML Document string or IO into a simple hash using Java's DOM parser (javax.xml.parsers).
    # data::
    #   XML Document string or IO to parse
    def parse(data)
      # Accept IO-like objects by reading them fully into a string first.
      data = data.read if data.respond_to?(:read)
      return {} if data.blank?

      # The factory lives in a local instead of an instance variable: this
      # module extends itself, so an instance variable would be module-level
      # state shared by every caller (and every thread).
      factory = DocumentBuilderFactory.new_instance
      # secure processing of java xml
      # http://www.ibm.com/developerworks/xml/library/x-tipcfsx/index.html
      factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false)
      factory.setFeature("http://xml.org/sax/features/external-general-entities", false)
      factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false)
      factory.setFeature(javax.xml.XMLConstants::FEATURE_SECURE_PROCESSING, true)

      source = InputSource.new(StringReader.new(data))
      document = factory.new_document_builder.parse(source)

      # Seed the hash with an empty content placeholder and convert the
      # document's root element, limited to XmlMini.depth levels of nesting.
      merge_element!({ CONTENT_KEY => '' }, document.document_element, XmlMini.depth)
    end

    private

    # Convert an XML element and merge into the hash
    #
    # hash::
    #   Hash to merge the converted element into.
    # element::
    #   XML element to merge into hash
    def merge_element!(hash, element, depth)
      # A remaining depth of zero means the nesting allowance is used up.
      if depth == 0
        raise 'Document too deep!'
      end

      delete_empty(hash)

      # The tag name is read first and becomes the key for the collapsed value.
      tag = element.tag_name
      collapsed = collapse(element, depth)
      merge!(hash, tag, collapsed)
    end

    def delete_empty(hash)
      # Only an empty-string placeholder is removed; any other value stays.
      return unless hash[CONTENT_KEY] == ''

      hash.delete(CONTENT_KEY)
    end

    # Actually converts an XML document element into a data structure.
    #
    # element::
    #   The document element to be collapsed.
    def collapse(element, depth)
      hash = get_attributes(element)

      child_nodes = element.child_nodes
      child_count = child_nodes.length

      # Recurse into element children only. Text is gathered separately by
      # merge_texts!, and other node kinds (comments, processing
      # instructions, ...) are not elements, so they have no tag name to
      # file a value under.
      (0...child_count).each do |i|
        child = child_nodes.item(i)
        merge_element!(hash, child, depth - 1) if child.node_type == Node.ELEMENT_NODE
      end

      # A childless element always has its texts merged; an element with
      # children only does when its text is more than whitespace.
      merge_texts!(hash, element) if child_count == 0 || !empty_content?(element)
      hash
    end

    # Merge all the texts of an element into the hash
    #
    # hash::
    #   Hash to add the converted element to.
    # element::
    #   XML element whose texts are to be merged into the hash
    def merge_texts!(hash, element)
      delete_empty(hash)

      # Concatenate every text piece; nothing is stored when the result is empty.
      combined = texts(element).join
      return hash if combined.empty?

      merge!(hash, CONTENT_KEY, combined)
    end

    # Adds a new key/value pair to an existing Hash. If the key to be added
    # already exists and the existing value associated with key is not
    # an Array, it will be wrapped in an Array. Then the new value is
    # appended to that Array.
    #
    # hash::
    #   Hash to add key/value pair to.
    # key::
    #   Key to be added.
    # value::
    #   Value to be associated with key.
    def merge!(hash, key, value)
      # New key: store the value, wrapping an Array value once so a later
      # merge appends to the wrapper rather than to the value itself.
      unless hash.key?(key)
        hash[key] = value.instance_of?(Array) ? [value] : value
        return hash
      end

      # Existing key: append to an Array, or pair up the old and new values.
      existing = hash[key]
      if existing.instance_of?(Array)
        existing << value
      else
        hash[key] = [existing, value]
      end
      hash
    end

    # Converts the attributes array of an XML element into a hash.
    # Returns an empty Hash if node has no attributes.
    #
    # element::
    #   XML element to extract attributes from.
    def get_attributes(element)
      attrs = element.attributes
      (0...attrs.length).each_with_object({}) do |index, result|
        # Seed an empty content placeholder as soon as one attribute exists.
        result[CONTENT_KEY] ||= ''
        attribute = attrs.item(index)
        result[attribute.name] = attribute.value
      end
    end

    # Returns an Array with the data of each text node directly under an element.
    #
    # element::
    #   XML element to be checked.
    def texts(element)
      child_nodes = element.child_nodes
      # CDATA sections carry character data just like plain text nodes, so
      # both kinds contribute to the element's text.
      text_types = [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]

      (0...child_nodes.length).each_with_object([]) do |i, collected|
        item = child_nodes.item(i)
        collected << item.get_data if text_types.include?(item.node_type)
      end
    end

    # Determines if a document element has no text content other than whitespace
    #
    # element::
    #   XML element to be checked.
    def empty_content?(element)
      child_nodes = element.child_nodes
      # CDATA sections count as text content alongside plain text nodes.
      text_types = [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]

      # Empty means no text-bearing child has anything left after stripping
      # whitespace; the scan stops at the first child that does.
      (0...child_nodes.length).none? do |i|
        item = child_nodes.item(i)
        text_types.include?(item.node_type) && !item.get_data.strip.empty?
      end
    end
  end
end