sferik/multi_xml

View on GitHub
lib/multi_xml/parsers/ox.rb

Summary

Maintainability
A
0 mins
Test Coverage
require "ox" unless defined?(Ox)

# Each MultiXml parser is expected to parse an XML document into a Hash. The
# conversion rules are:
#
# - Each document starts out as an empty Hash.
#
# - Reading an element creates an entry in the parent Hash that has a key of
#   the element name and a value of a Hash with attributes as key value
#   pairs. Children are added as described by this rule.
#
# - Text and CDATA are stored in the parent element Hash with a key of
#   MultiXml::CONTENT_ROOT and a value of the text itself.
#
# - If a key already exists in the Hash then the value associated with the key
#   is converted to an Array with the old and new value in it.
#
# - Other elements such as the xml prolog, doctype, and comments are ignored.
#

module MultiXml
  module Parsers
    module Ox # :nodoc:
      module_function

      # Names the exception class associated with parse failures of this
      # adapter.
      #
      # NOTE(review): Exception is the root of Ruby's exception hierarchy, so
      # this is as broad as it gets. Presumably the caller rescues this class
      # around #parse -- verify against the caller before narrowing it.
      #
      # @return [Class] always Exception
      def parse_error
        Exception
      end

      # Feeds +io+ through Ox's SAX parser using a fresh Handler and returns
      # whatever Handler#doc reports once parsing has finished.
      #
      # NOTE(review): the option semantics are defined by Ox, not by this
      # file -- presumably convert_special decodes character entities and
      # skip: :skip_return drops carriage returns; confirm in the Ox docs.
      #
      # @param io the XML source handed straight to Ox.sax_parse
      # @return the value of Handler#doc after the parse
      def parse(io)
        sax_options = {convert_special: true, skip: :skip_return}
        sax_handler = Handler.new
        ::Ox.sax_parse(sax_handler, io, **sax_options)
        sax_handler.doc
      end

      # SAX callback receiver that assembles the parsed document as nested
      # Hashes. @stack holds the chain of Hashes currently being filled: the
      # first entry is the document Hash and the last entry is the innermost
      # element that is still open.
      class Handler
        attr_accessor :stack

        def initialize
          @stack = []
        end

        # The document Hash, or nil when no element has been started.
        def doc
          @stack.first
        end

        # Records an attribute on the innermost open Hash. Attributes that
        # arrive while the stack is empty are dropped (presumably those of the
        # XML prolog -- verify against Ox's callback order).
        def attr(name, value)
          return if @stack.empty?

          append(name, value)
        end

        # Stores text content under MultiXml::CONTENT_ROOT.
        def text(value)
          append(MultiXml::CONTENT_ROOT, value)
        end

        # Stores CDATA content under the same key as plain text.
        def cdata(value)
          append(MultiXml::CONTENT_ROOT, value)
        end

        # Opens an element: lazily creates the document Hash, registers a new
        # empty Hash under the element name in the current Hash, and makes that
        # new Hash the current one.
        def start_element(name)
          @stack << {} if @stack.empty?
          element = {}
          append(name, element)
          @stack << element
        end

        # Closes the current element by discarding the top of the stack.
        def end_element(_name)
          @stack.pop
        end

        # Turns a reported parse problem into a StandardError carrying the
        # message and its position.
        def error(message, line, column)
          raise StandardError, "#{message} at #{line}:#{column}"
        end

        # Adds +value+ under the stringified +key+ in the current Hash. A
        # repeated key collects its values in an Array: the second occurrence
        # wraps old and new value, later ones are pushed onto that Array.
        def append(key, value)
          name = key.to_s
          target = @stack.last
          return target[name] = value unless target.key?(name)

          existing = target[name]
          if existing.is_a?(Array)
            existing << value
          else
            target[name] = [existing, value]
          end
        end
      end
    end
  end
end