integrallis/excemel

View on GitHub
lib/excemel/excemel.rb

Summary

Maintainability
B
6 hrs
Test Coverage
#--
# Copyright (c) 2001-2008 Integrallis Software, LLC.
# All Rights Reserved.
# 
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#++

require 'rubygems' 
require 'blankslate' 
require 'java'
require 'module/lang'
require 'module/xom'

module Excemel
  
  # Builder-style wrapper around a XOM document. Unknown method calls are
  # turned into XML elements appended at the current target element, so a
  # document can be written as nested Ruby blocks.
  class Document < BlankSlate

    # Method names the dynamic proxy must never turn into elements. They are
    # excluded in method_missing so that callers invoking them on a document
    # do not add stray <class>, <to_s> or <inspect> tags.
    RESERVED_METHODS = %w(class to_s inspect).freeze

    # Create an Excemel Document
    # The method expects a hash that may contain the following elements
    # :root => value : The name of the root or top-most element of the document
    # :xml => value : A string containing an XML document
    # :url => value : A URL pointing to an XML document
    # :file => filename : A filename pointing to an XML file
    # :validate => true|false : whether to validate the document being read from
    #              an XML string, URL or file (off unless a truthy value is given)
    # :resolve_includes => true|false : replaces xi:include elements by the
    #              content they refer to (off unless a truthy value is given)
    # :namespace => value : the namespace used for the root element; if the
    #              root name carries a prefix, the prefix is remembered so
    #              that later tags with the same prefix reuse the namespace
    #
    # When :root is given it takes precedence; otherwise the first of :xml,
    # :url, :file that is present is parsed. Raises ArgumentError when none
    # of them is supplied.
    def initialize(options)
      resolve_includes = _extract_options(options, :resolve_includes, true)

      _build_document(options)

      if resolve_includes
        # XIncluder.resolve hands back a resolved *copy* and leaves the
        # original untouched, so the root has to be re-read from the copy.
        # Otherwise new tags would be appended to the old, discarded document.
        @doc = XOM::XIncluder.resolve(@doc)
        @root = @doc.get_root_element
      end

      @target = @root
    end

    # Dynamic Proxy behavior
    # Add XML elements (tags) based on the name of the method called on the
    # Document instance, blocks passed get processed recursively.
    # Calls to class, to_s and inspect are ignored (no element is created).
    # Always returns the document itself so calls can be chained.
    def method_missing(sym, *args, &block)
      _append_tag(sym, args, block) unless RESERVED_METHODS.include?(sym.to_s)
      self
    end

    # Returns the XML document as a single line, e.g. no formatting
    def to_xml
      @doc.to_xml
    end

    # Appends a comment node at the current position in the document
    def comment!(string)
      @target.append_child(XOM::Comment.new(string))
      self
    end

    # Appends a text node at the current position in the document
    def text!(string)
      @target.append_child(XOM::Text.new(string))
      self
    end

    # Appends a processing instruction at the current position in the document
    def processing_instruction!(target, data)
      @target.append_child(XOM::ProcessingInstruction.new(target, data))
      self
    end

    # Attempts to reposition the document pointer based on the first element
    # matched by an XPath expression. Returns true if the pointer was
    # successfully moved and false otherwise (non-element matches are skipped).
    def target!(xpath_query)
      nodes = @doc.query xpath_query
      nodes.size.times do |i|
        node = nodes.get(i)
        if node.kind_of?(XOM::Element)
          @target = node
          return true
        end
      end
      false
    end

    # Create a tag named +sym+.  Other than the first argument which
    # is the tag name, the arguments are the same as the tags
    # implemented via <tt>method_missing</tt>.
    # Unlike <tt>method_missing</tt>, no names are filtered out, so this is
    # the way to create elements called class, to_s or inspect.
    # XOM will throw an exception if you pass something with a prefix but no
    # associated namespace
    def tag!(sym, *args, &block)
      _append_tag(sym.to_sym, args, block)
      self
    end

    # Returns a pretty-formatted document with the given indent, max line length
    # and encoding. The preserve_base_uri flag determines whether the serializer
    # preserves the original base URIs by inserting extra xml:base attributes.
    def to_pretty_xml(indent=2, line_length=0, encoding='utf-8', preserve_base_uri=true)
      baos = Lang::ByteArrayOutputStream.new
      serializer = XOM::Serializer.new(baos, encoding)
      serializer.indent = indent
      serializer.max_length = line_length
      # Java setter called by its own name; coerced to a strict boolean.
      serializer.setPreserveBaseURI(preserve_base_uri ? true : false)
      serializer.write @doc
      # Decode with the same encoding the serializer wrote, rather than the
      # platform default charset.
      baos.to_string(encoding)
    end

    # Inserts an XML document type declaration as the first child of the
    # document (i.e. before the root element)
    def doc_type!(root_element_name, public_id='', system_id='')
      doctype = XOM::DocType.new(root_element_name, public_id, system_id)
      @doc.insert_child(doctype, 0)
      self
    end

    # Returns, as an Array, the values of the nodes selected by the XPath
    # expression evaluated against the document, in the order XOM returns
    # them. This XPath expression must not contain any namespace prefixes.
    def query(xpath_query)
      nodes = @doc.query xpath_query
      (0...nodes.size).map { |i| nodes.get(i).get_value }
    end

    # See Jim Weirich's comment on builder (plus all the test frameworks seem
    # to call nil? on objects being tested). Defined explicitly so the call
    # does not fall through to method_missing and create a tag.
    def nil?
      false
    end

    # Returns the value of the document as defined by XPath 1.0. This is the
    # same as the value of the root element, which is the complete PCDATA
    # content of the root element, without any tags, comments, or processing
    # instructions after all entity and character references have been resolved.
    def extract_text
      @doc.get_value
    end

    # Returns XML in the format specified by the Canonical XML Version 1.0
    # (w3.org/TR/2001/REC-xml-c14n-20010315 or Exclusive XML)
    # Canonicalization Version 1.0 (w3.org/TR/2002/REC-xml-exc-c14n-20020718/)
    def to_canonical_form
      baos = Lang::ByteArrayOutputStream.new
      XOM::Canonicalizer.new(baos).write(@doc)
      # Canonical XML is UTF-8 by definition; do not rely on the platform
      # default charset when turning the bytes into a string.
      baos.to_string('UTF-8')
    end

    # Adds attributes to the current target element (pointer to an element in
    # the document, by default the root of the document). A nil value removes
    # the attribute of that name if present.
    def attributes(attrs)
      _add_attributes(@target, attrs)
    end

    private

    # Builds the element described by +sym+ and +args+, fills it from +block+
    # (or from the text arguments when no block is given) and appends it to
    # the current target. Shared by method_missing and tag!.
    def _append_tag(sym, args, block)
      sym = _extract_sym(args, sym)
      attrs, namespace, _prefix, text = _process_args(args, sym)
      tag = _build_tag(sym, namespace)
      _add_attributes(tag, attrs)

      if block
        # A block wins over text arguments: children come from the block.
        _nested_structures(block, tag)
      elsif !text.nil?
        tag.append_child text
      end

      @target.append_child tag
    end

    # Adds attributes to the given Element (tag) as defined by the parameter
    # target; passing an attribute with a value of nil removes the attribute
    # if it exists. Does nothing when attrs is nil.
    def _add_attributes(target, attrs)
      return unless attrs

      attrs.each do |key, value|
        name = key.to_s
        if value.nil?
          existing = target.get_attribute(name)
          target.remove_attribute(existing) if existing
        else
          target.add_attribute(XOM::Attribute.new(name, value.to_s))
        end
      end
    end

    # Runs +block+ with +new_target+ as the current target so that tags created
    # inside the block become its children; the previous target is restored
    # afterwards even if the block raises.
    def _nested_structures(block, new_target)
      old_target = @target
      @target = new_target
      block.call(self)
    ensure
      @target = old_target
    end

    # Creates a XOM element named +sym+, in +namespace+ when a non-empty one
    # is given.
    def _build_tag(sym, namespace = nil)
      if namespace && !namespace.empty?
        XOM::Element.new(sym.to_s, namespace)
      else
        XOM::Element.new(sym.to_s)
      end
    end

    # Looks up the namespace registered for +prefix+. Returns nil when no
    # namespace has been registered at all and an empty string when this
    # particular prefix is unknown.
    def _get_namespace(prefix)
      @namespaces ? "#{@namespaces[prefix]}" : nil
    end

    # Supports the builder-style call doc.prefix :name : when the first
    # argument is a Symbol it is consumed from +args+ and joined to the
    # method name as "prefix:name".
    def _extract_sym(args, sym)
      args.first.kind_of?(Symbol) ? "#{sym}:#{args.shift}" : sym
    end

    # Returns the part of +sym+ before the first colon, or nil if the name
    # has no colon.
    def _extract_prefix(sym)
      name = sym.to_s
      name.include?(':') ? name.split(":").first : nil
    end

    # Sorts the arguments of a tag call into attributes, namespace and text.
    # Hash arguments supply attributes; a :namespace key in a hash registers
    # that namespace and the remaining keys of the same hash are still treated
    # as attributes. Every non-hash argument is converted to a string and
    # concatenated into the text content.
    # Returns [attrs, namespace, prefix, text]; attrs and text are nil when
    # no such arguments were given.
    def _process_args(args, sym)
      attrs, namespace, text = nil
      prefix = _extract_prefix(sym)

      args.each do |arg|
        case arg
        when Hash
          if arg.has_key? :namespace
            namespace = arg[:namespace]
            # NOTE: for a name without a colon the whole tag name is used as
            # the registry key, which is what lets unprefixed tags carry a
            # namespace through the lookup below.
            prefix = sym.to_s.split(":").first
            (@namespaces ||= {})[prefix] = namespace if prefix
            arg = arg.reject { |key, _value| key == :namespace }
          end
          (attrs ||= {}).merge!(arg)
        else
          text = (text || '') + arg.to_s
        end
      end

      namespace = _get_namespace(prefix) if prefix
      [attrs, namespace, prefix, text]
    end

    # Reads options[sym]. With +boolean+ set, a missing or nil value is
    # reported as false instead of nil.
    def _extract_options(options, sym, boolean = false)
      value = options[sym]
      boolean ? (value || false) : value
    end

    # Parses a document with +builder+ from the first source present in
    # +options+, checked in the order :xml, :url, :file. Returns nil when
    # none of them is given.
    def _generate_doc_from_source(options, builder)
      xml = _extract_options(options, :xml)
      url = _extract_options(options, :url)
      file = _extract_options(options, :file)

      if xml
        builder.build(xml, nil)
      elsif url
        builder.build url
      elsif file
        builder.build Java::JavaIo::File.new(file)
      end
    end

    # Sets @doc and @root either from a fresh root element (:root) or by
    # parsing an existing source (:xml, :url or :file).
    def _build_document(options)
      root = _extract_options(options, :root)
      namespace = _extract_options(options, :namespace)
      validate = _extract_options(options, :validate, true)

      if root
        _build_document_from_root(root, namespace)
      else
        @doc = _generate_doc_from_source(options, XOM::Builder.new(validate))
        # Fail with a meaningful message instead of a NoMethodError on nil.
        unless @doc
          raise ArgumentError, "one of :root, :xml, :url or :file must be provided"
        end
        @root = @doc.get_root_element
      end
    end

    # Creates a new document whose root element is named +root+. When a
    # namespace is given and the root name has a prefix, the prefix is
    # registered so later tags using it resolve to the same namespace.
    def _build_document_from_root(root, namespace)
      @root = _build_tag(root, namespace)
      if namespace
        # _extract_prefix goes through to_s, so a Symbol root works too.
        prefix = _extract_prefix(root)
        (@namespaces ||= {})[prefix] = namespace if prefix
      end
      @doc = XOM::Document.new @root
    end

  end
  
end