ext/java/nokogiri/internals/c14n/CanonicalizerBase.java from sparklemotion/nokogiri

ext/java/nokogiri/internals/c14n/CanonicalizerBase.java
Summary

Maintainability

4 days
Test Coverage

Issues
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package nokogiri.internals.c14n;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Set;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;


import org.w3c.dom.Attr;
import org.w3c.dom.Comment;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.ProcessingInstruction;

/**
 * Abstract base class for canonicalization algorithms.
 *
 * @author Christian Geuer-Pollmann <geuerp@apache.org>
 */
public abstract class CanonicalizerBase extends CanonicalizerSpi
{
  public static final String XML = "xml";
  public static final String XMLNS = "xmlns";

  protected static final AttrCompare COMPARE = new AttrCompare();
  protected static final Attr nullNode;

  private static final byte[] END_PI = {'?', '>'};
  private static final byte[] BEGIN_PI = {'<', '?'};
  private static final byte[] END_COMM = {'-', '-', '>'};
  private static final byte[] BEGIN_COMM = {'<', '!', '-', '-'};
  private static final byte[] XA = {'&', '#', 'x', 'A', ';'};
  private static final byte[] X9 = {'&', '#', 'x', '9', ';'};
  private static final byte[] QUOT = {'&', 'q', 'u', 'o', 't', ';'};
  private static final byte[] XD = {'&', '#', 'x', 'D', ';'};
  private static final byte[] GT = {'&', 'g', 't', ';'};
  private static final byte[] LT = {'&', 'l', 't', ';'};
  private static final byte[] END_TAG = {'<', '/'};
  private static final byte[] AMP = {'&', 'a', 'm', 'p', ';'};
  private static final byte[] equalsStr = {'=', '\"'};

  protected static final int NODE_BEFORE_DOCUMENT_ELEMENT = -1;
  protected static final int NODE_NOT_BEFORE_OR_AFTER_DOCUMENT_ELEMENT = 0;
  protected static final int NODE_AFTER_DOCUMENT_ELEMENT = 1;

  static
  {
    // The null xmlns definition.
    try {
      DocumentBuilder documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
      nullNode = documentBuilder.newDocument().createAttributeNS(Constants.NamespaceSpecNS, XMLNS);
      nullNode.setValue("");
    } catch (Exception e) {
      throw new RuntimeException("Unable to create nullNode: " + e);
    }
  }

  private List<NodeFilter> nodeFilter;

  private final boolean includeComments;
  //private Set<Node> xpathNodeSet;
  /**
   * The node to be skipped/excluded from the DOM tree
   * in subtree canonicalization.
   */
  private Node excludeNode;
  private OutputStream writer = new ByteArrayOutputStream();

  /**
   * Constructor CanonicalizerBase
   *
   * @param includeComments
   */
  public
  CanonicalizerBase(boolean includeComments)
  {
    this.includeComments = includeComments;
  }

  /**
   * Method engineCanonicalizeSubTree
   * @inheritDoc
   * @param rootNode
   * @throws CanonicalizationException
   */
  @Override
  public byte[]
  engineCanonicalizeSubTree(Node rootNode, CanonicalFilter filter)
  throws CanonicalizationException
  {
    return engineCanonicalizeSubTree(rootNode, (Node)null, filter);
  }

  /**
   * @param writer The writer to set.
   */
  @Override
  public void
  setWriter(OutputStream writer)
  {
    this.writer = writer;
  }

  /**
   * Canonicalizes a Subtree node.
   *
   * @param rootNode
   *            the root of the subtree to canonicalize
   * @param excludeNode
   *            a node to be excluded from the canonicalize operation
   * @return The canonicalize stream.
   * @throws CanonicalizationException
   */
  protected byte[]
  engineCanonicalizeSubTree(Node rootNode, Node excludeNode, CanonicalFilter filter)
  throws CanonicalizationException
  {
    this.excludeNode = excludeNode;
    try {
      NameSpaceSymbTable ns = new NameSpaceSymbTable();
      int nodeLevel = NODE_BEFORE_DOCUMENT_ELEMENT;
      if (rootNode != null && Node.ELEMENT_NODE == rootNode.getNodeType()) {
        //Fills the nssymbtable with the definitions of the parent of the root subnode
        getParentNameSpaces((Element)rootNode, ns);
        nodeLevel = NODE_NOT_BEFORE_OR_AFTER_DOCUMENT_ELEMENT;
      }
      this.canonicalizeSubTree(rootNode, ns, rootNode, nodeLevel, filter);
      this.writer.flush();
      if (this.writer instanceof ByteArrayOutputStream) {
        byte[] result = ((ByteArrayOutputStream)this.writer).toByteArray();
        if (reset) {
          ((ByteArrayOutputStream)this.writer).reset();
        } else {
          this.writer.close();
        }
        return result;
      } else {
        this.writer.close();
      }
      return null;

    } catch (UnsupportedEncodingException ex) {
      throw new CanonicalizationException("empty", ex);
    } catch (IOException ex) {
      throw new CanonicalizationException("empty", ex);
    }
  }


  /**
   * Method canonicalizeSubTree, this function is a recursive one.
   *
   * @param currentNode
   * @param ns
   * @param endnode
   * @throws CanonicalizationException
   * @throws IOException
   */
  protected final void
  canonicalizeSubTree(
    Node currentNode, NameSpaceSymbTable ns, Node endnode, int documentLevel, CanonicalFilter filter
  ) throws CanonicalizationException, IOException
  {
    if (isVisibleInt(currentNode) == -1) {
      return;
    }
    Node sibling = null;
    Node parentNode = null;
    final OutputStream writer = this.writer;
    final Node excludeNode = this.excludeNode;
    final boolean includeComments = this.includeComments;
    Map<String, byte[]> cache = new HashMap<String, byte[]>();
    do {
      switch (currentNode.getNodeType()) {

        case Node.ENTITY_NODE :
        case Node.NOTATION_NODE :
        case Node.ATTRIBUTE_NODE :
          // illegal node type during traversal
          throw new CanonicalizationException("empty");

        case Node.DOCUMENT_FRAGMENT_NODE :
        case Node.DOCUMENT_NODE :
          ns.outputNodePush();
          sibling = currentNode.getFirstChild();
          break;

        case Node.COMMENT_NODE :
          if (includeComments) {
            outputCommentToWriter((Comment) currentNode, writer, documentLevel);
          }
          break;

        case Node.PROCESSING_INSTRUCTION_NODE :
          outputPItoWriter((ProcessingInstruction) currentNode, writer, documentLevel);
          break;

        case Node.TEXT_NODE :
        case Node.CDATA_SECTION_NODE :
          outputTextToWriter(currentNode.getNodeValue(), writer);
          break;

        case Node.ELEMENT_NODE :
          documentLevel = NODE_NOT_BEFORE_OR_AFTER_DOCUMENT_ELEMENT;
          if (currentNode == excludeNode) {
            break;
          }
          if (filter != null && !filter.includeNodes(currentNode, parentNode)) {
            break;
          }

          Element currentElement = (Element)currentNode;
          //Add a level to the nssymbtable. So latter can be pop-back.
          ns.outputNodePush();
          writer.write('<');
          String name = currentElement.getTagName();
          UtfHelpper.writeByte(name, writer, cache);

          Iterator<Attr> attrs = this.handleAttributesSubtree(currentElement, ns);
          if (attrs != null) {
            //we output all Attrs which are available
            while (attrs.hasNext()) {
              Attr attr = attrs.next();
              outputAttrToWriter(attr.getNodeName(), attr.getNodeValue(), writer, cache);
            }
          }
          writer.write('>');
          sibling = currentNode.getFirstChild();
          if (sibling == null) {
            writer.write(END_TAG);
            UtfHelpper.writeStringToUtf8(name, writer);
            writer.write('>');
            //We finished with this level, pop to the previous definitions.
            ns.outputNodePop();
            if (parentNode != null) {
              sibling = currentNode.getNextSibling();
            }
          } else {
            parentNode = currentElement;
          }
          break;
        case Node.DOCUMENT_TYPE_NODE :
        default :
          break;
      }
      while (sibling == null && parentNode != null) {
        writer.write(END_TAG);
        UtfHelpper.writeByte(((Element)parentNode).getTagName(), writer, cache);
        writer.write('>');
        //We finished with this level, pop to the previous definitions.
        ns.outputNodePop();
        if (parentNode == endnode) {
          return;
        }
        sibling = parentNode.getNextSibling();
        parentNode = parentNode.getParentNode();
        if (parentNode == null || Node.ELEMENT_NODE != parentNode.getNodeType()) {
          documentLevel = NODE_AFTER_DOCUMENT_ELEMENT;
          parentNode = null;
        }
      }
      if (sibling == null) {
        return;
      }
      currentNode = sibling;
      sibling = currentNode.getNextSibling();
    } while (true);
  }

  protected int
  isVisibleDO(Node currentNode, int level)
  {
    if (nodeFilter != null) {
      Iterator<NodeFilter> it = nodeFilter.iterator();
      while (it.hasNext()) {
        int i = (it.next()).isNodeIncludeDO(currentNode, level);
        if (i != 1) {
          return i;
        }
      }
    }
    //if ((this.xpathNodeSet != null) && !this.xpathNodeSet.contains(currentNode)) {
    //    return 0;
    //}
    return 1;
  }

  protected int
  isVisibleInt(Node currentNode)
  {
    if (nodeFilter != null) {
      Iterator<NodeFilter> it = nodeFilter.iterator();
      while (it.hasNext()) {
        int i = (it.next()).isNodeInclude(currentNode);
        if (i != 1) {
          return i;
        }
      }
    }
    //if ((this.xpathNodeSet != null) && !this.xpathNodeSet.contains(currentNode)) {
    //    return 0;
    //}
    return 1;
  }

  protected boolean
  isVisible(Node currentNode)
  {
    if (nodeFilter != null) {
      Iterator<NodeFilter> it = nodeFilter.iterator();
      while (it.hasNext()) {
        if (it.next().isNodeInclude(currentNode) != 1) {
          return false;
        }
      }
    }
    //if ((this.xpathNodeSet != null) && !this.xpathNodeSet.contains(currentNode)) {
    //    return false;
    //}
    return true;
  }

  protected void
  handleParent(Element e, NameSpaceSymbTable ns)
  {
    if (!e.hasAttributes() && e.getNamespaceURI() == null) {
      return;
    }
    NamedNodeMap attrs = e.getAttributes();
    int attrsLength = attrs.getLength();
    for (int i = 0; i < attrsLength; i++) {
      Attr attribute = (Attr) attrs.item(i);
      String NName = attribute.getLocalName();
      String NValue = attribute.getNodeValue();

      if (Constants.NamespaceSpecNS.equals(attribute.getNamespaceURI())
          && (!XML.equals(NName) || !Constants.XML_LANG_SPACE_SpecNS.equals(NValue))) {
        ns.addMapping(NName, NValue, attribute);
      }
    }
    if (e.getNamespaceURI() != null) {
      String NName = e.getPrefix();
      String NValue = e.getNamespaceURI();
      String Name;
      if (NName == null || NName.equals("")) {
        NName = XMLNS;
        Name = XMLNS;
      } else {
        Name = XMLNS + ":" + NName;
      }
      Attr n = e.getOwnerDocument().createAttributeNS("http://www.w3.org/2000/xmlns/", Name);
      n.setValue(NValue);
      ns.addMapping(NName, NValue, n);
    }
  }

  /**
   * Adds to ns the definitions from the parent elements of el
   * @param el
   * @param ns
   */
  protected final void
  getParentNameSpaces(Element el, NameSpaceSymbTable ns)
  {
    Node n1 = el.getParentNode();
    if (n1 == null || Node.ELEMENT_NODE != n1.getNodeType()) {
      return;
    }
    //Obtain all the parents of the element
    List<Element> parents = new ArrayList<Element>();
    Node parent = n1;
    while (parent != null && Node.ELEMENT_NODE == parent.getNodeType()) {
      parents.add((Element)parent);
      parent = parent.getParentNode();
    }
    //Visit them in reverse order.
    ListIterator<Element> it = parents.listIterator(parents.size());
    while (it.hasPrevious()) {
      Element ele = it.previous();
      handleParent(ele, ns);
    }
    parents.clear();
    Attr nsprefix;
    if (((nsprefix = ns.getMappingWithoutRendered(XMLNS)) != null)
        && "".equals(nsprefix.getValue())) {
      ns.addMappingAndRender(XMLNS, "", nullNode);
    }
  }

  /**
   * Obtain the attributes to output for this node in XPathNodeSet c14n.
   *
   * @param element
   * @param ns
   * @return the attributes nodes to output.
   * @throws CanonicalizationException
   */
  abstract Iterator<Attr> handleAttributes(Element element, NameSpaceSymbTable ns)
  throws CanonicalizationException;

  /**
   * Obtain the attributes to output for this node in a Subtree c14n.
   *
   * @param element
   * @param ns
   * @return the attributes nodes to output.
   * @throws CanonicalizationException
   */
  abstract Iterator<Attr> handleAttributesSubtree(Element element, NameSpaceSymbTable ns)
  throws CanonicalizationException;

  //abstract void circumventBugIfNeeded(XMLSignatureInput input)
  //    throws CanonicalizationException, ParserConfigurationException, IOException, SAXException;

  /**
   * Outputs an Attribute to the internal Writer.
   *
   * The string value of the node is modified by replacing
   * <UL>
   * <LI>all ampersands (&) with <CODE>&amp;amp;</CODE></LI>
   * <LI>all open angle brackets (<) with <CODE>&amp;lt;</CODE></LI>
   * <LI>all quotation mark characters with <CODE>&amp;quot;</CODE></LI>
   * <LI>and the whitespace characters <CODE>#x9</CODE>, #xA, and #xD, with character
   * references. The character references are written in uppercase
   * hexadecimal with no leading zeroes (for example, <CODE>#xD</CODE> is represented
   * by the character reference <CODE>&amp;#xD;</CODE>)</LI>
   * </UL>
   *
   * @param name
   * @param value
   * @param writer
   * @throws IOException
   */
  protected static final void
  outputAttrToWriter(
    final String name, final String value,
    final OutputStream writer, final Map<String, byte[]> cache
  ) throws IOException
  {
    writer.write(' ');
    UtfHelpper.writeByte(name, writer, cache);
    writer.write(equalsStr);
    byte[] toWrite;
    final int length = value.length();
    int i = 0;
    while (i < length) {
      char c = value.charAt(i++);

      switch (c) {

        case '&' :
          toWrite = AMP;
          break;

        case '<' :
          toWrite = LT;
          break;

        case '"' :
          toWrite = QUOT;
          break;

        case 0x09 :    // '\t'
          toWrite = X9;
          break;

        case 0x0A :    // '\n'
          toWrite = XA;
          break;

        case 0x0D :    // '\r'
          toWrite = XD;
          break;

        default :
          if (c < 0x80) {
            writer.write(c);
          } else {
            UtfHelpper.writeCharToUtf8(c, writer);
          }
          continue;
      }
      writer.write(toWrite);
    }

    writer.write('\"');
  }

  /**
   * Outputs a PI to the internal Writer.
   *
   * @param currentPI
   * @param writer where to write the things
   * @throws IOException
   */
  protected void
  outputPItoWriter(
    ProcessingInstruction currentPI, OutputStream writer, int position
  ) throws IOException
  {
    if (position == NODE_AFTER_DOCUMENT_ELEMENT) {
      writer.write('\n');
    }
    writer.write(BEGIN_PI);

    final String target = currentPI.getTarget();
    int length = target.length();

    for (int i = 0; i < length; i++) {
      char c = target.charAt(i);
      if (c == 0x0D) {
        writer.write(XD);
      } else {
        if (c < 0x80) {
          writer.write(c);
        } else {
          UtfHelpper.writeCharToUtf8(c, writer);
        }
      }
    }

    final String data = currentPI.getData();

    length = data.length();

    if (length > 0) {
      writer.write(' ');

      for (int i = 0; i < length; i++) {
        char c = data.charAt(i);
        if (c == 0x0D) {
          writer.write(XD);
        } else {
          UtfHelpper.writeCharToUtf8(c, writer);
        }
      }
    }

    writer.write(END_PI);
    if (position == NODE_BEFORE_DOCUMENT_ELEMENT) {
      writer.write('\n');
    }
  }

  /**
   * Method outputCommentToWriter
   *
   * @param currentComment
   * @param writer writer where to write the things
   * @throws IOException
   */
  protected void
  outputCommentToWriter(
    Comment currentComment, OutputStream writer, int position
  ) throws IOException
  {
    if (position == NODE_AFTER_DOCUMENT_ELEMENT) {
      writer.write('\n');
    }
    writer.write(BEGIN_COMM);

    final String data = currentComment.getData();
    final int length = data.length();

    for (int i = 0; i < length; i++) {
      char c = data.charAt(i);
      if (c == 0x0D) {
        writer.write(XD);
      } else {
        if (c < 0x80) {
          writer.write(c);
        } else {
          UtfHelpper.writeCharToUtf8(c, writer);
        }
      }
    }

    writer.write(END_COMM);
    if (position == NODE_BEFORE_DOCUMENT_ELEMENT) {
      writer.write('\n');
    }
  }

  /**
   * Outputs a Text of CDATA section to the internal Writer.
   *
   * @param text
   * @param writer writer where to write the things
   * @throws IOException
   */
  protected static final void
  outputTextToWriter(
    final String text, final OutputStream writer
  ) throws IOException
  {
    final int length = text.length();
    byte[] toWrite;
    for (int i = 0; i < length; i++) {
      char c = text.charAt(i);

      switch (c) {

        case '&' :
          toWrite = AMP;
          break;

        case '<' :
          toWrite = LT;
          break;

        case '>' :
          toWrite = GT;
          break;

        case 0xD :
          toWrite = XD;
          break;

        default :
          if (c < 0x80) {
            writer.write(c);
          } else {
            UtfHelpper.writeCharToUtf8(c, writer);
          }
          continue;
      }
      writer.write(toWrite);
    }
  }

}