sparklemotion/nokogiri

View on GitHub
ext/java/nokogiri/internals/c14n/XMLUtils.java

Summary

Maintainability
F
3 days
Test Coverage
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package nokogiri.internals.c14n;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;


import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;

/**
 * DOM and XML accessibility and comfort functions.
 *
 * @author Christian Geuer-Pollmann
 */
public class XMLUtils
{

  /**
   * Private constructor. Every member of this class is static, so
   * instances are never needed and cannot be created from outside.
   */
  private
  XMLUtils()
  {
    // we don't allow instantiation
  }

  /**
   * Concatenates the data of every text node that is a direct child of the
   * given element, in sibling order. Children of any other node type are
   * skipped and are not descended into.
   *
   * @param element the element whose immediate text children are collected
   * @return the concatenated text, or the empty string if the element has
   *         no text children
   */
  public static String
  getFullTextChildrenFromElement(Element element)
  {
    StringBuilder text = new StringBuilder();

    for (Node current = element.getFirstChild(); current != null;
         current = current.getNextSibling()) {
      if (current.getNodeType() != Node.TEXT_NODE) {
        continue;
      }
      Text textNode = (Text) current;
      text.append(textNode.getData());
    }

    return text.toString();
  }

  /**
   * Returns the document that the given node belongs to.
   *
   * <p>{@link Node#getOwnerDocument} returns <CODE>null</CODE> if the
   * {@link Node} is itself a {@link Document}; this method returns the
   * node itself in that case.
   *
   * @param node the node whose document is wanted; must not be null
   * @return the node itself if it is a document node, otherwise whatever
   *         {@link Node#getOwnerDocument} reports for it
   * @throws NullPointerException if <CODE>node</CODE> is null, or if the
   *         DOM implementation raises one while resolving the owner
   */
  public static Document
  getOwnerDocument(Node node)
  {
    if (node.getNodeType() == Node.DOCUMENT_NODE) {
      return (Document) node;
    }
    // A NullPointerException raised by the DOM implementation is allowed to
    // propagate untouched: re-wrapping it in a fresh NullPointerException
    // built only from the message would throw away the original stack trace.
    return node.getOwnerDocument();
  }

  /**
   * Returns the first non-null owner document of the nodes in this set.
   *
   * <p>A document node in the set is returned as-is, because
   * {@link Node#getOwnerDocument} returns <CODE>null</CODE> if the
   * {@link Node} is a {@link Document}. For an attribute node the document
   * is looked up through its owner element; an attribute without an owner
   * element is skipped. A node that reports no owner document is skipped
   * as well, and the search continues with the next node.
   *
   * @param xpathNodeSet the nodes to inspect; must not be null and must not
   *        contain null
   * @return the owner document, never null
   * @throws NullPointerException if the set is empty or none of its nodes
   *         yields an owner document
   */
  public static Document
  getOwnerDocument(Set<Node> xpathNodeSet)
  {
    for (Node node : xpathNodeSet) {
      int nodeType = node.getNodeType();
      if (nodeType == Node.DOCUMENT_NODE) {
        return (Document) node;
      }

      // Attributes are resolved through their owner element.
      Node anchor = node;
      if (nodeType == Node.ATTRIBUTE_NODE) {
        anchor = ((Attr) node).getOwnerElement();
        if (anchor == null) {
          // detached attribute: nothing to resolve, try the next node
          continue;
        }
      }

      Document owner = anchor.getOwnerDocument();
      if (owner != null) {
        return owner;
      }
    }

    // Same exception type as before, but raised deliberately and with a
    // message instead of by dereferencing a null exception variable.
    throw new NullPointerException("no owner document found in the node set");
  }

  /**
   * Copies the items of a {@link NodeList} into a new {@link Set}.
   *
   * @param xpathNodeSet the node list to copy; may be null
   * @return a new set holding the items of the list, or a new empty set if
   *         the list is null
   */
  public static Set<Node>
  convertNodelistToSet(NodeList xpathNodeSet)
  {
    if (xpathNodeSet == null) {
      return new HashSet<Node>();
    }

    final int count = xpathNodeSet.getLength();
    final Set<Node> nodes = new HashSet<Node>(count);

    int index = 0;
    while (index < count) {
      nodes.add(xpathNodeSet.item(index));
      index++;
    }

    return nodes;
  }

  /**
   * This method spreads all namespace attributes in a DOM document to their
   * children. This is needed because the XML Signature XPath transform
   * must evaluate the XPath against all nodes in the input, even against
   * XPath namespace nodes. Through a bug in XalanJ2, the namespace nodes are
   * not fully visible in the Xalan XPath model, so we have to do this by
   * hand in DOM spaces so that the nodes become visible in XPath space.
   *
   * <p>Before spreading, an <CODE>xmlns=""</CODE> declaration is added to the
   * document element if it does not carry an <CODE>xmlns</CODE> declaration
   * yet. A document without a document element is left without that
   * declaration instead of failing.
   *
   * @param doc the document to modify in place; must not be null
   * @see <A HREF="http://nagoya.apache.org/bugzilla/show_bug.cgi?id=2650">
   * Namespace axis resolution is not XPath compliant </A>
   */
  public static void
  circumventBug2650(Document doc)
  {
    Element documentElement = doc.getDocumentElement();

    // getDocumentElement() is null for a document that has no root element
    // yet; there is nothing to declare the default namespace on then.
    if (documentElement != null) {
      // if the document element has no xmlns definition, we add xmlns=""
      Attr xmlnsAttr =
        documentElement.getAttributeNodeNS(Constants.NamespaceSpecNS, "xmlns");

      if (xmlnsAttr == null) {
        documentElement.setAttributeNS(Constants.NamespaceSpecNS, "xmlns", "");
      }
    }

    XMLUtils.circumventBug2650internal(doc);
  }

  /**
   * This is the work horse for {@link #circumventBug2650}.
   *
   * <p>Starting at the given node, it walks the tree iteratively (no
   * recursion). For every element that has both children and attributes,
   * each attribute in the {@code Constants.NamespaceSpecNS} namespace is
   * copied onto every direct element child that does not already have an
   * attribute with the same local name in that namespace. Children are
   * visited after their parent, so copied declarations keep moving down.
   *
   * @param node the node at which the walk starts
   * @see <A HREF="http://nagoya.apache.org/bugzilla/show_bug.cgi?id=2650">
   * Namespace axis resolution is not XPath compliant </A>
   */
  @SuppressWarnings("fallthrough")
  private static void
  circumventBug2650internal(Node node)
  {
    // parent/sibling hold the traversal position and deliberately survive
    // from one loop iteration to the next.
    Node parent = null;
    Node sibling = null;
    final String namespaceNs = Constants.NamespaceSpecNS;
    do {
      switch (node.getNodeType()) {
        case Node.ELEMENT_NODE :
          Element element = (Element) node;
          if (!element.hasChildNodes()) {
            // Nothing to copy to and nothing to descend into; parent and
            // sibling keep the values set at the end of the previous pass.
            break;
          }
          if (element.hasAttributes()) {
            NamedNodeMap attributes = element.getAttributes();
            int attributesLength = attributes.getLength();

            for (Node child = element.getFirstChild(); child != null;
                 child = child.getNextSibling()) {

              // only element children receive copies
              if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
              }
              Element childElement = (Element) child;

              for (int i = 0; i < attributesLength; i++) {
                Attr currentAttr = (Attr) attributes.item(i);
                // skip attributes that are not in the namespace-declaration namespace
                if (!namespaceNs.equals(currentAttr.getNamespaceURI())) {
                  continue;
                }
                // the child's own attribute of that local name wins
                if (childElement.hasAttributeNS(namespaceNs,
                                                currentAttr.getLocalName())) {
                  continue;
                }
                childElement.setAttributeNS(namespaceNs,
                                            currentAttr.getName(),
                                            currentAttr.getNodeValue());
              }
            }
          }
        // intentional fall-through: an element with children is descended
        // into exactly like an entity reference or a document node
        case Node.ENTITY_REFERENCE_NODE :
        case Node.DOCUMENT_NODE :
          parent = node;
          sibling = node.getFirstChild();
          break;
      }
      // No next node on this level: climb up until some ancestor has a
      // following sibling, or until there is no ancestor left.
      while ((sibling == null) && (parent != null)) {
        sibling = parent.getNextSibling();
        parent = parent.getParentNode();
      }
      if (sibling == null) {
        // the walk is finished
        return;
      }

      // Move on to the next node and remember its following sibling.
      node = sibling;
      sibling = node.getNextSibling();
    } while (true);
  }

  /**
   * Uses {@link #selectNode} to locate an element among the given node and
   * its following siblings, then returns the first text node that is a
   * direct child of that element.
   *
   * @param sibling the node at which the search starts; may be null
   * @param uri the namespace URI the element must have
   * @param nodeName the local name the element must have
   * @param number zero-based index of the wanted match
   * @return the first direct text child of the selected element, or null if
   *         no element was selected or it has no direct text child
   */
  public static Text
  selectNodeText(Node sibling, String uri, String nodeName, int number)
  {
    Node selected = selectNode(sibling, uri, nodeName, number);
    if (selected == null) {
      return null;
    }

    for (Node child = selected.getFirstChild(); child != null;
         child = child.getNextSibling()) {
      if (child.getNodeType() == Node.TEXT_NODE) {
        return (Text) child;
      }
    }
    return null;
  }

  /**
   * Scans the given node and its following siblings for nodes whose
   * namespace URI equals <CODE>uri</CODE> and whose local name equals
   * <CODE>nodeName</CODE>, and returns the match at the requested position.
   * Nodes without a namespace URI never match.
   *
   * @param sibling the node at which the scan starts; may be null
   * @param uri the namespace URI to match
   * @param nodeName the local name to match
   * @param number zero-based index of the wanted match
   * @return the requested match, or null if there are not enough matches
   */
  public static Element
  selectNode(Node sibling, String uri, String nodeName, int number)
  {
    int remaining = number;

    for (Node current = sibling; current != null;
         current = current.getNextSibling()) {
      String namespace = current.getNamespaceURI();
      if (namespace == null || !namespace.equals(uri)
          || !current.getLocalName().equals(nodeName)) {
        continue;
      }
      if (remaining == 0) {
        return (Element) current;
      }
      remaining--;
    }

    return null;
  }

  /**
   * Collects, from the given node and its following siblings, every node
   * whose namespace URI equals <CODE>uri</CODE> and whose local name equals
   * <CODE>nodeName</CODE>. Nodes without a namespace URI never match.
   *
   * @param sibling the node at which the scan starts; may be null
   * @param uri the namespace URI to match
   * @param nodeName the local name to match
   * @return the matches in sibling order; an empty array if there are none
   */
  public static Element[]
  selectNodes(Node sibling, String uri, String nodeName)
  {
    List<Element> matches = new ArrayList<Element>();

    for (Node current = sibling; current != null;
         current = current.getNextSibling()) {
      String namespace = current.getNamespaceURI();
      if (namespace != null && namespace.equals(uri)
          && current.getLocalName().equals(nodeName)) {
        matches.add((Element) current);
      }
    }

    Element[] result = new Element[matches.size()];
    return matches.toArray(result);
  }

  /**
   * Builds a new set holding the nodes of <CODE>inputSet</CODE> that are not
   * on the descendant-or-self axis of <CODE>signatureElement</CODE>, as
   * decided by {@link #isDescendantOrSelf}. The input set is not modified.
   *
   * @param signatureElement the node whose subtree is to be left out
   * @param inputSet the nodes to filter
   * @return a new set without the excluded nodes
   */
  public static Set<Node>
  excludeNodeFromSet(Node signatureElement, Set<Node> inputSet)
  {
    Set<Node> kept = new HashSet<Node>();

    for (Node candidate : inputSet) {
      if (XMLUtils.isDescendantOrSelf(signatureElement, candidate)) {
        continue;
      }
      kept.add(candidate);
    }

    return kept;
  }

  /**
   * Returns the string value of a text, attribute or processing
   * instruction node.
   *
   * <p>For a text node the data of <I>all</I> text nodes under the same
   * parent is concatenated, because the text may be split up by processing
   * instructions or comments. A text node without a parent contributes only
   * its own data.
   *
   * @param xpathnode the node to read; must not be null
   * @return the string for the node, or null for any other node type
   */
  public static String
  getStrFromNode(Node xpathnode)
  {
    switch (xpathnode.getNodeType()) {
      case Node.TEXT_NODE : {
        Node parent = xpathnode.getParentNode();
        if (parent == null) {
          // detached text node: there are no siblings to walk over
          return ((Text) xpathnode).getData();
        }

        // we iterate over all siblings of the context node because eventually,
        // the text is "polluted" with pi's or comments
        StringBuilder sb = new StringBuilder();
        for (Node currentSibling = parent.getFirstChild();
             currentSibling != null;
             currentSibling = currentSibling.getNextSibling()) {
          if (currentSibling.getNodeType() == Node.TEXT_NODE) {
            sb.append(((Text) currentSibling).getData());
          }
        }
        return sb.toString();
      }
      case Node.ATTRIBUTE_NODE :
      case Node.PROCESSING_INSTRUCTION_NODE :
        return xpathnode.getNodeValue();
      default :
        return null;
    }
  }

  /**
   * Tells whether <CODE>descendantOrSelf</CODE> lies on the
   * descendant-or-self axis of <CODE>ctx</CODE>. The check walks upwards
   * from <CODE>descendantOrSelf</CODE>, stepping from an attribute to its
   * owner element and from any other node to its parent.
   *
   * @param ctx the context node
   * @param descendantOrSelf the node to test
   * @return true if both arguments are the same reference, or if
   *         <CODE>ctx</CODE> is reached while walking upwards
   */
  public static boolean
  isDescendantOrSelf(Node ctx, Node descendantOrSelf)
  {
    // identical references match, including the case where both are null
    if (ctx == descendantOrSelf) {
      return true;
    }

    Node current = descendantOrSelf;
    while (current != null) {
      if (current == ctx) {
        return true;
      }
      current = (current.getNodeType() == Node.ATTRIBUTE_NODE)
                ? ((Attr) current).getOwnerElement()
                : current.getParentNode();
    }

    return false;
  }

  /**
   * Looks up the attribute with the given name in no namespace and returns
   * its value. Returns null if there is no such attribute, or the empty
   * string if the attribute value is empty.
   *
   * <p>This works around a limitation of the DOM
   * <code>Element.getAttribute</code> method, which does not distinguish
   * between an unspecified attribute and an attribute with a value of
   * "" (it returns "" for both cases).
   *
   * @param elem the element containing the attribute
   * @param name the name of the attribute
   * @return the attribute value (null if the attribute is not present)
   */
  public static String
  getAttributeValue(Element elem, String name)
  {
    Attr attribute = elem.getAttributeNodeNS(null, name);
    if (attribute == null) {
      return null;
    }
    return attribute.getValue();
  }

  /**
   * This method is a tree-search to help prevent against wrapping attacks. It checks that no
   * two Elements have ID Attributes that match the "value" argument, if this is the case then
   * "false" is returned. Note that a return value of "true" does not necessarily mean that
   * a matching Element has been found, just that no wrapping attack has been detected.
   *
   * <p>The value is trimmed and one leading '#' is dropped before it is compared. An empty
   * or blank value is compared as the empty string instead of raising an exception.
   *
   * @param startNode the node at which the search starts; must not be null
   * @param value the ID value to look for, optionally prefixed with '#'; must not be null
   * @return false if more than one ID attribute with that value was found, true otherwise
   */
  public static boolean
  protectAgainstWrappingAttack(Node startNode, String value)
  {
    Node startParent = startNode.getParentNode();
    Node processedNode;
    Element foundElement = null;

    String id = value.trim();
    // startsWith() rather than charAt(0): the trimmed value may be empty
    if (id.startsWith("#")) {
      id = id.substring(1);
    }

    while (startNode != null) {
      if (startNode.getNodeType() == Node.ELEMENT_NODE) {
        Element se = (Element) startNode;

        NamedNodeMap attributes = se.getAttributes();
        if (attributes != null) {
          for (int i = 0; i < attributes.getLength(); i++) {
            Attr attr = (Attr)attributes.item(i);
            if (attr.isId() && id.equals(attr.getValue())) {
              if (foundElement == null) {
                // Continue searching to find duplicates
                foundElement = attr.getOwnerElement();
              } else {
                // Multiple elements with the same 'Id' attribute value
                return false;
              }
            }
          }
        }
      }

      processedNode = startNode;
      startNode = startNode.getFirstChild();

      // no child, this node is done.
      if (startNode == null) {
        // close node processing, get sibling
        startNode = processedNode.getNextSibling();
      }

      // no more siblings, get parent, all children
      // of parent are processed.
      while (startNode == null) {
        processedNode = processedNode.getParentNode();
        if (processedNode == startParent) {
          // climbed back above the starting level: everything was visited
          return true;
        }
        // close parent node processing (processed node now)
        startNode = processedNode.getNextSibling();
      }
    }
    return true;
  }

  /**
   * This method is a tree-search to help prevent against wrapping attacks. It checks that no other
   * Element than the given "knownElement" argument has an ID attribute that matches the "value"
   * argument, which is the ID value of "knownElement". If this is the case then "false" is returned.
   *
   * <p>The value is trimmed and one leading '#' is dropped before it is compared. An empty
   * or blank value is compared as the empty string instead of raising an exception.
   *
   * @param startNode the node at which the search starts; must not be null
   * @param knownElement the only element that is allowed to carry the ID value
   * @param value the ID value to look for, optionally prefixed with '#'; must not be null
   * @return false if an element other than knownElement has an ID attribute with that value,
   *         true otherwise
   */
  public static boolean
  protectAgainstWrappingAttack(
    Node startNode, Element knownElement, String value
  )
  {
    Node startParent = startNode.getParentNode();
    Node processedNode;

    String id = value.trim();
    // startsWith() rather than charAt(0): the trimmed value may be empty
    if (id.startsWith("#")) {
      id = id.substring(1);
    }

    while (startNode != null) {
      if (startNode.getNodeType() == Node.ELEMENT_NODE) {
        Element se = (Element) startNode;

        NamedNodeMap attributes = se.getAttributes();
        if (attributes != null) {
          for (int i = 0; i < attributes.getLength(); i++) {
            Attr attr = (Attr)attributes.item(i);
            if (attr.isId() && id.equals(attr.getValue()) && se != knownElement) {
              // Another element carries the same 'Id' attribute value
              return false;
            }
          }
        }
      }

      processedNode = startNode;
      startNode = startNode.getFirstChild();

      // no child, this node is done.
      if (startNode == null) {
        // close node processing, get sibling
        startNode = processedNode.getNextSibling();
      }

      // no more siblings, get parent, all children
      // of parent are processed.
      while (startNode == null) {
        processedNode = processedNode.getParentNode();
        if (processedNode == startParent) {
          // climbed back above the starting level: everything was visited
          return true;
        }
        // close parent node processing (processed node now)
        startNode = processedNode.getNextSibling();
      }
    }
    return true;
  }

}