sparklemotion/nokogiri

View on GitHub
ext/java/nokogiri/internals/c14n/XMLUtils.java

Summary

Maintainability
F
3 days
Test Coverage
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package nokogiri.internals.c14n;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;


import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;

/**
 * Static DOM convenience functions used by the canonicalization (c14n) code:
 * collecting text, locating owner documents, selecting sibling elements by
 * namespace and local name, and scanning a tree for duplicate ID attributes.
 *
 * <p>The class only has static members and cannot be instantiated.
 *
 * @author Christian Geuer-Pollmann
 */
public final class XMLUtils {

    /**
     * Private constructor: this class is a holder for static helpers only.
     */
    private XMLUtils() {
        // we don't allow instantiation
    }

    /**
     * Concatenates the data of every direct child of {@code element} whose
     * node type is {@link Node#TEXT_NODE}. Children of any other node type are
     * skipped, and text nested deeper in the tree is not visited.
     *
     * @param element the element whose direct text children are collected
     * @return the concatenated text; the empty string if there is none
     */
    public static String getFullTextChildrenFromElement(Element element) {
        StringBuilder text = new StringBuilder();

        for (Node child = element.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeType() == Node.TEXT_NODE) {
                text.append(((Text) child).getData());
            }
        }

        return text.toString();
    }

    /**
     * Returns the document a node belongs to. Unlike
     * {@link Node#getOwnerDocument}, which returns <CODE>null</CODE> when the
     * {@link Node} is itself a {@link Document}, this method returns the node
     * itself in that case.
     *
     * @param node the node to inspect; must not be null
     * @return {@code node} itself if it is a document, otherwise whatever
     *         {@link Node#getOwnerDocument} reports for it
     * @throws NullPointerException if {@code node} is null
     */
    public static Document getOwnerDocument(Node node) {
        if (node.getNodeType() == Node.DOCUMENT_NODE) {
            return (Document) node;
        }
        // Any NullPointerException raised by the DOM implementation propagates
        // unchanged, so its original stack trace is preserved.
        return node.getOwnerDocument();
    }

    /**
     * Returns the owner document determined from the nodes of a set.
     *
     * <p>The nodes are visited in the set's iteration order. A document node
     * is returned directly; for an attribute the owner element's document is
     * used; for any other node {@link Node#getOwnerDocument} is used. A node
     * whose lookup raises a {@link NullPointerException} (for example an
     * attribute without an owner element) is skipped and the next node is
     * tried.
     *
     * @param xpathNodeSet the nodes to inspect; must not be null
     * @return the owner document obtained from the first node whose lookup
     *         did not fail
     * @throws NullPointerException if the set is empty or the lookup failed
     *         for every node; the last lookup failure, if any, is attached as
     *         the cause
     */
    public static Document getOwnerDocument(Set<Node> xpathNodeSet) {
        NullPointerException lastFailure = null;
        for (Node node : xpathNodeSet) {
            int nodeType = node.getNodeType();
            if (nodeType == Node.DOCUMENT_NODE) {
                return (Document) node;
            }
            try {
                if (nodeType == Node.ATTRIBUTE_NODE) {
                    return ((Attr) node).getOwnerElement().getOwnerDocument();
                }
                return node.getOwnerDocument();
            } catch (NullPointerException e) {
                lastFailure = e;
            }
        }

        if (lastFailure == null) {
            // Empty set: there was no lookup failure to report.
            throw new NullPointerException("cannot determine owner document: node set is empty");
        }
        NullPointerException failure = new NullPointerException(lastFailure.getMessage());
        failure.initCause(lastFailure);
        throw failure;
    }

    /**
     * Copies the items of a {@link NodeList} into a new {@link Set}.
     *
     * @param xpathNodeSet the node list to copy; may be null
     * @return a new mutable set holding the list's items; empty if the list
     *         is null or has no items
     */
    public static Set<Node> convertNodelistToSet(NodeList xpathNodeSet) {
        if (xpathNodeSet == null) {
            return new HashSet<Node>();
        }

        int length = xpathNodeSet.getLength();
        Set<Node> nodes = new HashSet<Node>(length);

        for (int i = 0; i < length; i++) {
            nodes.add(xpathNodeSet.item(i));
        }

        return nodes;
    }

    /**
     * This method spreads all namespace attributes in a DOM document to their
     * children. This is needed because the XML Signature XPath transform
     * must evaluate the XPath against all nodes in the input, even against
     * XPath namespace nodes. Through a bug in XalanJ2, the namespace nodes are
     * not fully visible in the Xalan XPath model, so we have to do this by
     * hand in DOM spaces so that the nodes become visible in XPath space.
     *
     * <p>The document is modified in place. If the document element carries
     * no default namespace declaration, <code>xmlns=""</code> is added to it
     * first. A document without a document element is left untouched.
     *
     * @param doc the document to modify; must not be null
     * @see <A HREF="http://nagoya.apache.org/bugzilla/show_bug.cgi?id=2650">
     * Namespace axis resolution is not XPath compliant </A>
     */
    public static void circumventBug2650(Document doc) {

        Element documentElement = doc.getDocumentElement();
        if (documentElement == null) {
            // No elements at all, hence no namespace declarations to spread.
            return;
        }

        // if the document element has no xmlns definition, we add xmlns=""
        Attr xmlnsAttr =
            documentElement.getAttributeNodeNS(Constants.NamespaceSpecNS, "xmlns");

        if (xmlnsAttr == null) {
            documentElement.setAttributeNS(Constants.NamespaceSpecNS, "xmlns", "");
        }

        XMLUtils.circumventBug2650internal(doc);
    }

    /**
     * This is the work horse for {@link #circumventBug2650}.
     *
     * <p>Walks the tree below {@code node} iteratively. For every element that
     * has both children and attributes, each attribute in the namespace
     * {@code Constants.NamespaceSpecNS} is copied onto every child element
     * that does not already have an attribute with the same local name in
     * that namespace.
     *
     * @param node the node at which the walk starts
     * @see <A HREF="http://nagoya.apache.org/bugzilla/show_bug.cgi?id=2650">
     * Namespace axis resolution is not XPath compliant </A>
     */
    @SuppressWarnings("fallthrough")
    private static void circumventBug2650internal(Node node) {
        // 'parent' is the node whose children are currently being walked and
        // 'sibling' is the next node to visit; together they replace recursion.
        Node parent = null;
        Node sibling = null;
        final String namespaceNs = Constants.NamespaceSpecNS;
        do {
            switch (node.getNodeType()) {
            case Node.ELEMENT_NODE :
                Element element = (Element) node;
                if (!element.hasChildNodes()) {
                    // Nothing to copy to and nothing to descend into.
                    break;
                }
                if (element.hasAttributes()) {
                    NamedNodeMap attributes = element.getAttributes();
                    int attributesLength = attributes.getLength();

                    for (Node child = element.getFirstChild(); child!=null;
                        child = child.getNextSibling()) {

                        if (child.getNodeType() != Node.ELEMENT_NODE) {
                            continue;
                        }
                        Element childElement = (Element) child;

                        for (int i = 0; i < attributesLength; i++) {
                            Attr currentAttr = (Attr) attributes.item(i);
                            // Only namespace declarations are propagated.
                            if (!namespaceNs.equals(currentAttr.getNamespaceURI())) {
                                continue;
                            }
                            // A declaration already present on the child wins.
                            if (childElement.hasAttributeNS(namespaceNs,
                                                            currentAttr.getLocalName())) {
                                continue;
                            }
                            childElement.setAttributeNS(namespaceNs,
                                                        currentAttr.getName(),
                                                        currentAttr.getNodeValue());
                        }
                    }
                }
                // Intentional fall-through: descend into the element's children.
            case Node.ENTITY_REFERENCE_NODE :
            case Node.DOCUMENT_NODE :
                parent = node;
                sibling = node.getFirstChild();
                break;
            }
            // No further node on this level: climb until an ancestor has a
            // following sibling, or the top of the tree is reached.
            while ((sibling == null) && (parent != null)) {
                sibling = parent.getNextSibling();
                parent = parent.getParentNode();
            }
            if (sibling == null) {
                return;
            }

            node = sibling;
            sibling = node.getNextSibling();
        } while (true);
    }

    /**
     * Selects an element with {@link #selectNode} and returns its first direct
     * child of type {@link Node#TEXT_NODE}.
     *
     * @param sibling the node at which the search starts; may be null
     * @param uri the namespace URI the element must have
     * @param nodeName the local name the element must have
     * @param number zero-based index of the wanted match
     * @return the first text child of the selected element, or null if no
     *         element was selected or it has no text child
     */
    public static Text selectNodeText(Node sibling, String uri, String nodeName, int number) {
        Node selected = selectNode(sibling, uri, nodeName, number);
        if (selected == null) {
            return null;
        }
        Node child = selected.getFirstChild();
        while (child != null && child.getNodeType() != Node.TEXT_NODE) {
            child = child.getNextSibling();
        }
        return (Text) child;
    }

    /**
     * Searches {@code sibling} and its following siblings for the
     * {@code number}-th element (counting from zero) with the given namespace
     * URI and local name. Nodes that are not elements, or that have no
     * namespace URI, never match.
     *
     * @param sibling the node at which the search starts; may be null
     * @param uri the namespace URI the element must have
     * @param nodeName the local name the element must have
     * @param number zero-based index of the wanted match
     * @return the matching element, or null if there are not enough matches
     */
    public static Element selectNode(Node sibling, String uri, String nodeName, int number) {
        int remaining = number;
        for (Node current = sibling; current != null; current = current.getNextSibling()) {
            if (!isElementNamed(current, uri, nodeName)) {
                continue;
            }
            if (remaining == 0) {
                return (Element) current;
            }
            remaining--;
        }
        return null;
    }

    /**
     * Collects, in sibling order, every element among {@code sibling} and its
     * following siblings that has the given namespace URI and local name.
     * Nodes that are not elements, or that have no namespace URI, never match.
     *
     * @param sibling the node at which the search starts; may be null
     * @param uri the namespace URI the elements must have
     * @param nodeName the local name the elements must have
     * @return the matching elements; an empty array if there are none
     */
    public static Element[] selectNodes(Node sibling, String uri, String nodeName) {
        List<Element> matches = new ArrayList<Element>();
        for (Node current = sibling; current != null; current = current.getNextSibling()) {
            if (isElementNamed(current, uri, nodeName)) {
                matches.add((Element) current);
            }
        }
        return matches.toArray(new Element[matches.size()]);
    }

    /**
     * Tells whether {@code node} is an element whose non-null namespace URI
     * equals {@code uri} and whose local name equals {@code localName}.
     */
    private static boolean isElementNamed(Node node, String uri, String localName) {
        // The type check keeps callers' (Element) casts safe.
        if (node.getNodeType() != Node.ELEMENT_NODE) {
            return false;
        }
        String nodeUri = node.getNamespaceURI();
        if (nodeUri == null || !nodeUri.equals(uri)) {
            return false;
        }
        String nodeLocalName = node.getLocalName();
        return nodeLocalName != null && nodeLocalName.equals(localName);
    }

    /**
     * Returns a new set holding the nodes of {@code inputSet} that are neither
     * {@code signatureElement} itself nor located below it, as decided by
     * {@link #isDescendantOrSelf}. The input set is not modified.
     *
     * @param signatureElement the node whose subtree is excluded
     * @param inputSet the nodes to filter
     * @return a new set without the excluded nodes
     */
    public static Set<Node> excludeNodeFromSet(Node signatureElement, Set<Node> inputSet) {
        Set<Node> resultSet = new HashSet<Node>();

        for (Node inputNode : inputSet) {
            if (!XMLUtils.isDescendantOrSelf(signatureElement, inputNode)) {
                resultSet.add(inputNode);
            }
        }
        return resultSet;
    }

    /**
     * Returns a string value for a text, attribute or processing-instruction
     * node.
     *
     * <p>For a text node the data of <em>all</em> text nodes that are children
     * of the same parent is concatenated, because the text may be split up by
     * processing instructions or comments. A text node without a parent
     * yields its own data.
     *
     * @param xpathnode the node to convert; must not be null
     * @return the string value, or null for any other node type
     */
    public static String getStrFromNode(Node xpathnode) {
        switch (xpathnode.getNodeType()) {
        case Node.TEXT_NODE : {
            Node parent = xpathnode.getParentNode();
            if (parent == null) {
                // Detached text node: there are no siblings to merge with.
                return ((Text) xpathnode).getData();
            }

            // we iterate over all siblings of the context node because eventually,
            // the text is "polluted" with pi's or comments
            StringBuilder text = new StringBuilder();
            for (Node current = parent.getFirstChild(); current != null;
                current = current.getNextSibling()) {
                if (current.getNodeType() == Node.TEXT_NODE) {
                    text.append(((Text) current).getData());
                }
            }
            return text.toString();
        }
        case Node.ATTRIBUTE_NODE :
        case Node.PROCESSING_INSTRUCTION_NODE :
            return xpathnode.getNodeValue();
        default :
            return null;
        }
    }

    /**
     * Returns true if the descendantOrSelf is on the descendant-or-self axis
     * of the context node.
     *
     * <p>The check climbs from {@code descendantOrSelf} towards the root,
     * stepping from an attribute to its owner element and from any other node
     * to its parent, and compares each node with {@code ctx} by identity.
     *
     * @param ctx the context node
     * @param descendantOrSelf the node to test
     * @return true if the two arguments are the same reference (including
     *         both being null) or {@code ctx} is reached while climbing;
     *         false otherwise
     */
    public static boolean isDescendantOrSelf(Node ctx, Node descendantOrSelf) {
        if (ctx == descendantOrSelf) {
            return true;
        }

        Node current = descendantOrSelf;
        while (current != null) {
            if (current == ctx) {
                return true;
            }

            if (current.getNodeType() == Node.ATTRIBUTE_NODE) {
                current = ((Attr) current).getOwnerElement();
            } else {
                current = current.getParentNode();
            }
        }
        return false;
    }

    /**
     * Returns the attribute value for the attribute with the specified name.
     * Returns null if there is no such attribute, or
     * the empty string if the attribute value is empty.
     *
     * <p>This works around a limitation of the DOM
     * <code>Element.getAttribute</code> method, which does not distinguish
     * between an unspecified attribute and an attribute with a value of
     * "" (it returns "" for both cases). The attribute is looked up with a
     * null namespace URI.
     *
     * @param elem the element containing the attribute
     * @param name the name of the attribute
     * @return the attribute value (may be null if unspecified)
     */
    public static String getAttributeValue(Element elem, String name) {
        Attr attr = elem.getAttributeNodeNS(null, name);
        if (attr == null) {
            return null;
        }
        return attr.getValue();
    }

    /**
     * This method is a tree-search to help prevent against wrapping attacks. It checks that no
     * two Elements have ID Attributes that match the "value" argument, if this is the case then
     * "false" is returned. Note that a return value of "true" does not necessarily mean that
     * a matching Element has been found, just that no wrapping attack has been detected.
     *
     * <p>The search covers {@code startNode}, its following siblings and all of their
     * descendants. {@code value} is trimmed and one leading '#' is removed before comparing.
     * A second matching ID attribute is reported as a duplicate wherever it is found.
     *
     * @param startNode the node at which the search starts; must not be null
     * @param value the ID value to look for, optionally prefixed with '#'; must not be null
     * @return false if a second matching ID attribute is found, true otherwise
     */
    public static boolean protectAgainstWrappingAttack(Node startNode, String value) {
        final Node boundary = startNode.getParentNode();
        final String id = normalizeId(value);
        Element foundElement = null;

        for (Node node = startNode; node != null; node = nextNodeBelow(node, boundary)) {
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            NamedNodeMap attributes = node.getAttributes();
            if (attributes == null) {
                continue;
            }
            for (int i = 0; i < attributes.getLength(); i++) {
                Attr attr = (Attr) attributes.item(i);
                if (attr.isId() && id.equals(attr.getValue())) {
                    if (foundElement != null) {
                        // Multiple 'Id' attributes with the same value.
                        return false;
                    }
                    // Continue searching to find duplicates
                    foundElement = attr.getOwnerElement();
                }
            }
        }
        return true;
    }

    /**
     * This method is a tree-search to help prevent against wrapping attacks. It checks that no other
     * Element than the given "knownElement" argument has an ID attribute that matches the "value"
     * argument, which is the ID value of "knownElement". If this is the case then "false" is returned.
     *
     * <p>The search covers {@code startNode}, its following siblings and all of their
     * descendants. {@code value} is trimmed and one leading '#' is removed before comparing.
     *
     * @param startNode the node at which the search starts; must not be null
     * @param knownElement the only element allowed to carry the ID; compared by identity
     * @param value the ID value to look for, optionally prefixed with '#'; must not be null
     * @return false if an element other than {@code knownElement} has a matching ID
     *         attribute, true otherwise
     */
    public static boolean protectAgainstWrappingAttack(
        Node startNode, Element knownElement, String value
    ) {
        final Node boundary = startNode.getParentNode();
        final String id = normalizeId(value);

        for (Node node = startNode; node != null; node = nextNodeBelow(node, boundary)) {
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            NamedNodeMap attributes = node.getAttributes();
            if (attributes == null) {
                continue;
            }
            for (int i = 0; i < attributes.getLength(); i++) {
                Attr attr = (Attr) attributes.item(i);
                if (attr.isId() && id.equals(attr.getValue()) && node != knownElement) {
                    // Multiple elements with the same 'Id' attribute value.
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Trims an ID reference and strips one leading '#', if present. An empty
     * or whitespace-only value yields the empty string.
     */
    private static String normalizeId(String value) {
        String id = value.trim();
        return id.startsWith("#") ? id.substring(1) : id;
    }

    /**
     * Returns the node to visit after {@code current} in the wrapping-attack
     * search: its first child if it has one, else its next sibling, else the
     * next sibling of the closest ancestor that has one. Returns null once
     * climbing reaches {@code boundary}, which ends the search.
     */
    private static Node nextNodeBelow(Node current, Node boundary) {
        Node next = current.getFirstChild();

        // no child, this node is done: get sibling
        if (next == null) {
            next = current.getNextSibling();
        }

        // no more siblings, get parent, all children
        // of parent are processed.
        Node processed = current;
        while (next == null) {
            processed = processed.getParentNode();
            if (processed == boundary) {
                return null;
            }
            next = processed.getNextSibling();
        }
        return next;
    }

}