
View on GitHub


6 hrs
Test Coverage
/*
 * Copyright (c) 2017 Karol Bucek
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package nokogiri.internals;

import javax.xml.transform.dom.DOMSource;

import org.apache.xml.dtm.DTM;
import nokogiri.internals.dom2dtm.DOM2DTM;
import nokogiri.internals.dom2dtm.DOM2DTMdefaultNamespaceDeclarationNode;
import org.apache.xml.dtm.DTMWSFilter;
import org.apache.xml.res.XMLErrorResources;
import org.apache.xml.res.XMLMessages;
import org.w3c.dom.Node;

/**
 * @author kares
 */
public final class XalanDTMManagerPatch extends org.apache.xml.dtm.ref.DTMManagerDefault

   * Given a W3C DOM node, try and return a DTM handle.
   * Note: calling this may be non-optimal, and there is no guarantee that
   * the node will be found in any particular DTM.
   * @param node Non-null reference to a DOM node.
   * @return a valid DTM handle.
  public /* synchronized */ int
  getDTMHandleFromNode(org.w3c.dom.Node node)
    //if (node == null) // "node must be non-null for getDTMHandleFromNode!");
    //    throw new IllegalArgumentException(XMLMessages.createXMLMessage(XMLErrorResources.ER_NODE_NON_NULL, null));
    assert node != null;

    if (node instanceof org.apache.xml.dtm.ref.DTMNodeProxy) {
      return ((org.apache.xml.dtm.ref.DTMNodeProxy) node).getDTMNodeNumber();

    // Find the DOM2DTMs wrapped around this Document (if any)
    // and check whether they contain the Node in question.
    // NOTE that since a DOM2DTM may represent a subtree rather
    // than a full document, we have to be prepared to check more
    // than one -- and there is no guarantee that we will find
    // one that contains ancestors or siblings of the node we're
    // seeking.
    // %REVIEW% We could search for the one which contains this
    // node at the deepest level, and thus covers the widest
    // subtree, but that's going to entail additional work
    // checking more DTMs... and getHandleOfNode is not a
    // cheap operation in most implementations.
    // TODO: %REVIEW% If overflow addressing, we may recheck a DTM
    // already examined. Ouch. But with the increased number of DTMs,
    // scanning back to check this is painful.
    //   Generate a list of _unique_ DTM objects?
    //   Have each DTM cache last DOM node search?
    for (int i = 0; i < m_dtms.length; i++) {
      DTM thisDTM = m_dtms[i];
      if (thisDTM instanceof DOM2DTM) {
        int handle = ((DOM2DTM) thisDTM).getHandleOfNode(node);
        if (handle != DTM.NULL) {
          return handle;

    // Not found; generate a new DTM.
    // %REVIEW% Is this really desirable, or should we return null
    // and make folks explicitly instantiate from a DOMSource? The
    // latter is more work but gives the caller the opportunity to
    // explicitly add the DTM to a DTMManager... and thus to know when
    // it can be discarded again, which is something we need to pay much
    // more attention to. (Especially since only DTMs which are assigned
    // to a manager can use the overflow addressing scheme.)
    // %BUG% If the source node was a DOM2DTM$defaultNamespaceDeclarationNode
    // and the DTM wasn't registered with this DTMManager, we will create
    // a new DTM and _still_ not be able to find the node (since it will
    // be resynthesized). Another reason to push hard on making all DTMs
    // be managed DTMs.

    // Since the real root of our tree may be a DocumentFragment, we need to
    // use getParent to find the root, instead of getOwnerDocument.  Otherwise
    // DOM2DTM#getHandleOfNode will be very unhappy.
    Node root = node;
    int rootType = root.getNodeType();
    Node p = (rootType == Node.ATTRIBUTE_NODE) ? ((org.w3c.dom.Attr) root).getOwnerElement() : root.getParentNode();
    for (; p != null; p = p.getParentNode()) { root = p; }

    // DOM2DTM dtm = (DOM2DTM) getDTM(new DOMSource(root), false, null);
    DOM2DTM dtm = getDTM(new DOMSource(root), false, null/*, true, true*/);

    int handle;

    if (node instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTMdefaultNamespaceDeclarationNode
        || node instanceof DOM2DTMdefaultNamespaceDeclarationNode) {
      // Can't return the same node since it's unique to a specific DTM,
      // but can return the equivalent node -- find the corresponding
      // Document Element, then ask it for the xml: namespace decl.
      handle = dtm.getHandleOfNode(((org.w3c.dom.Attr) node).getOwnerElement());
      handle = dtm.getAttributeNode(handle, node.getNamespaceURI(), node.getLocalName());
    } else {
      handle = dtm.getHandleOfNode(node);

      rootType = root.getNodeType();
      // Is Node actually within the same document? If not, don't search!
      // This would be easier if m_root was always the Document node, but
      // we decided to allow wrapping a DTM around a subtree.
      if ((root == node) ||
          (rootType == Node.DOCUMENT_NODE && root == node.getOwnerDocument()) ||
          (rootType != Node.DOCUMENT_NODE && root.getOwnerDocument() == node.getOwnerDocument())
         ) {
        // If node _is_ in m_root's tree, find its handle
        // %OPT% This check may be improved significantly when DOM
        // Level 3 nodeKey and relative-order tests become
        // available!
        for (Node cursor = node; cursor != null;
             cursor = (cursor.getNodeType() != Node.ATTRIBUTE_NODE)
                      ? cursor.getParentNode()
                      : ((org.w3c.dom.Attr)cursor).getOwnerElement()) {
          if (cursor == root) {
            // We know this node; find its handle.
            return (dtm).getHandleFromNode(node);
        } // for ancestors of node
      } // if node and m_root in same Document

    if (DTM.NULL == handle) {
      throw new RuntimeException(XMLMessages.createXMLMessage(XMLErrorResources.ER_COULD_NOT_RESOLVE_NODE,
                                 null));  //"Could not resolve the node to a handle!");

    return handle;

  private DOM2DTM
  getDTM(DOMSource source, boolean unique, DTMWSFilter whiteSpaceFilter/*, boolean incremental, boolean doIndexing*/)
    int dtmPos = getFirstFreeDTMID();
    int documentID = dtmPos << IDENT_DTM_NODE_BITS;

    DOM2DTM dtm = new DOM2DTM(this, source, documentID, whiteSpaceFilter, m_xsf, true);

    addDTM(dtm, dtmPos, 0);
    return dtm;
