ext/java/nokogiri/XmlElementContent.java
package nokogiri;
import static nokogiri.internals.NokogiriHelpers.getLocalPart;
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
import static nokogiri.internals.NokogiriHelpers.getPrefix;
import static nokogiri.internals.NokogiriHelpers.nonEmptyStringOrNil;
import org.cyberneko.dtd.DTDConfiguration;
import org.jruby.Ruby;
import org.jruby.RubyClass;
import org.jruby.RubyObject;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
/**
* DTD element content model. This converts the nice tree of content
* model declarations returned by NekoDTD into the convoluted binary
* tree used by libxml.
*
* @author Patrick Mahoney <pat@polycrystal.org>
*/
@JRubyClass(name = "Nokogiri::XML::ElementContent")
public class XmlElementContent extends RubyObject
{
private static final long serialVersionUID = 1L;
protected String element_name = null;
protected String name;
protected Type type;
protected Occur occur;
protected IRubyObject left;
protected IRubyObject right;
/** values hardcoded from nokogiri/xml/element_content.rb; this
* makes me uneasy, but it works */
public enum Type {
PCDATA(1),
ELEMENT(2),
SEQ(3),
OR(4);
private final int value;
Type(int value)
{
this.value = value;
}
public IRubyObject value(Ruby runtime)
{
return runtime.newFixnum(value);
}
}
public enum Occur {
ONCE(1),
OPT(2),
MULT(3),
PLUS(4);
private final int value;
Occur(int value)
{
this.value = value;
}
public IRubyObject value(Ruby runtime)
{
return runtime.newFixnum(value);
}
}
public
XmlElementContent(Ruby runtime, RubyClass klass,
XmlDocument document, Node node)
{
this(runtime, klass, document, new NodeIter(node));
element_name = ((Element)node).getAttribute("ename");
/*
* This is a bit of a hack to match libxml behavior.
*
* If the tree contains but a single group with a single
* element, we can simply return the bare element without the
* surrounding group.
*
* TODO: is SEQ/ONCE with a single child the only case for
* reduction?
*
* - pmahoney
*/
if (!this.left.isNil()) {
XmlElementContent left = (XmlElementContent) this.left;
if (type == Type.SEQ &&
occur == Occur.ONCE &&
left.type == Type.ELEMENT &&
right.isNil()) {
this.name = left.name;
this.type = left.type;
this.occur = left.occur;
this.left = this.right; // both nil
}
}
}
public
XmlElementContent(Ruby runtime, XmlDocument document, Node node)
{
this(runtime, getNokogiriClass(runtime, "Nokogiri::XML::ElementContent"), document, node);
}
public
XmlElementContent(Ruby runtime, RubyClass klass,
XmlDocument doc, NodeIter iter)
{
super(runtime, klass);
setInstanceVariable("@document", doc);
name = null;
type = Type.SEQ;
occur = Occur.ONCE;
left = runtime.getNil();
right = runtime.getNil();
apply(runtime, klass, doc, iter);
}
protected
XmlElementContent(Ruby runtime, RubyClass klass,
Type type, XmlDocument doc, NodeIter iter,
XmlElementContent left)
{
super(runtime, klass);
setInstanceVariable("@document", doc);
name = null;
this.type = type;
occur = Occur.ONCE;
this.left = left;
right = runtime.getNil();
switch (type) {
case SEQ:
case OR:
applyGroup(runtime, klass, doc, iter);
break;
default:
break;
}
}
/**
* Applies the current node in <code>iter</code> to this content
* model. When finished, <code>iter</code> will point to the last
* processed node.
*/
protected void
apply(Ruby runtime, RubyClass klass,
XmlDocument doc,
NodeIter iter)
{
if (iter.isNull()) { return; }
Element elem = (Element) iter.current();
if (isGroup(elem) && iter.hasChildren()) {
iter.firstChild();
applyGroup(runtime, klass, doc, iter);
iter.parent();
} else if (isElement(elem)) {
name = elem.getAttribute("name");
type = Type.ELEMENT;
}
iter.nextSibling();
if (iter.isNull()) { return; }
if (isOccurrence(iter.current())) {
setOccur(((Element)iter.current()).getAttribute("type"));
iter.nextSibling();
}
}
protected void
applyGroup(Ruby runtime, RubyClass klass,
XmlDocument doc, NodeIter iter)
{
// LEFT branch
if (iter.isNull()) { return; }
if (left.isNil()) {
left = new XmlElementContent(runtime, klass, doc, iter);
if (iter.isNull()) { return; }
if (isSeparator(iter.current())) {
setType(((Element)iter.current()).getAttribute("type"));
iter.nextSibling(); // skip separator
}
}
// RIGHT branch
if (iter.isNull()) { return; }
right = new XmlElementContent(runtime, klass, doc, iter);
if (iter.isNull()) { return; }
if (isSeparator(iter.current())) {
iter.nextSibling(); // skip separator
}
if (iter.isNull()) { return; }
// binary tree can only hold two children. If we have more,
// the right child is another tree with the same sequence
// "type". The "left" of the new tree is what we've
// currently consumed as our "right" branch of this tree.
right = new XmlElementContent(runtime, klass, type, doc, iter,
(XmlElementContent) right);
}
/**
* Set the type based on the separator node type string.
*/
protected void
setType(String type)
{
if ("|".equals(type)) { this.type = Type.OR; }
else if (",".equals(type)) { this.type = Type.SEQ; }
}
protected void
setOccur(String type)
{
if ("*".equals(type)) { this.occur = Occur.MULT; }
else if ("+".equals(type)) { this.occur = Occur.PLUS; }
}
public static boolean
isGroup(Node node)
{
return XmlDtd.nameEquals(node, DTDConfiguration.E_GROUP);
}
// content model element, not Element node type
public static boolean
isElement(Node node)
{
return XmlDtd.nameEquals(node, DTDConfiguration.E_ELEMENT);
}
public static boolean
isSeparator(Node node)
{
return XmlDtd.nameEquals(node, DTDConfiguration.E_SEPARATOR);
}
public static boolean
isOccurrence(Node node)
{
return XmlDtd.nameEquals(node, DTDConfiguration.E_OCCURRENCE);
}
/**
* Return the name of the element to which this content model
* applies. Only works for the root of the tree.
*/
public IRubyObject
element_name(ThreadContext context)
{
return nonEmptyStringOrNil(context.getRuntime(), element_name);
}
@JRubyMethod
public IRubyObject
prefix(ThreadContext context)
{
return nonEmptyStringOrNil(context.getRuntime(), getPrefix(name));
}
@JRubyMethod
public IRubyObject
name(ThreadContext context)
{
return nonEmptyStringOrNil(context.getRuntime(), getLocalPart(name));
}
@JRubyMethod
public IRubyObject
type(ThreadContext context)
{
return type.value(context.getRuntime());
}
@JRubyMethod
public IRubyObject
occur(ThreadContext context)
{
return occur.value(context.getRuntime());
}
@JRubyMethod
public IRubyObject
c1(ThreadContext context)
{
return left;
}
@JRubyMethod
public IRubyObject
c2(ThreadContext context)
{
return right;
}
/**
* Iterator for a tree of Nodes. Has a current position that
* points to a given node. Calling nextSibling() on the last
* sibling results in a current position of null. This position
* is not fatal and can be escaped by calling parent() (which
* moves to the parent of previous sibling). The null position is
* used to indicate the end of a list.
*/
protected static class NodeIter
{
protected Node pre;
protected Node cur;
/**
* The first time, we fake a previous sibling element. Thus,
* initially, current() is null, and the first call should be
* nextSibling().
*/
public
NodeIter(Node node)
{
pre = null;
cur = node.getFirstChild(); // skip root contentModel node
}
public Node
current()
{
return cur;
}
public boolean
isNull()
{
return (cur == null);
}
public boolean
hasChildren()
{
return (cur != null && cur.hasChildNodes());
}
/**
* Descend to the first child.
*/
public Node
firstChild()
{
if (cur == null) { throw new RuntimeException("no children"); }
Node ch = cur.getFirstChild();
if (ch == null) { throw new RuntimeException("no children"); }
cur = ch;
return cur;
}
/**
* Move to the next sibling
*/
public Node
nextSibling()
{
if (cur == null) {
throw new RuntimeException("no next sibling");
} else {
Node ns = cur.getNextSibling();
if (ns == null) {
pre = cur;
cur = null;
} else {
cur = ns;
}
return cur;
}
}
/**
* Move to the parent.
*/
public Node
parent()
{
if (cur == null) { cur = pre; }
Node p = cur.getParentNode();
if (p == null) { throw new RuntimeException("no parent"); }
cur = p;
return cur;
}
}
}