// ext/java/nokogiri/Html4Document.java
package nokogiri;
import org.jruby.Ruby;
import org.jruby.RubyClass;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.runtime.Helpers;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import nokogiri.internals.HtmlDomParserContext;
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
/**
 * Class for Nokogiri::HTML4::Document.
 *
 * <p>Extends {@link XmlDocument} with the Ruby-level {@code new},
 * {@code read_io} and {@code read_memory} entry points, and supplies a
 * default internal subset (doctype) when the parsed document has none.
 *
 * @author sergio
 * @author Yoko Harada <yokolet@gmail.com>
 */
@JRubyClass(name = "Nokogiri::HTML4::Document", parent = "Nokogiri::XML::Document")
public class Html4Document extends XmlDocument
{
  private static final long serialVersionUID = 1L;

  // Values used to build the default internal subset in getInternalSubset().
  private static final String DEFAULT_CONTENT_TYPE = "html";
  private static final String DEFAULT_PUBLIC_ID = "-//W3C//DTD HTML 4.01//EN";
  private static final String DEFAULT_SYSTEM_ID = "http://www.w3.org/TR/html4/strict.dtd";

  // Encoding recorded through setParsedEncoding(); null until that is called.
  private String parsed_encoding = null;

  /**
   * Creates a document object of the given Ruby class without a DOM document.
   *
   * @param ruby  the JRuby runtime
   * @param klazz the Ruby class of the new object
   */
  public
  Html4Document(Ruby ruby, RubyClass klazz)
  {
    super(ruby, klazz);
  }

  /**
   * Wraps an existing DOM document.
   *
   * @param runtime  the JRuby runtime
   * @param document the DOM document to wrap
   */
  public
  Html4Document(Ruby runtime, Document document)
  {
    // NOTE(review): the Ruby class looked up here is Nokogiri::XML::Document,
    // not Nokogiri::HTML4::Document -- confirm this is intentional.
    this(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"), document);
  }

  /**
   * Wraps an existing DOM document in an object of the given Ruby class.
   *
   * @param ruby  the JRuby runtime
   * @param klazz the Ruby class of the new object
   * @param doc   the DOM document to wrap
   */
  public
  Html4Document(Ruby ruby, RubyClass klazz, Document doc)
  {
    super(ruby, klazz, doc);
  }

  /**
   * Implements the Ruby class method {@code new}.
   *
   * <p>Creates a new DOM document, allocates an instance of {@code klazz}
   * through {@code NokogiriService.HTML_DOCUMENT_ALLOCATOR}, attaches the DOM
   * document to it and finally invokes the Ruby {@code initialize} method
   * with the original arguments.
   *
   * @param context the current thread context
   * @param klazz   the receiver class; must be a {@link RubyClass}
   * @param args    arguments forwarded unchanged to {@code initialize}
   * @return the newly created document
   */
  @JRubyMethod(name = "new", meta = true, rest = true, required = 0)
  public static IRubyObject
  rbNew(ThreadContext context, IRubyObject klazz, IRubyObject[] args)
  {
    final Ruby runtime = context.runtime;
    Html4Document htmlDocument;
    try {
      Document docNode = createNewDocument(runtime);
      htmlDocument = (Html4Document) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(runtime, (RubyClass) klazz);
      htmlDocument.setDocumentNode(runtime, docNode);
    } catch (Exception ex) {
      // Any failure while building the document is rethrown through asRuntimeError.
      throw asRuntimeError(runtime, "couldn't create document: ", ex);
    }

    // initialize runs outside the try block, so its exceptions propagate as-is.
    Helpers.invoke(context, htmlDocument, "initialize", args);

    return htmlDocument;
  }

  /**
   * Returns the internal subset of this document, creating a default one
   * when the parent implementation reports nil.
   *
   * @param context the current thread context
   * @return the existing internal subset, or a newly created and stored
   *         default one
   */
  @Override
  public IRubyObject
  getInternalSubset(ThreadContext context)
  {
    IRubyObject internalSubset = super.getInternalSubset(context);

    // html documents are expected to have a default internal subset
    // the default values are the same ones used when the following
    // feature is turned on
    // "http://cyberneko.org/html/features/insert-doctype"
    // the reason we don't turn it on, is because it overrides the document's
    // declared doctype declaration.
    if (internalSubset.isNil()) {
      final Ruby runtime = context.runtime;
      internalSubset = XmlDtd.newEmpty(runtime,
                                       getDocument(),
                                       runtime.newString(DEFAULT_CONTENT_TYPE),
                                       runtime.newString(DEFAULT_PUBLIC_ID),
                                       runtime.newString(DEFAULT_SYSTEM_ID));
      // Store it so later calls get the same subset back from the parent.
      setInternalSubset(internalSubset);
    }

    return internalSubset;
  }

  /**
   * Prepares a freshly attached DOM document: runs
   * {@code stabilizeTextContent}, normalizes the document, sets the
   * {@code @decorators} instance variable to nil and, when a document
   * element exists, stabilizes the attributes of the whole element tree.
   *
   * @param runtime  the JRuby runtime
   * @param document the DOM document being initialized
   */
  @Override
  void
  init(Ruby runtime, Document document)
  {
    stabilizeTextContent(document);
    document.normalize();
    setInstanceVariable("@decorators", runtime.getNil());
    if (document.getDocumentElement() != null) {
      stabilizeAttrs(document.getDocumentElement());
    }
  }

  /**
   * Calls {@code stabilizeAttr} on every {@link Attr} of {@code node} and,
   * recursively, of all of its descendants.
   *
   * @param node root of the subtree to process
   */
  private static void
  stabilizeAttrs(Node node)
  {
    if (node.hasAttributes()) {
      NamedNodeMap nodeMap = node.getAttributes();
      for (int i = 0; i < nodeMap.getLength(); i++) {
        Node n = nodeMap.item(i);
        if (n instanceof Attr) {
          stabilizeAttr((Attr) n);
        }
      }
    }
    NodeList children = node.getChildNodes();
    for (int i = 0; i < children.getLength(); i++) {
      stabilizeAttrs(children.item(i));
    }
  }

  /**
   * Records the encoding associated with this document.
   *
   * @param encoding the encoding name; may be null
   */
  public void
  setParsedEncoding(String encoding)
  {
    parsed_encoding = encoding;
  }

  /**
   * Returns the encoding recorded by {@link #setParsedEncoding(String)}.
   *
   * @return the recorded encoding, or null when none has been set
   */
  public String
  getParsedEncoding()
  {
    return parsed_encoding;
  }

  /**
   * Misspelled legacy name of {@link #getParsedEncoding()}, retained so that
   * existing callers keep compiling. New code should call
   * {@link #getParsedEncoding()}.
   *
   * @return the recorded encoding, or null when none has been set
   */
  public String
  getPraedEncoding()
  {
    return getParsedEncoding();
  }

  /**
   * Implements the Ruby class method {@code read_io}.
   *
   * <p>{@code args[2]} and {@code args[3]} configure the
   * {@link HtmlDomParserContext}; {@code args[0]} and {@code args[1]} are
   * handed to {@code setIOInputSource}; {@code args[1]} is also passed to
   * {@code parse}. Presumably the arguments are (io, url, encoding, options)
   * -- confirm against the Ruby caller.
   *
   * @param context the current thread context
   * @param klass   the receiver class; must be a {@link RubyClass}
   * @param args    exactly four arguments, see above
   * @return the result of {@code HtmlDomParserContext.parse}
   */
  @JRubyMethod(meta = true, required = 4)
  public static IRubyObject
  read_io(ThreadContext context, IRubyObject klass, IRubyObject[] args)
  {
    HtmlDomParserContext ctx = new HtmlDomParserContext(context.runtime, args[2], args[3]);
    ctx.setIOInputSource(context, args[0], args[1]);
    return ctx.parse(context, (RubyClass) klass, args[1]);
  }

  /**
   * Implements the Ruby class method {@code read_memory}.
   *
   * <p>{@code args[2]} and {@code args[3]} configure the
   * {@link HtmlDomParserContext}; {@code args[0]} and {@code args[1]} are
   * handed to {@code setStringInputSource}; {@code args[1]} is also passed
   * to {@code parse}. Presumably the arguments are
   * (string, url, encoding, options) -- confirm against the Ruby caller.
   *
   * @param context the current thread context
   * @param klass   the receiver class; must be a {@link RubyClass}
   * @param args    exactly four arguments, see above
   * @return the result of {@code HtmlDomParserContext.parse}
   */
  @JRubyMethod(meta = true, required = 4)
  public static IRubyObject
  read_memory(ThreadContext context, IRubyObject klass, IRubyObject[] args)
  {
    HtmlDomParserContext ctx = new HtmlDomParserContext(context.runtime, args[2], args[3]);
    ctx.setStringInputSource(context, args[0], args[1]);
    return ctx.parse(context, (RubyClass) klass, args[1]);
  }
}