zcommon/src/main/java/org/zkoss/idom/input/SAXBuilder.java
/* SAXBuilder.java
Purpose:
Description:
History:
2001/10/25 13:21:14, Create, Tom M. Yeh.
Copyright (C) 2001 Potix Corporation. All Rights Reserved.
{{IS_RIGHT
This program is distributed under LGPL Version 2.1 in the hope that
it will be useful, but WITHOUT ANY WARRANTY.
}}IS_RIGHT
*/
package org.zkoss.idom.input;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.URL;
import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.zkoss.idom.Document;
import org.zkoss.lang.Exceptions;
/**
* The builder based on SAX parsers.
*
* <p>A new instance of {@link SAXHandler} is created and configured
* each time one of the build methods is called.
*
* @author tomyeh
* @see SAXHandler
*/
public class SAXBuilder {
    private static final Logger log = LoggerFactory.getLogger(SAXBuilder.class);

    /** The parser. */
    private final SAXParser _parser;
    /** The iDOM factory. */
    private IDOMFactory _factory;
    /** Whether to ignore ignorable whitespace */
    private boolean _ignoreWhitespaces = false;
    /** Whether expansion of entities should occur */
    private boolean _expandEntities = true;
    /** Whether to convert CData to Text and coalesce them. */
    private boolean _coalescing = false;
    /** Whether to ignore comments. */
    private boolean _ignoreComments = false;
    /** The error handler. */
    private ErrorHandler _errHandler = null;
    /** The entity resolver. */
    private EntityResolver _resolver = null;

    /**
     * Constructor which reuses a parser.
     *
     * <p>The given parser is used as is: none of the features configured
     * by the other constructors are applied to it.
     *
     * @param parser the SAX parser to use; never null
     * @exception NullPointerException if parser is null
     */
    public SAXBuilder(SAXParser parser) {
        if (parser == null)
            throw new NullPointerException("parser");
        _parser = parser;
    }

    /**
     * Constructor that creates the parser on-the-fly.
     *
     * <p>The parser is created with secure processing turned on,
     * external entities turned off and DOCTYPE declarations disallowed.
     *
     * @param nsaware whether the parser is namespace aware
     * @param validate whether the parser shall validate the document
     *
     * @exception ParserConfigurationException if a parser cannot be created
     * which satisfies the requested configuration.
     * @exception SAXException if the parser factory rejects one of the
     * mandatory security features, or fails to create the parser.
     *
     * @see #SAXBuilder(boolean, boolean, boolean)
     */
    public SAXBuilder(boolean nsaware, boolean validate)
    throws ParserConfigurationException, SAXException {
        SAXParserFactory fty = SAXParserFactory.newInstance();

        // Fix XML external entity injection.
        // These three are mandatory: a failure is propagated to the caller.
        fty.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        fty.setFeature("http://xml.org/sax/features/external-general-entities", false);
        // Fix Resolving XML external entity in user-controlled data
        fty.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        // Defense in depth: parameter entities are the other XXE vector.
        // Best-effort, so that no new failure is introduced for parsers
        // that don't recognize the feature.
        setSafeFeature(fty, "http://xml.org/sax/features/external-parameter-entities", false);

        // SAX2 namespace-prefixes should be true for either builder
        setSafeFeature(fty, "http://xml.org/sax/features/namespace-prefixes", true);

        // Set SAX2 namespaces feature appropriately
        setSafeFeature(fty, "http://xml.org/sax/features/namespaces", nsaware);
        fty.setNamespaceAware(nsaware);

        setSafeFeature(fty, "http://xml.org/sax/features/validation", validate);
        setSafeFeature(fty, "http://apache.org/xml/features/validation/schema", validate);
        fty.setValidating(validate);

        _parser = fty.newSAXParser();
    }

    /**
     * Sets a feature of the parser factory on a best-effort basis.
     * A failure never propagates; it is logged as a warning only if
     * the feature name starts with <code>http://xml.org</code>.
     *
     * @param fty the factory to configure
     * @param feature the name of the feature
     * @param value the value to assign to the feature
     */
    private static
    void setSafeFeature(SAXParserFactory fty, String feature, boolean value) {
        try {
            fty.setFeature(feature, value);
        } catch (Throwable ex) {
            //IGNORE IT (crimson doesn't support ...validation/schema)
            if (feature.startsWith("http://xml.org"))
                log.warn("Ignored: {} doesn't support {}. Cause: {}",
                    fty, feature, Exceptions.getMessage(ex));
        }
    }

    /**
     * Constructor that creates the parser on-the-fly, that accepts
     * an additional option, smartIgnore.
     *
     * <p>When parsing XML for input purpose only, it is better to use this
     * constructor with smartIgnore true. Then, comments will be ignored and
     * CDATA will be coalesced with TEXT, such that a smaller DOM tree is
     * formed.
     *
     * @param nsaware whether the parser is namespace aware
     * @param validate whether the parser shall validate the document
     * @param smartIgnore whether to ignore comments, to coalesce CDATA with
     * text, and (if validate is true) to ignore ignorable-whitespace
     *
     * @exception ParserConfigurationException if a parser cannot be created
     * which satisfies the requested configuration.
     * @exception SAXException if the parser factory rejects one of the
     * mandatory security features, or fails to create the parser.
     */
    public SAXBuilder(boolean nsaware, boolean validate, boolean smartIgnore)
    throws ParserConfigurationException, SAXException {
        this(nsaware, validate);

        if (smartIgnore) {
            setIgnoringComments(true);
            setCoalescing(true);
            if (validate)
                setIgnoringElementContentWhitespace(true);
        }
    }

    /**
     * Tests whether to ignore whitespaces in element content.
     *
     * @return true if whitespaces in element content are ignored
     */
    public final boolean isIgnoringElementContentWhitespace() {
        return _ignoreWhitespaces;
    }
    /**
     * Sets whether the parser should eliminate whitespace in
     * element content. They are known as "ignorable whitespace".
     * Only whitespace which is contained within element content that has
     * an element only content model will be eliminated (see XML Rec 2.10).
     *
     * <p>For this setting to take effect requires that validation be turned on.
     *
     * <p>Default: false.
     *
     * @param ignore Whether to ignore whitespaces in element content.
     */
    public final void setIgnoringElementContentWhitespace(boolean ignore) {
        _ignoreWhitespaces = ignore;
    }

    /**
     * Tests whether to expand entity reference nodes.
     *
     * @return true if entities are expanded during parsing
     */
    public final boolean isExpandEntityReferences() {
        return _expandEntities;
    }
    /**
     * Sets whether to expand entities during parsing.
     * A true means to expand entities as normal content. A false means to
     * leave entities unexpanded as <code>EntityReference</code> objects.
     *
     * <p>Default: true.
     *
     * @param expand whether entity expansion should occur.
     */
    public final void setExpandEntityReferences(boolean expand) {
        _expandEntities = expand;
    }

    /**
     * Indicates whether or not the factory is configured to produce parsers
     * which converts CDATA to Text and appends it to the adjacent (if any)
     * Text node.
     *
     * <p>Default: false.
     *
     * @return true if the factory is configured to produce parsers which
     * converts CDATA nodes to Text nodes
     * and appends it to the adjacent (if any) Text node; false otherwise.
     */
    public final boolean isCoalescing() {
        return _coalescing;
    }
    /**
     * Specifies that the parser produced by this code will convert
     * CDATA to Text and append it to the adjacent (if any) text.
     *
     * <p>Default: false.
     *
     * @param coalescing whether to convert CDATA to Text and coalesce them
     */
    public final void setCoalescing(boolean coalescing) {
        _coalescing = coalescing;
    }

    /**
     * Indicates whether or not the factory is configured to produce parsers
     * which ignores comments.
     *
     * <p>Default: false.
     *
     * @return true if the factory is configured to produce parsers
     * which ignores comments; false otherwise.
     */
    public final boolean isIgnoringComments() {
        return _ignoreComments;
    }
    /**
     * Specifies that the parser produced by this code will ignore comments.
     *
     * <p>Default: false.
     *
     * @param ignoreComments whether to ignore comments
     */
    public final void setIgnoringComments(boolean ignoreComments) {
        _ignoreComments = ignoreComments;
    }

    /**
     * Specifies the org.xml.sax.ErrorHandler to be used to report errors
     * present in the XML document to be parsed.
     * <p>Default: null -- to use the default implementation and behavior.
     *
     * @param eh the error handler; null to use the default one
     */
    public final void setErrorHandler(ErrorHandler eh) {
        _errHandler = eh;
    }
    /**
     * Gets the org.xml.sax.ErrorHandler.
     *
     * @return the error handler; null to use the default implementation
     */
    public final ErrorHandler getErrorHandler() {
        return _errHandler;
    }

    /**
     * Specifies the org.xml.sax.EntityResolver to be used to resolve
     * entities present in the XML document to be parsed.
     * <p>Default: null -- to use the default implementation and behavior.
     *
     * @param er the entity resolver; null to use the default one
     */
    public final void setEntityResolver(org.xml.sax.EntityResolver er) {
        _resolver = er;
    }
    /**
     * Gets the org.xml.sax.EntityResolver.
     *
     * @return the entity resolver; null to use the default implementation
     */
    public final EntityResolver getEntityResolver() {
        return _resolver;
    }

    /**
     * Tests whether or not this parser is configured to understand namespaces.
     *
     * @return what the underlying SAX parser reports
     */
    public final boolean isNamespaceAware() {
        return _parser.isNamespaceAware();
    }
    /**
     * Tests whether or not this parser is configured to validate XML documents.
     *
     * @return what the underlying SAX parser reports
     */
    public final boolean isValidating() {
        return _parser.isValidating();
    }

    /**
     * Gets the iDOM factory. Null for DefaultIDOMFactory.THE.
     *
     * @return the iDOM factory, or null if it has not been set
     */
    public final IDOMFactory getIDOMFactory() {
        return _factory;
    }
    /**
     * Sets the iDOM factory. Null for DefaultIDOMFactory.THE.
     *
     * @param factory the iDOM factory passed to each newly created
     * {@link SAXHandler}
     */
    public final void setIDOMFactory(IDOMFactory factory) {
        _factory = factory;
    }

    /**
     * Gets the SAX parser.
     *
     * @return the parser given to, or created by, the constructor
     */
    public final SAXParser getParser() {
        return _parser;
    }

    /**
     * Build an iDOM tree from a file.
     *
     * @param src the file to parse
     * @return the document collected by the handler
     * @exception SAXException if the parser or the handler reports an error
     * @exception IOException if the file cannot be read
     */
    public final Document build(File src)
    throws SAXException, IOException {
        SAXHandler handler = newHandler();
        _parser.parse(src, handler);
        return handler.getDocument();
    }
    /**
     * Build an iDOM tree from an input stream.
     *
     * @param src the input stream to parse
     * @return the document collected by the handler
     * @exception SAXException if the parser or the handler reports an error
     * @exception IOException if the stream cannot be read
     */
    public final Document build(InputStream src)
    throws SAXException, IOException {
        SAXHandler handler = newHandler();
        _parser.parse(src, handler);
        return handler.getDocument();
    }
    /**
     * Build an iDOM tree from an input source.
     *
     * @param src the input source to parse
     * @return the document collected by the handler
     * @exception SAXException if the parser or the handler reports an error
     * @exception IOException if the source cannot be read
     */
    public final Document build(InputSource src)
    throws SAXException, IOException {
        SAXHandler handler = newHandler();
        _parser.parse(src, handler);
        return handler.getDocument();
    }
    /**
     * Build an iDOM tree from a URI string.
     *
     * @param uri the location of the content to parse
     * @return the document collected by the handler
     * @exception SAXException if the parser or the handler reports an error
     * @exception IOException if the content cannot be read
     */
    public final Document build(String uri)
    throws SAXException, IOException {
        SAXHandler handler = newHandler();
        _parser.parse(uri, handler);
        return handler.getDocument();
    }
    /**
     * Build an iDOM tree from a URL.
     * The URL is handed to the parser in its external form.
     *
     * @param url the location of the content to parse
     * @return the document collected by the handler
     * @exception SAXException if the parser or the handler reports an error
     * @exception IOException if the content cannot be read
     */
    public final Document build(URL url)
    throws SAXException, IOException {
        SAXHandler handler = newHandler();
        _parser.parse(url.toExternalForm(), handler);
        return handler.getDocument();
    }
    /**
     * Build an iDOM tree from a Reader.
     * The reader is wrapped with an {@link InputSource}.
     *
     * @param src the reader to parse
     * @return the document collected by the handler
     * @exception SAXException if the parser or the handler reports an error
     * @exception IOException if the reader cannot be read
     */
    public final Document build(Reader src)
    throws SAXException, IOException {
        SAXHandler handler = newHandler();
        _parser.parse(new InputSource(src), handler);
        return handler.getDocument();
    }

    /**
     * Creates a SAX Handler.
     * Deriving class might override to provide a subclass of SAXHandler.
     *
     * <p>This implementation copies the current settings of this builder
     * to the new handler, and then registers the handler to the parser as
     * the lexical handler and, if entities are not expanded, as the
     * declaration handler. The registration is best-effort.
     *
     * @return a handler configured with the settings of this builder
     * @exception SAXException declared for deriving classes; NOTE(review):
     * nothing visible in this implementation throws it -- confirm against
     * SAXHandler.
     */
    protected SAXHandler newHandler() throws SAXException {
        SAXHandler handler = new SAXHandler(_factory);

        //configure handler
        handler.setIgnoringElementContentWhitespace(_ignoreWhitespaces);
        handler.setExpandEntityReferences(_expandEntities);
        handler.setCoalescing(_coalescing);
        handler.setIgnoringComments(_ignoreComments);
        handler.setErrorHandler(_errHandler);
        handler.setEntityResolver(_resolver);

        //configure parser
        setSafeProperty(
            "http://xml.org/sax/properties/lexical-handler",
            "http://xml.org/sax/handlers/LexicalHandler",
            handler);

        if (!isExpandEntityReferences()) { //not expanding?
            //then, we need declaration-handler
            setSafeProperty(
                "http://xml.org/sax/properties/declaration-handler", null,
                handler);
        }
        return handler;
    }

    /**
     * Sets a property of the parser on a best-effort basis.
     * If the parser rejects <code>name</code>, <code>auxnm</code> (if not
     * null) is tried. If both fail, nothing is thrown; the first failure is
     * logged as a warning only if <code>name</code> starts with
     * <code>http://xml.org</code>.
     *
     * @param name the name of the property
     * @param auxnm an alternative name to try if name is rejected;
     * null if there is no alternative
     * @param value the value to assign to the property
     */
    private void setSafeProperty(String name, String auxnm, Object value) {
        Throwable ex;
        try {
            _parser.setProperty(name, value);
            return;
        } catch (Throwable t) {
            ex = t; //reported below if the alternative name fails too
        }

        if (auxnm != null) {
            try {
                _parser.setProperty(auxnm, value);
                return;
            } catch (Throwable t) {
                //Not fatal: the primary failure is reported below.
                //Keep a trace of this one rather than swallowing it silently.
                log.debug("{} doesn't support the alternative name {} either. Cause: {}",
                    _parser, auxnm, Exceptions.getMessage(t));
            }
        }

        if (name.startsWith("http://xml.org"))
            log.warn("Ignored: {} doesn't support {}. Cause: {}",
                _parser, name, Exceptions.getMessage(ex));
    }
}