zcommon/src/main/java/org/zkoss/idom/input/SAXBuilder.java

Summary

Maintainability
B
4 hrs
Test Coverage
/* SAXBuilder.java


    Purpose: Builds an iDOM Document from XML input by use of a SAX parser.
    Description: 
    History:
    2001/10/25 13:21:14, Create, Tom M. Yeh.

Copyright (C) 2001 Potix Corporation. All Rights Reserved.

{{IS_RIGHT
    This program is distributed under LGPL Version 2.1 in the hope that
    it will be useful, but WITHOUT ANY WARRANTY.
}}IS_RIGHT
*/
package org.zkoss.idom.input;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.URL;

import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.zkoss.idom.Document;
import org.zkoss.lang.Exceptions;

/**
 * The builder based on SAX parsers.
 *
 * <p>A new instance of {@link SAXHandler} is created and configured
 * each time one of the build methods is called. The options kept by this
 * builder (whitespace, entity expansion, coalescing, comments, error handler
 * and entity resolver) are copied to that handler at that moment, so a change
 * made through a setter affects the builds started afterwards.
 *
 * <p>All build methods share the single {@link SAXParser} held by this builder.
 *
 * @author tomyeh
 * @see SAXHandler
 */
public class SAXBuilder {
    private static final Logger log = LoggerFactory.getLogger(SAXBuilder.class);

    /** The parser. */
    private final SAXParser _parser;
    /** The iDOM factory. */
    private IDOMFactory _factory;
    /** Whether to ignore ignorable whitespace */
    private boolean _ignoreWhitespaces = false;
    /** Whether expansion of entities should occur */
    private boolean _expandEntities = true;
    /** Whether to convert CData to Text and coalesce them. */
    private boolean _coalescing = false;
    /** Whether to ignore comments. */
    private boolean _ignoreComments = false;
    /** The error handler. */
    private ErrorHandler _errHandler = null;
    /** The entity resolver. */
    private EntityResolver _resolver = null;

    /**
     * Constructor which reuses a parser.
     *
     * <p>The parser is used as given; none of the features configured by
     * {@link #SAXBuilder(boolean, boolean)} is applied to it.
     *
     * @param parser the parser to use; never null
     * @exception NullPointerException if parser is null
     */
    public SAXBuilder(SAXParser parser) {
        if (parser == null) {
            throw new NullPointerException("parser");
        }
        _parser = parser;
    }
    /**
     * Constructor that creates the parser on-the-fly.
     *
     * <p>The parser factory is configured with secure processing turned on,
     * external general entities turned off and DOCTYPE declarations
     * disallowed. If the factory rejects any of these three features,
     * the exception is propagated to the caller. All other features are
     * applied on a best-effort basis (see the private helper setSafeFeature).
     *
     * @param nsaware whether the parser is namespace aware
     * @param validate whether the parser shall validate the document
     *
     * @exception ParserConfigurationException if a parser cannot be created
     * which satisfies the requested configuration.
     * @exception SAXException if the parser factory rejects one of the
     * mandatory security features, or the parser cannot be created.
     *
     * @see #SAXBuilder(boolean, boolean, boolean)
     */
    public SAXBuilder(boolean nsaware, boolean validate)
    throws ParserConfigurationException, SAXException {
        SAXParserFactory fty = SAXParserFactory.newInstance();

        // Fix XML external entity injection
        fty.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        fty.setFeature("http://xml.org/sax/features/external-general-entities", false);

        // Fix Resolving XML external entity in user-controlled data
        fty.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);

        // Defence in depth: also turn off external parameter entities and the
        // loading of external DTDs. Best-effort only, so a parser that does not
        // know these features keeps working exactly as before.
        setSafeFeature(fty, "http://xml.org/sax/features/external-parameter-entities", false);
        setSafeFeature(fty, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

        // SAX2 namespace-prefixes should be true for either builder
        setSafeFeature(fty, "http://xml.org/sax/features/namespace-prefixes", true);

        // Set SAX2 namespaces feature appropriately
        setSafeFeature(fty, "http://xml.org/sax/features/namespaces", nsaware);
        fty.setNamespaceAware(nsaware);

        setSafeFeature(fty, "http://xml.org/sax/features/validation", validate);
        setSafeFeature(fty, "http://apache.org/xml/features/validation/schema", validate);
        fty.setValidating(validate);

        _parser = fty.newSAXParser();
    }
    /**
     * Sets a feature of the parser factory without failing if the factory
     * rejects it.
     *
     * <p>A rejection is reported as a warning only if the feature is one of
     * the standard ones (its name starts with http://xml.org); otherwise it
     * is ignored silently.
     *
     * @param fty the parser factory to configure
     * @param feature the name of the feature
     * @param value the value to assign to the feature
     */
    private static
    void setSafeFeature(SAXParserFactory fty, String feature, boolean value) {
        try {
            fty.setFeature(feature, value);
        } catch (Throwable ex) {
            //IGNORE IT (crimson doesn't support ...validation/schema)
            //Throwable is caught on purpose: this is a best-effort setting
            //that must never prevent the builder from being created
            if (feature.startsWith("http://xml.org")) {
                log.warn("Ignored: {} doesn't support {}. Cause: {}",
                    fty, feature, Exceptions.getMessage(ex));
            }
        }
    }
    /**
     * Constructor that creates the parser on-the-fly, that accepts
     * an additional option, smartIgnore.
     *
     * <p>When parsing XML for input purpose only, it is better to use this
     * constructor with smartIgnore true, and then comments will be ignored
     * and CDATA will be coalesced with TEXT. A smaller DOM tree is formed.
     *
     * @param nsaware whether the parser is namespace aware
     * @param validate whether the parser shall validate the document
     * @param smartIgnore whether to ignore comments and ignorable-whitespace
     * (if validate is true), and to coalesce
     *
     * @exception ParserConfigurationException if a parser cannot be created
     * which satisfies the requested configuration.
     * @exception SAXException if the parser factory rejects one of the
     * mandatory security features, or the parser cannot be created.
     */
    public SAXBuilder(boolean nsaware, boolean validate, boolean smartIgnore)
    throws ParserConfigurationException, SAXException {
        this(nsaware, validate);
        if (smartIgnore) {
            setIgnoringComments(true);
            setCoalescing(true);
            if (validate) {
                setIgnoringElementContentWhitespace(true);
            }
        }
    }

    /**
     * Tests whether to ignore whitespaces in element content.
     *
     * @return true if whitespaces in element content are ignored
     */
    public final boolean isIgnoringElementContentWhitespace() {
        return _ignoreWhitespaces;
    }
    /**
     * Sets whether the parser should eliminate whitespace in
     * element content. They are known as "ignorable whitespace".
     * Only whitespace which is contained within element content that has
     * an element only content model will be eliminated (see XML Rec 2.10).
     *
     * <p>For this setting to take effect requires that validation be turned on.
     *
     * <p>Default: false.
     *
     * @param ignore Whether to ignore whitespaces in element content.
     */
    public final void setIgnoringElementContentWhitespace(boolean ignore) {
        _ignoreWhitespaces = ignore;
    }

    /**
     * Tests whether to expand entity reference nodes.
     *
     * @return true if entities are expanded during parsing
     */
    public final boolean isExpandEntityReferences() {
        return _expandEntities;
    }
    /**
     * Sets whether to expand entities during parsing.
     * A true means to expand entities as normal content.  A false means to
     * leave entities unexpanded as <code>EntityReference</code> objects.
     *
     * <p>Default: true.
     *
     * @param expand whether entity expansion should occur.
     */
    public final void setExpandEntityReferences(boolean expand) {
        _expandEntities = expand;
    }

    /**
     * Indicates whether or not this builder is configured to convert
     * CDATA to Text and to append it to the adjacent (if any) Text node.
     *
     * <p>Default: false.
     *
     * @return true if CDATA nodes are converted to Text nodes
     * and appended to the adjacent (if any) Text node; false otherwise.
     */
    public final boolean isCoalescing() {
        return _coalescing;
    }
    /**
     * Specifies that the handlers created by this builder will convert
     * CDATA to Text and append it to the adjacent (if any) text.
     *
     * <p>Default: false.
     *
     * @param coalescing whether to convert CDATA to Text and coalesce them
     */
    public final void setCoalescing(boolean coalescing) {
        _coalescing = coalescing;
    }

    /**
     * Indicates whether or not this builder is configured to ignore comments.
     *
     * <p>Default: false.
     *
     * @return true if comments are ignored; false otherwise.
     */
    public final boolean isIgnoringComments() {
        return _ignoreComments;
    }
    /**
     * Specifies that the handlers created by this builder will ignore comments.
     *
     * <p>Default: false.
     *
     * @param ignoreComments whether to ignore comments
     */
    public final void setIgnoringComments(boolean ignoreComments) {
        _ignoreComments = ignoreComments;
    }

    /**
     * Specifies the org.xml.sax.ErrorHandler to be used to report errors
     * present in the XML document to be parsed.
     * <p>Default: null -- to use the default implementation and behavior.
     *
     * @param eh the error handler, or null for the default behavior
     */
    public final void setErrorHandler(ErrorHandler eh) {
        _errHandler = eh;
    }
    /**
     * Gets the org.xml.sax.ErrorHandler.
     *
     * @return the error handler; null to use the default implementation
     */
    public final ErrorHandler getErrorHandler() {
        return _errHandler;
    }

    /**
     * Specifies the org.xml.sax.EntityResolver to be used to resolve
     * entities present in the XML document to be parsed.
     * <p>Default: null -- to use the default implementation and behavior.
     *
     * @param er the entity resolver, or null for the default behavior
     */
    public final void setEntityResolver(EntityResolver er) {
        _resolver = er;
    }
    /**
     * Gets the org.xml.sax.EntityResolver.
     *
     * @return the entity resolver; null to use the default implementation
     */
    public final EntityResolver getEntityResolver() {
        return _resolver;
    }

    /**
     * Tests whether or not this parser is configured to understand namespaces.
     *
     * @return what the underlying parser reports as its namespace awareness
     */
    public final boolean isNamespaceAware() {
        return _parser.isNamespaceAware();
    }
    /**
     * Tests whether or not this parser is configured to validate XML documents.
     *
     * @return what the underlying parser reports as its validating mode
     */
    public final boolean isValidating() {
        return _parser.isValidating();
    }

    /**
     * Gets the iDOM factory. Null for DefaultIDOMFactory.THE.
     *
     * @return the iDOM factory, or null if none was assigned
     */
    public final IDOMFactory getIDOMFactory() {
        return _factory;
    }
    /**
     * Sets the iDOM factory. Null for DefaultIDOMFactory.THE.
     *
     * @param factory the iDOM factory, or null
     */
    public final void setIDOMFactory(IDOMFactory factory) {
        _factory = factory;
    }

    /**
     * Gets the SAX parser.
     *
     * @return the parser used by this builder; never null
     */
    public final SAXParser getParser() {
        return _parser;
    }

    /**
     * Build an iDOM tree from a file.
     *
     * @param src the file to parse
     * @return the document collected by the handler
     * @exception SAXException if the handler cannot be created or parsing fails
     * @exception IOException if an I/O error occurs
     */
    public final Document build(File src)
    throws SAXException, IOException {
        SAXHandler handler = newHandler();
        _parser.parse(src, handler);
        return handler.getDocument();
    }
    /**
     * Build an iDOM tree from a input stream.
     *
     * @param src the input stream to parse
     * @return the document collected by the handler
     * @exception SAXException if the handler cannot be created or parsing fails
     * @exception IOException if an I/O error occurs
     */
    public final Document build(InputStream src)
    throws SAXException, IOException {
        SAXHandler handler = newHandler();
        _parser.parse(src, handler);
        return handler.getDocument();
    }
    /**
     * Build an iDOM tree from a input source.
     *
     * @param src the input source to parse
     * @return the document collected by the handler
     * @exception SAXException if the handler cannot be created or parsing fails
     * @exception IOException if an I/O error occurs
     */
    public final Document build(InputSource src)
    throws SAXException, IOException {
        SAXHandler handler = newHandler();
        _parser.parse(src, handler);
        return handler.getDocument();
    }
    /**
     * Build an iDOM tree from a URI string.
     *
     * @param uri the URI of the content to parse
     * @return the document collected by the handler
     * @exception SAXException if the handler cannot be created or parsing fails
     * @exception IOException if an I/O error occurs
     */
    public final Document build(String uri)
    throws SAXException, IOException {
        SAXHandler handler = newHandler();
        _parser.parse(uri, handler);
        return handler.getDocument();
    }
    /**
     * Build an iDOM tree from a URL.
     * The URL is handed to the parser in its external form.
     *
     * @param url the URL of the content to parse
     * @return the document collected by the handler
     * @exception SAXException if the handler cannot be created or parsing fails
     * @exception IOException if an I/O error occurs
     */
    public final Document build(URL url)
    throws SAXException, IOException {
        SAXHandler handler = newHandler();
        _parser.parse(url.toExternalForm(), handler);
        return handler.getDocument();
    }
    /**
     * Build an iDOM tree from a Reader.
     * The reader is wrapped with an {@link InputSource}.
     *
     * @param src the reader to parse
     * @return the document collected by the handler
     * @exception SAXException if the handler cannot be created or parsing fails
     * @exception IOException if an I/O error occurs
     */
    public final Document build(Reader src)
    throws SAXException, IOException {
        SAXHandler handler = newHandler();
        _parser.parse(new InputSource(src), handler);
        return handler.getDocument();
    }
    /**
     * Creates a SAX Handler.
     * Deriving class might override to provide a subclass of SAXHandler.
     *
     * <p>The handler receives the current options of this builder and is
     * registered to the parser as the lexical handler. It is registered
     * as the declaration handler too, but only if entities are not expanded.
     *
     * @return the handler to pass to the parser
     * @exception SAXException declared for the sake of overriding classes
     */
    protected SAXHandler newHandler() throws SAXException {
        SAXHandler handler = new SAXHandler(_factory);

        //configure handler
        handler.setIgnoringElementContentWhitespace(_ignoreWhitespaces);
        handler.setExpandEntityReferences(_expandEntities);
        handler.setCoalescing(_coalescing);
        handler.setIgnoringComments(_ignoreComments);
        handler.setErrorHandler(_errHandler);
        handler.setEntityResolver(_resolver);

        //configure parser
        setSafeProperty(
            "http://xml.org/sax/properties/lexical-handler",
            "http://xml.org/sax/handlers/LexicalHandler",
            handler);

        if (!isExpandEntityReferences()) { //not expanding?
            //then, we need declaration-handler
            setSafeProperty(
                "http://xml.org/sax/properties/declaration-handler", null,
                handler);
        }
        return handler;
    }
    /**
     * Sets a property of the parser without failing if the parser rejects it.
     *
     * <p>If the parser rejects the property under its primary name, the
     * auxiliary name (if any) is tried. If both attempts fail, a warning that
     * carries the cause of the first failure is logged, provided that the
     * primary name starts with http://xml.org.
     *
     * @param name the primary name of the property
     * @param auxnm the alternative name to try if the primary one is
     * rejected; null if there is no alternative
     * @param value the value to assign to the property
     */
    private void setSafeProperty(String name, String auxnm, Object value) {
        Throwable ex;
        try {
            _parser.setProperty(name, value);
            return;
        } catch (Throwable t) {
            //Throwable is caught on purpose: this is a best-effort setting
            ex = t;
        }
        if (auxnm != null) {
            try {
                _parser.setProperty(auxnm, value);
                return;
            } catch (Throwable t) {
                //the fallback failed as well; keep a trace of it rather than
                //swallowing it, the primary failure is reported below
                log.debug("Ignored: {} doesn't support {} either. Cause: {}",
                    _parser, auxnm, Exceptions.getMessage(t));
            }
        }

        if (name.startsWith("http://xml.org")) {
            log.warn("Ignored: {} doesn't support {}. Cause: {}",
                _parser, name, Exceptions.getMessage(ex));
        }
    }
}