src/main/java/innovimax/quixproc/datamodel/stream/IQuiXStreamReader.java
/*
* QuiXProc: efficient evaluation of XProc Pipelines.
* Copyright (C) 2011-2018 Innovimax
* All rights reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0*/
package innovimax.quixproc.datamodel.stream;
import java.util.NoSuchElementException;
import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import javax.xml.stream.Location;
import innovimax.quixproc.datamodel.QuiXCharStream;
import innovimax.quixproc.datamodel.QuiXToken;
/**
* The IQuiXStreamReader interface allows forward, read-only access to XML. It
* is designed to be the higher level (XPath Data Model) and most efficient way
* to read XML data.
*
* <p>
* The IQuiXStreamReader is designed to iterate over XML using next() and
* hasNext(). The data can be accessed using methods such as getEventType(),
* getNamespaceURI(), getLocalName() and getText().
*
* <p>
* The {@link #next()} method causes the reader to read the next parse event.
* The next() method returns an enum which identifies the type of event just
* read.
* <p>
* The event type can be determined using {@link #getEventType()}.
* <p>
* Parsing events are defined as the start sequence, start document, start tag,
* attribute, namespace, character data, end tag, comment, or processing
* instruction.
*
*
* <p>
* The following table describes which methods are valid in what state. If a
* method is called in an invalid state the method will throw a
* java.lang.IllegalStateException.
*
* <table border="2" rules="all" cellpadding="4">
* <thead>
* <tr>
* <th align="center" colspan="2">Valid methods for each state</th>
* </tr>
* </thead> <tbody>
* <tr>
* <th>Event Type</th>
* <th>Valid Methods</th>
* </tr>
* <tr>
* <td>All States</td>
* <td>getProperty(), hasNext(), require(), close(), getNamespaceURI(),
* isStartSequence(), isStartElement(), isEndElement(), isCharacters(),
* getNamespaceContext(), getEventType(), getLocation(), hasText(), hasName(),
* getSequencePosition()</td>
* </tr>
* <tr>
* <td>START_SEQUENCE</td>
* <td>next()</td>
* </tr>
* <tr>
* <td>END_SEQUENCE</td>
* <td>close()</td>
* </tr>
* <tr>
* <td>START_DOCUMENT</td>
* <td>next(), getDocumentURI()</td>
* </tr>
* <tr>
* <td>END_DOCUMENT</td>
* <td>next(), close()</td>
* </tr>
* <tr>
* <td>START_ELEMENT</td>
* <td>next(), getName(), getLocalName(), hasName(), getPrefix(),
* getAttributeXXX(), isAttributeSpecified(), getNamespaceXXX(),
* getElementText(), nextTag()</td>
* </tr>
* <tr>
* <td>ATTRIBUTE</td>
* <td>next(), nextTag(), getAttributeXXX(), isAttributeSpecified()</td>
* </tr>
* <tr>
* <td>NAMESPACE</td>
* <td>next(), nextTag(), getNamespaceXXX()</td>
* </tr>
* <tr>
* <td>END_ELEMENT</td>
* <td>next(), getName(), getLocalName(), hasName(), getPrefix(),
* getNamespaceXXX(), nextTag()</td>
* </tr>
* <tr>
* <td>CHARACTERS</td>
* <td>next(), getTextXXX(), nextTag()</td>
* </tr>
* <tr>
* <td>CDATA</td>
* <td>next(), getTextXXX(), nextTag()</td>
* </tr>
* <tr>
* <td>COMMENT</td>
* <td>next(), getTextXXX(), nextTag()</td>
* </tr>
* <tr>
* <td>SPACE</td>
* <td>next(), getTextXXX(), nextTag()</td>
* </tr>
* <tr>
* <td>PROCESSING_INSTRUCTION</td>
* <td>next(), getPITarget(), getPIData(), nextTag()</td>
* </tr>
* <tr>
* <td>ENTITY_REFERENCE</td>
* <td>next(), getLocalName(), getText(), nextTag()</td>
* </tr>
* <tr>
* <td>DTD</td>
* <td>next(), getText(), nextTag()</td>
* </tr>
* </tbody>
* </table>
*
* @version 0.1
* @author Copyright (c) 2015-2018 by Innovimax. All Rights Reserved.
* @see QuiXToken
* @see javax.xml.stream.XMLStreamReader
*/
public interface IQuiXStreamReader {
/**
* Gets the value of a feature/property from the underlying implementation.
*
* @param name
* the name of the property, may not be null
* @return the value of the property
* @throws IllegalArgumentException
* if name is null
*/
Object getProperty(String name);
/**
* Gets the next parsing event. A processor may return all contiguous
* character data in a single chunk, or it may split it into several chunks.
* If the property javax.xml.stream.isCoalescing is set to true, element
* content must be coalesced and only one CHARACTERS event must be returned
* for contiguous element content or CDATA sections.
*
* <p>
* By default entity references must be expanded and reported transparently
* to the application. An exception will be thrown if an entity reference
* cannot be expanded. If element content is empty (i.e. content is "") then
* no CHARACTERS event will be reported.
*
* <p>
* Given the following XML:
*
* <pre>{@code
* <foo><!--description-->content text<![CDATA[<greeting>Hello</greeting>]]>other content</foo>
* }</pre>
*
* the behavior of calling next() when positioned on foo will be:
* <ol>
* <li>the comment (COMMENT)</li>
* <li>then the characters section (CHARACTERS)</li>
* <li>then the CDATA section (another CHARACTERS)</li>
* <li>then the next characters section (another CHARACTERS)</li>
* <li>then the END_ELEMENT</li>
* </ol>
*
* <p>
* <b>NOTE:</b> an empty element (such as {@code <tag/>}) will be reported
* with two separate events: START_ELEMENT, END_ELEMENT. This preserves
* parsing equivalency of an empty element to {@code <tag></tag>}.
*
* <p>
* This method must not be called after hasNext() has returned false.
* NOTE(review): this description originally named IllegalStateException for
* that case, while the {@code @throws} clause below names
* NoSuchElementException - confirm which one implementations raise.
*
* @see javax.xml.stream.events.XMLEvent
* @return the token identifying the parse event that was just read
* @throws NoSuchElementException
* if this is called when hasNext() returns false
* @throws QuiXStreamException
* if there is an error processing the underlying XML source
*/
QuiXToken next();
/**
* Tests if the current event is of the given type and if the namespace and
* name match the current namespace and name of the current event. If the
* namespaceURI is null it is not checked for equality, if the localName is
* null it is not checked for equality.
*
* @param type
* the event type (presumably one of the integer codes reported by
* getEventType() - confirm)
* @param namespaceURI
* the uri of the event, may be null
* @param localName
* the localName of the event, may be null
* @throws QuiXStreamException
* if the required values are not matched.
*/
void require(int type, String namespaceURI, String localName);
/**
* Reads the content of a text-only element; an exception is thrown if this
* is not a text-only element. Regardless of the value of
* javax.xml.stream.isCoalescing this method always returns coalesced
* content.
* <ul>
* <li>Precondition: the current event is START_ELEMENT.</li>
* <li>Postcondition: the current event is the corresponding
* END_ELEMENT.</li>
* </ul>
*
* <p>
* The method does the following (implementations are free to optimize but
* must do equivalent processing). The sketch is written in terms of the
* javax.xml.stream integer event codes and a String result, whereas this
* interface's next() returns a QuiXToken and this method returns a
* QuiXCharStream, so read it as a description of the required behavior
* rather than as compilable code:
*
* <pre>{@code
* if (getEventType() != XMLStreamConstants.START_ELEMENT) {
*     throw new QuiXStreamException("parser must be on START_ELEMENT to read next text", getLocation());
* }
* int eventType = next();
* StringBuffer buf = new StringBuffer();
* while (eventType != XMLStreamConstants.END_ELEMENT) {
*     if (eventType == XMLStreamConstants.CHARACTERS || eventType == XMLStreamConstants.CDATA
*             || eventType == XMLStreamConstants.SPACE || eventType == XMLStreamConstants.ENTITY_REFERENCE) {
*         buf.append(getText());
*     } else if (eventType == XMLStreamConstants.PROCESSING_INSTRUCTION
*             || eventType == XMLStreamConstants.COMMENT) {
*         // skipping
*     } else if (eventType == XMLStreamConstants.END_DOCUMENT) {
*         throw new QuiXStreamException("unexpected end of document when reading element text content", getLocation());
*     } else if (eventType == XMLStreamConstants.START_ELEMENT) {
*         throw new QuiXStreamException("element text content may not contain START_ELEMENT", getLocation());
*     } else {
*         throw new QuiXStreamException("Unexpected event type " + eventType, getLocation());
*     }
*     eventType = next();
* }
* return buf.toString();
* }</pre>
*
* @return the coalesced text content of the element
* @throws QuiXStreamException
* if the current event is not a START_ELEMENT or if a non text
* element is encountered
*/
QuiXCharStream getElementText();
/**
* Skips any white space (isWhiteSpace() returns true), COMMENT, or
* PROCESSING_INSTRUCTION, until a START_ELEMENT or END_ELEMENT is reached.
* If other than white space characters, COMMENT, PROCESSING_INSTRUCTION,
* START_ELEMENT, END_ELEMENT are encountered, an exception is thrown. This
* method should be used when processing element-only content separated by
* white space.
* <ul>
* <li>Precondition: none.</li>
* <li>Postcondition: the current event is START_ELEMENT or END_ELEMENT and
* the cursor may have moved over any whitespace event.</li>
* </ul>
*
* <p>
* Essentially it does the following (implementations are free to optimize
* but must do equivalent processing):
*
* <pre>{@code
* int eventType = next();
* while ((eventType == XMLStreamConstants.CHARACTERS && isWhiteSpace()) // skip whitespace
*         || (eventType == XMLStreamConstants.CDATA && isWhiteSpace()) // skip whitespace
*         || eventType == XMLStreamConstants.SPACE
*         || eventType == XMLStreamConstants.PROCESSING_INSTRUCTION
*         || eventType == XMLStreamConstants.COMMENT) {
*     eventType = next();
* }
* if (eventType != XMLStreamConstants.START_ELEMENT && eventType != XMLStreamConstants.END_ELEMENT) {
*     throw new QuiXStreamException("expected start or end tag", getLocation());
* }
* return eventType;
* }</pre>
*
* @return the event type of the element read (START_ELEMENT or END_ELEMENT)
* @throws QuiXStreamException
* if the current event is not white space, COMMENT,
* PROCESSING_INSTRUCTION, START_ELEMENT or END_ELEMENT
* @throws NoSuchElementException
* if this is called when hasNext() returns false
*/
int nextTag();
/**
* Returns true if there are more parsing events and false if there are no
* more events. This method will return false if the current state of the
* reader is END_DOCUMENT.
*
* <p>
* NOTE(review): the class-level state table lists next() as valid on
* END_DOCUMENT and only close() on END_SEQUENCE, which suggests that the
* terminal state of this reader is END_SEQUENCE rather than END_DOCUMENT -
* confirm against implementations.
*
* @return true if there are more events, false otherwise
* @throws QuiXStreamException
* if there is a fatal error detecting the next state
*/
boolean hasNext();
/**
* Frees any resources associated with this reader. This method does not
* close the underlying input source.
*
* @throws QuiXStreamException
* if there are errors freeing associated resources
*/
void close();
/**
* Returns the uri for the given prefix. The uri returned depends on the
* current state of the processor.
*
* <p>
* <strong>NOTE:</strong> The 'xml' prefix is bound as defined in the
* <a href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in
* XML</a> specification to "http://www.w3.org/XML/1998/namespace".
*
* <p>
* <strong>NOTE:</strong> The 'xmlns' prefix must be resolved to the
* following namespace:
* <a href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a>
*
* @param prefix
* the prefix to look up, may not be null
* @return the uri bound to the given prefix or null if it is not bound
* @throws IllegalArgumentException
* if the prefix is null
*/
QuiXCharStream getNamespaceURI(String prefix);
/**
* Returns true if the cursor points to a start tag (otherwise false).
*
* @return true if the cursor points to a start tag, false otherwise
*/
boolean isStartElement();
/**
* Returns true if the cursor points to an end tag (otherwise false).
*
* @return true if the cursor points to an end tag, false otherwise
*/
boolean isEndElement();
/**
* Returns true if the cursor points to a character data event.
*
* @return true if the cursor points to character data, false otherwise
*/
boolean isCharacters();
/**
* Returns true if the cursor points to a character data event that consists
* of all whitespace.
*
* @return true if the cursor points to all whitespace, false otherwise
*/
boolean isWhiteSpace();
/**
* Returns the normalized attribute value of the attribute with the given
* namespace and localName. If the namespaceURI is null the namespace is not
* checked for equality.
*
* @param namespaceURI
* the namespace of the attribute
* @param localName
* the local name of the attribute, cannot be null
* @return the value of the attribute, or null if not found
* @throws IllegalStateException
* if this is not a START_ELEMENT or ATTRIBUTE
*/
String getAttributeValue(String namespaceURI, String localName);
/**
* Returns the count of attributes on this START_ELEMENT. This method is
* only valid on a START_ELEMENT or ATTRIBUTE. This count excludes namespace
* definitions. Attribute indices are zero-based.
*
* @return the number of attributes
* @throws IllegalStateException
* if this is not a START_ELEMENT or ATTRIBUTE
*/
int getAttributeCount();
/**
* Returns the qname of the attribute at the provided index.
*
* @param index
* the position of the attribute
* @return the QName of the attribute
* @throws IllegalStateException
* if this is not a START_ELEMENT or ATTRIBUTE
*/
QName getAttributeName(int index);
/**
* Returns the namespace of the attribute at the provided index.
*
* @param index
* the position of the attribute
* @return the namespace URI (can be null)
* @throws IllegalStateException
* if this is not a START_ELEMENT or ATTRIBUTE
*/
String getAttributeNamespace(int index);
/**
* Returns the localName of the attribute at the provided index.
*
* @param index
* the position of the attribute
* @return the localName of the attribute
* @throws IllegalStateException
* if this is not a START_ELEMENT or ATTRIBUTE
*/
String getAttributeLocalName(int index);
/**
* Returns the prefix of the attribute at the provided index.
*
* @param index
* the position of the attribute
* @return the prefix of the attribute
* @throws IllegalStateException
* if this is not a START_ELEMENT or ATTRIBUTE
*/
String getAttributePrefix(int index);
/**
* Returns the XML type of the attribute at the provided index.
*
* @param index
* the position of the attribute
* @return the XML type of the attribute
* @throws IllegalStateException
* if this is not a START_ELEMENT or ATTRIBUTE
*/
String getAttributeType(int index);
/**
* Returns the value of the attribute at the index.
*
* @param index
* the position of the attribute
* @return the attribute value
* @throws IllegalStateException
* if this is not a START_ELEMENT or ATTRIBUTE
*/
String getAttributeValue(int index);
/**
* Returns a boolean which indicates if this attribute was created by
* default.
*
* <p>
* NOTE(review): the method name suggests that true means the attribute was
* explicitly specified in the document, which is the opposite of the
* wording used here - confirm against implementations before relying on
* either reading.
*
* @param index
* the position of the attribute
* @return true if this is a default attribute
* @throws IllegalStateException
* if this is not a START_ELEMENT or ATTRIBUTE
*/
boolean isAttributeSpecified(int index);
/**
* Returns the count of namespaces declared on this START_ELEMENT or
* END_ELEMENT. This method is only valid on a START_ELEMENT, END_ELEMENT or
* NAMESPACE. On an END_ELEMENT the count is of the namespaces that are
* about to go out of scope. This is the equivalent of the information
* reported by the SAX callback for an end element event.
*
* @return the number of namespace declarations on this specific element
* @throws IllegalStateException
* if this is not a START_ELEMENT, END_ELEMENT or NAMESPACE
*/
int getNamespaceCount();
/**
* Returns the prefix for the namespace declared at the index. Returns null
* if this is the default namespace declaration.
*
* @param index
* the position of the namespace declaration
* @return the namespace prefix
* @throws IllegalStateException
* if this is not a START_ELEMENT, END_ELEMENT or NAMESPACE
*/
String getNamespacePrefix(int index);
/**
* Returns the uri for the namespace declared at the index.
*
* @param index
* the position of the namespace declaration
* @return the namespace uri
* @throws IllegalStateException
* if this is not a START_ELEMENT, END_ELEMENT or NAMESPACE
*/
String getNamespaceURI(int index);
/**
* Returns a read only namespace context for the current position. The
* context is transient and only valid until a call to next() changes the
* state of the reader.
*
* @return a namespace context
*/
NamespaceContext getNamespaceContext();
/*
* Disabled API (the declaration below is commented out); description kept
* for reference.
*
* Returns a reader that points to the current start element and all of its
* contents. Throws a QuixStreamException if the cursor does not point to a
* START_ELEMENT.
*
* The sub stream MUST be read before the parent stream is moved on; if
* not, any call on the sub stream will cause a QuixStreamException to be
* thrown. The parent stream will always return the same result from next()
* whatever is done to the sub stream.
*
* Returns: an XMLStreamReader which points to the next element
*/
// public XMLStreamReader subReader() throws QuixStreamException;
/*
* Disabled API (the declaration below is commented out); description kept
* for reference.
*
* Allows the implementation to reset and reuse any underlying tables.
*/
// public void recycle() throws QuixStreamException;
/**
* Returns an integer code that indicates the type of the event the cursor
* is pointing to.
*
* @return the integer code of the current event type
*/
int getEventType();
/**
* Returns the current value of the parse event as a string. This returns
* the string value of a CHARACTERS event, the value of a COMMENT, the
* replacement value for an ENTITY_REFERENCE, the string value of a CDATA
* section, the string value for a SPACE event, or the String value of the
* internal subset of the DTD. If an ENTITY_REFERENCE has been resolved,
* any character data will be reported as CHARACTERS events.
*
* @return the current text or null
* @throws IllegalStateException
* if this state is not a valid text state.
*/
String getText();
/**
* Returns an array which contains the characters from this event. This
* array should be treated as read-only and transient, i.e. the array will
* contain the text characters until the reader moves on to the next event.
* Attempts to hold onto the character array beyond that time or modify the
* contents of the array are breaches of the contract for this interface.
*
* @return the current text or an empty array
* @throws IllegalStateException
* if this state is not a valid text state.
*/
char[] getTextCharacters();
/**
* Gets the text associated with a CHARACTERS, SPACE or CDATA event. Text
* starting at "sourceStart" is copied into "target" starting at
* "targetStart". Up to "length" characters are copied. The number of
* characters actually copied is returned.
*
* <p>
* The "sourceStart" argument must be greater or equal to 0 and less than or
* equal to the number of characters associated with the event. Usually, one
* requests text starting at a "sourceStart" of 0. If the number of
* characters actually copied is less than the "length", then there is no
* more text. Otherwise, subsequent calls need to be made until all text has
* been retrieved. For example:
*
* <pre>{@code
* int length = 1024;
* char[] myBuffer = new char[length];
*
* for (int sourceStart = 0;; sourceStart += length) {
*     int nCopied = stream.getTextCharacters(sourceStart, myBuffer, 0, length);
*     if (nCopied < length)
*         break;
* }
* }</pre>
*
* <p>
* A QuiXStreamException may be thrown if there are any XML errors in the
* underlying source. The "targetStart" argument must be greater than or
* equal to 0 and less than the length of "target". Length must be greater
* than 0 and "targetStart + length" must be less than or equal to the
* length of "target".
*
* @param sourceStart
* the index of the first character in the source array to copy
* @param target
* the destination array
* @param targetStart
* the start offset in the target array
* @param length
* the number of characters to copy
* @return the number of characters actually copied
* @throws QuiXStreamException
* if the underlying XML source is not well-formed
* @throws IndexOutOfBoundsException
* if {@code targetStart < 0} or {@code targetStart} is greater
* than the length of target
* @throws IndexOutOfBoundsException
* if {@code length < 0} or {@code targetStart + length} is
* greater than the length of target
* @throws UnsupportedOperationException
* if this method is not supported
* @throws NullPointerException
* if target is null
*/
int getTextCharacters(int sourceStart, char[] target, int targetStart, int length);
/*
* Disabled API (the declaration below is commented out); description kept
* for reference.
*
* Gets the text associated with a CHARACTERS, SPACE or CDATA event. Allows
* the underlying implementation to return the text as a stream of
* characters. The reference to the Reader returned by this method is only
* valid until next() is called.
*
* All characters must have been checked for well-formedness.
*
* This method is optional and will throw UnsupportedOperationException if
* it is not supported.
*
* Throws: UnsupportedOperationException if this method is not supported
* Throws: IllegalStateException if this is not a valid text state
*/
// public Reader getTextStream();
/**
* Returns the offset into the text character array where the first
* character (of this text event) is stored.
*
* @return the offset of the first character of this text event
* @throws IllegalStateException
* if this state is not a valid text state.
*/
int getTextStart();
/**
* Returns the length of the sequence of characters for this text event
* within the text character array.
*
* @return the number of characters of this text event
* @throws IllegalStateException
* if this state is not a valid text state.
*/
int getTextLength();
/**
* Returns the input encoding if known or null if unknown.
*
* @return the encoding of this instance or null
*/
String getEncoding();
/**
* Returns true if the current event has text, false otherwise. The
* following events have text: CHARACTERS, DTD, ENTITY_REFERENCE, COMMENT,
* SPACE.
*
* @return true if the current event has text, false otherwise
*/
boolean hasText();
/**
* Returns the current location of the processor. If the Location is unknown
* the processor should return an implementation of Location that returns -1
* for the location and null for the publicId and systemId. The location
* information is only valid until next() is called.
*
* @return the current location of the processor
*/
Location getLocation();
/**
* Returns a QName for the current START_ELEMENT or END_ELEMENT event.
*
* @return the QName for the current START_ELEMENT or END_ELEMENT event
* @throws IllegalStateException
* if this is not a START_ELEMENT or END_ELEMENT
*/
QName getName();
/**
* Returns the (local) name of the current event. For START_ELEMENT or
* END_ELEMENT returns the (local) name of the current element. For
* ENTITY_REFERENCE it returns the entity name. The current event must be
* START_ELEMENT, END_ELEMENT or ENTITY_REFERENCE.
*
* @return the localName
* @throws IllegalStateException
* if this is not a START_ELEMENT, END_ELEMENT or ENTITY_REFERENCE
*/
String getLocalName();
/**
* Returns true if the current event has a name (is a START_ELEMENT or
* END_ELEMENT), false otherwise.
*
* @return true if the current event has a name, false otherwise
*/
boolean hasName();
/**
* If the current event is a START_ELEMENT or END_ELEMENT this method
* returns the URI of the prefix or the default namespace. Returns null if
* the event does not have a prefix.
*
* @return the URI bound to this element's prefix, the default namespace, or
* null
*/
String getNamespaceURI();
/**
* Returns the prefix of the current event or null if the event does not
* have a prefix.
*
* @return the prefix or null
*/
String getPrefix();
/**
* Gets the xml version declared on the xml declaration. Returns null if
* none was declared.
*
* @return the XML version or null
*/
String getVersion();
/**
* Gets the standalone declaration from the xml declaration.
*
* @return true if this is standalone, or false otherwise
*/
boolean isStandalone();
/**
* Checks if standalone was set in the document.
*
* @return true if standalone was set in the document, or false otherwise
*/
boolean standaloneSet();
/**
* Returns the character encoding declared on the xml declaration. Returns
* null if none was declared.
*
* @return the encoding declared in the document or null
*/
String getCharacterEncodingScheme();
/**
* Gets the target of a processing instruction.
*
* @return the target or null
*/
String getPITarget();
/**
* Gets the data section of a processing instruction.
*
* @return the data or null
*/
String getPIData();
}