zcommon/src/main/java/org/zkoss/idom/Verifier.java
/* Verifier.java
Purpose:
Description:
History:
2001/10/21 17:03:23, Create, Tom M. Yeh.
Copyright (C) 2001 Potix Corporation. All Rights Reserved.
{{IS_RIGHT
This program is distributed under LGPL Version 2.1 in the hope that
it will be useful, but WITHOUT ANY WARRANTY.
}}IS_RIGHT
*/
package org.zkoss.idom;
import org.xml.sax.Locator;
/**
* The verifier to verify W3C/DOM related constraints.
*
* @author tomyeh
*/
public class Verifier {
// The following implementation are referred from jdom2's implementation to support
// UTF-16 characters. License under BSD -
// https://raw.githubusercontent.com/hunterhacker/jdom/master/core/src/java/org/jdom2/Verifier.java
/*
* KEY TO UNDERSTANDING MASKS.
* ===========================
*
* This Verifier uses bitwise logic to perform fast validation on
* XML characters. The concept is as follows...
*
* There are 7 major tests for characters in JDOM and one special case.
* Can the character be a regular character, can it be part of an XML Name
* (element, attribute, entity-ref, etc.), does it represent a letter,
* digit, or combining character. Finally can a character be the first
* character in a name, or can the character be part of a URI. The special
* case is that Attributes and Element names in JDOM do not include the
* namespace prefix, thus, for Attribute and Elements, the name is the
* identical test to other XML names, but excludes the ':'. For performance
* reasons we only have the bitmask for the JDOM names, and then add the
* ':' for the general case tests.
*
* These 7 tests are often performed in very tight performance critical
* loops. It is essential for them to be fast.
*
* These 7 tests conveniently can be represented as 8 bits in a byte.
* We can thus have a single byte that represents the possible roles for
* each possible character. There are 64K characters... thus we need 64K
* bytes to represent each character's possible roles.
*
* We could use arrays of booleans to accomplish the same thing, but each
* boolean is a byte of memory, and using a bitmask allows us to put the
* 8 bitmask tests in the same memory space as just one boolean array.
*
* The end solution is to have an array of these bytes, one per character,
* and to then query each bit on the byte to see whether the corresponding
* character is able to perform in the respective role.
*
* The complicated part of this process is three-fold. The hardest part is
* knowing what role each character can play. The next hard part is
* converting this knowledge in to an array of bytes we can express in this
* Verifier class. The final part is querying that array for each test.
*
* Before this particular performance upgrade, the knowledge of what roles
* each character can play was embedded in each of the isXML*() methods.
* Those methods have been transferred in to the 'contrib' class
* org.jdom2.contrib.verifier.VerifierBuilder. That VerifierBuilder class
* has a main method which takes that knowledge, and converts it in to a
* 'compressed' set of two arrays, the byte mask, and the number of
* consecutive characters that have that mask, which are then copy/pasted
* in to this file as the VALCONST and LENCONST arrays.
*
* These two arrays are then 'decompressed' in to the CHARFLAGS array.
*
* The CHARFLAGS array is then queried for each of the 8 critical tests
* to determine which roles a character performs.
*
* If you need to change the roles a character plays in XML (i.e. change
* the return-value of one of the isXML...() methods, then you need to:
*
* - update the logic in org.jdom2.contrib.verifier.VerifierBuilder
* - run the VerifierBuilder
* - copy/paste the output to this file.
* - update the JUnit test harness TestVerifier
*/
/**
* The seed array used with LENCONST to populate CHARFLAGS.
*/
private static final byte[] VALCONST = new byte[] {
0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x41, 0x01,
0x41, 0x49, 0x41, 0x59, 0x41, 0x01, 0x41, 0x01,
0x41, 0x4f, 0x01, 0x4d, 0x01, 0x4f, 0x01, 0x41,
0x01, 0x09, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f,
0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f,
0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f,
0x01, 0x0f, 0x01, 0x09, 0x01, 0x29, 0x01, 0x29,
0x01, 0x0f, 0x09, 0x0f, 0x01, 0x0f, 0x01, 0x0f,
0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f,
0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f,
0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x29,
0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f,
0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f,
0x01, 0x0f, 0x01, 0x29, 0x01, 0x29, 0x01, 0x29,
0x01, 0x29, 0x01, 0x29, 0x01, 0x29, 0x01, 0x0f,
0x01, 0x0f, 0x01, 0x0f, 0x01, 0x09, 0x0f, 0x29,
0x01, 0x19, 0x01, 0x29, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x29, 0x0f, 0x29,
0x01, 0x29, 0x01, 0x19, 0x01, 0x29, 0x01, 0x0f,
0x01, 0x29, 0x0f, 0x29, 0x01, 0x29, 0x01, 0x0f,
0x29, 0x01, 0x19, 0x01, 0x29, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x29, 0x01, 0x29, 0x01, 0x29, 0x01,
0x29, 0x01, 0x29, 0x01, 0x0f, 0x01, 0x0f, 0x29,
0x01, 0x19, 0x0f, 0x01, 0x29, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x29, 0x01, 0x29, 0x01,
0x29, 0x01, 0x29, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x19, 0x29, 0x0f, 0x01, 0x29, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x29, 0x0f, 0x29, 0x01,
0x29, 0x01, 0x29, 0x01, 0x0f, 0x01, 0x19, 0x01,
0x29, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x29, 0x0f,
0x29, 0x01, 0x29, 0x01, 0x29, 0x01, 0x29, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x19, 0x01, 0x29, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x29, 0x01, 0x29, 0x01,
0x29, 0x01, 0x29, 0x01, 0x19, 0x01, 0x29, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x29, 0x01, 0x29, 0x01, 0x29, 0x01,
0x29, 0x01, 0x0f, 0x01, 0x19, 0x01, 0x29, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x29, 0x01, 0x29, 0x01, 0x29, 0x01,
0x29, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x19, 0x01,
0x29, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x29, 0x01, 0x29, 0x01, 0x29, 0x01,
0x29, 0x01, 0x0f, 0x01, 0x19, 0x01, 0x0f, 0x01,
0x0f, 0x29, 0x0f, 0x29, 0x01, 0x0f, 0x09, 0x29,
0x01, 0x19, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f,
0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f,
0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f,
0x01, 0x0f, 0x01, 0x0f, 0x29, 0x0f, 0x29, 0x01,
0x29, 0x0f, 0x01, 0x0f, 0x01, 0x09, 0x01, 0x29,
0x01, 0x19, 0x01, 0x29, 0x01, 0x19, 0x01, 0x29,
0x01, 0x29, 0x01, 0x29, 0x01, 0x29, 0x0f, 0x01,
0x0f, 0x01, 0x29, 0x01, 0x29, 0x01, 0x29, 0x01,
0x29, 0x01, 0x29, 0x01, 0x29, 0x01, 0x29, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x29, 0x01,
0x29, 0x01, 0x0f, 0x01, 0x0f, 0x01, 0x0f, 0x01,
0x0f, 0x01, 0x09, 0x01, 0x0f, 0x01, 0x0f, 0x29,
0x01, 0x09, 0x01, 0x0f, 0x01, 0x29, 0x01, 0x09,
0x01, 0x0f, 0x01, 0x09, 0x01, 0x0f, 0x01, 0x0f,
0x01, 0x0f, 0x01, 0x00, 0x01, 0x00};
/**
* The seed array used with VALCONST to populate CHARFLAGS.
*/
private static final int [] LENCONST = new int [] {
9, 2, 2, 1, 18, 1, 1, 2,
9, 2, 1, 10, 1, 2, 1, 1,
2, 26, 4, 1, 1, 26, 3, 1,
56, 1, 8, 23, 1, 31, 1, 58,
2, 11, 2, 8, 1, 53, 1, 68,
9, 36, 3, 2, 4, 30, 56, 89,
18, 7, 14, 2, 46, 70, 26, 2,
36, 1, 1, 3, 1, 1, 1, 20,
1, 44, 1, 7, 3, 1, 1, 1,
1, 1, 1, 1, 1, 18, 13, 12,
1, 66, 1, 12, 1, 36, 1, 4,
9, 53, 2, 2, 2, 2, 3, 28,
2, 8, 2, 2, 55, 38, 2, 1,
7, 38, 10, 17, 1, 23, 1, 3,
1, 1, 1, 2, 1, 1, 11, 27,
5, 3, 46, 26, 5, 1, 10, 8,
13, 10, 6, 1, 71, 2, 5, 1,
15, 1, 4, 1, 1, 15, 2, 2,
1, 4, 2, 10, 519, 3, 1, 53,
2, 1, 1, 16, 3, 4, 3, 10,
2, 2, 10, 17, 3, 1, 8, 2,
2, 2, 22, 1, 7, 1, 1, 3,
4, 2, 1, 1, 7, 2, 2, 2,
3, 9, 1, 4, 2, 1, 3, 2,
2, 10, 2, 16, 1, 2, 6, 4,
2, 2, 22, 1, 7, 1, 2, 1,
2, 1, 2, 2, 1, 1, 5, 4,
2, 2, 3, 11, 4, 1, 1, 7,
10, 2, 3, 12, 3, 1, 7, 1,
1, 1, 3, 1, 22, 1, 7, 1,
2, 1, 5, 2, 1, 1, 8, 1,
3, 1, 3, 18, 1, 5, 10, 17,
3, 1, 8, 2, 2, 2, 22, 1,
7, 1, 2, 2, 4, 2, 1, 1,
6, 3, 2, 2, 3, 8, 2, 4,
2, 1, 3, 4, 10, 18, 2, 1,
6, 3, 3, 1, 4, 3, 2, 1,
1, 1, 2, 3, 2, 3, 3, 3,
8, 1, 3, 4, 5, 3, 3, 1,
4, 9, 1, 15, 9, 17, 3, 1,
8, 1, 3, 1, 23, 1, 10, 1,
5, 4, 7, 1, 3, 1, 4, 7,
2, 9, 2, 4, 10, 18, 2, 1,
8, 1, 3, 1, 23, 1, 10, 1,
5, 4, 7, 1, 3, 1, 4, 7,
2, 7, 1, 1, 2, 4, 10, 18,
2, 1, 8, 1, 3, 1, 23, 1,
16, 4, 6, 2, 3, 1, 4, 9,
1, 8, 2, 4, 10, 145, 46, 1,
1, 1, 2, 7, 5, 6, 1, 8,
1, 10, 39, 2, 1, 1, 2, 2,
1, 1, 2, 1, 6, 4, 1, 7,
1, 3, 1, 1, 1, 1, 2, 2,
1, 2, 1, 1, 1, 2, 6, 1,
2, 1, 2, 5, 1, 1, 1, 6,
2, 10, 62, 2, 6, 10, 11, 1,
1, 1, 1, 1, 4, 2, 8, 1,
33, 7, 20, 1, 6, 4, 6, 1,
1, 1, 21, 3, 7, 1, 1, 230,
38, 10, 39, 9, 1, 1, 2, 1,
3, 1, 1, 1, 2, 1, 5, 41,
1, 1, 1, 1, 1, 11, 1, 1,
1, 1, 1, 3, 2, 3, 1, 5,
3, 1, 1, 1, 1, 1, 1, 1,
1, 3, 2, 3, 2, 1, 1, 40,
1, 9, 1, 2, 1, 2, 2, 7,
2, 1, 1, 1, 7, 40, 1, 4,
1, 8, 1, 3078, 156, 4, 90, 6,
22, 2, 6, 2, 38, 2, 6, 2,
8, 1, 1, 1, 1, 1, 1, 1,
31, 2, 53, 1, 7, 1, 1, 3,
3, 1, 7, 3, 4, 2, 6, 4,
13, 5, 3, 1, 7, 211, 13, 4,
1, 68, 1, 3, 2, 2, 1, 81,
3, 3714, 1, 1, 1, 25, 9, 6,
1, 5, 11, 84, 4, 2, 2, 2,
2, 90, 1, 3, 6, 40, 7379, 20902,
3162, 11172, 92, 2048, 8190, 2};
/**
* The number of characters in Java.
*/
private static final int CHARCNT = Character.MAX_VALUE + 1;
/**
* An array of byte where each byte represents the roles that the
* corresponding character can play. Use the bit mask values
* to access each character's role.
*/
private static final byte[] CHARFLAGS = buildBitFlags();
/**
* Convert the two compressed arrays in to th CHARFLAGS array.
* @return the CHARFLAGS array.
*/
private static final byte[] buildBitFlags() {
final byte[] ret = new byte[CHARCNT];
int index = 0;
for (int i = 0; i < VALCONST.length; i++) {
// v represents the roles a character can play.
final byte v = VALCONST[i];
// l is the number of consecutive chars that have the same
// roles 'v'
int l = LENCONST[i];
// we need to give the next 'l' chars the role bits 'v'
while (--l >= 0) {
ret[index++] = v;
}
}
return ret;
}
/** Mask used to test for {@link #isXMLCharacter(int)} */
private static final byte MASKXMLCHARACTER = 1 << 0;
/** Mask used to test for {@link #isXMLLetter(char)} */
private static final byte MASKXMLLETTER = 1 << 1;
/** Mask used to test for {@link #isXMLNameStartCharacter(char)} */
private static final byte MASKXMLSTARTCHAR = 1 << 2;
/** Mask used to test for {@link #isXMLNameCharacter(char)} */
private static final byte MASKXMLNAMECHAR = 1 << 3;
/** Mask used to test for {@link #isXMLDigit(char)} */
private static final byte MASKXMLDIGIT = 1 << 4;
/** Mask used to test for {@link #isXMLCombiningChar(char)} */
private static final byte MASKXMLCOMBINING = 1 << 5;
/** Mask used to test for {@link #isURICharacter(char)} */
private static final byte MASKURICHAR = 1 << 6;
/** Mask used to test for {@link #isXMLLetterOrDigit(char)} */
private static final byte MASKXMLLETTERORDIGIT = MASKXMLLETTER | MASKXMLDIGIT;
private Verifier() {
}
/**
* Checks whether an element's name is valid.
*/
public static final void checkElementName(String name, Locator loc) {
if (name.indexOf(":") >= 0)
throw new DOMException(DOMException.INVALID_CHARACTER_ERR,
"Element or attribute names cannot contain colons", loc);
checkXMLName(name, loc);
}
/**
* Checks whether an attribute's name is valid.
*/
public static final void checkAttributeName(String name, Locator loc) {
// Allow xml:space and xml:lang as special cases
if (!name.equals("xml:space") && !name.equals("xml:lang"))
checkElementName(name, loc);
}
protected static final StringBuffer appendAsHex(StringBuffer sb, char c) {
return sb.append('\'').append(c)
.append("' (0x").append(Integer.toHexString(c)).append(')');
}
/**
* Checks whether a text is valid.
*/
public static final void checkCharacterData(String text, Locator loc) {
if (text == null)
throw new DOMException(DOMException.INVALID_CHARACTER_ERR,
"A null is not a legal XML value", loc);
final int len = text.length();
for (int i = 0; i < len; i++) {
// we are expecting a normal char, but may be a surrogate.
// the isXMLCharacter method takes an int argument, but we have a char.
// we save a lot of time by doing the test directly here without
// doing the unnecessary cast-to-int and double-checking ranges
// for the char.
// Also, note that we only need to check for non-zero flags, instead
// of checking for an actual bit, because all the other
// character roles are a pure subset of CharacterData. Put another way,
// any character with any bit set, will always also have the
// CharacterData bit set.
while (CHARFLAGS[text.charAt(i)] != (byte)0) {
// fast-loop through the chars until we find something that's not.
if (++i == len) {
// we passed all the characters...
return;
}
}
// the character is not a normal character.
// we need to sort out what it is. Neither high nor low
// surrogate pairs are valid characters, so they will get here.
if (Character.isHighSurrogate(text.charAt(i))) {
// we have the valid high char of a pair.
// we will expect the low char on the next index,
i++;
if (i >= len) {
// we got a normal character, but we wanted a low surrogate
throw new DOMException(DOMException.INVALID_CHARACTER_ERR,
String.format("Truncated Surrogate Pair 0x%04x????",
(int)text.charAt(i - 1)), loc);
}
if (Character.isLowSurrogate(text.charAt(i))) {
// we now have the low char of a pair, decode and validate
if (!isXMLCharacter(decodeSurrogatePair(
text.charAt(i - 1), text.charAt(i)))) {
// Likely this character can't be easily displayed
// because it's a control so we use it'd hexadecimal
// representation in the reason.
throw new DOMException(DOMException.INVALID_CHARACTER_ERR,
String.format("0x%06x is not a legal XML character",
decodeSurrogatePair(
text.charAt(i - 1), text.charAt(i))), loc);
}
} else {
// we got a normal character, but we wanted a low surrogate
throw new DOMException(DOMException.INVALID_CHARACTER_ERR,
String.format("Illegal Surrogate Pair 0x%04x%04x",
(int)text.charAt(i - 1), (int)text.charAt(i)), loc);
}
} else {
// Likely this character can't be easily displayed
// because it's a control so we use its hexadecimal
// representation in the reason.
throw new DOMException(DOMException.INVALID_CHARACTER_ERR,
String.format("0x%04x is not a legal XML character",
(int)text.charAt(i)), loc);
}
}
}
/**
* This is a utility function to decode a non-BMP
* UTF-16 surrogate pair.
* @param high high 16 bits
* @param low low 16 bits
* @return decoded character
*/
public static int decodeSurrogatePair(final char high, final char low) {
return 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00);
}
/**
* Checks whether a CDATA is valid.
*/
public static final void checkCData(String data, Locator loc) {
if (data.indexOf("]]>") >= 0)
throw new DOMException(DOMException.INVALID_CHARACTER_ERR,
"']]>' is not allowed inside a CDATA string", loc);
checkCharacterData(data, loc);
}
/**
* Checks whether the prefix of a namespace is valid.
*/
public static final void checkNamespacePrefix(String prefix, Locator loc) {
if (prefix == null || prefix.length() == 0)
return; //OK: null or empty
String reason = null;
char first = prefix.charAt(0);
if (isXMLDigit(first)) {
reason = "a number";
} else if (first == '$') {
reason = "a dollar sign ($)";
} else if (first == '-') {
reason = "a hyphen (-)";
} else if (first == '.') {
reason = "a period (.)";
} else {
final String s = prefix.toLowerCase(java.util.Locale.ENGLISH);
if (s.startsWith("xml") && !s.equals("xmlns"))
reason = "\"xml\" in any combination of case";
}
if (reason != null)
throw new DOMException(DOMException.INVALID_CHARACTER_ERR,
"Namespace prefixes, "+prefix+", cannot begin with " + reason, loc);
for (int j=0, len = prefix.length(); j<len; j++)
if (!isXMLNameCharacter(prefix.charAt(j))) {
StringBuffer sb =
new StringBuffer("Namespace prefixes cannot contain ");
throw new DOMException(DOMException.INVALID_CHARACTER_ERR,
appendAsHex(sb, prefix.charAt(j)).toString(), loc);
}
if (prefix.indexOf(":") >= 0)
throw new DOMException(DOMException.INVALID_CHARACTER_ERR,
"Namespace prefixes cannot contain colons", loc);
}
/**
* Checks whether the URI of a namespace is valid.
*/
public static final void checkNamespaceURI(String uri, Locator loc) {
if (uri == null || uri.length() == 0)
return; //OK: null or empty
String reason = null;
char first = uri.charAt(0);
if (Character.isDigit(first))
reason = "a number";
else if (first == '$')
reason = "a dollar sign ($)";
else if (first == '-')
reason = "a hyphen (-)";
if (reason != null)
throw new DOMException(DOMException.INVALID_CHARACTER_ERR,
"Namespace URIs cannot begin with " + reason, loc);
}
/**
* Checks whether a processing instruction target is valid.
*/
public static final void
checkPITarget(String target, Locator loc) {
if (target.indexOf(":") >= 0)
throw new DOMException(DOMException.INVALID_CHARACTER_ERR,
"Processing instruction targets cannot contain colons", loc);
if (target.equalsIgnoreCase("xml"))
throw new DOMException(DOMException.INVALID_CHARACTER_ERR,
"Processing instruction targets cannot be " +
"\"xml\" in any combination of case", loc);
checkXMLName(target, loc);
}
/**
* Checks whether a comment data is valid.
*/
public static final void checkCommentData(String data, Locator loc) {
if (data.indexOf("--") >= 0)
throw new DOMException(DOMException.INVALID_CHARACTER_ERR,
"Comments cannot contain double hyphens (--)", loc);
checkCharacterData(data, loc);
}
/**
* Checks whether a name is valid.
*/
public static void checkXMLName(String name, Locator loc) {
if (name == null || name.length() == 0)
throw new DOMException(DOMException.INVALID_CHARACTER_ERR,
"XML names cannot be null or empty", loc);
if (!isXMLNameStartCharacter(name.charAt(0)))
throw new DOMException(DOMException.INVALID_CHARACTER_ERR,
"XML names cannot begin with \'" + name.charAt(0) + '\'', loc);
for (int j=0, len = name.length(); j<len; j++)
if (!isXMLNameCharacter(name.charAt(j)))
throw new DOMException(DOMException.INVALID_CHARACTER_ERR,
"XML names cannot contain \'" + name.charAt(j) + '\'', loc);
}
/**
* Checks whether a character is valid.
*/
public static boolean isXMLCharacter(int c) {
if (c >= CHARCNT) {
return c <= 0x10FFFF;
}
return (byte)0 != (byte)(CHARFLAGS[c] & MASKXMLCHARACTER);
}
/**
* Checks whether a character can be part of a name.
*/
public static boolean isXMLNameCharacter(char c) {
return (byte)0 != (byte)(CHARFLAGS[c] & MASKXMLNAMECHAR) || c == ':';
}
/**
* Checks whether a character can be the first character of a name.
*/
public static boolean isXMLNameStartCharacter(char c) {
return (byte)0 != (byte)(CHARFLAGS[c] & MASKXMLSTARTCHAR) || c == ':';
}
/**
* Checks whether a character is a letter or digit.
*/
public static boolean isXMLLetterOrDigit(char c) {
return (byte)0 != (byte)(CHARFLAGS[c] & MASKXMLLETTERORDIGIT);
}
/**
* Checks whether a character is a letter.
*/
public static boolean isXMLLetter(final char c) {
return (byte)0 != (byte)(CHARFLAGS[c] & MASKXMLLETTER);
}
/**
* Checks whether a character is a combining character according to
* production 87 of the XML 1.0 specification.
*/
public static boolean isXMLCombiningChar(final char c) {
return (byte)0 != (byte)(CHARFLAGS[c] & MASKXMLCOMBINING);
}
/**
* Checks whether a character is an extender according to
* production 88 of the XML 1.0 specification.
*/
public static boolean isXMLExtender(final char c) {
/*
* This function is not accellerated by the bitmask system because
* there are no longer any actual calls to it from the JDOM code.
* It used to be called by the isXMLNameCharacter() method before
* the bitmask optimization. Now the VerifierBuilder code actually
* calls this method instead.
*/
if (c < 0x00B6) return false; // quick short circuit
// Extenders
if (c == 0x00B7) return true;
if (c == 0x02D0) return true;
if (c == 0x02D1) return true;
if (c == 0x0387) return true;
if (c == 0x0640) return true;
if (c == 0x0E46) return true;
if (c == 0x0EC6) return true;
if (c == 0x3005) return true;
if (c < 0x3031) return false; if (c <= 0x3035) return true;
if (c < 0x309D) return false; if (c <= 0x309E) return true;
if (c < 0x30FC) return false; if (c <= 0x30FE) return true;
return false;
}
/**
* <p>
* Checks whether a character is a digit according to
* production 88 of the XML 1.0 specification.
*/
public static boolean isXMLDigit(final char c) {
return (byte)0 != (byte)(CHARFLAGS[c] & MASKXMLDIGIT);
}
}