zcommon/src/main/java/org/zkoss/lang/JavaScriptEscapeUtil.java

Summary

Maintainability
F
1 wk
Test Coverage
/*
 * =============================================================================
 * 
 *   Copyright (c) 2014, The UNBESCAPE team (http://www.unbescape.org)
 * 
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 * 
 *       http://www.apache.org/licenses/LICENSE-2.0
 * 
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 * 
 * =============================================================================
 */
package org.zkoss.lang;

import java.io.IOException;
import java.io.Writer;
import java.util.Arrays;

/**
 * <p>
 *   Internal class in charge of performing the real escape/unescape operations.
 * </p>
 *
 * @author Daniel Fern&aacute;ndez
 *
 * @since 1.0.0
 *
 */
/**package*/ final class JavaScriptEscapeUtil {



    /*
     * JAVASCRIPT ESCAPE/UNESCAPE OPERATIONS
     * -------------------------------------
     *
     *   See: http://www.ecmascript.org/docs.php
     *        http://mathiasbynens.be/notes/javascript-escapes
     *
     *   (Note that, in the following examples, and in order to avoid escape problems during the compilation
     *    of this class, the backslash symbol is replaced by '%')
     *
     *   - SINGLE ESCAPE CHARACTERS (SECs):
     *        U+0000 -> %0
     *        U+0008 -> %b
     *        U+0009 -> %t
     *        U+000A -> %n
     *        U+000B -> %v  [NOT USED IN ESCAPE - Not supported by Internet Explorer < 9]
     *        U+000C -> %f
     *        U+000D -> %r
     *        U+0022 -> %"
     *        U+0027 -> %'
     *        U+005C -> %%
     *        U+002F -> %/  [ONLY USED WHEN / APPEARS IN </, IN ORDER TO AVOID ISSUES INSIDE <script> TAGS]
     *   - HEXADECIMAL ESCAPE [XHEXA] (only for characters <= U+00FF): %x??
     *   - UNICODE ESCAPE [UHEXA] (also hexadecimal)
     *        Characters <= U+FFFF: %u????
     *        Characters > U+FFFF : %u????%u???? (surrogate character pair)
     *                              %u{?*} [NOT USED - Possible syntax for ECMAScript 6]
     *   - OCTAL ESCAPE: %377 [NOT USED IN ESCAPE - Deprecated in ECMAScript v5]
     *   - GENERAL ESCAPE: %* -> * ('%a' -> 'a')
     *                     (except the [%,%n] sequence, which is not an escape sequence but a line continuation)
     *
     */



    /*
     * Prefixes defined for use in escape and unescape operations
     */
    private static final char ESCAPE_PREFIX = '\\';
    private static final char ESCAPE_XHEXA_PREFIX2 = 'x';
    private static final char ESCAPE_UHEXA_PREFIX2 = 'u';
    private static final char[] ESCAPE_XHEXA_PREFIX = "\\x".toCharArray();
    private static final char[] ESCAPE_UHEXA_PREFIX = "\\u".toCharArray();

    /*
     * Small utility char arrays for hexadecimal conversion.
     */
    private static char[] HEXA_CHARS_UPPER = "0123456789ABCDEF".toCharArray();
    private static char[] HEXA_CHARS_LOWER = "0123456789abcdef".toCharArray();


    /*
     * Structures for holding the Single Escape Characters
     */
    private static int SEC_CHARS_LEN = '\\' + 1; // 0x5C + 1 = 0x5D
    private static char SEC_CHARS_NO_SEC = '*';
    private static char[] SEC_CHARS;

    /*
     * Structured for holding the 'escape level' assigned to chars (not codepoints) up to ESCAPE_LEVELS_LEN.
     * - The last position of the ESCAPE_LEVELS array will be used for determining the level of all
     *   codepoints >= (ESCAPE_LEVELS_LEN - 1)
     */
    private static final char ESCAPE_LEVELS_LEN = 0x9f + 2; // Last relevant char to be indexed is 0x9f
    private static final byte[] ESCAPE_LEVELS;



    static {

        /*
         * Initialize Single Escape Characters
         */
        SEC_CHARS = new char[SEC_CHARS_LEN];
        Arrays.fill(SEC_CHARS,SEC_CHARS_NO_SEC);
        SEC_CHARS[0x00] = '0';
        SEC_CHARS[0x08] = 'b';
        SEC_CHARS[0x09] = 't';
        SEC_CHARS[0x0A] = 'n';
        SEC_CHARS[0x0C] = 'f';
        SEC_CHARS[0x0D] = 'r';
        SEC_CHARS[0x22] = '"';
        SEC_CHARS[0x27] = '\'';
        SEC_CHARS[0x5C] = '\\';
        // slash (solidus) character: will only be escaped if in '</'
        SEC_CHARS[0x2F] = '/';



        /*
         * Initialization of escape levels.
         * Defined levels :
         *
         *    - Level 1 : Basic escape set
         *    - Level 2 : Basic escape set plus all non-ASCII
         *    - Level 3 : All non-alphanumeric characters
         *    - Level 4 : All characters
         *
         */
        ESCAPE_LEVELS = new byte[ESCAPE_LEVELS_LEN];

        /*
         * Everything is level 3 unless contrary indication.
         */
        Arrays.fill(ESCAPE_LEVELS, (byte)3);

        /*
         * Everything non-ASCII is level 2 unless contrary indication.
         */
        for (char c = 0x80; c < ESCAPE_LEVELS_LEN; c++) {
            ESCAPE_LEVELS[c] = 2;
        }

        /*
         * Alphanumeric characters are level 4.
         */
        for (char c = 'A'; c <= 'Z'; c++) {
            ESCAPE_LEVELS[c] = 4;
        }
        for (char c = 'a'; c <= 'z'; c++) {
            ESCAPE_LEVELS[c] = 4;
        }
        for (char c = '0'; c <= '9'; c++) {
            ESCAPE_LEVELS[c] = 4;
        }

        /*
         * Simple Escape Character will be level 1 (always escaped)
         */
        ESCAPE_LEVELS[0x00] = 1;
        ESCAPE_LEVELS[0x08] = 1;
        ESCAPE_LEVELS[0x09] = 1;
        ESCAPE_LEVELS[0x0A] = 1;
        ESCAPE_LEVELS[0x0C] = 1;
        ESCAPE_LEVELS[0x0D] = 1;
        ESCAPE_LEVELS[0x22] = 1;
        ESCAPE_LEVELS[0x27] = 1;
        ESCAPE_LEVELS[0x5C] = 1;
        // slash (solidus) character: will only be escaped if in '</', but we signal it as level 1 anyway
        ESCAPE_LEVELS[0x2F] = 1;

        /*
         * JavaScript defines two ranges of non-displayable, control characters (some of which are already part of the
         * Single Escape Characters list): U+0001 to U+001F and U+007F to U+009F.
         */
        for (char c = 0x01; c <= 0x1F; c++) {
            ESCAPE_LEVELS[c] = 1;
        }
        for (char c = 0x7F; c <= 0x9F; c++) {
            ESCAPE_LEVELS[c] = 1;
        }

    }



    private JavaScriptEscapeUtil() {
        super();
    }




    static char[] toXHexa(final int codepoint) {
        final char[] result = new char[2];
        result[1] = HEXA_CHARS_UPPER[codepoint % 0x10];
        result[0] = HEXA_CHARS_UPPER[(codepoint >>> 4) % 0x10];
        return result;
    }


    static char[] toUHexa(final int codepoint) {
        final char[] result = new char[4];
        result[3] = HEXA_CHARS_UPPER[codepoint % 0x10];
        result[2] = HEXA_CHARS_UPPER[(codepoint >>> 4) % 0x10];
        result[1] = HEXA_CHARS_UPPER[(codepoint >>> 8) % 0x10];
        result[0] = HEXA_CHARS_UPPER[(codepoint >>> 12) % 0x10];
        return result;
    }



    /*
     * Perform an escape operation, based on String, according to the specified level and type.
     */
    static String escape(final String text,
                         final JavaScriptEscapeType escapeType, final JavaScriptEscapeLevel escapeLevel) {

        if (text == null) {
            return null;
        }

        final int level = escapeLevel.getEscapeLevel();
        final boolean useSECs = escapeType.getUseSECs();
        final boolean useXHexa = escapeType.getUseXHexa();

        StringBuilder strBuilder = null;

        final int offset = 0;
        final int max = text.length();

        int readOffset = offset;

        for (int i = offset; i < max; i++) {

            final int codepoint = Character.codePointAt(text, i);


            /*
             * Shortcut: most characters will be ASCII/Alphanumeric, and we won't need to do anything at
             * all for them
             */
            if (codepoint <= (ESCAPE_LEVELS_LEN - 2) && level < ESCAPE_LEVELS[codepoint]) {
                continue;
            }

            /*
             * Check whether the character is a slash (solidus). In such case, only escape if it
             * appears after a '<' ('</') or level >= 3 (non alphanumeric)
             */
            if (codepoint == '/' && level < 3 && (i == 0 || text.charAt(i - 1) != '<')) {
                continue;
            }

            /*
             * Shortcut: we might not want to escape non-ASCII chars at all either.
             * We also check we are not dealing with U+2028 or U+2029, which the JavaScript spec considers
             * LineTerminators and therefore should be escaped always.
             */
            if (codepoint > (ESCAPE_LEVELS_LEN - 2) && level < ESCAPE_LEVELS[ESCAPE_LEVELS_LEN - 1] &&
                    codepoint != '\u2028' && codepoint != '\u2029') {

                if (Character.charCount(codepoint) > 1) {
                    // This is to compensate that we are actually escaping two char[] positions with a single codepoint.
                    i++;
                }

                continue;

            }


            /*
             * At this point we know for sure we will need some kind of escape, so we
             * can increase the offset and initialize the string builder if needed, along with
             * copying to it all the contents pending up to this point.
             */

            if (strBuilder == null) {
                strBuilder = new StringBuilder(max + 20);
            }

            if (i - readOffset > 0) {
                strBuilder.append(text, readOffset, i);
            }

            if (Character.charCount(codepoint) > 1) {
                // This is to compensate that we are actually reading two char[] positions with a single codepoint.
                i++;
            }

            readOffset = i + 1;


            /*
             * -----------------------------------------------------------------------------------------
             *
             * Peform the real escape, attending the different combinations of SECs, XHEXA and UHEXA
             *
             * -----------------------------------------------------------------------------------------
             */

            if (useSECs && codepoint < SEC_CHARS_LEN) {
                // We will try to use a SEC

                final char sec = SEC_CHARS[codepoint];

                if (sec != SEC_CHARS_NO_SEC) {
                    // SEC found! just write it and go for the next char
                    strBuilder.append(ESCAPE_PREFIX);
                    strBuilder.append(sec);
                    continue;
                }

            }

            /*
             * No SEC-escape was possible, so we need xhexa/uhexa escape.
             */

            if (useXHexa && codepoint <= 0xFF) {
                // Codepoint is <= 0xFF, so we can use XHEXA escapes
                strBuilder.append(ESCAPE_XHEXA_PREFIX);
                strBuilder.append(toXHexa(codepoint));
                continue;
            }

            if (Character.charCount(codepoint) > 1) {
                final char[] codepointChars = Character.toChars(codepoint);
                strBuilder.append(ESCAPE_UHEXA_PREFIX);
                strBuilder.append(toUHexa(codepointChars[0]));
                strBuilder.append(ESCAPE_UHEXA_PREFIX);
                strBuilder.append(toUHexa(codepointChars[1]));
                continue;
            }

            strBuilder.append(ESCAPE_UHEXA_PREFIX);
            strBuilder.append(toUHexa(codepoint));

        }


        /*
         * -----------------------------------------------------------------------------------------------
         * Final cleaning: return the original String object if no escape was actually needed. Otherwise
         *                 append the remaining unescaped text to the string builder and return.
         * -----------------------------------------------------------------------------------------------
         */

        if (strBuilder == null) {
            return text;
        }

        if (max - readOffset > 0) {
            strBuilder.append(text, readOffset, max);
        }

        return strBuilder.toString();

    }





    /*
     * Perform an escape operation, based on char[], according to the specified level and type.
     */
    static void escape(final char[] text, final int offset, final int len, final Writer writer,
                       final JavaScriptEscapeType escapeType, final JavaScriptEscapeLevel escapeLevel)
                       throws IOException {

        if (text == null || text.length == 0) {
            return;
        }

        final int level = escapeLevel.getEscapeLevel();
        final boolean useSECs = escapeType.getUseSECs();
        final boolean useXHexa = escapeType.getUseXHexa();

        final int max = (offset + len);

        int readOffset = offset;

        for (int i = offset; i < max; i++) {

            final int codepoint = Character.codePointAt(text, i);


            /*
             * Shortcut: most characters will be ASCII/Alphanumeric, and we won't need to do anything at
             * all for them
             */
            if (codepoint <= (ESCAPE_LEVELS_LEN - 2) && level < ESCAPE_LEVELS[codepoint]) {
                continue;
            }

            /*
             * Check whether the character is a slash (solidus). In such case, only escape if it
             * appears after a '<' ('</') or level >= 3 (non alphanumeric)
             */
            if (codepoint == '/' && level < 3 && (i == 0 || text[i - 1] != '<')) {
                continue;
            }

            /*
             * Shortcut: we might not want to escape non-ASCII chars at all either.
             * We also check we are not dealing with U+2028 or U+2029, which the JavaScript spec considers
             * LineTerminators and therefore should be escaped always.
             */
            if (codepoint > (ESCAPE_LEVELS_LEN - 2) && level < ESCAPE_LEVELS[ESCAPE_LEVELS_LEN - 1] &&
                    codepoint != '\u2028' && codepoint != '\u2029') {

                if (Character.charCount(codepoint) > 1) {
                    // This is to compensate that we are actually escaping two char[] positions with a single codepoint.
                    i++;
                }

                continue;

            }


            /*
             * At this point we know for sure we will need some kind of escape, so we
             * can write all the contents pending up to this point.
             */

            if (i - readOffset > 0) {
                writer.write(text, readOffset, (i - readOffset));
            }

            if (Character.charCount(codepoint) > 1) {
                // This is to compensate that we are actually reading two char[] positions with a single codepoint.
                i++;
            }

            readOffset = i + 1;


            /*
             * -----------------------------------------------------------------------------------------
             *
             * Peform the real escape, attending the different combinations of SECs, XHEXA and UHEXA
             *
             * -----------------------------------------------------------------------------------------
             */

            if (useSECs && codepoint < SEC_CHARS_LEN) {
                // We will try to use a SEC

                final char sec = SEC_CHARS[codepoint];

                if (sec != SEC_CHARS_NO_SEC) {
                    // SEC found! just write it and go for the next char
                    writer.write(ESCAPE_PREFIX);
                    writer.write(sec);
                    continue;
                }

            }

            /*
             * No SEC-escape was possible, so we need xhexa/uhexa escape.
             */

            if (useXHexa && codepoint <= 0xFF) {
                // Codepoint is <= 0xFF, so we can use XHEXA escapes
                writer.write(ESCAPE_XHEXA_PREFIX);
                writer.write(toXHexa(codepoint));
                continue;
            }

            if (Character.charCount(codepoint) > 1) {
                final char[] codepointChars = Character.toChars(codepoint);
                writer.write(ESCAPE_UHEXA_PREFIX);
                writer.write(toUHexa(codepointChars[0]));
                writer.write(ESCAPE_UHEXA_PREFIX);
                writer.write(toUHexa(codepointChars[1]));
                continue;
            }

            writer.write(ESCAPE_UHEXA_PREFIX);
            writer.write(toUHexa(codepoint));

        }


        /*
         * -----------------------------------------------------------------------------------------------
         * Final cleaning: return the original String object if no escape was actually needed. Otherwise
         *                 append the remaining unescaped text to the string builder and return.
         * -----------------------------------------------------------------------------------------------
         */

        if (max - readOffset > 0) {
            writer.write(text, readOffset, (max - readOffset));
        }

    }








    /*
     * This methods (the two versions) are used instead of Integer.parseInt(str,radix) in order to avoid the need
     * to create substrings of the text being unescaped to feed such method.
     * -  No need to check all chars are within the radix limits - reference parsing code will already have done so.
     */

    static int parseIntFromReference(final String text, final int start, final int end, final int radix) {
        int result = 0;
        for (int i = start; i < end; i++) {
            final char c = text.charAt(i);
            int n = -1;
            for (int j = 0; j < HEXA_CHARS_UPPER.length; j++) {
                if (c == HEXA_CHARS_UPPER[j] || c == HEXA_CHARS_LOWER[j]) {
                    n = j;
                    break;
                }
            }
            result = (radix * result) + n;
        }
        return result;
    }

    static int parseIntFromReference(final char[] text, final int start, final int end, final int radix) {
        int result = 0;
        for (int i = start; i < end; i++) {
            final char c = text[i];
            int n = -1;
            for (int j = 0; j < HEXA_CHARS_UPPER.length; j++) {
                if (c == HEXA_CHARS_UPPER[j] || c == HEXA_CHARS_LOWER[j]) {
                    n = j;
                    break;
                }
            }
            result = (radix * result) + n;
        }
        return result;
    }





    static boolean isOctalEscape(final String text, final int start, final int end) {

        if (start >= end) {
            return false;
        }

        final char c1 = text.charAt(start);
        if (c1 < '0' || c1 > '7') {
            return false;
        }

        if (start + 1 >= end) {
            return (c1 != '0'); // It would not be an octal escape, but the U+0000 escape sequence.
        }

        final char c2 = text.charAt(start + 1);
        if (c2 < '0' || c2 > '7') {
            return (c1 != '0'); // It would not be an octal escape, but the U+0000 escape sequence.
        }

        if (start + 2 >= end) {
            return (c1 != '0' || c2 != '0'); // It would not be an octal escape, but the U+0000 escape sequence + '0'.
        }

        final char c3 = text.charAt(start + 2);
        if (c3 < '0' || c3 > '7') {
            return (c1 != '0' || c2 != '0'); // It would not be an octal escape, but the U+0000 escape sequence + '0'.
        }

        return (c1 != '0' || c2 != '0' || c3 != '0'); // Check it's not U+0000 (escaped) + '00'

    }


    static boolean isOctalEscape(final char[] text, final int start, final int end) {

        if (start >= end) {
            return false;
        }

        final char c1 = text[start];
        if (c1 < '0' || c1 > '7') {
            return false;
        }

        if (start + 1 >= end) {
            return (c1 != '0'); // It would not be an octal escape, but the U+0000 escape sequence.
        }

        final char c2 = text[start + 1];
        if (c2 < '0' || c2 > '7') {
            return (c1 != '0'); // It would not be an octal escape, but the U+0000 escape sequence.
        }

        if (start + 2 >= end) {
            return (c1 != '0' || c2 != '0'); // It would not be an octal escape, but the U+0000 escape sequence + '0'.
        }

        final char c3 = text[start + 2];
        if (c3 < '0' || c3 > '7') {
            return (c1 != '0' || c2 != '0'); // It would not be an octal escape, but the U+0000 escape sequence + '0'.
        }

        return (c1 != '0' || c2 != '0' || c3 != '0'); // Check it's not U+0000 (escaped) + '00'

    }




    /*
     * Perform an unescape operation based on String.
     */
    static String unescape(final String text) {

        if (text == null) {
            return null;
        }

        StringBuilder strBuilder = null;

        final int offset = 0;
        final int max = text.length();

        int readOffset = offset;
        int referenceOffset = offset;

        for (int i = offset; i < max; i++) {

            final char c = text.charAt(i);

            /*
             * Check the need for an unescape operation at this point
             */

            if (c != ESCAPE_PREFIX || (i + 1) >= max) {
                continue;
            }

            int codepoint = -1;

            if (c == ESCAPE_PREFIX) {

                final char c1 = text.charAt(i + 1);

                switch (c1) {
                    case '0': if (!isOctalEscape(text,i + 1,max)) { codepoint = 0x00; referenceOffset = i + 1; }; break;
                    case 'b': codepoint = 0x08; referenceOffset = i + 1; break;
                    case 't': codepoint = 0x09; referenceOffset = i + 1; break;
                    case 'n': codepoint = 0x0A; referenceOffset = i + 1; break;
                    case 'v': codepoint = 0x0B; referenceOffset = i + 1; break;
                    case 'f': codepoint = 0x0C; referenceOffset = i + 1; break;
                    case 'r': codepoint = 0x0D; referenceOffset = i + 1; break;
                    case '"': codepoint = 0x22; referenceOffset = i + 1; break;
                    case '\'': codepoint = 0x27; referenceOffset = i + 1; break;
                    case '\\': codepoint = 0x5C; referenceOffset = i + 1; break;
                    case '/': codepoint = 0x2F; referenceOffset = i + 1; break;
                }

                if (codepoint == -1) {

                    if (c1 == ESCAPE_XHEXA_PREFIX2) {
                        // This can be a xhexa escape, we need exactly two more characters

                        int f = i + 2;
                        while (f < (i + 4) && f < max) {
                            final char cf = text.charAt(f);
                            if (!((cf >= '0' && cf <= '9') || (cf >= 'A' && cf <= 'F') || (cf >= 'a' && cf <= 'f'))) {
                                break;
                            }
                            f++;
                        }

                        if ((f - (i + 2)) < 2) {
                            // We weren't able to consume the required two hexa chars, leave it as slash+'x', which
                            // is invalid, and let the corresponding JavaScript engine fail.
                            i++;
                            continue;
                        }

                        codepoint = parseIntFromReference(text, i + 2, f, 16);

                        // Fast-forward to the first char after the parsed codepoint
                        referenceOffset = f - 1;

                        // Don't continue here, just let the unescape code below do its job

                    } else if (c1 == ESCAPE_UHEXA_PREFIX2) {
                        // This can be a uhexa escape, we need exactly four more characters

                        int f = i + 2;
                        while (f < (i + 6) && f < max) {
                            final char cf = text.charAt(f);
                            if (!((cf >= '0' && cf <= '9') || (cf >= 'A' && cf <= 'F') || (cf >= 'a' && cf <= 'f'))) {
                                break;
                            }
                            f++;
                        }

                        if ((f - (i + 2)) < 4) {
                            // We weren't able to consume the required four hexa chars, leave it as slash+'u', which
                            // is invalid, and let the corresponding JavaScript engine fail.
                            i++;
                            continue;
                        }

                        codepoint = parseIntFromReference(text, i + 2, f, 16);

                        // Fast-forward to the first char after the parsed codepoint
                        referenceOffset = f - 1;

                        // Don't continue here, just let the unescape code below do its job

                    } else if (c1 >= '0' && c1 <= '7') {
                        // This can be a octal escape, we need at least 1 more char, and up to 3 more.

                        int f = i + 2;
                        while (f < (i + 4) && f < max) { // We need only a max of two more chars
                            final char cf = text.charAt(f);
                            if (!(cf >= '0' && cf <= '7')) {
                                break;
                            }
                            f++;
                        }

                        codepoint = parseIntFromReference(text, i + 1, f, 8);

                        if (codepoint > 0xFF) {
                            // Maximum octal escape char is FF. Ignore the last digit
                            codepoint = parseIntFromReference(text, i + 1, f - 1, 8);
                            referenceOffset = f - 2;
                        } else {
                            referenceOffset = f - 1;
                        }

                        // Don't continue here, just let the unescape code below do its job

                    } else if (c1 == '8' || c1 == '9' || c1 == '\n' || c1 == '\r' || c1 == '\u2028' || c1 == '\u2029') {

                        // '8' and '9' are not valid octal escape sequences, and the other four characters
                        // are LineTerminators, which are not allowed as escape sequences. So we leave it as is
                        // and expect the corresponding JavaScript engine to fail (except in the case of slash + '\n',
                        // which is considered a LineContinuator).
                        i++;
                        continue;

                    } else {

                        // We weren't able to consume any valid escape chars, just consider it a normal char,
                        // which is allowed by the JavaScript specification (NonEscapeCharacter)

                        codepoint = (int) c1;
                        referenceOffset = i + 1;

                    }

                }

            }


            /*
             * At this point we know for sure we will need some kind of unescape, so we
             * can increase the offset and initialize the string builder if needed, along with
             * copying to it all the contents pending up to this point.
             */

            if (strBuilder == null) {
                strBuilder = new StringBuilder(max + 5);
            }

            if (i - readOffset > 0) {
                strBuilder.append(text, readOffset, i);
            }

            i = referenceOffset;
            readOffset = i + 1;

            /*
             * --------------------------
             *
             * Peform the real unescape
             *
             * --------------------------
             */

            if (codepoint > '\uFFFF') {
                strBuilder.append(Character.toChars(codepoint));
            } else {
                strBuilder.append((char)codepoint);
            }

        }


        /*
         * -----------------------------------------------------------------------------------------------
         * Final cleaning: return the original String object if no unescape was actually needed. Otherwise
         *                 append the remaining escaped text to the string builder and return.
         * -----------------------------------------------------------------------------------------------
         */

        if (strBuilder == null) {
            return text;
        }

        if (max - readOffset > 0) {
            strBuilder.append(text, readOffset, max);
        }

        return strBuilder.toString();

    }






    /*
     * Perform an unescape operation based on char[].
     */
    static void unescape(final char[] text, final int offset, final int len, final Writer writer)
                         throws IOException {

        if (text == null) {
            return;
        }

        final int max = (offset + len);

        int readOffset = offset;
        int referenceOffset = offset;

        for (int i = offset; i < max; i++) {

            final char c = text[i];

            /*
             * Check the need for an unescape operation at this point
             */

            if (c != ESCAPE_PREFIX || (i + 1) >= max) {
                continue;
            }

            int codepoint = -1;

            if (c == ESCAPE_PREFIX) {

                final char c1 = text[i + 1];

                switch (c1) {
                    case '0': if (!isOctalEscape(text,i + 1,max)) { codepoint = 0x00; referenceOffset = i + 1; }; break;
                    case 'b': codepoint = 0x08; referenceOffset = i + 1; break;
                    case 't': codepoint = 0x09; referenceOffset = i + 1; break;
                    case 'n': codepoint = 0x0A; referenceOffset = i + 1; break;
                    case 'v': codepoint = 0x0B; referenceOffset = i + 1; break;
                    case 'f': codepoint = 0x0C; referenceOffset = i + 1; break;
                    case 'r': codepoint = 0x0D; referenceOffset = i + 1; break;
                    case '"': codepoint = 0x22; referenceOffset = i + 1; break;
                    case '\'': codepoint = 0x27; referenceOffset = i + 1; break;
                    case '\\': codepoint = 0x5C; referenceOffset = i + 1; break;
                    case '/': codepoint = 0x2F; referenceOffset = i + 1; break;
                }

                if (codepoint == -1) {

                    if (c1 == ESCAPE_XHEXA_PREFIX2) {
                        // This can be a xhexa escape, we need exactly two more characters

                        int f = i + 2;
                        while (f < (i + 4) && f < max) {
                            final char cf = text[f];
                            if (!((cf >= '0' && cf <= '9') || (cf >= 'A' && cf <= 'F') || (cf >= 'a' && cf <= 'f'))) {
                                break;
                            }
                            f++;
                        }

                        if ((f - (i + 2)) < 2) {
                            // We weren't able to consume the required two hexa chars, leave it as slash+'x', which
                            // is invalid, and let the corresponding JavaScript engine fail.
                            i++;
                            continue;
                        }

                        codepoint = parseIntFromReference(text, i + 2, f, 16);

                        // Fast-forward to the first char after the parsed codepoint
                        referenceOffset = f - 1;

                        // Don't continue here, just let the unescape code below do its job

                    } else if (c1 == ESCAPE_UHEXA_PREFIX2) {
                        // This can be a uhexa escape, we need exactly four more characters

                        int f = i + 2;
                        while (f < (i + 6) && f < max) {
                            final char cf = text[f];
                            if (!((cf >= '0' && cf <= '9') || (cf >= 'A' && cf <= 'F') || (cf >= 'a' && cf <= 'f'))) {
                                break;
                            }
                            f++;
                        }

                        if ((f - (i + 2)) < 4) {
                            // We weren't able to consume the required four hexa chars, leave it as slash+'u', which
                            // is invalid, and let the corresponding JavaScript engine fail.
                            i++;
                            continue;
                        }

                        codepoint = parseIntFromReference(text, i + 2, f, 16);

                        // Fast-forward to the first char after the parsed codepoint
                        referenceOffset = f - 1;

                        // Don't continue here, just let the unescape code below do its job

                    } else if (c1 >= '0' && c1 <= '7') {
                        // This can be a octal escape, we need at least 1 more char, and up to 3 more.

                        int f = i + 2;
                        while (f < (i + 4) && f < max) { // We need only a max of two more chars
                            final char cf = text[f];
                            if (!(cf >= '0' && cf <= '7')) {
                                break;
                            }
                            f++;
                        }

                        codepoint = parseIntFromReference(text, i + 1, f, 8);
                        if (codepoint > 0xFF) {
                            // Maximum octal escape char is FF. Ignore the last digit
                            codepoint = parseIntFromReference(text, i + 1, f - 1, 8);
                            referenceOffset = f - 2;
                        } else {
                            referenceOffset = f - 1;
                        }

                        // Don't continue here, just let the unescape code below do its job

                    } else if (c1 == '8' || c1 == '9' || c1 == '\n' || c1 == '\r' || c1 == '\u2028' || c1 == '\u2029') {

                        // '8' and '9' are not valid octal escape sequences, and the other four characters
                        // are LineTerminators, which are not allowed as escape sequences. So we leave it as is
                        // and expect the corresponding JavaScript engine to fail (except in the case of slash + '\n',
                        // which is considered a LineContinuator).
                        i++;
                        continue;

                    } else {

                        // We weren't able to consume any valid escape chars, just consider it a normal char,
                        // which is allowed by the JavaScript specification (NonEscapeCharacter)

                        codepoint = (int) c1;
                        referenceOffset = i + 1;

                    }

                }

            }


            /*
             * At this point we know for sure we will need some kind of unescape, so we
             * write all the contents pending up to this point.
             */

            if (i - readOffset > 0) {
                writer.write(text, readOffset, (i - readOffset));
            }

            i = referenceOffset;
            readOffset = i + 1;

            /*
             * --------------------------
             *
             * Peform the real unescape
             *
             * --------------------------
             */

            if (codepoint > '\uFFFF') {
                writer.write(Character.toChars(codepoint));
            } else {
                writer.write((char)codepoint);
            }

        }


        /*
         * -----------------------------------------------------------------------------------------------
         * Final cleaning: writer the remaining escaped text and return.
         * -----------------------------------------------------------------------------------------------
         */

        if (max - readOffset > 0) {
            writer.write(text, readOffset, (max - readOffset));
        }

    }



}