fujaba/NetworkParser

View on GitHub
src/main/java/de/uniks/networkparser/ext/io/NioZipEncoding.java

Summary

Maintainability
B
6 hrs
Test Coverage
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package de.uniks.networkparser.ext.io;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;

/**
 * A ZipEncoding, which uses a java.nio {@link java.nio.charset.Charset Charset}
 * to encode names.
 * <p>
 * When no charset is configured (see {@link #NioZipEncoding()}), names are
 * converted by mapping every char to a single byte and every byte back to a
 * single char.
 * </p>
 * <p>
 * The methods of this class are reentrant: all fields are final and every call
 * creates its own encoder or decoder.
 * </p>
 */
class NioZipEncoding {

    /** Charset used for encoding/decoding; {@code null} selects the byte-per-char fallback. */
    private final Charset charset;
    /** Whether invalid input is replaced by {@code '?'} ({@code true}) or reported ({@code false}). */
    private final boolean useReplacement;
    private static final char REPLACEMENT = '?';
    private static final byte[] REPLACEMENT_BYTES = { (byte) REPLACEMENT };
    private static final String REPLACEMENT_STRING = String.valueOf(REPLACEMENT);
    private static final char[] HEX_CHARS = new char[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B',
            'C', 'D', 'E', 'F' };

    /**
     * Construct an NioZipEncoding using the given charset.
     * 
     * @param charset        The character set to use.
     * @param useReplacement should invalid characters be replaced, or reported.
     */
    NioZipEncoding(final Charset charset, boolean useReplacement) {
        this.charset = charset;
        this.useReplacement = useReplacement;
    }

    /**
     * Construct an NioZipEncoding without a charset. Encoding and decoding then
     * use the byte-per-char fallback and invalid characters are not replaced.
     */
    public NioZipEncoding() {
        this.charset = null;
        this.useReplacement = false;
    }

    /**
     * @return the configured charset, or {@code null} if none was configured
     */
    public Charset getCharset() {
        return charset;
    }

    /**
     * Check whether the given name can be encoded.
     * 
     * @param name the name to check
     * @return {@code true} if no charset is configured, otherwise the result of
     *         {@link CharsetEncoder#canEncode(CharSequence)} for the name
     */
    public boolean canEncode(String name) {
        if (this.charset == null) {
            return true;
        }
        final CharsetEncoder enc = newEncoder();

        return enc.canEncode(name);
    }

    /**
     * Encode a name into bytes.
     * <p>
     * Without a charset every char is narrowed to one byte. With a charset, chars
     * the encoder reports as unmappable or malformed are written as
     * {@code %Uxxxx} (upper-case hex of the UTF-16 code unit).
     * </p>
     * 
     * @param name the name to encode
     * @return a buffer positioned at 0 whose limit marks the end of the encoded
     *         bytes, or {@code null} if {@code name} is {@code null}
     */
    public ByteBuffer encode(String name) {
        if (name == null) {
            return null;
        }
        if (this.charset == null) {
            final int length = name.length();
            final byte[] buf = new byte[length];

            /* copy until end of input or output is reached. */
            for (int i = 0; i < length; ++i) {
                buf[i] = (byte) name.charAt(i);
            }
            return ByteBuffer.wrap(buf);
        }
        final CharsetEncoder enc = newEncoder();

        final CharBuffer cb = CharBuffer.wrap(name);
        CharBuffer tmp = null;
        ByteBuffer out = ByteBuffer.allocate(estimateInitialBufferSize(enc, cb.remaining()));

        while (cb.hasRemaining()) {
            final CoderResult res = enc.encode(cb, out, false);

            if (res.isUnmappable() || res.isMalformed()) {

                /*
                 * write the unmappable characters in utf-16 pseudo-URL encoding style to
                 * ByteBuffer.
                 */

                int spaceForSurrogate = estimateIncrementalEncodingSize(enc, 6 * res.length());
                if (spaceForSurrogate > out.remaining()) {
                    /*
                     * if the destination buffer isn't over sized, assume that the presence of one
                     * unmappable character makes it likely that there will be more. Find all the
                     * un-encoded characters and allocate space based on those estimates.
                     */
                    int charCount = 0;
                    for (int i = cb.position(); i < cb.limit(); i++) {
                        charCount += !enc.canEncode(cb.get(i)) ? 6 : 1;
                    }
                    int totalExtraSpace = estimateIncrementalEncodingSize(enc, charCount);
                    out = TarUtils.growBufferBy(out, totalExtraSpace - out.remaining());
                }
                if (tmp == null) {
                    tmp = CharBuffer.allocate(6);
                }
                for (int i = 0; i < res.length(); ++i) {
                    out = encodeFully(enc, encodeSurrogate(tmp, cb.get()), out);
                }

            } else if (res.isOverflow()) {
                int increment = estimateIncrementalEncodingSize(enc, cb.remaining());
                out = TarUtils.growBufferBy(out, increment);
            } else {
                /*
                 * Underflow. If input is still remaining here the encoder is waiting for more
                 * chars (e.g. a high surrogate at the very end of the name) and will not
                 * consume anything until it is told that the input has ended. Leave the loop
                 * instead of spinning forever; the final call below handles the rest.
                 */
                break;
            }
        }
        /* tell the encoder we are done */
        enc.encode(cb, out, true);
        /* may have caused underflow, but that's been ignored traditionally */

        out.limit(out.position());
        out.rewind();
        return out;
    }

    /**
     * Decode bytes into a name.
     * <p>
     * Without a charset every byte is widened to one char (treated as unsigned)
     * and decoding stops at the first zero byte.
     * </p>
     * 
     * @param data the bytes to decode
     * @return the decoded String, or {@code null} if {@code data} is {@code null}
     *         or the decoder reports invalid input
     */
    public String decode(byte[] data) {
        if (data == null) {
            return null;
        }
        if (this.charset == null) {
            final int length = data.length;
            final StringBuilder result = new StringBuilder(length);

            for (final byte b : data) {
                if (b == 0) { /* Trailing null */
                    break;
                }
                result.append((char) (b & 0xFF)); /* Allow for sign-extension */
            }
            return result.toString();
        }
        try {
            return newDecoder().decode(ByteBuffer.wrap(data)).toString();
        } catch (CharacterCodingException ignored) {
            /* invalid input is signalled to the caller by the null result below */
        }
        return null;
    }

    /**
     * Encode the complete content of {@code cb} into {@code out}, growing the
     * output buffer whenever the encoder reports an overflow.
     *
     * @param enc encoder to use
     * @param cb  chars to encode; {@code null} leaves {@code out} untouched
     * @param out current output buffer
     * @return the output buffer to continue with (a new one if it had to grow)
     */
    private static ByteBuffer encodeFully(CharsetEncoder enc, CharBuffer cb, ByteBuffer out) {
        ByteBuffer o = out;
        if (cb == null) {
            return o;
        }
        while (cb.hasRemaining()) {
            CoderResult result = enc.encode(cb, o, false);
            if (result.isOverflow()) {
                int increment = estimateIncrementalEncodingSize(enc, cb.remaining());
                o = TarUtils.growBufferBy(o, increment);
            }
        }
        return o;
    }

    /**
     * Fill {@code cb} with the six chars {@code %Uxxxx}, where {@code xxxx} is the
     * upper-case hex value of {@code c}, and flip it for reading.
     *
     * @param cb scratch buffer with a capacity of at least 6, may be {@code null}
     * @param c  the char to represent
     * @return {@code cb} ready for reading, or {@code null} if {@code cb} is
     *         {@code null}
     */
    private static CharBuffer encodeSurrogate(CharBuffer cb, char c) {
        if (cb == null) {
            return null;
        }
        cb.position(0).limit(6);
        cb.put('%');
        cb.put('U');

        cb.put(HEX_CHARS[(c >> 12) & 0x0f]);
        cb.put(HEX_CHARS[(c >> 8) & 0x0f]);
        cb.put(HEX_CHARS[(c >> 4) & 0x0f]);
        cb.put(HEX_CHARS[c & 0x0f]);
        cb.flip();
        return cb;
    }

    /**
     * Create an encoder for the configured charset that either replaces invalid
     * input with {@code '?'} or reports it, depending on {@code useReplacement}.
     *
     * @return a new encoder, or {@code null} if no charset is configured
     */
    private CharsetEncoder newEncoder() {
        if (charset == null) {
            return null;
        }
        if (useReplacement) {
            return charset.newEncoder().onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE).replaceWith(REPLACEMENT_BYTES);
        } else {
            return charset.newEncoder().onMalformedInput(CodingErrorAction.REPORT)
                    .onUnmappableCharacter(CodingErrorAction.REPORT);
        }
    }

    /**
     * Create a decoder for the configured charset that either replaces invalid
     * input with {@code "?"} or reports it, depending on {@code useReplacement}.
     *
     * @return a new decoder, or {@code null} if no charset is configured
     */
    private CharsetDecoder newDecoder() {
        if (charset == null) {
            return null;
        }
        if (!useReplacement) {
            return this.charset.newDecoder().onMalformedInput(CodingErrorAction.REPORT)
                    .onUnmappableCharacter(CodingErrorAction.REPORT);
        } else {
            return charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE).replaceWith(REPLACEMENT_STRING);
        }
    }

    /**
     * Estimate the initial encoded size (in bytes) for a character buffer.
     * <p>
     * The estimate assumes that one character uses the maximum length encoding,
     * whilst the rest use an average size encoding. This accounts for any BOM for
     * UTF-16, at the expense of a couple of extra bytes for UTF-8 encoded ASCII.
     * </p>
     *
     * @param enc        encoder to use for estimates
     * @param charChount number of characters in string
     * @return estimated size in bytes, or -1 if {@code enc} is {@code null}
     */
    private static int estimateInitialBufferSize(CharsetEncoder enc, int charChount) {
        if (enc == null) {
            return -1;
        }
        float first = enc.maxBytesPerChar();
        float rest = (charChount - 1) * enc.averageBytesPerChar();
        return (int) Math.ceil(first + rest);
    }

    /**
     * Estimate the size needed for remaining characters
     *
     * @param enc       encoder to use for estimates
     * @param charCount number of characters remaining
     * @return estimated size in bytes, or -1 if {@code enc} is {@code null}
     */
    private static int estimateIncrementalEncodingSize(CharsetEncoder enc, int charCount) {
        if (enc == null) {
            return -1;
        }
        return (int) Math.ceil(charCount * enc.averageBytesPerChar());
    }

}