SquirrelJME/SquirrelJME

View on GitHub
modules/zip/src/main/java/net/multiphasicapps/zip/streamreader/ZipStreamReader.java

Summary

Maintainability
A
2 hrs
Test Coverage
// -*- Mode: Java; indent-tabs-mode: t; tab-width: 4 -*-
// ---------------------------------------------------------------------------
// SquirrelJME
//     Copyright (C) Stephanie Gawroriski <xer@multiphasicapps.net>
// ---------------------------------------------------------------------------
// SquirrelJME is under the Mozilla Public License Version 2.0.
// See license.mkd for licensing and copyright information.
// ---------------------------------------------------------------------------

package net.multiphasicapps.zip.streamreader;

import cc.squirreljme.runtime.cldc.archive.ArchiveStreamReader;
import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import net.multiphasicapps.io.DataEndianess;
import net.multiphasicapps.io.DynamicHistoryInputStream;
import net.multiphasicapps.io.ExtendedDataInputStream;
import net.multiphasicapps.zip.IBM437CodePage;
import net.multiphasicapps.zip.ZipCompressionType;
import net.multiphasicapps.zip.ZipException;

/**
 * This class supports stream based reading of input ZIP files.
 *
 * Only files up to 2GiB in length are supported. If a data descriptor is
 * specified for entries then they must have the optional descriptor magic
 * number included.
 *
 * This class is not thread safe.
 *
 * @since 2016/07/19
 */
public class ZipStreamReader
    implements ArchiveStreamReader, Closeable
{
    /**
     * The highest "version needed to extract" value that is accepted, local
     * headers which ask for a higher version are rejected.
     */
    private static final int _MAX_EXTRACT_VERSION =
        20;
    
    /**
     * The size in bytes of the fixed portion of a local header, this is also
     * the size of the scratch buffer used to hold it.
     */
    private static final int _MINIMUM_HEADER_SIZE =
        30;
    
    /**
     * The local header magic number as a little endian integer, it appears
     * in the stream as the bytes {@code 0x50, 0x4B, 0x03, 0x04}.
     */
    private static final int _LOCAL_HEADER_MAGIC =
        0x04034B50;
    
    /** The dynamic history stream, used to peek ahead without consuming. */
    protected final DynamicHistoryInputStream input;
    
    /**
     * Little endian data view over {@link #input}, this is used to consume
     * bytes once an input structure has been detected.
     */
    protected final ExtendedDataInputStream data;
    
    /**
     * Scratch buffer for the fixed portion of the local header (everything
     * before the file name), it is also reused as a discard buffer when
     * bytes are skipped.
     */
    private final byte[] _localheader =
        new byte[ZipStreamReader._MINIMUM_HEADER_SIZE];
    
    /**
     * The entry currently being read, while this is set the next entry
     * cannot be requested until the entry has been closed.
     */
    private volatile ZipStreamEntry _entry;
    
    /** End of file reached (or the reader was closed)? */
    private volatile boolean _eof;
    
    /**
     * Deferred exception, set when a local header was found but had to be
     * skipped because it could not be handled.
     */
    private volatile ZipException _defer;
    
    /**
     * Creates a stream based ZIP reader on top of the given input stream.
     *
     * The stream is wrapped so that it can be peeked into, and a little
     * endian data view is layered on top of that wrapper.
     *
     * @param __is The input stream to source bytes from.
     * @throws NullPointerException On null arguments.
     * @since 2016/07/19
     */
    public ZipStreamReader(InputStream __is)
        throws NullPointerException
    {
        // A source stream is required
        if (__is == null)
            throw new NullPointerException("NARG");
        
        // Build the wrapper chain first
        DynamicHistoryInputStream history =
            new DynamicHistoryInputStream(__is);
        ExtendedDataInputStream littleEndian =
            new ExtendedDataInputStream(history);
        littleEndian.setEndianess(DataEndianess.LITTLE);
        
        // Then store it
        this.input = history;
        this.data = littleEndian;
    }
    
    /**
     * Closes this reader along with the streams it wraps, after this call
     * {@link #nextEntry()} reports that there are no more entries.
     *
     * Both wrapped streams are always given the chance to close, even if
     * closing the first one fails.
     *
     * @throws IOException If a wrapped stream could not be closed.
     * @since 2016/07/19
     */
    @Override
    public void close()
        throws IOException
    {
        // Mark EOF so no further entries are handed out
        this._eof = true;
        
        // Close the source, the data view must still be closed if closing
        // the history stream throws
        try
        {
            this.input.close();
        }
        finally
        {
            this.data.close();
        }
    }
    
    /**
     * Returns and clears the exception which was recorded when an entry was
     * detected but could not be read.
     *
     * Calling this a second time without another failure in between
     * returns {@code null}.
     *
     * @return The deferred exception or {@code null} if there is none.
     * @since 2016/09/11
     */
    public ZipException deferred()
    {
        try
        {
            // Hand out whatever is pending...
            return this._defer;
        }
        finally
        {
            // ...and forget about it afterwards
            this._defer = null;
        }
    }
    
    /**
     * Returns the next entry in the streamed ZIP file or {@code null} if no
     * such entry exists.
     *
     * The stream is scanned forward for the local header magic number, any
     * bytes which do not belong to a local header are skipped. A header
     * which is found but which cannot be handled (unsupported version,
     * encryption, unknown compression method, or a size which does not fit
     * in a signed 32-bit integer) is skipped as well, the reason is recorded
     * and may be obtained via {@link #deferred()}.
     *
     * @return The next entry or {@code null} if there is none.
     * @throws IOException On read errors, or if the previously returned
     * entry has not been closed yet.
     * @since 2016/07/19
     */
    @Override
    public ZipStreamEntry nextEntry()
        throws IOException
    {
        /* {@squirreljme.error BF0z An entry is currently being read, it
        must first be closed.} */
        if (this._entry != null)
            throw new IOException("BF0z");
        
        // End of file reached?
        if (this._eof)
            return null;
        
        // Read until an entry is found
        DynamicHistoryInputStream input = this.input;
        ExtendedDataInputStream data = this.data;
        byte[] localheader = this._localheader;
        for (; !this._eof;)
        {
            // Peek the magic number
            int rhcount;
            try
            {
                rhcount = input.peek(0, localheader, 0, 4);
            }
            
            /* {@squirreljme.error BF10 Could not peek the magic number.} */
            catch (IndexOutOfBoundsException e)
            {
                throw new ZipException("BF10", e);
            }
        
            // Could not fit the magic number, treat as EOF
            if (rhcount < 4)
            {
                this._eof = true;
                return null;
            }
        
            // Determine how many bytes need to be dropped before the magic
            // number could possibly start, zero means it starts right here
            int lhskip = ZipStreamReader.__skipLocalHeader(localheader);
            
            // Not one
            if (lhskip > 0)
            {
                // Consume the bytes which cannot start a local header
                try
                {
                    data.readFully(localheader, 0, lhskip);
                }
                
                // End of file
                catch (EOFException e)
                {
                    this._eof = true;
                }
                
                // Try again at the new position, if EOF was flagged then
                // the loop condition fails and null is returned
                continue;
            }
            
            // Peek the entire fixed portion of the header
            rhcount = input.peek(0, localheader);
            
            // EOF reached (cannot fit a local header in this many bytes)
            // Ignore the somewhat malformed ZIP since it could be part of
            // another file structure due to polyglots
            if (rhcount < ZipStreamReader._MINIMUM_HEADER_SIZE)
            {
                this._eof = true;
                return null;
            }
            
            // Deferred exception, only the first failed check is recorded
            ZipException defer = null;
            
            // Check the version needed for extracting
            // Note that some ZIP writing software sets the upper byte when it
            // should not. Since the made by version is not stored in the
            // local file header, the byte will just be stripped.
            // The mask keeps the value in 0..255, so only the upper bound
            // needs to be checked.
            int xver = ZipStreamReader.__readUnsignedShort(localheader, 4) &
                0xFF;
            boolean deny = (xver > ZipStreamReader._MAX_EXTRACT_VERSION);
            
            /* {@squirreljme.error BF11 Zip version not supported. (The
            version)} */
            if (defer == null && deny)
                defer = new ZipException(String.format("BF11 %d",
                    xver));
            
            // Read bit flags
            int gpfs = ZipStreamReader.__readUnsignedShort(localheader, 6);
            boolean utf = (0 != (gpfs & (1 << 11)));
            boolean undefinedsize = (0 != (gpfs & (1 << 3)));
            
            // Cannot read encrypted entries
            deny |= (0 != (gpfs & 1));
            
            /* {@squirreljme.error BF12 Encrypted entries not supported.} */
            if (defer == null && deny)
                defer = new ZipException("BF12");
            
            // Read the compression method, the raw value is kept so that
            // it can be reported when it is not a known method
            int rawmeth = ZipStreamReader.__readUnsignedShort(localheader, 8);
            ZipCompressionType cmeth = ZipCompressionType.forMethod(rawmeth);
            deny |= (cmeth == null);
            
            /* {@squirreljme.error BF13 Compression method not supported.
            (The method)} */
            if (defer == null && deny)
                defer = new ZipException(String.format("BF13 %d", rawmeth));
            
            // Read CRC32
            int crc = ZipStreamReader.__readInt(localheader, 14);
            
            // Read Compressed size, negative means it needs more than 31
            // bits which is not supported
            int csz = ZipStreamReader.__readInt(localheader, 18);
            if (!undefinedsize)
                deny |= (csz < 0);
            
            // Uncompressed size
            int usz = ZipStreamReader.__readInt(localheader, 22);
            if (!undefinedsize)
                deny |= (usz < 0);
            
            /* {@squirreljme.error BF14 Entry exceeds 2GiB in size.
            (The compressed size; The uncompressed size)} */
            if (defer == null && deny)
                defer = new ZipException(String.format("BF14 %d %d", csz,
                    usz));
            
            // File name length
            int fnl = ZipStreamReader.__readUnsignedShort(localheader, 26);
            
            // Length of the variable sized field which follows the file
            // name (the extra field in the ZIP specification)
            int xfl = ZipStreamReader.__readUnsignedShort(localheader, 28);
            
            // If denying, step over the magic number and scan again, this
            // could just be very ZIP-like data or the local header number
            // could be a constant in an executable.
            if (deny)
            {
                // Defer the issue, if set
                if (defer != null)
                    this._defer = defer;
                
                // Skip 4 bytes so the same magic number is not found again
                data.readFully(localheader, 0, 4);
                continue;
            }
            
            // Read the local header normally to consume it
            data.readFully(localheader);
            
            // Read the file name, a truncated name is a read error
            byte[] rawname = new byte[fnl];
            data.readFully(rawname);
            
            // If UTF-8 then use internal handling
            String filename;
            if (utf)
                filename = new String(rawname, 0, fnl, "utf-8");
        
            // Otherwise use codepage handling, Java ME only has two
            // character sets available
            else
                filename = IBM437CodePage.toString(rawname, 0, fnl);
            
            // Skip the whole field following the file name. The header
            // buffer is reused as a discard buffer, since the field may be
            // larger than that buffer it is drained in chunks so that the
            // entry data starts at the correct position.
            for (int left = xfl; left > 0;)
            {
                int chunk = Math.min(left, localheader.length);
                data.readFully(localheader, 0, chunk);
                left -= chunk;
            }
            
            // Create entry so the data can actually be used
            ZipStreamEntry rv = new ZipStreamEntry(this, filename,
                undefinedsize, crc, csz, usz, cmeth, input);
            this._entry = rv;
            return rv;
        }
        
        // No entry
        this._eof = true;
        return null;
    }
    
    /**
     * Marks the given entry as finished so that the reader is able to move
     * on to the next entry.
     *
     * @param __ent The entry to close, must be the entry which is currently
     * being read.
     * @throws IOException If the entry is not the current entry.
     * @throws NullPointerException On null arguments.
     * @since 2016/07/20
     */
    final void __closeEntry(ZipStreamEntry __ent)
        throws IOException, NullPointerException
    {
        // An entry must be given
        if (__ent == null)
            throw new NullPointerException("NARG");
        
        /* {@squirreljme.error BF15 Close of an incorrect entry.} */
        ZipStreamEntry current = this._entry;
        if (current != __ent)
            throw new IOException("BF15");
        
        // Nothing is being read anymore
        this._entry = null;
    }
    
    /**
     * Reads a little endian 32-bit integer from the given byte array.
     *
     * The four bytes are returned as a plain {@code int}, so a value with
     * the highest bit set is negative.
     *
     * @param __b The byte array to read from.
     * @param __p The position of the least significant byte.
     * @return The read value.
     * @since 2016/07/19
     */
    static int __readInt(byte[] __b, int __p)
    {
        // Lowest byte first, each following byte is worth eight more bits
        int rv = 0;
        for (int i = 0; i < 4; i++)
            rv |= (__b[__p + i] & 0xFF) << (i << 3);
        
        return rv;
    }
    
    /**
     * Reads a little endian unsigned 16-bit value from the given byte
     * array.
     *
     * @param __b The byte array to read from.
     * @param __p The position of the least significant byte.
     * @return The read value, in the range of 0 to 65535.
     * @since 2016/07/19
     */
    static int __readUnsignedShort(byte[] __b, int __p)
    {
        // Low byte comes first in the array
        int lo = __b[__p] & 0xFF;
        int hi = __b[__p + 1] & 0xFF;
        
        return (hi << 8) | lo;
    }
    
    /**
     * Determines where in the first four bytes of the buffer the local
     * header magic number ({@code 'P', 'K', 0x03, 0x04}) could start.
     *
     * If the magic number is not at the start, the result is the number of
     * leading bytes which cannot begin a magic number and thus may be
     * dropped.
     *
     * @param __b The bytes to check, from the zero index.
     * @return Zero means this is the local header, otherwise a value up to 4.
     * @since 2016/07/19
     */
    private static int __skipLocalHeader(byte[] __b)
    {
        // All four bytes are always looked at
        final byte b0 = __b[0];
        final byte b1 = __b[1];
        final byte b2 = __b[2];
        final byte b3 = __b[3];
        
        // The complete magic number is right here
        if (b0 == 'P' && b1 == 'K' && b2 == 0x03 && b3 == 0x04)
            return 0;
        
        // The first three magic bytes start one byte in
        if (b1 == 'P' && b2 == 'K' && b3 == 0x03)
            return 1;
        
        // The first two magic bytes start two bytes in
        if (b2 == 'P' && b3 == 'K')
            return 2;
        
        // Only the last byte could begin the magic number, otherwise
        // nothing in here belongs to it
        return (b3 == 'P' ? 3 : 4);
    }
}