wikimedia/mediawiki-core

View on GitHub
includes/media/GIFMetadataExtractor.php

Summary

Maintainability
D
2 days
Test Coverage
<?php
/**
 * GIF frame counter.
 *
 * Originally written in Perl by Steve Sanbeg.
 * Ported to PHP by Andrew Garrett
 * Deliberately not using MWExceptions to avoid external dependencies, encouraging
 * redistribution.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Media
 */

use Wikimedia\AtEase\AtEase;

/**
 * GIF frame counter and metadata extractor.
 *
 * Walks the block structure of a GIF file (image descriptors and extension
 * blocks) without decoding any image data, collecting the frame count, the
 * summed frame delays, the loop flag, embedded XMP and comment blocks, and
 * the logical screen dimensions.
 *
 * @ingroup Media
 */
class GIFMetadataExtractor {
    /** @var string Image descriptor introducer byte (","), set by getMetadata() */
    private static $gifFrameSep;

    /** @var string Extension introducer byte ("!"), set by getMetadata() */
    private static $gifExtensionSep;

    /** @var string Trailer byte (";"), set by getMetadata() */
    private static $gifTerm;

    public const VERSION = 1;

    // Each sub-block is less than or equal to 255 bytes.
    // Most of the time it's 255 bytes, except for in XMP
    // blocks, where it's usually between 32-127 bytes each.
    private const MAX_SUBBLOCKS = 262144; // 5 MiB divided by 20.

    /**
     * Extract metadata from the GIF file at the given path.
     *
     * The file handle is always closed before this method returns or throws.
     *
     * @throws InvalidArgumentException If the file is missing, unreadable or
     *  not a well-formed GIF.
     * @param string $filename
     * @return array Map with keys 'frameCount' (int), 'looped' (bool),
     *  'duration' (float, seconds), 'xmp' (string), 'comment' (array of
     *  comment strings), 'width' (int), 'height' (int) and 'bits' (int).
     */
    public static function getMetadata( $filename ) {
        self::$gifFrameSep = pack( "C", ord( "," ) ); // 2C
        self::$gifExtensionSep = pack( "C", ord( "!" ) ); // 21
        self::$gifTerm = pack( "C", ord( ";" ) ); // 3B

        if ( !$filename ) {
            throw new InvalidArgumentException( 'No file name specified' );
        }
        if ( !file_exists( $filename ) || is_dir( $filename ) ) {
            throw new InvalidArgumentException( "File $filename does not exist" );
        }

        $fh = fopen( $filename, 'rb' );

        if ( !$fh ) {
            throw new InvalidArgumentException( "Unable to open file $filename" );
        }

        try {
            return self::parseStream( $fh );
        } finally {
            // Release the handle on success and on every error path.
            fclose( $fh );
        }
    }

    /**
     * Parse an open GIF stream positioned at the start of the file.
     *
     * @param resource $fh File handle; the caller is responsible for closing it.
     * @throws InvalidArgumentException
     * @return array Same shape as the return value of getMetadata().
     */
    private static function parseStream( $fh ) {
        $frameCount = 0;
        $duration = 0.0;
        $isLooped = false;
        $xmp = "";
        $comment = [];

        // Check for the GIF header
        $buf = fread( $fh, 6 );
        if ( !( $buf === 'GIF87a' || $buf === 'GIF89a' ) ) {
            throw new InvalidArgumentException( "Not a valid GIF file; header: $buf" );
        }

        // Read width and height.
        $buf = fread( $fh, 2 );
        if ( strlen( $buf ) < 2 ) {
            throw new InvalidArgumentException( "Not a valid GIF file; Unable to read width." );
        }
        $width = unpack( 'v', $buf )[1];
        $buf = fread( $fh, 2 );
        if ( strlen( $buf ) < 2 ) {
            throw new InvalidArgumentException( "Not a valid GIF file; Unable to read height." );
        }
        $height = unpack( 'v', $buf )[1];

        // Read BPP
        $buf = fread( $fh, 1 );
        [ $bpp, $have_map ] = self::decodeBPP( $buf );

        // Skip over background and aspect ratio
        // @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
        fread( $fh, 2 );

        // Skip over the GCT
        if ( $have_map ) {
            self::readGCT( $fh, $bpp );
        }

        while ( !feof( $fh ) ) {
            $buf = fread( $fh, 1 );

            if ( $buf === self::$gifFrameSep ) {
                // Found a frame
                $frameCount++;

                // Skip bounding box
                // @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
                fread( $fh, 8 );

                // Read BPP.
                // NOTE(review): this overwrites $bpp, so the returned 'bits'
                // reflects the last image descriptor rather than the logical
                // screen descriptor - confirm this is what callers expect.
                $buf = fread( $fh, 1 );
                [ $bpp, $have_map ] = self::decodeBPP( $buf );

                // Skip the colour table that follows the descriptor, if any
                if ( $have_map ) {
                    self::readGCT( $fh, $bpp );
                }
                // One more byte precedes the image data sub-blocks
                // @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
                fread( $fh, 1 );
                self::skipBlock( $fh );
            } elseif ( $buf === self::$gifExtensionSep ) {
                $buf = fread( $fh, 1 );
                if ( strlen( $buf ) < 1 ) {
                    throw new InvalidArgumentException(
                        "Not a valid GIF file; Unable to read graphics control extension."
                    );
                }
                $extension_code = unpack( 'C', $buf )[1];

                if ( $extension_code === 0xF9 ) {
                    // Graphics Control Extension.
                    // @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
                    fread( $fh, 1 ); // Block size

                    // @phan-suppress-next-next-line PhanPluginUseReturnValueInternalKnown
                    // @phan-suppress-next-line PhanPluginDuplicateAdjacentStatement
                    fread( $fh, 1 ); // Transparency, disposal method, user input

                    $buf = fread( $fh, 2 ); // Delay, in hundredths of seconds.
                    if ( strlen( $buf ) < 2 ) {
                        throw new InvalidArgumentException( "Not a valid GIF file; Unable to read delay" );
                    }
                    $delay = unpack( 'v', $buf )[1];
                    $duration += $delay * 0.01;

                    // @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
                    fread( $fh, 1 ); // Transparent colour index

                    $term = fread( $fh, 1 ); // Should be a terminator
                    if ( strlen( $term ) < 1 ) {
                        throw new InvalidArgumentException( "Not a valid GIF file; Unable to read terminator byte" );
                    }
                    $term = unpack( 'C', $term )[1];
                    if ( $term !== 0 ) {
                        throw new InvalidArgumentException( "Malformed Graphics Control Extension block" );
                    }
                } elseif ( $extension_code === 0xFE ) {
                    // Comment block(s).
                    $data = self::readBlock( $fh );
                    if ( $data === "" ) {
                        throw new InvalidArgumentException( 'Read error, zero-length comment block' );
                    }

                    // The standard says this should be ASCII, however it's unclear if
                    // that's true in practice. Check to see if it's valid UTF-8; if so
                    // assume it's that, otherwise assume it's windows-1252 (iso-8859-1)
                    $dataCopy = $data;
                    // quickIsNFCVerify has the side effect of replacing any invalid characters
                    UtfNormal\Validator::quickIsNFCVerify( $dataCopy );

                    if ( $dataCopy !== $data ) {
                        AtEase::suppressWarnings();
                        $data = iconv( 'windows-1252', 'UTF-8', $data );
                        AtEase::restoreWarnings();
                    }

                    $commentCount = count( $comment );
                    if ( $commentCount === 0
                        // @phan-suppress-next-line PhanTypeInvalidDimOffset
                        || $comment[$commentCount - 1] !== $data
                    ) {
                        // Some applications repeat the same comment on each
                        // frame of an animated GIF image, so if this comment
                        // is identical to the last, only extract once.
                        $comment[] = $data;
                    }
                } elseif ( $extension_code === 0xFF ) {
                    // Application extension (Netscape info about the animated gif)
                    // or XMP (or theoretically any other type of extension block)
                    $blockLength = fread( $fh, 1 );
                    if ( strlen( $blockLength ) < 1 ) {
                        throw new InvalidArgumentException( "Not a valid GIF file; Unable to read block length" );
                    }
                    $blockLength = unpack( 'C', $blockLength )[1];

                    if ( $blockLength !== 11 ) {
                        // Validate the length before reading: a zero length
                        // would otherwise make fread() fail outright. Only the
                        // length byte has been consumed, so step back over it
                        // and skip the whole thing as generic sub-blocks.
                        wfDebug( __METHOD__ . " GIF application block with wrong length" );
                        fseek( $fh, -1, SEEK_CUR );
                        self::skipBlock( $fh );
                        continue;
                    }

                    $data = fread( $fh, $blockLength );

                    // NETSCAPE2.0 (application name for animated gif)
                    if ( $data === 'NETSCAPE2.0' ) {
                        $data = fread( $fh, 2 ); // Block length and introduction, should be 03 01

                        if ( $data !== "\x03\x01" ) {
                            throw new InvalidArgumentException( "Expected \x03\x01, got $data" );
                        }

                        // Unsigned little-endian integer, loop count or zero for "forever"
                        $loopData = fread( $fh, 2 );
                        if ( strlen( $loopData ) < 2 ) {
                            throw new InvalidArgumentException( "Not a valid GIF file; Unable to read loop count" );
                        }
                        $loopCount = unpack( 'v', $loopData )[1];

                        if ( $loopCount !== 1 ) {
                            $isLooped = true;
                        }

                        // Read out terminator byte
                        // @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
                        fread( $fh, 1 );
                    } elseif ( $data === 'XMP DataXMP' ) {
                        // application name for XMP data.
                        // see pg 18 of XMP spec part 3.

                        $xmp = self::readBlock( $fh, true );

                        if ( substr( $xmp, -257, 3 ) !== "\x01\xFF\xFE"
                            || substr( $xmp, -4 ) !== "\x03\x02\x01\x00"
                        ) {
                            throw new InvalidArgumentException( "XMP does not have magic trailer!" );
                        }

                        // strip out trailer.
                        $xmp = substr( $xmp, 0, -257 );
                    } else {
                        // Unrecognized extension block: rewind to its length
                        // byte and skip it as ordinary sub-blocks.
                        fseek( $fh, -( $blockLength + 1 ), SEEK_CUR );
                        self::skipBlock( $fh );
                    }
                } else {
                    self::skipBlock( $fh );
                }
            } elseif ( $buf === self::$gifTerm ) {
                break;
            } else {
                if ( strlen( $buf ) < 1 ) {
                    throw new InvalidArgumentException( "Not a valid GIF file; Unable to read unknown byte." );
                }
                $byte = unpack( 'C', $buf )[1];
                throw new InvalidArgumentException( "At position: " . ftell( $fh ) . ", Unknown byte " . $byte );
            }
        }

        return [
            'frameCount' => $frameCount,
            'looped' => $isLooped,
            'duration' => $duration,
            'xmp' => $xmp,
            'comment' => $comment,
            'width' => $width,
            'height' => $height,
            'bits' => $bpp,
        ];
    }

    /**
     * Skip over a colour table of 2 ** $bpp entries, three bytes per entry.
     *
     * @param resource $fh
     * @param int $bpp
     * @return void
     */
    private static function readGCT( $fh, $bpp ) {
        $max = 2 ** $bpp;
        for ( $i = 1; $i <= $max; ++$i ) {
            // @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
            fread( $fh, 3 );
        }
    }

    /**
     * Decode a packed-fields byte.
     *
     * The low three bits plus one give the bit depth; the top bit says
     * whether a colour table follows.
     *
     * @param string $data The single packed byte, as read from the file
     * @throws InvalidArgumentException If $data is empty
     * @return array [ int bits per channel, bool have GCT ]
     */
    private static function decodeBPP( $data ) {
        if ( strlen( $data ) < 1 ) {
            throw new InvalidArgumentException( "Not a valid GIF file; Unable to read bits per channel." );
        }
        $buf = unpack( 'C', $data )[1];
        $bpp = ( $buf & 7 ) + 1;
        $have_map = (bool)( ( $buf >> 7 ) & 1 );

        return [ $bpp, $have_map ];
    }

    /**
     * Skip a run of sub-blocks up to and including the zero-length
     * terminator sub-block.
     *
     * Returns silently if the handle is already at EOF on entry.
     *
     * @param resource $fh
     * @throws InvalidArgumentException If a sub-block length byte cannot be read
     */
    private static function skipBlock( $fh ) {
        while ( !feof( $fh ) ) {
            $buf = fread( $fh, 1 );
            if ( strlen( $buf ) < 1 ) {
                throw new InvalidArgumentException( "Not a valid GIF file; Unable to read block length." );
            }
            $block_len = unpack( 'C', $buf )[1];
            if ( $block_len === 0 ) {
                return;
            }
            // @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
            fread( $fh, $block_len );
        }
    }

    /**
     * Read a block. In the GIF format, a block is made up of
     * several sub-blocks. Each sub block starts with one byte
     * saying how long the sub-block is, followed by the sub-block.
     * The entire block is terminated by a sub-block of length
     * 0.
     * @param resource $fh File handle
     * @param bool $includeLengths Include the length bytes of the
     *  sub-blocks in the returned value. Normally this is false,
     *  except XMP is weird and does a hack where you need to keep
     *  these length bytes.
     * @throws InvalidArgumentException On unexpected EOF, or when more than
     *  MAX_SUBBLOCKS sub-blocks are encountered.
     * @return string The data.
     */
    private static function readBlock( $fh, $includeLengths = false ) {
        $data = '';
        $subLength = fread( $fh, 1 );
        $blocks = 0;

        while ( $subLength !== "\0" ) {
            $blocks++;
            if ( $blocks > self::MAX_SUBBLOCKS ) {
                throw new InvalidArgumentException( "MAX_SUBBLOCKS exceeded (over $blocks sub-blocks)" );
            }
            // A missing length byte must never reach fread() below as a
            // zero-length read, so treat it the same as hitting EOF.
            if ( $subLength === false || $subLength === '' || feof( $fh ) ) {
                throw new InvalidArgumentException( "Read error: Unexpected EOF." );
            }
            if ( $includeLengths ) {
                $data .= $subLength;
            }

            $data .= fread( $fh, ord( $subLength ) );
            $subLength = fread( $fh, 1 );
        }

        return $data;
    }
}