wikimedia/mediawiki-core

View on GitHub
includes/import/ImportStreamSource.php

Summary

Maintainability
B
4 hrs
Test Coverage
<?php
/**
 * MediaWiki page data importer.
 *
 * Copyright © 2003,2005 Brooke Vibber <bvibber@wikimedia.org>
 * https://www.mediawiki.org/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup SpecialPage
 */

use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Status\Status;
use Wikimedia\AtEase\AtEase;

/**
 * Imports a XML dump from a file (either from file upload, files on disk, or HTTP)
 * @ingroup SpecialPage
 */
class ImportStreamSource implements ImportSource {
    /** @var resource */
    private $mHandle;

    /**
     * @param resource $handle
     */
    public function __construct( $handle ) {
        $this->mHandle = $handle;
    }

    /**
     * @return bool
     */
    public function atEnd() {
        return feof( $this->mHandle );
    }

    /**
     * @return string
     */
    public function readChunk() {
        return fread( $this->mHandle, 32768 );
    }

    /**
     * @return bool
     */
    public function isSeekable() {
        return stream_get_meta_data( $this->mHandle )['seekable'] ?? false;
    }

    /**
     * @param int $offset
     * @return int
     */
    public function seek( int $offset ) {
        return fseek( $this->mHandle, $offset );
    }

    /**
     * @param string $filename
     * @return Status
     */
    public static function newFromFile( $filename ) {
        AtEase::suppressWarnings();
        $file = fopen( $filename, 'rt' );
        AtEase::restoreWarnings();
        if ( !$file ) {
            return Status::newFatal( "importcantopen" );
        }
        return Status::newGood( new ImportStreamSource( $file ) );
    }

    /**
     * @param string $fieldname
     * @return Status
     */
    public static function newFromUpload( $fieldname = "xmlimport" ) {
        // phpcs:ignore MediaWiki.Usage.SuperGlobalsUsage.SuperGlobals
        $upload =& $_FILES[$fieldname];

        if ( $upload === null || !$upload['name'] ) {
            return Status::newFatal( 'importnofile' );
        }
        if ( !empty( $upload['error'] ) ) {
            switch ( $upload['error'] ) {
                case UPLOAD_ERR_INI_SIZE:
                    // The uploaded file exceeds the upload_max_filesize directive in php.ini.
                    return Status::newFatal( 'importuploaderrorsize' );
                case UPLOAD_ERR_FORM_SIZE:
                    // The uploaded file exceeds the MAX_FILE_SIZE directive that
                    // was specified in the HTML form.
                    // FIXME This is probably never used since that directive was removed in 8e91c520?
                    return Status::newFatal( 'importuploaderrorsize' );
                case UPLOAD_ERR_PARTIAL:
                    // The uploaded file was only partially uploaded
                    return Status::newFatal( 'importuploaderrorpartial' );
                case UPLOAD_ERR_NO_TMP_DIR:
                    // Missing a temporary folder.
                    return Status::newFatal( 'importuploaderrortemp' );
                // Other error codes get the generic 'importnofile' error message below
            }

        }
        $fname = $upload['tmp_name'];
        if ( is_uploaded_file( $fname ) ) {
            return self::newFromFile( $fname );
        } else {
            return Status::newFatal( 'importnofile' );
        }
    }

    /**
     * @param string $url
     * @param string $method
     * @return Status
     */
    public static function newFromURL( $url, $method = 'GET' ) {
        $httpImportTimeout = MediaWikiServices::getInstance()->getMainConfig()->get(
            MainConfigNames::HTTPImportTimeout );
        wfDebug( __METHOD__ . ": opening $url" );
        # Use the standard HTTP fetch function; it times out
        # quicker and sorts out user-agent problems which might
        # otherwise prevent importing from large sites, such
        # as the Wikimedia cluster, etc.
        $data = MediaWikiServices::getInstance()->getHttpRequestFactory()->request(
            $method,
            $url,
            [
                'followRedirects' => true,
                'timeout' => $httpImportTimeout
            ],
            __METHOD__
        );
        if ( $data !== null ) {
            $file = tmpfile();
            fwrite( $file, $data );
            fflush( $file );
            fseek( $file, 0 );
            return Status::newGood( new ImportStreamSource( $file ) );
        } else {
            return Status::newFatal( 'importcantopen' );
        }
    }

    /**
     * @param string $interwiki
     * @param string $page
     * @param bool $history
     * @param bool $templates
     * @param int $pageLinkDepth
     * @return Status
     */
    public static function newFromInterwiki( $interwiki, $page, $history = false,
        $templates = false, $pageLinkDepth = 0
    ) {
        if ( $page == '' ) {
            return Status::newFatal( 'import-noarticle' );
        }

        # Look up the first interwiki prefix, and let the foreign site handle
        # subsequent interwiki prefixes
        $firstIwPrefix = strtok( $interwiki, ':' );
        $interwikiLookup = MediaWikiServices::getInstance()->getInterwikiLookup();
        $firstIw = $interwikiLookup->fetch( $firstIwPrefix );
        if ( !$firstIw ) {
            return Status::newFatal( 'importbadinterwiki' );
        }

        $additionalIwPrefixes = strtok( '' );
        if ( $additionalIwPrefixes ) {
            $additionalIwPrefixes .= ':';
        }
        # Have to do a DB-key replacement ourselves; otherwise spaces get
        # URL-encoded to +, which is wrong in this case. Similar to logic in
        # Title::getLocalURL
        $link = $firstIw->getURL( strtr( "{$additionalIwPrefixes}Special:Export/$page",
            ' ', '_' ) );

        $params = [];
        if ( $history ) {
            $params['history'] = 1;
        }
        if ( $templates ) {
            $params['templates'] = 1;
        }
        if ( $pageLinkDepth ) {
            $params['pagelink-depth'] = $pageLinkDepth;
        }

        $url = wfAppendQuery( $link, $params );
        # For interwikis, use POST to avoid redirects.
        return self::newFromURL( $url, "POST" );
    }
}