wikimedia/mediawiki-core

View on GitHub
includes/page/MergeHistory.php

Summary

Maintainability
D
2 days
Test Coverage
<?php

/**
 * Copyright © 2015 Geoffrey Mon <geofbot@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

namespace MediaWiki\Page;

use InvalidArgumentException;
use ManualLogEntry;
use MediaWiki;
use MediaWiki\CommentStore\CommentStoreComment;
use MediaWiki\Content\Content;
use MediaWiki\Content\IContentHandlerFactory;
use MediaWiki\EditPage\SpamChecker;
use MediaWiki\HookContainer\HookContainer;
use MediaWiki\HookContainer\HookRunner;
use MediaWiki\Linker\LinkTargetLookup;
use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Message\Message;
use MediaWiki\Permissions\Authority;
use MediaWiki\Permissions\PermissionStatus;
use MediaWiki\Revision\MutableRevisionRecord;
use MediaWiki\Revision\RevisionStore;
use MediaWiki\Revision\SlotRecord;
use MediaWiki\Status\Status;
use MediaWiki\Title\TitleFactory;
use MediaWiki\Title\TitleFormatter;
use MediaWiki\Title\TitleValue;
use MediaWiki\Utils\MWTimestamp;
use MediaWiki\Watchlist\WatchedItemStoreInterface;
use Wikimedia\Rdbms\IConnectionProvider;
use Wikimedia\Rdbms\IDatabase;
use Wikimedia\Timestamp\TimestampException;

/**
 * Handles the backend logic of merging the histories of two
 * pages.
 *
 * @since 1.27
 */
class MergeHistory {

    /** Maximum number of revisions that can be merged at once */
    public const REVISION_LIMIT = 5000;

    /** @var PageIdentity Page from which history will be merged */
    protected $source;

    /** @var PageIdentity Page to which history will be merged */
    protected $dest;

    /** @var IDatabase Database that we are using */
    protected $dbw;

    /** @var ?string Timestamp up to which history from the source will be merged */
    private $timestamp;

    /**
     * @var MWTimestamp|false Maximum timestamp that we can use (oldest timestamp of dest).
     * Use ::getMaxTimestamp to lazily initialize.
     */
    protected $maxTimestamp = false;

    /**
     * @var string|false|null SQL WHERE condition that selects source revisions
     * to insert into destination. Use ::getTimeWhere to lazy-initialize.
     */
    protected $timeWhere = false;

    /**
     * @var MWTimestamp|false|null Timestamp up to which history from the source will be merged.
     * Use getTimestampLimit to lazily initialize.
     */
    protected $timestampLimit = false;

    /**
     * @var string|null
     */
    private $revidLimit = null;

    /** @var int Number of revisions merged (for Special:MergeHistory success message) */
    protected $revisionsMerged;

    private IContentHandlerFactory $contentHandlerFactory;
    private RevisionStore $revisionStore;
    private WatchedItemStoreInterface $watchedItemStore;
    private SpamChecker $spamChecker;
    private HookRunner $hookRunner;
    private WikiPageFactory $wikiPageFactory;
    private TitleFormatter $titleFormatter;
    private TitleFactory $titleFactory;
    private LinkTargetLookup $linkTargetLookup;
    private DeletePageFactory $deletePageFactory;

    /**
     * Stores the merge parameters and the injected services, and obtains the
     * primary database handle from the connection provider.
     *
     * @param PageIdentity $source Page from which history will be merged
     * @param PageIdentity $dest Page to which history will be merged
     * @param ?string $timestamp Timestamp up to which history from the source will be merged
     * @param IConnectionProvider $dbProvider
     * @param IContentHandlerFactory $contentHandlerFactory
     * @param RevisionStore $revisionStore
     * @param WatchedItemStoreInterface $watchedItemStore
     * @param SpamChecker $spamChecker
     * @param HookContainer $hookContainer
     * @param WikiPageFactory $wikiPageFactory
     * @param TitleFormatter $titleFormatter
     * @param TitleFactory $titleFactory
     * @param LinkTargetLookup $linkTargetLookup
     * @param DeletePageFactory $deletePageFactory
     */
    public function __construct(
        PageIdentity $source,
        PageIdentity $dest,
        ?string $timestamp,
        IConnectionProvider $dbProvider,
        IContentHandlerFactory $contentHandlerFactory,
        RevisionStore $revisionStore,
        WatchedItemStoreInterface $watchedItemStore,
        SpamChecker $spamChecker,
        HookContainer $hookContainer,
        WikiPageFactory $wikiPageFactory,
        TitleFormatter $titleFormatter,
        TitleFactory $titleFactory,
        LinkTargetLookup $linkTargetLookup,
        DeletePageFactory $deletePageFactory
    ) {
        // What is being merged: the two pages and the optional cut-off
        $this->timestamp = $timestamp;
        $this->dest = $dest;
        $this->source = $source;

        // Page, title and content related services
        $this->titleFactory = $titleFactory;
        $this->titleFormatter = $titleFormatter;
        $this->wikiPageFactory = $wikiPageFactory;
        $this->deletePageFactory = $deletePageFactory;
        $this->linkTargetLookup = $linkTargetLookup;
        $this->contentHandlerFactory = $contentHandlerFactory;
        $this->revisionStore = $revisionStore;

        // Remaining collaborators; hooks are always run through a HookRunner
        $this->watchedItemStore = $watchedItemStore;
        $this->spamChecker = $spamChecker;
        $this->hookRunner = new HookRunner( $hookContainer );

        // All queries of this class go through the primary database handle
        $this->dbw = $dbProvider->getPrimaryDatabase();
    }

    /**
     * Count the revisions of the source page that match the merge condition,
     * i.e. the revisions that will be moved.
     *
     * The query is limited to REVISION_LIMIT + 1 rows, which is enough for
     * callers to tell whether the limit is exceeded.
     *
     * @return int
     */
    public function getRevisionCount() {
        $conditions = [
            'rev_page' => $this->source->getId(),
            $this->getTimeWhere(),
        ];

        return $this->dbw->newSelectQueryBuilder()
            ->select( '1' )
            ->from( 'revision' )
            ->where( $conditions )
            ->limit( self::REVISION_LIMIT + 1 )
            ->caller( __METHOD__ )
            ->fetchRowCount();
    }

    /**
     * Get the number of revisions that were moved by merge().
     * Used in the SpecialMergeHistory success message.
     *
     * @return int Number of moved revisions, or 0 when merge() has not yet
     *  recorded a count
     */
    public function getMergedRevisionCount() {
        // The property is declared without a default, so it is null until
        // merge() assigns the affected row count. Keep the documented int contract.
        return $this->revisionsMerged ?? 0;
    }

    /**
     * Permission logic shared by probablyCanMerge() and authorizeMerge().
     *
     * Runs the given authorizer for the 'edit' action on the source and then the
     * destination page, checks the reason with the spam checker, and finally
     * requires the 'mergehistory' right. All failures are collected in one status.
     *
     * @param callable $authorizer ( string $action, PageIdentity $target, PermissionStatus $status )
     * @param Authority $performer
     * @param string $reason
     * @return PermissionStatus
     */
    private function authorizeInternal(
        callable $authorizer,
        Authority $performer,
        string $reason
    ) {
        $result = PermissionStatus::newEmpty();

        // The performer must be able to edit both pages: source first, then destination
        foreach ( [ $this->source, $this->dest ] as $page ) {
            $authorizer( 'edit', $page, $result );
        }

        // Anti-spam: any result other than false counts as a match.
        // The generic message is used, which will not display very nicely.
        $spamMatch = $this->spamChecker->checkSummary( $reason );
        if ( $spamMatch !== false ) {
            $result->fatal( 'spamprotectiontext' );
        }

        // Merging histories needs its own user right
        $mayMerge = $performer->isAllowed( 'mergehistory' );
        if ( !$mayMerge ) {
            $result->fatal( 'mergehistory-fail-permission' );
        }

        return $result;
    }

    /**
     * Check whether $performer can execute the merge.
     *
     * @note this method does not guarantee full permissions check, so it should
     * only be used to decide whether to show a merge form. To authorize the merge
     * action use {@link self::authorizeMerge} instead.
     *
     * @param Authority $performer
     * @param string|null $reason Merge reason to run through the spam check;
     *  null is treated as an empty reason
     * @return PermissionStatus
     */
    public function probablyCanMerge( Authority $performer, ?string $reason = null ): PermissionStatus {
        return $this->authorizeInternal(
            static function ( string $action, PageIdentity $target, PermissionStatus $status ) use ( $performer ) {
                return $performer->probablyCan( $action, $target, $status );
            },
            $performer,
            // authorizeInternal() only accepts a string; forwarding the null
            // default as-is would raise a TypeError.
            $reason ?? ''
        );
    }

    /**
     * Authorize the merge by $performer.
     *
     * @note this method should be used right before the actual merge is performed.
     * To check whether a current performer has the potential to merge the history,
     * use {@link self::probablyCanMerge} instead.
     *
     * @param Authority $performer
     * @param string|null $reason Merge reason to run through the spam check;
     *  null is treated as an empty reason
     * @return PermissionStatus
     */
    public function authorizeMerge( Authority $performer, ?string $reason = null ): PermissionStatus {
        return $this->authorizeInternal(
            static function ( string $action, PageIdentity $target, PermissionStatus $status ) use ( $performer ) {
                return $performer->authorizeWrite( $action, $target, $status );
            },
            $performer,
            // authorizeInternal() only accepts a string; forwarding the null
            // default as-is would raise a TypeError.
            $reason ?? ''
        );
    }

    /**
     * Run the sanity checks that depend only on the two pages and the
     * requested timestamp. Permission checks do not belong here.
     *
     * Every failed check adds its own fatal message, so the returned status
     * may carry several errors at once.
     *
     * @return Status
     */
    public function isValidMerge() {
        $result = new Status();

        // Revisions cannot be reliably selected for a page ID of 0
        $sourceId = $this->source->getId();
        if ( $sourceId === 0 ) {
            $result->fatal( 'mergehistory-fail-invalid-source' );
        }
        $destId = $this->dest->getId();
        if ( $destId === 0 ) {
            $result->fatal( 'mergehistory-fail-invalid-dest' );
        }

        // A page cannot be merged into itself
        $selfMerge = $this->source->isSamePageAs( $this->dest );
        if ( $selfMerge ) {
            $result->fatal( 'mergehistory-fail-self-merge' );
        }

        // The limit is null when the timestamp could not be resolved
        $limit = $this->getTimestampLimit();
        $hasLimit = (bool)$limit;
        if ( !$hasLimit ) {
            $result->fatal( 'mergehistory-fail-bad-timestamp' );
        }

        // The merge point must not be newer than the oldest destination revision
        if ( $limit > $this->getMaxTimestamp() ) {
            $result->fatal( 'mergehistory-fail-timestamps-overlap' );
        }

        // Refuse merges that would move more revisions than allowed
        if ( $hasLimit && $this->getRevisionCount() > self::REVISION_LIMIT ) {
            $result->fatal( 'mergehistory-fail-toobig', Message::numParam( self::REVISION_LIMIT ) );
        }

        return $result;
    }

    /**
     * Perform the history merge.
     *
     * After the validity and permission checks pass, the selected source revisions
     * are re-pointed at the destination page inside an atomic section. If the
     * source page is left without revisions it is handed to updateSourcePage();
     * otherwise only its cache is invalidated. Watchers are duplicated, a log
     * entry is written and the ArticleMergeComplete hook is run.
     *
     * @todo if all versions of page A are moved to B and then a user
     * tries to do a reverse-merge via the "unmerge" log link, then page
     * A will still be a redirect (as it was after the original merge),
     * though it will have the old revisions back from before (as expected).
     * The user may have to "undo" the redirect manually to finish the "unmerge".
     * Maybe this should delete redirects at the source page of merges?
     *
     * @param Authority $performer
     * @param string $reason
     * @return Status status of the history merge
     */
    public function merge( Authority $performer, $reason = '' ) {
        $status = new Status();

        // Validity is checked first, as it also catches pages that do not exist
        $validity = $this->isValidMerge();
        if ( !$validity->isOK() ) {
            return $validity;
        }
        $permission = $this->authorizeMerge( $performer, $reason );
        if ( !$permission->isOK() ) {
            return Status::wrap( $permission );
        }

        $this->dbw->startAtomic( __METHOD__ );

        // Re-point the selected source revisions at the destination page
        $this->dbw->newUpdateQueryBuilder()
            ->update( 'revision' )
            ->set( [ 'rev_page' => $this->dest->getId() ] )
            ->where( [ 'rev_page' => $this->source->getId(), $this->getTimeWhere() ] )
            ->caller( __METHOD__ )
            ->execute();

        // Bail out if no revision was actually moved
        $this->revisionsMerged = $this->dbw->affectedRows();
        if ( $this->revisionsMerged < 1 ) {
            $this->dbw->endAtomic( __METHOD__ );
            return $status->fatal( 'mergehistory-fail-no-change' );
        }

        // How many revisions are still attached to the source page?
        $remaining = $this->dbw->newSelectQueryBuilder()
            ->from( 'revision' )
            ->where( [ 'rev_page' => $this->source->getId() ] )
            ->forUpdate()
            ->caller( __METHOD__ )
            ->fetchRowCount();

        $legacySource = $this->titleFactory->newFromPageIdentity( $this->source );
        $legacyDest = $this->titleFactory->newFromPageIdentity( $this->dest );

        if ( $remaining ) {
            // The source keeps part of its history; refreshing its cache is enough
            $legacySource->invalidateCache();
        } else {
            // The source is now empty: build the comment and replace the page
            $sourceText = $this->titleFormatter->getPrefixedText( $this->source );
            $destText = $this->titleFormatter->getPrefixedText( $this->dest );
            $commentMsg = $reason
                ? wfMessage( 'mergehistory-comment', $sourceText, $destText, $reason )
                : wfMessage( 'mergehistory-autocomment', $sourceText, $destText );
            $reason = $commentMsg->inContentLanguage()->text();

            $this->updateSourcePage( $status, $performer, $reason );
        }
        $legacyDest->invalidateCache();

        // Users watching the source page now also watch the destination
        $this->watchedItemStore->duplicateAllAssociatedEntries( $this->source, $this->dest );

        // Record the merge in the log
        $logParams = [
            '4::dest' => $this->titleFormatter->getPrefixedText( $this->dest ),
            '5::mergepoint' => $this->getTimestampLimit()->getTimestamp( TS_MW ),
            '6::mergerevid' => $this->revidLimit
        ];
        $logEntry = new ManualLogEntry( 'merge', 'merge' );
        $logEntry->setPerformer( $performer->getUser() );
        $logEntry->setComment( $reason );
        $logEntry->setTarget( $this->source );
        $logEntry->setParameters( $logParams );
        $logId = $logEntry->insert();
        $logEntry->publish( $logId );

        $this->hookRunner->onArticleMergeComplete( $legacySource, $legacyDest );

        $this->dbw->endAtomic( __METHOD__ );

        return $status;
    }

    /**
     * Clean up the source page once none of its revisions is left.
     * merge() only calls this when the source page has no remaining revision.
     *
     * A new revision is inserted on the source page and passed to
     * WikiPage::updateRevisionOn(). If the content model of the source supports
     * redirects, that revision is a redirect to the destination and the pagelinks
     * rows of the source are replaced by a single link to the destination.
     * Otherwise the revision holds empty content and the source page is deleted;
     * in that case $status->value is set to 'source-deleted' and the deletion
     * status is merged into $status.
     *
     * @param Status $status Merge status, updated in place when the source is deleted
     * @param Authority $performer
     * @param string $reason Comment for the newly inserted revision
     * @throws InvalidArgumentException If the content handler does not return a Content object
     */
    private function updateSourcePage( $status, $performer, $reason ): void {
        $sourceTitle = $this->titleFactory->newFromPageIdentity( $this->source );
        $destTitle = $this->titleFactory->newFromPageIdentity( $this->dest );
        $model = $sourceTitle->getContentModel();
        $handler = $this->contentHandlerFactory->getContentHandler( $model );

        // Without redirect support the page cannot be kept and is deleted further down
        $deleteSource = !$handler->supportsRedirects();
        if ( $deleteSource ) {
            $replacement = $handler->makeEmptyContent();
        } else {
            $redirectText = wfMessage( 'mergehistory-redirect-text' )->inContentLanguage()->plain();
            $replacement = $handler->makeRedirectContent( $destTitle, $redirectText );
        }

        if ( !( $replacement instanceof Content ) ) {
            // The handler did not give us a Content object, so there is nothing
            // that could be stored as the new revision.

            // @todo Remove this once there is no evidence of it happening, or once
            // all violating handlers are known to be fixed. It is mostly kept
            // because earlier code blindly checked for the existence of the Content
            // for content models with and without redirect support alike, so it is
            // hard to know what that check was masking.
            MediaWiki\Logger\LoggerFactory::getInstance( 'ContentHandler' )->warning(
                'ContentHandler for {model} says it supports redirects but failed '
                . 'to return Content object from ContentHandler::makeRedirectContent().'
                . ' {value} returned instead.',
                [
                    'value' => get_debug_type( $replacement ),
                    'model' => $model
                ]
            );

            throw new InvalidArgumentException(
                "ContentHandler for '$model' supports redirects" .
                ' but cannot create redirect content during History merge.'
            );
        }

        // T263340/T93469: This revision record also serves as the page revision.
        // For content models with redirect support it carries the redirect; for
        // the others it carries empty content, which helps the deletion below
        // to work properly.
        $revComment = CommentStoreComment::newUnsavedComment( $reason );
        $draft = new MutableRevisionRecord( $this->source );
        $draft->setContent( SlotRecord::MAIN, $replacement )
            ->setPageId( $this->source->getId() )
            ->setComment( $revComment )
            ->setUser( $performer->getUser() )
            ->setTimestamp( wfTimestampNow() );

        $savedRev = $this->revisionStore->insertRevisionOn( $draft, $this->dbw );

        $sourcePage = $this->wikiPageFactory->newFromTitle( $this->source );
        $sourcePage->updateRevisionOn( $this->dbw, $savedRev );

        if ( $deleteSource ) {
            // T263340/T93469: The revisions of the source now belong to another
            // title and its content model has no redirect support, so no new
            // revision can be left behind. Delete the page to avoid errors.
            // The deletion does not depend on user rights but may still fail;
            // a failure is reported through the status.
            $reason = wfMessage( 'mergehistory-source-deleted-reason' )->inContentLanguage()->plain();
            $deleter = $this->deletePageFactory->newDeletePage( $sourcePage, $performer );
            $deletionStatus = $deleter->deleteUnsafe( $reason );
            if ( $deletionStatus->isGood() && $deleter->deletionsWereScheduled()[DeletePage::PAGE_BASE] ) {
                $deletionStatus->warning(
                    'delete-scheduled',
                    wfEscapeWikiText( $sourcePage->getTitle()->getPrefixedText() )
                );
            }
            // Tell callers that the source page has been deleted
            $status->value = 'source-deleted';
            $status->merge( $deletionStatus );
            return;
        }

        // TODO: This doesn't belong here, it should be part of PageLinksTable.
        // The source is now a redirect whose only outgoing link should be the
        // destination, so drop the old link rows and record that single link.
        $this->dbw->newDeleteQueryBuilder()
            ->deleteFrom( 'pagelinks' )
            ->where( [ 'pl_from' => $this->source->getId() ] )
            ->caller( __METHOD__ )
            ->execute();

        $stage = MediaWikiServices::getInstance()->getMainConfig()->get(
            MainConfigNames::PageLinksSchemaMigrationStage
        );
        $linkRow = [
            'pl_from' => $this->source->getId(),
            'pl_from_namespace' => $this->source->getNamespace(),
        ];
        if ( $stage & SCHEMA_COMPAT_WRITE_OLD ) {
            $linkRow['pl_namespace'] = $this->dest->getNamespace();
            $linkRow['pl_title'] = $this->dest->getDBkey();
        }
        if ( $stage & SCHEMA_COMPAT_WRITE_NEW ) {
            $linkRow['pl_target_id'] = $this->linkTargetLookup->acquireLinkTargetId(
                new TitleValue( $this->dest->getNamespace(), $this->dest->getDBkey() ),
                $this->dbw
            );
        }
        $this->dbw->newInsertQueryBuilder()
            ->insertInto( 'pagelinks' )
            ->row( $linkRow )
            ->caller( __METHOD__ )
            ->execute();
    }

    /**
     * Get the newest timestamp a merge may use, which is the timestamp of the
     * oldest destination revision. Computed on first use.
     *
     * @return MWTimestamp
     */
    private function getMaxTimestamp(): MWTimestamp {
        if ( $this->maxTimestamp !== false ) {
            return $this->maxTimestamp;
        }

        // Still at the "not initialized" marker: compute all limits now
        $this->initTimestampLimits();

        return $this->maxTimestamp;
    }

    /**
     * Get the timestamp up to which history from the source will be merged.
     * Computed on first use; null means that it could not be determined.
     *
     * @return ?MWTimestamp
     */
    private function getTimestampLimit(): ?MWTimestamp {
        if ( $this->timestampLimit !== false ) {
            return $this->timestampLimit;
        }

        // Still at the "not initialized" marker: compute all limits now
        $this->initTimestampLimits();

        return $this->timestampLimit;
    }

    /**
     * Get the SQL WHERE condition selecting the source revisions that are to be
     * moved to the destination. Computed on first use; null means that it could
     * not be built.
     *
     * @return ?string
     */
    private function getTimeWhere(): ?string {
        if ( $this->timeWhere !== false ) {
            return $this->timeWhere;
        }

        // Still at the "not initialized" marker: compute all limits now
        $this->initTimestampLimits();

        return $this->timeWhere;
    }

    /**
     * Compute the lazily initialized limits: the maximum timestamp, the timestamp
     * limit, the optional revision ID limit and the WHERE condition built from them.
     *
     * If a timestamp turns out to be invalid, the timestamp limit and the WHERE
     * condition are set to null.
     */
    private function initTimestampLimits() {
        // The oldest revision of the destination page caps what can be merged
        $oldestDestTimestamp = $this->dbw->newSelectQueryBuilder()
            ->select( 'MIN(rev_timestamp)' )
            ->from( 'revision' )
            ->where( [ 'rev_page' => $this->dest->getId() ] )
            ->caller( __METHOD__ )
            ->fetchField();
        $this->maxTimestamp = new MWTimestamp( $oldestDestTimestamp );
        $this->revidLimit = null;

        // Work out the pivot for the revision condition
        try {
            if ( !$this->timestamp ) {
                // No timestamp requested: merge the entire source history into
                // the beginning of the destination history.

                // Look up the latest revision of the source page
                $latest = $this->dbw->newSelectQueryBuilder()
                    ->select( [ 'rev_timestamp', 'rev_id' ] )
                    ->from( 'page' )
                    ->join( 'revision', null, 'page_latest = rev_id' )
                    ->where( [ 'page_id' => $this->source->getId() ] )
                    ->caller( __METHOD__ )
                    ->fetchRow();
                $pivot = $this->maxTimestamp;
                if ( $latest ) {
                    $this->timestampLimit = new MWTimestamp( $latest->rev_timestamp );
                    $this->revidLimit = $latest->rev_id;
                } else {
                    $this->timestampLimit = null;
                }
            } else {
                // The requested value is either "timestamp" or "timestamp|revid"
                $parts = explode( '|', $this->timestamp );
                if ( count( $parts ) === 2 ) {
                    [ $requested, $this->revidLimit ] = $parts;
                } else {
                    $requested = $this->timestamp;
                }

                // Use the latest source revision up to the requested
                // timestamp as the insertion point
                $cutoff = $this->dbw->timestamp( new MWTimestamp( $requested ) );
                $lastMatching = $this->dbw->newSelectQueryBuilder()
                    ->select( 'MAX(rev_timestamp)' )
                    ->from( 'revision' )
                    ->where( [
                        $this->dbw->expr( 'rev_timestamp', '<=', $cutoff ),
                        'rev_page' => $this->source->getId()
                    ] )
                    ->caller( __METHOD__ )
                    ->fetchField();

                $pivot = new MWTimestamp( $lastMatching );
                $this->timestampLimit = $pivot;
            }

            // Compare on the timestamp, and on the revision ID too when one is known
            $comparison = [ 'rev_timestamp' => $this->dbw->timestamp( $pivot ) ];
            if ( $this->revidLimit ) {
                $comparison['rev_id'] = $this->revidLimit;
            }
            $this->timeWhere = $this->dbw->buildComparison( '<=', $comparison );
        } catch ( TimestampException $ex ) {
            // The timestamp is unusable and the merge cannot continue.
            // $this->isValidMerge() is expected to detect this.
            $this->timestampLimit = null;
            $this->timeWhere = null;
        }
    }
}

// Registers the global name 'MergeHistory' as an alias of the namespaced
// MediaWiki\Page\MergeHistory class declared above.
/** @deprecated class alias since 1.40 */
class_alias( MergeHistory::class, 'MergeHistory' );