wikimedia/mediawiki-core

View on GitHub
includes/import/ImportableOldRevisionImporter.php

Summary

Maintainability
C
1 day
Test Coverage
<?php

use MediaWiki\CommentStore\CommentStoreComment;
use MediaWiki\Context\RequestContext;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\WikiPageFactory;
use MediaWiki\Revision\MutableRevisionRecord;
use MediaWiki\Revision\RevisionStore;
use MediaWiki\Revision\SlotRoleRegistry;
use MediaWiki\Storage\PageUpdaterFactory;
use MediaWiki\Title\Title;
use MediaWiki\User\UserFactory;
use Psr\Log\LoggerInterface;
use Wikimedia\Rdbms\IConnectionProvider;
use Wikimedia\Rdbms\SelectQueryBuilder;

/**
 * Imports a single old revision, described by an ImportableOldRevision,
 * into the local wiki: creates the page if needed, inserts the revision,
 * and optionally runs the derived-data updates.
 *
 * @since 1.31
 */
class ImportableOldRevisionImporter implements OldRevisionImporter {

    /** Whether import() runs derived-data updates after repointing the page at the new revision */
    private bool $doUpdates;
    /** Receives debug messages about skipped revisions and update runs */
    private LoggerInterface $logger;
    /** Source of the primary database connection used for all reads and writes */
    private IConnectionProvider $dbProvider;
    /** Used to find the preceding revision and to insert the imported one */
    private RevisionStore $revisionStore;
    /** Used to reject slot roles that are not defined on this wiki */
    private SlotRoleRegistry $slotRoleRegistry;
    /** Builds the WikiPage for the imported revision's title */
    private WikiPageFactory $wikiPageFactory;
    /** Creates the DerivedPageDataUpdater used for post-import updates */
    private PageUpdaterFactory $pageUpdaterFactory;
    /** Resolves the revision's author into a user object */
    private UserFactory $userFactory;

    /**
     * @param bool $doUpdates Whether import() should run derived-data updates
     * @param LoggerInterface $logger
     * @param IConnectionProvider $dbProvider
     * @param RevisionStore $revisionStore
     * @param SlotRoleRegistry $slotRoleRegistry
     * @param WikiPageFactory|null $wikiPageFactory Falls back to the global service when null
     * @param PageUpdaterFactory|null $pageUpdaterFactory Falls back to the global service when null
     * @param UserFactory|null $userFactory Falls back to the global service when null
     */
    public function __construct(
        $doUpdates,
        LoggerInterface $logger,
        IConnectionProvider $dbProvider,
        RevisionStore $revisionStore,
        SlotRoleRegistry $slotRoleRegistry,
        // Explicitly nullable: implicit nullability via "= null" is deprecated in PHP 8.4
        ?WikiPageFactory $wikiPageFactory = null,
        ?PageUpdaterFactory $pageUpdaterFactory = null,
        ?UserFactory $userFactory = null
    ) {
        $this->doUpdates = $doUpdates;
        $this->logger = $logger;
        $this->dbProvider = $dbProvider;
        $this->revisionStore = $revisionStore;
        $this->slotRoleRegistry = $slotRoleRegistry;

        $services = MediaWikiServices::getInstance();
        // @todo: temporary - remove when FileImporter extension is updated
        $this->wikiPageFactory = $wikiPageFactory ?? $services->getWikiPageFactory();
        $this->pageUpdaterFactory = $pageUpdaterFactory ?? $services->getPageUpdaterFactory();
        $this->userFactory = $userFactory ?? $services->getUserFactory();
    }

    /**
     * Insert the given revision into the page identified by its title.
     *
     * The page is created if it does not exist yet. For an existing page, a
     * revision with the same timestamp and SHA-1 is treated as already
     * imported and skipped. The page is repointed at the new revision only
     * if the revision's timestamp is not older than that of the page's
     * current latest revision.
     *
     * @param ImportableOldRevision $importableRevision
     * @param bool $doUpdates Not read by this implementation; the flag passed
     *  to the constructor decides whether updates run.
     * @return bool True if the revision was inserted; false if it was skipped
     *  as a duplicate or no valid page ID could be obtained.
     * @throws RuntimeException If the revision uses an undefined slot role
     */
    public function import( ImportableOldRevision $importableRevision, $doUpdates = true ) {
        $dbw = $this->dbProvider->getPrimaryDatabase();

        # Sneak a single revision into place
        $user = $importableRevision->getUserObj() ?: $this->userFactory->newFromName( $importableRevision->getUser() );
        if ( $user ) {
            $userId = $user->getId();
            $userText = $user->getName();
        } else {
            // No usable user object: keep the raw name from the import and
            // use an anonymous user for the update's cause agent below.
            $userId = 0;
            $userText = $importableRevision->getUser();
            $user = $this->userFactory->newAnonymous();
        }

        // avoid memory leak...?
        Title::clearCaches();

        $page = $this->wikiPageFactory->newFromTitle( $importableRevision->getTitle() );
        $page->loadPageData( IDBAccessObject::READ_LATEST );
        $mustCreatePage = !$page->exists();
        if ( $mustCreatePage ) {
            $pageId = $page->insertOn( $dbw );
        } else {
            $pageId = $page->getId();

            // Note: sha1 has been in XML dumps since 2012. If you have an
            // older dump, the duplicate detection here won't work.
            if ( $importableRevision->getSha1Base36() !== false ) {
                $prior = (bool)$dbw->newSelectQueryBuilder()
                    ->select( '1' )
                    ->from( 'revision' )
                    ->where( [
                        'rev_page' => $pageId,
                        'rev_timestamp' => $dbw->timestamp( $importableRevision->getTimestamp() ),
                        'rev_sha1' => $importableRevision->getSha1Base36()
                    ] )
                    ->caller( __METHOD__ )->fetchField();
                if ( $prior ) {
                    // @todo FIXME: This could fail slightly for multiple matches :P
                    $this->logger->debug( __METHOD__ . ": skipping existing revision for [[" .
                        $importableRevision->getTitle()->getPrefixedText() . "]], timestamp " .
                        $importableRevision->getTimestamp() . "\n" );
                    return false;
                }
            }
        }

        if ( !$pageId ) {
            // This seems to happen if two clients simultaneously try to import the
            // same page
            $this->logger->debug( __METHOD__ . ': got invalid $pageId when importing revision of [[' .
                $importableRevision->getTitle()->getPrefixedText() . ']], timestamp ' .
                $importableRevision->getTimestamp() . "\n" );
            return false;
        }

        // Select previous version to make size diffs correct
        // @todo This assumes that multiple revisions of the same page are imported
        // in order from oldest to newest.
        $queryBuilder = $this->revisionStore->newSelectQueryBuilder( $dbw )
            ->joinComment()
            ->where( [ 'rev_page' => $pageId ] )
            ->andWhere( $dbw->expr(
                'rev_timestamp', '<=', $dbw->timestamp( $importableRevision->getTimestamp() )
            ) )
            ->orderBy( [ 'rev_timestamp', 'rev_id' ], SelectQueryBuilder::SORT_DESC );
        $prevRevRow = $queryBuilder->caller( __METHOD__ )->fetchRow();

        # @todo FIXME: Use original rev_id optionally (better for backups)
        # Insert the row
        $revisionRecord = new MutableRevisionRecord( $importableRevision->getTitle() );
        $revisionRecord->setParentId( $prevRevRow ? (int)$prevRevRow->rev_id : 0 );
        $revisionRecord->setComment(
            CommentStoreComment::newUnsavedComment( $importableRevision->getComment() )
        );

        try {
            $revUser = $this->userFactory->newFromAnyId( $userId, $userText );
        } catch ( InvalidArgumentException $ex ) {
            // The ID/name pair was rejected; attribute the revision to the
            // user of the main request context instead.
            $revUser = RequestContext::getMain()->getUser();
        }
        $revisionRecord->setUser( $revUser );

        $originalRevision = $prevRevRow
            ? $this->revisionStore->newRevisionFromRow(
                $prevRevRow,
                IDBAccessObject::READ_LATEST,
                $importableRevision->getTitle()
            )
            : null;

        foreach ( $importableRevision->getSlotRoles() as $role ) {
            if ( !$this->slotRoleRegistry->isDefinedRole( $role ) ) {
                throw new RuntimeException( "Undefined slot role $role" );
            }

            $newContent = $importableRevision->getContent( $role );
            if ( !$originalRevision || !$originalRevision->hasSlot( $role ) ) {
                $revisionRecord->setContent( $role, $newContent );
            } else {
                $originalSlot = $originalRevision->getSlot( $role );
                if ( !$originalSlot->hasSameContent( $importableRevision->getSlot( $role ) ) ) {
                    $revisionRecord->setContent( $role, $newContent );
                } else {
                    // Content is unchanged from the preceding revision:
                    // inherit its slot instead of setting the content again.
                    $revisionRecord->inheritSlot( $originalSlot );
                }
            }
        }

        $revisionRecord->setTimestamp( $importableRevision->getTimestamp() );
        $revisionRecord->setMinorEdit( $importableRevision->getMinor() );
        $revisionRecord->setPageId( $pageId );

        // Read the page's latest revision ID before the new revision is inserted
        $latestRevId = $page->getLatest();

        $inserted = $this->revisionStore->insertRevisionOn( $revisionRecord, $dbw );
        if ( $latestRevId ) {
            // If not found (false), cast to 0 so that the page is updated
            // Just to be on the safe side, even though it should always be found
            $latestRevTimestamp = (int)$this->revisionStore->getTimestampFromId(
                $latestRevId,
                IDBAccessObject::READ_LATEST
            );
        } else {
            $latestRevTimestamp = 0;
        }
        // Repoint the page only when the imported revision is not older than
        // the current latest one; an equal timestamp also repoints it.
        if ( $importableRevision->getTimestamp() >= $latestRevTimestamp ) {
            $changed = $page->updateRevisionOn( $dbw, $inserted, $latestRevId );
        } else {
            $changed = false;
        }

        $tags = $importableRevision->getTags();
        if ( $tags !== [] ) {
            ChangeTags::addTags( $tags, null, $inserted->getId() );
        }

        if ( $changed !== false && $this->doUpdates ) {
            $this->logger->debug( __METHOD__ . ": running updates" );
            // countable/oldcountable stuff is handled in WikiImporter::finishImportPage

            $options = [
                'created' => $mustCreatePage,
                'oldcountable' => 'no-change',
                'causeAction' => 'import-page',
                'causeAgent' => $user->getName(),
            ];

            $updater = $this->pageUpdaterFactory->newDerivedPageDataUpdater( $page );
            $updater->prepareUpdate( $inserted, $options );
            $updater->doUpdates();
        }

        return true;
    }

}