wikimedia/mediawiki-extensions-Wikibase

View on GitHub
client/includes/Changes/ChangeRunCoalescer.php

Summary

Maintainability
C
1 day
Test Coverage
<?php

namespace Wikibase\Client\Changes;

use Diff\DiffOp\Diff\Diff;
use Diff\DiffOp\DiffOp;
use Exception;
use Psr\Log\LoggerInterface;
use Wikibase\DataModel\Entity\EntityId;
use Wikibase\Lib\Changes\Change;
use Wikibase\Lib\Changes\ChangeRow;
use Wikibase\Lib\Changes\EntityChange;
use Wikibase\Lib\Changes\EntityChangeFactory;
use Wikibase\Lib\Changes\ItemChange;
use Wikibase\Lib\Store\EntityRevisionLookup;
use Wikibase\Lib\Store\LookupConstants;
use Wikibase\Lib\Store\StorageException;

/**
 * A transformer for lists of EntityChanges that combines runs of changes into a single change.
 * A "run" of changes is a sequence of consecutive changes performed by the same
 * user, and not interrupted by a "disruptive" change. Changes altering the association
 * between pages on the local wiki and items on the repo are considered disruptive.
 *
 * @license GPL-2.0-or-later
 * @author Daniel Kinzler
 */
class ChangeRunCoalescer {

    /**
     * @var EntityRevisionLookup Used to load the entity revisions a merged change is computed from.
     */
    private $entityRevisionLookup;

    /**
     * @var EntityChangeFactory Used to build the combined change for a run.
     */
    private $changeFactory;

    /**
     * @var LoggerInterface
     */
    private $logger;

    /**
     * @var string Site ID used to detect sitelink changes that point to the local wiki.
     */
    private $localSiteId;

    /**
     * @param EntityRevisionLookup $entityRevisionLookup
     * @param EntityChangeFactory $changeFactory
     * @param LoggerInterface $logger
     * @param string $localSiteId
     */
    public function __construct(
        EntityRevisionLookup $entityRevisionLookup,
        EntityChangeFactory $changeFactory,
        LoggerInterface $logger,
        $localSiteId
    ) {
        $this->entityRevisionLookup = $entityRevisionLookup;
        $this->changeFactory = $changeFactory;
        $this->logger = $logger;
        $this->localSiteId = $localSiteId;
    }

    /**
     * Processes the given list of changes, combining any runs of changes into a single change.
     * See the class level documentation for more details on change runs.
     *
     * The changes are grouped per entity, each group is coalesced on its own, and the
     * combined result is sorted by timestamp (and change ID as a tie-breaker).
     *
     * @param EntityChange[] $changes
     *
     * @return EntityChange[]
     */
    public function transformChangeList( array $changes ) {
        $coalescedPerEntity = [];

        /** @var EntityChange[] $entityChanges */
        foreach ( $this->groupChangesByEntity( $changes ) as $entityChanges ) {
            // Every group holds at least one change, so index 0 always exists.
            $coalescedPerEntity[] = $this->coalesceRuns(
                $entityChanges[0]->getEntityId(),
                $entityChanges
            );
        }

        // Merge once instead of re-copying the accumulated list on every iteration.
        // The leading [] keeps the call valid when there are no groups at all.
        $coalesced = array_merge( [], ...$coalescedPerEntity );

        usort( $coalesced, [ $this, 'compareChangesByTimestamp' ] );
        $this->logger->debug(
            '{method}: coalesced {changeCount} into {changeCoalescedCount} changes',
            [
                'method' => __METHOD__,
                'changeCount' => count( $changes ),
                'changeCoalescedCount' => count( $coalesced ),
            ]
        );

        return $coalesced;
    }

    /**
     * Group changes by the entity they were applied to.
     *
     * @param EntityChange[] $changes
     *
     * @return array[] an associative array using entity IDs for keys. Associated with each
     *         entity ID is the list of changes performed on that entity, in input order.
     */
    private function groupChangesByEntity( array $changes ): array {
        $groups = [];

        foreach ( $changes as $change ) {
            // Appending creates the per-entity list on first use.
            $groups[$change->getEntityId()->getSerialization()][] = $change;
        }

        return $groups;
    }

    /**
     * Combines a set of changes into one change. All changes are assumed to have been performed
     * by the same user on the same entity. They are further assumed to be UPDATE actions
     * and sorted in causal (chronological) order.
     *
     * If $changes contains exactly one change, that change is returned. Otherwise, a combined
     * change is returned. The combined change is re-calculated from the parent revision of the
     * first change and the revision of the last change; it takes its revision ID, user ID and
     * time from the last change, and is flagged "minor" / "bot" only if every change in the
     * run carries that flag.
     *
     * @param EntityId $entityId
     * @param EntityChange[] $changes The changes to combine. Must not be empty.
     *
     * @throws MergeFailedException if a required revision could not be loaded.
     * @return EntityChange a combined change representing the activity from all the original changes.
     */
    private function mergeChanges( EntityId $entityId, array $changes ) {
        if ( count( $changes ) === 1 ) {
            return reset( $changes );
        }

        // We now assume that we have a list of EntityChanges,
        // all done by the same user on the same entity.

        /**
         * @var EntityChange $last
         * @var EntityChange $first
         */
        $last = end( $changes );
        $first = reset( $changes );

        $minor = true;
        $bot = true;
        $ids = [];

        foreach ( $changes as $change ) {
            $ids[] = $change->getId();
            $meta = $change->getMetadata();

            // A single non-minor (or non-bot) change clears the flag for the whole run.
            $minor = $minor && !empty( $meta['minor'] );
            $bot = $bot && !empty( $meta['bot'] );
        }

        $lastmeta = $last->getMetadata();
        $firstmeta = $first->getMetadata();

        $parentRevId = $firstmeta['parent_id'];
        $latestRevId = $lastmeta['rev_id'];

        try {
            $entityRev = $this->entityRevisionLookup->getEntityRevision(
                $entityId,
                $latestRevId,
                LookupConstants::LATEST_FROM_REPLICA_WITH_FALLBACK
            );
        } catch ( StorageException $e ) {
            throw new MergeFailedException( "Failed to load revision $latestRevId of $entityId", 0, $e );
        }

        if ( !$entityRev ) {
            throw new MergeFailedException( "Failed to load revision $latestRevId of $entityId" );
        }

        try {
            // A falsy parent ID means there is no parent revision to diff against.
            $parentRev = $parentRevId
                ? $this->entityRevisionLookup->getEntityRevision( $entityId, $parentRevId )
                : null;
        } catch ( StorageException $e ) {
            throw new MergeFailedException( "Failed to load parent revision $parentRevId", 0, $e );
        }

        //XXX: we could avoid loading the entity data by merging the diffs programatically
        //     instead of re-calculating.
        // Without a parent revision the combined change is built as an ADD.
        $change = $this->changeFactory->newFromUpdate(
            $parentRev ? EntityChange::UPDATE : EntityChange::ADD,
            $parentRev ? $parentRev->getEntity() : null,
            $entityRev->getEntity()
        );

        $change->setFields(
            [
                ChangeRow::REVISION_ID => $last->getField( ChangeRow::REVISION_ID ),
                ChangeRow::USER_ID => $last->getUserId(),
                ChangeRow::TIME => $last->getTime(),
            ]
        );

        //FIXME: size before & size after
        $change->setMetadata( array_merge(
            $lastmeta,
            [
                'parent_id' => $parentRevId,
                'minor' => $minor,
                'bot' => $bot,
            ]
        ) );

        // Record the original changes and their IDs on the combined change.
        $info = $change->getInfo();
        $info['change-ids'] = $ids;
        $info['changes'] = $changes;
        $change->setField( ChangeRow::INFO, $info );

        return $change;
    }

    /**
     * Merges a run of changes into a single change, falling back to the unmerged run
     * if merging fails.
     *
     * Only MergeFailedException is handled here; any other exception propagates to the caller.
     *
     * @param EntityId $entityId
     * @param EntityChange[] $run A non-empty list of changes forming one run.
     *
     * @return EntityChange[] a list holding the merged change, or the original run
     *         if merging failed.
     */
    private function mergeRunOrKeepOriginal( EntityId $entityId, array $run ): array {
        try {
            return [ $this->mergeChanges( $entityId, $run ) ];
        } catch ( MergeFailedException $ex ) {
            // Something went wrong while trying to merge the changes.
            // Log it and just keep the original run.
            $logMsg = $ex->getMessage();
            $previousEx = $ex->getPrevious();
            if ( $previousEx ) {
                $logMsg .= '. Previous message: ' . $previousEx->getMessage();
            }
            wfWarn( $logMsg );

            return $run;
        }
    }

    /**
     * Checks whether a sitelink diff operation leaves the link's 'name' untouched,
     * i.e. it is a Diff that has no 'name' entry.
     *
     * @param DiffOp $siteLinkDiffOp
     *
     * @return bool
     */
    private function isBadgesOnlyChange( DiffOp $siteLinkDiffOp ): bool {
        return $siteLinkDiffOp instanceof Diff && !$siteLinkDiffOp->offsetExists( 'name' );
    }

    /**
     * Coalesce consecutive changes by the same user to the same entity into one.
     *
     * A run of changes may be broken if the action performed changes (e.g. deletion
     * instead of update) or if a sitelink pointing to the local wiki was modified.
     * A change that modifies the local sitelink (other than a badges-only change) is
     * kept in a run of its own: it ends the run before it and forces a break after it.
     *
     * Some types of actions, like deletion, will break runs.
     *
     * @param EntityId $entityId
     * @param EntityChange[] $changes
     *
     * @return EntityChange[] grouped changes
     */
    private function coalesceRuns( EntityId $entityId, array $changes ): array {
        $coalesced = [];

        $currentRun = [];
        $currentUser = null;
        $currentAction = null;
        $breakNext = false;

        foreach ( $changes as $change ) {
            try {
                $action = $change->getAction();
                $meta = $change->getMetadata();
                $user = $meta['user_text'];

                $break = $breakNext
                    || $currentAction !== $action
                    || $currentUser !== $user;

                $breakNext = false;

                if ( !$break && ( $change instanceof ItemChange ) ) {
                    $siteLinkDiff = $change->getSiteLinkDiff();
                    if ( isset( $siteLinkDiff[$this->localSiteId] )
                        && !$this->isBadgesOnlyChange( $siteLinkDiff[$this->localSiteId] ) ) {
                        // Isolate this change: break before it and after it.
                        $break = true;
                        $breakNext = true;
                    }
                }

                if ( $break ) {
                    if ( $currentRun ) {
                        array_push(
                            $coalesced,
                            ...$this->mergeRunOrKeepOriginal( $entityId, $currentRun )
                        );
                    }

                    $currentRun = [];
                    $currentUser = $user;
                    // A run started by an ADD continues with the UPDATE changes that follow it.
                    $currentAction = $action === EntityChange::ADD ? EntityChange::UPDATE : $action;
                }

                $currentRun[] = $change;
            } catch ( Exception $ex ) {
                // skip any change that failed to process in some way (bug T51417)
                wfLogWarning( __METHOD__ . ':' . $ex->getMessage() );
            }
        }

        // Flush the run that was still open when the input ended.
        if ( $currentRun ) {
            array_push(
                $coalesced,
                ...$this->mergeRunOrKeepOriginal( $entityId, $currentRun )
            );
        }

        return $coalesced;
    }

    /**
     * Compares two changes based on their timestamp, using the change ID as a tie-breaker.
     *
     * @param Change $a
     * @param Change $b
     *
     * @return int -1, 0 or 1, suitable as a usort() callback result.
     */
    public function compareChangesByTimestamp( Change $a, Change $b ) {
        //NOTE: beware https://bugs.php.net/bug.php?id=50688 !

        $byTime = $a->getTime() <=> $b->getTime();
        if ( $byTime !== 0 ) {
            return $byTime;
        }

        return $a->getId() <=> $b->getId();
    }

}