wikimedia/mediawiki-extensions-Wikibase

View on GitHub
repo/includes/Validators/FingerprintUniquenessValidator.php

Summary

Maintainability
A
1 hr
Test Coverage
<?php

namespace Wikibase\Repo\Validators;

use Generator;
use InvalidArgumentException;
use ValueValidators\Result;
use ValueValidators\ValueValidator;
use Wikibase\DataModel\Entity\EntityId;
use Wikibase\DataModel\Entity\Item;
use Wikibase\DataModel\Services\Lookup\TermLookup;
use Wikibase\Repo\ChangeOp\ChangeOpDescriptionResult;
use Wikibase\Repo\ChangeOp\ChangeOpFingerprintResult;
use Wikibase\Repo\ChangeOp\ChangeOpLabelResult;
use Wikibase\Repo\ChangeOp\ChangeOpResultTraversal;
use Wikibase\Repo\Store\TermsCollisionDetector;

/**
 * Validates the uniqueness of changing parts in a {@link ChangeOpFingerprintResult}
 * across entities in store
 *
 * The business logic here is as follows:
 * Given an item Q1 and a language L
 * When the label or the description of Q1 in L is being modified
 * Then there should be no other item Q2 in the store with the same label and the same description in L
 * Items are unique on their label and description in a language. This means that, given a language, no two items
 * should have the same label and the same description in that language.
 *
 * For properties, label uniqueness is instead validated by the LabelUniquenessValidator
 * added by EntityConstraintProvider.
 *
 * @see EntityConstraintProvider
 * @see LabelUniquenessValidator
 *
 * @license GPL-2.0-or-later
 */
class FingerprintUniquenessValidator implements ValueValidator {

    use ChangeOpResultTraversal;

    /** @var TermsCollisionDetector Queried for an entity colliding with a label and description pair */
    private $termsCollisionDetector;

    /** @var TermLookup Fallback source for the label or description that is not part of the change */
    private $termLookup;

    public function __construct(
        TermsCollisionDetector $termsCollisionDetector,
        TermLookup $termLookup
    ) {
        $this->termsCollisionDetector = $termsCollisionDetector;
        $this->termLookup = $termLookup;
    }

    /**
     * Accepts options but ignores them; this validator is not configurable.
     *
     * @param array $options
     */
    public function setOptions( array $options ) {
        // noop
    }

    /**
     * Validates the changed labels and descriptions of a fingerprint change result.
     *
     * Only results whose entity is an item are checked. For every other entity type
     * a successful result is returned without looking at the changed terms.
     *
     * @param ChangeOpFingerprintResult $value
     *
     * @return Result
     * @throws InvalidArgumentException if $value is not a ChangeOpFingerprintResult
     */
    public function validate( $value ) {
        if ( !$value instanceof ChangeOpFingerprintResult ) {
            throw new InvalidArgumentException( '$value can only be of type ChangeOpFingerprintResult' );
        }

        // NOTE(review): getEntityId() looks nullable (see the phan suppression further down); a null ID would
        // fail on the method call below - confirm that callers always pass a result carrying an entity ID.
        $entityId = $value->getEntityId();
        return $entityId->getEntityType() === Item::ENTITY_TYPE
            ? $this->validateItem( $value )
            : Result::newSuccess();
    }

    /**
     * Asks the collision detector, once per language with a changed label or description, whether another
     * entity collides with the resulting label and description pair, and collects one error per collision.
     *
     * @param ChangeOpFingerprintResult $fingerprintChangeOpResult
     *
     * @return Result an error result holding all collisions found, or a success result if there are none
     */
    private function validateItem( ChangeOpFingerprintResult $fingerprintChangeOpResult ): Result {
        $errors = [];

        foreach ( $this->getChangedLabelsAndDescriptionsPerLanguage( $fingerprintChangeOpResult ) as $lang => $terms ) {
            $collidingEntityId = $this->termsCollisionDetector->detectLabelAndDescriptionCollision(
                $lang,
                $terms['label'],
                $terms['description']
            );

            if ( $collidingEntityId !== null ) {
                $errors[] = $this->collisionToError(
                    'label-with-description-conflict',
                    $collidingEntityId,
                    $lang,
                    $terms['label']
                );
            }
        }

        return $errors ? Result::newError( $errors ) : Result::newSuccess();
    }

    /**
     * @param ChangeOpFingerprintResult $changeOpsResult
     *
     * @return Generator yielding entries of the shape
     *  language code => [ 'label' => label text, 'description' => description text ]
     *  for every language in which the label and/or the description is being changed
     */
    private function getChangedLabelsAndDescriptionsPerLanguage( ChangeOpFingerprintResult $changeOpsResult ): Generator {
        [ $newTerms, $oldTerms ] = $this->collectNewAndOldTerms( $changeOpsResult );

        $labelDescriptionPairsPerLanguage = $this->generateLabelDescriptionPairs(
            $newTerms,
            $oldTerms,
            // @phan-suppress-next-line PhanTypeMismatchArgumentNullable Validated via ::ENTITY_TYPE
            $changeOpsResult->getEntityId()
        );

        yield from $labelDescriptionPairsPerLanguage;
    }

    /**
     * @param ChangeOpFingerprintResult $changeOpsResult
     *
     * @return array containing two entries [ 0 => new terms, 1 => old terms ], each of the shape
     *  [ language code => [ 'label' => label text, 'description' => description text ] ] where either key may be absent
     *
     *  new terms will contain new term values per language per term type that appear in $changeOpsResult as changing
     *  the entity, while old terms will contain old term values per language per term type that appear in
     *  $changeOpsResult as not changing the entity
     *
     *  old terms might not contain complementary data to those entries in new terms, as that depends on whether
     *  the ChangeOpsResult contains results of things that are not being changed or not (which in turn depends
     *  on ChangeOpFingerprint that produced the ChangeOpFingerprintResult). Example scenario is an api call that is
     *  sending to server only the terms that need to change. Counter example scenario is a frontend (e.g. termbox)
     *  sending back to server all terms, whether changed or not.
     */
    private function collectNewAndOldTerms( ChangeOpFingerprintResult $changeOpsResult ): array {
        $newTerms = [];
        $oldTerms = [];

        // Results that are neither label nor description results are simply skipped.
        foreach ( $this->makeRecursiveTraversable( $changeOpsResult ) as $changeOp ) {
            if ( $changeOp instanceof ChangeOpLabelResult ) {
                $lang = $changeOp->getLanguageCode();

                if ( $changeOp->isEntityChanged() ) {
                    $newTerms[$lang]['label'] = $changeOp->getNewLabel();
                } else {
                    $oldTerms[$lang]['label'] = $changeOp->getOldLabel();
                }
            } elseif ( $changeOp instanceof ChangeOpDescriptionResult ) {
                $lang = $changeOp->getLanguageCode();

                if ( $changeOp->isEntityChanged() ) {
                    $newTerms[$lang]['description'] = $changeOp->getNewDescription();
                } else {
                    $oldTerms[$lang]['description'] = $changeOp->getOldDescription();
                }
            }
        }

        return [ $newTerms, $oldTerms ];
    }

    /**
     * In order to check label and description uniqueness, this validator needs to know both the label and the
     * description in a language, where one or both of them are going to change.
     *
     * The purpose of this method is to take those terms that are about to change ($newTerms) and make sure to yield
     * pairs of label and description, filling in a missing label or description from $oldTerms if present there,
     * and otherwise from the term lookup.
     *
     * @param array $newTerms [ language code => [ 'label' and/or 'description' => new text ] ]
     * @param array $oldTerms [ language code => [ 'label' and/or 'description' => unchanged text ] ]
     * @param EntityId $entityId entity whose stored terms are looked up when $oldTerms lacks the missing term
     *
     * @return Generator yielding entries of the shape
     *  language code => [ 'label' => label text, 'description' => description text ]
     */
    private function generateLabelDescriptionPairs( array $newTerms, array $oldTerms, EntityId $entityId ): Generator {
        foreach ( $newTerms as $lang => $terms ) {
            $missingTerms = array_diff( [ 'label', 'description' ], array_keys( $terms ) );
            $missingCount = count( $missingTerms );

            if ( $missingCount > 1 ) {
                // This should never happen as long as newTerms contains entries per language that each has exactly one or two
                // entries with 'label' or 'description' as keys. Left here for completeness.
                continue;
            }

            if ( $missingCount === 1 ) {
                // array_diff() preserves keys, so the only element is not necessarily at index 0
                $missingTerm = reset( $missingTerms );
                // Todo: we might want to batch looking up entity terms through TermLookup, which will change this implementation
                // enough to not be suitable for Generator use-case, as those need to be collected and batch fetched before
                // yielding them
                $terms[$missingTerm] = $oldTerms[$lang][$missingTerm] ?? $this->getEntityTerm( $entityId, $lang, $missingTerm );
            }

            yield $lang => $terms;
        }
    }

    /**
     * Fetches the label or description of an entity in a language through the term lookup.
     *
     * @param EntityId $entityId
     * @param string $lang language code
     * @param string $termType either 'label' or 'description'
     *
     * @return string the term text, or an empty string if the lookup returns null
     * @throws InvalidArgumentException if $termType is neither 'label' nor 'description'
     */
    private function getEntityTerm( EntityId $entityId, $lang, $termType ): string {
        if ( $termType === 'label' ) {
            return $this->termLookup->getLabel( $entityId, $lang ) ?? '';
        } elseif ( $termType === 'description' ) {
            return $this->termLookup->getDescription( $entityId, $lang ) ?? '';
        }

        throw new InvalidArgumentException( "\$termType can only be 'label' or 'description'. '{$termType}' was given" );
    }

    /**
     * Builds the uniqueness violation reported for a detected collision.
     *
     * @param string $code error code of the violation
     * @param mixed $collidingEntityId the non-null value returned by the collision detector
     *  (presumably an EntityId - confirm against TermsCollisionDetector)
     * @param string $lang language code in which the collision was found
     * @param string $label label text that is part of the colliding pair
     *
     * @return UniquenessViolation carrying the label, the language code and the colliding entity ID as parameters
     */
    private function collisionToError( $code, $collidingEntityId, $lang, $label ): UniquenessViolation {
        return new UniquenessViolation(
            $collidingEntityId,
            'found conflicting terms',
            $code,
            [
                $label,
                $lang,
                $collidingEntityId,
            ]
        );
    }
}