wikimedia/mediawiki-extensions-Wikibase

View on GitHub
lib/packages/wikibase/data-model-services/src/Lookup/EntityRetrievingClosestReferencedEntityIdLookup.php

Summary

Maintainability
A
2 hrs
Test Coverage
<?php

namespace Wikibase\DataModel\Services\Lookup;

use Wikibase\DataModel\Entity\EntityId;
use Wikibase\DataModel\Entity\EntityIdValue;
use Wikibase\DataModel\Entity\PropertyId;
use Wikibase\DataModel\Services\Entity\EntityPrefetcher;
use Wikibase\DataModel\Snak\PropertyValueSnak;
use Wikibase\DataModel\Snak\Snak;
use Wikibase\DataModel\Statement\StatementListProvider;

/**
 * Service for getting the closest entity (out of a specified set),
 * from a given starting entity. The starting entity, and the target entities
 * are (potentially indirectly, via intermediate entities) linked by statements
 * with a given property ID, pointing from the starting entity to one of the
 * target entities.
 *
 * @since 3.10
 *
 * @license GPL-2.0-or-later
 * @author Marius Hoch
 */
class EntityRetrievingClosestReferencedEntityIdLookup implements ReferencedEntityIdLookup {

    /**
     * @var EntityLookup
     */
    private $entityLookup;

    /**
     * @var EntityPrefetcher
     */
    private $entityPrefetcher;

    /**
     * @var int Maximum search depth: Maximum number of intermediate entities to search through.
     *  For example 0 means that only the entities immediately referenced will be found.
     */
    private $maxDepth;

    /**
     * @var int Maximum number of entities to retrieve.
     */
    private $maxEntityVisits;

    /**
     * Map (entity id => true) of already visited entities.
     *
     * @var bool[]
     */
    private $alreadyVisited = [];

    /**
     * @param EntityLookup $entityLookup
     * @param EntityPrefetcher $entityPrefetcher
     * @param int $maxDepth Maximum search depth: Maximum number of intermediate entities to search through.
     *  For example if 0 is given, only the entities immediately referenced will be found.
     *  If this limit gets exhausted, a MaxReferenceDepthExhaustedException is thrown.
     * @param int $maxEntityVisits Maximum number of entities to retrieve during a lookup.
     *  If this limit gets exhausted, a MaxReferencedEntityVisitsExhaustedException is thrown.
     */
    public function __construct(
        EntityLookup $entityLookup,
        EntityPrefetcher $entityPrefetcher,
        $maxDepth,
        $maxEntityVisits
    ) {
        $this->entityLookup = $entityLookup;
        $this->entityPrefetcher = $entityPrefetcher;
        $this->maxDepth = $maxDepth;
        $this->maxEntityVisits = $maxEntityVisits;
    }

    /**
     * Get the closest entity (out of $toIds), from a given entity. The starting entity, and
     * the target entities are (potentially indirectly, via intermediate entities) linked by
     * statements with the given property ID, pointing from the starting entity to one of the
     * target entities.
     *
     * @since 3.10
     *
     * @param EntityId $fromId
     * @param PropertyId $propertyId
     * @param EntityId[] $toIds
     *
     * @return EntityId|null Returns null in case none of the target entities are referenced.
     * @throws ReferencedEntityIdLookupException
     */
    public function getReferencedEntityId( EntityId $fromId, PropertyId $propertyId, array $toIds ) {
        if ( !$toIds ) {
            return null;
        }

        $this->alreadyVisited = [];

        $steps = $this->maxDepth + 1; // Add one as checking $fromId already is a step
        $toVisit = [ $fromId ];

        while ( $steps-- ) {
            $this->entityPrefetcher->prefetch( $toVisit );
            $toVisitNext = [];

            foreach ( $toVisit as $curId ) {
                $result = $this->processEntityById( $curId, $fromId, $propertyId, $toIds, $toVisitNext );
                if ( $result ) {
                    return $result;
                }
            }
            // Remove already visited entities
            $toVisit = array_unique(
                array_diff( $toVisitNext, array_keys( $this->alreadyVisited ) )
            );

            if ( !$toVisit ) {
                return null;
            }
        }

        // Exhausted the max. depth without finding anything.
        throw new MaxReferenceDepthExhaustedException(
            $fromId,
            $propertyId,
            $toIds,
            $this->maxDepth
        );
    }

    /**
     * Find out whether an entity (directly) references one of the target ids.
     *
     * @param EntityId $id Id of the entity to process
     * @param EntityId $fromId Id this lookup started from
     * @param PropertyId $propertyId
     * @param EntityId[] $toIds
     * @param EntityId[] &$toVisit List of entities that still need to be checked
     * @return EntityId|null Target id the entity refers to, null if none.
     */
    private function processEntityById(
        EntityId $id,
        EntityId $fromId,
        PropertyId $propertyId,
        array $toIds,
        array &$toVisit
    ) {
        $entity = $this->getEntity( $id, $fromId, $propertyId, $toIds );
        if ( !$entity ) {
            return null;
        }

        $mainSnaks = $this->getMainSnaks( $entity, $propertyId );

        foreach ( $mainSnaks as $mainSnak ) {
            $result = $this->processSnak( $mainSnak, $toVisit, $toIds );
            if ( $result ) {
                return $result;
            }
        }

        return null;
    }

    /**
     * @param EntityId $id Id of the entity to get
     * @param EntityId $fromId Id this lookup started from
     * @param PropertyId $propertyId
     * @param EntityId[] $toIds
     *
     * @return StatementListProvider|null Null if not applicable.
     */
    private function getEntity( EntityId $id, EntityId $fromId, PropertyId $propertyId, array $toIds ) {
        if ( isset( $this->alreadyVisited[$id->getSerialization()] ) ) {
            trigger_error(
                'Entity ' . $id->getSerialization() . ' already visited.',
                E_USER_WARNING
            );

            return null;
        }

        $this->alreadyVisited[$id->getSerialization()] = true;

        if ( count( $this->alreadyVisited ) > $this->maxEntityVisits ) {
            throw new MaxReferencedEntityVisitsExhaustedException(
                $fromId,
                $propertyId,
                $toIds,
                $this->maxEntityVisits
            );
        }

        try {
            $entity = $this->entityLookup->getEntity( $id );
        } catch ( EntityLookupException $ex ) {
            throw new ReferencedEntityIdLookupException( $fromId, $propertyId, $toIds, null, $ex );
        }

        if ( !( $entity instanceof StatementListProvider ) ) {
            return null;
        }

        return $entity;
    }

    /**
     * Decide whether a single Snak is pointing to one of the target ids.
     *
     * @param Snak $snak
     * @param EntityId[] &$toVisit List of entities that still need to be checked
     * @param EntityId[] $toIds
     * @return EntityId|null Target id the Snak refers to, null if none.
     */
    private function processSnak( Snak $snak, array &$toVisit, array $toIds ) {
        if ( !( $snak instanceof PropertyValueSnak ) ) {
            return null;
        }
        $dataValue = $snak->getDataValue();
        if ( !( $dataValue instanceof EntityIdValue ) ) {
            return null;
        }

        $entityId = $dataValue->getEntityId();
        if ( in_array( $entityId, $toIds, false ) ) {
            return $entityId;
        }

        $toVisit[] = $entityId;

        return null;
    }

    /**
     * @param StatementListProvider $statementListProvider
     * @param PropertyId $propertyId
     * @return Snak[]
     */
    private function getMainSnaks(
        StatementListProvider $statementListProvider,
        PropertyId $propertyId
    ) {
        return $statementListProvider
            ->getStatements()
            ->getByPropertyId( $propertyId )
            ->getBestStatements()
            ->getMainSnaks();
    }

}