wikimedia/mediawiki-extensions-Wikibase

View on GitHub
repo/includes/Rdf/FullStatementRdfBuilder.php

Summary

Maintainability
B
5 hrs
Test Coverage
<?php

namespace Wikibase\Repo\Rdf;

use InvalidArgumentException;
use Wikibase\DataModel\Entity\EntityDocument;
use Wikibase\DataModel\Entity\EntityId;
use Wikibase\DataModel\Reference;
use Wikibase\DataModel\Statement\Statement;
use Wikibase\DataModel\Statement\StatementList;
use Wikibase\DataModel\Statement\StatementListProvider;
use Wikimedia\Purtle\RdfWriter;

/**
 * Fully reified RDF mapping for wikibase statements, including deprecated and non-"best"
 * statements, ranks, qualifiers, and references. This models statements as identifiable objects
 * and does not output a direct property to value mapping as the TruthyStatementRdfBuilder does. If
 * both forms (direct and full) are desired, use TruthyStatementRdfBuilder in addition to
 * FullStatementRdfBuilder.
 *
 * @see TruthyStatementRdfBuilder
 *
 * @license GPL-2.0-or-later
 * @author Daniel Kinzler
 * @author Stas Malyshev
 */
class FullStatementRdfBuilder implements EntityRdfBuilder {

    /**
     * Tracks reference hashes that were already written, so that the body of a
     * reference node is only emitted once. Defaults to a NullDedupeBag.
     *
     * @var DedupeBag
     */
    private $dedupeBag;

    /**
     * Whether qualifier snaks are written for each statement.
     *
     * @var bool
     */
    private $produceQualifiers = true;

    /**
     * Whether reference nodes are written for each statement.
     *
     * @var bool
     */
    private $produceReferences = true;

    /**
     * @var RdfVocabulary
     */
    private $vocabulary;

    /**
     * Writer receiving the statement nodes (main snak, rank, qualifiers, reference links).
     *
     * @var RdfWriter
     */
    private $statementWriter;

    /**
     * Sub-writer of the statement writer, receiving the reference nodes.
     *
     * @var RdfWriter
     */
    private $referenceWriter;

    /**
     * @var SnakRdfBuilder
     */
    private $snakBuilder;

    /**
     * @param RdfVocabulary $vocabulary
     * @param RdfWriter $writer Writer for statement output; a sub-writer is derived from it
     *  for reference output.
     * @param SnakRdfBuilder $snakBuilder Used to write main snaks, qualifiers and reference snaks.
     */
    public function __construct( RdfVocabulary $vocabulary, RdfWriter $writer, SnakRdfBuilder $snakBuilder ) {
        $this->vocabulary = $vocabulary;

        // Note: since we process references as nested structures, they need a separate
        // rdf writer, so outputting references doesn't destroy the state of the statement writer.
        $this->statementWriter = $writer;
        $this->referenceWriter = $writer->sub();

        $this->snakBuilder = $snakBuilder;

        $this->dedupeBag = new NullDedupeBag();
    }

    /**
     * Replaces the bag used to detect references that were already written.
     *
     * @param DedupeBag $dedupeBag
     */
    public function setDedupeBag( DedupeBag $dedupeBag ) {
        $this->dedupeBag = $dedupeBag;
    }

    /**
     * @return bool Whether qualifiers are written for each statement.
     */
    public function getProduceQualifiers() {
        return $this->produceQualifiers;
    }

    /**
     * @param bool $produceQualifiers Whether qualifiers should be written for each statement.
     */
    public function setProduceQualifiers( $produceQualifiers ) {
        $this->produceQualifiers = $produceQualifiers;
    }

    /**
     * @return bool Whether references are written for each statement.
     */
    public function getProduceReferences() {
        return $this->produceReferences;
    }

    /**
     * @param bool $produceReferences Whether references should be written for each statement.
     */
    public function setProduceReferences( $produceReferences ) {
        $this->produceReferences = $produceReferences;
    }

    /**
     * Adds Statements to the RDF graph.
     *
     * Every statement of the list is written; statements that are among the best statements
     * of their property are additionally flagged as best-ranked.
     *
     * @param EntityId $entityId
     * @param StatementList $statementList
     *
     * @throws InvalidArgumentException if any statement in the list has a null GUID
     */
    public function addStatements( EntityId $entityId, StatementList $statementList ) {
        $statements = $statementList->toArray();

        // Validate all statements before anything is written: the GUID is used as the
        // lookup key below, and checking only the best statements would let a non-best
        // statement without GUID slip through after output has already started.
        foreach ( $statements as $statement ) {
            if ( $statement->getGuid() === null ) {
                throw new InvalidArgumentException( 'Can only add statements that have a non-null GUID' );
            }
        }

        // Set of GUIDs (as array keys) of the best statements of each property.
        $bestList = [];

        // FIXME: This is expensive, share the result with TruthyStatementRdfBuilder!
        foreach ( $statementList->getPropertyIds() as $propertyId ) {
            $bestStatements = $statementList->getByPropertyId( $propertyId )->getBestStatements();
            foreach ( $bestStatements->toArray() as $statement ) {
                $bestList[$statement->getGuid()] = true;
            }
        }

        foreach ( $statements as $statement ) {
            $this->addStatement( $entityId, $statement, isset( $bestList[$statement->getGuid()] ) );
        }
    }

    /**
     * Adds the given Statement from the given Entity to the RDF graph.
     *
     * The main snak is always written; qualifiers and references are written
     * depending on the produceQualifiers and produceReferences flags.
     *
     * @param EntityId $entityId
     * @param Statement $statement
     * @param bool $isBest Is this best ranked statement?
     */
    private function addStatement( EntityId $entityId, Statement $statement, $isBest ) {
        $statementLName = $this->vocabulary->getStatementLName( $statement );
        $entityRepository = $this->vocabulary->getEntityRepositoryName( $entityId );

        $this->addMainSnak( $entityId, $statementLName, $statement, $isBest );

        // XXX: separate builder for qualifiers?
        if ( $this->produceQualifiers ) {
            // this assumes statement was added by addMainSnak
            $this->addQualifiers( $entityRepository, $statementLName, $statement );
        }

        // XXX: separate builder for references?
        if ( $this->produceReferences ) {
            $this->addReferences( $entityRepository, $statementLName, $statement );
        }
    }

    /**
     * Writes the qualifier snaks of the given Statement to the statement writer.
     *
     * Must be called right after addMainSnak() for the same statement, since no subject is
     * set on the writer here.
     *
     * @param string $entityRepository Repository name of the entity the statement belongs to
     * @param string $statementLName
     * @param Statement $statement
     */
    private function addQualifiers( $entityRepository, $statementLName, Statement $statement ) {
        foreach ( $statement->getQualifiers() as $qualifier ) {
            $propertyRepository = $this->vocabulary->getEntityRepositoryName( $qualifier->getPropertyId() );
            $this->snakBuilder->addSnak(
                $this->statementWriter,
                $this->vocabulary->statementNamespaceNames[$entityRepository][RdfVocabulary::NS_VALUE],
                $qualifier,
                $this->vocabulary->propertyNamespaceNames[$propertyRepository][RdfVocabulary::NSP_QUALIFIER],
                $statementLName
            );
        }
    }

    /**
     * Writes all references of the given Statement.
     *
     * @param string $entityRepository Repository name of the entity the statement belongs to
     * @param string $statementLName
     * @param Statement $statement
     */
    private function addReferences( $entityRepository, $statementLName, Statement $statement ) {
        foreach ( $statement->getReferences() as $reference ) {
            $this->addReference( $entityRepository, $statementLName, $reference );
        }
    }

    /**
     * Links the statement to the given Reference and, unless the dedupe bag reports the
     * reference as already seen, writes the reference node with its snaks.
     *
     * @param string $entityRepository Repository name of the entity the statement belongs to
     * @param string $statementLName
     * @param Reference $reference
     */
    private function addReference( $entityRepository, $statementLName, Reference $reference ) {
        // The hash of the reference's snaks doubles as the local name of the reference node.
        $refLName = $reference->getSnaks()->getHash();

        // The link from the statement to the reference is always written, even if the
        // reference node itself was already output.
        $this->statementWriter->about(
            $this->vocabulary->statementNamespaceNames[$entityRepository][RdfVocabulary::NS_STATEMENT],
            $statementLName
        )
            ->say( RdfVocabulary::NS_PROV, 'wasDerivedFrom' )->is(
                $this->vocabulary->statementNamespaceNames[$entityRepository][RdfVocabulary::NS_REFERENCE],
                $refLName
            );

        if ( $this->dedupeBag->alreadySeen( $refLName, 'R' ) !== false ) {
            return;
        }

        $this->referenceWriter->about(
            $this->vocabulary->statementNamespaceNames[$entityRepository][RdfVocabulary::NS_REFERENCE],
            $refLName
        )
            ->a( RdfVocabulary::NS_ONTOLOGY, 'Reference' );

        foreach ( $reference->getSnaks() as $refSnak ) {
            $propertyRepository = $this->vocabulary->getEntityRepositoryName( $refSnak->getPropertyId() );
            $this->snakBuilder->addSnak(
                $this->referenceWriter,
                $this->vocabulary->statementNamespaceNames[$entityRepository][RdfVocabulary::NS_VALUE],
                $refSnak,
                $this->vocabulary->propertyNamespaceNames[$propertyRepository][RdfVocabulary::NSP_REFERENCE],
                $refLName
            );
        }
    }

    /**
     * Adds the given Statement's main Snak to the RDF graph.
     *
     * Writes the link from the entity to the statement node, the statement node's type,
     * its rank (plus the best-rank marker if applicable), and finally the main snak itself.
     * A rank that is not in RdfVocabulary::RANK_MAP is logged and skipped.
     *
     * @param EntityId $entityId
     * @param string $statementLName
     * @param Statement $statement
     * @param bool $isBest Is this best ranked statement?
     */
    private function addMainSnak( EntityId $entityId, $statementLName, Statement $statement, $isBest ) {
        $snak = $statement->getMainSnak();

        $entityLName = $this->vocabulary->getEntityLName( $entityId );
        $entityRepository = $this->vocabulary->getEntityRepositoryName( $entityId );
        $propertyId = $snak->getPropertyId();
        $propertyLName = $this->vocabulary->getEntityLName( $propertyId );
        $propertyRepository = $this->vocabulary->getEntityRepositoryName( $propertyId );

        $statementNamespace =
            $this->vocabulary->statementNamespaceNames[$entityRepository][RdfVocabulary::NS_STATEMENT];

        // entity --claim property--> statement node
        $this->statementWriter->about(
            $this->vocabulary->entityNamespaceNames[$entityRepository],
            $entityLName
        )
            ->say(
                $this->vocabulary->propertyNamespaceNames[$propertyRepository][RdfVocabulary::NSP_CLAIM],
                $propertyLName
            )
            ->is( $statementNamespace, $statementLName );

        $this->statementWriter->about( $statementNamespace, $statementLName )
            ->a( RdfVocabulary::NS_ONTOLOGY, 'Statement' );

        $rank = $statement->getRank();
        if ( isset( RdfVocabulary::RANK_MAP[$rank] ) ) {
            if ( $isBest ) {
                // Relies on the statement node still being the writer's current subject.
                $this->statementWriter->a( RdfVocabulary::NS_ONTOLOGY, RdfVocabulary::WIKIBASE_RANK_BEST );
            }
            $this->statementWriter->about( $statementNamespace, $statementLName )
                ->say( RdfVocabulary::NS_ONTOLOGY, 'rank' )->is( RdfVocabulary::NS_ONTOLOGY, RdfVocabulary::RANK_MAP[$rank] );
        } else {
            wfLogWarning( "Unknown rank $rank encountered for $entityId:{$statement->getGuid()}" );
        }

        $this->snakBuilder->addSnak(
            $this->statementWriter,
            $this->vocabulary->statementNamespaceNames[$entityRepository][RdfVocabulary::NS_VALUE],
            $snak,
            $this->vocabulary->propertyNamespaceNames[$propertyRepository][RdfVocabulary::NSP_CLAIM_STATEMENT],
            $statementLName
        );
    }

    /**
     * Add fully reified statements for the given entity to the RDF graph.
     * This may include qualifiers and references, depending on calls to
     * setProduceQualifiers() resp. setProduceReferences().
     *
     * Entities that are not a StatementListProvider produce no output.
     *
     * @param EntityDocument $entity the entity to output.
     */
    public function addEntity( EntityDocument $entity ) {
        $entityId = $entity->getId();

        if ( $entity instanceof StatementListProvider ) {
            $this->addStatements( $entityId, $entity->getStatements() );
        }
    }

}