wikimedia/mediawiki-extensions-Wikibase

View on GitHub
lib/includes/Store/Sql/Terms/DatabaseMatchingTermsLookup.php

Summary

Maintainability
B
6 hrs
Test Coverage
<?php

declare( strict_types = 1 );

namespace Wikibase\Lib\Store\Sql\Terms;

use InvalidArgumentException;
use MediaWiki\Storage\NameTableAccessException;
use Psr\Log\LoggerInterface;
use Wikibase\DataModel\Entity\EntityId;
use Wikibase\DataModel\Services\EntityId\EntityIdComposer;
use Wikibase\Lib\Rdbms\RepoDomainDb;
use Wikibase\Lib\Store\MatchingTermsLookup;
use Wikibase\Lib\Store\Sql\Terms\Util\StatsdMonitoring;
use Wikibase\Lib\Store\TermIndexSearchCriteria;
use Wikibase\Lib\TermIndexEntry;
use Wikimedia\Rdbms\FakeResultWrapper;
use Wikimedia\Rdbms\IExpression;
use Wikimedia\Rdbms\IReadableDatabase;
use Wikimedia\Rdbms\IResultWrapper;
use Wikimedia\Rdbms\LikeValue;

/**
 * MatchingTermsLookup implementation in the new term store. Mostly used for search.
 *
 * @see @ref docs_storage_terms
 * @license GPL-2.0-or-later
 */
class DatabaseMatchingTermsLookup implements MatchingTermsLookup {

    use StatsdMonitoring;

    /**
     * Entity types that are searched when the caller does not restrict the entity type.
     */
    private const DEFAULT_ENTITY_TYPES = [ 'item', 'property' ];

    private RepoDomainDb $repoDb;

    private LoggerInterface $logger;

    private TypeIdsAcquirer $typeIdsAcquirer;

    private TypeIdsResolver $typeIdsResolver;

    private EntityIdComposer $entityIdComposer;

    /**
     * @param RepoDomainDb $repoDb Source of the read connection used for all term queries
     * @param TypeIdsAcquirer $typeIdsAcquirer Maps term type names to the IDs stored in wbtl_type_id
     * @param TypeIdsResolver $typeIdsResolver Maps wbtl_type_id values back to term type names
     * @param EntityIdComposer $entityIdComposer Builds entity IDs from the numeric IDs found in term rows
     * @param LoggerInterface $logger
     */
    public function __construct(
        RepoDomainDb $repoDb,
        TypeIdsAcquirer $typeIdsAcquirer,
        TypeIdsResolver $typeIdsResolver,
        EntityIdComposer $entityIdComposer,
        LoggerInterface $logger
    ) {
        $this->repoDb = $repoDb;
        $this->typeIdsAcquirer = $typeIdsAcquirer;
        $this->typeIdsResolver = $typeIdsResolver;
        $this->entityIdComposer = $entityIdComposer;
        $this->logger = $logger;
    }

    /**
     * Finds the terms matching any of the given criteria.
     *
     * Options read by this implementation:
     *  - 'LIMIT': maximum number of entries to return; ignored unless it is a positive number.
     *  - 'prefixSearch': if true, the criteria text is matched as a prefix instead of exactly
     *    (default: false).
     *  - 'caseSensitive': accepted (default: true) but currently has no effect, see T242644.
     *
     * @inheritDoc
     */
    public function getMatchingTerms(
        array $criteria,
        $termType = null,
        $entityType = null,
        array $options = []
    ) {
        if ( !$criteria ) {
            return [];
        }

        $results = $this->criteriaToQueryResults(
            $this->getDbr(),
            $criteria,
            $termType,
            $entityType,
            $options
        );

        $this->incrementForQuery( 'MatchingTermsLookup_getMatchingTerms' );

        return $this->buildTermResult( $results, $this->getLimit( $options ) );
    }

    /**
     * Extracts the effective limit from the options.
     *
     * The value is normalized to an integer here so that a numeric string (e.g. coming from
     * request parameters) does not trigger a TypeError under strict_types further down.
     *
     * @param array $options
     * @return int|null The positive limit, or null if no (usable) limit was given
     */
    private function getLimit( array $options ): ?int {
        if ( !isset( $options['LIMIT'] ) || !is_numeric( $options['LIMIT'] ) ) {
            return null;
        }

        $limit = (int)$options['LIMIT'];

        return $limit > 0 ? $limit : null;
    }

    /**
     * Runs one query per combination of criterion and entity type.
     *
     * @param IReadableDatabase $dbr Used for query construction and selects
     * @param TermIndexSearchCriteria[] $criteria
     * @param string|string[]|null $termType
     * @param string|string[]|null $entityType Entity type(s) to search;
     *  null searches all of self::DEFAULT_ENTITY_TYPES
     * @param array $options
     *
     * @return IResultWrapper[] One result set per query, ordered by criterion, then by entity type
     */
    private function criteriaToQueryResults(
        IReadableDatabase $dbr,
        array $criteria,
        $termType = null,
        $entityType = null,
        array $options = []
    ): array {
        // A single type name becomes a one-element list, so all three input shapes share one loop.
        $entityTypes = $entityType === null ? self::DEFAULT_ENTITY_TYPES : (array)$entityType;

        $termQueries = [];

        foreach ( $criteria as $mask ) {
            foreach ( $entityTypes as $entityTypeCase ) {
                $termQueries[] = $this->getTermMatchQueries( $dbr, $mask, $entityTypeCase, $termType, $options );
            }
        }

        return $termQueries;
    }

    /**
     * Builds and runs the query for a single criterion against the terms of one entity type.
     *
     * @param IReadableDatabase $dbr Used for query construction and selects
     * @param TermIndexSearchCriteria $mask
     * @param string $entityType Either 'item' or 'property'
     * @param string|string[]|null $termType Term type(s) to restrict the search to. Only used if
     *  the criterion does not specify a term type itself. An empty list matches nothing.
     * @param array $options
     * @return IResultWrapper
     * @throws InvalidArgumentException If $entityType is neither 'item' nor 'property'
     */
    private function getTermMatchQueries(
        IReadableDatabase $dbr,
        TermIndexSearchCriteria $mask,
        string $entityType,
        $termType = null,
        array $options = []
    ): IResultWrapper {
        $options = array_merge(
            [
                'caseSensitive' => true,
                'prefixSearch' => false,
            ],
            $options
        );
        // TODO: Fix case insensitive: T242644

        $queryBuilder = $dbr->newSelectQueryBuilder();

        $queryBuilder->select( [ 'wbtl_id', 'wbtl_type_id', 'wbxl_language', 'wbx_text' ] );

        if ( $entityType === 'item' ) {
            $queryBuilder->select( 'wbit_item_id' )
                ->from( 'wbt_item_terms' )
                ->join( 'wbt_term_in_lang', null, 'wbit_term_in_lang_id=wbtl_id' );
        } elseif ( $entityType === 'property' ) {
            $queryBuilder->select( 'wbpt_property_id' )
                ->from( 'wbt_property_terms' )
                ->join( 'wbt_term_in_lang', null, 'wbpt_term_in_lang_id=wbtl_id' );
        } else {
            throw new InvalidArgumentException( 'Unknown entity type for search: ' . $entityType );
        }

        $queryBuilder->join( 'wbt_text_in_lang', null, 'wbtl_text_in_lang_id=wbxl_id' )
            ->join( 'wbt_text', null, 'wbxl_text_id=wbx_id' );

        $language = $mask->getLanguage();
        if ( $language !== null ) {
            $queryBuilder->where( [ 'wbxl_language' => $language ] );
        }

        $text = $mask->getText();
        if ( $text !== null ) {
            if ( $options['prefixSearch'] ) {
                $queryBuilder->where( $dbr->expr(
                    'wbx_text',
                    IExpression::LIKE,
                    new LikeValue( $text, $dbr->anyString() )
                ) );
            } else {
                $queryBuilder->where( [ 'wbx_text' => $text ] );
            }
        }

        // The term type of the criterion takes precedence over the general $termType filter.
        $termType = $mask->getTermType() ?? $termType;
        if ( $termType !== null ) {
            // $termType may be a single name or a list of names; handle both uniformly.
            $termTypes = array_values( (array)$termType );
            if ( $termTypes === [] ) {
                // An empty list of allowed term types cannot match any term
                // (and an empty IN() condition would be invalid).
                return new FakeResultWrapper( [] );
            }

            try {
                $typeIds = $this->typeIdsAcquirer->acquireTypeIds( $termTypes );
            } catch ( NameTableAccessException $e ) {
                // Edge case: attempting to do a term lookup before the first insert of the respective term type. Unlikely to happen in
                // production, but annoying/confusing if it happens in tests.
                return new FakeResultWrapper( [] );
            }

            $queryBuilder->where( [ 'wbtl_type_id' => array_values( $typeIds ) ] );
        }

        $limit = $this->getLimit( $options );
        if ( $limit !== null ) {
            $queryBuilder->limit( $limit );
        }

        return $queryBuilder->caller( __METHOD__ )->fetchResultSet();
    }

    /**
     * Modifies the provided terms to use the field names expected by the interface
     * rather than the table field names. Also ensures the values are of the correct type.
     *
     * @param IResultWrapper[] $results
     * @param int|null $limit Maximum number of entries to return, null for no limit
     * @return TermIndexEntry[]
     */
    private function buildTermResult( array $results, ?int $limit = null ): array {
        $matchingTerms = [];
        // Term type names already resolved during this call, keyed by type ID,
        // so that the resolver is asked once per distinct type rather than once per row.
        $termTypeNames = [];

        foreach ( $results as $result ) {
            foreach ( $result as $obtainedTerm ) {
                $typeId = (int)$obtainedTerm->wbtl_type_id;
                if ( !isset( $termTypeNames[$typeId] ) ) {
                    $termTypeNames[$typeId] = $this->typeIdsResolver->resolveTypeIds( [ $typeId ] )[$typeId];
                }

                $matchingTerms[] = new TermIndexEntry( [
                    'entityId' => $this->getEntityId( $obtainedTerm ),
                    'termType' => $termTypeNames[$typeId],
                    'termLanguage' => $obtainedTerm->wbxl_language,
                    'termText' => $obtainedTerm->wbx_text,
                ] );

                // Each query is limited individually, but the results of several queries
                // are combined here, so the overall limit has to be enforced as well.
                if ( count( $matchingTerms ) === $limit ) {
                    return $matchingTerms;
                }
            }
        }

        return $matchingTerms;
    }

    /**
     * Composes the ID of the entity a term row belongs to.
     *
     * @param object $termRow Row as selected by getTermMatchQueries()
     * @return EntityId|null Null if the row has neither a property ID nor an item ID
     */
    private function getEntityId( object $termRow ): ?EntityId {
        if ( isset( $termRow->wbpt_property_id ) ) {
            return $this->entityIdComposer->composeEntityId(
                'property', $termRow->wbpt_property_id
            );
        }

        if ( isset( $termRow->wbit_item_id ) ) {
            return $this->entityIdComposer->composeEntityId(
                'item', $termRow->wbit_item_id
            );
        }

        return null;
    }

    /**
     * @return IReadableDatabase Read connection to the repo database
     */
    private function getDbr(): IReadableDatabase {
        return $this->repoDb->connections()->getReadConnection();
    }
}