wikimedia/mediawiki-core

View on GitHub
includes/deferred/LinksUpdate/CategoryLinksTable.php

Summary

Maintainability
B
6 hrs
Test Coverage
<?php

namespace MediaWiki\Deferred\LinksUpdate;

use Collation;
use MediaWiki\DAO\WikiAwareEntity;
use MediaWiki\Languages\LanguageConverterFactory;
use MediaWiki\Page\PageReferenceValue;
use MediaWiki\Page\WikiPageFactory;
use MediaWiki\Parser\ParserOutput;
use MediaWiki\Parser\Sanitizer;
use MediaWiki\Title\NamespaceInfo;
use MediaWiki\Title\Title;
use PurgeJobUtils;

/**
 * categorylinks
 *
 * Link ID format: string[]
 *   - 0: Category name
 *   - 1: User-specified sort key (cl_sortkey_prefix)
 *
 * @since 1.38
 */
class CategoryLinksTable extends TitleLinksTable {
    /**
     * @var array Associative array of new links, with the category name
     *   (DB key) in the key. The value is a list consisting of the sort key
     *   prefix (index 0) and the sort key (index 1).
     */
    private $newLinks = [];

    /**
     * @var array|null Associative array of existing links, with the category
     *   name in the key and cl_sortkey_prefix in the value, or null if it has
     *   not been loaded yet
     */
    private $existingLinks;

    /**
     * @var array|null Associative array of saved cl_timestamp values keyed by
     *   category name, filled only if there is a forced refresh due to a page
     *   move, or null if the existing links have not been loaded yet
     */
    private $savedTimestamps = null;

    /** @var \ILanguageConverter */
    private $languageConverter;

    /** @var \Collation */
    private $collation;

    /** @var string The collation name for cl_collation */
    private $collationName;

    /** @var string The table name */
    private $tableName = 'categorylinks';

    /** @var bool */
    private $isTempTable;

    /** @var string The category type, which depends on the source page */
    private $categoryType;

    /** @var NamespaceInfo */
    private $namespaceInfo;

    /** @var WikiPageFactory */
    private $wikiPageFactory;

    /**
     * @param LanguageConverterFactory $converterFactory
     * @param NamespaceInfo $namespaceInfo
     * @param WikiPageFactory $wikiPageFactory
     * @param Collation $collation
     * @param string $collationName
     * @param string $tableName
     * @param bool $isTempTable
     */
    public function __construct(
        LanguageConverterFactory $converterFactory,
        NamespaceInfo $namespaceInfo,
        WikiPageFactory $wikiPageFactory,
        Collation $collation,
        $collationName,
        $tableName,
        $isTempTable
    ) {
        $this->languageConverter = $converterFactory->getLanguageConverter();
        $this->namespaceInfo = $namespaceInfo;
        $this->wikiPageFactory = $wikiPageFactory;
        $this->collation = $collation;
        $this->collationName = $collationName;
        $this->tableName = $tableName;
        $this->isTempTable = $isTempTable;
    }

    /**
     * Cache the category type after the source page has been set
     */
    public function startUpdate() {
        $this->categoryType = $this->namespaceInfo
            ->getCategoryLinkType( $this->getSourcePage()->getNamespace() );
    }

    /**
     * Rebuild the set of new links from the categories in the parser output.
     *
     * For each category this computes the cleaned-up sort key prefix, then
     * asks the collation for the sort keys of all categories in one batch.
     *
     * @param ParserOutput $parserOutput
     */
    public function setParserOutput( ParserOutput $parserOutput ) {
        $this->newLinks = [];
        $sourceTitle = Title::castFromPageIdentity( $this->getSourcePage() );
        $sortKeyInputs = [];
        foreach ( $parserOutput->getCategoryNames() as $name ) {
            $sortKey = $parserOutput->getCategorySortKey( $name );
            '@phan-var string $sortKey'; // sort key will never be null

            // Fall back to the page's default sort key, if any
            if ( $sortKey == '' ) {
                $sortKey = $parserOutput->getPageProperty( "defaultsort" ) ?? '';
            }
            $sortKey = $this->languageConverter->convertCategoryKey( $sortKey );

            // Clean up the sort key, regardless of source
            $sortKey = Sanitizer::decodeCharReferences( $sortKey );
            $sortKey = str_replace( "\n", '', $sortKey );

            // If the sort key is longer than 255 bytes, it is truncated by DB,
            // and then doesn't match when comparing existing vs current
            // categories, causing T27254.
            $sortKeyPrefix = mb_strcut( $sortKey, 0, 255 );

            $targetTitle = Title::makeTitle( NS_CATEGORY, $name );
            $this->languageConverter->findVariantLink( $name, $targetTitle, true );
            // Ignore the returned text, DB key should be used for links (T328477).
            $name = $targetTitle->getDBKey();

            // Treat custom sort keys as a prefix, so that if multiple
            // things are forced to sort as '*' or something, they'll
            // sort properly in the category rather than in page_id
            // order or such.
            $sortKeyInputs[$name] = $sourceTitle->getCategorySortkey( $sortKeyPrefix );
            $this->newLinks[$name] = [ $sortKeyPrefix ];
        }
        // Fill in index 1 of each new link with the collation's sort key
        $sortKeys = $this->collation->getSortKeys( $sortKeyInputs );
        foreach ( $sortKeys as $name => $sortKey ) {
            $this->newLinks[$name][1] = $sortKey;
        }
    }

    /**
     * @return string The table name passed to the constructor
     */
    protected function getTableName() {
        return $this->tableName;
    }

    /**
     * @return string The name of the field holding the source page
     */
    protected function getFromField() {
        return 'cl_from';
    }

    /**
     * Get the fields to select when loading existing rows. cl_timestamp is
     * only included when a forced link refresh is needed, since that is the
     * only case in which fetchExistingLinks() reads it.
     *
     * @return string[]
     */
    protected function getExistingFields() {
        $fields = [ 'cl_to', 'cl_sortkey_prefix' ];
        if ( $this->needForcedLinkRefresh() ) {
            $fields[] = 'cl_timestamp';
        }
        return $fields;
    }

    /**
     * Get the new link IDs. The link ID is a list with the name in the first
     * element and the sort key prefix in the second element.
     *
     * @return iterable<array>
     */
    protected function getNewLinkIDs() {
        foreach ( $this->newLinks as $name => [ $prefix, ] ) {
            // Numeric-looking names become integer array keys, so cast back
            yield [ (string)$name, $prefix ];
        }
    }

    /**
     * Get the existing links from the database
     *
     * This also resets the saved timestamps, and fills them in from
     * cl_timestamp when a forced link refresh is needed.
     */
    private function fetchExistingLinks() {
        $this->existingLinks = [];
        $this->savedTimestamps = [];
        $force = $this->needForcedLinkRefresh();
        foreach ( $this->fetchExistingRows() as $row ) {
            $this->existingLinks[$row->cl_to] = $row->cl_sortkey_prefix;
            if ( $force ) {
                $this->savedTimestamps[$row->cl_to] = $row->cl_timestamp;
            }
        }
    }

    /**
     * Get the existing links as an associative array, with the category name
     * in the key and the sort key prefix in the value.
     *
     * @return array
     */
    private function getExistingLinks() {
        if ( $this->existingLinks === null ) {
            $this->fetchExistingLinks();
        }
        return $this->existingLinks;
    }

    /**
     * Get the saved cl_timestamp values as an associative array keyed by
     * category name, loading the existing links first if necessary. The
     * array is empty unless a forced link refresh is needed.
     *
     * @return array
     */
    private function getSavedTimestamps() {
        if ( $this->savedTimestamps === null ) {
            $this->fetchExistingLinks();
        }
        return $this->savedTimestamps;
    }

    /**
     * Get the existing link IDs, each a list with the category name in the
     * first element and the sort key prefix in the second element.
     *
     * @return \Generator
     */
    protected function getExistingLinkIDs() {
        foreach ( $this->getExistingLinks() as $name => $sortkey ) {
            // Numeric-looking names become integer array keys, so cast back
            yield [ (string)$name, $sortkey ];
        }
    }

    /**
     * Determine whether a link ID matches an existing row, that is, the
     * category is present with an identical sort key prefix.
     *
     * @param array $linkId
     * @return bool
     */
    protected function isExisting( $linkId ) {
        $links = $this->getExistingLinks();
        [ $name, $prefix ] = $linkId;
        return \array_key_exists( $name, $links ) && $links[$name] === $prefix;
    }

    /**
     * Determine whether a link ID is in the new set, that is, the category
     * is present with an identical sort key prefix.
     *
     * @param array $linkId
     * @return bool
     */
    protected function isInNewSet( $linkId ) {
        [ $name, $prefix ] = $linkId;
        return \array_key_exists( $name, $this->newLinks )
            && $this->newLinks[$name][0] === $prefix;
    }

    /**
     * Insert the row for a link ID, taking the sort key from the new links.
     *
     * @param array $linkId
     */
    protected function insertLink( $linkId ) {
        [ $name, $prefix ] = $linkId;
        $sortKey = $this->newLinks[$name][1];
        $savedTimestamps = $this->getSavedTimestamps();

        // Preserve cl_timestamp in the case of a forced refresh. Without a
        // saved value, 0 is passed; presumably that means the current time
        // (confirm against the database abstraction's timestamp()).
        $timestamp = $this->getDB()->timestamp( $savedTimestamps[$name] ?? 0 );

        $this->insertRow( [
            'cl_to' => $name,
            'cl_sortkey' => $sortKey,
            'cl_timestamp' => $timestamp,
            'cl_sortkey_prefix' => $prefix,
            'cl_collation' => $this->collationName,
            'cl_type' => $this->categoryType,
        ] );
    }

    /**
     * Delete the row for a link ID. Only the category name is passed as a
     * condition; the sort key prefix is not used.
     *
     * @param array $linkId
     */
    protected function deleteLink( $linkId ) {
        $this->deleteRow( [ 'cl_to' => $linkId[0] ] );
    }

    /**
     * @return bool Whether all links need to be rewritten
     */
    protected function needForcedLinkRefresh() {
        // cl_sortkey and possibly cl_type will change if it is a page move
        return $this->isMove();
    }

    /**
     * Make a local page reference to the category named in the link ID.
     *
     * @param array $linkId
     * @return PageReferenceValue
     */
    protected function makePageReferenceValue( $linkId ): PageReferenceValue {
        return new PageReferenceValue( NS_CATEGORY, $linkId[0], WikiAwareEntity::LOCAL );
    }

    /**
     * Make a Title for the category named in the link ID.
     *
     * @param array $linkId
     * @return Title
     */
    protected function makeTitle( $linkId ): Title {
        return Title::makeTitle( NS_CATEGORY, $linkId[0] );
    }

    /**
     * Yield only the first link ID seen for each category name, so that IDs
     * differing only in their sort key prefix are collapsed.
     *
     * @param iterable<array> $linkIds
     * @return \Generator
     */
    protected function deduplicateLinkIds( $linkIds ) {
        $seen = [];
        foreach ( $linkIds as $linkId ) {
            if ( !\array_key_exists( $linkId[0], $seen ) ) {
                $seen[$linkId[0]] = true;
                yield $linkId;
            }
        }
    }

    /**
     * Invalidate the changed category pages and update the category counts,
     * unless this is a temporary table.
     */
    protected function finishUpdate() {
        if ( $this->isTempTable ) {
            // Don't do invalidations for temporary collations
            return;
        }

        // An update of the sort key on move is detected as insert + delete,
        // but such categories do not need their counters updated or their
        // caches invalidated, so names present in both lists are dropped.
        $allInsertedLinks = array_column( $this->insertedLinks, 0 );
        $allDeletedLinks = array_column( $this->deletedLinks, 0 );
        $insertedLinks = array_diff( $allInsertedLinks, $allDeletedLinks );
        $deletedLinks = array_diff( $allDeletedLinks, $allInsertedLinks );

        $this->invalidateCategories( $insertedLinks, $deletedLinks );
        $this->updateCategoryCounts( $insertedLinks, $deletedLinks );
    }

    /**
     * Invalidate the pages of all categories that were added or removed.
     *
     * @param array $insertedLinks Names of added categories
     * @param array $deletedLinks Names of removed categories
     */
    private function invalidateCategories( array $insertedLinks, array $deletedLinks ) {
        $changedCategoryNames = array_merge(
            $insertedLinks,
            $deletedLinks
        );
        PurgeJobUtils::invalidatePages(
            $this->getDB(), NS_CATEGORY, $changedCategoryNames );
    }

    /**
     * Update all the appropriate counts in the category table.
     *
     * If the total number of changes is below the batch size, they are
     * applied in a single call; otherwise additions and removals are applied
     * separately in chunks of the batch size.
     *
     * @param array $insertedLinks
     * @param array $deletedLinks
     */
    private function updateCategoryCounts( array $insertedLinks, array $deletedLinks ) {
        if ( !$insertedLinks && !$deletedLinks ) {
            return;
        }

        $wp = $this->wikiPageFactory->newFromTitle( $this->getSourcePage() );
        $lbf = $this->getLBFactory();
        $size = $this->getBatchSize();
        // T163801: try to release any row locks to reduce contention
        $lbf->commitAndWaitForReplication( __METHOD__, $this->getTransactionTicket() );

        if ( count( $insertedLinks ) + count( $deletedLinks ) < $size ) {
            $wp->updateCategoryCounts(
                $insertedLinks,
                $deletedLinks,
                $this->getSourcePageId()
            );
            $lbf->commitAndWaitForReplication( __METHOD__, $this->getTransactionTicket() );
        } else {
            $addedChunks = array_chunk( $insertedLinks, $size );
            foreach ( $addedChunks as $chunk ) {
                $wp->updateCategoryCounts( $chunk, [], $this->getSourcePageId() );
                // Only commit between chunks when there is more than one
                if ( count( $addedChunks ) > 1 ) {
                    $lbf->commitAndWaitForReplication( __METHOD__, $this->getTransactionTicket() );
                }
            }

            $deletedChunks = array_chunk( $deletedLinks, $size );
            foreach ( $deletedChunks as $chunk ) {
                $wp->updateCategoryCounts( [], $chunk, $this->getSourcePageId() );
                // Only commit between chunks when there is more than one
                if ( count( $deletedChunks ) > 1 ) {
                    $lbf->commitAndWaitForReplication( __METHOD__, $this->getTransactionTicket() );
                }
            }

        }
    }
}