includes/deferred/LinksUpdate/LinksTable.php
<?php
namespace MediaWiki\Deferred\LinksUpdate;
use InvalidArgumentException;
use MediaWiki\Linker\LinkTargetLookup;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Page\PageReference;
use MediaWiki\Parser\ParserOutput;
use MediaWiki\Revision\RevisionRecord;
use Wikimedia\Rdbms\IDatabase;
use Wikimedia\Rdbms\IResultWrapper;
use Wikimedia\Rdbms\LBFactory;
/**
* The base class for classes which update a single link table.
*
* A LinksTable object is a container for new and existing link sets outbound
* from a single page, and an abstraction of the associated DB schema. The
* object stores state related to an update of the outbound links of a page.
*
* Explanation of link ID concept
* ------------------------------
*
* Link IDs identify a link in the new or old state, or in the change arrays.
* They are opaque to the base class and are type-hinted here as mixed.
*
* Conventionally, the link ID is string|string[] and contains the link target
* fields.
*
* The link ID should contain enough information so that the base class can
* tell whether an existing link is in the new set, or vice versa, for the
* purposes of incremental updates. If a change to a field would cause a DB
* update, the field should be in the link ID.
*
* For example, a change to cl_timestamp does not trigger an update, so
* cl_timestamp is not in the link ID.
*
* @stable to extend
* @since 1.38
*/
abstract class LinksTable {
	/** Link type: Inserted (added) links */
	public const INSERTED = 1;

	/** Link type: Deleted (removed) links */
	public const DELETED = 2;

	/** Link type: Changed (inserted or removed) links */
	public const CHANGED = 3;

	/** Link type: existing/old links */
	public const OLD = 4;

	/** Link type: new links (from the ParserOutput) */
	public const NEW = 5;

	/**
	 * Rows to delete. An array of associative arrays, each associative array
	 * being the conditions for a delete query. Common conditions should be
	 * leftmost in the associative array so that they can be factored out.
	 *
	 * Filled by deleteRow() and consumed by doWrites().
	 *
	 * @var array
	 */
	protected $rowsToDelete = [];

	/**
	 * Rows to insert. An array of associative arrays, each associative array
	 * mapping field names to values.
	 *
	 * Filled by insertRow() and consumed by doWrites().
	 *
	 * @var array
	 */
	protected $rowsToInsert = [];

	/** @var array Link IDs for inserted links, recorded by update() */
	protected $insertedLinks = [];

	/** @var array Link IDs for deleted links, recorded by update() */
	protected $deletedLinks = [];

	/** @var LBFactory Set by injectBaseDependencies() */
	private $lbFactory;

	/** @var LinkTargetLookup Set by injectBaseDependencies() */
	protected $linkTargetLookup;

	/** @var IDatabase Primary connection obtained from the LBFactory */
	private $db;

	/** @var PageIdentity The page whose outbound links are being updated */
	private $sourcePage;

	/** @var PageReference|null The pre-move page, or null if this is not a move */
	private $movedPage;

	/** @var int Maximum number of rows per write query */
	private $batchSize;

	/** @var mixed Empty transaction ticket, or null if none was set */
	private $ticket;

	/** @var RevisionRecord|null Null until setRevision() has been called */
	private $revision;

	/** @var bool|null Unset (null) until setStrictTestMode() is called; treated as false */
	protected $strictTestMode;

	/**
	 * This is called by the factory to inject dependencies for the base class.
	 * This is used instead of the constructor so that changes can be made to
	 * the injected parameters without breaking the subclass constructors.
	 *
	 * The primary database connection is fetched from the LBFactory here and
	 * kept for the lifetime of the object.
	 *
	 * @param LBFactory $lbFactory
	 * @param LinkTargetLookup $linkTargetLookup
	 * @param PageIdentity $sourcePage
	 * @param int $batchSize
	 */
	final public function injectBaseDependencies(
		LBFactory $lbFactory,
		LinkTargetLookup $linkTargetLookup,
		PageIdentity $sourcePage,
		$batchSize
	) {
		$this->lbFactory = $lbFactory;
		$this->db = $this->lbFactory->getPrimaryDatabase();
		$this->sourcePage = $sourcePage;
		$this->batchSize = $batchSize;
		$this->linkTargetLookup = $linkTargetLookup;
	}

	/**
	 * Set the empty transaction ticket
	 *
	 * @param mixed $ticket
	 */
	public function setTransactionTicket( $ticket ) {
		$this->ticket = $ticket;
	}

	/**
	 * Set the revision associated with the edit.
	 *
	 * @param RevisionRecord $revision
	 */
	public function setRevision( RevisionRecord $revision ) {
		$this->revision = $revision;
	}

	/**
	 * Notify the object that the operation is a page move, and set the
	 * original title.
	 *
	 * @param PageReference $movedPage
	 */
	public function setMoveDetails( PageReference $movedPage ) {
		$this->movedPage = $movedPage;
	}

	/**
	 * Subclasses should implement this to extract the data they need from the
	 * ParserOutput.
	 *
	 * To support a future refactor of LinksDeletionUpdate, if this method is
	 * not called, the subclass should assume that the new state is empty.
	 *
	 * @param ParserOutput $parserOutput
	 */
	abstract public function setParserOutput( ParserOutput $parserOutput );

	/**
	 * Get the table name.
	 *
	 * @return string
	 */
	abstract protected function getTableName();

	/**
	 * Get the name of the field which links to page_id.
	 *
	 * @return string
	 */
	abstract protected function getFromField();

	/**
	 * Get the fields to be used in fetchExistingRows(). Note that
	 * fetchExistingRows() is just a helper for subclasses. The value returned
	 * here is effectively private to the subclass.
	 *
	 * @return array
	 */
	abstract protected function getExistingFields();

	/**
	 * Get an array (or iterator) of link IDs for the new state.
	 *
	 * See the LinksTable doc comment for an explanation of link IDs.
	 *
	 * @return iterable<mixed>
	 */
	abstract protected function getNewLinkIDs();

	/**
	 * Get an array (or iterator) of link IDs for the existing state. The
	 * subclass should load the data from the database. There is
	 * fetchExistingRows() to make this easier but the subclass is responsible
	 * for caching.
	 *
	 * See the LinksTable doc comment for an explanation of link IDs.
	 *
	 * @return iterable<mixed>
	 */
	abstract protected function getExistingLinkIDs();

	/**
	 * Determine whether a link (from the new set) is in the existing set.
	 *
	 * @param mixed $linkId
	 * @return bool
	 */
	abstract protected function isExisting( $linkId );

	/**
	 * Determine whether a link (from the existing set) is in the new set.
	 *
	 * @param mixed $linkId
	 * @return bool
	 */
	abstract protected function isInNewSet( $linkId );

	/**
	 * Insert a link identified by ID. The subclass is expected to queue the
	 * insertion by calling insertRow().
	 *
	 * @param mixed $linkId
	 */
	abstract protected function insertLink( $linkId );

	/**
	 * Delete a link identified by ID. The subclass is expected to queue the
	 * deletion by calling deleteRow().
	 *
	 * @param mixed $linkId
	 */
	abstract protected function deleteLink( $linkId );

	/**
	 * Subclasses can override this to return true in order to force
	 * reinsertion of all the links due to some property of the link
	 * changing for reasons not represented by the link ID.
	 *
	 * When this returns true, update() queues every new link for insertion
	 * and every existing link for deletion, skipping the set comparison.
	 *
	 * @return bool
	 */
	protected function needForcedLinkRefresh() {
		return false;
	}

	/**
	 * Get the database connection used for reads and writes of this table.
	 *
	 * @stable to override
	 * @return IDatabase
	 */
	protected function getDB(): IDatabase {
		return $this->db;
	}

	/**
	 * Get the injected load balancer factory.
	 *
	 * @return LBFactory
	 */
	protected function getLBFactory(): LBFactory {
		return $this->lbFactory;
	}

	/**
	 * Get the page_id of the source page
	 *
	 * @return int
	 */
	protected function getSourcePageId(): int {
		return $this->sourcePage->getId();
	}

	/**
	 * Get the source page, i.e. the page which is being updated and is the
	 * source of links.
	 *
	 * @return PageIdentity
	 */
	protected function getSourcePage(): PageIdentity {
		return $this->sourcePage;
	}

	/**
	 * Determine whether the page was moved, i.e. whether setMoveDetails()
	 * has been called.
	 *
	 * @return bool
	 */
	protected function isMove() {
		return $this->movedPage !== null;
	}

	/**
	 * Determine whether the page was moved to a different namespace.
	 *
	 * @return bool False if the page was not moved at all.
	 */
	protected function isCrossNamespaceMove() {
		return $this->movedPage !== null
			&& $this->sourcePage->getNamespace() !== $this->movedPage->getNamespace();
	}

	/**
	 * Assuming the page was moved, get the original page title before the move.
	 *
	 * If the page wasn't moved, the stored value is null and the non-nullable
	 * return type causes PHP to raise a TypeError. Check isMove() first.
	 *
	 * @return PageReference
	 */
	protected function getMovedPage(): PageReference {
		return $this->movedPage;
	}

	/**
	 * Get the maximum number of rows to update in a batch.
	 *
	 * @return int
	 */
	protected function getBatchSize(): int {
		return $this->batchSize;
	}

	/**
	 * Get the empty transaction ticket, or null if there is none.
	 *
	 * @return mixed
	 */
	protected function getTransactionTicket() {
		return $this->ticket;
	}

	/**
	 * Get the RevisionRecord of the new revision, if the LinksUpdate caller
	 * injected one.
	 *
	 * @return RevisionRecord|null
	 */
	protected function getRevision(): ?RevisionRecord {
		return $this->revision;
	}

	/**
	 * Get field=>value associative array for the from field(s)
	 *
	 * The base implementation maps the from field to the source page ID.
	 *
	 * @stable to override
	 * @return array
	 */
	protected function getFromConds() {
		return [ $this->getFromField() => $this->getSourcePageId() ];
	}

	/**
	 * Do a select query to fetch the existing rows. This is a helper for
	 * subclasses.
	 *
	 * The query selects getExistingFields() from getTableName(), restricted
	 * by getFromConds(). Nothing is cached here.
	 *
	 * @return IResultWrapper
	 */
	protected function fetchExistingRows(): IResultWrapper {
		return $this->getDB()->newSelectQueryBuilder()
			->select( $this->getExistingFields() )
			->from( $this->getTableName() )
			->where( $this->getFromConds() )
			->caller( __METHOD__ )
			->fetchResultSet();
	}

	/**
	 * Execute an edit/delete update
	 *
	 * Sequence: startUpdate(), queue insertions for new links which are not
	 * existing, queue deletions for existing links which are not in the new
	 * set, doWrites(), finishUpdate(). The affected link IDs are recorded so
	 * that getLinkIDs() can report them afterwards.
	 */
	final public function update() {
		$this->startUpdate();
		$force = $this->needForcedLinkRefresh();

		foreach ( $this->getNewLinkIDs() as $link ) {
			if ( $force || !$this->isExisting( $link ) ) {
				$this->insertLink( $link );
				$this->insertedLinks[] = $link;
			}
		}

		foreach ( $this->getExistingLinkIDs() as $link ) {
			if ( $force || !$this->isInNewSet( $link ) ) {
				$this->deleteLink( $link );
				$this->deletedLinks[] = $link;
			}
		}

		$this->doWrites();
		$this->finishUpdate();
	}

	/**
	 * Queue a row for insertion. Subclasses are expected to call this from
	 * insertLink(). The "from" field should not be included in the row.
	 *
	 * @param array $row Associative array mapping fields to values.
	 */
	protected function insertRow( $row ) {
		// Array union: fields already present in $row are not overwritten
		$row += $this->getFromConds();
		$this->rowsToInsert[] = $row;
	}

	/**
	 * Queue a deletion operation. Subclasses are expected to call this from
	 * deleteLink(). The "from" field does not need to be included in the
	 * conditions.
	 *
	 * Most often, the conditions match a single row, but this is not required.
	 *
	 * @param array $conds Associative array mapping fields to values,
	 *   specifying the conditions for a delete query.
	 */
	protected function deleteRow( $conds ) {
		// Put the "from" field leftmost, so it can be factored out
		$conds = $this->getFromConds() + $conds;
		$this->rowsToDelete[] = $conds;
	}

	/**
	 * Subclasses can override this to do any necessary setup before the lock
	 * is acquired.
	 *
	 * @stable to override
	 */
	public function beforeLock() {
	}

	/**
	 * Subclasses can override this to do any necessary setup before individual
	 * write operations begin.
	 *
	 * @stable to override
	 */
	protected function startUpdate() {
	}

	/**
	 * Subclasses can override this to do any updates associated with their
	 * link data, for example dispatching HTML update jobs.
	 *
	 * @stable to override
	 */
	protected function finishUpdate() {
	}

	/**
	 * Do the common DB operations
	 *
	 * Runs the queued deletions, then the queued insertions, each split into
	 * chunks of at most getBatchSize() rows. When a set of operations needed
	 * more than one chunk, a commit-and-wait-for-replication is issued after
	 * every chunk; a single chunk is executed without any commit here.
	 */
	protected function doWrites() {
		$db = $this->getDB();
		$table = $this->getTableName();
		$domainId = $db->getDomainID();
		$batchSize = $this->getBatchSize();
		$ticket = $this->getTransactionTicket();
		// Scope the replication wait to the domain of the connection that is
		// actually written to. getDB() is overridable, so this is taken from
		// the connection rather than assumed to be the local domain.
		$waitOpts = [ 'domain' => $domainId ];

		$deleteBatches = array_chunk( $this->rowsToDelete, $batchSize );
		foreach ( $deleteBatches as $chunk ) {
			$db->newDeleteQueryBuilder()
				->deleteFrom( $table )
				->where( $db->factorConds( $chunk ) )
				->caller( __METHOD__ )->execute();
			if ( count( $deleteBatches ) > 1 ) {
				$this->lbFactory->commitAndWaitForReplication( __METHOD__, $ticket, $waitOpts );
			}
		}

		$insertBatches = array_chunk( $this->rowsToInsert, $batchSize );
		foreach ( $insertBatches as $insertBatch ) {
			$db->newInsertQueryBuilder()
				->options( $this->getInsertOptions() )
				->insertInto( $table )
				->rows( $insertBatch )
				->caller( __METHOD__ )->execute();
			if ( count( $insertBatches ) > 1 ) {
				$this->lbFactory->commitAndWaitForReplication( __METHOD__, $ticket, $waitOpts );
			}
		}
	}

	/**
	 * Omit conflict resolution options from the insert query so that testing
	 * can confirm that the incremental update logic was correct.
	 *
	 * @param bool $mode
	 */
	public function setStrictTestMode( $mode = true ) {
		$this->strictTestMode = $mode;
	}

	/**
	 * Get the options for the insert queries
	 *
	 * @return array Empty in strict test mode, otherwise [ 'IGNORE' ].
	 */
	protected function getInsertOptions() {
		return $this->strictTestMode ? [] : [ 'IGNORE' ];
	}

	/**
	 * Get an array or iterator of link IDs of a given type. Some subclasses
	 * use this to provide typed data to callers. This is not public because
	 * link IDs are a private concept.
	 *
	 * The INSERTED, DELETED and CHANGED sets are only populated by update().
	 *
	 * @param int $setType One of the class constants: self::INSERTED, self::DELETED,
	 *   self::CHANGED, self::OLD or self::NEW.
	 * @return iterable<mixed>
	 * @throws InvalidArgumentException If $setType is not one of the constants
	 */
	protected function getLinkIDs( $setType ) {
		switch ( $setType ) {
			case self::INSERTED:
				return $this->insertedLinks;

			case self::DELETED:
				return $this->deletedLinks;

			case self::CHANGED:
				return array_merge( $this->insertedLinks, $this->deletedLinks );

			case self::OLD:
				return $this->getExistingLinkIDs();

			case self::NEW:
				return $this->getNewLinkIDs();

			default:
				throw new InvalidArgumentException( __METHOD__ . ": Unknown link type" );
		}
	}

	/**
	 * Normalization stage of the links table (see T222224)
	 *
	 * The base implementation reports SCHEMA_COMPAT_OLD.
	 *
	 * @return int
	 */
	protected function linksTargetNormalizationStage(): int {
		return SCHEMA_COMPAT_OLD;
	}
}