wikimedia/mediawiki-core

View on GitHub
includes/Revision/SlotRecord.php

Summary

Maintainability
C
1 day
Test Coverage
<?php
/**
 * Value object representing a content slot associated with a page revision.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

namespace MediaWiki\Revision;

use InvalidArgumentException;
use LogicException;
use MediaWiki\Content\Content;
use OutOfBoundsException;
use Wikimedia\Assert\Assert;
use Wikimedia\NonSerializable\NonSerializableTrait;

/**
 * Value object representing a content slot associated with a page revision.
 * SlotRecord provides direct access to a Content object.
 * That access may be implemented through a callback.
 *
 * @since 1.31
 * @since 1.32 Renamed from MediaWiki\Storage\SlotRecord
 */
class SlotRecord {
    use NonSerializableTrait;

    public const MAIN = 'main';

    /**
     * @var \stdClass database result row, as a raw object. Callbacks are supported for field values,
     *      to enable on-demand emulation of these values. This is primarily intended for use
     *      during schema migration.
     */
    private $row;

    /**
     * @var Content|callable
     */
    private $content;

    /**
     * @var bool
     */
    private $derived;

    /**
     * Returns a new SlotRecord just like the given $slot, except that calling getContent()
     * will fail with an exception.
     *
     * @param SlotRecord $slot
     *
     * @return SlotRecord
     */
    public static function newWithSuppressedContent( SlotRecord $slot ) {
        $row = $slot->row;

        return new SlotRecord(
            $row,
            /**
             * @return never
             */
            static function () {
                throw new SuppressedDataException( 'Content suppressed!' );
            }
        );
    }

    /**
     * Returns a SlotRecord for a derived slot.
     *
     * @param string $role
     * @param Content $content Initial content
     *
     * @return SlotRecord
     * @since 1.36
     */
    public static function newDerived( string $role, Content $content ) {
        return self::newUnsaved( $role, $content, true );
    }

    /**
     * Constructs a new SlotRecord from an existing SlotRecord, overriding some fields.
     * The slot's content cannot be overwritten.
     *
     * @param SlotRecord $slot
     * @param array $overrides
     *
     * @return SlotRecord
     */
    private static function newFromSlotRecord( SlotRecord $slot, array $overrides = [] ) {
        $row = clone $slot->row;
        $row->slot_id = null; // never copy the row ID!

        foreach ( $overrides as $key => $value ) {
            $row->$key = $value;
        }

        return new SlotRecord( $row, $slot->content, $slot->isDerived() );
    }

    /**
     * Constructs a new SlotRecord for a new revision, inheriting the content of the given SlotRecord
     * of a previous revision.
     *
     * Note that a SlotRecord constructed this way are intended as prototypes,
     * to be used wit newSaved(). They are incomplete, so some getters such as
     * getRevision() will fail.
     *
     * @param SlotRecord $slot
     *
     * @return SlotRecord
     */
    public static function newInherited( SlotRecord $slot ) {
        // We can't inherit from a Slot that's not attached to a revision.
        $slot->getRevision();
        $slot->getOrigin();
        $slot->getAddress();

        // NOTE: slot_origin and content_address are copied from $slot.
        return self::newFromSlotRecord( $slot, [
            'slot_revision_id' => null,
        ] );
    }

    /**
     * Constructs a new Slot from a Content object for a new revision.
     * This is the preferred way to construct a slot for storing Content that
     * resulted from a user edit. The slot is assumed to be not inherited.
     *
     * Note that a SlotRecord constructed this way are intended as prototypes,
     * to be used wit newSaved(). They are incomplete, so some getters such as
     * getAddress() will fail.
     *
     * @param string $role
     * @param Content $content
     * @param bool $derived
     * @return SlotRecord An incomplete proto-slot object, to be used with newSaved() later.
     */
    public static function newUnsaved( string $role, Content $content, bool $derived = false ) {
        $row = [
            'slot_id' => null, // not yet known
            'slot_revision_id' => null, // not yet known
            'slot_origin' => null, // not yet known, will be set in newSaved()
            'content_size' => null, // compute later
            'content_sha1' => null, // compute later
            'slot_content_id' => null, // not yet known, will be set in newSaved()
            'content_address' => null, // not yet known, will be set in newSaved()
            'role_name' => $role,
            'model_name' => $content->getModel(),
        ];

        return new SlotRecord( (object)$row, $content, $derived );
    }

    /**
     * Constructs a complete SlotRecord for a newly saved revision, based on the incomplete
     * proto-slot. This adds information that has only become available during saving,
     * particularly the revision ID, content ID and content address.
     *
     * @param int $revisionId the revision the slot is to be associated with (field slot_revision_id).
     *        If $protoSlot already has a revision, it must be the same.
     * @param int|null $contentId the ID of the row in the content table describing the content
     *        referenced by $contentAddress (field slot_content_id).
     *        If $protoSlot already has a content ID, it must be the same.
     * @param string $contentAddress the slot's content address (field content_address).
     *        If $protoSlot already has an address, it must be the same.
     * @param SlotRecord $protoSlot The proto-slot that was provided as input for creating a new
     *        revision. $protoSlot must have a content address if inherited.
     *
     * @return SlotRecord If the state of $protoSlot is inappropriate for saving a new revision.
     */
    public static function newSaved(
        int $revisionId,
        ?int $contentId,
        string $contentAddress,
        SlotRecord $protoSlot
    ) {
        if ( $protoSlot->hasRevision() && $protoSlot->getRevision() !== $revisionId ) {
            throw new LogicException(
                "Mismatching revision ID $revisionId: "
                . "The slot already belongs to revision {$protoSlot->getRevision()}. "
                . "Use SlotRecord::newInherited() to re-use content between revisions."
            );
        }

        if ( $protoSlot->hasAddress() && $protoSlot->getAddress() !== $contentAddress ) {
            throw new LogicException(
                "Mismatching blob address $contentAddress: "
                . "The slot already has content at {$protoSlot->getAddress()}."
            );
        }

        if ( $protoSlot->hasContentId() && $protoSlot->getContentId() !== $contentId ) {
            throw new LogicException(
                "Mismatching content ID $contentId: "
                . "The slot already has content row {$protoSlot->getContentId()} associated."
            );
        }

        if ( $protoSlot->isInherited() ) {
            if ( !$protoSlot->hasAddress() ) {
                throw new InvalidArgumentException(
                    "An inherited blob should have a content address!"
                );
            }
            if ( !$protoSlot->hasField( 'slot_origin' ) ) {
                throw new InvalidArgumentException(
                    "A saved inherited slot should have an origin set!"
                );
            }
            $origin = $protoSlot->getOrigin();
        } else {
            $origin = $revisionId;
        }

        return self::newFromSlotRecord( $protoSlot, [
            'slot_revision_id' => $revisionId,
            'slot_content_id' => $contentId,
            'slot_origin' => $origin,
            'content_address' => $contentAddress,
        ] );
    }

    /**
     * The following fields are supported by the $row parameter:
     *
     *   $row->blob_data
     *   $row->blob_address
     *
     * @param \stdClass $row A database row composed of fields of the slot and content tables,
     *        as a raw object. Any field value can be a callback that produces the field value
     *        given this SlotRecord as a parameter. However, plain strings cannot be used as
     *        callbacks here, for security reasons.
     * @param Content|callable $content The content object associated with the slot, or a
     *        callback that will return that Content object, given this SlotRecord as a parameter.
     * @param bool $derived Is this handler for a derived slot? Derived slots allow information that
     *        is derived from the content of a page to be stored even if it is generated
     *        asynchronously or updated later. Their size is not included in the revision size,
     *        their hash does not contribute to the revision hash, and updates are not included
     *        in revision history.
     */
    public function __construct( \stdClass $row, $content, bool $derived = false ) {
        Assert::parameterType( [ 'Content', 'callable' ], $content, '$content' );

        Assert::parameter(
            property_exists( $row, 'slot_revision_id' ),
            '$row->slot_revision_id',
            'must exist'
        );
        Assert::parameter(
            property_exists( $row, 'slot_content_id' ),
            '$row->slot_content_id',
            'must exist'
        );
        Assert::parameter(
            property_exists( $row, 'content_address' ),
            '$row->content_address',
            'must exist'
        );
        Assert::parameter(
            property_exists( $row, 'model_name' ),
            '$row->model_name',
            'must exist'
        );
        Assert::parameter(
            property_exists( $row, 'slot_origin' ),
            '$row->slot_origin',
            'must exist'
        );
        Assert::parameter(
            !property_exists( $row, 'slot_inherited' ),
            '$row->slot_inherited',
            'must not exist'
        );
        Assert::parameter(
            !property_exists( $row, 'slot_revision' ),
            '$row->slot_revision',
            'must not exist'
        );

        $this->row = $row;
        $this->content = $content;
        $this->derived = $derived;
    }

    /**
     * Returns the Content of the given slot.
     *
     * @note This is free to load Content from whatever subsystem is necessary,
     * performing potentially expensive operations and triggering I/O-related
     * failure modes.
     *
     * @note This method does not apply audience filtering.
     *
     * @throws SuppressedDataException if access to the content is not allowed according
     * to the audience check performed by RevisionRecord::getSlot().
     * @throws BadRevisionException if the revision is permanently missing
     * @throws RevisionAccessException for other storage access errors
     *
     * @return Content The slot's content. This is a direct reference to the internal instance,
     * copy before exposing to application logic!
     */
    public function getContent() {
        if ( $this->content instanceof Content ) {
            return $this->content;
        }

        $obj = call_user_func( $this->content, $this );

        Assert::postcondition(
            $obj instanceof Content,
            'Slot content callback should return a Content object'
        );

        $this->content = $obj;

        return $this->content;
    }

    /**
     * Returns the string value of a data field from the database row supplied to the constructor.
     * If the field was set to a callback, that callback is invoked and the result returned.
     *
     * @param string $name
     *
     * @throws OutOfBoundsException
     * @throws IncompleteRevisionException
     * @return mixed Returns the field's value, never null.
     */
    private function getField( $name ) {
        if ( !isset( $this->row->$name ) ) {
            // distinguish between unknown and uninitialized fields
            if ( property_exists( $this->row, $name ) ) {
                throw new IncompleteRevisionException(
                    'Uninitialized field: {name}',
                    [ 'name' => $name ]
                );
            } else {
                throw new OutOfBoundsException( 'No such field: ' . $name );
            }
        }

        $value = $this->row->$name;

        // NOTE: allow callbacks, but don't trust plain string callables from the database!
        if ( !is_string( $value ) && is_callable( $value ) ) {
            $value = call_user_func( $value, $this );
            $this->setField( $name, $value );
        }

        return $value;
    }

    /**
     * Returns the string value of a data field from the database row supplied to the constructor.
     *
     * @param string $name
     *
     * @throws OutOfBoundsException
     * @throws IncompleteRevisionException
     * @return string
     */
    private function getStringField( $name ) {
        return strval( $this->getField( $name ) );
    }

    /**
     * Returns the int value of a data field from the database row supplied to the constructor.
     *
     * @param string $name
     *
     * @throws OutOfBoundsException
     * @throws IncompleteRevisionException
     * @return int
     */
    private function getIntField( $name ) {
        return intval( $this->getField( $name ) );
    }

    /**
     * @param string $name
     * @return bool whether this record contains the given field
     */
    private function hasField( $name ) {
        if ( isset( $this->row->$name ) ) {
            // if the field is a callback, resolve first, then re-check
            if ( !is_string( $this->row->$name ) && is_callable( $this->row->$name ) ) {
                $this->getField( $name );
            }
        }

        return isset( $this->row->$name );
    }

    /**
     * Returns the ID of the revision this slot is associated with.
     *
     * @return int
     */
    public function getRevision() {
        return $this->getIntField( 'slot_revision_id' );
    }

    /**
     * Returns the revision ID of the revision that originated the slot's content.
     *
     * @return int
     */
    public function getOrigin() {
        return $this->getIntField( 'slot_origin' );
    }

    /**
     * Whether this slot was inherited from an older revision.
     *
     * If this SlotRecord is already attached to a revision, this returns true
     * if the slot's revision of origin is the same as the revision it belongs to.
     *
     * If this SlotRecord is not yet attached to a revision, this returns true
     * if the slot already has an address.
     *
     * @return bool
     */
    public function isInherited() {
        if ( $this->hasRevision() ) {
            return $this->getRevision() !== $this->getOrigin();
        } else {
            return $this->hasAddress();
        }
    }

    /**
     * Whether this slot has an address. Slots will have an address if their
     * content has been stored. While building a new revision,
     * SlotRecords will not have an address associated.
     *
     * @return bool
     */
    public function hasAddress() {
        return $this->hasField( 'content_address' );
    }

    /**
     * Whether this slot has an origin (revision ID that originated the slot's content.
     *
     * @since 1.32
     *
     * @return bool
     */
    public function hasOrigin() {
        return $this->hasField( 'slot_origin' );
    }

    /**
     * Whether this slot has a content ID. Slots will have a content ID if their
     * content has been stored in the content table. While building a new revision,
     * SlotRecords will not have an ID associated.
     *
     * Also, during schema migration, hasContentId() may return false when encountering an
     * un-migrated database entry in SCHEMA_COMPAT_WRITE_BOTH mode.
     * It will however always return true for saved revisions on SCHEMA_COMPAT_READ_NEW mode,
     * or without SCHEMA_COMPAT_WRITE_NEW mode. In the latter case, an emulated content ID
     * is used, derived from the revision's text ID.
     *
     * Note that hasContentId() returning false while hasRevision() returns true always
     * indicates an unmigrated row in SCHEMA_COMPAT_WRITE_BOTH mode, as described above.
     * For an unsaved slot, both these methods would return false.
     *
     * @since 1.32
     *
     * @return bool
     */
    public function hasContentId() {
        return $this->hasField( 'slot_content_id' );
    }

    /**
     * Whether this slot has revision ID associated. Slots will have a revision ID associated
     * only if they were loaded as part of an existing revision. While building a new revision,
     * Slotrecords will not have a revision ID associated.
     *
     * @return bool
     */
    public function hasRevision() {
        return $this->hasField( 'slot_revision_id' );
    }

    /**
     * Returns the role of the slot.
     *
     * @return string
     */
    public function getRole() {
        return $this->getStringField( 'role_name' );
    }

    /**
     * Returns the address of this slot's content.
     * This address can be used with BlobStore to load the Content object.
     *
     * @return string
     */
    public function getAddress() {
        return $this->getStringField( 'content_address' );
    }

    /**
     * Returns the ID of the content meta data row associated with the slot.
     * This information should be irrelevant to application logic, it is here to allow
     * the construction of a full row for the revision table.
     *
     * Note that this method may return an emulated value during schema migration in
     * SCHEMA_COMPAT_WRITE_OLD mode. See RevisionStore::emulateContentId for more information.
     *
     * @return int
     */
    public function getContentId() {
        return $this->getIntField( 'slot_content_id' );
    }

    /**
     * Returns the content size
     *
     * @return int size of the content, in bogo-bytes, as reported by Content::getSize.
     */
    public function getSize() {
        try {
            $size = $this->getIntField( 'content_size' );
        } catch ( IncompleteRevisionException $ex ) {
            $size = $this->getContent()->getSize();
            $this->setField( 'content_size', $size );
        }

        return $size;
    }

    /**
     * Returns the content size
     *
     * @return string hash of the content.
     */
    public function getSha1() {
        try {
            $sha1 = $this->getStringField( 'content_sha1' );
        } catch ( IncompleteRevisionException $ex ) {
            $sha1 = null;
        }

        // Compute if missing. Missing could mean null or empty.
        if ( $sha1 === null || $sha1 === '' ) {
            $format = $this->hasField( 'format_name' )
                ? $this->getStringField( 'format_name' )
                : null;

            $data = $this->getContent()->serialize( $format );
            $sha1 = self::base36Sha1( $data );
            $this->setField( 'content_sha1', $sha1 );
        }

        return $sha1;
    }

    /**
     * Returns the content model. This is the model name that decides
     * which ContentHandler is appropriate for interpreting the
     * data of the blob referenced by the address returned by getAddress().
     *
     * @return string the content model of the content
     */
    public function getModel() {
        try {
            $model = $this->getStringField( 'model_name' );
        } catch ( IncompleteRevisionException $ex ) {
            $model = $this->getContent()->getModel();
            $this->setField( 'model_name', $model );
        }

        return $model;
    }

    /**
     * Returns the blob serialization format as a MIME type.
     *
     * @note When this method returns null, the caller is expected
     * to auto-detect the serialization format, or to rely on
     * the default format associated with the content model.
     *
     * @return string|null
     */
    public function getFormat() {
        // XXX: we currently do not plan to store the format for each slot!

        if ( $this->hasField( 'format_name' ) ) {
            return $this->getStringField( 'format_name' );
        }

        return null;
    }

    /**
     * @param string $name
     * @param string|int|null $value
     */
    private function setField( $name, $value ) {
        $this->row->$name = $value;
    }

    /**
     * Get the base 36 SHA-1 value for a string of text
     *
     * MCR migration note: this replaced Revision::base36Sha1
     *
     * @param string $blob
     * @return string
     */
    public static function base36Sha1( $blob ) {
        return \Wikimedia\base_convert( sha1( $blob ), 16, 36, 31 );
    }

    /**
     * Returns true if $other has the same content as this slot.
     * The check is performed based on the model, address size, and hash.
     * Two slots can have the same content if they use different content addresses,
     * but if they have the same address and the same model, they have the same content.
     * Two slots can have the same content if they belong to different
     * revisions or pages.
     *
     * Note that hasSameContent() may return false even if Content::equals returns true for
     * the content of two slots. This may happen if the two slots have different serializations
     * representing equivalent Content. Such false negatives are considered acceptable. Code
     * that has to be absolutely sure the Content is really not the same if hasSameContent()
     * returns false should call getContent() and compare the Content objects directly.
     *
     * @since 1.32
     *
     * @param SlotRecord $other
     * @return bool
     */
    public function hasSameContent( SlotRecord $other ) {
        if ( $other === $this ) {
            return true;
        }

        if ( $this->getModel() !== $other->getModel() ) {
            return false;
        }

        if ( $this->hasAddress()
            && $other->hasAddress()
            && $this->getAddress() == $other->getAddress()
        ) {
            return true;
        }

        if ( $this->getSize() !== $other->getSize() ) {
            return false;
        }

        if ( $this->getSha1() !== $other->getSha1() ) {
            return false;
        }

        return true;
    }

    /**
     * @return bool Is this a derived slot?
     * @since 1.36
     */
    public function isDerived(): bool {
        return $this->derived;
    }

}