wikimedia/mediawiki-core

View on GitHub
includes/libs/objectcache/MultiWriteBagOStuff.php

Summary

Maintainability
B
6 hrs
Test Coverage
<?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */
namespace Wikimedia\ObjectCache;

use InvalidArgumentException;
use Wikimedia\ObjectFactory\ObjectFactory;

/**
 * Wrap multiple BagOStuff objects, to implement different caching tiers.
 *
 * The order of the caches is important. The first tier is considered the primary
 * and highest tier which must handle the majority of the load for reads,
 * and is generally less persistent, smaller, and faster (e.g. evicts data
 * regularly based on demand, keeping fewer keys at a given time).
 * The other caches are considered secondary and lower tiers, which should
 * hold more data and retain it for longer than the primary tier.
 *
 * Data writes ("set") go to all given BagOStuff caches.
 * If the `replication => async` option is set, then only the primary write
 * is blocking during the web request, with other writes deferred until
 * after the web response is sent.
 *
 * Data reads try each cache in the order they are given, until a value is found.
 * When a value is found at a secondary tier, it is automatically copied (back)
 * to the primary tier.
 *
 * **Example**: Keep popular data in memcached, with a fallback to a MySQL database.
 * This is how ParserCache is used at Wikimedia Foundation (as of 2024).
 *
 * ```
 * $wgObjectCaches['parsercache-multiwrite'] = [
 *    'class' => 'MultiWriteBagOStuff',
 *    'caches' => [
 *      0 => [
 *        'class' => 'MemcachedPeclBagOStuff',
 *        'servers' => [ '127.0.0.1:11212' ],
 *      ],
 *      1 => [
 *        'class' => 'SqlBagOStuff',
 *        'servers' => $parserCacheDbServers,
 *        'purgePeriod' => 0,
 *        'tableName' => 'pc',
 *        'shards' => 256,
 *        'reportDupes' => false
 *      ],
 *    ]
 * ];
 * ```
 *
 * If you configure a memcached server for MultiWriteBagOStuff that is the same
 * as the one used for MediaWiki more generally, it is recommended to specify
 * the tier via ObjectCache::getInstance() so that the same object and Memcached
 * connection can be re-used.
 *
 * ```
 * $wgObjectCaches['my-memcached'] = [ .. ];
 * $wgMainCacheType = 'my-memcached';
 *
 * $wgObjectCaches['parsercache-multiwrite'] = [
 *    'class' => 'MultiWriteBagOStuff',
 *    'caches' => [
 *      0 => [
 *        'factory' => [ 'ObjectCache', 'getInstance' ],
 *        'args' => [ 'my-memcached' ],
 *      ],
 *      1 => [
 *        'class' => 'SqlBagOStuff',
 *        'servers' => $parserCacheDbServers,
 *        'purgePeriod' => 0,
 *        'tableName' => 'pc',
 *        'shards' => 256,
 *        'reportDupes' => false
 *      ],
 *    ]
 * ];
 * ```
 *
 * The makeKey() method of this class uses an implementation-agnostic encoding.
 * When it forwards gets and sets to the other BagOStuff objects, keys are
 * automatically re-encoded, for example to satisfy the character and length
 * constraints of MemcachedBagOStuff.
 *
 * @newable
 * @ingroup Cache
 */
class MultiWriteBagOStuff extends BagOStuff {
    /** @var BagOStuff[] Backing cache stores in order of highest to lowest tier */
    protected $caches;

    /** @var bool Use async secondary writes */
    protected $asyncWrites = false;
    /** @var int[] List of all backing cache indexes */
    protected $cacheIndexes = [];

    /** @var int TTL when a key is copied to a higher cache tier */
    private static $UPGRADE_TTL = 3600;

    /**
     * @stable to call
     *
     * @param array $params
     *   - caches: A numbered array of either ObjectFactory::getObjectFromSpec
     *      arrays yielding BagOStuff objects or direct BagOStuff objects.
     *      If using the former, the 'args' field *must* be set.
     *      The first cache is the primary one, being the first to
     *      be read in the fallback chain. Writes happen to all stores
     *      in the order they are defined. However, lock()/unlock() calls
     *      only use the primary store.
     *   - replication: Either 'sync' or 'async'. This controls whether writes
     *      to secondary stores are deferred when possible. To use 'async' writes
     *      requires the 'asyncHandler' option to be set as well; if the handler
     *      is not callable, writes silently stay synchronous.
     *      Async writes can increase the chance of some race conditions
     *      or cause keys to expire seconds later than expected. It is
     *      safe to use for modules when cached values: are immutable,
     *      invalidation uses logical TTLs, invalidation uses etag/timestamp
     *      validation against the DB, or merge() is used to handle races.
     *
     * @phan-param array{caches:array<int,array|BagOStuff>,replication:string} $params
     *
     * @throws InvalidArgumentException If "caches" is missing, empty, or not an array
     */
    public function __construct( $params ) {
        parent::__construct( $params );

        if ( empty( $params['caches'] ) || !is_array( $params['caches'] ) ) {
            throw new InvalidArgumentException(
                __METHOD__ . ': "caches" parameter must be an array of caches'
            );
        }

        // Tiers are re-indexed from 0 in the order given, regardless of the input keys,
        // so that index 0 is always the primary tier.
        $this->caches = [];
        foreach ( $params['caches'] as $cacheInfo ) {
            if ( $cacheInfo instanceof BagOStuff ) {
                $this->caches[] = $cacheInfo;
            } else {
                $this->caches[] = ObjectFactory::getObjectFromSpec( $cacheInfo );
            }
        }

        $this->attrMap = $this->mergeFlagMaps( $this->caches );

        // Async replication is only enabled when explicitly requested AND a usable
        // handler exists; otherwise all writes are performed synchronously.
        $this->asyncWrites = (
            isset( $params['replication'] ) &&
            $params['replication'] === 'async' &&
            is_callable( $this->asyncHandler )
        );

        $this->cacheIndexes = array_keys( $this->caches );
    }

    /**
     * Read a key, trying each tier in order until one returns a value other than false.
     *
     * With READ_LATEST only the primary tier is consulted. With READ_VERIFIED, a value
     * found in a lower tier is written back to every tier that missed, using the
     * upgrade TTL.
     *
     * @param string $key
     * @param int $flags Bitfield of BagOStuff::READ_* constants
     *
     * @return mixed The value found, or false if no consulted tier had one
     */
    public function get( $key, $flags = 0 ) {
        $args = func_get_args();

        if ( $this->fieldHasFlags( $flags, self::READ_LATEST ) ) {
            // If the latest write was a delete(), we do NOT want to fallback
            // to the other tiers and possibly see the old value. Also, this
            // is used by merge(), which only needs to hit the primary.
            return $this->callKeyMethodOnTierCache(
                0,
                __FUNCTION__,
                self::ARG0_KEY,
                self::RES_NONKEY,
                $args
            );
        }

        $value = false;
        // Tiers that were checked and did not have the key
        $missIndexes = [];
        foreach ( $this->cacheIndexes as $i ) {
            $value = $this->callKeyMethodOnTierCache(
                $i,
                __FUNCTION__,
                self::ARG0_KEY,
                self::RES_NONKEY,
                $args
            );
            if ( $value !== false ) {
                break;
            }
            $missIndexes[] = $i;
        }

        if (
            $value !== false &&
            $this->fieldHasFlags( $flags, self::READ_VERIFIED ) &&
            $missIndexes
        ) {
            // Backfill the value to the higher (and often faster/smaller) cache tiers
            $this->callKeyWriteMethodOnTierCaches(
                $missIndexes,
                'set',
                self::ARG0_KEY,
                self::RES_NONKEY,
                [ $key, $value, self::$UPGRADE_TTL ]
            );
        }

        return $value;
    }

    /**
     * Write a value to all tiers.
     *
     * @param string $key
     * @param mixed $value
     * @param int $exptime
     * @param int $flags
     *
     * @return mixed Result of callKeyWriteMethodOnTierCaches(); false if any
     *   synchronous tier write returned false
     */
    public function set( $key, $value, $exptime = 0, $flags = 0 ) {
        return $this->callKeyWriteMethodOnTierCaches(
            $this->cacheIndexes,
            __FUNCTION__,
            self::ARG0_KEY,
            self::RES_NONKEY,
            func_get_args()
        );
    }

    /**
     * Delete a key from all tiers.
     *
     * @param string $key
     * @param int $flags
     *
     * @return mixed Result of callKeyWriteMethodOnTierCaches(); false if any
     *   synchronous tier write returned false
     */
    public function delete( $key, $flags = 0 ) {
        return $this->callKeyWriteMethodOnTierCaches(
            $this->cacheIndexes,
            __FUNCTION__,
            self::ARG0_KEY,
            self::RES_NONKEY,
            func_get_args()
        );
    }

    /**
     * Add a key via the primary tier and, on success, propagate it to the other tiers.
     *
     * @param string $key
     * @param mixed $value
     * @param int $exptime
     * @param int $flags
     *
     * @return mixed The primary tier's add() result, or false if that succeeded but
     *   a synchronous secondary set() returned false
     */
    public function add( $key, $value, $exptime = 0, $flags = 0 ) {
        // Try the write to the top-tier cache
        $ok = $this->callKeyMethodOnTierCache(
            0,
            __FUNCTION__,
            self::ARG0_KEY,
            self::RES_NONKEY,
            func_get_args()
        );

        if ( $ok ) {
            // Relay the add() using set() if it succeeded. This is meant to handle certain
            // migration scenarios where the same store might get written to twice for certain
            // keys. In that case, it makes no sense to return false due to "self-conflicts".
            $okSecondaries = $this->callKeyWriteMethodOnTierCaches(
                array_slice( $this->cacheIndexes, 1 ),
                'set',
                self::ARG0_KEY,
                self::RES_NONKEY,
                [ $key, $value, $exptime, $flags ]
            );
            // A null result (no secondary tiers, or all writes deferred) is not a failure
            if ( $okSecondaries === false ) {
                $ok = false;
            }
        }

        return $ok;
    }

    /**
     * Run merge() on all tiers.
     *
     * @param string $key
     * @param callable $callback
     * @param int $exptime
     * @param int $attempts
     * @param int $flags
     *
     * @return mixed Result of callKeyWriteMethodOnTierCaches(); false if any
     *   synchronous tier write returned false
     */
    public function merge( $key, callable $callback, $exptime = 0, $attempts = 10, $flags = 0 ) {
        return $this->callKeyWriteMethodOnTierCaches(
            $this->cacheIndexes,
            __FUNCTION__,
            self::ARG0_KEY,
            self::RES_NONKEY,
            func_get_args()
        );
    }

    /**
     * Change the expiry of a key on all tiers.
     *
     * @param string $key
     * @param int $exptime
     * @param int $flags
     *
     * @return mixed Result of callKeyWriteMethodOnTierCaches(); false if any
     *   synchronous tier write returned false
     */
    public function changeTTL( $key, $exptime = 0, $flags = 0 ) {
        return $this->callKeyWriteMethodOnTierCaches(
            $this->cacheIndexes,
            __FUNCTION__,
            self::ARG0_KEY,
            self::RES_NONKEY,
            func_get_args()
        );
    }

    /**
     * Acquire a lock for a key, using the primary tier only.
     *
     * @param string $key
     * @param int $timeout Forwarded unchanged to the primary tier
     * @param int $exptime Forwarded unchanged to the primary tier
     * @param string $rclass Forwarded unchanged to the primary tier
     *
     * @return mixed The primary tier's lock() result
     */
    public function lock( $key, $timeout = 6, $exptime = 6, $rclass = '' ) {
        // Only need to lock the first cache; also avoids deadlocks
        return $this->callKeyMethodOnTierCache(
            0,
            __FUNCTION__,
            self::ARG0_KEY,
            self::RES_NONKEY,
            func_get_args()
        );
    }

    /**
     * Release a lock for a key, using the primary tier only.
     *
     * @param string $key
     *
     * @return mixed The primary tier's unlock() result
     */
    public function unlock( $key ) {
        // Only the first cache is locked
        return $this->callKeyMethodOnTierCache(
            0,
            __FUNCTION__,
            self::ARG0_KEY,
            self::RES_NONKEY,
            func_get_args()
        );
    }

    /**
     * Ask every tier to delete objects expiring before the given timestamp.
     *
     * Each tier is called directly and synchronously, regardless of the
     * replication mode.
     *
     * @param mixed $timestamp Forwarded unchanged to each tier
     * @param callable|null $progress Forwarded unchanged to each tier
     * @param int|float $limit Forwarded unchanged to each tier
     * @param string|null $tag Forwarded unchanged to each tier
     *
     * @return bool True if at least one tier returned a truthy result
     */
    public function deleteObjectsExpiringBefore(
        $timestamp,
        ?callable $progress = null,
        $limit = INF,
        ?string $tag = null
    ) {
        $ret = false;
        foreach ( $this->caches as $cache ) {
            if ( $cache->deleteObjectsExpiringBefore( $timestamp, $progress, $limit, $tag ) ) {
                $ret = true;
            }
        }

        return $ret;
    }

    /**
     * Read several keys by calling get() once per key.
     *
     * @param string[] $keys
     * @param int $flags Bitfield of BagOStuff::READ_* constants, as for get()
     *
     * @return array Map of key to value, for the keys whose value was not false
     */
    public function getMulti( array $keys, $flags = 0 ) {
        // Just iterate over each key in order to handle all the backfill logic
        $res = [];
        foreach ( $keys as $key ) {
            $val = $this->get( $key, $flags );
            if ( $val !== false ) {
                $res[$key] = $val;
            }
        }

        return $res;
    }

    /**
     * Write several key/value pairs to all tiers.
     *
     * @param array $valueByKey Map of key to value
     * @param int $exptime
     * @param int $flags
     *
     * @return mixed Result of callKeyWriteMethodOnTierCaches(); false if any
     *   synchronous tier write returned false
     */
    public function setMulti( array $valueByKey, $exptime = 0, $flags = 0 ) {
        return $this->callKeyWriteMethodOnTierCaches(
            $this->cacheIndexes,
            __FUNCTION__,
            self::ARG0_KEYMAP,
            self::RES_NONKEY,
            func_get_args()
        );
    }

    /**
     * Delete several keys from all tiers.
     *
     * @param string[] $keys
     * @param int $flags
     *
     * @return mixed Result of callKeyWriteMethodOnTierCaches(); false if any
     *   synchronous tier write returned false
     */
    public function deleteMulti( array $keys, $flags = 0 ) {
        return $this->callKeyWriteMethodOnTierCaches(
            $this->cacheIndexes,
            __FUNCTION__,
            self::ARG0_KEYARR,
            self::RES_NONKEY,
            func_get_args()
        );
    }

    /**
     * Change the expiry of several keys on all tiers.
     *
     * @param string[] $keys
     * @param int $exptime
     * @param int $flags
     *
     * @return mixed Result of callKeyWriteMethodOnTierCaches(); false if any
     *   synchronous tier write returned false
     */
    public function changeTTLMulti( array $keys, $exptime, $flags = 0 ) {
        return $this->callKeyWriteMethodOnTierCaches(
            $this->cacheIndexes,
            __FUNCTION__,
            self::ARG0_KEYARR,
            self::RES_NONKEY,
            func_get_args()
        );
    }

    /**
     * Run incrWithInit() on all tiers.
     *
     * @param string $key
     * @param int $exptime
     * @param int $step
     * @param int|null $init
     * @param int $flags
     *
     * @return mixed First synchronous tier result, or false if any synchronous
     *   tier write returned false
     */
    public function incrWithInit( $key, $exptime, $step = 1, $init = null, $flags = 0 ) {
        return $this->callKeyWriteMethodOnTierCaches(
            $this->cacheIndexes,
            __FUNCTION__,
            self::ARG0_KEY,
            self::RES_NONKEY,
            func_get_args()
        );
    }

    /**
     * Set the mock time on this object and on every backing tier.
     *
     * @param mixed &$time Passed by reference to the parent and to each tier
     */
    public function setMockTime( &$time ) {
        parent::setMockTime( $time );
        foreach ( $this->caches as $cache ) {
            $cache->setMockTime( $time );
        }
    }

    /**
     * Call a method on the cache instance for the given cache tier (index)
     *
     * @param int $index Cache tier
     * @param string $method Method name
     * @param int $arg0Sig BagOStuff::ARG0_* constant describing argument 0
     * @param int $rvSig BagOStuff::RES_* constant describing the return value
     * @param array $args Method arguments
     *
     * @return mixed The result of calling the given method
     */
    private function callKeyMethodOnTierCache( $index, $method, $arg0Sig, $rvSig, array $args ) {
        return $this->caches[$index]->proxyCall( $method, $arg0Sig, $rvSig, $args, $this );
    }

    /**
     * Call a write method on the cache instances, in order, for the given tiers (indexes)
     *
     * In async mode, only tier 0 is written synchronously; writes to the other tiers
     * are handed to the async handler and their results are not observed.
     *
     * @param int[] $indexes List of cache tiers
     * @param string $method Method name
     * @param int $arg0Sig BagOStuff::ARG0_* constant describing argument 0
     * @param int $resSig BagOStuff::RES_* constant describing the return value
     * @param array $args Method arguments
     *
     * @return mixed First synchronous result or false if any failed; null if all
     *   asynchronous or if $indexes is empty
     */
    private function callKeyWriteMethodOnTierCaches(
        array $indexes,
        $method,
        $arg0Sig,
        $resSig,
        array $args
    ) {
        $res = null;

        // NOTE(review): merge() is excluded from the clone below, presumably because
        // its callback argument may not be serializable; confirm before changing.
        if ( $this->asyncWrites && array_diff( $indexes, [ 0 ] ) && $method !== 'merge' ) {
            // Deep-clone $args to prevent misbehavior when something writes an
            // object to the BagOStuff then modifies it afterwards, e.g. T168040.
            $args = unserialize( serialize( $args ) );
        }

        foreach ( $indexes as $i ) {
            $cache = $this->caches[$i];
            $writeSynchronously = ( $i === 0 || !$this->asyncWrites );

            if ( $writeSynchronously ) {
                // Tier 0 store or in sync mode: write synchronously and get result
                $storeRes = $cache->proxyCall( $method, $arg0Sig, $resSig, $args, $this );
                if ( $storeRes === false ) {
                    // Any failure is sticky; later successes do not overwrite it
                    $res = false;
                } elseif ( $res === null ) {
                    // first synchronous result
                    $res = $storeRes;
                }
            } else {
                // Secondary write in async mode: do not block this HTTP request
                ( $this->asyncHandler )(
                    function () use ( $cache, $method, $arg0Sig, $resSig, $args ) {
                        $cache->proxyCall( $method, $arg0Sig, $resSig, $args, $this );
                    }
                );
            }
        }

        return $res;
    }
}

/**
 * Register the un-namespaced global name 'MultiWriteBagOStuff' as an alias of
 * the namespaced class, so that references to the old name still resolve.
 *
 * @deprecated class alias since 1.43
 */
class_alias( MultiWriteBagOStuff::class, 'MultiWriteBagOStuff' );