wikimedia/mediawiki-core

View on GitHub
includes/parser/RevisionOutputCache.php

Summary

Maintainability
B
5 hrs
Test Coverage
<?php
/**
 * Cache for outputs of the PHP parser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Cache Parser
 */

namespace MediaWiki\Parser;

use CacheTime;
use InvalidArgumentException;
use JsonException;
use MediaWiki\Json\JsonCodec;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Utils\MWTimestamp;
use ParserOptions;
use Psr\Log\LoggerInterface;
use WANObjectCache;
use Wikimedia\Stats\StatsFactory;
use Wikimedia\UUID\GlobalIdGenerator;

/**
 * Cache for ParserOutput objects.
 * The cache is split per ParserOptions.
 *
 * @since 1.36
 * @ingroup Cache Parser
 */
class RevisionOutputCache {

    /** @var string The name of this cache. Used as a root of the cache key. */
    private $name;

    /** @var WANObjectCache */
    private $cache;

    /**
     * Anything cached prior to this is invalidated
     *
     * @var string
     */
    private $cacheEpoch;

    /**
     * Expiry time for cache entries.
     *
     * @var int
     */
    private $cacheExpiry;

    /** @var JsonCodec */
    private $jsonCodec;

    /** @var StatsFactory */
    private $stats;

    /** @var LoggerInterface */
    private $logger;

    private GlobalIdGenerator $globalIdGenerator;

    /**
     * @param string $name
     * @param WANObjectCache $cache
     * @param int $cacheExpiry Expiry for ParserOutput in $cache.
     * @param string $cacheEpoch Anything before this timestamp is invalidated
     * @param JsonCodec $jsonCodec
     * @param StatsFactory $stats
     * @param LoggerInterface $logger
     * @param GlobalIdGenerator $globalIdGenerator
     */
    public function __construct(
        string $name,
        WANObjectCache $cache,
        int $cacheExpiry,
        string $cacheEpoch,
        JsonCodec $jsonCodec,
        StatsFactory $stats,
        LoggerInterface $logger,
        GlobalIdGenerator $globalIdGenerator
    ) {
        $this->name = $name;
        $this->cache = $cache;
        $this->cacheExpiry = $cacheExpiry;
        $this->cacheEpoch = $cacheEpoch;
        $this->jsonCodec = $jsonCodec;
        $this->stats = $stats;
        $this->logger = $logger;
        $this->globalIdGenerator = $globalIdGenerator;
    }

    /**
     * @param string $status e.g. hit, miss etc.
     * @param string|null $reason
     */
    private function incrementStats( string $status, string $reason = null ) {
        $metricSuffix = $reason ? "{$status}_{$reason}" : $status;

        $this->stats->getCounter( 'RevisionOutputCache_operation_total' )
            ->setLabel( 'name', $this->name )
            ->setLabel( 'status', $status )
            ->setLabel( 'reason', $reason ?: 'n/a' )
            ->copyToStatsdAt( "RevisionOutputCache.{$this->name}.{$metricSuffix}" )
            ->increment();
    }

    /**
     * Get a key that will be used by this cache to store the content
     * for a given page considering the given options and the array of
     * used options.
     *
     * If there is a possibility the revision does not have a revision id, use
     * makeParserOutputKeyOptionalRevId() instead.
     *
     * @warning The exact format of the key is considered internal and is subject
     * to change, thus should not be used as storage or long-term caching key.
     * This is intended to be used for logging or keying something transient.
     *
     * @param RevisionRecord $revision
     * @param ParserOptions $options
     * @param array|null $usedOptions currently ignored
     * @return string
     * @internal
     */
    public function makeParserOutputKey(
        RevisionRecord $revision,
        ParserOptions $options,
        array $usedOptions = null
    ): string {
        $usedOptions = ParserOptions::allCacheVaryingOptions();

        $revId = $revision->getId();
        if ( !$revId ) {
            // If RevId is null, this would probably be unsafe to use as a cache key.
            throw new InvalidArgumentException( "Revision must have an id number" );
        }
        $hash = $options->optionsHash( $usedOptions );
        return $this->cache->makeKey( $this->name, $revId, $hash );
    }

    /**
     * Get a key that will be used for locks or pool counter
     *
     * Similar to makeParserOutputKey except the revision id might be null,
     * in which case it is unsafe to cache, but still needs a key for things like
     * poolcounter.
     *
     * @warning The exact format of the key is considered internal and is subject
     * to change, thus should not be used as storage or long-term caching key.
     * This is intended to be used for logging or keying something transient.
     *
     * @param RevisionRecord $revision
     * @param ParserOptions $options
     * @param array|null $usedOptions currently ignored
     * @return string
     * @internal
     */
    public function makeParserOutputKeyOptionalRevId(
        RevisionRecord $revision,
        ParserOptions $options,
        array $usedOptions = null
    ): string {
        $usedOptions = ParserOptions::allCacheVaryingOptions();

        // revId may be null.
        $revId = (string)$revision->getId();
        $hash = $options->optionsHash( $usedOptions );
        return $this->cache->makeKey( $this->name, $revId, $hash );
    }

    /**
     * Retrieve the ParserOutput from cache.
     * false if not found or outdated.
     *
     * @param RevisionRecord $revision
     * @param ParserOptions $parserOptions
     *
     * @return ParserOutput|false False on failure
     */
    public function get( RevisionRecord $revision, ParserOptions $parserOptions ) {
        if ( $this->cacheExpiry <= 0 ) {
            // disabled
            return false;
        }

        if ( !$parserOptions->isSafeToCache() ) {
            $this->incrementStats( 'miss', 'unsafe' );
            return false;
        }

        $cacheKey = $this->makeParserOutputKey( $revision, $parserOptions );
        $json = $this->cache->get( $cacheKey );

        if ( $json === false ) {
            $this->incrementStats( 'miss', 'absent' );
            return false;
        }

        $output = $this->restoreFromJson( $json, $cacheKey, ParserOutput::class );
        if ( $output === null ) {
            $this->incrementStats( 'miss', 'unserialize' );
            return false;
        }

        $cacheTime = (int)MWTimestamp::convert( TS_UNIX, $output->getCacheTime() );
        $expiryTime = (int)MWTimestamp::convert( TS_UNIX, $this->cacheEpoch );
        $expiryTime = max( $expiryTime, (int)MWTimestamp::now( TS_UNIX ) - $this->cacheExpiry );

        if ( $cacheTime < $expiryTime ) {
            $this->incrementStats( 'miss', 'expired' );
            return false;
        }

        $this->logger->debug( 'old-revision cache hit' );
        $this->incrementStats( 'hit' );
        return $output;
    }

    /**
     * @param ParserOutput $output
     * @param RevisionRecord $revision
     * @param ParserOptions $parserOptions
     * @param string|null $cacheTime TS_MW timestamp when the output was generated
     */
    public function save(
        ParserOutput $output,
        RevisionRecord $revision,
        ParserOptions $parserOptions,
        string $cacheTime = null
    ) {
        if ( !$output->hasText() ) {
            throw new InvalidArgumentException( 'Attempt to cache a ParserOutput with no text set!' );
        }

        if ( $this->cacheExpiry <= 0 ) {
            // disabled
            return;
        }

        $cacheKey = $this->makeParserOutputKey( $revision, $parserOptions );

        // Ensure cache properties are set in the ParserOutput
        // T350538: These should be turned into assertions that the
        // properties are already present (and the $cacheTime argument
        // removed).
        if ( $cacheTime ) {
            $output->setCacheTime( $cacheTime );
        } else {
            $cacheTime = $output->getCacheTime();
        }
        if ( !$output->getCacheRevisionId() ) {
            $output->setCacheRevisionId( $revision->getId() );
        }
        if ( !$output->getRenderId() ) {
            $output->setRenderId( $this->globalIdGenerator->newUUIDv1() );
        }
        if ( !$output->getRevisionTimestamp() ) {
            $output->setRevisionTimestamp( $revision->getTimestamp() );
        }

        $msg = "Saved in RevisionOutputCache with key $cacheKey" .
            " and timestamp $cacheTime" .
            " and revision id {$revision->getId()}.";

        $output->addCacheMessage( $msg );

        // The ParserOutput might be dynamic and have been marked uncacheable by the parser.
        $output->updateCacheExpiry( $this->cacheExpiry );

        $expiry = $output->getCacheExpiry();
        if ( $expiry <= 0 ) {
            $this->incrementStats( 'save', 'uncacheable' );
            return;
        }

        if ( !$parserOptions->isSafeToCache() ) {
            $this->incrementStats( 'save', 'unsafe' );
            return;
        }

        $json = $this->encodeAsJson( $output, $cacheKey );
        if ( $json === null ) {
            $this->incrementStats( 'save', 'nonserializable' );
            return;
        }

        $this->cache->set( $cacheKey, $json, $expiry );
        $this->incrementStats( 'save', 'success' );
    }

    /**
     * @param string $jsonData
     * @param string $key
     * @param string $expectedClass
     * @return CacheTime|ParserOutput|null
     */
    private function restoreFromJson( string $jsonData, string $key, string $expectedClass ) {
        try {
            /** @var CacheTime $obj */
            $obj = $this->jsonCodec->unserialize( $jsonData, $expectedClass );
            return $obj;
        } catch ( JsonException $e ) {
            $this->logger->error( 'Unable to unserialize JSON', [
                'name' => $this->name,
                'cache_key' => $key,
                'message' => $e->getMessage()
            ] );
            return null;
        }
    }

    /**
     * @param CacheTime $obj
     * @param string $key
     * @return string|null
     */
    private function encodeAsJson( CacheTime $obj, string $key ) {
        try {
            return $this->jsonCodec->serialize( $obj );
        } catch ( JsonException $e ) {
            $this->logger->error( 'Unable to serialize JSON', [
                'name' => $this->name,
                'cache_key' => $key,
                'message' => $e->getMessage(),
            ] );
            return null;
        }
    }
}