includes/page/ParserOutputAccess.php
<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
namespace MediaWiki\Page;
use InvalidArgumentException;
use MapCacheLRU;
use MediaWiki\Logger\Spi as LoggerSpi;
use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Parser\ParserCacheFactory;
use MediaWiki\Parser\ParserOutput;
use MediaWiki\Parser\Parsoid\PageBundleParserOutputConverter;
use MediaWiki\Parser\RevisionOutputCache;
use MediaWiki\PoolCounter\PoolCounterWork;
use MediaWiki\PoolCounter\PoolWorkArticleView;
use MediaWiki\PoolCounter\PoolWorkArticleViewCurrent;
use MediaWiki\PoolCounter\PoolWorkArticleViewOld;
use MediaWiki\Revision\RevisionLookup;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\RevisionRenderer;
use MediaWiki\Status\Status;
use MediaWiki\Title\TitleFormatter;
use ParserCache;
use ParserOptions;
use Wikimedia\Assert\Assert;
use Wikimedia\Parsoid\Parsoid;
use Wikimedia\Rdbms\ChronologyProtector;
use Wikimedia\Rdbms\ILBFactory;
use Wikimedia\Stats\StatsFactory;
/**
* Service for getting rendered output of a given page.
*
* This is a high level service, encapsulating concerns like caching
* and stampede protection via PoolCounter.
*
* @since 1.36
* @ingroup Page
*/
class ParserOutputAccess {
/** @internal Name of the ParserCache instance used when the ParserOptions request Parsoid */
public const PARSOID_PCACHE_NAME = 'parsoid-' . ParserCacheFactory::DEFAULT_NAME;
/** @internal Name of the RevisionOutputCache instance used when the ParserOptions request Parsoid */
public const PARSOID_RCACHE_NAME = 'parsoid-' . ParserCacheFactory::DEFAULT_RCACHE_NAME;
/**
 * @var int Do not check the cache before parsing (force parse)
 */
public const OPT_NO_CHECK_CACHE = 1;
/** @var int Alias for OPT_NO_CHECK_CACHE */
public const OPT_FORCE_PARSE = self::OPT_NO_CHECK_CACHE;
/**
 * @var int Do not update the cache after parsing.
 */
public const OPT_NO_UPDATE_CACHE = 2;
/**
 * @var int Bypass audience check for deleted/suppressed revisions.
 * The caller is responsible for ensuring that unauthorized access is prevented.
 * If not set, output generation will fail if the revision is not public.
 */
public const OPT_NO_AUDIENCE_CHECK = 4;
/**
 * @var int Do not check the cache before parsing,
 * and do not update the cache after parsing (not cacheable).
 */
public const OPT_NO_CACHE = self::OPT_NO_UPDATE_CACHE | self::OPT_NO_CHECK_CACHE;
/**
 * @var int Perform an opportunistic LinksUpdate on cache miss
 * @since 1.41
 */
public const OPT_LINKS_UPDATE = 8;
/**
 * @var int Apply page view semantics. This relaxes some guarantees, specifically:
 * - Use PoolCounter for stampede protection, causing the request to
 *   block until another process has finished rendering the content.
 * - Allow stale parser output to be returned to prevent long waits for
 *   slow renders.
 * - Allow cacheable placeholder output to be returned when PoolCounter
 *   fails to obtain a lock. See the PoolCounterConf setting for details.
 *
 * @see Bug T352837
 * @since 1.42
 */
public const OPT_FOR_ARTICLE_VIEW = 16;
/**
 * @var int Ignore the Parsoid profile (HTML) version of the result from the cache.
 * If this is not set, a cached Parsoid result whose version differs from
 * Parsoid's default HTML version is treated as a cache miss.
 */
public const OPT_IGNORE_PROFILE_VERSION = 128;
/** @var string Do not read or write any cache */
private const CACHE_NONE = 'none';
/** @var string Use the primary cache (a ParserCache), which serves the current revision of a page */
private const CACHE_PRIMARY = 'primary';
/** @var string Use the secondary cache (a RevisionOutputCache), which serves publicly visible old revisions */
private const CACHE_SECONDARY = 'secondary';
/**
 * Per-instance LRU cache of ParserOutput objects for the latest revision of
 * recently accessed pages. It serves callers (such as extensions) that ask
 * for the same ParserOutput again right after it was parsed (T301310).
 * Entries are stored under the primary cache's parser output key, using the
 * page's latest revision ID as the field name.
 * @var MapCacheLRU<string,ParserOutput>
 */
private MapCacheLRU $localCache;
/** Source of the primary (ParserCache) and secondary (RevisionOutputCache) caches */
private ParserCacheFactory $parserCacheFactory;
/** Used to load the page's latest revision when the caller did not supply one */
private RevisionLookup $revisionLookup;
/** Performs the actual rendering of a revision */
private RevisionRenderer $revisionRenderer;
/** Used to report cache, case, status and pool work counters */
private StatsFactory $statsFactory;
/** Handed on to PoolWorkArticleViewCurrent */
private ILBFactory $lbFactory;
/** Handed on to PoolWorkArticleViewCurrent */
private ChronologyProtector $chronologyProtector;
/** Handed on to the PoolCounterWork objects */
private LoggerSpi $loggerSpi;
/** Used to obtain WikiPage objects for content handler checks and links updates */
private WikiPageFactory $wikiPageFactory;
/** Used to format the page title in the 'missing-revision-permission' error */
private TitleFormatter $titleFormatter;
/**
 * @param ParserCacheFactory $parserCacheFactory Source of the primary and secondary caches
 * @param RevisionLookup $revisionLookup Used to load the latest revision of a page
 * @param RevisionRenderer $revisionRenderer Used to render revisions
 * @param StatsFactory $statsFactory Receives the counters emitted by this service
 * @param ILBFactory $lbFactory Passed on to pool work for the current revision
 * @param ChronologyProtector $chronologyProtector Passed on to pool work for the current revision
 * @param LoggerSpi $loggerSpi Passed on to pool work objects
 * @param WikiPageFactory $wikiPageFactory Used to obtain WikiPage objects
 * @param TitleFormatter $titleFormatter Used when building error messages
 */
public function __construct(
	ParserCacheFactory $parserCacheFactory,
	RevisionLookup $revisionLookup,
	RevisionRenderer $revisionRenderer,
	StatsFactory $statsFactory,
	ILBFactory $lbFactory,
	ChronologyProtector $chronologyProtector,
	LoggerSpi $loggerSpi,
	WikiPageFactory $wikiPageFactory,
	TitleFormatter $titleFormatter
) {
	// Small per-instance cache for repeated requests for the same output (T301310).
	$this->localCache = new MapCacheLRU( 10 );

	// Caching and revision access
	$this->parserCacheFactory = $parserCacheFactory;
	$this->revisionLookup = $revisionLookup;
	$this->revisionRenderer = $revisionRenderer;

	// Page and title helpers
	$this->wikiPageFactory = $wikiPageFactory;
	$this->titleFormatter = $titleFormatter;

	// Infrastructure: metrics, database, logging
	$this->statsFactory = $statsFactory;
	$this->lbFactory = $lbFactory;
	$this->chronologyProtector = $chronologyProtector;
	$this->loggerSpi = $loggerSpi;
}
/**
 * Decide which cache, if any, applies to the given page and revision.
 *
 * - No cache is used for revisions without an ID, for pages that do not
 *   exist, for content that does not support the parser cache, and for
 *   old revisions that are not publicly visible.
 * - The primary cache is used when no revision is given or the revision
 *   is the page's latest one.
 * - The secondary cache is used for publicly visible old revisions.
 *
 * @param PageRecord $page
 * @param RevisionRecord|null $rev
 *
 * @return string One of the CACHE_XXX constants.
 */
private function shouldUseCache(
	PageRecord $page,
	?RevisionRecord $rev
) {
	// A revision without an ID is not from the database,
	// so its output can't safely be cached.
	if ( $rev !== null && !$rev->getId() ) {
		return self::CACHE_NONE;
	}

	// NOTE: Keep in sync with ParserWikiPage::shouldCheckParserCache().
	// NOTE: when we allow caching of old revisions in the future,
	//       we must not allow caching of deleted revisions.
	$wikiPage = $this->wikiPageFactory->newFromTitle( $page );
	if ( !$page->exists() ) {
		return self::CACHE_NONE;
	}
	if ( !$wikiPage->getContentHandler()->isParserCacheSupported() ) {
		return self::CACHE_NONE;
	}

	// No explicit revision, or the latest one: this is the current revision.
	if ( $rev === null || $rev->getId() === $page->getLatest() ) {
		return self::CACHE_PRIMARY;
	}

	// Old revision: cacheable only if it is not deleted/suppressed.
	return $rev->audienceCan( RevisionRecord::DELETED_TEXT, RevisionRecord::FOR_PUBLIC )
		? self::CACHE_SECONDARY
		: self::CACHE_NONE;
}
/**
 * Returns the rendered output for the given page if it is present in the cache.
 *
 * For the current revision, the per-instance local cache is consulted first,
 * then the primary cache. For publicly visible old revisions, the secondary
 * cache is consulted. In all other cases no cache is read.
 *
 * When Parsoid output is requested and OPT_IGNORE_PROFILE_VERSION is not set,
 * cached output whose Parsoid HTML version is not Parsoid's default version is
 * discarded and reported as an 'obsolete' miss. This also applies to entries
 * found in the local cache, which may have been stored by an earlier call that
 * did set OPT_IGNORE_PROFILE_VERSION.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOptions
 * @param RevisionRecord|null $revision The revision to look up, or null for
 *   the page's latest revision
 * @param int $options Bitfield using the OPT_XXX constants. Only
 *   OPT_IGNORE_PROFILE_VERSION is evaluated here.
 *
 * @return ParserOutput|null The cached output, or null if there is no usable
 *   cache entry.
 */
public function getCachedParserOutput(
	PageRecord $page,
	ParserOptions $parserOptions,
	?RevisionRecord $revision = null,
	int $options = 0
): ?ParserOutput {
	$isOld = $revision && $revision->getId() !== $page->getLatest();
	$useCache = $this->shouldUseCache( $page, $revision );
	$primaryCache = $this->getPrimaryCache( $parserOptions );
	$classCacheKey = $primaryCache->makeParserOutputKey( $page, $parserOptions );

	if ( $useCache === self::CACHE_PRIMARY ) {
		if ( $this->localCache->hasField( $classCacheKey, $page->getLatest() ) && !$isOld ) {
			$localOutput = $this->localCache->getField( $classCacheKey, $page->getLatest() );
			// Apply the same version check as for the shared cache, so that an
			// entry stored under OPT_IGNORE_PROFILE_VERSION is not handed to a
			// caller that did not ask for it. Local hits are not counted in stats.
			if ( !$this->hasObsoleteParsoidVersion( $localOutput, $parserOptions, $options ) ) {
				return $localOutput;
			}
			// Otherwise fall through to the regular lookup below.
		}
		$output = $primaryCache->get( $page, $parserOptions );
	} elseif ( $useCache === self::CACHE_SECONDARY && $revision ) {
		$secondaryCache = $this->getSecondaryCache( $parserOptions );
		$output = $secondaryCache->get( $revision, $parserOptions );
	} else {
		$output = null;
	}

	$notHitReason = 'miss';
	if ( $output && $this->hasObsoleteParsoidVersion( $output, $parserOptions, $options ) ) {
		$notHitReason = 'obsolete';
		$output = null;
	}

	// Remember output for the current revision for subsequent calls (T301310).
	if ( $output && !$isOld ) {
		$this->localCache->setField( $classCacheKey, $page->getLatest(), $output );
	}

	if ( $output ) {
		$this->statsFactory
			->getCounter( 'parseroutputaccess_cache' )
			->setLabel( 'cache', $useCache )
			->setLabel( 'reason', 'hit' )
			->setLabel( 'type', 'hit' )
			->copyToStatsdAt( "ParserOutputAccess.Cache.$useCache.hit" )
			->increment();
	} else {
		$this->statsFactory
			->getCounter( 'parseroutputaccess_cache' )
			->setLabel( 'reason', $notHitReason )
			->setLabel( 'cache', $useCache )
			->setLabel( 'type', 'miss' )
			->copyToStatsdAt( "ParserOutputAccess.Cache.$useCache.$notHitReason" )
			->increment();
	}

	return $output ?: null; // convert false to null
}

/**
 * Whether the given cached output must be discarded because its Parsoid
 * HTML version is not the version Parsoid currently produces by default.
 *
 * @param ParserOutput $output Output found in a cache
 * @param ParserOptions $parserOptions
 * @param int $options Bitfield using the OPT_XXX constants
 *
 * @return bool Always false if OPT_IGNORE_PROFILE_VERSION is set, if Parsoid
 *   is not in use, or if the output carries no version information.
 */
private function hasObsoleteParsoidVersion(
	ParserOutput $output,
	ParserOptions $parserOptions,
	int $options
): bool {
	if ( ( $options & self::OPT_IGNORE_PROFILE_VERSION ) || !$parserOptions->getUseParsoid() ) {
		return false;
	}

	$pageBundleData = $output->getExtensionData(
		PageBundleParserOutputConverter::PARSOID_PAGE_BUNDLE_KEY
	);
	// T333606: Force a reparse if the version coming from cache is not the default
	$cachedVersion = $pageBundleData['version'] ?? null;

	// T325137: No version (e.g. BadContentModel) means there is no sense in reparsing
	return $cachedVersion !== null
		&& $cachedVersion !== Parsoid::defaultHTMLVersion();
}
/**
 * Returns the rendered output for the given page.
 * Caching and concurrency control is applied.
 *
 * Unless OPT_NO_CHECK_CACHE is set, cached output is returned if available.
 * Otherwise the revision is rendered, either through PoolCounter work
 * (if OPT_FOR_ARTICLE_VIEW is set) or directly.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOptions
 * @param RevisionRecord|null $revision The revision to render, or null to
 *   use the page's latest revision
 * @param int $options Bitfield using the OPT_XXX constants
 *
 * @return Status containing a ParserOutput if no error occurred.
 *   Well known errors and warnings include the following messages:
 *   - 'view-pool-dirty-output' (warning) The output is dirty (from a stale cache entry).
 *   - 'view-pool-contention' (warning) Dirty output was returned immediately instead of
 *     waiting to acquire a work lock (when "fast stale" mode is enabled in PoolCounter).
 *   - 'view-pool-timeout' (warning) Dirty output was returned after failing to acquire
 *     a work lock (got QUEUE_FULL or TIMEOUT from PoolCounter).
 *   - 'pool-queuefull' (error) unable to acquire work lock, and no cached content found.
 *   - 'pool-timeout' (error) unable to acquire work lock, and no cached content found.
 *   - 'pool-servererror' (error) PoolCounterWork failed due to a lock service error.
 *   - 'pool-unknownerror' (error) PoolCounterWork failed for an unknown reason.
 *   - 'nopagetext' (error) The page does not exist
 *   - 'missing-revision' (error) No revision was given and the page's latest
 *     revision could not be loaded.
 *   - 'missing-revision-permission' (error) The given revision is not publicly
 *     visible and OPT_NO_AUDIENCE_CHECK was not set.
 * @throws InvalidArgumentException If the revision has no ID while cache updates
 *   are allowed, or if the revision does not belong to the given page.
 */
public function getParserOutput(
	PageRecord $page,
	ParserOptions $parserOptions,
	?RevisionRecord $revision = null,
	int $options = 0
): Status {
	$error = $this->checkPreconditions( $page, $revision, $options );
	if ( $error ) {
		$this->statsFactory
			->getCounter( 'parseroutputaccess_case' )
			->setLabel( 'case', 'error' )
			->copyToStatsdAt( 'ParserOutputAccess.Case.error' )
			->increment();
		return $error;
	}

	$isOld = $revision && $revision->getId() !== $page->getLatest();
	$case = $isOld ? 'old' : 'current';
	$this->statsFactory
		->getCounter( 'parseroutputaccess_case' )
		->setLabel( 'case', $case )
		->copyToStatsdAt( "ParserOutputAccess.Case.$case" )
		->increment();

	if ( !( $options & self::OPT_NO_CHECK_CACHE ) ) {
		// Forward $options so that OPT_IGNORE_PROFILE_VERSION is honoured by the
		// cache lookup instead of being silently dropped.
		$output = $this->getCachedParserOutput( $page, $parserOptions, $revision, $options );
		if ( $output ) {
			return Status::newGood( $output );
		}
	}

	if ( !$revision ) {
		// No revision given: fall back to the page's latest revision.
		$revId = $page->getLatest();
		$revision = $revId ? $this->revisionLookup->getRevisionById( $revId ) : null;

		if ( !$revision ) {
			$this->statsFactory
				->getCounter( 'parseroutputaccess_status' )
				->setLabel( 'status', 'norev' )
				->copyToStatsdAt( "ParserOutputAccess.Status.norev" )
				->increment();
			return Status::newFatal( 'missing-revision', $revId );
		}
	}

	if ( $options & self::OPT_FOR_ARTICLE_VIEW ) {
		$work = $this->newPoolWorkArticleView( $page, $parserOptions, $revision, $options );
		/** @var Status $status */
		$status = $work->execute();
	} else {
		// XXX: we could try harder to reuse a cache lookup above to
		// provide the $previous argument here
		$status = $this->renderRevision( $page, $parserOptions, $revision, $options, null );
	}

	$output = $status->getValue();
	Assert::postcondition( $output || !$status->isOK(), 'Inconsistent status' );

	// Remember output for the current revision for subsequent calls (T301310).
	if ( $output && !$isOld ) {
		$primaryCache = $this->getPrimaryCache( $parserOptions );
		$classCacheKey = $primaryCache->makeParserOutputKey( $page, $parserOptions );
		$this->localCache->setField( $classCacheKey, $page->getLatest(), $output );
	}

	// 'good' means no errors or warnings, 'ok' means warnings only.
	if ( $status->isGood() ) {
		$statusLabel = 'good';
	} elseif ( $status->isOK() ) {
		$statusLabel = 'ok';
	} else {
		$statusLabel = 'error';
	}
	$this->statsFactory->getCounter( 'parseroutputaccess_status' )
		->setLabel( 'status', $statusLabel )
		->copyToStatsdAt( "ParserOutputAccess.Status.$statusLabel" )
		->increment();

	return $status;
}
/**
 * Render the given revision without PoolCounter protection.
 *
 * The parser cache is updated if appropriate, and a links update is
 * triggered if OPT_LINKS_UPDATE is set.
 *
 * No access checks are performed, and cached output is never returned;
 * the caller is assumed to have taken care of both. A (possibly stale)
 * entry from the primary cache may however be looked up to serve as the
 * previous output for the renderer.
 *
 * Where possible, pass in a $previousOutput, which will prevent an
 * unnecessary double-lookup in the cache.
 *
 * @see PoolWorkArticleView::renderRevision
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOptions
 * @param RevisionRecord $revision
 * @param int $options Bitfield using the OPT_XXX constants
 * @param ParserOutput|null $previousOutput Earlier output for the page, if known
 *
 * @return Status A good status containing the ParserOutput
 */
private function renderRevision(
	PageRecord $page,
	ParserOptions $parserOptions,
	RevisionRecord $revision,
	int $options,
	?ParserOutput $previousOutput = null
): Status {
	$this->statsFactory->getCounter( 'parseroutputaccess_poolwork' )
		->copyToStatsdAt( 'ParserOutputAccess.PoolWork.None' )
		->setLabel( 'cache', self::CACHE_NONE )
		->increment();

	$cacheMode = $this->shouldUseCache( $page, $revision );

	// T371713: Temporary statistics collection code to determine
	// feasibility of Parsoid selective update
	$sampleRate = MediaWikiServices::getInstance()->getMainConfig()->get(
		MainConfigNames::ParsoidSelectiveUpdateSampleRate
	);
	$isSampled = $sampleRate && mt_rand( 1, $sampleRate ) === 1;

	// If $cacheMode === self::CACHE_SECONDARY we could potentially
	// try to reuse the parse of $revision-1 from the secondary cache,
	// but it is likely those template transclusions are out of date.
	// Try to reuse the template transclusions from the most recent
	// parse, which are more likely to reflect the current template.
	$wantsPrevious = $previousOutput === null
		&& ( $isSampled || $parserOptions->getUseParsoid() );
	if ( $wantsPrevious && !( $options & self::OPT_NO_CHECK_CACHE ) ) {
		$dirtyOutput = $this->getPrimaryCache( $parserOptions )->getDirty( $page, $parserOptions );
		$previousOutput = $dirtyOutput ?: null;
	}

	$renderHints = [
		'audience' => RevisionRecord::RAW,
		'previous-output' => $previousOutput,
	];
	$output = $this->revisionRenderer
		->getRenderedRevision( $revision, $parserOptions, null, $renderHints )
		->getRevisionParserOutput();

	if ( $isSampled ) {
		$countMetric = $this->statsFactory->getCounter( 'parsercache_selective_total' );
		$cpuMetric = $this->statsFactory->getCounter( 'parsercache_selective_cpu_seconds' );
		$sampleLabels = [
			'source' => 'ParserOutputAccess',
			'type' => $previousOutput === null ? 'full' : 'selective',
			'reason' => $parserOptions->getRenderReason(),
			'parser' => $parserOptions->getUseParsoid() ? 'parsoid' : 'legacy',
			'opportunistic' => 'false',
		];
		foreach ( $sampleLabels as $labelName => $labelValue ) {
			$countMetric->setLabel( $labelName, $labelValue );
			$cpuMetric->setLabel( $labelName, $labelValue );
		}
		$countMetric->increment();
		$cpuMetric->incrementBy( $output->getTimeProfile( 'cpu' ) );
	}

	// Store the result in whichever cache applies, unless told not to.
	$mayUpdateCache = !( $options & self::OPT_NO_UPDATE_CACHE ) && $output->isCacheable();
	if ( $mayUpdateCache && $cacheMode === self::CACHE_PRIMARY ) {
		$this->getPrimaryCache( $parserOptions )
			->save( $output, $page, $parserOptions );
	} elseif ( $mayUpdateCache && $cacheMode === self::CACHE_SECONDARY ) {
		$this->getSecondaryCache( $parserOptions )
			->save( $output, $revision, $parserOptions );
	}

	if ( $options & self::OPT_LINKS_UPDATE ) {
		$wikiPage = $this->wikiPageFactory->newFromTitle( $page );
		$wikiPage->triggerOpportunisticLinksUpdate( $output );
	}

	return Status::newGood( $output );
}
/**
 * Check whether output may be generated for the given page and revision.
 *
 * @param PageRecord $page
 * @param RevisionRecord|null $revision
 * @param int $options Bitfield using the OPT_XXX constants
 *
 * @return Status|null A fatal Status if the page does not exist or the
 *   revision is not publicly visible (and OPT_NO_AUDIENCE_CHECK is not set),
 *   or null if all checks passed.
 * @throws InvalidArgumentException If the revision has no ID while
 *   OPT_NO_UPDATE_CACHE is not set, or if it belongs to a different page.
 */
private function checkPreconditions(
	PageRecord $page,
	?RevisionRecord $revision = null,
	int $options = 0
): ?Status {
	if ( !$page->exists() ) {
		return Status::newFatal( 'nopagetext' );
	}

	// All remaining checks concern the revision.
	if ( $revision === null ) {
		return null;
	}

	if ( !( $options & self::OPT_NO_UPDATE_CACHE ) && !$revision->getId() ) {
		throw new InvalidArgumentException(
			'The revision does not have a known ID. Use OPT_NO_CACHE.'
		);
	}

	if ( $revision->getPageId() !== $page->getId() ) {
		throw new InvalidArgumentException(
			'The revision does not belong to the given page.'
		);
	}

	// NOTE: If per-user checks are desired, the caller should perform them and
	//       then set OPT_NO_AUDIENCE_CHECK if they passed.
	$skipAudienceCheck = (bool)( $options & self::OPT_NO_AUDIENCE_CHECK );
	if (
		$skipAudienceCheck ||
		$revision->audienceCan( RevisionRecord::DELETED_TEXT, RevisionRecord::FOR_PUBLIC )
	) {
		return null;
	}

	return Status::newFatal(
		'missing-revision-permission',
		$revision->getId(),
		$revision->getTimestamp(),
		$this->titleFormatter->getPrefixedDBkey( $page )
	);
}
/**
 * Create the PoolCounterWork that renders the given revision, choosing the
 * implementation according to the cache that applies to it:
 * PoolWorkArticleViewCurrent for the primary cache, PoolWorkArticleViewOld
 * for the secondary cache, and plain PoolWorkArticleView when no cache applies.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOptions
 * @param RevisionRecord $revision
 * @param int $options Bitfield using the OPT_XXX constants
 *
 * @return PoolCounterWork
 */
protected function newPoolWorkArticleView(
	PageRecord $page,
	ParserOptions $parserOptions,
	RevisionRecord $revision,
	int $options
): PoolCounterWork {
	$cacheMode = $this->shouldUseCache( $page, $revision );
	$poolWorkCounter = $this->statsFactory->getCounter( 'parseroutputaccess_poolwork' );

	if ( $cacheMode === self::CACHE_PRIMARY ) {
		$poolWorkCounter
			->setLabel( 'cache', self::CACHE_PRIMARY )
			->copyToStatsdAt( 'ParserOutputAccess.PoolWork.Current' )
			->increment();

		$primaryCache = $this->getPrimaryCache( $parserOptions );
		$metadata = $primaryCache->getMetadata( $page );
		$usedOptions = $metadata ? $metadata->getUsedOptions() : null;
		$cacheKey = $primaryCache->makeParserOutputKey( $page, $parserOptions, $usedOptions );
		$updateCache = !( $options & self::OPT_NO_UPDATE_CACHE );
		$linksUpdate = (bool)( $options & self::OPT_LINKS_UPDATE );

		return new PoolWorkArticleViewCurrent(
			$cacheKey . ':revid:' . $revision->getId(),
			$page,
			$revision,
			$parserOptions,
			$this->revisionRenderer,
			$primaryCache,
			$this->lbFactory,
			$this->chronologyProtector,
			$this->loggerSpi,
			$this->wikiPageFactory,
			$updateCache,
			$linksUpdate
		);
	}

	if ( $cacheMode === self::CACHE_SECONDARY ) {
		$poolWorkCounter
			->setLabel( 'cache', self::CACHE_SECONDARY )
			->copyToStatsdAt( 'ParserOutputAccess.PoolWork.Old' )
			->increment();

		$secondaryCache = $this->getSecondaryCache( $parserOptions );

		return new PoolWorkArticleViewOld(
			$secondaryCache->makeParserOutputKey( $revision, $parserOptions ),
			$secondaryCache,
			$revision,
			$parserOptions,
			$this->revisionRenderer,
			$this->loggerSpi
		);
	}

	// No cache applies; the secondary cache is only used to derive the work key.
	$poolWorkCounter
		->setLabel( 'cache', self::CACHE_NONE )
		->copyToStatsdAt( 'ParserOutputAccess.PoolWork.Uncached' )
		->increment();

	$secondaryCache = $this->getSecondaryCache( $parserOptions );

	return new PoolWorkArticleView(
		$secondaryCache->makeParserOutputKeyOptionalRevId( $revision, $parserOptions ),
		$revision,
		$parserOptions,
		$this->revisionRenderer,
		$this->loggerSpi
	);
}
/**
 * Get the primary cache matching the parser selected by the given options:
 * the Parsoid-specific ParserCache if Parsoid is in use, the default one otherwise.
 *
 * @param ParserOptions $pOpts
 *
 * @return ParserCache
 */
private function getPrimaryCache( ParserOptions $pOpts ): ParserCache {
	$cacheName = $pOpts->getUseParsoid()
		? self::PARSOID_PCACHE_NAME
		: ParserCacheFactory::DEFAULT_NAME;

	return $this->parserCacheFactory->getParserCache( $cacheName );
}
/**
 * Get the secondary cache matching the parser selected by the given options:
 * the Parsoid-specific RevisionOutputCache if Parsoid is in use, the default
 * one otherwise.
 *
 * @param ParserOptions $pOpts
 *
 * @return RevisionOutputCache
 */
private function getSecondaryCache( ParserOptions $pOpts ): RevisionOutputCache {
	$cacheName = $pOpts->getUseParsoid()
		? self::PARSOID_RCACHE_NAME
		: ParserCacheFactory::DEFAULT_RCACHE_NAME;

	return $this->parserCacheFactory->getRevisionOutputCache( $cacheName );
}
}