wikimedia/mediawiki-core

View on GitHub
includes/Rest/Handler/ParsoidHandler.php

Summary

Maintainability
F
6 days
Test Coverage
<?php
/**
 * Copyright (C) 2011-2020 Wikimedia Foundation and others.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

namespace MediaWiki\Rest\Handler;

use Composer\Semver\Semver;
use ExtensionRegistry;
use InvalidArgumentException;
use LanguageCode;
use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
use LogicException;
use MediaWiki\Context\RequestContext;
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Page\ProperPageIdentity;
use MediaWiki\Rest\Handler;
use MediaWiki\Rest\Handler\Helper\HtmlInputTransformHelper;
use MediaWiki\Rest\Handler\Helper\HtmlOutputRendererHelper;
use MediaWiki\Rest\Handler\Helper\ParsoidFormatHelper;
use MediaWiki\Rest\HttpException;
use MediaWiki\Rest\LocalizedHttpException;
use MediaWiki\Rest\Response;
use MediaWiki\Revision\MutableRevisionRecord;
use MediaWiki\Revision\RevisionAccessException;
use MediaWiki\Revision\RevisionLookup;
use MediaWiki\Revision\SlotRecord;
use MediaWiki\Revision\SuppressedDataException;
use MediaWiki\Title\MalformedTitleException;
use MediaWiki\Title\Title;
use MediaWiki\WikiMap\WikiMap;
use MobileContext;
use Wikimedia\Http\HttpAcceptParser;
use Wikimedia\Message\DataMessageValue;
use Wikimedia\Message\MessageValue;
use Wikimedia\Parsoid\Config\DataAccess;
use Wikimedia\Parsoid\Config\PageConfig;
use Wikimedia\Parsoid\Config\PageConfigFactory;
use Wikimedia\Parsoid\Config\SiteConfig;
use Wikimedia\Parsoid\Core\ClientError;
use Wikimedia\Parsoid\Core\PageBundle;
use Wikimedia\Parsoid\Core\ResourceLimitExceededException;
use Wikimedia\Parsoid\DOM\Document;
use Wikimedia\Parsoid\Parsoid;
use Wikimedia\Parsoid\Utils\ContentUtils;
use Wikimedia\Parsoid\Utils\DOMCompat;
use Wikimedia\Parsoid\Utils\DOMUtils;
use Wikimedia\Parsoid\Utils\Timing;
use WikitextContent;

/**
 * Base class for Parsoid handlers.
 * @internal For use by the Parsoid extension
 */
abstract class ParsoidHandler extends Handler {

    // TODO logging, timeouts(?), CORS
    // TODO content negotiation (routes.js routes.acceptable)
    // TODO handle MaxConcurrentCallsError (pool counter?)

    /** @var SiteConfig */
    protected $siteConfig;

    /** @var PageConfigFactory */
    protected $pageConfigFactory;

    /** @var DataAccess */
    protected $dataAccess;

    /** @var ExtensionRegistry */
    protected $extensionRegistry;

    /** @var ?StatsdDataFactoryInterface A statistics aggregator */
    protected $metrics;

    /** @var array */
    private $requestAttributes;

    private RevisionLookup $revisionLookup;

    /**
     * @return static
     */
    public static function factory(): ParsoidHandler {
        $services = MediaWikiServices::getInstance();
        // @phan-suppress-next-line PhanTypeInstantiateAbstractStatic
        return new static(
            $services->getRevisionLookup(),
            $services->getParsoidSiteConfig(),
            $services->getParsoidPageConfigFactory(),
            $services->getParsoidDataAccess()
        );
    }

    /**
     * @param RevisionLookup $revisionLookup
     * @param SiteConfig $siteConfig
     * @param PageConfigFactory $pageConfigFactory
     * @param DataAccess $dataAccess
     */
    public function __construct(
        RevisionLookup $revisionLookup,
        SiteConfig $siteConfig,
        PageConfigFactory $pageConfigFactory,
        DataAccess $dataAccess
    ) {
        $this->revisionLookup = $revisionLookup;
        $this->siteConfig = $siteConfig;
        $this->pageConfigFactory = $pageConfigFactory;
        $this->dataAccess = $dataAccess;
        $this->extensionRegistry = ExtensionRegistry::getInstance();
        $this->metrics = $siteConfig->metrics();
    }

    /**
     * Verify that the {domain} path parameter matches the actual domain.
     * @todo Remove this when we no longer need to support the {domain}
     *       parameter with backwards compatibility with the parsoid
     *       extension.
     * @param string $domain Domain name parameter to validate
     */
    protected function assertDomainIsCorrect( $domain ): void {
        // We are cutting some corners here (IDN, non-ASCII casing)
        // since domain name support is provisional.
        // TODO use a proper validator instead
        $server = RequestContext::getMain()->getConfig()->get( MainConfigNames::Server );
        $expectedDomain = parse_url( $server, PHP_URL_HOST );
        if ( !$expectedDomain ) {
            throw new LogicException( 'Cannot parse $wgServer' );
        }
        if ( strcasecmp( $expectedDomain, $domain ) === 0 ) {
            return;
        }

        // TODO: This should really go away! It's only acceptable because
        //       this entire method is going to be removed once we no longer
        //       need the parsoid extension endpoints with the {domain} parameter.
        if ( $this->extensionRegistry->isLoaded( 'MobileFrontend' ) ) {
            // @phan-suppress-next-line PhanUndeclaredClassMethod
            $mobileServer = MobileContext::singleton()->getMobileUrl( $server );
            $expectedMobileDomain = parse_url( $mobileServer, PHP_URL_HOST );
            if ( $expectedMobileDomain && strcasecmp( $expectedMobileDomain, $domain ) === 0 ) {
                return;
            }
        }

        $msg = new DataMessageValue(
            'mwparsoid-invalid-domain',
            [],
            'invalid-domain',
            [ 'expected' => $expectedDomain, 'actual' => $domain, ]
        );

        throw new LocalizedHttpException( $msg, 400, [
            'error' => 'parameter-validation-failed',
            'name' => 'domain',
            'value' => $domain,
            'failureCode' => $msg->getCode(),
            'failureData' => $msg->getData(),
        ] );
    }

    /**
     * Get the parsed body by content-type
     *
     * @return array
     */
    protected function getParsedBody(): array {
        $request = $this->getRequest();
        [ $contentType ] = explode( ';', $request->getHeader( 'Content-Type' )[0] ?? '', 2 );
        switch ( $contentType ) {
            case 'application/x-www-form-urlencoded':
            case 'multipart/form-data':
                return $request->getPostParams();
            case 'application/json':
                $json = json_decode( $request->getBody()->getContents(), true );
                if ( !is_array( $json ) ) {
                    throw new LocalizedHttpException(
                        new MessageValue( "rest-json-body-parse-error", [ 'not a valid JSON object' ] ), 400 );
                }
                return $json;
            default:
                throw new LocalizedHttpException(
                    new MessageValue( "rest-unsupported-content-type", [ $contentType ?? '(null)' ] ),
                    415
                );
        }
    }

    /**
     * Rough equivalent of req.local from Parsoid-JS.
     * FIXME most of these should be replaced with more native ways of handling the request.
     * @return array
     */
    protected function &getRequestAttributes(): array {
        if ( $this->requestAttributes ) {
            return $this->requestAttributes;
        }

        $request = $this->getRequest();
        $body = ( $request->getMethod() === 'POST' ) ? $this->getParsedBody() : [];
        $opts = array_merge( $body, array_intersect_key( $request->getPathParams(),
            [ 'from' => true, 'format' => true ] ) );
        '@phan-var array<string,array|bool|string> $opts'; // @var array<string,array|bool|string> $opts
        $contentLanguage = $request->getHeaderLine( 'Content-Language' ) ?: null;
        if ( $contentLanguage ) {
            $contentLanguage = LanguageCode::normalizeNonstandardCodeAndWarn(
                $contentLanguage
            );
        }
        $attribs = [
            'pageName' => $request->getPathParam( 'title' ) ?? '',
            'oldid' => $request->getPathParam( 'revision' ),
            // "body_only" flag to return just the body (instead of the entire HTML doc)
            // We would like to deprecate use of this flag: T181657
            'body_only' => $request->getQueryParams()['body_only'] ?? $body['body_only'] ?? null,
            'errorEnc' => ParsoidFormatHelper::ERROR_ENCODING[$opts['format']] ?? 'plain',
            'iwp' => WikiMap::getCurrentWikiId(), // PORT-FIXME verify
            'offsetType' => $body['offsetType']
                ?? $request->getQueryParams()['offsetType']
                // Lint requests should return UCS2 offsets by default
                ?? ( $opts['format'] === ParsoidFormatHelper::FORMAT_LINT ? 'ucs2' : 'byte' ),
            'pagelanguage' => $contentLanguage,
        ];

        // For use in getHtmlOutputRendererHelper
        $opts['stash'] = $request->getQueryParams()['stash'] ?? false;

        if ( $request->getMethod() === 'POST' ) {
            if ( isset( $opts['original']['revid'] ) ) {
                $attribs['oldid'] = $opts['original']['revid'];
            }
            if ( isset( $opts['original']['title'] ) ) {
                $attribs['pageName'] = $opts['original']['title'];
            }
        }
        if ( $attribs['oldid'] !== null ) {
            if ( $attribs['oldid'] === '' ) {
                $attribs['oldid'] = null;
            } else {
                $attribs['oldid'] = (int)$attribs['oldid'];
            }
        }

        // For use in getHtmlOutputRendererHelper
        $opts['accept-language'] = $request->getHeaderLine( 'Accept-Language' ) ?: null;

        $acceptLanguage = null;
        if ( $opts['accept-language'] !== null ) {
            $acceptLanguage = LanguageCode::normalizeNonstandardCodeAndWarn(
                $opts['accept-language']
            );
        }

        // Init pageName if oldid is provided and is a valid revision
        if ( ( $attribs['pageName'] === '' ) && $attribs['oldid'] ) {
            $rev = $this->revisionLookup->getRevisionById( $attribs['oldid'] );
            if ( $rev ) {
                $attribs['pageName'] = $rev->getPage()->getDBkey();
            }
        }

        $attribs['envOptions'] = [
            // We use `prefix` but ought to use `domain` (T206764)
            'prefix' => $attribs['iwp'],
            // For the legacy "domain" path parameter used by the endpoints exposed
            // by the parsoid extension. Will be null for core endpoints.
            'domain' => $request->getPathParam( 'domain' ),
            'pageName' => $attribs['pageName'],
            'cookie' => $request->getHeaderLine( 'Cookie' ),
            'reqId' => $request->getHeaderLine( 'X-Request-Id' ),
            'userAgent' => $request->getHeaderLine( 'User-Agent' ),
            'htmlVariantLanguage' => $acceptLanguage,
            // Semver::satisfies checks below expect a valid outputContentVersion value.
            // Better to set it here instead of adding the default value at every check.
            'outputContentVersion' => Parsoid::defaultHTMLVersion(),
        ];

        # Convert language codes in $opts['updates']['variant'] if present
        $sourceVariant = $opts['updates']['variant']['source'] ?? null;
        if ( $sourceVariant ) {
            $sourceVariant = LanguageCode::normalizeNonstandardCodeAndWarn(
                $sourceVariant
            );
            $opts['updates']['variant']['source'] = $sourceVariant;
        }
        $targetVariant = $opts['updates']['variant']['target'] ?? null;
        if ( $targetVariant ) {
            $targetVariant = LanguageCode::normalizeNonstandardCodeAndWarn(
                $targetVariant
            );
            $opts['updates']['variant']['target'] = $targetVariant;
        }
        if ( isset( $opts['wikitext']['headers']['content-language'] ) ) {
            $contentLanguage = $opts['wikitext']['headers']['content-language'];
            $contentLanguage = LanguageCode::normalizeNonstandardCodeAndWarn(
                $contentLanguage
            );
            $opts['wikitext']['headers']['content-language'] = $contentLanguage;
        }
        if ( isset( $opts['original']['wikitext']['headers']['content-language'] ) ) {
            $contentLanguage = $opts['original']['wikitext']['headers']['content-language'];
            $contentLanguage = LanguageCode::normalizeNonstandardCodeAndWarn(
                $contentLanguage
            );
            $opts['original']['wikitext']['headers']['content-language'] = $contentLanguage;
        }

        $attribs['opts'] = $opts;

        // TODO: Remove assertDomainIsCorrect() once we no longer need to support the {domain}
        //       parameter for the endpoints exposed by the parsoid extension.
        if ( $attribs['envOptions']['domain'] !== null ) {
            $this->assertDomainIsCorrect( $attribs['envOptions']['domain'] );
        }

        $this->requestAttributes = $attribs;
        return $this->requestAttributes;
    }

    /**
     * @param array $attribs
     * @param ?string $source
     * @param PageIdentity $page
     * @param ?int $revId
     *
     * @return HtmlOutputRendererHelper
     */
    private function getHtmlOutputRendererHelper(
        array $attribs,
        ?string $source,
        PageIdentity $page,
        ?int $revId
    ): HtmlOutputRendererHelper {
        $services = MediaWikiServices::getInstance();

        // Request lenient rev handling
        $lenientRevHandling = true;
        $helper = $services->getPageRestHelperFactory()->newHtmlOutputRendererHelper( $lenientRevHandling );

        $authority = $this->getAuthority();

        $params = [];
        $helper->init( $page, $params, $authority, $revId );

        // XXX: should default to the page's content model?
        $model = $attribs['opts']['contentmodel']
            ?? ( $attribs['envOptions']['contentmodel'] ?? CONTENT_MODEL_WIKITEXT );

        if ( $source !== null ) {
            $helper->setContentSource( $source, $model );
        }

        if ( isset( $attribs['opts']['stash'] ) ) {
            $helper->setStashingEnabled( $attribs['opts']['stash'] );
        }

        if ( isset( $attribs['envOptions']['outputContentVersion'] ) ) {
            $helper->setOutputProfileVersion( $attribs['envOptions']['outputContentVersion'] );
        }

        if ( isset( $attribs['pagelanguage'] ) ) {
            $helper->setPageLanguage( $attribs['pagelanguage'] );
        }

        if ( isset( $attribs['opts']['accept-language'] ) ) {
            $helper->setVariantConversionLanguage( $attribs['opts']['accept-language'] );
        }

        return $helper;
    }

    /**
     * @param array $attribs
     * @param string $html
     * @param PageIdentity $page
     *
     * @return HtmlInputTransformHelper
     */
    protected function getHtmlInputTransformHelper(
        array $attribs,
        string $html,
        PageIdentity $page
    ): HtmlInputTransformHelper {
        $services = MediaWikiServices::getInstance();

        $helper = $services->getPageRestHelperFactory()->newHtmlInputTransformHelper(
            $attribs['envOptions']
        );

        $metrics = $this->siteConfig->metrics();

        if ( $metrics ) {
            $helper->setMetrics( $metrics );
        }

        $parameters = $attribs['opts'] + $attribs;
        $body = $attribs['opts'];

        $body['html'] = $html;

        $helper->init( $page, $body, $parameters );

        return $helper;
    }

    /**
     * FIXME: Combine with ParsoidFormatHelper::parseContentTypeHeader
     */
    private const NEW_SPEC =
        '#^https://www.mediawiki.org/wiki/Specs/(HTML|pagebundle)/(\d+\.\d+\.\d+)$#D';

    /**
     * This method checks if we support the requested content formats
     * As a side-effect, it updates $attribs to set outputContentVersion
     * that Parsoid should generate based on request headers.
     *
     * @param array &$attribs Request attributes from getRequestAttributes()
     * @return bool
     */
    protected function acceptable( array &$attribs ): bool {
        $request = $this->getRequest();
        $format = $attribs['opts']['format'];

        if ( $format === ParsoidFormatHelper::FORMAT_WIKITEXT ) {
            return true;
        }

        $acceptHeader = $request->getHeader( 'Accept' );
        if ( !$acceptHeader ) {
            return true;
        }

        $parser = new HttpAcceptParser();
        $acceptableTypes = $parser->parseAccept( $acceptHeader[0] );  // FIXME: Multiple headers valid?
        if ( !$acceptableTypes ) {
            return true;
        }

        // `acceptableTypes` is already sorted by quality.
        foreach ( $acceptableTypes as $t ) {
            $type = "{$t['type']}/{$t['subtype']}";
            $profile = $t['params']['profile'] ?? null;
            if (
                ( $format === ParsoidFormatHelper::FORMAT_HTML && $type === 'text/html' ) ||
                ( $format === ParsoidFormatHelper::FORMAT_PAGEBUNDLE && $type === 'application/json' )
            ) {
                if ( $profile ) {
                    preg_match( self::NEW_SPEC, $profile, $matches );
                    if ( $matches && strtolower( $matches[1] ) === $format ) {
                        $contentVersion = Parsoid::resolveContentVersion( $matches[2] );
                        if ( $contentVersion ) {
                            // $attribs mutated here!
                            $attribs['envOptions']['outputContentVersion'] = $contentVersion;
                            return true;
                        } else {
                            continue;
                        }
                    } else {
                        continue;
                    }
                } else {
                    return true;
                }
            } elseif (
                ( $type === '*/*' ) ||
                ( $format === ParsoidFormatHelper::FORMAT_HTML && $type === 'text/*' )
            ) {
                return true;
            }
        }

        return false;
    }

    /**
     * Try to create a PageConfig object. If we get an exception (because content
     * may be missing or inaccessible), throw an appropriate HTTP response object
     * for callers to handle.
     *
     * @param array $attribs
     * @param ?string $wikitextOverride
     *   Custom wikitext to use instead of the real content of the page.
     * @param bool $html2WtMode
     * @return PageConfig
     * @throws HttpException
     */
    protected function tryToCreatePageConfig(
        array $attribs, ?string $wikitextOverride = null, bool $html2WtMode = false
    ): PageConfig {
        $revId = $attribs['oldid'];
        $pagelanguageOverride = $attribs['pagelanguage'];
        $title = $attribs['pageName'];

        $title = ( $title !== '' ) ? Title::newFromText( $title ) : Title::newMainPage();
        if ( !$title ) {
            // TODO use proper validation
            throw new LogicException( 'Title not found!' );
        }
        $user = RequestContext::getMain()->getUser();

        if ( $wikitextOverride === null ) {
            $revisionRecord = null;
        } else {
            // Create a mutable revision record point to the same revision
            // and set to the desired wikitext.
            $revisionRecord = new MutableRevisionRecord( $title );
            // Don't set id to $revId if we have $wikitextOverride
            // A revision corresponds to specific wikitext, which $wikitextOverride
            // might not be.
            $revisionRecord->setId( 0 );
            $revisionRecord->setSlot(
                SlotRecord::newUnsaved(
                    SlotRecord::MAIN,
                    new WikitextContent( $wikitextOverride )
                )
            );
        }

        $hasOldId = ( $revId !== null );
        $ensureAccessibleContent = !$html2WtMode || $hasOldId;

        try {
            // Note: Parsoid by design isn't supposed to use the user
            // context right now, and all user state is expected to be
            // introduced as a post-parse transform.  So although we pass a
            // User here, it only currently affects the output in obscure
            // corner cases; see PageConfigFactory::create() for more.
            // @phan-suppress-next-line PhanUndeclaredMethod method defined in subtype
            $pageConfig = $this->pageConfigFactory->create(
                $title, $user, $revisionRecord ?? $revId, null, $pagelanguageOverride,
                $ensureAccessibleContent
            );
        } catch ( SuppressedDataException $e ) {
            throw new LocalizedHttpException(
                new MessageValue( "rest-permission-denied-revision", [ $e->getMessage() ] ), 403
            );
        } catch ( RevisionAccessException $e ) {
            throw new LocalizedHttpException(
                new MessageValue( "rest-specified-revision-unavailable", [ $e->getMessage() ] ), 404
            );
        }

        // All good!
        return $pageConfig;
    }

    /**
     * Try to create a PageIdentity object.
     * If no page is specified in the request, this will return the wiki's main page.
     * If an invalid page is requested, this throws an appropriate HTTPException.
     *
     * @param array $attribs
     * @return PageIdentity
     * @throws HttpException
     */
    protected function tryToCreatePageIdentity( array $attribs ): PageIdentity {
        if ( $attribs['pageName'] === '' ) {
            return Title::newMainPage();
        }

        // XXX: Should be injected, but the Parsoid extension relies on the
        //      constructor signature. Also, ParsoidHandler should go away soon anyway.
        $pageStore = MediaWikiServices::getInstance()->getPageStore();

        $page = $pageStore->getPageByText( $attribs['pageName'] );

        if ( !$page ) {
            throw new LocalizedHttpException(
                new MessageValue( "rest-invalid-title", [ 'pageName' ] ), 400
            );
        }

        return $page;
    }

    /**
     * Get the path for the transform endpoint. May be overwritten to override the path.
     *
     * This is done in the parsoid extension, for backwards compatibility
     * with the old endpoint URLs.
     *
     * @stable to override
     *
     * @param string $format The format the endpoint is expected to return.
     *
     * @return string
     */
    protected function getTransformEndpoint( string $format = ParsoidFormatHelper::FORMAT_HTML ): string {
        return '/coredev/v0/transform/{from}/to/{format}/{title}/{revision}';
    }

    /**
     * Get the path for the page content endpoint. May be overwritten to override the path.
     *
     * This is done in the parsoid extension, for backwards compatibility
     * with the old endpoint URLs.
     *
     * @stable to override
     *
     * @param string $format The format the endpoint is expected to return.
     *
     * @return string
     */
    protected function getPageContentEndpoint( string $format = ParsoidFormatHelper::FORMAT_HTML ): string {
        if ( $format !== ParsoidFormatHelper::FORMAT_HTML ) {
            throw new InvalidArgumentException( 'Unsupported page content format: ' . $format );
        }
        return '/v1/page/{title}/html';
    }

    /**
     * Get the path for the page content endpoint. May be overwritten to override the path.
     *
     * This is done in the parsoid extension, for backwards compatibility
     * with the old endpoint URLs.
     *
     * @stable to override
     *
     * @param string $format The format the endpoint is expected to return.
     *
     * @return string
     */
    protected function getRevisionContentEndpoint( string $format = ParsoidFormatHelper::FORMAT_HTML ): string {
        if ( $format !== ParsoidFormatHelper::FORMAT_HTML ) {
            throw new InvalidArgumentException( 'Unsupported revision content format: ' . $format );
        }
        return '/v1/revision/{revision}/html';
    }

    public function wtLint(
        PageConfig $pageConfig, array $attribs, ?array $linterOverrides = []
    ) {
        $envOptions = $attribs['envOptions'] + [
            'linterOverrides' => $linterOverrides,
        ];
        try {
            $parsoid = $this->newParsoid();
            return $parsoid->wikitext2lint( $pageConfig, $envOptions );
        } catch ( ClientError $e ) {
            throw new LocalizedHttpException( new MessageValue( "rest-parsoid-error", [ $e->getMessage() ] ), 400 );
        } catch ( ResourceLimitExceededException $e ) {
            throw new LocalizedHttpException(
                new MessageValue( "rest-parsoid-resource-exceeded", [ $e->getMessage() ] ), 413
            );
        }
    }

    /**
     * Wikitext -> HTML helper.
     * Spec'd in https://phabricator.wikimedia.org/T75955 and the API tests.
     *
     * @param PageConfig $pageConfig
     * @param array $attribs Request attributes from getRequestAttributes()
     * @param ?string $wikitext Wikitext to transform (or null to use the
     *   page specified in the request attributes).
     *
     * @return Response
     */
    protected function wt2html(
        PageConfig $pageConfig, array $attribs, ?string $wikitext = null
    ) {
        $request = $this->getRequest();
        $opts = $attribs['opts'];
        $format = $opts['format'];
        $oldid = $attribs['oldid'];
        $stash = $opts['stash'] ?? false;

        if ( $format === ParsoidFormatHelper::FORMAT_LINT ) {
            $linterOverrides = [];
            if ( $this->extensionRegistry->isLoaded( 'Linter' ) ) { // T360809
                $disabled = [];
                $services = MediaWikiServices::getInstance();
                $linterCategories = $services->getMainConfig()->get( 'LinterCategories' );
                foreach ( $linterCategories as $name => $cat ) {
                    if ( $cat['priority'] === 'none' ) {
                        $disabled[] = $name;
                    }
                }
                $linterOverrides['disabled'] = $disabled;
            }
            $lints = $this->wtLint( $pageConfig, $attribs, $linterOverrides );
            $response = $this->getResponseFactory()->createJson( $lints );
            return $response;
        }

        // TODO: This method should take a PageIdentity + revId,
        //       to reduce the usage of PageConfig in MW core.
        $helper = $this->getHtmlOutputRendererHelper(
            $attribs,
            $wikitext,
            $this->pageConfigToPageIdentity( $pageConfig ),
            // Id will be 0 if we have $wikitext but that isn't valid
            // to call $helper->setRevision with.  In any case, the revision
            // will be reset when $helper->setContent is called with $wikitext.
            // Ideally, the revision would be pass through here instead of
            // the id and wikitext.
            $pageConfig->getRevisionId() ?: null
        );

        $needsPageBundle = ( $format === ParsoidFormatHelper::FORMAT_PAGEBUNDLE );

        if ( $attribs['body_only'] ) {
            $helper->setFlavor( 'fragment' );
        } elseif ( !$needsPageBundle ) {
            // Inline data-parsoid. This will happen when no special params are set.
            $helper->setFlavor( 'edit' );
        }

        if ( $wikitext === null && $oldid !== null ) {
            $mstr = 'pageWithOldid';
        } else {
            $mstr = 'wt';
        }

        $parseTiming = Timing::start();

        if ( $needsPageBundle ) {
            $pb = $helper->getPageBundle();

            // Handle custom offset requests as a pb2pb transform
            if ( $attribs['offsetType'] !== 'byte' ) {
                $parsoid = $this->newParsoid();
                $pb = $parsoid->pb2pb(
                    $pageConfig,
                    'convertoffsets',
                    $pb,
                    [
                        'inputOffsetType' => 'byte',
                        'outputOffsetType' => $attribs['offsetType']
                    ]
                );
            }

            $response = $this->getResponseFactory()->createJson( $pb->responseData() );
            $helper->putHeaders( $response, false );

            ParsoidFormatHelper::setContentType(
                $response,
                ParsoidFormatHelper::FORMAT_PAGEBUNDLE,
                $pb->version
            );
        } else {
            $out = $helper->getHtml();

            // TODO: offsetType conversion isn't supported right now for non-pagebundle endpoints
            // Once the OutputTransform framework lands, we might revisit this.

            $response = $this->getResponseFactory()->create();
            $response->getBody()->write( $out->getRawText() );

            $helper->putHeaders( $response, true );

            // Emit an ETag only if stashing is enabled. It's not reliably useful otherwise.
            if ( $stash ) {
                $eTag = $helper->getETag();
                if ( $eTag ) {
                    $response->setHeader( 'ETag', $eTag );
                }
            }
        }

        // XXX: For pagebundle requests, this can be somewhat inflated
        // because of pagebundle json-encoding overheads
        $outSize = $response->getBody()->getSize();
        $parseTime = $parseTiming->end();

        // Ignore slow parse metrics for non-oldid parses
        if ( $mstr === 'pageWithOldid' ) {
            if ( $parseTime > 3000 ) {
                LoggerFactory::getInstance( 'slow-parsoid' )
                    ->info( 'Parsing {title} was slow, took {time} seconds', [
                        'time' => number_format( $parseTime / 1000, 2 ),
                        'title' => Title::newFromLinkTarget( $pageConfig->getLinkTarget() )->getPrefixedText(),
                    ] );
            }

            if ( $parseTime > 10 && $outSize > 100 ) {
                // * Don't bother with this metric for really small parse times
                //   p99 for initialization time is ~7ms according to grafana.
                //   So, 10ms ensures that startup overheads don't skew the metrics
                // * For body_only=false requests, <head> section isn't generated
                //   and if the output is small, per-request overheads can skew
                //   the timePerKB metrics.

                // NOTE: This is slightly misleading since there are fixed costs
                // for generating output like the <head> section and should be factored in,
                // but this is good enough for now as a useful first degree of approxmation.
                $timePerKB = $parseTime * 1024 / $outSize;
                if ( $timePerKB > 500 ) {
                    // At 100ms/KB, even a 100KB page which isn't that large will take 10s.
                    // So, we probably want to shoot for a threshold under 100ms.
                    // But, let's start with 500ms+ outliers first and see what we uncover.
                    LoggerFactory::getInstance( 'slow-parsoid' )
                        ->info( 'Parsing {title} was slow, timePerKB took {timePerKB} ms, total: {time} seconds', [
                            'time' => number_format( $parseTime / 1000, 2 ),
                            'timePerKB' => number_format( $timePerKB, 1 ),
                            'title' => Title::newFromLinkTarget( $pageConfig->getLinkTarget() )->getPrefixedText(),
                        ] );
                }
            }
        }

        if ( $wikitext !== null ) {
            // Don't cache requests when wt is set in case somebody uses
            // GET for wikitext parsing
            // XXX: can we just refuse to do wikitext parsing in a GET request?
            $response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' );
        } elseif ( $oldid !== null ) {
            // XXX: can this go away? Parsoid's PageContent class doesn't expose supressed revision content.
            if ( $request->getHeaderLine( 'Cookie' ) ||
                $request->getHeaderLine( 'Authorization' ) ) {
                // Don't cache requests with a session.
                $response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' );
            }
        }
        return $response;
    }

    protected function newParsoid(): Parsoid {
        return new Parsoid( $this->siteConfig, $this->dataAccess );
    }

    protected function parseHTML( string $html, bool $validateXMLNames = false ): Document {
        return DOMUtils::parseHTML( $html, $validateXMLNames );
    }

    /**
     * @param PageConfig|PageIdentity $page
     * @param array $attribs Attributes gotten from requests
     * @param string $html Original HTML
     *
     * @return Response
     * @throws HttpException
     */
    protected function html2wt(
        $page, array $attribs, string $html
    ) {
        if ( $page instanceof PageConfig ) {
            // TODO: Deprecate passing a PageConfig.
            //       Ideally, callers would use HtmlToContentTransform directly.
            $page = Title::newFromLinkTarget( $page->getLinkTarget() );
        }

        try {
            $transform = $this->getHtmlInputTransformHelper( $attribs, $html, $page );

            $response = $this->getResponseFactory()->create();
            $transform->putContent( $response );

            return $response;
        } catch ( ClientError $e ) {
            throw new LocalizedHttpException( new MessageValue( "rest-parsoid-error", [ $e->getMessage() ] ), 400 );
        }
    }

    /**
     * Pagebundle -> pagebundle helper.
     *
     * @param array<string,array|string> $attribs
     * @return Response
     * @throws HttpException
     */
    protected function pb2pb( array $attribs ) {
        $opts = $attribs['opts'];

        $revision = $opts['previous'] ?? $opts['original'] ?? null;
        if ( !isset( $revision['html'] ) ) {
            throw new LocalizedHttpException( new MessageValue( "rest-missing-revision-html" ), 400 );
        }

        $vOriginal = ParsoidFormatHelper::parseContentTypeHeader(
            $revision['html']['headers']['content-type'] ?? '' );
        if ( $vOriginal === null ) {
            throw new LocalizedHttpException( new MessageValue( "rest-missing-revision-html-content-type" ), 400 );
        }
        $attribs['envOptions']['inputContentVersion'] = $vOriginal;
        '@phan-var array<string,array|string> $attribs'; // @var array<string,array|string> $attribs

        $this->metrics->increment(
            'pb2pb.original.version.' . $attribs['envOptions']['inputContentVersion']
        );

        if ( !empty( $opts['updates'] ) ) {
            // FIXME: Handling missing revisions uniformly for all update types
            // is not probably the right thing to do but probably okay for now.
            // This might need revisiting as we add newer types.
            $pageConfig = $this->tryToCreatePageConfig( $attribs, null, true );
            // If we're only updating parts of the original version, it should
            // satisfy the requested content version, since we'll be returning
            // that same one.
            // FIXME: Since this endpoint applies the acceptable middleware,
            // `getOutputContentVersion` is not what's been passed in, but what
            // can be produced.  Maybe that should be selectively applied so
            // that we can update older versions where it makes sense?
            // Uncommenting below implies that we can only update the latest
            // version, since carrot semantics is applied in both directions.
            // if ( !Semver::satisfies(
            //     $attribs['envOptions']['inputContentVersion'],
            //     "^{$attribs['envOptions']['outputContentVersion']}"
            // ) ) {
            //  throw new HttpException(
            //         'We do not know how to do this conversion.', 415
            //     );
            // }
            if ( !empty( $opts['updates']['redlinks'] ) ) {
                // Q(arlolra): Should redlinks be more complex than a bool?
                // See gwicke's proposal at T114413#2240381
                return $this->updateRedLinks( $pageConfig, $attribs, $revision );
            } elseif ( isset( $opts['updates']['variant'] ) ) {
                return $this->languageConversion( $pageConfig, $attribs, $revision );
            } else {
                throw new LocalizedHttpException( new MessageValue( "rest-unknown-parsoid-transformation" ), 400 );
            }
        }

        // TODO(arlolra): subbu has some sage advice in T114413#2365456 that
        // we should probably be more explicit about the pb2pb conversion
        // requested rather than this increasingly complex fallback logic.
        $downgrade = Parsoid::findDowngrade(
            $attribs['envOptions']['inputContentVersion'],
            $attribs['envOptions']['outputContentVersion']
        );
        if ( $downgrade ) {
            $pb = new PageBundle(
                $revision['html']['body'],
                $revision['data-parsoid']['body'] ?? null,
                $revision['data-mw']['body'] ?? null
            );
            $this->validatePb( $pb, $attribs['envOptions']['inputContentVersion'] );
            Parsoid::downgrade( $downgrade, $pb );

            if ( !empty( $attribs['body_only'] ) ) {
                $doc = $this->parseHTML( $pb->html );
                $body = DOMCompat::getBody( $doc );
                $pb->html = ContentUtils::toXML( $body, [ 'innerXML' => true ] );
            }

            $response = $this->getResponseFactory()->createJson( $pb->responseData() );
            ParsoidFormatHelper::setContentType(
                $response, ParsoidFormatHelper::FORMAT_PAGEBUNDLE, $pb->version
            );
            return $response;
            // Ensure we only reuse from semantically similar content versions.
        } elseif ( Semver::satisfies( $attribs['envOptions']['outputContentVersion'],
            '^' . $attribs['envOptions']['inputContentVersion'] ) ) {
            $pageConfig = $this->tryToCreatePageConfig( $attribs );
            return $this->wt2html( $pageConfig, $attribs );
        } else {
            throw new LocalizedHttpException( new MessageValue( "rest-unsupported-profile-conversion" ), 415 );
        }
    }

    /**
     * Update red links on a document.
     *
     * @param PageConfig $pageConfig
     * @param array $attribs
     * @param array $revision
     * @return Response
     */
    protected function updateRedLinks(
        PageConfig $pageConfig, array $attribs, array $revision
    ) {
        $parsoid = $this->newParsoid();

        $pb = new PageBundle(
            $revision['html']['body'],
            $revision['data-parsoid']['body'] ?? null,
            $revision['data-mw']['body'] ?? null,
            $attribs['envOptions']['inputContentVersion'],
            $revision['html']['headers'] ?? null,
            $revision['contentmodel'] ?? null
        );

        $out = $parsoid->pb2pb( $pageConfig, 'redlinks', $pb, [] );

        $this->validatePb( $out, $attribs['envOptions']['inputContentVersion'] );

        $response = $this->getResponseFactory()->createJson( $out->responseData() );
        ParsoidFormatHelper::setContentType(
            $response, ParsoidFormatHelper::FORMAT_PAGEBUNDLE, $out->version
        );
        return $response;
    }

    /**
     * Do variant conversion on a document.
     *
     * @param PageConfig $pageConfig
     * @param array $attribs
     * @param array $revision
     * @return Response
     * @throws HttpException
     */
    protected function languageConversion(
        PageConfig $pageConfig, array $attribs, array $revision
    ) {
        $opts = $attribs['opts'];
        $target = $opts['updates']['variant']['target'] ??
            $attribs['envOptions']['htmlVariantLanguage'];
        $source = $opts['updates']['variant']['source'] ?? null;

        if ( !$target ) {
            throw new LocalizedHttpException( new MessageValue( "rest-target-variant-required" ), 400 );
        }

        $pageIdentity = $this->tryToCreatePageIdentity( $attribs );

        $pb = new PageBundle(
            $revision['html']['body'],
            $revision['data-parsoid']['body'] ?? null,
            $revision['data-mw']['body'] ?? null,
            $attribs['envOptions']['inputContentVersion'],
            $revision['html']['headers'] ?? null,
            $revision['contentmodel'] ?? null
        );

        // XXX: DI should inject HtmlTransformFactory
        $languageVariantConverter = MediaWikiServices::getInstance()
            ->getHtmlTransformFactory()
            ->getLanguageVariantConverter( $pageIdentity );
        $languageVariantConverter->setPageConfig( $pageConfig );
        $httpContentLanguage = $attribs['pagelanguage' ] ?? null;
        if ( $httpContentLanguage ) {
            $languageVariantConverter->setPageLanguageOverride( $httpContentLanguage );
        }

        try {
            $out = $languageVariantConverter->convertPageBundleVariant( $pb, $target, $source );
        } catch ( InvalidArgumentException $e ) {
            throw new LocalizedHttpException(
                new MessageValue( "rest-unsupported-language-conversion", [ $source ?? '(unspecified)', $target ] ),
                400,
                [ 'reason' => $e->getMessage() ]
            );
        }

        $response = $this->getResponseFactory()->createJson( $out->responseData() );
        ParsoidFormatHelper::setContentType(
            $response, ParsoidFormatHelper::FORMAT_PAGEBUNDLE, $out->version
        );
        return $response;
    }

    /** @inheritDoc */
    abstract public function execute(): Response;

    /**
     * Validate a PageBundle against the given contentVersion, and throw
     * an HttpException if it does not match.
     * @param PageBundle $pb
     * @param string $contentVersion
     * @throws HttpException
     */
    private function validatePb( PageBundle $pb, string $contentVersion ): void {
        $errorMessage = '';
        if ( !$pb->validate( $contentVersion, $errorMessage ) ) {
            throw new LocalizedHttpException(
                new MessageValue( "rest-page-bundle-validation-error", [ $errorMessage ] ),
                400
            );
        }
    }

    /**
     * @param PageConfig $page
     *
     * @return ProperPageIdentity
     * @throws HttpException
     */
    private function pageConfigToPageIdentity( PageConfig $page ): ProperPageIdentity {
        $services = MediaWikiServices::getInstance();

        $title = $page->getLinkTarget();
        try {
            $page = $services->getPageStore()->getPageForLink( $title );
        } catch ( MalformedTitleException | InvalidArgumentException $e ) {
            // Note that even some well-formed links are still invalid
            // parameters for getPageForLink(), e.g. interwiki links or special pages.
            throw new HttpException(
                "Bad title: $title", # uses LinkTarget::__toString()
                400
            );
        }

        return $page;
    }

}