client/includes/Hooks/ShortDescHandler.php from wikimedia/mediawiki-extensions-Wikibase

client/includes/Hooks/ShortDescHandler.php
Summary

Maintainability

25 mins
Test Coverage

Issues
<?php

namespace Wikibase\Client\Hooks;

use MediaWiki\Parser\Parser;
use Wikibase\Client\Store\DescriptionLookup;

/**
 * Handles the {{SHORTDESC:...}} parser function.
 *
 * @license GPL-2.0-or-later
 */
class ShortDescHandler {

    /**
     * Parser function callback for {{SHORTDESC:...}}.
     *
     * Delegates to a fresh handler instance; the parser function itself never
     * emits any wikitext, so the return value is always the empty string.
     *
     * @param Parser $parser
     * @param string $shortDesc Short description of the current page, as given to the
     *  parser function (it is sanitized to plain text by doHandle()).
     * @param string $controlArg An extra argument to control behavior (such as 'noreplace').
     *
     * @return string Always ''.
     */
    public static function handle( Parser $parser, $shortDesc, $controlArg = '' ) {
        self::factory()->doHandle( $parser, $shortDesc, $controlArg );
        return '';
    }

    /**
     * Creates the handler instance used by the static callback.
     *
     * @return self
     */
    private static function factory(): self {
        return new self();
    }

    /**
     * Validates a short description.
     * Valid descriptions are either the empty string (which suppresses the short
     * description) or contain something other than whitespace/punctuation.
     *
     * @param string $shortDesc Short description of the current page, as plain text.
     *
     * @return bool
     */
    public function isValid( $shortDesc ) {
        if ( $shortDesc === '' ) {
            // T326898#8654683 the empty string is a valid value, indicating
            // that the short description should be suppressed.
            return true;
        }
        // preg_match() yields 1 when the text consists solely of whitespace,
        // punctuation and separators, 0 when it has real content, and false on
        // failure (e.g. malformed UTF-8 with the /u modifier). Compare strictly
        // against 0 so that a failed match is not mistaken for a valid description.
        return preg_match( '/^[\s\p{P}\p{Z}]*$/u', $shortDesc ) === 0;
    }

    /**
     * Sanitizes a short description by converting it into plaintext.
     *
     * Note that the sanitized description can still contain HTML (that was encoded as entities in
     * the original) as there is no reason why someone shouldn't mention HTML tags in a description.
     * That means the sanitized value is actually less safe for HTML inclusion than the original
     * one (can contain <script> tags)! It is clients' responsibility to handle it safely.
     *
     * @param string $shortDesc Short description of the current page, as HTML.
     *
     * @return string Plaintext of description.
     */
    public function sanitize( $shortDesc ) {
        // Remove accidental formatting - descriptions are plaintext.
        $shortDesc = strip_tags( $shortDesc );
        // Unescape - clients are not necessarily HTML-based and using HTML tags as part of
        // the description (i.e. with <nowiki> or such) should be possible.
        $shortDesc = html_entity_decode( $shortDesc, ENT_QUOTES, 'utf-8' );
        // Collapse newlines, tabs and other runs of whitespace into a single space.
        $shortDesc = preg_replace( '/\s+/', ' ', $shortDesc );
        // Get rid of leading/trailing space - no valid usecase for it, easy for it to go unnoticed
        // in HTML, and clients might display the description in an environment that does not
        // ignore spaces like HTML does.
        return trim( $shortDesc );
    }

    /**
     * Parser function implementation: records the short description as a page
     * property on the parser output.
     *
     * Nothing is recorded when the 'noreplace' control argument is given and a
     * description has already been set, or when the description is invalid.
     *
     * @param Parser $parser
     * @param string $shortDesc Short description of the current page, as given to the
     *  parser function; it is sanitized to plain text here before being stored.
     * @param string $controlArg An extra argument to control behavior (such as 'noreplace').
     *
     * @return void
     */
    public function doHandle( Parser $parser, $shortDesc, $controlArg ) {
        $noReplace = $this->parseNoReplace( $parser, $controlArg );
        $out = $parser->getOutput();

        // Read and write the same property: use the shared constant for the lookup
        // as well, rather than a hard-coded copy of the property name.
        if ( $noReplace && $out->getPageProperty( DescriptionLookup::LOCAL_PROPERTY_NAME ) !== null ) {
            return;
        }

        // T326898#8654683 the empty string is a valid value, indicating
        // that the short description should be suppressed, but distinguish
        // this from a short description containing junk that happens to be
        // removed during sanitization.  ie '<b></b>' is not valid, even
        // though it sanitizes to '' which is valid.
        if ( $shortDesc !== '' ) {
            $shortDesc = $this->sanitize( $shortDesc );
            if ( $shortDesc === '' || !$this->isValid( $shortDesc ) ) {
                return; // invalid
            }
        }
        $out->setUnsortedPageProperty( DescriptionLookup::LOCAL_PROPERTY_NAME, $shortDesc );
    }

    /**
     * Tells whether the control argument is the 'noreplace' magic word.
     *
     * The magic word object is looked up once and then reused for later calls.
     *
     * @param Parser $parser
     * @param string $controlArg
     * @return bool
     */
    private function parseNoReplace( Parser $parser, $controlArg ): bool {
        static $magicWord = null;
        if ( $magicWord === null ) {
            $magicWord = $parser->getMagicWordFactory()->get( 'shortdesc_noreplace' );
        }
        return $magicWord->matchStartToEnd( $controlArg );
    }

}