wikimedia/mediawiki-extensions-Translate

View on GitHub
src/MessageProcessing/StringMatcher.php

Summary

Maintainability
A
35 mins
Test Coverage
<?php
declare( strict_types = 1 );

namespace MediaWiki\Extension\Translate\MessageProcessing;

use MediaWiki\Extension\Translate\MessageGroupConfiguration\MetaYamlSchemaExtender;
use MediaWiki\Title\Title;

/**
 * The versatile default implementation of StringMangler interface.
 * It supports exact matches and patterns with any-wildcard (*).
 * All matching strings are prefixed with the same prefix.
 *
 * Patterns are pre-sorted into three buckets (exact keys, trailing-wildcard
 * prefixes and regular expressions) so that matching can try the cheapest
 * checks first.
 *
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */
class StringMatcher implements StringMangler, MetaYamlSchemaExtender {
    /** @var string Prefix for mangled message keys */
    protected string $sPrefix = '';
    /** @var string[] Exact message keys */
    protected array $aExact = [];
    /** @var int[] Patterns of type foo*, stored as prefix => byte length of the prefix */
    protected array $aPrefix = [];
    /** @var string[] Patterns that contain wildcard anywhere else than in the end */
    protected array $aRegex = [];

    /**
     * @param string $prefix Prefix prepended to matching keys when mangling.
     * @param string[] $patterns Key patterns; see init() for the supported forms.
     */
    public function __construct( string $prefix = '', array $patterns = [] ) {
        $this->sPrefix = $prefix;
        $this->init( $patterns );
    }

    /**
     * Preprocesses the patterns.
     *
     * They are split into exact keys, prefix matches and pattern matches to
     * speed up matching process:
     *  - no wildcard: stored as an exact key;
     *  - first wildcard is the last character (foo*): stored as a prefix;
     *  - anything else: converted to an anchored regular expression in which
     *    every wildcard stands for one or more characters.
     *
     * Patterns are added to the ones already registered; this method does not
     * clear previous state.
     *
     * @param string[] $strings Key patterns.
     */
    protected function init( array $strings ): void {
        foreach ( $strings as $string ) {
            $pos = strpos( $string, '*' );
            if ( $pos === false ) {
                $this->aExact[] = $string;
            } elseif ( $pos + 1 === strlen( $string ) ) {
                // The only wildcard is the trailing one: a cheap prefix comparison is enough
                $prefix = substr( $string, 0, -1 );
                $this->aPrefix[$prefix] = strlen( $prefix );
            } else {
                // preg_quote escapes the wildcard as \*, which is then swapped for ".+"
                $string = str_replace( '\\*', '.+', preg_quote( $string, '/' ) );
                $this->aRegex[] = "/^$string$/";
            }
        }
    }

    /**
     * Returns the characters that may appear unescaped in a mangled key.
     *
     * The value is Title::legalChars() with the characters that have a special
     * meaning for the escaping removed. It is interpolated into a regular
     * expression character class by mangle(). The result is computed once and
     * cached for the lifetime of the request.
     */
    protected static function getValidKeyChars(): string {
        static $valid = null;
        if ( $valid === null ) {
            $valid = strtr(
                Title::legalChars(),
                [
                    '=' => '', // equals sign, which is itself used for escaping
                    '&' => '', // ampersand, for entities
                    '%' => '', // percent sign, which is used in URL encoding
                ]
            );
        }

        return $valid;
    }

    /**
     * @inheritDoc
     *
     * Replaces the current configuration: both the prefix and the set of
     * patterns are taken from $conf.
     */
    public function setConf( array $conf ): void {
        $this->sPrefix = $conf['prefix'];
        // Drop patterns from any earlier configuration; init() only appends, so
        // without this reset stale patterns would keep matching under the new prefix.
        $this->aExact = [];
        $this->aPrefix = [];
        $this->aRegex = [];
        $this->init( $conf['patterns'] );
    }

    /**
     * @inheritDoc
     *
     * Checks exact keys first, then prefixes, then regular expressions.
     */
    public function matches( string $key ): bool {
        // Strict comparison: loose comparison treats numeric strings as numbers,
        // so e.g. the key "1e3" would wrongly match the exact pattern "1000".
        if ( in_array( $key, $this->aExact, true ) ) {
            return true;
        }

        foreach ( $this->aPrefix as $prefix => $len ) {
            // PHP stores decimal-integer-like array keys as int; cast back to
            // string, otherwise strncmp throws a TypeError under strict_types.
            if ( strncmp( $key, (string)$prefix, $len ) === 0 ) {
                return true;
            }
        }

        foreach ( $this->aRegex as $regex ) {
            if ( preg_match( $regex, $key ) ) {
                return true;
            }
        }

        return false;
    }

    /**
     * @inheritDoc
     *
     * Matching keys get the prefix prepended. Every key, matching or not, is
     * then escaped so that bytes outside the valid set become "=XX", where XX
     * is the upper-case hexadecimal value of the byte.
     */
    public function mangle( string $key ): string {
        if ( $this->matches( $key ) ) {
            $key = $this->sPrefix . $key;
        }

        // Escapes each byte of the matched text as =XX (upper-case hex)
        $escaper = static function ( $match ) {
            $esc = '';
            foreach ( str_split( $match[0] ) as $c ) {
                $esc .= '=' . sprintf( '%02X', ord( $c ) );
            }
            return $esc;
        };

        // Apply a "quoted-printable"-like escaping
        $valid = self::getValidKeyChars();
        $key = preg_replace_callback( "/[^$valid]/", $escaper, $key );
        // Additional limitations in MediaWiki, see MediaWikiTitleCodec::splitTitleString
        $key = preg_replace_callback( '/(~~~|^[ _]|[ _]$|[ _]{2,}|^:)/', $escaper, $key );
        // TODO: length check + truncation
        // TODO: forbid path travels

        return $key;
    }

    /**
     * @inheritDoc
     *
     * Mangles every value of the list; array keys are preserved.
     */
    public function mangleList( array $list ): array {
        return array_map( [ $this, 'mangle' ], $list );
    }

    /**
     * @inheritDoc
     *
     * Mangles the keys of the array and leaves the values untouched.
     */
    public function mangleArray( array $array ): array {
        $out = [];
        foreach ( $array as $key => $value ) {
            // Array keys may have been converted to int by PHP; mangle() requires a string
            $out[$this->mangle( (string)$key )] = $value;
        }

        return $out;
    }

    /**
     * @inheritDoc
     *
     * Reverses the =XX escaping and then strips the prefix, but only if the
     * remainder is a key that matches the configured patterns.
     */
    public function unmangle( string $key ): string {
        // Unescape the "quoted-printable"-like escaping,
        // which is applied in mangle
        $unescapedString = preg_replace_callback(
            // Upper-case hex only, mirroring the %02X format used by mangle
            '/=([A-F0-9]{2})/',
            static function ( $match ) {
                return chr( hexdec( $match[1] ) );
            },
            $key
        );

        if ( strncmp( $unescapedString, $this->sPrefix, strlen( $this->sPrefix ) ) === 0 ) {
            $unmangled = substr( $unescapedString, strlen( $this->sPrefix ) );

            // Check if this string should be mangled / un-mangled to begin with
            if ( $this->matches( $unmangled ) ) {
                return $unmangled;
            }
        }
        return $unescapedString;
    }

    /**
     * @inheritDoc
     *
     * Unmangles every value of the list; array keys are preserved.
     */
    public function unmangleList( array $list ): array {
        foreach ( $list as $index => $key ) {
            $list[$index] = $this->unmangle( $key );
        }

        return $list;
    }

    /**
     * @inheritDoc
     *
     * Unmangles the keys of the array and leaves the values untouched.
     */
    public function unmangleArray( array $array ): array {
        $out = [];
        foreach ( $array as $key => $value ) {
            // Same cast as in mangleArray: PHP turns integer-like keys into int,
            // which unmangle( string ) rejects under strict_types.
            $out[$this->unmangle( (string)$key )] = $value;
        }

        return $out;
    }

    /**
     * @inheritDoc
     *
     * Describes the MANGLER section: a non-empty text "prefix" and a required
     * "patterns" array whose extra keys are ignored.
     */
    public static function getExtraSchema(): array {
        $schema = [
            'root' => [
                '_type' => 'array',
                '_children' => [
                    'MANGLER' => [
                        '_type' => 'array',
                        '_children' => [
                            'prefix' => [
                                '_type' => 'text',
                                '_not_empty' => true,
                            ],
                            'patterns' => [
                                '_type' => 'array',
                                '_required' => true,
                                '_ignore_extra_keys' => true,
                                '_children' => [],
                            ],
                        ],
                    ],
                ],
            ],
        ];

        return $schema;
    }
}