src/MessageProcessing/StringMatcher.php
<?php
declare( strict_types = 1 );
namespace MediaWiki\Extension\Translate\MessageProcessing;
use MediaWiki\Extension\Translate\MessageGroupConfiguration\MetaYamlSchemaExtender;
use MediaWiki\Title\Title;
/**
* The versatile default implementation of StringMangler interface.
* It supports exact matches and patterns with any-wildcard (*).
* All matching strings are prefixed with the same prefix.
*
* @author Niklas Laxström
* @license GPL-2.0-or-later
*/
class StringMatcher implements StringMangler, MetaYamlSchemaExtender {
	/** @var string Prefix for mangled message keys */
	protected string $sPrefix = '';
	/** @var string[] Exact message keys */
	protected array $aExact = [];
	/**
	 * @var int[] Patterns of type foo*, stored as prefix => byte length of the prefix.
	 * Note that PHP converts integer-like string keys (for example "123") to integers,
	 * so the keys must be cast back to string before being used as strings.
	 */
	protected array $aPrefix = [];
	/** @var string[] Patterns that contain wildcard anywhere else than in the end */
	protected array $aRegex = [];

	/**
	 * @param string $prefix Prefix that is prepended to matching keys when mangling.
	 * @param string[] $patterns Key patterns, see init().
	 */
	public function __construct( string $prefix = '', array $patterns = [] ) {
		$this->sPrefix = $prefix;
		$this->init( $patterns );
	}

	/**
	 * Preprocesses the patterns.
	 *
	 * They are split into exact keys, prefix matches and pattern matches to
	 * speed up matching process. The patterns are added to the ones that
	 * have been registered earlier; nothing is reset here.
	 *
	 * @param string[] $strings Key patterns.
	 */
	protected function init( array $strings ): void {
		foreach ( $strings as $string ) {
			$pos = strpos( $string, '*' );
			if ( $pos === false ) {
				// No wildcard at all: exact key
				$this->aExact[] = $string;
			} elseif ( $pos + 1 === strlen( $string ) ) {
				// The first (and thus only) wildcard is the last character: prefix match
				$prefix = substr( $string, 0, -1 );
				$this->aPrefix[$prefix] = strlen( $prefix );
			} else {
				// Wildcard somewhere else: quote everything and turn each wildcard into ".+"
				$string = str_replace( '\\*', '.+', preg_quote( $string, '/' ) );
				$this->aRegex[] = "/^$string$/";
			}
		}
	}

	/**
	 * Returns the characters that are left unescaped by mangle(), in a form
	 * that is interpolated into a regular expression character class.
	 *
	 * The value is computed once and cached in a static variable.
	 *
	 * @return string
	 */
	protected static function getValidKeyChars(): string {
		static $valid = null;
		if ( $valid === null ) {
			$valid = strtr(
				Title::legalChars(),
				[
					'=' => '', // equals sign, which is itself used for escaping
					'&' => '', // ampersand, for entities
					'%' => '', // percent sign, which is used in URL encoding
				]
			);
		}
		return $valid;
	}

	/**
	 * @inheritDoc
	 *
	 * Reads the keys 'prefix' and 'patterns' of $conf. The patterns are added
	 * to any patterns that were registered earlier.
	 */
	public function setConf( array $conf ): void {
		$this->sPrefix = $conf['prefix'];
		$this->init( $conf['patterns'] );
	}

	/**
	 * @inheritDoc
	 *
	 * Checks the exact keys first, then the prefix patterns and finally the
	 * regular expression patterns.
	 */
	public function matches( string $key ): bool {
		// Strict comparison is required: with loose comparison numeric strings are
		// compared as numbers, so that for example "1e3" would match the key "1000".
		if ( in_array( $key, $this->aExact, true ) ) {
			return true;
		}
		foreach ( $this->aPrefix as $prefix => $len ) {
			// Integer-like prefixes come back as int from the array key, which
			// strncmp() rejects with a TypeError in strict mode.
			if ( strncmp( $key, (string)$prefix, $len ) === 0 ) {
				return true;
			}
		}
		foreach ( $this->aRegex as $regex ) {
			if ( preg_match( $regex, $key ) ) {
				return true;
			}
		}
		return false;
	}

	/**
	 * @inheritDoc
	 *
	 * The prefix is added only to keys that match the configured patterns.
	 * The escaping is applied to every key.
	 */
	public function mangle( string $key ): string {
		if ( $this->matches( $key ) ) {
			$key = $this->sPrefix . $key;
		}
		// Replaces every byte of the matched text with "=XX", where XX is the
		// upper case hexadecimal value of the byte.
		$escaper = static function ( $match ) {
			$esc = '';
			foreach ( str_split( $match[0] ) as $c ) {
				$esc .= '=' . sprintf( '%02X', ord( $c ) );
			}
			return $esc;
		};
		// Apply a "quoted-printable"-like escaping
		$valid = self::getValidKeyChars();
		$key = preg_replace_callback( "/[^$valid]/", $escaper, $key );
		// Additional limitations in MediaWiki, see MediaWikiTitleCodec::splitTitleString
		$key = preg_replace_callback( '/(~~~|^[ _]|[ _]$|[ _]{2,}|^:)/', $escaper, $key );
		// TODO: length check + truncation
		// TODO: forbid path travels
		return $key;
	}

	/** @inheritDoc */
	public function mangleList( array $list ): array {
		return array_map( [ $this, 'mangle' ], $list );
	}

	/** @inheritDoc */
	public function mangleArray( array $array ): array {
		$out = [];
		foreach ( $array as $key => $value ) {
			// Array keys can be integers, mangle() accepts only strings
			$out[$this->mangle( (string)$key )] = $value;
		}
		return $out;
	}

	/**
	 * @inheritDoc
	 *
	 * The prefix is removed only if the remaining key matches the configured
	 * patterns, in other words if mangle() would have added the prefix to it.
	 */
	public function unmangle( string $key ): string {
		// Unescape the "quoted-printable"-like escaping,
		// which is applied in mangle
		$unescapedString = preg_replace_callback(
			'/=([A-F0-9]{2})/',
			static function ( $match ) {
				return chr( hexdec( $match[1] ) );
			},
			$key
		);
		if ( strncmp( $unescapedString, $this->sPrefix, strlen( $this->sPrefix ) ) === 0 ) {
			$unmangled = substr( $unescapedString, strlen( $this->sPrefix ) );
			// Check if this string should be mangled / un-mangled to begin with
			if ( $this->matches( $unmangled ) ) {
				return $unmangled;
			}
		}
		return $unescapedString;
	}

	/** @inheritDoc */
	public function unmangleList( array $list ): array {
		foreach ( $list as $index => $key ) {
			// Lists built from array keys can contain integers, while unmangle()
			// accepts only strings. mangleList() tolerates them too.
			$list[$index] = $this->unmangle( (string)$key );
		}
		return $list;
	}

	/** @inheritDoc */
	public function unmangleArray( array $array ): array {
		$out = [];
		foreach ( $array as $key => $value ) {
			// Array keys can be integers, unmangle() accepts only strings.
			// This mirrors the cast done in mangleArray().
			$out[$this->unmangle( (string)$key )] = $value;
		}
		return $out;
	}

	/**
	 * @inheritDoc
	 *
	 * Describes the MANGLER section: a non-empty text 'prefix' and a required
	 * array 'patterns'.
	 */
	public static function getExtraSchema(): array {
		$schema = [
			'root' => [
				'_type' => 'array',
				'_children' => [
					'MANGLER' => [
						'_type' => 'array',
						'_children' => [
							'prefix' => [
								'_type' => 'text',
								'_not_empty' => true,
							],
							'patterns' => [
								'_type' => 'array',
								'_required' => true,
								'_ignore_extra_keys' => true,
								'_children' => [],
							],
						],
					],
				],
			],
		];
		return $schema;
	}
}