src/Utilities/GettextPlural.php
<?php
/**
* @file
* @license GPL-2.0-or-later
*/
namespace MediaWiki\Extension\Translate\Utilities;
use InvalidArgumentException;
use MediaWiki\Extension\Translate\FileFormatSupport\GettextPluralException;
/**
 * Helpers for converting between Gettext plural forms (an array of strings) and the
 * single-string {{PLURAL:GETTEXT|form1|form2}} markup used for translation.
 *
 * @since 2019.09
 */
class GettextPlural {
	/** Opening part of the plural markup. */
	private const PRE = '{{PLURAL:GETTEXT|';
	/** Closing part of the plural markup. */
	private const POST = '}}';

	/**
	 * Returns Gettext plural rule for given language.
	 *
	 * Rules are read from data/plural-gettext.txt, which is parsed as one
	 * "<code><TAB><rule>" entry per line. Blank lines and lines without a tab
	 * separator are ignored. The first entry whose code matches wins.
	 *
	 * @param string $code Language tag in MediaWiki internal format.
	 * @return string Empty string if no plural rule found
	 */
	public static function getPluralRule( $code ) {
		global $wgTranslateDocumentationLanguageCode;

		// The documentation pseudo-language never has plural variants.
		if ( $code === $wgTranslateDocumentationLanguageCode ) {
			return 'nplurals=1; plural=0;';
		}

		$rulefile = __DIR__ . '/../../data/plural-gettext.txt';
		$rules = file_get_contents( $rulefile );
		if ( $rules === false ) {
			// Rule file could not be read: behave as if no rule was found.
			return '';
		}

		foreach ( explode( "\n", $rules ) as $line ) {
			if ( trim( $line ) === '' ) {
				continue;
			}

			$fields = explode( "\t", $line );
			if ( count( $fields ) < 2 ) {
				// Malformed line without a tab separator; skipping it avoids an
				// undefined-offset warning and returning null instead of a string.
				continue;
			}

			[ $rulecode, $rule ] = $fields;
			if ( $rulecode === $code ) {
				return $rule;
			}
		}

		return '';
	}

	/**
	 * Returns how many plural forms are expected by a given plural rule.
	 *
	 * The count is taken from the "nplurals=<digits>" part of the rule.
	 *
	 * @param string $rule Gettext style plural rule.
	 * @return int
	 * @throws InvalidArgumentException If the rule has no "nplurals=<digits>...;" part.
	 */
	public static function getPluralCount( $rule ) {
		$m = [];
		$ok = preg_match( '/nplurals=([0-9]+).*;/', $rule, $m );
		// preg_match gives 0 for no match and false on error; both mean we cannot continue.
		if ( !$ok ) {
			throw new InvalidArgumentException( "Rule $rule is malformed" );
		}

		return (int)$m[ 1 ];
	}

	/**
	 * Quick way to check if the text contains plural syntax.
	 *
	 * Only looks for the opening part of the markup; it does not verify that the
	 * markup is closed.
	 *
	 * @param string $text
	 * @return bool
	 */
	public static function hasPlural( $text ) {
		return str_contains( $text, self::PRE );
	}

	/**
	 * Format plural forms as single string suitable for translation.
	 *
	 * The forms are joined with "|" and wrapped in the plural markup. Forms are
	 * inserted as-is, without any escaping.
	 *
	 * @param string[] $forms
	 * @return string
	 */
	public static function flatten( array $forms ) {
		return self::PRE . implode( '|', $forms ) . self::POST;
	}

	/**
	 * Format translation with plural forms as array of forms.
	 *
	 * Reverse of flatten. Do note that A may be != flatten( unflatten( A ) ) because
	 * translators can place part of the text outside the plural markup or use multiple
	 * instances of the markup.
	 *
	 * @param string $text
	 * @param int $expectedPluralCount
	 * @return string[] Exactly $expectedPluralCount fully expanded forms.
	 */
	public static function unflatten( $text, $expectedPluralCount ) {
		[ $template, $instanceMap ] = self::parsePluralForms( $text );

		return self::expandTemplate( $template, $instanceMap, $expectedPluralCount );
	}

	/**
	 * Replaces problematic markup which can confuse our plural syntax markup with placeholders
	 *
	 * @param string $text
	 * @return array [ string $text, array $map ] The armoured text and a map from
	 *  placeholder to the original markup, suitable for passing to unarmour.
	 */
	private static function armour( $text ) {
		// |/| is commonly used in KDE to support inflections. It needs to be escaped
		// to avoid it messing up the plural markup.
		$replacements = [
			'|/|' => Utilities::getPlaceholder(),
		];

		// {0} is a common variable format
		preg_match_all( '/\{\d+\}/', $text, $matches );
		foreach ( $matches[0] as $m ) {
			// A variable that occurs several times simply gets its placeholder reassigned;
			// all of its occurrences are replaced with the same (latest) placeholder.
			$replacements[$m] = Utilities::getPlaceholder();
		}

		$text = strtr( $text, $replacements );
		$map = array_flip( $replacements );

		return [ $text, $map ];
	}

	/**
	 * Reverse of armour.
	 *
	 * @param string $text
	 * @param array $map Map returned by armour.
	 * @return string
	 */
	private static function unarmour( $text, array $map ) {
		return strtr( $text, $map );
	}

	/**
	 * Parses plural markup into a structure form.
	 *
	 * Every instance of the plural markup in the text is replaced with a placeholder in
	 * the returned template. The instance map is keyed by those placeholders and holds the
	 * list of forms ("|"-separated inside the markup) of each instance.
	 *
	 * @param string $text
	 * @return array [ string $template, array $instanceMap ]
	 * @throws GettextPluralException If matching the plural markup fails.
	 */
	public static function parsePluralForms( $text ) {
		$m = [];
		$pre = preg_quote( self::PRE, '/' );
		$post = preg_quote( self::POST, '/' );

		[ $armouredText, $armourMap ] = self::armour( $text );

		// U: ungreedy, so each instance ends at the first closing marker.
		// s: forms may span multiple lines.
		$ok = preg_match_all( "/$pre(.*)$post/Us", $armouredText, $m );
		if ( $ok === false ) {
			throw new GettextPluralException( "Plural regular expression failed for text: $text" );
		}

		$template = $armouredText;
		$instanceMap = [];

		foreach ( $m[0] as $instanceIndex => $instanceText ) {
			$ph = Utilities::getPlaceholder();

			// Replace only the first remaining occurrence, so that identical instances each
			// get their own placeholder. A plain substring replacement is used instead of a
			// regular expression: it cannot fail and never interprets the placeholder as a
			// replacement pattern.
			$pos = strpos( $template, $instanceText );
			if ( $pos !== false ) {
				$template = substr_replace( $template, $ph, $pos, strlen( $instanceText ) );
			}

			$instanceForms = explode( '|', $m[ 1 ][ $instanceIndex ] );
			foreach ( $instanceForms as $i => $v ) {
				$instanceForms[ $i ] = self::unarmour( $v, $armourMap );
			}

			$instanceMap[$ph] = $instanceForms;
		}

		$template = self::unarmour( $template, $armourMap );

		return [ $template, $instanceMap ];
	}

	/**
	 * Gives fully expanded forms given a template and parsed plural markup instances.
	 *
	 * @param string $template Text in which each plural instance is represented by a placeholder.
	 * @param array $instanceMap Map from placeholder to the list of forms for that instance.
	 * @param int $expectedPluralCount
	 * @return string[] List with exactly $expectedPluralCount items (empty if the count is
	 *  zero or negative).
	 */
	public static function expandTemplate( $template, array $instanceMap, $expectedPluralCount ) {
		$formArray = [];

		for ( $formIndex = 0; $formIndex < $expectedPluralCount; $formIndex++ ) {
			// Start with the whole string
			$form = $template;

			// Loop over each plural markup instance and replace it with the plural form belonging
			// to the current index
			foreach ( $instanceMap as $ph => $instanceForms ) {
				// For missing forms, fall back to empty text.
				// Extra forms are excluded because $formIndex < $expectedPluralCount
				$replacement = $instanceForms[ $formIndex ] ?? '';
				$form = str_replace( $ph, $replacement, $form );
			}

			$formArray[ $formIndex ] = $form;
		}

		return $formArray;
	}
}