wikimedia/mediawiki-extensions-CirrusSearch

View on GitHub
includes/Profile/PhraseSuggesterProfileRepoWrapper.php

Summary

Maintainability
C
1 day
Test Coverage
<?php

namespace CirrusSearch\Profile;

use CirrusSearch\Util;
use MediaWiki\Config\Config;
use Wikimedia\ObjectCache\BagOStuff;

/**
 * Wrapper to augment the phrase suggester profile settings
 * with customization on-wiki using system messages.
 *
 * Overrides are read from the content-language version of the
 * `cirrussearch-didyoumean-settings` message, passed through
 * Util::parseSettingsInMessage() and cached in the provided BagOStuff.
 * Each resulting entry is expected to be a `key:value` string. Every
 * override is validated against hard limits before being applied; malformed,
 * out-of-range and unknown entries are silently ignored.
 */
class PhraseSuggesterProfileRepoWrapper implements SearchProfileRepository {

    private const MAX_ERRORS_HARD_LIMIT = 2;
    private const MAX_TERM_FREQ_HARD_LIMIT = 0.6;
    private const PREFIX_LENGTH_HARD_LIMIT = 2;
    public const CIRRUSSEARCH_DIDYOUMEAN_SETTINGS = 'cirrussearch-didyoumean-settings';

    /**
     * How long (in seconds) the parsed on-wiki overrides are kept in cache.
     */
    private const CACHE_TTL_SECONDS = 600;

    /**
     * Alpha used when the on-wiki message selects laplace smoothing without
     * providing an explicit `laplace_alpha`.
     */
    private const DEFAULT_LAPLACE_ALPHA = 0.5;

    /**
     * Discount used when the on-wiki message selects stupid_backoff smoothing
     * without providing an explicit `stupid_backoff_discount`.
     */
    private const DEFAULT_STUPID_BACKOFF_DISCOUNT = 0.4;

    /**
     * Values accepted for the `suggest_mode` override.
     */
    private const ALLOWED_MODES = [ 'missing', 'popular', 'always' ];

    /**
     * @var SearchProfileRepository the repository providing the base profiles
     */
    private $wrapped;

    /**
     * @var BagOStuff cache used to store the parsed on-wiki overrides
     */
    private $bagOStuff;

    /**
     * @param SearchProfileRepository $wrapped
     * @param BagOStuff $bagOStuff
     */
    public function __construct( SearchProfileRepository $wrapped, BagOStuff $bagOStuff ) {
        $this->wrapped = $wrapped;
        $this->bagOStuff = $bagOStuff;
    }

    /**
     * Build a wrapper around an ArrayProfileRepository loaded from a PHP file.
     *
     * @param string $type
     * @param string $name
     * @param string $phpFile
     * @param BagOStuff $cache
     * @return PhraseSuggesterProfileRepoWrapper
     */
    public static function fromFile( $type, $name, $phpFile, BagOStuff $cache ) {
        return new self( ArrayProfileRepository::fromFile( $type, $name, $phpFile ), $cache );
    }

    /**
     * Build a wrapper around a ConfigProfileRepository.
     *
     * @param string $type
     * @param string $name
     * @param string $configEntry
     * @param Config $config
     * @param BagOStuff $cache
     * @return PhraseSuggesterProfileRepoWrapper
     */
    public static function fromConfig( $type, $name, $configEntry, Config $config, BagOStuff $cache ) {
        return new self( new ConfigProfileRepository( $type, $name, $configEntry, $config ), $cache );
    }

    /**
     * The repository type
     * @return string
     */
    public function repositoryType() {
        return $this->wrapped->repositoryType();
    }

    /**
     * The repository name
     * @return string
     */
    public function repositoryName() {
        return $this->wrapped->repositoryName();
    }

    /**
     * Load a profile named $name and apply the on-wiki overrides on top of it.
     *
     * Recognized override keys (anything else is ignored):
     *  - max_errors: number in [1, MAX_ERRORS_HARD_LIMIT]
     *  - confidence: finite number >= 0
     *  - max_term_freq: number in [0, MAX_TERM_FREQ_HARD_LIMIT]
     *  - min_doc_freq: number in [0, 1)
     *  - prefix_length: number in [0, PREFIX_LENGTH_HARD_LIMIT], truncated to int
     *  - suggest_mode: one of ALLOWED_MODES, stored under the 'mode' setting
     *  - collate: 'true' or 'false'
     *  - smoothing: 'laplace' or 'stupid_backoff'
     *  - laplace_alpha / stupid_backoff_discount: number in [0, 1]
     *
     * Whitespace around the key and the value is ignored.
     *
     * @param string $name
     * @return array|null the profile data or null if not found
     */
    public function getProfile( $name ) {
        $settings = $this->wrapped->getProfile( $name );
        if ( $settings === null ) {
            return null;
        }

        // Smoothing parameters are collected first and applied after the loop
        // so that their position relative to the 'smoothing' line is irrelevant.
        $laplaceAlpha = null;
        $stupidBackoffDiscount = null;
        foreach ( $this->loadOverrideLines() as $line ) {
            $linePieces = explode( ':', $line, 2 );
            if ( count( $linePieces ) !== 2 ) {
                // Skip improperly formatted lines without a key:value
                continue;
            }
            // Tolerate "key: value"; without trimming numeric values would be
            // accepted (is_numeric allows leading whitespace) while string
            // values such as " true" or " always" would be silently dropped.
            $k = trim( $linePieces[0] );
            $v = trim( $linePieces[1] );
            // null unless $v is a finite number
            $number = self::parseFiniteNumber( $v );

            switch ( $k ) {
                case 'max_errors':
                    if ( $number !== null && $number >= 1 && $number <= self::MAX_ERRORS_HARD_LIMIT ) {
                        $settings['max_errors'] = $number;
                    }
                    break;
                case 'confidence':
                    if ( $number !== null && $number >= 0 ) {
                        $settings['confidence'] = $number;
                    }
                    break;
                case 'max_term_freq':
                    if ( $number !== null && $number >= 0 && $number <= self::MAX_TERM_FREQ_HARD_LIMIT ) {
                        $settings['max_term_freq'] = $number;
                    }
                    break;
                case 'min_doc_freq':
                    if ( $number !== null && $number >= 0 && $number < 1 ) {
                        $settings['min_doc_freq'] = $number;
                    }
                    break;
                case 'prefix_length':
                    if ( $number !== null && $number >= 0 && $number <= self::PREFIX_LENGTH_HARD_LIMIT ) {
                        $settings['prefix_length'] = (int)$number;
                    }
                    break;
                case 'suggest_mode':
                    if ( in_array( $v, self::ALLOWED_MODES, true ) ) {
                        $settings['mode'] = $v;
                    }
                    break;
                case 'collate':
                    if ( $v === 'true' ) {
                        $settings['collate'] = true;
                    } elseif ( $v === 'false' ) {
                        $settings['collate'] = false;
                    }
                    break;
                case 'smoothing':
                    if ( $v === 'laplace' ) {
                        $settings['smoothing_model'] = [
                            'laplace' => [
                                'alpha' => self::DEFAULT_LAPLACE_ALPHA
                            ]
                        ];
                    } elseif ( $v === 'stupid_backoff' ) {
                        $settings['smoothing_model'] = [
                            'stupid_backoff' => [
                                'discount' => self::DEFAULT_STUPID_BACKOFF_DISCOUNT
                            ]
                        ];
                    }
                    break;
                case 'laplace_alpha':
                    if ( $number !== null && $number >= 0 && $number <= 1 ) {
                        $laplaceAlpha = $number;
                    }
                    break;
                case 'stupid_backoff_discount':
                    if ( $number !== null && $number >= 0 && $number <= 1 ) {
                        $stupidBackoffDiscount = $number;
                    }
                    break;
                default:
                    // Unknown key: ignore
                    break;
            }
        }

        // Apply the smoothing model options. They only take effect when the
        // matching smoothing model is active, whether it comes from the
        // wrapped profile or from the 'smoothing' override above.
        if ( isset( $settings['smoothing_model']['laplace'] ) && $laplaceAlpha !== null ) {
            $settings['smoothing_model']['laplace'] = [
                'alpha' => $laplaceAlpha
            ];
        }
        if ( isset( $settings['smoothing_model']['stupid_backoff'] ) && $stupidBackoffDiscount !== null ) {
            $settings['smoothing_model']['stupid_backoff'] = [
                'discount' => $stupidBackoffDiscount
            ];
        }
        return $settings;
    }

    /**
     * Fetch the override lines from the on-wiki message, going through the cache.
     *
     * @return string[] lines as returned by Util::parseSettingsInMessage(),
     *  or an empty array when the message is disabled
     */
    private function loadOverrideLines() {
        return $this->bagOStuff->getWithSetCallback(
            $this->bagOStuff->makeKey( self::CIRRUSSEARCH_DIDYOUMEAN_SETTINGS ),
            self::CACHE_TTL_SECONDS,
            static function () {
                $source = wfMessage( self::CIRRUSSEARCH_DIDYOUMEAN_SETTINGS )->inContentLanguage();
                if ( $source->isDisabled() ) {
                    return [];
                }
                return Util::parseSettingsInMessage( $source->plain() );
            }
        );
    }

    /**
     * Convert an override value to a float.
     *
     * Numeric strings that overflow to INF (e.g. "1e999") are rejected: such
     * a value cannot be JSON encoded and must not reach the settings array.
     *
     * @param string $value the (trimmed) raw value
     * @return float|null the finite number, or null if $value is not one
     */
    private static function parseFiniteNumber( string $value ): ?float {
        if ( !is_numeric( $value ) ) {
            return null;
        }
        $number = floatval( $value );
        return is_finite( $number ) ? $number : null;
    }

    /**
     * Check if a profile named $name exists in this repository
     * @param string $name
     * @return bool
     */
    public function hasProfile( $name ) {
        return $this->wrapped->hasProfile( $name );
    }

    /**
     * Get the list of profiles that we want to expose to the user.
     *
     * @return array[] list of profiles index by name
     */
    public function listExposedProfiles() {
        return $this->wrapped->listExposedProfiles();
    }
}