wikimedia/mediawiki-extensions-CirrusSearch

View on GitHub
includes/UserTestingEngine.php

Summary

Maintainability
A
0 mins
Test Coverage
<?php

namespace CirrusSearch;

use MediaWiki\Config\Config;
use Wikimedia\Assert\Assert;

/**
 * Decision making around user testing
 *
 * See docs/user_testing.md for more information.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 */
class UserTestingEngine {
    /** @var array */
    private $tests;

    /** @var ?string */
    private $activeTest;

    /**
     * @var callable Called with the test name, returns a float between 0 and 1
     *  which is uniformly distributed across users and stable for an individual
     *  user+testName combination.
     */
    private $callback;

    /**
     * @param array $tests
     * @param ?string $activeTest Array key in test to enable autoenroll for, or null
     *  for no autoenrollment.
     * @param callable $callback Called with the test name, returns a float
     *  between 0 and 1 which is uniformly distributed across users and stable
     *  for an individual user+testName combination.
     */
    public function __construct( array $tests, ?string $activeTest, callable $callback ) {
        $this->tests = $tests;
        $this->activeTest = $activeTest;
        $this->callback = $callback;
    }

    public static function fromConfig( Config $config ): UserTestingEngine {
        return new self(
            // While we shouldn't get null in normal operations, the global
            // initialization of user testing is a bit sloppy and this gets
            // invoked during ElasticsearchIntermediary unit testing, and unit
            // testing doesn't have any globally accessible config.
            $config->get( 'CirrusSearchUserTesting' ) ?? [],
            $config->get( 'CirrusSearchActiveTest' ),
            [ __CLASS__, 'stableUserProbability' ]
        );
    }

    public function decideTestByTrigger( string $trigger ): UserTestingStatus {
        if ( strpos( $trigger, ':' ) === false ) {
            return UserTestingStatus::inactive();
        }
        [ $testName, $bucket ] = explode( ':', $trigger, 2 );
        if ( isset( $this->tests[$testName]['buckets'][$bucket] ) ) {
            return UserTestingStatus::active( $testName, $bucket );
        } else {
            return UserTestingStatus::inactive();
        }
    }

    public function decideTestByAutoenroll(): UserTestingStatus {
        if ( $this->activeTest === null || !isset( $this->tests[$this->activeTest] ) ) {
            return UserTestingStatus::inactive();
        }
        $bucketProbability = call_user_func( $this->callback, $this->activeTest );
        $bucket = self::chooseBucket( $bucketProbability, array_keys(
            $this->tests[$this->activeTest]['buckets'] ) );
        return UserTestingStatus::active( $this->activeTest, $bucket );
    }

    public function decideActiveTest( ?string $trigger ): UserTestingStatus {
        if ( $trigger !== null ) {
            return $this->decideTestByTrigger( $trigger );
        } elseif ( MW_ENTRY_POINT == 'index' ) {
            return $this->decideTestByAutoenroll();
        } else {
            return UserTestingStatus::inactive();
        }
    }

    /**
     * If provided status is in an active state enable the related configuration.
     *
     * @param UserTestingStatus $status
     */
    public function activateTest( UserTestingStatus $status ) {
        if ( !$status->isActive() ) {
            return;
        }
        // boldly assume we created this status and it exists
        $testConfig = $this->tests[$status->getTestName()];
        $globals = array_merge(
            $testConfig['globals'] ?? [],
            $testConfig['buckets'][$status->getBucket()]['globals'] ?? [] );

        foreach ( $globals as $key => $value ) {
            // (T317951) Don't call array_key_exists unless we have to, as it's slow
            // on PHP 8.1+ for $GLOBALS. When the key is set but is explicitly set
            // to null, we still need to fall back to array_key_exists, but that's
            // rarer.
            if ( isset( $GLOBALS[$key] ) || array_key_exists( $key, $GLOBALS ) ) {
                $GLOBALS[$key] = $value;
            }
        }
    }

    /**
     * @param float $probability A number between 0 and 1
     * @param string[] $buckets List of buckets to choose from.
     * @return string The chosen bucket.
     */
    public static function chooseBucket( float $probability, array $buckets ): string {
        $n = count( $buckets );
        $pos = (int)min( $n - 1, $n * $probability );
        return $buckets[ $pos ];
    }

    /**
     * Converts a hex string into a probability between 0 and 1.
     * Retains uniform distribution of incoming hash string.
     *
     * @param string $hash
     * @return float Probability between 0 and 1
     */
    public static function hexToProbability( string $hash ): float {
        Assert::parameter( strlen( $hash ) > 0, '$hash',
            'Provided string must not be empty' );
        $len = strlen( $hash );
        $sum = 0;
        // TODO: Since the input is from a cryptographic hash simply
        // truncating is probably equally correct.
        for ( $i = 0; $i < $len; $i += 4 ) {
            $piece = substr( $hash, $i, 4 );
            $dec = hexdec( $piece );
            // xor will retain the uniform distribution
            $sum ^= $dec;
        }
        return $sum / ( ( 1 << 16 ) - 1 );
    }

    /**
     * @param string $testName
     * @return float Returns a value between 0 and 1 that is uniformly
     *  distributed between users, but constant for a single user+test
     *  combination.
     */
    public static function stableUserProbability( string $testName ): float {
        $hash = Util::generateIdentToken( $testName );
        return self::hexToProbability( $hash );
    }
}