internal/extract_arg_info.php

Summary

Maintainability
D
2 days
Test Coverage
#!/usr/bin/env php
<?php

declare(strict_types=1);

require_once __DIR__ . '/../src/Phan/Bootstrap.php';

use Phan\AST\TolerantASTConverter\Shim;
use Phan\CodeBase;
use Phan\Language\Context;
use Phan\Language\Type;
use Phan\Language\Type\ArrayType;
use Phan\Language\Type\FalseType;
use Phan\Language\Type\FloatType;
use Phan\Language\Type\GenericArrayType;
use Phan\Language\Type\IntType;
use Phan\Language\Type\MixedType;
use Phan\Language\Type\NullType;
use Phan\Language\Type\ObjectType;
use Phan\Language\Type\ResourceType;
use Phan\Language\Type\StringType;
use Phan\Language\Type\TrueType;
use Phan\Language\Type\VoidType;
use Phan\Language\UnionType;

/**
 * This extracts the real signature types for commonly used functions from opcache.
 *
 * Note that php 8.0 has TypeError and ArgumentCountError for internal functions,
 * so the return types are much more specific.
 * @phan-file-suppress PhanPluginRemoveDebugAny
 */
class OpcacheFuncInfoParser
{
    const OVERRIDES = [
        'config_get_hash' => null,  // skip php debug method
        // wrong handling of string for 32-bit
        'mysqli_get_charset' => null,
        'mysqli_get_client_stats' => null,
        'mysqli_insert_id' => null,
        'mysqli_stmt_affected_rows' => null,
        'mysqli_stat' => null,

        // etc.
        'pathinfo' => 'array|string',  // temporary override
        'parse_url' => 'array|false|int|string|null',  // conservative guess
        'pg_result_error_field' => 'false|null|string',
        'set_error_handler' => '?callable',  // probably? Might not work for private methods as arrays.
        'set_socket_blocking' => null,  // this is new
        // not common enough to investigate
        'stream_bucket_append' => null,
        'stream_bucket_new' => null,
        'stream_bucket_prepend' => null,
    ];

    /**
     * @return array<string,UnionType> maps internal function names to their real union types (from Reflection)
     */
    private static function extractInfoFromReflection(): array
    {
        $result = [];
        $function_names = get_defined_functions();
        unset($function_names['user']);
        foreach (array_merge(...array_values($function_names)) as $function_name) {
            $function = new ReflectionFunction($function_name);
            if (!$function->hasReturnType()) {
                continue;
            }
            $return_type = $function->getReturnType();
            $union_type = UnionType::fromReflectionType($return_type);
            $result[$function_name] = $union_type;
        }
        // This works, but methods with return types are uncommon.
        foreach (get_declared_classes() as $class_name) {
            $reflection_class = new ReflectionClass($class_name);
            if (!$reflection_class->isInternal()) {
                continue;
            }
            foreach ($reflection_class->getMethods() as $method) {
                if (!$method->hasReturnType()) {
                    continue;
                }
                $method_name = $class_name . '::' . $method->getName();
                $union_type = UnionType::fromReflectionType($method->getReturnType());
                $result[$method_name] = $union_type;
            }
        }

        return $result;
    }

    /**
     * @return array<string,UnionType> maps internal function names to their real union types (From opcache's signatures)
     */
    private static function extractInfoFromOpcache(string $contents): array
    {
        $lines = explode("\n", $contents);
        $result = [];
        foreach ($lines as $line) {
            if (preg_match('@^\s*F[01NRXC]\(\s*"(\w+)",\s*(\w+(\s*\|\s*\w+)*)\s*\),@', $line, $matches)) {
                $function_name = $matches[1];
                if (array_key_exists($function_name, self::OVERRIDES)) {
                    $union_type_string = self::OVERRIDES[$function_name];
                    if (!$union_type_string) {
                        continue;
                    }
                    $union_type = UnionType::fromStringInContext($union_type_string, new Context(), Type::FROM_TYPE);
                } else {
                    $flags = array_map('trim', explode('|', $matches[2]));
                    $union_type = self::extractUnionType($flags);
                }
                if (!$union_type->isEmpty()) {
                    $result[$function_name] = $union_type;
                }
            }
        }
        return $result;
    }

    /**
     * @param list<string> $flags
     */
    private static function extractUnionType(array $flags): UnionType
    {
        static $type_lookup = null;
        if ($type_lookup === null) {
            $type_lookup = [
                'MAY_BE_ARRAY' => ArrayType::instance(false),
                'MAY_BE_ARRAY_KEY_ANY' => ArrayType::instance(false),
                'MAY_BE_ARRAY_KEY_LONG' => ArrayType::instance(false),
                'MAY_BE_ARRAY_KEY_STRING' => ArrayType::instance(false),
                'MAY_BE_ARRAY_OF_ANY' => ArrayType::instance(false),
                'MAY_BE_ARRAY_OF_ARRAY' => ArrayType::instance(false),
                'MAY_BE_ARRAY_OF_DOUBLE' => ArrayType::instance(false),
                'MAY_BE_ARRAY_OF_FALSE' => ArrayType::instance(false),
                'MAY_BE_ARRAY_OF_OBJECT' => ArrayType::instance(false),
                'MAY_BE_ARRAY_OF_RESOURCE' => ArrayType::instance(false),
                'MAY_BE_ARRAY_OF_LONG' => ArrayType::instance(false),
                'MAY_BE_ARRAY_OF_NULL' => ArrayType::instance(false),
                'MAY_BE_ARRAY_OF_REF' => ArrayType::instance(false),
                'MAY_BE_ARRAY_OF_STRING' => ArrayType::instance(false),
                'MAY_BE_ARRAY_OF_TRUE' => ArrayType::instance(false),
                'MAY_BE_DOUBLE' => FloatType::instance(false),
                'MAY_BE_FALSE' => FalseType::instance(false),
                'MAY_BE_LONG' => IntType::instance(false),
                'MAY_BE_NULL' => NullType::instance(false),
                'MAY_BE_OBJECT' => ObjectType::instance(false),
                'MAY_BE_RESOURCE' => ResourceType::instance(false),
                'MAY_BE_STRING' => StringType::instance(false),
                'MAY_BE_TRUE' => TrueType::instance(false),
            ];
        }
        $result = UnionType::empty();
        if ($flags === ['MAY_BE_NULL']) {
            return VoidType::instance(false)->asPHPDocUnionType();
        }
        $flags = array_combine($flags, $flags);
        foreach ($flags as $flag) {
            if (in_array($flag, ['UNKNOWN_INFO', 'MAY_BE_ANY', 'zend_range_info'], true)) {
                return UnionType::empty();
            }
            $type = $type_lookup[$flag] ?? null;
            if ($type === null) {
                fwrite(STDERR, "Unknown flag \"$flag\"\n");
                return UnionType::empty();
            }
            $result = $result->withType($type);
        }
        if (isset($flags['MAY_BE_ARRAY'])) {
            // @phan-suppress-next-line PhanPartialTypeMismatchArgument TODO implement https://github.com/phan/phan/issues/3242
            $array_type = self::arrayTypeFromFlags(array_flip($flags));
            $result = $result->withoutType(ArrayType::instance(false))->withUnionType($array_type);
        }
        return $result->asNormalizedTypes();
    }

    /**
     * @param array<string,string> $flag_set
     * @return UnionType of 1 or more ArrayTypes to include
     */
    private static function arrayTypeFromFlags(array $flag_set): UnionType
    {
        // 1. Convert key types from opcache to Phan's representation
        if (isset($flag_set['MAY_BE_ARRAY_KEY_ANY'])) {
            $key_type = GenericArrayType::KEY_MIXED;
        } else {
            $key_type = GenericArrayType::KEY_EMPTY;
            if (isset($flag_set['MAY_BE_ARRAY_KEY_LONG'])) {
                $key_type |= GenericArrayType::KEY_INT;
            }
            if (isset($flag_set['MAY_BE_ARRAY_KEY_STRING'])) {
                $key_type |= GenericArrayType::KEY_STRING;
            }
            $key_type = $key_type ?: GenericArrayType::KEY_MIXED;
        }
        // 2. Convert value types from opcache to Phan's representation and normalize
        if (!isset($flag_set['MAY_BE_ARRAY_OF_ANY'])) {
            static $element_type_map = null;
            if ($element_type_map === null) {
                $element_type_map = [
                    'MAY_BE_ARRAY_OF_ARRAY' => ArrayType::instance(false),
                    'MAY_BE_ARRAY_OF_DOUBLE' => FloatType::instance(false),
                    'MAY_BE_ARRAY_OF_FALSE' => FalseType::instance(false),
                    'MAY_BE_ARRAY_OF_OBJECT' => ObjectType::instance(false),
                    'MAY_BE_ARRAY_OF_RESOURCE' => ResourceType::instance(false),
                    'MAY_BE_ARRAY_OF_LONG' => IntType::instance(false),
                    'MAY_BE_ARRAY_OF_NULL' => NullType::instance(false),
                    'MAY_BE_ARRAY_OF_STRING' => StringType::instance(false),
                    'MAY_BE_ARRAY_OF_TRUE' => TrueType::instance(false),
                ];
            }
            $possible_types = array_values(array_intersect_key($element_type_map, $flag_set));
            $element_type = UnionType::of($possible_types)->asNormalizedTypes()->asRealUnionType();
        } else {
            $element_type = UnionType::empty();
        }
        // 3. Combine key and value types, or just return a regular array if nothing is known.
        if ($element_type->isEmpty()) {
            if ($key_type === GenericArrayType::KEY_MIXED) {
                return ArrayType::instance(false)->asPHPDocUnionType();
            }
            $element_type = MixedType::instance(false)->asPHPDocUnionType();
        }
        return $element_type->asGenericArrayTypes($key_type);
    }

    /**
     * Parses the real types to expect for global functions from opcache and returns the result.
     */
    public static function main(): void
    {
        Shim::load();
        global $argv;
        if (count($argv) !== 2) {
            fwrite(STDERR, "Usage: {$argv[0]} path/to/php-src" . PHP_EOL);
            fwrite(STDERR, "  Extracts the real function return types for a php version from opcache's zend_func_info.c declarations." . PHP_EOL);
            fwrite(STDERR, "  The real return types are used by Phan to be certain if a type check is redundant or impossible." . PHP_EOL);
            exit(1);
        }

        $func_info_path = $argv[1] . "/ext/opcache/Optimizer/zend_func_info.c";
        if (!file_exists($func_info_path)) {
            fwrite(STDERR, "Could not find $func_info_path\n");
            exit(1);
        }
        $contents = file_get_contents($func_info_path);
        if (!is_string($contents)) {
            fwrite(STDERR, "Could not read contents of $func_info_path\n");
            exit(1);
        }
        $code_base = require(dirname(__DIR__) . '/src/codebase.php');
        $opcache_data = self::extractInfoFromOpcache($contents);
        $reflection_data = self::extractInfoFromReflection();
        self::checkOpcacheAndReflectionAreConsistent($code_base, $reflection_data, $opcache_data);

        // NOTE: Reflection is often updated before zend_func_info.c gets updated,
        // so union types in reflection take priority.
        $data = array_merge($opcache_data, $reflection_data);

        $inner_contents = '';

        require_once __DIR__ . '/lib/IncompatibleSignatureDetectorBase.php';
        IncompatibleSignatureDetectorBase::sortSignatureMap($data);

        foreach ($data as $function_name => $union_type) {
            $inner_contents .= "'$function_name' => '$union_type',\n";
        }
        $inner_contents = rtrim($inner_contents);
        echo <<<EOT
<?php
declare(strict_types=1);

/**
 * This lists all of the possible real return types of various global functions.
 * This is useful because php won't provide many of these until php 8,
 * and even then won't be able to represent types such as string|false.
 *
 * This is conservative to avoid false positives, and includes types returned for all possible failure modes
 * (invalid arguments/argument counts, spurious errors, etc.)
 *
 * Generated by Phan's internal/extract_arg_info.php, from ext/opcache/Optimizer/zend_func_info.c of php-src.
 */
return [
$inner_contents
];

EOT;
    }

    /**
     * @param array<string,UnionType> $reflection_data
     * @param array<string,UnionType> $opcache_data
     */
    private static function checkOpcacheAndReflectionAreConsistent(CodeBase $code_base, array $reflection_data, array $opcache_data): void
    {
        foreach ($opcache_data as $function_name => $opcache_type) {
            $reflection_type = $reflection_data[$function_name] ?? null;
            if (!$reflection_type) {
                continue;
            }
            if (!$opcache_type->canStrictCastToUnionType($code_base, $reflection_type)) {
                fwrite(STDERR, "Error for $function_name: Opcache infers the type is $opcache_type but reflection infers that the type is $reflection_type (check if the corresponding php versions are the same)\n");
            } else {
                if ($opcache_type->asNormalizedTypes()->isEqualTo($reflection_type->asNormalizedTypes())) {
                    foreach ($reflection_type->getTypeSet() as $type) {
                        if ($type instanceof IntType || $type->isInBoolFamily() || $type instanceof VoidType || $type instanceof FloatType || $type instanceof NullType) {
                            continue;
                        }
                        continue 2;
                    }
                    fwrite(STDERR, "$function_name: Opcache duplicates the reflection type $opcache_type\n");
                }
                // fwrite(STDERR, "$function_name: Opcache infers the type is $opcache_type and reflection infers that the type is $reflection_type\n");
            }
        }
    }
}
OpcacheFuncInfoParser::main();