internal/lib/IncompatibleXMLSignatureDetector.php
<?php
declare(strict_types=1);
use Phan\Config;
use Phan\Memoize;
require_once __DIR__ . '/IncompatibleSignatureDetectorBase.php';
require_once __DIR__ . '/IncompatibleStubsSignatureDetector.php';
require_once __DIR__ . '/IncompatibleRealStubsSignatureDetector.php';
/**
* A utility to read php.net's xml documentation for functions, methods,
* and use that to update Phan's internal signature map (Currently just return types of functions and methods)
* Author: Tyson Andre
*
* Usage:
* php internal/internalsignatures.php path/to/phpdoc-en
*
* TODO: Refactor this class into multiple classes
* TODO: This has a bit of code in common with sanitycheck.php, refactor?
* phpdoc-en can be downloaded via 'svn checkout https://svn.php.net/repository/phpdoc/modules/doc-en phpdoc-en'
* @phan-file-suppress PhanSuspiciousTruthyString TODO: Fix
* @phan-file-suppress PhanPluginRemoveDebugAny
*/
class IncompatibleXMLSignatureDetector extends IncompatibleSignatureDetectorBase
{
use Memoize;
/** @var string the directory for english PHP element references */
private $reference_directory;
/** @var string the base directory of the svn phpdoc repo */
private $doc_base_directory;
public function __construct(string $dir)
{
if (!is_dir($dir)) {
echo "Could not find '$dir'\n";
static::printUsageAndExit();
}
Config::setValue('ignore_undeclared_functions_with_known_signatures', false);
$en_reference_dir = "$dir/en/reference";
if (!is_dir($en_reference_dir)) {
fwrite(STDERR, "Could not find subdirectory '$en_reference_dir'\n");
static::printUsageAndExit();
}
$this->reference_directory = self::realpath($en_reference_dir);
$this->doc_base_directory = self::realpath($dir);
$this->aliases = $this->parseAliases();
}
/**
* Parse information about which global functions are aliases of other global functions.
* @return array<string,string> maps alias name to original name
*/
private function parseAliases(): array
{
$file_name = $this->doc_base_directory . '/en/appendices/aliases.xml';
$xml = $this->getSimpleXMLForFile($file_name);
if (!$xml) {
return [];
}
$result = [];
// @phan-suppress-next-line PhanPluginUnknownObjectMethodCall TODO fix https://github.com/phan/phan/issues/3723
foreach ($xml->children()[2]->table->tgroup->tbody->children() as $row) {
$entry = $row->entry;
$alias = (string)$entry[0];
$original = (string)$entry[1]->function;
if (!$original || !$alias) {
// E.g. an alias to a method such as ociassignelem
continue;
}
$result[$alias] = $original;
}
return $result;
}
/**
* @throws RuntimeException if the real path could not be determined
*/
private static function realpath(string $dir): string
{
$realpath = realpath($dir);
if (!is_string($realpath)) {
fwrite(STDERR, "Could not find realpath of '$dir'\n");
static::printUsageAndExit();
throw new RuntimeException("unreachable");
}
return $realpath;
}
/** @return array<string,array<string,string>> a set of unique file names */
private function getFilesForFunctionNameList(): array
{
return $this->memoize(__METHOD__, /** @return array<string,array<string,string>> */ function (): array {
$files_for_function_name_list = [];
$reference_directory = $this->reference_directory;
foreach (static::scandir($reference_directory) as $subpath) {
$functions_subsubdir = "$reference_directory/$subpath/functions";
if (is_dir($functions_subsubdir)) {
foreach (static::scandirForXML($functions_subsubdir) as $function_doc_fullpath => $unused_function_name) {
$xml = $this->getSimpleXMLForFile($function_doc_fullpath);
if (!$xml) {
continue;
}
$real_function_name = self::getFunctionNameFromXML($xml);
if (!$real_function_name) {
continue;
}
if (in_array($real_function_name, static::INVALID_FUNCTION_NAMES, true)) {
continue;
}
$files_for_function_name_list[strtolower($real_function_name)][$function_doc_fullpath] = $function_doc_fullpath;
}
}
}
return $files_for_function_name_list;
});
}
/**
* @return array<string,string>
*/
private static function scandirForXML(string $dir): array
{
$result = [];
foreach (static::scandir($dir) as $basename) {
if (substr($basename, -4) !== '.xml') {
continue;
}
$full_path = "$dir/$basename";
if (is_file($full_path)) {
$normalized_name = strtolower(str_replace('-', '_', (string)substr($basename, 0, -4)));
$result[$full_path] = $normalized_name;
}
}
return $result;
}
// These aren't built in functions, but they're documented like them.
const INVALID_FUNCTION_NAMES = [
// keywords.
'array',
'list',
// Deprecated method that has an effect if defined by PHP code.
'__autoload',
];
/**
* @var array<string,array<string,string>>
* Maps class names to a unique set of folders [$class_name => [$folder_name => $folder_name]]
*/
private $folders_for_class_name_list;
/**
* @return array<string,array<string,string>>
*/
private function getFoldersForClassNameList(): array
{
if ($this->folders_for_class_name_list === null) {
$this->folders_for_class_name_list = $this->populateFoldersForClassNameList();
}
return $this->folders_for_class_name_list;
}
/**
* @return array<string,array<string,string>>
*/
private function populateFoldersForClassNameList(): array
{
$this->folders_for_class_name_list = [];
// TODO: Extract inheritance from classname.xml
// TODO: Just parse every single xml file and extract the class name (including namespace)
// from the XML itself instead of guessing based on heuristics.
foreach (static::scandir($this->reference_directory) as $subpath) {
$this->populateFoldersRecursively($subpath);
}
return $this->folders_for_class_name_list;
}
private function populateFoldersRecursively(string $subpath): void
{
$extension_directory = "$this->reference_directory/$subpath";
foreach (static::scandir($extension_directory) as $subsubpath) {
$class_subpath = "$extension_directory/$subsubpath";
if (is_dir($class_subpath) && strtolower($subsubpath) !== 'functions') {
$class_name = $this->parseClassName("$subpath/$subsubpath");
$normalized_class_name = strtolower(str_replace(['-', '/'], ['_', '\\'], $class_name));
// echo "Reading $class_subpath $normalized_class_name\n";
$this->folders_for_class_name_list[$normalized_class_name][$class_subpath] = $class_subpath;
$this->populateFoldersRecursively("$subpath/$subsubpath");
}
}
}
/**
* @return array<string,SimpleXMLElement>
*/
private function getClassXMLFiles(): array
{
return $this->memoize(__METHOD__, /** @return array<string,SimpleXMLElement> */ function (): array {
$remaining_folders = [
$this->reference_directory,
$this->doc_base_directory . '/en/language/predefined'
];
$result = [];
while (count($remaining_folders) > 0) {
$folder = array_pop($remaining_folders);
if (!$folder) {
// impossible
break;
}
foreach (static::scandir($folder) as $basename) {
if ($basename === 'functions') {
continue;
}
$path = "$folder/$basename";
if (is_dir($path)) {
$remaining_folders[] = $path;
continue;
}
if (!preg_match('/\.xml$/D', $basename)) {
continue;
}
$contents = (string)$this->fileGetContents($path);
if (!preg_match('/<phpdoc:classref|<classsynopsis/', $contents)) {
continue;
}
$xml = $this->getSimpleXMLForFileContents($contents, $path);
if (!$xml) {
continue;
}
$result[$path] = $xml;
}
}
return $result;
});
}
/**
* @return Generator<string>
*/
private function getPossibleFilesInReferenceDirectory(string $folder_in_reference_directory): Generator
{
$file = $this->reference_directory . '/' . $folder_in_reference_directory . '.xml';
yield $file;
$parts = explode('/', $folder_in_reference_directory, 2);
$alternate_basename = str_replace('/', '.', $parts[1]) . '.xml';
$alternate_file = $this->reference_directory . '/' . $parts[0] . '/' . $alternate_basename;
if ($alternate_file !== $file) {
yield $alternate_file;
}
$alternate_file_2 = $this->reference_directory . '/' . $parts[0] . '/' . str_replace('_', '-', $alternate_basename);
if ($alternate_file_2 !== $alternate_file) {
yield $alternate_file_2;
}
yield $this->reference_directory . '/' . $parts[0] . '/' . $parts[0] . '.' . str_replace('_', '-', $alternate_basename);
}
private function parseClassName(string $folder_in_reference_directory): string
{
foreach ($this->getPossibleFilesInReferenceDirectory($folder_in_reference_directory) as $file_in_reference_directory) {
//echo "Looking for $file_in_reference_directory\n";
if (file_exists($file_in_reference_directory)) {
//echo "Found $file_in_reference_directory\n";
$xml = $this->getSimpleXMLForFile($file_in_reference_directory);
if (!$xml) {
continue;
}
$results = $xml->xpath('//a:classsynopsisinfo/a:ooclass/a:classname');
if (is_array($results) && count($results) === 1) {
// echo "Returning $results[0]\n";
return (string)$results[0];
}
break;
}
}
return preg_replace('@^[^/]*/@', '', $folder_in_reference_directory);
}
/**
* Execute one of several possible commands to update Phan's stub files.
*/
public static function main(): void
{
error_reporting(E_ALL);
ini_set('memory_limit', '2G');
global $argv;
if (\count($argv) < 2) {
// TODO: CLI flags
static::printUsageAndExit();
}
// Attempt to parse newer php syntax such as union types on a best-effort basis.
Config::setValue('use_fallback_parser', true);
$command = $argv[1];
switch ($command) {
case 'sort':
if (count($argv) !== 2) {
fwrite(STDERR, "Invalid argument count, sort expects no arguments\n");
static::printUsageAndExit();
}
static::sortSignatureMapInPlace();
break;
case 'update-svn':
if (count($argv) !== 3) {
fwrite(STDERR, "Invalid argument count, update-svn expects 1 argument\n");
static::printUsageAndExit();
}
$detector = new IncompatibleXMLSignatureDetector($argv[2]);
$detector->selfTest();
$detector->addMissingFunctionLikeSignatures();
$detector->updateFunctionSignatures();
// TODO: Sort .php.extra_signatures and .php.new
break;
case 'update-real-stubs':
if (count($argv) !== 3) {
fwrite(STDERR, "Invalid argument count, update-stubs expects 1 argument\n");
static::printUsageAndExit();
}
// TODO: Add a way to exclude /tests/
$detector = new IncompatibleRealStubsSignatureDetector($argv[2]);
$detector->selfTest();
$detector->addMissingFunctionLikeSignatures();
$detector->updateFunctionSignatures();
break;
case 'update-real-param-names':
if (count($argv) !== 3) {
fwrite(STDERR, "Invalid argument count, update-stubs expects 1 argument\n");
static::printUsageAndExit();
}
// TODO: Add a way to exclude /tests/
$detector = new IncompatibleRealStubsSignatureDetector($argv[2]);
$detector->selfTest();
$detector->addMissingFunctionLikeSignatures();
$detector->updateFunctionSignaturesParamNames();
break;
case 'update-stubs':
if (count($argv) !== 3) {
fwrite(STDERR, "Invalid argument count, update-stubs expects 1 argument\n");
static::printUsageAndExit();
}
// TODO: Add a way to exclude /tests/
$detector = new IncompatibleStubsSignatureDetector($argv[2]);
$detector->selfTest();
$detector->addMissingFunctionLikeSignatures();
$detector->updateFunctionSignatures();
break;
case 'update-descriptions-svn':
if (count($argv) !== 3) {
fwrite(STDERR, "Invalid argument count, update-descriptions-svn expects 1 argument\n");
static::printUsageAndExit();
}
// TODO: Add a way to exclude /tests/
$detector = new IncompatibleXMLSignatureDetector($argv[2]);
$detector->selfTest();
$detector->updatePHPDocSummaries();
break;
case 'update-descriptions-stubs':
if (count($argv) !== 3) {
fwrite(STDERR, "Invalid argument count, update-descriptions-stubs expects 1 argument\n");
static::printUsageAndExit();
}
$detector = new IncompatibleStubsSignatureDetector($argv[2]);
$detector->selfTest();
$detector->updatePHPDocSummaries();
break;
case 'compare-named-parameters':
self::compareNamedParameters();
return; // unreachable
case 'help':
case '--help':
case '-h':
static::printUsageAndExit(0);
return; // unreachable
default:
fwrite(STDERR, "Invalid command '$command'\n");
static::printUsageAndExit(1);
}
}
/**
* Sort the signature map and save to to $filename.sorted
*/
public static function sortSignatureMapInPlace(): void
{
$phan_signatures = static::readSignatureMap();
static::sortSignatureMap($phan_signatures);
$sorted_phan_signatures_path = ORIGINAL_SIGNATURE_PATH . '.sorted';
static::info("Saving sorted Phan signatures to '$sorted_phan_signatures_path'\n");
static::saveSignatureMap($sorted_phan_signatures_path, $phan_signatures);
}
/**
* @suppress PhanPluginMixedKeyNoKey
*/
private function selfTest(): void
{
$this->expectFunctionLikeSignaturesMatch('strlen', ['int', 'string' => 'string']);
$this->expectFunctionLikeSignaturesMatch('ob_clean', ['void']);
$this->expectFunctionLikeSignaturesMatch('disk_free_space', ['float', 'directory' => 'string']);
$this->expectFunctionLikeSignaturesMatch('EvWatcher::feed', ['void', 'revents' => 'int']);
$this->expectFunctionLikeSignaturesMatch('intdiv', ['int', 'dividend' => 'int', 'divisor' => 'int']);
$this->expectFunctionLikeSignaturesMatch('ArrayIterator::seek', ['void', 'position' => 'int']);
$this->expectFunctionLikeSignaturesMatch('mb_chr', ['string', 'cp' => 'int', 'encoding=' => 'string']);
$this->expectFunctionLikeSignaturesMatch('curl_multi_exec', ['int', 'mh' => 'resource', '&still_running' => 'int']);
}
/**
* @param array<int|string,string> $expected
*/
private function expectFunctionLikeSignaturesMatch(string $function_name, array $expected): void
{
$actual = $this->parseFunctionLikeSignature($function_name);
if ($expected !== $actual) {
printf("Extraction failed for %s\nExpected: %s\nActual: %s\n", $function_name, json_encode($expected) ?: 'invalid', json_encode($actual) ?: 'invalid');
exit(1);
}
}
/**
* @return ?SimpleXMLElement the simple xml for the global function $function_name
*/
public function getSimpleXMLForFunctionSignature(string $function_name): ?SimpleXMLElement
{
$function_name_lc = strtolower($function_name);
$function_name_file_map = $this->getFilesForFunctionNameList();
$function_signature_files = $function_name_file_map[$function_name_lc] ?? null;
if ($function_signature_files === null) {
static::debug("Could not find $function_name\n");
return null;
}
if (count($function_signature_files) !== 1) {
static::debug("Expected only one signature for $function_name\n");
return null;
}
$signature_file = \reset($function_signature_files);
if (!is_string($signature_file)) {
static::info("invalid signature file\n");
return null;
}
// Not sure if there's a good way of using an external entity file in PHP.
return $this->getSimpleXMLForFile($signature_file);
}
/**
* @return ?array<mixed,string>
*/
public function parseFunctionSignature(string $function_name): ?array
{
$xml = $this->getSimpleXMLForFunctionSignature($function_name);
if ($xml === null) {
return null;
}
return self::parseFunctionLikeSignatureForXML($function_name, $xml);
}
/**
* Returns the SimpleXMLElement with the documentation of each method in $class_name.
* @return ?array<string,SimpleXMLElement>
*/
public function getMethodsForClassName(string $class_name): ?array
{
$class_name_lc = strtolower($class_name);
$class_name_file_map = $this->getFoldersForClassNameList();
$class_name_files = $class_name_file_map[$class_name_lc] ?? null;
if ($class_name_files === null) {
static::debug("Could not find class directory for $class_name\n");
return null;
}
if (count($class_name_files) !== 1) {
static::debug("Expected only one class implementation for $class_name\n");
return null;
}
$class_folder = \reset($class_name_files);
if (!is_string($class_folder)) {
static::info("Invalid array for $class_name\n");
return null;
}
$result = [];
foreach (static::scandirForXML($class_folder) as $method_xml_path => $method_name_lc) {
$xml = $this->getSimpleXMLForFile($method_xml_path);
if (!$xml) {
static::info("Failed to parse information for $class_name::$method_name_lc from '$method_xml_path'\n");
continue;
}
$case_sensitive_method_name = self::getMethodNameFromXML($xml);
if (!$case_sensitive_method_name) {
static::info("Failed to parse method name for '$class_name::$method_name_lc' in '$method_xml_path'\n");
continue;
}
if (strpos($case_sensitive_method_name, '::') === false) {
static::info("Unexpected format of method name '$case_sensitive_method_name', expected something like '$class_name::$method_name_lc'\n");
continue;
}
if ($class_name_lc) {
$case_sensitive_method_name = $class_name_lc . '::' . explode('::', $case_sensitive_method_name, 2)[1];
}
$result[$case_sensitive_method_name] = $xml;
}
return $result;
}
/**
* @return ?array<mixed,string>
*/
public function parseMethodSignature(string $class_name, string $method_name): ?array
{
$class_name_lc = strtolower($class_name);
$method_name_lc = strtolower($method_name);
$class_name_file_map = $this->getFoldersForClassNameList();
$class_name_files = $class_name_file_map[$class_name_lc] ?? null;
if ($class_name_files === null) {
static::debug("Could not find class directory for $class_name\n");
return null;
}
if (count($class_name_files) !== 1) {
static::debug("Expected only one class implementation for $class_name\n");
return null;
}
$class_folder = \reset($class_name_files);
$method_filename = "$class_folder/" . str_replace('_', '-', $method_name_lc) . ".xml";
$xml = $this->getSimpleXMLForFile($method_filename);
if ($xml === null) {
return null;
}
return self::parseFunctionLikeSignatureForXML("{$class_name}::{$method_name}", $xml);
}
/** @var array<string,?SimpleXMLElement> maps file paths to cached parsed XML elements */
private $simple_xml_cache = [];
private function getSimpleXMLForFile(string $file_path): ?SimpleXMLElement
{
if (array_key_exists($file_path, $this->simple_xml_cache)) {
return $this->simple_xml_cache[$file_path];
}
return $this->simple_xml_cache[$file_path] = $this->getSimpleXMLForFileUncached($file_path);
}
/** @return string|false */
private function fileGetContents(string $file_path)
{
return $this->memoize(__METHOD__ . ':' . $file_path, /** @return string|false */ static function () use ($file_path) {
return file_get_contents($file_path);
});
}
private function getSimpleXMLForFileUncached(string $file_path): ?SimpleXMLElement
{
$signature_file_contents = $this->fileGetContents($file_path);
if (!is_string($signature_file_contents)) {
static::debug("Could not read '$file_path'\n");
return null;
}
return $this->getSimpleXMLForFileContents($signature_file_contents, $file_path);
}
private function getSimpleXMLForFileContents(string $signature_file_contents, string $file_path): ?SimpleXMLElement
{
// Not sure if there's a good way of using an external entity file in PHP.
$signature_file_contents = $this->normalizeEntityFile($signature_file_contents);
// echo $signature_file_contents . "\n";
try {
$result = @new SimpleXMLElement($signature_file_contents, LIBXML_ERR_NONE);
} catch (Exception $e) {
static::info("Failed to parse signature from file '$file_path' : " . $e->getMessage() . "\n");
return null;
}
$result->registerXPathNamespace('a', 'http://docbook.org/ns/docbook');
return $result;
}
private static function getFunctionNameFromXML(SimpleXMLElement $xml): ?string
{
$name = $xml->xpath('/a:refentry/a:refnamediv/a:refname') ?: [];
if (count($name) === 0) {
return null;
}
$valid_names = [];
foreach ($name as $potential_name) {
$potential_name = (string)$potential_name;
if (strpos($potential_name, '$') === false) {
$valid_names[] = $potential_name;
}
}
// E.g. CurlFile::__construct and curl_file_create
if (count($valid_names) === 1) {
return $valid_names[0];
}
return null;
}
private static function getMethodNameFromXML(SimpleXMLElement $xml): ?string
{
$name = $xml->xpath('/a:refentry/a:refnamediv/a:refname') ?: [];
if (count($name) === 0) {
return null;
}
$valid_names = [];
foreach ($name as $potential_name) {
$potential_name = (string)$potential_name;
if (strpos($potential_name, '::') !== false && strpos($potential_name, '$') === false) {
$valid_names[] = $potential_name;
}
}
// E.g. CurlFile::__construct and curl_file_create
if (count($valid_names) === 1) {
return $valid_names[0];
}
return null;
}
/**
* @param string $function_name
* @param ?SimpleXMLElement $xml
* @return ?array<mixed,string>
*/
private static function parseFunctionLikeSignatureForXML(string $function_name, ?SimpleXMLElement $xml): ?array
{
if (!$xml) {
return null;
}
// echo $contents->asXML();
// $function_description = $contents->xpath('/refentity/refsect1[role=description]/methodsynopsis');
$function_description_list = $xml->xpath('/a:refentry/a:refsect1[@role="description"]/a:methodsynopsis') ?: [];
if (count($function_description_list) !== 1) {
static::debug("Too many descriptions for '$function_name'\n");
return null;
}
$function_description = $function_description_list[0];
// $function_return_type = $function_description->type;
$return_type = static::toTypeString($function_description->type);
$params = self::extractMethodParams($function_description->methodparam);
$result = array_merge([$return_type], $params);
return $result;
}
/**
* @return array<string,string>
*/
private static function extractMethodParams(SimpleXMLElement $param): array
{
if ($param->count() === 0) {
return [];
}
$result = [];
$i = 0;
foreach ($param as $part) {
$i++;
$param_details = $part->parameter;
$param_name = (string)$param_details;
if (!$param_name) {
$param_name = 'arg' . $i;
}
// @phan-suppress-next-line PhanPluginUnknownObjectMethodCall TODO fix https://github.com/phan/phan/issues/3723
if (((string)($part->attributes()['choice'] ?? '')) === 'opt') {
$param_name .= '=';
}
// @phan-suppress-next-line PhanPluginUnknownObjectMethodCall TODO fix https://github.com/phan/phan/issues/3723
if (((string)($param_details->attributes()['role'] ?? '')) === 'reference') {
$param_name = "&$param_name";
}
$result[$param_name] = self::toTypeString($part->type);
}
return $result;
}
/** @param string|int|float $type */
private static function toTypeString($type): string
{
// TODO: Validate that Phan can parse these?
$type = (string)$type;
$type = ltrim($type, '\\');
if (strcasecmp($type, 'scalar') === 0) {
return 'int|string|float|bool';
}
if (strcasecmp($type, 'iterator') === 0) {
return 'iterator';
}
return $type;
}
/**
* @var array<string,true> a list of known expandable PHPDoc entities.
* We expand these into stub strings before parsing XML to avoid being overwhelmed with PHPDoc notices from SimpleXMLElement
*/
private $known_entities = null;
/**
* @return array<string,true>
*/
private function computeKnownEntities(): array
{
$this->known_entities = [];
foreach (['doc-base/entities/global.ent', 'en/contributors.ent', 'en/extensions.ent', 'en/language-defs.ent', 'en/language-snippets.ent'] as $sub_path) {
$abs_path = "$this->doc_base_directory/$sub_path";
$contents = file_get_contents($abs_path);
if (!$contents) {
throw new AssertionError("Failed to load $abs_path");
}
foreach (explode("\n", $contents) as $line) {
if (preg_match('/^<!ENTITY\s+(\S+)/', $line, $matches)) {
$entity_name = $matches[1];
$this->known_entities[strtolower($entity_name)] = true;
}
}
}
return $this->known_entities;
}
/**
* @return array<string,true>
*/
private function getKnownEntities(): array
{
if (!is_array($this->known_entities)) {
$this->known_entities = $this->computeKnownEntities();
}
return $this->known_entities;
}
private function normalizeEntityFile(string $contents): string
{
$entities = $this->getKnownEntities();
/**
* @param array<int,string> $matches
*/
return preg_replace_callback('/&([-a-zA-Z_.0-9]+);/', static function (array $matches) use ($entities): string {
$entity_name = $matches[1];
if (isset($entities[strtolower($entity_name)])) {
return "BEGINENTITY{$entity_name}ENDENTITY";
}
if (preg_match('/^reference\./', $entity_name)) {
return "BEGINENTITY{$entity_name}ENDENTITY";
}
// echo "Could not find entity $entity_name in $matches[0]\n";
return $matches[0];
}, $contents);
}
/**
* @return array<string,array<int|string,string>>
* @override
*/
public function getAvailableGlobalFunctionSignatures(): array
{
return $this->memoize(__METHOD__, /** @return array<string,array<int|string,string>> */ function (): array {
$function_name_map = [];
foreach ($this->getFilesForFunctionNameList() as $function_name => $unused_files) {
$signature_from_doc = $this->parseFunctionSignature($function_name);
if ($signature_from_doc === null) {
continue;
}
$function_name_map[$function_name] = $signature_from_doc;
}
return $function_name_map;
});
}
/**
* Normalize the extracted XML and convert it to markdown/HTML.
* @return ?string - Returns null if this is just a placeholder
*/
private static function convertXMLToMarkdown(string $text): ?string
{
$result = preg_replace_callback(
'/BEGINENTITY(\S*)ENDENTITY/',
/**
* @param array{0:string,1:string} $matches
*/
static function (array $matches): string {
$text = $matches[1];
$text = trim(preg_replace("/\\s+/m", " ", $text));
if (strtolower($text) === 'alias') {
return 'Alias of';
}
return "<code>$text</code>";
},
trim($text)
);
switch (strtolower($result)) {
case '':
return null;
}
$result = preg_replace('@<code>([^<>`]+)</code>@', '`\1`', $result);
return $result;
}
/**
* Normalize the extracted XML and convert it to markdown/HTML for a summary.
* @return ?string - Returns null if this is just a placeholder
*/
private static function normalizeExtractedXMLSummary(string $text): ?string
{
$result = preg_replace('/\s+/m', ' ', self::convertXMLToMarkdown($text) ?? '');
if (!$result) {
return null;
}
if (strtolower($result) === 'description') {
return null;
}
return $result;
}
/**
* @return array<string,string>
* @override
*/
protected function getAvailablePropertyPHPDocSummaries(): array
{
return $this->memoize(__METHOD__, /** @return array<string,string> */ function (): array {
$map = [];
foreach ($this->getClassXMLFiles() as $xml) {
$class_name = $xml->xpath('//a:classsynopsis/a:ooclass/a:classname');
if (!is_array($class_name) || count($class_name) !== 1) {
continue;
}
$class_name = (string)$class_name[0];
if (!$class_name) {
continue;
}
$property_entries = $xml->xpath('//a:section/a:variablelist/a:varlistentry');
foreach ($property_entries as $entry) {
$property_name = $entry->term->varname;
if (count($property_name) !== 1) {
continue;
}
$property_name = (string)$property_name[0];
$paragraphs = iterator_to_array($entry->listitem->para, false);
$text = self::extractDescriptionFromParagraphElements($paragraphs);
if (!$text) {
continue;
}
$property_fqsen = "$class_name::$property_name";
$map[$property_fqsen] = $text;
}
}
return $map;
});
}
/**
* Returns short phpdoc summaries of function and method signatures
*
* @return array<string,string>
* @override
*/
protected function getAvailableMethodPHPDocSummaries(): array
{
return $this->memoize(__METHOD__, /** @return array<string,string> */ function (): array {
$method_name_map = [];
$maybe_add_refpurpose = static function (string $name, SimpleXMLElement $xml) use (&$method_name_map): void {
$refpurpose = $xml->xpath('//a:refentry/a:refnamediv/a:refpurpose');
if (is_array($refpurpose) && count($refpurpose) === 1) {
$refpurpose = $refpurpose[0];
if ($refpurpose instanceof SimpleXMLElement) {
// @phan-suppress-next-line PhanPartialTypeMismatchArgumentInternal
$refpurpose = strip_tags($refpurpose->asXML());
}
// echo "Looking at $method_name: refpurpose = $refpurpose\n";
if (!$refpurpose) {
return;
}
$refpurpose = self::normalizeExtractedXMLSummary(trim($refpurpose));
if (!$refpurpose) {
return;
}
$method_name_map[$name] = $refpurpose;
}
};
foreach ($this->getFoldersForClassNameList() as $class_name => $unused_folder) {
foreach ($this->getMethodsForClassName($class_name) ?? [] as $method_name => $xml) {
$maybe_add_refpurpose($method_name, $xml);
}
}
foreach ($this->getFilesForFunctionNameList() as $function_name => $unused_files) {
$xml = $this->getSimpleXMLForFunctionSignature($function_name);
// echo "Looking at $function_name\n";
if (!$xml) {
// echo "Could not find xml\n";
continue;
}
$maybe_add_refpurpose($function_name, $xml);
}
self::sortSignatureMap($method_name_map);
return $method_name_map;
});
}
/**
* @return array<string,string>
* @override
*/
protected function getAvailableConstantPHPDocSummaries(): array
{
return $this->memoize(__METHOD__, /** @return array<string,string> */ function (): array {
$constant_name_map = [];
foreach ($this->getFilesForConstants() as $xml_file_name) {
$xml = $this->getSimpleXMLForFile($xml_file_name);
if (!$xml) {
fwrite(STDERR, "Failed to parse XML for $xml_file_name\n");
continue;
}
$constants_entries = $xml->xpath('//a:variablelist/a:varlistentry');
// var_export($constants_entries);
if (!is_array($constants_entries)) {
continue;
}
$constant_name_map += self::extractConstantEntries($constants_entries);
}
self::sortSignatureMap($constant_name_map);
return $constant_name_map;
});
}
/**
* @return array<string,string>
* @override
*/
protected function getAvailableClassPHPDocSummaries(): array
{
return $this->memoize(__METHOD__, /** @return array<string,string> */ function (): array {
$class_name_map = [];
foreach ($this->getClassXMLFiles() as $xml) {
$class_name = $xml->xpath('//a:classsynopsis/a:ooclass/a:classname');
if (!is_array($class_name) || count($class_name) !== 1) {
continue;
}
$class_name = (string)$class_name[0];
// $class_name = (string)$xml->titleabbrev;
if (!$class_name) {
continue;
}
$class_description_entries = $xml->partintro->section[0];
if (count($class_description_entries) === 0) {
continue;
}
$paragraphs = iterator_to_array($class_description_entries->para, false);
$text = self::extractDescriptionFromParagraphElements($paragraphs);
if (!$text) {
continue;
}
$class_name_map[$class_name] = $text;
// echo "$class_name: $text\n";
}
self::sortSignatureMap($class_name_map);
return $class_name_map;
});
}
private static function convertXMLElementToMarkdown(SimpleXMLElement $element): ?string
{
$xml = (string)$element->asXML();
if (strpos($xml, '<xref') !== false) {
$xml = preg_replace('@<xref linkend="([^"]+)"\s*/>@', 'the PHP manual\'s section on \1', $xml);
}
// TODO: Change this to use tidy if adding the extra dependency won't cause issues.
//
// Convert <literal> to <code>, etc, to use regular HTML.
//
// TODO: Reuse more of the code from
$xml = preg_replace('@<(/?)(literal|classname|interfacename|property|methodname|constant|function|type)\s*>@i', '<\1code>', $xml);
$xml = preg_replace('@<(/?)(emphasis)\s*>@i', '*', $xml);
// echo $xml . "\n";
$xml = strip_tags($xml, '<code><em>');
$xml = preg_replace('/\s+/m', ' ', $xml);
return self::convertXMLToMarkdown($xml);
}
/**
* @param array<int,SimpleXMLElement> $constants_entries
* @return array<string,string>
*/
private static function extractConstantEntries(array $constants_entries): array
{
$result = [];
foreach ($constants_entries as $entry) {
$entry->registerXPathNamespace('a', 'http://docbook.org/ns/docbook');
$name = $entry->term->constant;
// var_export($entry);
// echo "The extracted names are:\n";
// var_export($name);
// @phan-suppress-next-line PhanPluginUnknownObjectMethodCall TODO fix https://github.com/phan/phan/issues/3723
if ($name->count() !== 1) {
fwrite(STDERR, "Failed to parse $entry\n");
continue;
}
$name = (string)$name[0];
// @phan-suppress-next-line PhanPluginUnknownObjectMethodCall Phan can't infer that listitem is SimpleXMLElement
$description_paragraphs = iterator_to_array($entry->listitem->children(), false);
if (count($description_paragraphs) === 0) {
fwrite(STDERR, "Failed to extract description for $entry\n");
continue;
}
$text = self::extractDescriptionFromParagraphElements($description_paragraphs);
if (!$text) {
continue;
}
$result[$name] = $text;
echo "$name: $result[$name]\n";
}
self::sortSignatureMap($result);
return $result;
}
/**
* Returns a markdown/HTML description for $description_paragraphs
*
* @param array<int,SimpleXMLElement> $description_paragraphs
*/
private static function extractDescriptionFromParagraphElements(array $description_paragraphs): ?string
{
$lines = [];
foreach ($description_paragraphs as $element) {
// TODO: Do a better job than strip_tags
$line = self::convertXMLElementToMarkdown($element);
// fwrite(STDERR, "Extracted $line from $element\n");
if ($line) {
$lines[] = preg_replace('/\s+/m', ' ', $line);
}
}
if (!$lines) {
return null;
}
return implode("\n\n", $lines);
}
/**
* @return array<string,string> maps extension name to constants.xml
*/
private function getFilesForConstants(): array
{
return $this->memoize(__METHOD__, /** @return array<string,string> */ function (): array {
$constants_files = [];
$reserved_constants_file = $this->doc_base_directory . '/en/appendices/reserved.constants.core.xml';
if (!file_exists($reserved_constants_file)) {
throw new RuntimeException("Failed to load $reserved_constants_file");
}
$constants_files['reserved.core'] = $reserved_constants_file;
foreach (static::scandir($this->reference_directory) as $extension) {
$subpath = $this->reference_directory . "/$extension";
$constants_file_name = "$subpath/constants.xml";
if (file_exists($constants_file_name)) {
$constants_files[$extension] = $constants_file_name;
}
}
return $constants_files;
});
}
/**
* @return array<string,array<int|string,string>>
* @override
*/
public function getAvailableMethodSignatures(): array
{
return $this->memoize(__METHOD__, /** @return array<string,array<int|string,string>> */ function (): array {
$method_name_map = [];
foreach ($this->getFoldersForClassNameList() as $class_name => $unused_folder) {
foreach ($this->getMethodsForClassName($class_name) ?? [] as $method_name => $xml) {
$signature_from_doc = self::parseFunctionLikeSignatureForXML($method_name, $xml);
if ($signature_from_doc === null) {
continue;
}
// echo "For $class_name found $method_name\n";
$method_name_map[$method_name] = $signature_from_doc;
}
}
return $method_name_map;
});
}
/**
* Same as scandir, but ignores hidden files
* @return array<int,string>
*/
private static function scandir(string $directory): array
{
if (!is_dir($directory)) {
return [];
}
$result = [];
foreach (scandir($directory) as $subpath) {
if ($subpath[0] !== '.') {
$result[] = $subpath;
}
}
return $result;
}
/**
* Implements compare-named-parameters to prepare docs and stub files for php 8.0 named parameters
*/
public static function compareNamedParameters(): void
{
if (PHP_MAJOR_VERSION < 8) {
fwrite(STDERR, "compare-named-parameters MUST BE RUN IN PHP 8.0+, BUT WAS RUN IN " . PHP_VERSION . "\n");
fwrite(STDERR, "exiting without generating stubs\n");
exit(1);
}
global $argc, $argv;
if ($argc !== 4) {
fwrite(STDERR, "Invalid argument count, compare-named-parameters expects 2 arguments\n");
static::printUsageAndExit();
}
$stub_signature_detector = new IncompatibleStubsSignatureDetector($argv[2]);
$stub_signature_detector->selfTest();
$stub_signatures = array_merge(
$stub_signature_detector->getAvailableGlobalFunctionSignatures(),
$stub_signature_detector->getAvailableMethodSignatures()
);
$doc_signature_detector = new IncompatibleXMLSignatureDetector($argv[3]);
$doc_signatures = array_merge(
$doc_signature_detector->getAvailableGlobalFunctionSignatures(),
$doc_signature_detector->getAvailableMethodSignatures()
);
$doc_signatures_normalized = [];
foreach ($doc_signatures as $function_name => $parameters) {
$doc_signatures_normalized[strtolower($function_name)] = $parameters;
}
ksort($stub_signatures);
ksort($doc_signatures);
printf("Parsed %d signatures from stubs and %d from documentation\n", count($stub_signatures), count($doc_signatures));
echo "Comparing signatures\n";
foreach ($stub_signatures as $function_name => $parameters) {
if (count($parameters) <= 1) {
continue;
}
$doc_parameters = $doc_signatures_normalized[strtolower($function_name)] ?? null;
if (!$doc_parameters) {
continue;
}
unset($parameters[0]);
unset($doc_parameters[0]);
if (array_keys($parameters) !== array_keys($doc_parameters)) {
echo "Saw parameter name mismatch for $function_name\n";
echo "Reflection parameters: " . json_encode($parameters) . "\n";
echo "php.net documentation: " . json_encode($doc_parameters) . "\n";
echo "\n";
}
}
echo "Done comparing signatures\n";
exit(0);
}
}