src/Phan/AST/Parser.php
<?php
declare(strict_types=1);
namespace Phan\AST;
use ast\Node;
use CompileError;
use Error;
use Microsoft\PhpParser\Diagnostic;
use Microsoft\PhpParser\FilePositionMap;
use ParseError;
use Phan\AST\TolerantASTConverter\CachingTolerantASTConverter;
use Phan\AST\TolerantASTConverter\ParseException;
use Phan\AST\TolerantASTConverter\ParseResult;
use Phan\AST\TolerantASTConverter\ShimFunctions;
use Phan\AST\TolerantASTConverter\TolerantASTConverter;
use Phan\AST\TolerantASTConverter\TolerantASTConverterPreservingOriginal;
use Phan\AST\TolerantASTConverter\TolerantASTConverterWithNodeMapping;
use Phan\CodeBase;
use Phan\Config;
use Phan\Daemon\Request;
use Phan\Issue;
use Phan\Language\Context;
use Phan\Library\Cache;
use Phan\Library\DiskCache;
use Phan\Library\FileCacheEntry;
use Phan\Library\StringUtil;
use Phan\Phan;
use Phan\Plugin\ConfigPluginSet;
use Throwable;
use function error_clear_last;
use function error_get_last;
use function error_reporting;
/**
 * Parser parses the passed in PHP code based on configuration settings.
 *
 * It has options for error-tolerant parsing and for annotating \ast\Nodes
 * with additional information used by the language server.
 */
class Parser
{
/** @var ?Cache<ParseResult> */
private static $cache = null;
/**
 * Returns the shared AST cache, or null when caching should not be used.
 *
 * Caching is skipped when the code base expects file contents to change,
 * or when the `cache_polyfill_asts` setting is disabled.
 *
 * @return ?Cache<ParseResult>
 */
private static function maybeGetCache(CodeBase $code_base): ?Cache
{
    $caching_allowed = !$code_base->getExpectChangesToFileContents()
        && Config::getValue('cache_polyfill_asts');
    return $caching_allowed ? self::getCache() : null;
}
/**
 * Returns the process-wide cache instance, creating it on first use.
 *
 * @return Cache<ParseResult>
 * @suppress PhanPartialTypeMismatchReturn
 */
private static function getCache(): Cache
{
    if (self::$cache === null) {
        self::$cache = self::makeNewCache();
    }
    return self::$cache;
}
/**
 * Builds a disk cache stored under the system temporary directory.
 *
 * The directory is `<tmp>/phan`, suffixed with `-<user>` when the
 * USERNAME or USER environment variable is set.
 *
 * @return DiskCache<ParseResult>
 */
private static function makeNewCache(): DiskCache
{
    // The igbinary flag is only enabled for igbinary 2.0.5 or newer.
    $installed_igbinary = \phpversion('igbinary') ?: '';
    $use_igbinary = \version_compare($installed_igbinary, '2.0.5') >= 0;

    $cache_directory = \sys_get_temp_dir() . '/phan';
    $username = \getenv('USERNAME') ?: \getenv('USER');
    if (StringUtil::isNonZeroLengthString($username)) {
        $cache_directory .= "-$username";
    }
    return new DiskCache($cache_directory, '-ast', ParseResult::class, $use_igbinary);
}
/**
 * Parses the code with either the native parser or the polyfill parser.
 * If $suppress_parse_errors is false, this also emits SyntaxError.
 *
 * @param CodeBase $code_base
 * @param Context $context
 * @param ?Request $request (A daemon mode request if in daemon mode. May affect the parser used for $file_path)
 * @param string $file_path file path for error reporting
 * @param string $file_contents file contents to pass to parser. This may deliberately differ from what is currently on disk (e.g. for the language server mode or daemon mode)
 * @param bool $suppress_parse_errors (If true, don't emit SyntaxError)
 * @throws ParseError
 * @throws CompileError (possible in php 7.3)
 * @throws ParseException
 */
public static function parseCode(
    CodeBase $code_base,
    Context $context,
    ?Request $request,
    string $file_path,
    string $file_contents,
    bool $suppress_parse_errors
): Node {
    try {
        // The parser is chosen based on the config and $file_path.
        // (For "Go To Definition", one of the files will have a slower parser which records the requested AST node)
        if (!self::shouldUsePolyfill($file_path, $request)) {
            return self::parseCodeHandlingDeprecation($code_base, $context, $file_contents, $file_path);
        }
        // The polyfill helper has its own exception handling.
        // A ParseException thrown by it is not caught here and propagates to the caller.
        return self::parseCodePolyfill($code_base, $context, $file_path, $file_contents, $suppress_parse_errors, $request);
    } catch (CompileError | ParseError $parse_error) {
        return self::handleParseError($code_base, $context, $file_path, $file_contents, $suppress_parse_errors, $parse_error, $request);
    }
}
/**
 * Parses the code with the native `\ast\parse_code()`, converting notices and
 * compile warnings raised during parsing into Phan issues.
 *
 * The previous error_reporting level and the global handler slot are always restored.
 *
 * @throws ParseError
 * @throws CompileError
 */
private static function parseCodeHandlingDeprecation(CodeBase $code_base, Context $context, string $file_contents, string $file_path): Node
{
    // NOTE(review): presumably consulted by an error handler installed elsewhere in Phan - confirm.
    global $__no_echo_phan_errors;
    // Suppress errors such as "declare(encoding=...) ignored because Zend multibyte feature is turned off by settings" (#1076)
    // E_COMPILE_WARNING can't be caught by a PHP error handler,
    // the errors are printed to stderr by default (can't be captured),
    // and those errors might mess up language servers, etc. if ever printed to stdout
    $saved_error_reporting = error_reporting();
    error_reporting($saved_error_reporting & ~\E_COMPILE_WARNING);
    $__no_echo_phan_errors = static function (int $severity, string $message, string $unused_file, int $line) use ($code_base, $context): bool {
        $is_version_deprecation = $severity === \E_DEPRECATED && \preg_match('/Version.*is deprecated/i', $message);
        if ($is_version_deprecation) {
            // Returning false leaves this error unhandled by this callback.
            return false;
        }
        // Report errors such as E_DEPRECATED in php 7.4 for the (real) cast.
        Issue::maybeEmit($code_base, $context, Issue::CompatibleSyntaxNotice, $line, $message);
        // Returning true prevents printing to stderr
        return true;
    };
    try {
        // Clear stale errors so that error_get_last() only reflects this parse.
        error_clear_last();
        $ast = \ast\parse_code($file_contents, Config::AST_VERSION, $file_path);
        $last_error = error_get_last();
        if (($last_error['type'] ?? null) === \E_COMPILE_WARNING) {
            Issue::maybeEmit(
                $code_base,
                $context,
                Issue::SyntaxCompileWarning,
                $last_error['line'],
                $last_error['message']
            );
        }
        return $ast;
    } finally {
        $__no_echo_phan_errors = false;
        error_reporting($saved_error_reporting);
    }
}
/**
 * Handles ParseError|CompileError from the native parser.
 * This either returns a Node produced by the fallback (tolerant) parser
 * or re-throws the native error, depending on the configuration and parameters.
 *
 * @param CodeBase $code_base
 * @param Context $context
 * @param string $file_path file path for error reporting
 * @param string $file_contents file contents to pass to parser. May be overridden to ignore what is currently on disk.
 * @param bool $suppress_parse_errors (If true, don't emit SyntaxError)
 * @param ParseError|CompileError $native_parse_error (can be CompileError in 7.3+, will be ParseError in most cases)
 * @param ?Request $request used to check if caching should be enabled to save time.
 * @throws ParseError most of the time
 * @throws CompileError in PHP 7.3+
 */
public static function handleParseError(
    CodeBase $code_base,
    Context $context,
    string $file_path,
    string $file_contents,
    bool $suppress_parse_errors,
    Error $native_parse_error,
    ?Request $request = null
): Node {
    $is_real_file = $file_path !== 'internal';
    if ($is_real_file && !$suppress_parse_errors) {
        self::emitSyntaxErrorForNativeParseError($code_base, $context, $file_path, new FileCacheEntry($file_contents), $native_parse_error, $request);
    }
    // By default, don't try to re-parse files with syntax errors.
    //
    // Also give up if the parse error is in a file that's excluded from analysis.
    // Users might not see the parse error, and ignoring it (e.g. acting as though a file in vendor/ or ext/
    // that can't be parsed has class and function definitions)
    // may lead to users not noticing bugs.
    if (($is_real_file && !Config::getValue('use_fallback_parser')) || Phan::isExcludedAnalysisFile($file_path)) {
        throw $native_parse_error;
    }
    // The user would see the syntax error, so go ahead and retry with the tolerant parser.
    $converter = $request ? new CachingTolerantASTConverter() : new TolerantASTConverter();
    $converter->setPHPVersionId(Config::get_closest_target_php_version_id());
    $diagnostics = [];
    try {
        // TODO: loop over $diagnostics?
        return $converter->parseCodeAsPHPAST($file_contents, Config::AST_VERSION, $diagnostics);
    } catch (\Exception $_) {
        // Generic fallback. TODO: log.
        throw $native_parse_error;
    }
}
/**
 * Emit PhanSyntaxError for ParseError|CompileError from the native parser.
 *
 * The column is guessed from the token stream first; if that yields nothing (0),
 * the polyfill parser's diagnostics are used instead.
 *
 * @param CodeBase $code_base
 * @param Context $context
 * @param string $file_path file path for error reporting
 * @param FileCacheEntry $file_cache_entry for file contents that were passed to the polyfill parser. May be overridden to ignore what is currently on disk.
 * @param ParseError|CompileError $native_parse_error (can be CompileError in 7.3+, will be ParseError in most cases)
 * @param ?Request $request forwarded to the polyfill parser when it is used to guess the column
 */
public static function emitSyntaxErrorForNativeParseError(
    CodeBase $code_base,
    Context $context,
    string $file_path,
    FileCacheEntry $file_cache_entry,
    Error $native_parse_error,
    ?Request $request = null
): void {
    $error_line = $native_parse_error->getLine();
    $error_message = $native_parse_error->getMessage();
    $error_column = self::guessErrorColumnUsingTokens($file_cache_entry, $native_parse_error);
    if (!$error_column) {
        $error_column = self::guessErrorColumnUsingPolyfill($code_base, $context, $file_path, $file_cache_entry, $native_parse_error, $request);
    }
    Issue::maybeEmitWithParameters(
        $code_base,
        $context,
        Issue::SyntaxError,
        $error_line,
        [$error_message],
        null,
        $error_column
    );
}
/**
 * Returns the 1-based error column, or 0 if unknown.
 *
 * The unexpected token's text (and its T_* name, when the message includes one)
 * is extracted from the native parser's message.
 * This will return the column of the corresponding token only when there's exactly
 * one token with that value on the line with the error.
 *
 * @param FileCacheEntry $file_cache_entry provides the file contents to tokenize
 * @param Error $native_parse_error the ParseError|CompileError from the native parser
 */
private static function guessErrorColumnUsingTokens(
    FileCacheEntry $file_cache_entry,
    Error $native_parse_error
): int {
    if (!\function_exists('token_get_all')) {
        return 0;
    }
    $message = $native_parse_error->getMessage();
    $prefix = "unexpected (?:token )?('(?:.+)'|\"(?:.+)\")";
    // Try the message formats from most specific to least specific.
    $matched = \preg_match("/$prefix \((T_\w+)\)/", $message, $matches)
        || \preg_match("/$prefix, expecting/", $message, $matches)
        || \preg_match("/$prefix$/D", $message, $matches);
    if (!$matched) {
        return 0;
    }
    $token_name = $matches[2] ?? null;
    if (\is_string($token_name)) {
        if (!\defined($token_name)) {
            return 0;
        }
        $token_kind = \constant($token_name);
    } else {
        $token_kind = null;
    }
    // Strip the surrounding quotes from the token text.
    $token_str = \substr($matches[1], 1, -1);
    $tokens = \token_get_all($file_cache_entry->getContents());
    $candidates = [];
    $desired_line = $native_parse_error->getLine();
    // Single-character tokens are plain strings without line information,
    // so track the line on which the next token starts.
    // Without this, every matching single-character token before the end of the
    // error line would count as a candidate, not just those on the error line.
    $current_line = 1;
    foreach ($tokens as $i => $token) {
        if (\is_array($token)) {
            $line = $token[2];
            $current_line = $line + \substr_count($token[1], "\n");
        } else {
            $line = $current_line;
        }
        if ($line < $desired_line) {
            continue;
        }
        if ($line > $desired_line) {
            break;
        }
        if (\is_array($token)) {
            if ($token_kind !== $token[0] || $token_str !== $token[1]) {
                continue;
            }
        } elseif ($token_str !== $token) {
            continue;
        }
        $candidates[] = $i;
    }
    if (\count($candidates) !== 1) {
        // Either nothing matched or the match is ambiguous.
        return 0;
    }
    return self::computeColumnForTokenAtIndex($tokens, $candidates[0], $desired_line);
}
/**
 * Computes the column of the token at index $i by walking backwards over
 * the preceding tokens until the start of the line is reached.
 *
 * @param list<array{0:int,1:string,2:int}|string> $tokens the result of token_get_all()
 * @param int $i the index in $tokens of the token to locate
 * @param int $desired_line the line that the token at index $i is on
 * @return int the 1-based column of the token at index $i
 */
private static function computeColumnForTokenAtIndex(array $tokens, int $i, int $desired_line): int
{
    // The number of bytes between the start of the line and the token.
    $offset_in_line = 0;
    for ($j = $i - 1; $j >= 0; $j--) {
        $token = $tokens[$j];
        if (!\is_array($token)) {
            $offset_in_line += \strlen($token);
            continue;
        }
        $token_str = $token[1];
        if ($token[2] >= $desired_line) {
            $offset_in_line += \strlen($token_str);
            continue;
        }
        // This token started on an earlier line:
        // only the bytes after its last newline are on the desired line.
        $last_newline = \strrpos($token_str, "\n");
        if ($last_newline !== false) {
            $offset_in_line += \strlen($token_str) - $last_newline - 1;
        }
        break;
    }
    // Convert the 0-based offset into a 1-based column.
    // (This also applies when the loop reaches the start of the file, i.e. the token is on the first line)
    return $offset_in_line + 1;
}
/**
 * Returns the 1-based error column, or 0 if unknown.
 *
 * This parses the file with the polyfill parser and uses the position of its first diagnostic,
 * but only if that diagnostic is on the same line as the native parser's error.
 *
 * For efficiency, the diagnostics of the last parsed file contents are remembered
 * and reused when this is called multiple times in a row for the same contents.
 *
 * @param CodeBase $code_base
 * @param Context $context
 * @param string $file_path file path passed to the polyfill parser
 * @param FileCacheEntry $file_cache_entry provides the file contents and the offset-to-line map
 * @param Error $native_parse_error the ParseError|CompileError from the native parser
 * @param ?Request $request forwarded to the polyfill parser
 */
private static function guessErrorColumnUsingPolyfill(
    CodeBase $code_base,
    Context $context,
    string $file_path,
    FileCacheEntry $file_cache_entry,
    Error $native_parse_error,
    ?Request $request
): int {
    $file_contents = $file_cache_entry->getContents();
    static $last_file_contents = null;
    /** @var list<Diagnostic> $last_errors */
    static $last_errors = [];
    if ($last_file_contents !== $file_contents) {
        // Collect the diagnostics in a local variable and copy them afterwards,
        // so that the static variable never becomes part of a reference set.
        $new_errors = [];
        try {
            self::parseCodePolyfill($code_base, $context, $file_path, $file_contents, true, $request, $new_errors);
        } catch (Throwable $_) {
            // Best effort: only the diagnostics collected before the failure are needed.
        }
        $last_errors = $new_errors;
        $last_file_contents = $file_contents;
    }
    // If the polyfill parser emits the first error on the same line as the native parser,
    // mention the column that the polyfill parser found for the error.
    $diagnostic = $last_errors[0] ?? null;
    if (!$diagnostic) {
        return 0;
    }
    // Using FilePositionMap is much faster than substr_count to count lines if you have more than one diagnostic to report (e.g. a string has an unmatched quote).
    $file_position_map = $file_cache_entry->getFilePositionMap();
    $start = (int) $diagnostic->start;
    if ($file_position_map->getLineNumberForOffset($start) > $native_parse_error->getLine()) {
        return 0;
    }
    // If the current character is whitespace, keep searching forward for the next non-whitespace character
    $file_length = \strlen($file_contents);
    while ($start + 1 < $file_length && \ctype_space($file_contents[$start])) {
        $start++;
    }
    if ($file_position_map->getLineNumberForOffset($start) !== $native_parse_error->getLine()) {
        return 0;
    }
    // Find the last newline strictly before $start.
    // (The negative offset makes strrpos only consider matches ending before $start.
    // An offset of 0 is handled separately because the negative offset would be out of range)
    $preceding_newline = $start > 0 ? \strrpos($file_contents, "\n", $start - $file_length - 1) : false;
    // Without a preceding newline, the error is on the first line: the column is the offset plus 1.
    return $start - ($preceding_newline === false ? -1 : $preceding_newline);
}
/** Set an arbitrary limit on the number of warnings for the polyfill diagnostics to prevent excessively large errors for unmatched string quotes, etc. */
private const MAX_POLYFILL_WARNINGS = 1000;
/**
 * Parses the code with the polyfill. If $suppress_parse_errors is false, this also emits SyntaxError.
 *
 * When the polyfill reports errors, this throws a ParseException unless the
 * `use_fallback_parser` setting is enabled and the file is not excluded from analysis.
 *
 * @param CodeBase $code_base
 * @param Context $context
 * @param string $file_path file path for error reporting
 * @param string $file_contents file contents to pass to parser. May be overridden to ignore what is currently on disk.
 * @param bool $suppress_parse_errors (If true, don't emit SyntaxError)
 * @param ?Request $request - May affect the parser used for $file_path
 * @param list<Diagnostic> &$errors @phan-output-reference
 * @throws ParseException
 * @suppress PhanThrowTypeMismatch
 */
public static function parseCodePolyfill(CodeBase $code_base, Context $context, string $file_path, string $file_contents, bool $suppress_parse_errors, ?Request $request, array &$errors = []): Node
{
    // @phan-suppress-next-line PhanRedundantCondition
    if (!\in_array(Config::AST_VERSION, TolerantASTConverter::SUPPORTED_AST_VERSIONS, true)) {
        throw new \Error(\sprintf("Unexpected polyfill version: want %s, got %d", \implode(', ', TolerantASTConverter::SUPPORTED_AST_VERSIONS), Config::AST_VERSION));
    }
    $converter = self::createConverter($file_path, $file_contents, $request);
    $converter->setPHPVersionId(Config::get_closest_target_php_version_id());
    $errors = [];
    // Clear stale errors so that error_get_last() only reflects this parse.
    error_clear_last();
    try {
        $node = $converter->parseCodeAsPHPAST($file_contents, Config::AST_VERSION, $errors, self::maybeGetCache($code_base));
    } catch (\Exception $e) {
        // Generic fallback. TODO: log.
        throw new ParseException('Unexpected Exception of type ' . \get_class($e) . ': ' . $e->getMessage(), 0);
    }
    if (!$suppress_parse_errors) {
        $error = error_get_last();
        if ($error) {
            self::handleWarningFromPolyfill($code_base, $context, $error);
        }
    }
    if (!$errors) {
        return $node;
    }
    $file_position_map = new FilePositionMap($file_contents);
    $file_length = \strlen($file_contents);
    $emitted_warning_count = 0;
    foreach ($errors as $diagnostic) {
        // NOTE(review): kind 0 is presumably the tolerant parser's "error" diagnostic kind - confirm.
        if ($diagnostic->kind !== 0) {
            continue;
        }
        $start = (int)$diagnostic->start;
        $diagnostic_error_message = 'Fallback parser diagnostic error: ' . $diagnostic->message;
        $diagnostic_error_start_line = $file_position_map->getLineNumberForOffset($start);
        // Find the last newline strictly before $start.
        // (An offset of 0 is handled separately because the negative offset would be out of range)
        $preceding_newline = $start > 0 ? \strrpos($file_contents, "\n", $start - $file_length - 1) : false;
        // Without a preceding newline, the error is on the first line: the 1-based column is the offset plus 1.
        $diagnostic_error_column = $start - ($preceding_newline === false ? -1 : $preceding_newline);
        if (!$suppress_parse_errors) {
            $emitted_warning_count++;
            if ($emitted_warning_count <= self::MAX_POLYFILL_WARNINGS) {
                Issue::maybeEmitWithParameters(
                    $code_base,
                    $context,
                    Issue::SyntaxError,
                    $diagnostic_error_start_line,
                    [$diagnostic_error_message],
                    null,
                    $diagnostic_error_column
                );
            }
        }
        if (!Config::getValue('use_fallback_parser')) {
            // By default, don't try to re-parse files with syntax errors.
            throw new ParseException($diagnostic_error_message, $diagnostic_error_start_line);
        }
        // If there's a parse error in a file that's excluded from analysis, give up on parsing it.
        // Users might not see the parse error, and ignoring it (e.g. acting as though a file in vendor/ or ext/
        // that can't be parsed has class and function definitions)
        // may lead to users not noticing bugs.
        if (Phan::isExcludedAnalysisFile($file_path)) {
            throw new ParseException($diagnostic_error_message, $diagnostic_error_start_line);
        }
    }
    return $node;
}
/**
 * Emits an issue for an E_DEPRECATED or E_COMPILE_WARNING error that was raised from PhpTokenizer.php.
 * Any other error is ignored.
 *
 * If the message ends with "line <number>", that number is used as the issue's line
 * instead of the line recorded in $error.
 *
 * @param array<string,mixed> $error the result of error_get_last()
 */
private static function handleWarningFromPolyfill(CodeBase $code_base, Context $context, array $error): void
{
    $type = $error['type'];
    $is_relevant_type = $type === \E_DEPRECATED || $type === \E_COMPILE_WARNING;
    if (!$is_relevant_type || \basename($error['file']) !== 'PhpTokenizer.php') {
        return;
    }
    $reported_line = $error['line'];
    if (\preg_match('/line ([0-9]+)$/D', $error['message'], $line_match)) {
        $reported_line = (int)$line_match[1];
    }
    $issue_type = $type === \E_COMPILE_WARNING ? Issue::SyntaxCompileWarning : Issue::CompatibleSyntaxNotice;
    Issue::maybeEmit($code_base, $context, $issue_type, $reported_line, $error['message']);
}
/**
 * Remove the leading #!/path/to/interpreter/of/php from a CLI script, if any was found.
 *
 * The shebang line is replaced with a line containing an opening php tag, to preserve line numbers.
 * If the shebang line isn't terminated by "\n", the empty string is returned.
 */
public static function removeShebang(string $file_contents): string
{
    if (\substr($file_contents, 0, 2) !== "#!") {
        return $file_contents;
    }
    // The shebang line ends at the first "\n" (which also covers "\r\n").
    $newline_offset = \strpos($file_contents, "\n", 2);
    if ($newline_offset === false) {
        return '';
    }
    $remainder = (string)\substr($file_contents, $newline_offset + 1);
    if (\strcasecmp(\substr($remainder, 0, 5), "<?php") !== 0) {
        // Preserve the line numbers by adding a no-op newline instead of the removed shebang
        return "<?php\n?>" . $remainder;
    }
    // declare(strict_types=1) must be the first part of the script.
    // Even empty php tags aren't allowed prior to it, so avoid adding empty tags if possible.
    return "<?php\n" . \substr($remainder, 5);
}
/**
 * Returns true if $file_path should be parsed with the polyfill parser instead of the native parser.
 *
 * This is the case when the `use_polyfill_parser` setting is enabled,
 * or when the request (if any) asks for the mapping polyfill for this file.
 *
 * @param string $file_path the path of the file to parse
 * @param ?Request $request the daemon mode request, if any
 */
private static function shouldUsePolyfill(string $file_path, ?Request $request = null): bool
{
    if (Config::getValue('use_polyfill_parser')) {
        return true;
    }
    if ($request) {
        return $request->shouldUseMappingPolyfill($file_path);
    }
    return false;
}
/**
 * Creates the polyfill converter to use for $file_path.
 *
 * - With a request that wants the mapping polyfill for this file: a converter that maps
 *   the request's target byte offset to a node (optionally adding placeholders)
 * - With any other request: a caching converter
 * - Without a request: a converter preserving the original node if
 *   `__parser_keep_original_node` is set, and a plain converter otherwise
 *
 * @param string $file_path the path of the file to parse
 * @param string $file_contents the contents of the file, used to compute the target byte offset
 * @param ?Request $request the daemon mode request, if any
 */
private static function createConverter(string $file_path, string $file_contents, ?Request $request = null): TolerantASTConverter
{
    if (!$request) {
        if (Config::getValue('__parser_keep_original_node')) {
            return new TolerantASTConverterPreservingOriginal();
        }
        return new TolerantASTConverter();
    }
    if (!$request->shouldUseMappingPolyfill($file_path)) {
        return new CachingTolerantASTConverter();
    }
    // TODO: Rename to something better
    $converter = new TolerantASTConverterWithNodeMapping(
        $request->getTargetByteOffset($file_contents),
        static function (Node $node): void {
            // @phan-suppress-next-line PhanAccessMethodInternal
            ConfigPluginSet::instance()->prepareNodeSelectionPluginForNode($node);
        }
    );
    if ($request->shouldAddPlaceholdersForPath($file_path)) {
        $converter->setShouldAddPlaceholders(true);
    }
    return $converter;
}
/**
 * Get a string representation of the AST kind value.
 *
 * The decision between the native function and the shim is made once and then remembered.
 *
 * @suppress PhanAccessMethodInternal
 */
public static function getKindName(int $kind): string
{
    static $use_native = null;
    if ($use_native === null) {
        $use_native = self::shouldUseNativeAST();
    }
    // The shim is used when the native function doesn't exist or is missing some constants Phan would use.
    return $use_native ? \ast\get_kind_name($kind) : ShimFunctions::getKindName($kind);
}
// TODO: Refactor and make more code use this check
/**
 * Returns true if the installed php-ast extension is at least the minimum version
 * required for the running PHP version.
 */
private static function shouldUseNativeAST(): bool
{
    // An absent extension is treated as version 0.0.0
    $installed_version = \phpversion('ast') ?: '0.0.0';
    $min_version = \PHP_VERSION_ID >= 80000
        ? '1.0.10'
        : (\PHP_VERSION_ID >= 70400 ? '1.0.2' : Config::MINIMUM_AST_EXTENSION_VERSION);
    return \version_compare($installed_version, $min_version, '>=');
}
}