src/Phan/Language/Element/MarkupDescription.php

Summary

Maintainability
F
4 days
Test Coverage
<?php

declare(strict_types=1);

namespace Phan\Language\Element;

use Exception;
use Phan\CodeBase;
use Phan\Language\Element\Comment\Builder;
use Phan\Language\UnionType;
use Phan\Library\StringUtil;

use function count;

/**
 * APIs for generating markup (markdown) description of elements
 *
 * This is used by the language server to send markup to the language client (e.g. for hover text of an element).
 */
class MarkupDescription
{
    /**
     * Generates a markup snippet with a description of the declaration of $element,
     * and its doc comment summary and description.
     */
    public static function buildForElement(
        AddressableElementInterface $element,
        CodeBase $code_base
    ): string {
        // TODO: Use the doc comments of the ancestors if unavailable or if (at)inheritDoc is used.
        $markup = $element->getMarkupDescription();
        $result = "```php\n$markup\n```";
        $extracted_doc_comment = self::extractDescriptionFromDocCommentOrAncestor($element, $code_base);
        if (StringUtil::isNonZeroLengthString($extracted_doc_comment)) {
            $result .= "\n\n" . $extracted_doc_comment;
        }

        return $result;
    }

    /**
     * @template T
     * @param array<string,T> $signatures
     * @return array<string,T>
     */
    private static function signaturesToLower(array $signatures): array
    {
        $result = [];
        foreach ($signatures as $fqsen => $summary) {
            $result[\strtolower($fqsen)] = $summary;
        }
        return $result;
    }

    /**
     * Eagerly load all of the hover signatures into memory before potentially forking.
     */
    public static function eagerlyLoadAllDescriptionMaps(): void
    {
        if (!\extension_loaded('pcntl')) {
            // There's no forking, so descriptions will always be available after the first time they're loaded.
            // No need to force phan to load these prior to forking.
            return;
        }
        self::loadClassDescriptionMap();
        self::loadConstantDescriptionMap();
        self::loadFunctionDescriptionMap();
        self::loadPropertyDescriptionMap();
    }

    /**
     * @return array<string,string> mapping lowercase function/method FQSENs to short summaries.
     * @internal - The data format may change
     */
    public static function loadFunctionDescriptionMap(): array
    {
        static $descriptions = null;
        if (\is_array($descriptions)) {
            return $descriptions;
        }
        return $descriptions = self::signaturesToLower(require(\dirname(__DIR__) . '/Internal/FunctionDocumentationMap.php'));
    }

    /**
     * @return array<string,string> mapping lowercase constant/class constant FQSENs to short summaries.
     * @internal - The data format may change
     */
    public static function loadConstantDescriptionMap(): array
    {
        static $descriptions = null;
        if (\is_array($descriptions)) {
            return $descriptions;
        }
        return $descriptions = self::signaturesToLower(require(\dirname(__DIR__) . '/Internal/ConstantDocumentationMap.php'));
    }

    /**
     * @return array<string,string> mapping class FQSENs to short summaries.
     */
    public static function loadClassDescriptionMap(): array
    {
        static $descriptions = null;
        if (\is_array($descriptions)) {
            return $descriptions;
        }
        return $descriptions = self::signaturesToLower(require(\dirname(__DIR__) . '/Internal/ClassDocumentationMap.php'));
    }

    /**
     * @return array<string,string> mapping property FQSENs to short summaries.
     * @internal - The data format may change
     */
    public static function loadPropertyDescriptionMap(): array
    {
        static $descriptions = null;
        if (\is_array($descriptions)) {
            return $descriptions;
        }
        return $descriptions = self::signaturesToLower(require(\dirname(__DIR__) . '/Internal/PropertyDocumentationMap.php'));
    }

    /**
     * Extracts a plaintext description of the element from the doc comment of an element or its ancestor.
     * (or from FunctionDocumentationMap.php)
     *
     * @param array<string,true> $checked_class_fqsens
     */
    public static function extractDescriptionFromDocCommentOrAncestor(
        AddressableElementInterface $element,
        CodeBase $code_base,
        array &$checked_class_fqsens = []
    ): ?string {
        $extracted_doc_comment = self::extractDescriptionFromDocComment($element, $code_base);
        if (StringUtil::isNonZeroLengthString($extracted_doc_comment)) {
            return $extracted_doc_comment;
        }
        if ($element instanceof ClassElement) {
            try {
                $fqsen_string = $element->getClassFQSEN()->__toString();
                if (isset($checked_class_fqsens[$fqsen_string])) {
                    // We already checked this and either succeeded or returned null
                    return null;
                }
                $checked_class_fqsens[$fqsen_string] = true;
                return self::extractDescriptionFromDocCommentOfAncestorOfClassElement($element, $code_base);
            } catch (Exception $_) {
                // ignore
            }
        }
        return null;
    }

    /**
     * @param array<string,true> $checked_class_fqsens used to guard against recursion
     */
    private static function extractDescriptionFromDocCommentOfAncestorOfClassElement(
        ClassElement $element,
        CodeBase $code_base,
        array &$checked_class_fqsens = []
    ): ?string {
        if (!$element->isOverride() && $element->getRealDefiningFQSEN() === $element->getFQSEN()) {
            return null;
        }
        $class_fqsen = $element->getDefiningClassFQSEN();
        $class = $code_base->getClassByFQSEN($class_fqsen);
        foreach ($class->getAncestorFQSENList() as $ancestor_fqsen) {
            $ancestor_element = Clazz::getAncestorElement($code_base, $ancestor_fqsen, $element);
            if (!$ancestor_element) {
                continue;
            }

            $extracted_doc_comment = self::extractDescriptionFromDocCommentOrAncestor($ancestor_element, $code_base, $checked_class_fqsens);
            if (StringUtil::isNonZeroLengthString($extracted_doc_comment)) {
                return $extracted_doc_comment;
            }
        }
        return null;
    }

    /**
     * Extracts a plaintext description of the element from the doc comment of an element.
     * (or from FunctionDocumentationMap.php)
     */
    public static function extractDescriptionFromDocComment(
        AddressableElementInterface $element,
        CodeBase $code_base = null
    ): ?string {
        $extracted_doc_comment = self::extractDescriptionFromDocCommentRaw($element);
        if (StringUtil::isNonZeroLengthString($extracted_doc_comment)) {
            return $extracted_doc_comment;
        }

        // This is an element internal to PHP.
        if ($element->isPHPInternal()) {
            if ($element instanceof FunctionInterface) {
                // This is a function/method - Use Phan's FunctionDocumentationMap.php to try to load a markup description.
                if ($element instanceof Method && \strtolower($element->getName()) !== '__construct') {
                    $fqsen = $element->getDefiningFQSEN();
                } else {
                    $fqsen = $element->getFQSEN();
                }
                $key = \strtolower(\ltrim((string)$fqsen, '\\'));
                $result = self::loadFunctionDescriptionMap()[$key] ?? null;
                if (StringUtil::isNonZeroLengthString($result)) {
                    return $result;
                }
                if ($code_base && $element instanceof Method) {
                    try {
                        if (\strtolower($element->getName()) === '__construct') {
                            $class = $element->getClass($code_base);
                            $class_description = self::extractDescriptionFromDocComment($class, $code_base);
                            if (StringUtil::isNonZeroLengthString($class_description)) {
                                return "Construct an instance of `{$class->getFQSEN()}`.\n\n$class_description";
                            }
                        }
                    } catch (Exception $_) {
                    }
                }
            } elseif ($element instanceof ConstantInterface) {
                // This is a class or global constant - Use Phan's ConstantDocumentationMap.php to try to load a markup description.
                if ($element instanceof ClassConstant) {
                    $fqsen = $element->getDefiningFQSEN();
                } else {
                    $fqsen = $element->getFQSEN();
                }
                $key = \strtolower(\ltrim((string)$fqsen, '\\'));
                return self::loadConstantDescriptionMap()[$key] ?? null;
            } elseif ($element instanceof Clazz) {
                $key = \strtolower(\ltrim((string)$element->getFQSEN(), '\\'));
                return self::loadClassDescriptionMap()[$key] ?? null;
            } elseif ($element instanceof Property) {
                $key = \strtolower(\ltrim((string)$element->getDefiningFQSEN(), '\\'));
                return self::loadPropertyDescriptionMap()[$key] ?? null;
            }
        }
        return null;
    }

    private static function extractDescriptionFromDocCommentRaw(AddressableElementInterface $element): ?string
    {
        $doc_comment = $element->getDocComment();
        if (!StringUtil::isNonZeroLengthString($doc_comment)) {
            return null;
        }
        $comment_category = null;
        if ($element instanceof Property) {
            $comment_category = Comment::ON_PROPERTY;
        } elseif ($element instanceof ConstantInterface) {
            $comment_category = Comment::ON_CONST;
        } elseif ($element instanceof FunctionInterface) {
            $comment_category = Comment::ON_FUNCTION;
        }
        $extracted_doc_comment = self::extractDocComment($doc_comment, $comment_category, $element->getUnionType());
        return StringUtil::isNonZeroLengthString($extracted_doc_comment) ? $extracted_doc_comment : null;
    }

    /**
     * @return array<string,string> information about the param tags
     */
    public static function extractParamTagsFromDocComment(AddressableElementInterface $element, bool $with_param_details = true): array
    {
        $doc_comment = $element->getDocComment();
        if (!\is_string($doc_comment)) {
            return [];
        }
        if (\strpos($doc_comment, '@param') === false) {
            return [];
        }
        // Trim the start and the end of the doc comment.
        //
        // We leave in the second `*` of `/**` so that every single non-empty line
        // of a typical doc comment will begin with a `*`
        $doc_comment = \preg_replace('@(^/\*)|(\*/$)@D', '', $doc_comment);

        $results = [];
        $lines = \explode("\n", $doc_comment);
        foreach ($lines as $i => $line) {
            $line = self::trimLine($line);
            if (\preg_match('/^\s*@param(\s|$)/D', $line) > 0) {
                // Extract all of the (at)param annotations.
                $param_tag_summary = self::extractTagSummary($lines, $i);
                if (\end($param_tag_summary) === '') {
                    \array_pop($param_tag_summary);
                }
                $full_comment = \implode("\n", self::trimLeadingWhitespace($param_tag_summary));
                // @phan-suppress-next-line PhanAccessClassConstantInternal
                $matched = \preg_match(Builder::PARAM_COMMENT_REGEX, $full_comment, $match);
                if (!$matched) {
                    continue;
                }
                if (!isset($match[17])) {
                    continue;
                }

                $name = $match[17];
                if ($with_param_details) {
                    // Keep the param details and put them in a markdown quote
                    // @phan-suppress-next-line PhanAccessClassConstantInternal
                    $full_comment = \preg_replace(Builder::PARAM_COMMENT_REGEX, '`\0`', $full_comment);
                } else {
                    // Drop the param details
                    // @phan-suppress-next-line PhanAccessClassConstantInternal
                    $full_comment = \trim(\preg_replace(Builder::PARAM_COMMENT_REGEX, '', $full_comment));
                }
                $results[$name] = $full_comment;
            }
        }
        return $results;
    }

    /**
     * Returns a doc comment with:
     *
     * - leading `/**` and trailing `*\/` removed
     * - leading/trailing space on lines removed,
     * - blank lines removed from the beginning and end.
     *
     * @return string simplified version of the doc comment, with leading `*` on lines preserved.
     */
    public static function getDocCommentWithoutWhitespace(string $doc_comment): string
    {
        // Trim the start and the end of the doc comment.
        //
        // We leave in the second `*` of `/**` so that every single non-empty line
        // of a typical doc comment will begin with a `*`
        $doc_comment = \preg_replace('@(^/\*)|(\*/$)@D', '', $doc_comment);

        $lines = \explode("\n", $doc_comment);
        $lines = \array_map('trim', $lines);
        $lines = MarkupDescription::trimLeadingWhitespace($lines);
        while (\in_array(\end($lines), ['*', ''], true)) {
            \array_pop($lines);
        }
        while (\in_array(\reset($lines), ['*', ''], true)) {
            \array_shift($lines);
        }
        return \implode("\n", $lines);
    }

    /**
     * Returns a markup string with the extracted description of this element (known to be a comment of an element with type $comment_category).
     * On success, this is a non-empty string.
     *
     * @return string markup string
     * @internal
     */
    public static function extractDocComment(string $doc_comment, int $comment_category = null, UnionType $element_type = null, bool $remove_type = false): string
    {
        // Trim the start and the end of the doc comment.
        //
        // We leave in the second `*` of `/**` so that every single non-empty line
        // of a typical doc comment will begin with a `*`
        $doc_comment = \preg_replace('@(^/\*)|(\*/$)@D', '', $doc_comment);

        $results = [];
        $lines = \explode("\n", $doc_comment);
        $saw_phpdoc_tag = false;
        $did_build_from_phpdoc_tag = false;

        foreach ($lines as $i => $line) {
            $line = self::trimLine($line);
            if (\preg_match('/^\s*@/', $line) > 0) {
                $saw_phpdoc_tag = true;
                if (count($results) === 0) {
                    // Special cases:
                    if (\in_array($comment_category, [Comment::ON_PROPERTY, Comment::ON_CONST], true)) {
                        // Treat `@var T description of T` as a valid single-line comment of constants and properties.
                        // Variables don't currently have associated comments
                        if (\preg_match('/^\s*@var\s/', $line) > 0) {
                            $new_lines = self::extractTagSummary($lines, $i);
                            if (isset($new_lines[0])) {
                                $did_build_from_phpdoc_tag = true;
                                // @phan-suppress-next-line PhanAccessClassConstantInternal
                                $new_lines[0] = \preg_replace(Builder::PARAM_COMMENT_REGEX, $remove_type ? '' : '`\0`', $new_lines[0]);
                            }
                            $results = \array_merge($results, $new_lines);
                        }
                    } elseif (\in_array($comment_category, Comment::FUNCTION_LIKE, true)) {
                        // Treat `@return T description of return value` as a valid single-line comment of closures, functions, and methods.
                        // Variables don't currently have associated comments
                        if (\preg_match('/^\s*@return(\s|$)/D', $line) > 0) {
                            $new_lines = self::extractTagSummary($lines, $i);
                            if (isset($new_lines[0])) {
                                // @phan-suppress-next-line PhanAccessClassConstantInternal
                                $new_lines[0] = \preg_replace(Builder::RETURN_COMMENT_REGEX, $remove_type ? '' : '`\0`', $new_lines[0]);
                            }
                            $results = \array_merge($results, $new_lines);
                        }
                    }
                }
            }
            if ($saw_phpdoc_tag) {
                continue;
            }
            if (\trim($line) === '') {
                $line = '';
                if (\in_array(\end($results), ['', false], true)) {
                    continue;
                }
            }
            $results[] = $line;
        }
        if (\end($results) === '') {
            \array_pop($results);
        }
        $results = self::trimLeadingWhitespace($results);
        $str = \implode("\n", $results);
        if ($comment_category === Comment::ON_PROPERTY && !$did_build_from_phpdoc_tag && !$remove_type) {
            if ($element_type && !$element_type->isEmpty()) {
                $str = \trim("`@var $element_type` $str");
            }
        }
        return $str;
    }

    /**
     * Remove leading * and spaces (and trailing spaces) from the provided line of text.
     * This is useful for trimming raw doc comment lines
     */
    public static function trimLine(string $line): string
    {
        $line = \rtrim($line);
        $pos = \strpos($line, '*');
        if ($pos !== false) {
            return (string)\substr($line, $pos + 1);
        } else {
            return \ltrim($line, "\n\t ");
        }
    }

    /**
     * @param list<string> $lines
     * @param int $i the offset of the tag in $lines
     * @return list<string> the trimmed lines
     * @internal
     */
    public static function extractTagSummary(array $lines, int $i): array
    {
        $summary = [];
        $summary[] = self::trimLine($lines[$i]);
        for ($j = $i + 1; $j < count($lines); $j++) {
            $line = self::trimLine($lines[$j]);
            if (\preg_match('/^\s*\{?@/', $line)) {
                // Break on other annotations such as (at)internal, {(at)inheritDoc}, etc.
                break;
            }
            if ($line === '' && \end($summary) === '') {
                continue;
            }
            $summary[] = $line;
        }
        if (\end($summary) === '') {
            \array_pop($summary);
        }
        if (count($summary) === 1 && count(\preg_split('/\s+/', \trim($summary[0]))) <= 2) {
            // For something uninformative such as "* (at)return int" (and nothing else),
            // don't treat it as a summary.
            //
            // The caller would already show the return type
            return [];
        }
        return $summary;
    }

    /**
     * @param list<string> $lines
     * @return list<string>
     */
    private static function trimLeadingWhitespace(array $lines): array
    {
        if (count($lines) === 0) {
            return [];
        }
        $min_whitespace = \PHP_INT_MAX;
        foreach ($lines as $line) {
            if ($line === '') {
                continue;
            }
            $min_whitespace = \min($min_whitespace, \strlen($line));
            $min_whitespace = \min($min_whitespace, \strspn($line, ' ', 0, $min_whitespace));
            if ($min_whitespace === 0) {
                return $lines;
            }
        }
        if ($min_whitespace > 0) {
            foreach ($lines as $i => $line) {
                if ($line === '') {
                    continue;
                }
                $lines[$i] = (string)\substr($line, $min_whitespace);
            }
        }
        return $lines;
    }
}