.phan/plugins/DuplicateArrayKeyPlugin.php

Summary

Maintainability
D
2 days
Test Coverage
<?php

declare(strict_types=1);

use ast\Node;
use Phan\AST\ASTHasher;
use Phan\AST\ASTReverter;
use Phan\AST\UnionTypeVisitor;
use Phan\Issue;
use Phan\Parse\ParseVisitor;
use Phan\PluginV3;
use Phan\PluginV3\PluginAwarePostAnalysisVisitor;
use Phan\PluginV3\PostAnalyzeNodeCapability;

/**
 * Checks for duplicate/equivalent array keys and case statements, as well as arrays mixing `key => value, with `value,`.
 *
 * @see DollarDollarPlugin for generic plugin documentation.
 */
class DuplicateArrayKeyPlugin extends PluginV3 implements PostAnalyzeNodeCapability
{
    /**
     * @return string - name of PluginAwarePostAnalysisVisitor subclass
     * @override
     */
    public static function getPostAnalyzeNodeVisitorClassName(): string
    {
        return DuplicateArrayKeyVisitor::class;
    }
}

/**
 * This class has visitArray called on all array literals in files to check for potential problems with keys.
 *
 * When __invoke on this class is called with a node, a method
 * will be dispatched based on the `kind` of the given node.
 *
 * Visitors such as this are useful for defining lots of different
 * checks on a node based on its kind.
 */
class DuplicateArrayKeyVisitor extends PluginAwarePostAnalysisVisitor
{
    private const HASH_PREFIX = "\x00__phan_dnu_";

    // Do not define the visit() method unless a plugin has code and needs to visit most/all node types.

    /**
     * @param Node $node
     * A switch statement's case statement(AST_SWITCH_LIST) node to analyze
     * @override
     */
    public function visitSwitchList(Node $node): void
    {
        $children = $node->children;
        if (count($children) <= 1) {
            // This plugin will never emit errors if there are 0 or 1 elements.
            return;
        }

        $case_constant_set = [];
        $values_to_check = [];
        foreach ($children as $i => $case_node) {
            if (!$case_node instanceof Node) {
                throw new AssertionError("Switch list must contain nodes");
            }
            $case_cond = $case_node->children['cond'];
            if ($case_cond === null) {
                continue;  // This is `default:`. php --syntax-check already checks for duplicates.
            }
            // Skip array entries without literal keys. (Do it before resolving the key value)
            if (!is_scalar($case_cond)) {
                $original_case_cond = $case_cond;
                $case_cond = UnionTypeVisitor::unionTypeFromNode($this->code_base, $this->context, $case_cond)->asSingleScalarValueOrNullOrSelf();
                if (is_object($case_cond)) {
                    $case_cond = $original_case_cond;
                }
            }
            if (is_string($case_cond)) {
                $cond_key = "s$case_cond";
                $values_to_check[$i] = $case_cond;
            } elseif (is_int($case_cond)) {
                $cond_key = $case_cond;
                $values_to_check[$i] = $case_cond;
            } elseif (is_bool($case_cond)) {
                $cond_key = $case_cond ? "T" : "F";
                $values_to_check[$i] = $case_cond;
            } else {
                // could be literal null?
                $cond_key = ASTHasher::hash($case_cond);
                if (!is_object($case_cond)) {
                    $values_to_check[$i] = $case_cond;
                }
            }
            if (isset($case_constant_set[$cond_key])) {
                $normalized_case_cond = is_object($case_cond) ? ASTReverter::toShortString($case_cond) : self::normalizeSwitchKey($case_cond);
                $this->emitPluginIssue(
                    $this->code_base,
                    clone($this->context)->withLineNumberStart($case_node->lineno),
                    'PhanPluginDuplicateSwitchCase',
                    "Duplicate/Equivalent switch case({STRING_LITERAL}) detected in switch statement - the later entry will be ignored in favor of case {CODE} at line {LINE}.",
                    [$normalized_case_cond, ASTReverter::toShortString($case_constant_set[$cond_key]->children['cond']), $case_constant_set[$cond_key]->lineno],
                    Issue::SEVERITY_NORMAL,
                    Issue::REMEDIATION_A,
                    15071
                );
                // Add a fake value to indicate loose equality checks are redundant
                $values_to_check[-1] = true;
            }
            $case_constant_set[$cond_key] = $case_node;
        }
        if (!isset($values_to_check[-1]) && count($values_to_check) > 1 && !self::areAllSwitchCasesTheSameType($values_to_check)) {
            // @phan-suppress-next-line PhanPartialTypeMismatchArgument array keys are integers for switch
            $this->extendedLooseEqualityCheck($values_to_check, $children);
        }
    }

    /**
     * @param array<mixed,mixed> $values_to_check scalar constant values of case statements
     */
    private static function areAllSwitchCasesTheSameType(array $values_to_check): bool
    {
        $categories = 0;
        foreach ($values_to_check as $value) {
            if (is_int($value)) {
                $categories |= 1;
                if ($categories !== 1) {
                    return false;
                }
            } elseif (is_string($value)) {
                if (is_numeric($value)) {
                    // This includes float-like strings such as `"1e0"`, which adds ambiguity ("1e0" == "1")
                    return false;
                }
                $categories |= 2;
                if ($categories !== 2) {
                    return false;
                }
            } else {
                return false;
            }
        }
        return true;
    }

    /**
     * Perform a heuristic check if any element is `==` a previous element.
     *
     * This is intended to perform well for large arrays.
     *
     * TODO: Do a better job for small arrays.
     * @param array<mixed, mixed> $values_to_check
     * @param list<mixed> $children an array of scalars
     */
    private function extendedLooseEqualityCheck(array $values_to_check, array $children): void
    {
        $numeric_set = [];
        $fuzzy_numeric_set = [];
        foreach ($values_to_check as $i => $value) {
            if (is_numeric($value)) {
                // For `"1"`, search for `"1foo"`, `"1bar"`, etc.
                $value = is_float($value) ? $value : filter_var($value, FILTER_VALIDATE_FLOAT);
                $old_index = $numeric_set[$value] ?? $fuzzy_numeric_set[$value] ?? null;
                $numeric_set[$value] = $i;
            } else {
                $value = (float)$value;
                // For `"1foo"`, search for `1` but not `"1bar"`
                $old_index = $numeric_set[$value] ?? null;
                // @phan-suppress-next-line PhanTypeMismatchDimAssignment
                $fuzzy_numeric_set[$value] = $i;
            }
            if ($old_index !== null) {
                $this->emitPluginIssue(
                    $this->code_base,
                    (clone($this->context))->withLineNumberStart($children[$i]->lineno),
                    'PhanPluginDuplicateSwitchCaseLooseEquality',
                    "Switch case({STRING_LITERAL}) is loosely equivalent (==) to an earlier case ({STRING_LITERAL}) in switch statement - the earlier entry may be chosen instead.",
                    [self::normalizeSwitchKey($values_to_check[$i]), self::normalizeSwitchKey($values_to_check[$old_index])],
                    Issue::SEVERITY_NORMAL,
                    Issue::REMEDIATION_A,
                    15072
                );
            }
        }
    }

    /**
     * @param Node $node
     * A match expressions's arms list (AST_MATCH_ARM_LIST) node to analyze
     * @override
     * @suppress PhanPossiblyUndeclaredProperty
     */
    public function visitMatchArmList(Node $node): void
    {
        $children = $node->children;
        if (!$children) {
            // This plugin will never emit errors if there are 0 elements.
            return;
        }

        $arm_expr_constant_set = [];
        foreach ($children as $arm_node) {
            foreach ($arm_node->children['cond']->children ?? [] as $arm_expr_cond) {
                if ($arm_expr_cond === null) {
                    continue;  // This is `default:`. php --syntax-check already checks for duplicates.
                }
                $lineno = $arm_expr_cond->lineno ?? $arm_node->lineno;
                // Skip array entries without literal keys. (Do it before resolving the key value)
                if (is_object($arm_expr_cond) && ParseVisitor::isConstExpr($arm_expr_cond)) {
                    // Only infer the value for values not affected by conditions - that will change after the expressions are analyzed
                    $original_cond = $arm_expr_cond;
                    $arm_expr_cond = UnionTypeVisitor::unionTypeFromNode($this->code_base, $this->context, $arm_expr_cond)->asSingleScalarValueOrNullOrSelf();
                    if (is_object($arm_expr_cond)) {
                        $arm_expr_cond = $original_cond;
                    }
                }
                if (is_string($arm_expr_cond)) {
                    $cond_key = "s$arm_expr_cond";
                } elseif (is_int($arm_expr_cond)) {
                    $cond_key = $arm_expr_cond;
                } elseif (is_bool($arm_expr_cond)) {
                    $cond_key = $arm_expr_cond ? "T" : "F";
                } else {
                    // TODO: This seems like it'd be flaky with ast\Node->flags and lineno?
                    $cond_key = ASTHasher::hash($arm_expr_cond);
                }
                if (isset($arm_expr_constant_set[$cond_key])) {
                    $normalized_arm_expr_cond = ASTReverter::toShortString($arm_expr_cond);
                    $this->emitPluginIssue(
                        $this->code_base,
                        clone($this->context)->withLineNumberStart($lineno),
                        'PhanPluginDuplicateMatchArmExpression',
                        "Duplicate match arm expression({STRING_LITERAL}) detected in match expression - the later entry will be ignored in favor of expression {CODE} at line {LINE}.",
                        [$normalized_arm_expr_cond, ASTReverter::toShortString($arm_expr_constant_set[$cond_key][0]), $arm_expr_constant_set[$cond_key][1]],
                        Issue::SEVERITY_NORMAL,
                        Issue::REMEDIATION_A,
                        15071
                    );
                }
                $arm_expr_constant_set[$cond_key] = [$arm_expr_cond, $arm_node->lineno];
            }
        }
    }

    /**
     * @param Node $node
     * An array literal(AST_ARRAY) node to analyze
     * @override
     */
    public function visitArray(Node $node): void
    {
        $children = $node->children;
        if (count($children) <= 1) {
            // This plugin will never emit errors if there are 0 or 1 elements.
            return;
        }

        $has_entry_without_key = false;
        $key_set = [];
        foreach ($children as $entry) {
            if (!($entry instanceof Node)) {
                continue;  // Triggered by code such as `list(, $a) = $expr`. In php 7.1, the array and list() syntax was unified.
            }
            $key = $entry->children['key'] ?? null;
            // Skip array entries without literal keys. (Do it before resolving the key value)
            if (is_null($key)) {
                $has_entry_without_key = true;
                continue;
            }
            if (is_object($key)) {
                $key = UnionTypeVisitor::unionTypeFromNode($this->code_base, $this->context, $key)->asSingleScalarValueOrNullOrSelf();
                if (is_object($key)) {
                    $key = self::HASH_PREFIX . ASTHasher::hash($entry->children['key']);
                }
            }

            if (isset($key_set[$key])) {
                // @phan-suppress-next-line PhanTypeMismatchDimFetchNullable
                $this->warnAboutDuplicateArrayKey($entry, $key, $key_set[$key]);
            }
            // @phan-suppress-next-line PhanTypeMismatchDimAssignment
            $key_set[$key] = $entry;
        }
        if ($has_entry_without_key && count($key_set) > 0) {
            // This is probably a typo in most codebases. (e.g. ['foo' => 'bar', 'baz'])
            // In phan, InternalFunctionSignatureMap.php does this deliberately with the first parameter being the return type.
            $this->emit(
                'PhanPluginMixedKeyNoKey',
                "Should not mix array entries of the form [key => value,] with entries of the form [value,].",
                [],
                Issue::SEVERITY_NORMAL,
                Issue::REMEDIATION_A,
                15071
            );
        }
    }

    /**
     * @param int|string|float|bool|null $key
     */
    private function warnAboutDuplicateArrayKey(Node $entry, $key, Node $old_entry): void
    {
        if (is_string($key) && strncmp($key, self::HASH_PREFIX, strlen(self::HASH_PREFIX)) === 0) {
            $this->emitPluginIssue(
                $this->code_base,
                clone($this->context)->withLineNumberStart($entry->lineno),
                'PhanPluginDuplicateArrayKeyExpression',
                "Duplicate dynamic array key expression ({CODE}) detected in array - the earlier entry at line {LINE} will be ignored if the expression had the same value.",
                [ASTReverter::toShortString($entry->children['key']), $old_entry->lineno],
                Issue::SEVERITY_NORMAL,
                Issue::REMEDIATION_A,
                15071
            );
            return;
        }
        $normalized_key = self::normalizeKey($key);
        $this->emitPluginIssue(
            $this->code_base,
            clone($this->context)->withLineNumberStart($entry->lineno),
            'PhanPluginDuplicateArrayKey',
            "Duplicate/Equivalent array key value({STRING_LITERAL}) detected in array - the earlier entry {CODE} at line {LINE} will be ignored.",
            [$normalized_key, ASTReverter::toShortString($old_entry->children['key']), $old_entry->lineno],
            Issue::SEVERITY_NORMAL,
            Issue::REMEDIATION_A,
            15071
        );
    }

    /**
     * Converts a key to the value it would be if used as a case.
     * E.g. 0, 0.5, and "0" all become the same value(0) when used as an array key.
     *
     * @param int|string|float|bool|null $key - The array key literal to be normalized.
     * @return string - The normalized representation.
     */
    private static function normalizeSwitchKey($key): string
    {
        if (is_int($key)) {
            return (string)$key;
        } elseif (!is_string($key)) {
            return (string)json_encode($key);
        }
        $tmp = [$key => true];
        return ASTReverter::toShortString(key($tmp));
    }

    /**
     * Converts a key to the value it would be if used as an array key.
     * E.g. 0, 0.5, and "0" all become the same value(0) when used as an array key.
     *
     * @param int|string|float|bool|null $key - The array key literal to be normalized.
     * @return string - The normalized representation.
     */
    private static function normalizeKey($key): string
    {
        if (is_int($key)) {
            return (string)$key;
        }
        $tmp = [$key => true];
        return ASTReverter::toShortString(key($tmp));
    }
}

// Every plugin needs to return an instance of itself at the
// end of the file in which it's defined.
return new DuplicateArrayKeyPlugin();