src/Phan/CodeBase/UndoTracker.php

Summary

Maintainability
B
4 hrs
Test Coverage
<?php

declare(strict_types=1);

namespace Phan\CodeBase;

use Closure;
use Phan\CodeBase;
use Phan\Daemon;
use Phan\Phan;

use function count;
use function in_array;

/**
 * UndoTracker maps a file path to a list of operations(e.g. Closures) that must be executed to
 * remove all traces of a file from the CodeBase, etc. if a file was removed or edited.
 * This is done to support running phan in daemon mode.
 * - Files will have to be re-parsed to get the new function signatures, check for new parse/analysis errors,
 *   and to update the class/function/method/property/constant/etc. definitions that would have to be created.
 *
 * If a file is edited, its contributions are undone, then it is parsed yet again.
 *
 * (We don't garbage collect reference cycles, so this attempts to work in a way that avoids cycles.
 *  Haven't verified that it does that as expected, yet)
 * @phan-file-suppress PhanPluginDescriptionlessCommentOnPublicMethod
 */
class UndoTracker
{

    /**
     * @var ?string absolute path to currently parsed file, when in parse phase.
     * TODO: Does the Context->getFile() make keeping this redundant?
     */
    private $current_parsed_file;

    /**
     * @var array<string,list<Closure(CodeBase):void>>
     * Maps a file path to the undo operations recorded while that file was the current parsed file.
     */
    private $undo_operations_for_path = [];

    /**
     * @var array<string,?string> Maps file paths to the modification dates and file size of the paths. - On ext4, milliseconds are available, but php APIs all return seconds.
     *
     * The value is null when the state of the file could not be determined (see getFileState()).
     * Note that isset() reports such entries as missing, even though the key is present.
     */
    private $file_modification_state = [];

    public function __construct()
    {
    }

    /**
     * @return list<string> - The list of files which are successfully parsed.
     * This changes whenever the file list is reloaded from disk.
     * This also includes files which don't declare classes or functions or globals,
     * because those files use classes/functions/constants.
     *
     * (This is the list prior to any analysis exclusion or whitelisting steps)
     */
    public function getParsedFilePathList(): array
    {
        return \array_keys($this->file_modification_state);
    }

    /**
     * @return int The size of $this->getParsedFilePathList()
     */
    public function getParsedFilePathCount(): int
    {
        return count($this->file_modification_state);
    }

    /**
     * Record that Phan has started parsing $current_parsed_file.
     *
     * This allows us to track which changes need to be undone when that file's contents change or the file gets removed.
     *
     * @param ?string $current_parsed_file the path being parsed, or null when no file is being parsed.
     *                When a string is passed, the current state of that path (see getFileState()) is recorded.
     */
    public function setCurrentParsedFile(?string $current_parsed_file): void
    {
        if (\is_string($current_parsed_file)) {
            Daemon::debugf("Recording file modification state for %s", $current_parsed_file);
            // This shouldn't be null. TODO: Figure out what to do if it is.
            $this->file_modification_state[$current_parsed_file] = self::getFileState($current_parsed_file);
        }
        $this->current_parsed_file = $current_parsed_file;
    }


    /**
     * Computes a string of the form "<mtime>_<size>" for the file at $path.
     *
     * @return ?string - This string should change when the file is modified. Returns null if the file somehow doesn't exist
     */
    public static function getFileState(string $path): ?string
    {
        \clearstatcache(true, $path);  // TODO: does this work properly with symlinks? seems to.
        $real = \realpath($path);
        if (!\is_string($real)) {
            return null;
        }
        if (!\file_exists($real)) {
            return null;
        }
        $stat = @\stat($real);  // Double check: suppress to prevent Phan's error_handler from terminating on error.
        if (!$stat) {
            return null;  // It was missing or unreadable.
        }
        return \sprintf('%d_%d', $stat['mtime'], $stat['size']);
    }

    /**
     * Called when a file is unparsable.
     * Removes the classes and functions, etc. from an older version of the file, if one exists.
     *
     * Also asks the issue collector to remove the issues for that file and stops tracking its modification state.
     */
    public function recordUnparsableFile(CodeBase $code_base, string $current_parsed_file): void
    {
        Daemon::debugf("%s was unparsable, had a syntax error", $current_parsed_file);
        Phan::getIssueCollector()->removeIssuesForFiles([$current_parsed_file]);
        $this->undoFileChanges($code_base, $current_parsed_file);
        unset($this->file_modification_state[$current_parsed_file]);
    }

    /**
     * Undoes all of the changes for the relative path at $path
     *
     * Runs every undo operation recorded for $path (in the order they were recorded), then forgets them.
     */
    private function undoFileChanges(CodeBase $code_base, string $path): void
    {
        // Pass the path as an argument instead of interpolating it, so that a '%' in the path
        // is never interpreted as a format directive (consistent with the other debugf calls in this class).
        Daemon::debugf("Undoing file changes for %s", $path);
        foreach ($this->undo_operations_for_path[$path] ?? [] as $undo_operation) {
            $undo_operation($code_base);
        }
        unset($this->undo_operations_for_path[$path]);
    }

    /**
     * @param \Closure $undo_operation - a closure expecting 1 param - inner. It undoes a change caused by a parsed file.
     * Ideally, this would extend to all changes. (e.g. including dead code detection)
     *
     * @throws \RuntimeException if no file is currently being parsed (see setCurrentParsedFile())
     */
    public function recordUndo(Closure $undo_operation): void
    {
        $file = $this->current_parsed_file;
        if (!\is_string($file)) {
            throw new \RuntimeException("Called recordUndo in CodeBaseMutable, but not parsing a file");
        }
        // Appending creates the per-file list on first use.
        $this->undo_operations_for_path[$file][] = $undo_operation;
    }

    /**
     * Compares the tracked files against the new file list, undoing the contributions of files
     * which were removed, were modified, or were explicitly requested to be re-analyzed.
     *
     * @param CodeBase $code_base - code base owning this tracker
     * @param list<string> $new_file_list
     * @param array<string,string> $file_mapping_contents maps paths to contents which override what is on disk.
     *                             These paths are treated as part of the new file list.
     * @param ?(string[]) $reanalyze_files files to re-parse before re-running analysis.
     *                    This fixes #1921
     * @return list<string> - Subset of $new_file_list which changed on disk and has to be parsed again. Automatically unparses the old versions of files which were modified.
     *                        Each path is returned at most once.
     */
    public function updateFileList(CodeBase $code_base, array $new_file_list, array $file_mapping_contents, ?array $reanalyze_files = null): array
    {
        $new_file_set = [];
        foreach ($new_file_list as $path) {
            $new_file_set[$path] = true;
        }
        foreach ($file_mapping_contents as $path => $_) {
            $new_file_set[$path] = true;
        }
        unset($new_file_list);
        $forced_reanalysis_list = $reanalyze_files ?? [];
        $removed_file_list = [];
        // Keyed by path: a path tracked with a null state is reported as missing by isset() below,
        // and may also be detected as changed by the second loop. It must only be returned once.
        $changed_or_added_file_set = [];
        foreach ($new_file_set as $path => $_) {
            if (!isset($this->file_modification_state[$path])) {
                $changed_or_added_file_set[$path] = true;
            }
        }
        foreach ($this->file_modification_state as $path => $state) {
            if (!isset($new_file_set[$path])) {
                // The file is no longer part of the file list.
                $this->undoFileChanges($code_base, $path);
                $removed_file_list[] = $path;
                unset($this->file_modification_state[$path]);
                continue;
            }
            // TODO: Always invalidate the parsed file if we're about to analyze it?
            if (isset($file_mapping_contents[$path])) {
                // TODO: Move updateFileList to be called before fork()?
                $new_state = 'daemon:' . \sha1($file_mapping_contents[$path]);
            } else {
                $new_state = self::getFileState($path);
            }
            if ($new_state !== $state || in_array($path, $forced_reanalysis_list, true)) {
                $removed_file_list[] = $path;
                $this->undoFileChanges($code_base, $path);
                // TODO: This will call stat() twice as much as necessary for the modified files. Not important.
                unset($this->file_modification_state[$path]);
                if ($new_state !== null) {
                    $changed_or_added_file_set[$path] = true;
                }
            }
        }
        if (count($removed_file_list) > 0) {
            Phan::getIssueCollector()->removeIssuesForFiles($removed_file_list);
        }
        return \array_keys($changed_or_added_file_set);
    }

    /**
     * Undoes the contributions of $file_path (and asks the issue collector to remove its issues)
     * so that its contents can be replaced.
     *
     * NOTE(review): a path which was recorded with a null state is treated as not existing here,
     * because isset() is false for null values - confirm that callers expect that.
     *
     * @param CodeBase $code_base - code base owning this tracker
     * @param string $file_path
     * @return bool - true if the file existed
     */
    public function beforeReplaceFileContents(CodeBase $code_base, string $file_path): bool
    {
        if (!isset($this->file_modification_state[$file_path])) {
            // The path is passed as an argument so that a '%' in it is never treated as a format directive.
            Daemon::debugf("Tried to replace contents of '%s', but that path does not yet exist", $file_path);
            return false;
        }
        Phan::getIssueCollector()->removeIssuesForFiles([$file_path]);
        $this->undoFileChanges($code_base, $file_path);
        unset($this->file_modification_state[$file_path]);
        return true;
    }
}