
View on GitHub


0 mins
Test Coverage


 * This file is part of the Composer package "eliashaeussler/cache-warmup".
 * Copyright (C) 2020-2024 Elias Häußler <elias@haeussler.dev>
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.

namespace EliasHaeussler\CacheWarmup\Command;

use EliasHaeussler\CacheWarmup\CacheWarmer;
use EliasHaeussler\CacheWarmup\Config;
use EliasHaeussler\CacheWarmup\Crawler;
use EliasHaeussler\CacheWarmup\Exception;
use EliasHaeussler\CacheWarmup\Formatter;
use EliasHaeussler\CacheWarmup\Helper;
use EliasHaeussler\CacheWarmup\Log;
use EliasHaeussler\CacheWarmup\Result;
use EliasHaeussler\CacheWarmup\Sitemap;
use EliasHaeussler\CacheWarmup\Time;
use GuzzleHttp\Client;
use GuzzleHttp\ClientInterface;
use Psr\Log\LogLevel;
use Symfony\Component\Console;
use Symfony\Component\Filesystem;

use function array_map;
use function array_unshift;
use function count;
use function getenv;
use function implode;
use function in_array;
use function is_string;
use function json_encode;
use function sleep;
use function sprintf;
use function strtolower;

 * CacheWarmupCommand.
 * @author Elias Häußler <elias@haeussler.dev>
 * @license GPL-3.0-or-later
final class CacheWarmupCommand extends Console\Command\Command
    // Values returned by execute(): FAILED when parsing or crawling failed and
    // failures are not allowed, SUCCESSFUL otherwise.
    private const SUCCESSFUL = 0;
    private const FAILED = 1;

    // Measures how long the parsing and crawling steps in execute() take.
    private readonly Time\TimeTracker $timeTracker;
    // Effective configuration, built from all config adapters in initialize().
    private Config\CacheWarmupConfig $config;
    // Console style helper wrapping the current input/output; created in initialize().
    private Console\Style\SymfonyStyle $io;
    // Formatter selected by the configured format; created in initialize().
    private Formatter\Formatter $formatter;
    // Factory used by initializeCrawler() to build crawlers; created in initialize().
    private Crawler\CrawlerFactory $crawlerFactory;
    // True until execute() runs for the first time with a positive repeat interval.
    private bool $firstRun = true;

    /**
     * Stores the HTTP client on the command and creates the time tracker.
     *
     * @param ClientInterface $client HTTP client kept as a readonly property;
     *                                a new Guzzle client is used when omitted
     */
    public function __construct(
        private readonly ClientInterface $client = new Client(),
    ) {
        $this->timeTracker = new Time\TimeTracker();

    /**
     * Declares the command's description, help text, arguments and options.
     *
     * Class names, formatter types and strategy names are resolved up front
     * so that they can be interpolated into the help text below.
     */
    protected function configure(): void
        // Interface names referenced in the "Crawler", "Crawler options" and
        // "Stop on failure" help sections.
        $crawler = Crawler\Crawler::class;
        $configurableCrawler = Crawler\ConfigurableCrawler::class;
        $stoppableCrawler = Crawler\StoppableCrawler::class;
        // Formatter identifiers listed in the "Format output" help section.
        $textFormatter = Formatter\TextFormatter::getType();
        $jsonFormatter = Formatter\JsonFormatter::getType();
        // Strategy names listed in the "Crawling strategy" help section.
        $sortByChangeFrequencyStrategy = Crawler\Strategy\SortByChangeFrequencyStrategy::getName();
        $sortByLastModificationDateStrategy = Crawler\Strategy\SortByLastModificationDateStrategy::getName();
        $sortByPriorityStrategy = Crawler\Strategy\SortByPriorityStrategy::getName();
        // Each log level is rendered as a lower-cased bullet point.
        $logLevels = implode(
                static fn (string $logLevel): string => '   * <comment>'.strtolower($logLevel).'</comment>',

        $this->setDescription('Warms up caches of URLs provided by a given set of XML sitemaps.');
This command can be used to warm up website caches.
It requires a set of XML sitemaps offering several URLs which will be crawled.

The list of sitemaps to be crawled can be defined as command argument:

   * <comment>%command.full_name% https://www.example.com/sitemap.xml</comment> (URL)
   * <comment>%command.full_name% /var/www/html/sitemap.xml</comment> (local file)

You are free to crawl as many sitemaps as you want.
Alternatively, sitemaps can be specified from user input when application is in interactive mode.

<info>Custom URLs</info>
In addition or as an alternative to sitemaps, it's also possible to provide a given URL set using the <comment>--urls</comment> option:

   <comment>%command.full_name% -u https://www.example.com/foo -u https://www.example.com/baz</comment>

<info>Config file</info>
All command parameters can be configured in an external config file.
Use the <comment>--config</comment> option to specify the config file:

   <comment>%command.full_name% -c cache-warmup.php</comment>

The following formats are currently supported:

   * <comment>json</comment>
   * <comment>php</comment>
   * <comment>yaml/yml</comment>

<info>Exclude patterns</info>
You can specify exclude patterns to be applied on URLs in order to ignore them from cache warming.
Use the <comment>--exclude</comment> (or <comment>-e</comment>) option to specify one or more patterns:

   <comment>%command.full_name% -e "*no_cache=1*" -e "*no_warming=1*"</comment>

You can also specify regular expressions as exclude patterns.
Note that each expression must start and end with a <comment>#</comment> symbol:

   <comment>%command.full_name% -e "#(no_cache|no_warming)=1#"</comment>

<info>Progress bar</info>
You can track the cache warmup progress by using the <comment>--progress</comment> option:

   <comment>%command.full_name% --progress</comment>

This shows a compact progress bar, including current warmup failures.
For a more verbose output, add the <comment>--verbose</comment> option:

   <comment>%command.full_name% --progress --verbose</comment>

<info>URL limit</info>
The number of URLs to be crawled can be limited using the <comment>--limit</comment> option:

   <comment>%command.full_name% --limit 50</comment>

By default, cache warmup will be done using concurrent HEAD requests.
This behavior can be overridden in case a special crawler is defined using the <comment>--crawler</comment> option:

   <comment>%command.full_name% --crawler "Vendor\Crawler\MyCrawler"</comment>

It's up to you to ensure the given crawler class is available and fully loaded.
This can best be achieved by registering the class with Composer autoloader.
Also make sure the crawler implements <comment>{$crawler}</comment>.

<info>Crawler options</info>
For crawlers implementing <comment>{$configurableCrawler}</comment>,
it is possible to pass a JSON-encoded array of crawler options by using the <comment>--crawler-options</comment> option:

   <comment>%command.full_name% --crawler-options '{"concurrency": 3}'</comment>

<info>Crawling strategy</info>
URLs can be crawled using a specific crawling strategy, e.g. by sorting them by a specific property.
For this, use the <comment>--strategy</comment> option together with a predefined value:

   <comment>%command.full_name% --strategy {$sortByPriorityStrategy}</comment>

The following strategies are currently available:

   * <comment>{$sortByChangeFrequencyStrategy}</comment>
   * <comment>{$sortByLastModificationDateStrategy}</comment>
   * <comment>{$sortByPriorityStrategy}</comment>

<info>Allow failures</info>
If a sitemap cannot be parsed or a URL fails to be crawled, this command normally exits
with a non-zero exit code. This is not always the desired behavior. Therefore, you can change
this behavior by using the <comment>--allow-failures</comment> option:

   <comment>%command.full_name% --allow-failures</comment>

<info>Stop on failure</info>
For crawlers implementing <comment>{$stoppableCrawler}</comment>,
you can also configure the crawler to stop on failure. The <comment>--stop-on-failure</comment> option
exists for this case:

   <comment>%command.full_name% --stop-on-failure</comment>

<info>Format output</info>
By default, all user-oriented output is printed as plain text to the console.
However, you can use other formatters by using the <comment>--format</comment> option:

   <comment>%command.full_name% --format json</comment>

Currently, the following formatters are available:

   * <comment>{$textFormatter}</comment> (default)
   * <comment>{$jsonFormatter}</comment>

You can log the crawling results of each crawled URL to an external log file.
For this, the <comment>--log-file</comment> option exists:

   <comment>%command.full_name% --log-file crawling-errors.log</comment>

When logging is enabled, by default only crawling failures are logged.
You can increase the log level to log successful crawlings as well:

   * <comment>%command.full_name% --log-level error</comment> (default)
   * <comment>%command.full_name% --log-level info</comment>

The following log levels are currently available:



            Console\Input\InputArgument::OPTIONAL | Console\Input\InputArgument::IS_ARRAY,
            'URLs or local filenames of XML sitemaps to be used for cache warming',
            Console\Input\InputOption::VALUE_REQUIRED | Console\Input\InputOption::VALUE_IS_ARRAY,
            'Custom additional URLs to be used for cache warming',
            'Path to configuration file',
            Console\Input\InputOption::VALUE_REQUIRED | Console\Input\InputOption::VALUE_IS_ARRAY,
            'Patterns for URLs to be excluded from cache warming',
            'Limit the number of URLs to be processed',
            'Show progress bar during cache warmup',
            'FQCN of the crawler to be used for cache warming',
            'Additional config for configurable crawlers',
            'Optional strategy to prepare URLs before crawling them',
            'Allow failures during URL crawling and exit with zero',
            'Cancel further cache warmup requests on failure',
            'Formatter used to print the cache warmup result',
            'File where to log crawling results',
            'Log level used to determine which crawling results to log (see help for more information)',
            'Run cache warmup in endless loop and repeat x seconds after each run',

     * @throws Exception\ConfigFileIsNotSupported
     * @throws Exception\FormatterIsNotSupported
     * @throws Exception\LogLevelIsNotSupported
    /**
     * Builds the effective configuration and the services the command works with.
     *
     * Configuration is collected from console input and environment variables,
     * plus optional config files given by the CACHE_WARMUP_CONFIG environment
     * variable and the "config" option. Afterwards the console style, the
     * formatter, an optional file logger and the crawler factory are created.
     *
     * @throws Exception\ConfigFileIsNotSupported if a config file has an unsupported extension
     * @throws Exception\LogLevelIsNotSupported   if the configured log level is not a known level
     */
    protected function initialize(Console\Input\InputInterface $input, Console\Output\OutputInterface $output): void
        $configFile = $input->getOption('config');
        $configFileFromEnv = getenv('CACHE_WARMUP_CONFIG');
        $configAdapters = [
            new Config\Adapter\ConsoleInputConfigAdapter($input),
            new Config\Adapter\EnvironmentVariablesConfigAdapter(),

        // Config files are prepended, so the resulting adapter order is:
        // file from option, file from environment, console input, environment variables.
        // NOTE(review): presumably adapters later in the list override earlier
        // ones - confirm in CompositeConfigAdapter.
        if (false !== $configFileFromEnv) {
            array_unshift($configAdapters, $this->loadConfigFromFile($configFileFromEnv));
        if (null !== $configFile) {
            array_unshift($configAdapters, $this->loadConfigFromFile($configFile));

        $this->config = (new Config\Adapter\CompositeConfigAdapter($configAdapters))->get();
        $this->io = new Console\Style\SymfonyStyle($input, $output);
        $this->formatter = (new Formatter\FormatterFactory($this->io))->get($this->config->getFormat());

        $logFile = $this->config->getLogFile();
        $logLevel = $this->config->getLogLevel();
        $stopOnFailure = $this->config->shouldStopOnFailure();
        $logger = null;

        // Create a file logger only if a log file is configured
        if (is_string($logFile)) {
            $logger = new Log\FileLogger($logFile);

        // Reject log levels that are not part of the known level list
        if (!in_array($logLevel, Log\LogLevel::getAll(), true)) {
            throw new Exception\LogLevelIsNotSupported($logLevel);

        // For non-verbose formatters, hand the error output to the crawler factory
        // when the console provides one; otherwise hand it a null output
        if (!$this->formatter->isVerbose()) {
            if ($output instanceof Console\Output\ConsoleOutputInterface) {
                $output = $output->getErrorOutput();

            } else {
                $output = new Console\Output\NullOutput();

        $this->crawlerFactory = new Crawler\CrawlerFactory($output, $logger, $logLevel, $stopOnFailure);

    /**
     * Asks the user for sitemaps when neither sitemaps nor URLs are configured.
     *
     * The question is repeated for as long as the answer is a Sitemap instance;
     * every such answer is collected and echoed back to the user.
     *
     * @throws Console\Exception\RuntimeException if no sitemap was entered
     */
    protected function interact(Console\Input\InputInterface $input, Console\Output\OutputInterface $output): void
        // Early return if sitemaps or URLs are already specified
        if ([] !== $this->config->getSitemaps() || [] !== $this->config->getUrls()) {

        // Get sitemaps from interactive user input
        $sitemaps = [];
        $helper = $this->getHelper('question');
        do {
            $question = new Console\Question\Question('Please enter the URL of a XML sitemap: ');
            // NOTE(review): the answer is expected to be a Sitemap object already;
            // how the raw input is converted is not visible here - presumably via
            // validateSitemap(), confirm.
            $sitemap = $helper->ask($input, $output, $question);
            if ($sitemap instanceof Sitemap\Sitemap) {
                $sitemaps[] = $sitemap;
                $output->writeln(sprintf('<info>Sitemap added: %s</info>', $sitemap));
        } while ($sitemap instanceof Sitemap\Sitemap);

        // Throw exception if no sitemaps were added
        if ([] === $sitemaps) {
            throw new Console\Exception\RuntimeException('You must enter at least one sitemap URL.', 1604258903);


     * @throws Exception\ConfigFileIsNotSupported
    /**
     * Runs the cache warmup and returns the command result.
     *
     * The crawler and cache warmer are initialized, the warmup is run, and the
     * parser and cache warmup results are handed to the formatter. The cache
     * warmer initialization and the warmup run are both timed with the time
     * tracker.
     *
     * @return int self::FAILED if failures are not allowed and a sitemap failed to
     *             parse or the warmup result is unsuccessful, self::SUCCESSFUL otherwise
     *
     * @throws Console\Exception\RuntimeException if neither sitemaps nor URLs are defined
     */
    protected function execute(Console\Input\InputInterface $input, Console\Output\OutputInterface $output): int
        $sitemaps = $this->config->getSitemaps();
        $urls = $this->config->getUrls();
        $repeatAfter = $this->config->getRepeatAfter();

        // Throw exception if neither sitemaps nor URLs are defined
        if ([] === $sitemaps && [] === $urls) {
            throw new Console\Exception\RuntimeException('Neither sitemaps nor URLs are defined.', 1604261236);

        // Show header
        if ($this->formatter->isVerbose()) {

        // Show warning on endless runs (only on the first run, as the flag is cleared here)
        if ($this->firstRun && $repeatAfter > 0) {
            $this->firstRun = false;

        // Initialize components; the time spent initializing the cache warmer
        // is remembered as parse time
        $crawler = $this->initializeCrawler();
        $cacheWarmer = $this->timeTracker->track(fn () => $this->initializeCacheWarmer($crawler));
        $parseTime = $this->timeTracker->getLastDuration();

        // Start crawling
        $result = $this->timeTracker->track(
            fn () => $this->runCacheWarmup(
                $crawler instanceof Crawler\VerboseCrawler,

        // Print formatted parser result
            new Result\ParserResult($cacheWarmer->getSitemaps(), $cacheWarmer->getUrls()),
            new Result\ParserResult($cacheWarmer->getFailedSitemaps()),
            new Result\ParserResult($cacheWarmer->getExcludedSitemaps(), $cacheWarmer->getExcludedUrls()),

        // Print formatted cache warmup result
        $this->formatter->formatCacheWarmupResult($result, $this->timeTracker->getLastDuration());

        // Early return if parsing or crawling failed
        if (!$this->config->areFailuresAllowed()
            && ([] !== $cacheWarmer->getFailedSitemaps() || !$result->isSuccessful())
        ) {
            return self::FAILED;

        // Repeat on endless mode
        // NOTE(review): repetition is implemented by recursion, so every run adds
        // a stack frame - consider a loop if long-running endless mode matters.
        if ($repeatAfter > 0) {

            return $this->execute($input, $output);

        return self::SUCCESSFUL;

    /**
     * Runs the given cache warmer and returns its result.
     *
     * @param CacheWarmer $cacheWarmer      Cache warmer whose URLs are crawled
     * @param bool        $isVerboseCrawler Whether the crawler in use is a verbose crawler
     *
     * @return Result\CacheWarmupResult Result returned by the cache warmer run
     */
    private function runCacheWarmup(CacheWarmer $cacheWarmer, bool $isVerboseCrawler): Result\CacheWarmupResult
        $urlCount = count($cacheWarmer->getUrls());

        // Announce the crawl; the second argument to write() ends the line
        // when a verbose crawler is in use
        if ($this->formatter->isVerbose()) {
            $this->io->write(sprintf('Crawling URL%s... ', 1 === $urlCount ? '' : 's'), $isVerboseCrawler);

        $result = $cacheWarmer->run();

        // With a non-verbose crawler, the outcome is distinguished by whether the
        // run was cancelled
        if ($this->formatter->isVerbose() && !$isVerboseCrawler) {
            if ($result->wasCancelled()) {
            } else {

        return $result;

    /**
     * Creates the cache warmer and feeds it with the configured sitemaps and URLs.
     *
     * @param Crawler\Crawler $crawler Crawler the cache warmer is set up for
     *
     * @return CacheWarmer The prepared cache warmer
     *
     * @throws Console\Exception\RuntimeException if the configured strategy name is unknown
     */
    private function initializeCacheWarmer(Crawler\Crawler $crawler): CacheWarmer
        if ($this->formatter->isVerbose()) {
            $this->io->write('Parsing sitemaps... ');

        // Initialize crawling strategy: a strategy given by name is mapped to
        // its strategy object, any other name is rejected
        $strategy = $this->config->getStrategy();
        if (is_string($strategy)) {
            $strategy = match ($strategy) {
                Crawler\Strategy\SortByChangeFrequencyStrategy::getName() => new Crawler\Strategy\SortByChangeFrequencyStrategy(),
                Crawler\Strategy\SortByLastModificationDateStrategy::getName() => new Crawler\Strategy\SortByLastModificationDateStrategy(),
                Crawler\Strategy\SortByPriorityStrategy::getName() => new Crawler\Strategy\SortByPriorityStrategy(),
                default => throw new Console\Exception\RuntimeException('The given crawling strategy is invalid.', 1677618007),

        // Initialize cache warmer
        $cacheWarmer = new CacheWarmer(

        // Add and parse XML sitemaps

        // Add URLs
        foreach ($this->config->getUrls() as $url) {

        if ($this->formatter->isVerbose()) {

        return $cacheWarmer;

    /**
     * Resolves the crawler to use from the configuration.
     *
     * Without a configured crawler, the outputting crawler is used when the
     * progress bar is enabled and the concurrent crawler otherwise. A crawler
     * given as a string is instantiated through the crawler factory together
     * with the parsed crawler options.
     *
     * @return Crawler\Crawler The crawler to use for cache warmup
     */
    private function initializeCrawler(): Crawler\Crawler
        $crawler = $this->config->getCrawler();
        $crawlerOptions = $this->crawlerFactory->parseCrawlerOptions($this->config->getCrawlerOptions());
        $stopOnFailure = $this->config->shouldStopOnFailure();

        // Select default crawler
        if (null === $crawler) {
            $crawler = $this->config->isProgressBarEnabled()
                ? Crawler\OutputtingCrawler::class
                : Crawler\ConcurrentCrawler::class

        // Initialize crawler
        if (is_string($crawler)) {
            $crawler = $this->crawlerFactory->get($crawler, $crawlerOptions);

        // Print crawler options (only in verbose mode and if any were passed);
        // options passed to a non-configurable crawler are reported instead
        if ($crawler instanceof Crawler\ConfigurableCrawler) {
            if ($this->formatter->isVerbose() && $this->io->isVerbose() && [] !== $crawlerOptions) {
                $this->io->section('Using custom crawler options:');
                $this->io->writeln((string) json_encode($crawlerOptions, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES));
        } elseif ([] !== $crawlerOptions) {
                'You passed crawler options to a non-configurable crawler.',

        // Show notice on unsupported stoppable crawler feature
        if ($stopOnFailure && !($crawler instanceof Crawler\StoppableCrawler)) {
                'You configured "stop on failure" for a non-stoppable crawler.',

        return $crawler;

     * @throws Exception\ConfigFileIsNotSupported
    /**
     * Creates the config adapter matching the extension of the given config file.
     *
     * @param string $configFile Path to the config file; passed through
     *                           FilesystemHelper::resolveRelativePath() first
     *
     * @return Config\Adapter\ConfigAdapter PhpConfigAdapter for "php" files,
     *                                      FileConfigAdapter for "json", "yaml" and "yml" files
     *
     * @throws Exception\ConfigFileIsNotSupported if the extension is none of the above
     */
    private function loadConfigFromFile(string $configFile): Config\Adapter\ConfigAdapter
        $configFile = Helper\FilesystemHelper::resolveRelativePath($configFile);
        // Second argument forces a lower-case extension, so matching is case-insensitive
        $extension = Filesystem\Path::getExtension($configFile, true);

        return match ($extension) {
            'php' => new Config\Adapter\PhpConfigAdapter($configFile),
            'json', 'yaml', 'yml' => new Config\Adapter\FileConfigAdapter($configFile),
            default => throw new Exception\ConfigFileIsNotSupported($configFile),

    /**
     * Builds the warning shown when the command is configured to repeat forever.
     *
     * @param int $interval Number of seconds between two runs; also selects the
     *                      singular or plural form of "second" in the message
     */
    private function showEndlessModeWarning(int $interval): void
                'Command is scheduled to run forever. It will be repeated %d second%s after each run.',
                1 === $interval ? '' : 's',

    /**
     * Converts raw input into a sitemap object.
     *
     * @param string|null $input Raw input, or null if nothing was entered
     *
     * @return Sitemap\Sitemap|null Null for null input, otherwise the sitemap
     *                              created by Sitemap::createFromString()
     */
    private function validateSitemap(?string $input): ?Sitemap\Sitemap
        if (null === $input) {
            return null;

        return Sitemap\Sitemap::createFromString($input);

    /**
     * Builds the header line naming the tool and its authors.
     *
     * The current version is appended to the tool name only if the version
     * helper returns one.
     */
    private function printHeader(): void
        $currentVersion = Helper\VersionHelper::getCurrentVersion();

                'Running <info>cache warmup</info>%s by Elias Häußler and contributors.',
                null !== $currentVersion ? ' <comment>'.$currentVersion.'</comment>' : '',