eliashaeussler/typo3-warming

View on GitHub
Classes/Command/WarmupCommand.php

Summary

Maintainability
A
0 mins
Test Coverage
A
100%
<?php

declare(strict_types=1);

/*
 * This file is part of the TYPO3 CMS extension "warming".
 *
 * Copyright (C) 2021-2024 Elias Häußler <elias@haeussler.dev>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

namespace EliasHaeussler\Typo3Warming\Command;

use EliasHaeussler\CacheWarmup;
use EliasHaeussler\Typo3SitemapLocator;
use EliasHaeussler\Typo3Warming\Configuration;
use EliasHaeussler\Typo3Warming\Crawler;
use EliasHaeussler\Typo3Warming\Domain;
use EliasHaeussler\Typo3Warming\Http;
use EliasHaeussler\Typo3Warming\Utility;
use Psr\EventDispatcher;
use Symfony\Component\Console;
use TYPO3\CMS\Core;

/**
 * WarmupCommand
 *
 * Console command "warming:cachewarmup". It translates the requested pages,
 * sites and languages into URLs and XML sitemaps and passes them to the
 * cache warmup command of eliashaeussler/cache-warmup, which is run as a
 * sub command.
 *
 * @author Elias Häußler <elias@haeussler.dev>
 * @license GPL-2.0-or-later
 */
#[Console\Attribute\AsCommand(
    name: 'warming:cachewarmup',
    description: 'Warm up Frontend caches of single pages and/or whole sites using their XML sitemaps.',
)]
final class WarmupCommand extends Console\Command\Command
{
    /**
     * Marker language ID used when no language was requested explicitly.
     */
    private const ALL_LANGUAGES = -1;

    /**
     * Special keyword of the "sites" option that selects every available site.
     */
    private const ALL_SITES = 'all';

    public function __construct(
        private readonly Http\Client\ClientFactory $clientFactory,
        private readonly Configuration\Configuration $configuration,
        private readonly Crawler\Strategy\CrawlingStrategyFactory $crawlingStrategyFactory,
        private readonly Typo3SitemapLocator\Sitemap\SitemapLocator $sitemapLocator,
        private readonly Core\Site\SiteFinder $siteFinder,
        private readonly EventDispatcher\EventDispatcherInterface $eventDispatcher,
        private readonly Core\Package\PackageManager $packageManager,
    ) {
        parent::__construct();
    }

    /**
     * Register description, help text and all input options of the command.
     *
     * Defaults shown in the help text and used for the "limit" and "strategy"
     * options are read from the injected extension configuration.
     */
    protected function configure(): void
    {
        // Identity closure: allows arbitrary expressions inside the heredoc via {$v(...)}
        $v = static fn(mixed $value): mixed => $value;
        $decoratedCrawlingStrategies = implode(PHP_EOL, array_map(
            static fn(string $strategy): string => '  │  * <info>' . $strategy . '</info>',
            array_keys($this->crawlingStrategyFactory->getAll()),
        ));

        $this->setDescription('Warm up Frontend caches of single pages and/or whole sites using their XML sitemaps.');
        $this->setHelp(
            <<<HELP
This command can be used in many ways to warm up frontend caches.
Some possible combinations and options are shown below.

<info>Sites and pages</info>
<info>===============</info>

To warm up caches, either <info>pages</info> or <info>sites</info> can be specified.
Both types can also be combined or extended by the specification of one or more <info>languages</info>.
If you omit the language option, the caches of all languages of the requested pages and sites
will be warmed up.

You can also use the special keyword <info>all</info> for <info>sites</info>.
This will cause all available sites to be warmed up.

Examples:

* <comment>warming:cachewarmup -p 1,2,3</comment>
  ├─ Pages: <info>1, 2 and 3</info>
  └─ Languages: <info>all</info>

* <comment>warming:cachewarmup -s 1</comment>
* <comment>warming:cachewarmup -s main</comment>
  ├─ Sites: <info>Root page ID 1</info> or <info>identifier "main"</info>
  └─ Languages: <info>all</info>

* <comment>warming:cachewarmup -p 1 -s 1</comment>
* <comment>warming:cachewarmup -p 1 -s main</comment>
  ├─ Pages: <info>1</info>
  ├─ Sites: <info>Root page ID 1</info> or <info>identifier "main"</info>
  └─ Languages: <info>all</info>

* <comment>warming:cachewarmup -s 1 -l 0,1</comment>
  ├─ Sites: <info>Root page ID 1</info> or <info>identifier "main"</info>
  └─ Languages: <info>0 and 1</info>

* <comment>warming:cachewarmup -s all</comment>
  ├─ Sites: <info>all</info>
  └─ Languages: <info>all</info>

<info>Additional options</info>
<info>==================</info>

* <comment>Configuration file</comment>
  ├─ A preconfigured set of configuration options can be written to a configuration file.
  │  This file can be passed using the <info>--config</info> option.
  │  It may also contain extension paths, e.g. <info>EXT:sitepackage/Configuration/cache-warmup.json</info>.
  │  The following file formats are currently supported:
  │  * <info>json</info>
  │  * <info>php</info>
  │  * <info>yaml</info>
  │  * <info>yml</info>
  └─ Example: <comment>warming:cachewarmup --config path/to/cache-warmup.json</comment>

* <comment>Strict mode</comment>
  ├─ You can pass the <info>--strict</info> (or <info>-x</info>) option to terminate execution with an error code
  │  if individual caches warm up incorrectly.
  │  This is especially useful for automated execution of cache warmups.
  ├─ Default: <info>false</info>
  └─ Example: <comment>warming:cachewarmup -s 1 --strict</comment>

* <comment>Crawl limit</comment>
  ├─ The maximum number of pages to be warmed up can be defined via the extension configuration <info>limit</info>.
  │  It can be overridden by using the <info>--limit</info> option.
  │  The value <info>0</info> deactivates the crawl limit.
  ├─ Default: <info>{$v($this->configuration->getLimit())}</info>
  ├─ Example: <comment>warming:cachewarmup -s 1 --limit 100</comment> (limits crawling to 100 pages)
  └─ Example: <comment>warming:cachewarmup -s 1 --limit 0</comment> (no limit)

* <comment>Crawling strategy</comment>
  ├─ A crawling strategy defines how URLs will be crawled, e.g. by sorting them by a specific property.
  │  It can be defined via the extension configuration <info>strategy</info> or by using the <info>--strategy</info> option.
  │  The following strategies are currently available:
{$decoratedCrawlingStrategies}
  ├─ Default: <info>{$v($this->configuration->getStrategy() ?? 'none')}</info>
  └─ Example: <comment>warming:cachewarmup --strategy {$v(CacheWarmup\Crawler\Strategy\SortByPriorityStrategy::getName())}</comment>

* <comment>Format output</comment>
  ├─ By default, all user-oriented output is printed as plain text to the console.
  │  However, you can use other formatters by using the <info>--format</info> (or <info>-f</info>) option.
  ├─ Default: <info>{$v(CacheWarmup\Formatter\TextFormatter::getType())}</info>
  ├─ Example: <comment>warming:cachewarmup --format {$v(CacheWarmup\Formatter\TextFormatter::getType())}</comment> (normal output as plaintext)
  └─ Example: <comment>warming:cachewarmup --format {$v(CacheWarmup\Formatter\JsonFormatter::getType())}</comment> (displays output as JSON)

<info>Crawling configuration</info>
<info>======================</info>

* <comment>Alternative crawler</comment>
  ├─ Use the extension configuration <info>verboseCrawler</info> to use an alternative crawler for
  │  command-line requests. For warmup requests triggered via the TYPO3 backend, you can use the
  │  extension configuration <info>crawler</info>.
  ├─ Currently used default crawler: <info>{$v($this->configuration->getCrawler())}</info>
  └─ Currently used verbose crawler: <info>{$v($this->configuration->getVerboseCrawler())}</info>

* <comment>Custom User-Agent header</comment>
  ├─ When the default crawler is used, each warmup request is executed with a special User-Agent header.
  │  This header is generated from the encryption key of the TYPO3 installation.
  │  It can be used, for example, to exclude warmup requests from your search statistics.
  └─ Current User-Agent: <info>{$v($this->configuration->getUserAgent())}</info>
HELP
        );

        $this->addOption(
            'pages',
            'p',
            Console\Input\InputOption::VALUE_REQUIRED | Console\Input\InputOption::VALUE_IS_ARRAY,
            'Pages whose Frontend caches are to be warmed up.',
        );
        $this->addOption(
            'sites',
            's',
            Console\Input\InputOption::VALUE_REQUIRED | Console\Input\InputOption::VALUE_IS_ARRAY,
            'Site identifiers or root page IDs of sites whose caches are to be warmed up.',
        );
        $this->addOption(
            'languages',
            'l',
            Console\Input\InputOption::VALUE_REQUIRED | Console\Input\InputOption::VALUE_IS_ARRAY,
            'Optional identifiers of languages for which caches are to be warmed up.',
        );
        $this->addOption(
            'config',
            'c',
            Console\Input\InputOption::VALUE_REQUIRED,
            'Path to optional configuration file',
        );
        $this->addOption(
            'limit',
            null,
            Console\Input\InputOption::VALUE_REQUIRED,
            'Maximum number of pages to be crawled. Set to <info>0</info> to disable the limit.',
            $this->configuration->getLimit(),
        );
        $this->addOption(
            'strategy',
            null,
            Console\Input\InputOption::VALUE_REQUIRED,
            'Optional strategy to prepare URLs before crawling them.',
            $this->configuration->getStrategy(),
        );
        $this->addOption(
            'format',
            'f',
            Console\Input\InputOption::VALUE_REQUIRED,
            'Formatter used to print the cache warmup result',
            CacheWarmup\Formatter\TextFormatter::getType(),
        );
        $this->addOption(
            'strict',
            'x',
            Console\Input\InputOption::VALUE_NONE,
            'Fail if an error occurred during cache warmup.',
        );
    }

    /**
     * Run the cache warmup by delegating to the sub command of eliashaeussler/cache-warmup.
     *
     * The status code of the sub command is only passed through if the
     * "strict" option is set; otherwise the command always reports success.
     *
     * @throws CacheWarmup\Exception\LocalFilePathIsMissingInUrl
     * @throws CacheWarmup\Exception\UrlIsEmpty
     * @throws CacheWarmup\Exception\UrlIsInvalid
     * @throws Console\Exception\ExceptionInterface
     * @throws Core\Exception\SiteNotFoundException
     * @throws \JsonException
     * @throws Typo3SitemapLocator\Exception\BaseUrlIsNotSupported
     * @throws Typo3SitemapLocator\Exception\SitemapIsMissing
     */
    protected function execute(Console\Input\InputInterface $input, Console\Output\OutputInterface $output): int
    {
        // Initialize client
        $clientOptions = $this->configuration->getParserClientOptions();
        $client = $this->clientFactory->get($clientOptions);

        // Initialize sub command
        $subCommand = new CacheWarmup\Command\CacheWarmupCommand($client, $this->eventDispatcher);
        $subCommand->setApplication($this->getApplication() ?? new Console\Application());

        // Initialize sub command input (never interactive, all values are provided by this command)
        $subCommandInput = new Console\Input\ArrayInput(
            $this->prepareCommandParameters($input),
            $subCommand->getDefinition(),
        );
        $subCommandInput->setInteractive(false);

        // Run cache warmup in sub command from eliashaeussler/cache-warmup
        $statusCode = $subCommand->run($subCommandInput, $output);

        // Fail if strict mode is enabled and at least one crawl was erroneous
        if ($input->getOption('strict') && $statusCode > 0) {
            return $statusCode;
        }

        return self::SUCCESS;
    }

    /**
     * Build the argument and option array for the cache warmup sub command.
     *
     * Crawler, crawler options and exclude patterns are read from the
     * extension configuration; everything else is taken from command input.
     *
     * @return array<string, mixed>
     * @throws CacheWarmup\Exception\LocalFilePathIsMissingInUrl
     * @throws CacheWarmup\Exception\UrlIsEmpty
     * @throws CacheWarmup\Exception\UrlIsInvalid
     * @throws Core\Exception\SiteNotFoundException
     * @throws \JsonException
     * @throws Typo3SitemapLocator\Exception\BaseUrlIsNotSupported
     * @throws Typo3SitemapLocator\Exception\SitemapIsMissing
     */
    private function prepareCommandParameters(Console\Input\InputInterface $input): array
    {
        // Resolve input options; array_unique() preserves keys, so results are
        // re-indexed to hand gap-free lists over to the sub command
        $languages = $this->resolveLanguages($input->getOption('languages'));
        $urls = array_values(array_unique($this->resolvePages($input->getOption('pages'), $languages)));
        $sitemaps = array_values(array_unique($this->resolveSites($input->getOption('sites'), $languages)));
        $config = $input->getOption('config');
        // Negative values are clamped to 0
        $limit = max(0, (int)$input->getOption('limit'));
        $strategy = $input->getOption('strategy');
        $format = $input->getOption('format');
        $excludePatterns = $this->configuration->getExcludePatterns();

        // Fetch crawler and crawler options
        $crawler = $this->configuration->getVerboseCrawler();
        $crawlerOptions = $this->configuration->getVerboseCrawlerOptions();

        // Initialize sub-command parameters
        $subCommandParameters = [
            'sitemaps' => $sitemaps,
            '--urls' => $urls,
            '--limit' => $limit,
            '--crawler' => $crawler,
            '--format' => $format,
        ];

        // Add crawler options to sub-command parameters
        if ($crawlerOptions !== []) {
            $subCommandParameters['--crawler-options'] = json_encode($crawlerOptions, JSON_THROW_ON_ERROR);
        }

        // Add exclude patterns
        if ($excludePatterns !== []) {
            $subCommandParameters['--exclude'] = $excludePatterns;
        }

        // Add crawling strategy
        if ($strategy !== null) {
            $subCommandParameters['--strategy'] = $strategy;
        }

        // Add config file (extension paths such as "EXT:..." are resolved first)
        if ($config !== null) {
            if (Core\Utility\PathUtility::isExtensionPath($config)) {
                $config = $this->packageManager->resolvePackagePath($config);
            }

            $subCommandParameters['--config'] = $config;
        }

        return $subCommandParameters;
    }

    /**
     * Generate the URLs of all requested pages in the requested languages.
     *
     * Each option value may contain a comma-separated list of page IDs. If all
     * languages are requested, the languages of the site a page belongs to are
     * used. Page/language combinations for which no URI can be generated are
     * skipped.
     *
     * @param array<string> $pages
     * @param list<int> $languages
     * @return list<string>
     * @throws Core\Exception\SiteNotFoundException
     */
    private function resolvePages(array $pages, array $languages): array
    {
        $resolvedUrls = [];

        foreach ($pages as $pageList) {
            $normalizedPages = Core\Utility\GeneralUtility::intExplode(',', $pageList, true);

            foreach ($normalizedPages as $page) {
                $languageIds = $languages;

                // The site is only looked up if its languages are actually needed
                if ($languageIds === [self::ALL_LANGUAGES]) {
                    $site = $this->siteFinder->getSiteByPageId($page);
                    $languageIds = array_keys($site->getLanguages());
                }

                foreach ($languageIds as $languageId) {
                    $uri = Utility\HttpUtility::generateUri($page, $languageId);

                    if ($uri !== null) {
                        $resolvedUrls[] = (string)$uri;
                    }
                }
            }
        }

        return $resolvedUrls;
    }

    /**
     * Locate the XML sitemaps of all requested sites in the requested languages.
     *
     * Each option value may contain a comma-separated list of root page IDs,
     * site identifiers or the keyword "all". Sites are resolved first and
     * de-duplicated by their identifier, so a site that is requested more than
     * once (e.g. by root page ID and by identifier) is only looked up once.
     *
     * @param array<string> $sites
     * @param list<int> $languages
     * @return list<Domain\Model\SiteAwareSitemap>
     * @throws CacheWarmup\Exception\LocalFilePathIsMissingInUrl
     * @throws CacheWarmup\Exception\UrlIsEmpty
     * @throws CacheWarmup\Exception\UrlIsInvalid
     * @throws Core\Exception\SiteNotFoundException
     * @throws Typo3SitemapLocator\Exception\BaseUrlIsNotSupported
     * @throws Typo3SitemapLocator\Exception\SitemapIsMissing
     */
    private function resolveSites(array $sites, array $languages): array
    {
        $resolvedSites = [];

        foreach ($sites as $siteList) {
            $requestedSites = Core\Utility\GeneralUtility::trimExplode(',', $siteList, true);

            // Keyword "all" replaces every other site given in the same option value
            if (in_array(self::ALL_SITES, $requestedSites, true)) {
                $requestedSites = $this->siteFinder->getAllSites();
            }

            foreach ($requestedSites as $site) {
                // Entries are either site objects (keyword "all"), root page IDs or site identifiers
                if (Core\Utility\MathUtility::canBeInterpretedAsInteger($site)) {
                    $site = $this->siteFinder->getSiteByRootPageId((int)$site);
                } elseif (is_string($site)) {
                    $site = $this->siteFinder->getSiteByIdentifier($site);
                }

                // Keep the first occurrence only, preserving the requested order
                $resolvedSites[$site->getIdentifier()] ??= $site;
            }
        }

        $resolvedSitemaps = [];

        foreach ($resolvedSites as $site) {
            $languageIds = $languages;

            if ($languageIds === [self::ALL_LANGUAGES]) {
                $languageIds = array_keys($site->getLanguages());
            }

            foreach ($languageIds as $languageId) {
                // NOTE(review): getLanguageById() presumably fails for languages that are
                // not configured for the site – confirm and document or handle if so
                $sitemaps = $this->sitemapLocator->locateBySite($site, $site->getLanguageById($languageId));

                foreach ($sitemaps as $sitemap) {
                    $resolvedSitemaps[] = Domain\Model\SiteAwareSitemap::fromLocatedSitemap($sitemap);
                }
            }
        }

        return $resolvedSitemaps;
    }

    /**
     * Normalize the requested languages to a list of unique language IDs.
     *
     * Each option value may contain a comma-separated list of language IDs.
     * If no language is requested at all, a list containing only the
     * "all languages" marker is returned.
     *
     * @param array<string> $languages
     * @return list<int>
     */
    private function resolveLanguages(array $languages): array
    {
        if ($languages === []) {
            // Run cache warmup for all languages by default
            return [self::ALL_LANGUAGES];
        }

        $resolvedLanguages = [];

        foreach ($languages as $languageList) {
            foreach (Core\Utility\GeneralUtility::intExplode(',', $languageList, true) as $languageId) {
                $resolvedLanguages[] = $languageId;
            }
        }

        // Avoid processing the same language more than once per page or site
        return array_values(array_unique($resolvedLanguages));
    }
}