eliashaeussler/cache-warmup

View on GitHub
src/CacheWarmer.php

Summary

Maintainability
A
0 mins
Test Coverage
A
100%
<?php

declare(strict_types=1);

/*
 * This file is part of the Composer package "eliashaeussler/cache-warmup".
 *
 * Copyright (C) 2020-2024 Elias Häußler <elias@haeussler.dev>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

namespace EliasHaeussler\CacheWarmup;

use EliasHaeussler\ValinorXml;
use GuzzleHttp\Client;
use GuzzleHttp\ClientInterface;
use GuzzleHttp\Exception\GuzzleException;

use function array_key_exists;
use function array_values;
use function count;
use function is_array;
use function is_string;

/**
 * CacheWarmer.
 *
 * @author Elias Häußler <elias@haeussler.dev>
 * @license GPL-3.0-or-later
 */
final class CacheWarmer
{
    /**
     * Parser used to resolve a sitemap into its nested sitemaps and URLs.
     */
    private readonly Xml\XmlParser $parser;

    /**
     * URLs collected for cache warmup, indexed by their string representation
     * to keep them unique.
     *
     * @var array<string, Sitemap\Url>
     */
    private array $urls = [];

    /**
     * Successfully parsed sitemaps, indexed by their string representation
     * to keep them unique.
     *
     * @var array<string, Sitemap\Sitemap>
     */
    private array $sitemaps = [];

    /**
     * Sitemaps that could not be parsed while running in non-strict mode.
     *
     * @var list<Sitemap\Sitemap>
     */
    private array $failedSitemaps = [];

    /**
     * Sitemaps that were skipped because an exclude pattern matched their URI.
     *
     * @var list<Sitemap\Sitemap>
     */
    private array $excludedSitemaps = [];

    /**
     * URLs that were skipped because an exclude pattern matched them.
     *
     * @var list<Sitemap\Url>
     */
    private array $excludedUrls = [];

    /**
     * @param int                                 $limit           Maximum number of URLs to collect; values <= 0 disable the limit
     * @param ClientInterface                     $client          HTTP client handed over to the XML parser
     * @param Crawler\Crawler                     $crawler         Crawler that receives the collected URLs in {@see run()}
     * @param Crawler\Strategy\CrawlingStrategy|null $strategy     Optional strategy that prepares the URLs before crawling
     * @param bool                                $strict          Whether sitemap parsing errors are rethrown (true) or
     *                                                             recorded as failed sitemaps (false)
     * @param list<Config\Option\ExcludePattern>  $excludePatterns Patterns matched against sitemap URIs and URLs
     */
    public function __construct(
        private readonly int $limit = 0,
        private readonly ClientInterface $client = new Client(),
        private readonly Crawler\Crawler $crawler = new Crawler\ConcurrentCrawler(),
        private readonly ?Crawler\Strategy\CrawlingStrategy $strategy = null,
        private readonly bool $strict = true,
        private readonly array $excludePatterns = [],
    ) {
        $this->parser = new Xml\XmlParser($this->client);
    }

    /**
     * Crawl all collected URLs.
     *
     * If a crawling strategy is configured, it prepares the URLs first.
     */
    public function run(): Result\CacheWarmupResult
    {
        $urls = $this->getUrls();

        if (null !== $this->strategy) {
            $urls = $this->strategy->prepareUrls($urls);
        }

        return $this->crawler->crawl($urls);
    }

    /**
     * Parse the given sitemaps and collect their URLs, following nested
     * sitemaps recursively.
     *
     * Nothing is done if the URL limit is already reached when this method is
     * called. Sitemaps matching an exclude pattern are recorded as excluded.
     * Sitemaps that were already parsed successfully are skipped, which also
     * protects against sitemap indexes that reference each other.
     *
     * @param list<string|Sitemap\Sitemap>|string|Sitemap\Sitemap $sitemaps
     *
     * @throws Exception\FileIsMissing
     * @throws Exception\LocalFilePathIsMissingInUrl
     * @throws Exception\SitemapCannotBeParsed
     * @throws Exception\SitemapIsInvalid
     * @throws Exception\UrlIsEmpty
     * @throws Exception\UrlIsInvalid
     * @throws GuzzleException
     * @throws ValinorXml\Exception\ArrayPathHasUnexpectedType
     * @throws ValinorXml\Exception\ArrayPathIsInvalid
     * @throws ValinorXml\Exception\XmlIsMalformed
     */
    public function addSitemaps(array|string|Sitemap\Sitemap $sitemaps): self
    {
        // Early return if no more URLs should be added
        if ($this->exceededLimit()) {
            return $this;
        }

        // Force array of sitemaps to be parsed
        if (!is_array($sitemaps)) {
            $sitemaps = [$sitemaps];
        }

        // Validate and parse given sitemaps
        foreach ($sitemaps as $sitemap) {
            // Parse sitemap URL to valid sitemap object
            if (is_string($sitemap)) {
                $sitemap = Sitemap\Sitemap::createFromString($sitemap);
            }

            // Throw exception if sitemap is invalid
            if (!($sitemap instanceof Sitemap\Sitemap)) {
                throw new Exception\SitemapIsInvalid($sitemap);
            }

            // Skip sitemap if it was already parsed. A sitemap is registered
            // before its nested sitemaps are followed, so this check also
            // breaks cycles (e.g. an index referencing itself) that would
            // otherwise recurse endlessly.
            if (array_key_exists((string) $sitemap, $this->sitemaps)) {
                continue;
            }

            // Skip sitemap if exclude pattern matches
            if ($this->isExcluded((string) $sitemap->getUri())) {
                $this->excludedSitemaps[] = $sitemap;

                continue;
            }

            // Parse sitemap object
            try {
                $result = $this->parser->parse($sitemap);
            } catch (GuzzleException|Exception\FileIsMissing|Exception\SitemapCannotBeParsed|Exception\UrlIsInvalid|ValinorXml\Exception\Exception $exception) {
                // Exit early if running in strict mode
                if ($this->strict) {
                    throw $exception;
                }

                // In non-strict mode, remember the failure and carry on
                $this->failedSitemaps[] = $sitemap;

                continue;
            }

            // Register sitemap before descending into nested sitemaps
            $this->addSitemap($sitemap);

            foreach ($result->getSitemaps() as $parsedSitemap) {
                $this->addSitemaps($parsedSitemap);
            }

            foreach ($result->getUrls() as $parsedUrl) {
                $this->addUrl($parsedUrl);
            }
        }

        return $this;
    }

    /**
     * Register a parsed sitemap unless one with the same string
     * representation is already registered.
     */
    private function addSitemap(Sitemap\Sitemap $sitemap): self
    {
        if (!array_key_exists((string) $sitemap, $this->sitemaps)) {
            $this->sitemaps[(string) $sitemap] = $sitemap;
        }

        return $this;
    }

    /**
     * Add a single URL for cache warmup.
     *
     * URLs matching an exclude pattern are recorded as excluded. Otherwise
     * the URL is added unless the limit is reached or it is already known.
     */
    public function addUrl(string|Sitemap\Url $url): self
    {
        if (is_string($url)) {
            $url = new Sitemap\Url($url);
        }

        if ($this->isExcluded((string) $url)) {
            $this->excludedUrls[] = $url;

            return $this;
        }

        if (!$this->exceededLimit() && !array_key_exists((string) $url, $this->urls)) {
            $this->urls[(string) $url] = $url;
        }

        return $this;
    }

    /**
     * Check whether a positive limit is configured and the number of
     * collected URLs has reached it.
     */
    private function exceededLimit(): bool
    {
        return $this->limit > 0 && count($this->urls) >= $this->limit;
    }

    /**
     * Check whether any configured exclude pattern matches the given URL.
     */
    private function isExcluded(string $url): bool
    {
        foreach ($this->excludePatterns as $pattern) {
            if ($pattern->matches($url)) {
                return true;
            }
        }

        return false;
    }

    /**
     * @return list<Sitemap\Url> Collected URLs in insertion order
     */
    public function getUrls(): array
    {
        return array_values($this->urls);
    }

    /**
     * @return list<Sitemap\Sitemap> Successfully parsed sitemaps in insertion order
     */
    public function getSitemaps(): array
    {
        return array_values($this->sitemaps);
    }

    /**
     * @return list<Sitemap\Sitemap> Sitemaps that failed to parse in non-strict mode
     */
    public function getFailedSitemaps(): array
    {
        return $this->failedSitemaps;
    }

    /**
     * @return list<Sitemap\Sitemap> Sitemaps skipped due to an exclude pattern
     */
    public function getExcludedSitemaps(): array
    {
        return $this->excludedSitemaps;
    }

    /**
     * @return list<Sitemap\Url> URLs skipped due to an exclude pattern
     */
    public function getExcludedUrls(): array
    {
        return $this->excludedUrls;
    }

    /**
     * Get the configured URL limit (values <= 0 mean no limit is applied).
     */
    public function getLimit(): int
    {
        return $this->limit;
    }
}