VIPnytt/RobotsTxtParser

View on GitHub
src/Parser/Directives/UserAgentParser.php

Summary

Maintainability
A
0 mins
Test Coverage
A
100%
<?php
/**
 * vipnytt/RobotsTxtParser
 *
 * @link https://github.com/VIPnytt/RobotsTxtParser
 * @license https://github.com/VIPnytt/RobotsTxtParser/blob/master/LICENSE The MIT License (MIT)
 */

namespace vipnytt\RobotsTxtParser\Parser\Directives;

use vipnytt\RobotsTxtParser\Client\Directives\UserAgentClient;
use vipnytt\RobotsTxtParser\Client\Directives\UserAgentTools;
use vipnytt\RobotsTxtParser\Handler\Directives\SubDirectiveHandler;
use vipnytt\RobotsTxtParser\Handler\RenderHandler;
use vipnytt\RobotsTxtParser\RobotsTxtInterface;
use vipnytt\UserAgentParser as UserAgentStringParser;

/**
 * Class UserAgentParser
 *
 * @package vipnytt\RobotsTxtParser\Parser\Directives
 */
class UserAgentParser implements ParserInterface, RobotsTxtInterface
{
    use DirectiveParserTrait;

    /**
     * Sub directives white list
     */
    const SUB_DIRECTIVES = [
        self::DIRECTIVE_ALLOW => 'allow',
        self::DIRECTIVE_CACHE_DELAY => 'cacheDelay',
        self::DIRECTIVE_COMMENT => 'comment',
        self::DIRECTIVE_CRAWL_DELAY => 'crawlDelay',
        self::DIRECTIVE_DISALLOW => 'disallow',
        self::DIRECTIVE_NO_INDEX => 'noIndex',
        self::DIRECTIVE_REQUEST_RATE => 'requestRate',
        self::DIRECTIVE_ROBOT_VERSION => 'robotVersion',
        self::DIRECTIVE_VISIT_TIME => 'visitTime',
    ];

    /**
     * Base uri
     * @var string
     */
    private $base;

    /**
     * User-agent handler
     * @var SubDirectiveHandler[]
     */
    private $handler = [];

    /**
     * Current User-agent(s)
     * @var string[]
     */
    private $current = [];

    /**
     * Append User-agent
     * @var bool
     */
    private $append = false;

    /**
     * UserAgent constructor.
     *
     * @param string $base
     */
    public function __construct($base)
    {
        $this->base = $base;
        $this->handlerAdd(self::USER_AGENT);
    }

    /**
     * Add sub-directive handler
     *
     * @param string $group
     * @return bool
     */
    private function handlerAdd($group)
    {
        if (!in_array($group, array_keys($this->handler))) {
            $this->handler[$group] = new SubDirectiveHandler($this->base, $group);
            return true;
        }
        return false;
    }

    /**
     * Add line
     *
     * @param string $line
     * @return bool
     */
    public function add($line)
    {
        if ($line == '' ||
            ($pair = $this->generateRulePair($line, [-1 => self::DIRECTIVE_USER_AGENT] + array_keys(self::SUB_DIRECTIVES))) === false) {
            return $this->append = false;
        }
        if ($pair[0] === self::DIRECTIVE_USER_AGENT) {
            return $this->set($pair[1]);
        }
        $this->append = false;
        $result = [];
        foreach ($this->current as $group) {
            $result[] = $this->handler[$group]->{self::SUB_DIRECTIVES[$pair[0]]}->add($pair[1]);
            $this->handler[$group]->count++;
        }
        return in_array(true, $result, true);
    }

    /**
     * Set new User-agent
     *
     * @param string $group
     * @return bool
     */
    private function set($group)
    {
        if (!$this->append) {
            $this->current = [];
        }
        $group = mb_strtolower($group);
        $this->current[] = $group;
        $this->handlerAdd($group);
        $this->append = true;
        return true;
    }

    /**
     * Render
     *
     * @param RenderHandler $handler
     * @return bool
     */
    public function render(RenderHandler $handler)
    {
        return $handler->getLevel() == 2 ? $this->renderExtensive($handler) : $this->renderCompressed($handler);
    }

    /**
     * Render extensive
     *
     * @param RenderHandler $handler
     * @return bool
     */
    private function renderExtensive(RenderHandler $handler)
    {
        $userAgents = $this->getUserAgents();
        rsort($userAgents);
        foreach ($userAgents as $userAgent) {
            $handler->add(self::DIRECTIVE_USER_AGENT, $userAgent);
            $this->renderAdd($userAgent, $handler);
        }
        return true;
    }

    /**
     * User-agent list
     *
     * @return string[]
     */
    public function getUserAgents()
    {
        $list = array_keys($this->handler);
        sort($list);
        return $list;
    }

    /**
     * Add sub-directives to the RenderHandler
     *
     * @param string $userAgent
     * @param RenderHandler $handler
     */
    private function renderAdd($userAgent, RenderHandler $handler)
    {
        if ($userAgent !== self::USER_AGENT &&
            $this->handler[$userAgent]->count === 0
        ) {
            $handler->add(self::DIRECTIVE_DISALLOW, '');
            return;
        }
        $this->handler[$userAgent]->robotVersion->render($handler);
        $this->handler[$userAgent]->visitTime->render($handler);
        $this->handler[$userAgent]->noIndex->render($handler);
        $this->handler[$userAgent]->disallow->render($handler);
        $this->handler[$userAgent]->allow->render($handler);
        $this->handler[$userAgent]->crawlDelay->render($handler);
        $this->handler[$userAgent]->cacheDelay->render($handler);
        $this->handler[$userAgent]->requestRate->render($handler);
        $this->handler[$userAgent]->comment->render($handler);
    }

    /**
     * Render compressed
     *
     * @param RenderHandler $handler
     * @return bool
     */
    private function renderCompressed(RenderHandler $handler)
    {
        $pair = $this->export();
        while (!empty($pair)) {
            $groupMembers = current($pair);
            foreach (array_keys($pair, $groupMembers) as $userAgent) {
                $handler->add(self::DIRECTIVE_USER_AGENT, $userAgent);
                unset($pair[$userAgent]);
            }
            if (isset($userAgent)) {
                $this->renderAdd($userAgent, $handler);
                unset($userAgent);
            }
        }
        return true;
    }

    /**
     * Export
     *
     * @return array
     */
    public function export()
    {
        $array = [];
        foreach ($this->getUserAgents() as $group) {
            if ($group == self::USER_AGENT &&
                $this->handler[$group]->count === 0
            ) {
                continue;
            }
            $array[$group] = (new UserAgentTools($this->handler[$group], $this->base))->export();
        }
        return $array;
    }

    /**
     * Client
     *
     * @param string $product
     * @param float|int|string|null $version
     * @param int|null $statusCode
     * @return UserAgentClient
     */
    public function client($product = self::USER_AGENT, $version = null, $statusCode = null)
    {
        $parser = new UserAgentStringParser($product, $version);
        $match = $parser->getUserAgent();
        if (!isset($this->handler[$match])) {
            // User-agent does not match any rule sets
            if (($match = $parser->getMostSpecific($this->getUserAgents())) === false) {
                $match = self::USER_AGENT;
            }
        }
        return new UserAgentClient($this->handler[$match], $this->base, $statusCode, $parser->getProduct());
    }
}