railt/parser

View on GitHub
src/Parser/Parser.php

Summary

Maintainability
B
5 hrs
Test Coverage
<?php
/**
 * This file is part of Railt package.
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
declare(strict_types=1);

namespace Railt\Component\Parser;

use Railt\Component\Io\Readable;
use Railt\Component\Lexer\LexerInterface;
use Railt\Component\Lexer\Token\Unknown;
use Railt\Component\Lexer\TokenInterface;
use Railt\Component\Parser\Ast\Builder;
use Railt\Component\Parser\Ast\BuilderInterface;
use Railt\Component\Parser\Ast\RuleInterface;
use Railt\Component\Parser\Exception\GrammarException;
use Railt\Component\Parser\Exception\UnexpectedTokenException;
use Railt\Component\Parser\Rule\Alternation;
use Railt\Component\Parser\Rule\Concatenation;
use Railt\Component\Parser\Rule\Repetition;
use Railt\Component\Parser\Rule\Rule;
use Railt\Component\Parser\Rule\Terminal;
use Railt\Component\Parser\TokenStream\TokenStream;
use Railt\Component\Parser\Trace\Entry;
use Railt\Component\Parser\Trace\Escape;
use Railt\Component\Parser\Trace\Token;
use Railt\Component\Parser\Trace\TraceItem;

/**
 * Class Parser
 */
class Parser implements ParserInterface
{
    /**
     * @var LexerInterface
     */
    protected $lexer;

    /**
     * @var GrammarInterface
     */
    protected $grammar;

    /**
     * Lexer iterator
     *
     * @var TokenStream
     */
    protected $stream;

    /**
     * Possible token causing an error
     *
     * @var TokenInterface|null
     */
    private $errorToken;

    /**
     * Trace of parsed rules
     *
     * @var array|TraceItem[]
     */
    protected $trace = [];

    /**
     * Stack of items which need to be processed
     *
     * @var \SplStack|TraceItem[]
     */
    private $todo;

    /**
     * AbstractParser constructor.
     *
     * @param LexerInterface $lexer
     * @param GrammarInterface $grammar
     */
    public function __construct(LexerInterface $lexer, GrammarInterface $grammar)
    {
        $this->lexer = $lexer;
        $this->grammar = $grammar;
    }

    /**
     * @param string $ruleId
     * @param \Closure $then
     * @return ParserInterface|$this
     * @throws GrammarException
     */
    public function extend(string $ruleId, \Closure $then): ParserInterface
    {
        $maxId = \count($this->grammar->getRules()) - 1;

        $result = $then($this->grammar->fetch($ruleId), $maxId);

        if ($result instanceof \Generator) {
            while ($result->valid()) {
                [$key, $value] = [$result->key(), $result->current()];

                switch (true) {
                    case $value instanceof Rule:
                        $this->grammar->addRule($value);
                        $value = $value->getName();
                        break;

                    case \is_string($key) && \is_string($value):
                        if (! \class_exists($value)) {
                            throw new GrammarException('Delegate class ' . $value . '::class not found');
                        }

                        $this->grammar->addDelegate($key, $value);
                        break;

                    default:
                        throw new GrammarException('Bad parser extension generator arguments');
                }

                $result->send($value);
            }
        }

        return $this;
    }

    /**
     * @return GrammarInterface
     */
    public function getGrammar(): GrammarInterface
    {
        return $this->grammar;
    }

    /**
     * @return LexerInterface
     */
    public function getLexer(): LexerInterface
    {
        return $this->lexer;
    }

    /**
     * @param Readable $input
     * @return RuleInterface|mixed
     * @throws \Railt\Component\Exception\ExternalException
     */
    public function parse(Readable $input)
    {
        return $this->build($this->trace($input));
    }

    /**
     * @param array $trace
     * @return mixed
     */
    protected function build(array $trace)
    {
        return $this->getBuilder($trace)->build();
    }

    /**
     * @param array $trace
     * @return BuilderInterface
     */
    protected function getBuilder(array $trace): BuilderInterface
    {
        return new Builder($trace, $this->grammar);
    }

    /**
     * @param Readable $input
     * @return array
     * @throws \Railt\Component\Exception\ExternalException
     */
    protected function trace(Readable $input): array
    {
        $this->reset($input);
        $this->prepare();

        do {
            if ($this->unfold() && $this->stream->isEoi()) {
                break;
            }

            $this->verifyBacktrace($input);
        } while (true);

        return $this->trace;
    }

    /**
     * @param Readable $input
     * @throws \Railt\Component\Exception\ExternalException
     */
    private function reset(Readable $input): void
    {
        $this->stream = $this->getStream($input);

        $this->errorToken = null;

        $this->trace = [];
        $this->todo = [];
    }

    /**
     * @param Readable $input
     * @return TokenStream
     * @throws \Railt\Component\Exception\ExternalException
     */
    protected function getStream(Readable $input): TokenStream
    {
        return new TokenStream($this->lex($input), \PHP_INT_MAX);
    }

    /**
     * @param Readable $input
     * @return iterable|TokenInterface[]
     * @throws UnexpectedTokenException
     */
    protected function lex(Readable $input): iterable
    {
        foreach ($this->lexer->lex($input) as $token) {
            if ($token->getName() === Unknown::T_NAME) {
                $exception = new UnexpectedTokenException(\sprintf('Unexpected token %s', $token));
                $exception->throwsIn($input, $token->getOffset());

                throw $exception;
            }

            yield $token;
        }
    }

    /**
     * @return void
     */
    private function prepare(): void
    {
        $openRule = new Entry($this->grammar->beginAt(), 0, [
            $closeRule = new Escape($this->grammar->beginAt(), 0),
        ]);

        $this->todo = [$closeRule, $openRule];
    }

    /**
     * Unfold trace.
     *
     * @return bool
     */
    private function unfold(): bool
    {
        while (0 < \count($this->todo)) {
            $rule = \array_pop($this->todo);

            if ($rule instanceof Escape) {
                $this->addTrace($rule);
            } else {
                $out = $this->reduce($this->grammar->fetch($rule->getRule()), $rule->getData());

                if ($out === false && $this->backtrack() === false) {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * @param TraceItem $item
     * @return TraceItem
     */
    private function addTrace(TraceItem $item): TraceItem
    {
        $this->trace[] = $item;

        $item->at($this->stream->offset());

        return $item;
    }

    /**
     * @param Rule $current
     * @param string|int $next
     * @return bool
     */
    private function reduce(Rule $current, $next): bool
    {
        if (! $this->stream->current()) {
            return false;
        }

        switch (true) {
            case $current instanceof Terminal:
                return $this->parseTerminal($current);

            case $current instanceof Concatenation:
                return $this->parseConcatenation($current);

            case $current instanceof Alternation:
                return $this->parseAlternation($current, $next);

            case $current instanceof Repetition:
                return $this->parseRepetition($current, $next);
        }

        return false;
    }

    /**
     * @param Terminal $token
     * @return bool
     */
    private function parseTerminal(Terminal $token): bool
    {
        /** @var TokenInterface $current */
        $current = $this->stream->current();

        if ($token->getTokenName() !== $current->getName()) {
            return false;
        }

        \array_pop($this->todo);

        $this->addTrace(new Token($current, $token->isKept()));
        $this->errorToken = $this->stream->next();

        return true;
    }

    /**
     * @param Concatenation $concat
     * @return bool
     */
    private function parseConcatenation(Concatenation $concat): bool
    {
        $this->addTrace(new Entry($concat->getName()));

        $children = $concat->getChildren();

        for ($i = \count($children) - 1; $i >= 0; --$i) {
            $nextRule = $children[$i];

            $this->todo[] = new Escape($nextRule, 0);
            $this->todo[] = new Entry($nextRule, 0);
        }

        return true;
    }

    /**
     * @param Alternation $choice
     * @param string|int $next
     * @return bool
     */
    private function parseAlternation(Alternation $choice, $next): bool
    {
        $children = $choice->getChildren();

        if ($next >= \count($children)) {
            return false;
        }

        $this->addTrace(new Entry($choice->getName(), $next, $this->todo));

        $nextRule = $children[$next];

        $this->todo[] = new Escape($nextRule, 0);
        $this->todo[] = new Entry($nextRule, 0);

        return true;
    }

    /**
     * @param Repetition $repeat
     * @param string|int $next
     * @return bool
     */
    private function parseRepetition(Repetition $repeat, $next): bool
    {
        $nextRule = $repeat->getChildren();

        if ($next === 0) {
            $name = $repeat->getName();
            $min = $repeat->getMin();

            $this->addTrace(new Entry($name, $min));

            \array_pop($this->todo);

            $this->todo[] = new Escape($name, $min, $this->todo);

            for ($i = 0; $i < $min; ++$i) {
                $this->todo[] = new Escape($nextRule, 0);
                $this->todo[] = new Entry($nextRule, 0);
            }

            return true;
        }

        $max = $repeat->getMax();

        if ($max !== -1 && $next > $max) {
            return false;
        }

        $this->todo[] = new Escape($repeat->getName(), $next, $this->todo);
        $this->todo[] = new Escape($nextRule, 0);
        $this->todo[] = new Entry($nextRule, 0);

        return true;
    }

    /**
     * Backtrack the trace.
     *
     * @return bool
     */
    private function backtrack(): bool
    {
        $found = false;

        do {
            $last = \array_pop($this->trace);

            if ($last instanceof Entry) {
                $found = $this->grammar->fetch($last->getRule()) instanceof Alternation;
            } elseif ($last instanceof Escape) {
                $found = $this->grammar->fetch($last->getRule()) instanceof Repetition;
            } elseif ($last instanceof Token) {
                if (! $this->stream->prev()) {
                    return false;
                }
            }
        } while (0 < \count($this->trace) && $found === false);

        if ($found === false) {
            return false;
        }

        $this->todo = $last->getTodo();
        $this->todo[] = new Entry($last->getRule(), $last->getData() + 1);

        return true;
    }

    /**
     * @param Readable $input
     * @throws \Railt\Component\Exception\ExternalException
     */
    private function verifyBacktrace(Readable $input): void
    {
        if ($this->backtrack() === false) {
            /** @var TokenInterface $token */
            $token = $this->errorToken ?? $this->stream->current();

            $exception = new UnexpectedTokenException(\sprintf('Unexpected token %s', $token));
            $exception->throwsIn($input, $token->getOffset());

            throw $exception;
        }
    }
}