phug-php/phug

View on GitHub
src/Phug/Lexer/Lexer/State.php

Summary

Maintainability
A
2 hrs
Test Coverage
A
100%
<?php

namespace Phug\Lexer;

use Phug\Lexer;
use Phug\Lexer\Partial\IndentStyleTrait;
use Phug\Lexer\Token\InterpolationEndToken;
use Phug\Lexer\Token\TagInterpolationEndToken;
use Phug\LexerException;
use Phug\Reader;
use Phug\Util\OptionInterface;
use Phug\Util\Partial\LevelTrait;
use Phug\Util\Partial\OptionTrait;
use Phug\Util\SourceLocation;

/**
 * Represents the state of a currently running lexing process.
 */
class State implements OptionInterface
{
    use OptionTrait;
    use LevelTrait;
    use IndentStyleTrait;

    /**
     * Contains the current `Phug\Reader` instance used by the lexer.
     *
     * @var Reader
     */
    private $reader;

    /**
     * Contains the current `Phug\Lexer` instance linked to the state.
     *
     * @var Lexer
     */
    private $lexer;

    /**
     * Contains the stack of indent steps.
     *
     * @var array
     */
    private $indentStack;

    /**
     * List of scanner instances.
     *
     * @var ScannerInterface[]
     */
    private $scanners;

    /**
     * Creates a new instance of the state.
     *
     * @param Lexer  $lexer   linked lexer
     * @param string $input   pug string input
     * @param array  $options indent settings, errors info and reader class name
     */
    public function __construct(Lexer $lexer, $input, array $options)
    {
        $this->lexer = $lexer;
        $this->setOptionsRecursive([
            'reader_class_name'        => Reader::class,
            'encoding'                 => null,
            'level'                    => 0,
            'indent_width'             => null,
            'indent_style'             => null,
            'allow_mixed_indent'       => null,
            'multiline_markup_enabled' => true,
            'path'                     => null,
        ], $options ?: []);

        $readerClassName = $this->getOption('reader_class_name');
        if (!is_a($readerClassName, Reader::class, true)) {
            throw new \InvalidArgumentException(
                'Configuration option `reader_class_name` needs to be a valid FQCN of a class that extends '.
                Reader::class
            );
        }

        $this->reader = new $readerClassName(
            $input,
            $this->getOption('encoding')
        );
        if (!empty($options['path']) && method_exists($this->reader, 'setPath')) {
            $this->reader->setPath($options['path']);
        }
        $this->indentStyle = $this->getOption('indent_style');
        $this->indentWidth = $this->getOption('indent_width');
        $this->indentStack = [];
        $this->level = $this->getOption('level');

        //This will strip \r, \0 etc. from the input
        $this->reader->normalize();
    }

    /**
     * @return TokenInterface
     */
    public function getLastToken()
    {
        return $this->lexer
            ? $this->lexer->getLastToken()
            : null;
    }

    /**
     * @param $classNames
     *
     * @return bool
     */
    public function lastTokenIs($classNames)
    {
        $token = $this->getLastToken();

        foreach ($classNames as $className) {
            if ($token instanceof $className) {
                return true;
            }
        }

        return false;
    }

    /**
     * Return true if previous token was TagInterpolationEndToken or InterpolationEndToken.
     *
     * @return bool
     */
    public function isAfterInterpolation()
    {
        $previous = $this->getLastToken();

        return $previous instanceof TagInterpolationEndToken || $previous instanceof InterpolationEndToken;
    }

    /**
     * Returns the current lexer instance linked.
     *
     * @return Lexer|null
     */
    public function getLexer()
    {
        return $this->lexer;
    }

    /**
     * Returns the current reader instance that is used for parsing the input.
     *
     * @return Reader
     */
    public function getReader()
    {
        return $this->reader;
    }

    /**
     * Returns the currently used indentation style.
     *
     * @return int
     */
    public function getIndentLevel()
    {
        return end($this->indentStack) ?: 0;
    }

    /**
     * Outdent and return the new level.
     *
     * @return int
     */
    public function outdent()
    {
        array_pop($this->indentStack);

        return $this->getIndentLevel();
    }

    /**
     * Return new outdent level if current above expected,
     * or false if expected level reached.
     *
     * @return int
     */
    public function nextOutdent()
    {
        $oldLevel = $this->getIndentLevel();
        $expected = $this->getLevel();

        if ($expected < $oldLevel) {
            $newLevel = $this->outdent();
            if ($newLevel < $expected) {
                $this->throwException(
                    'Inconsistent indentation. '.
                    'Expecting either '.
                    $newLevel.
                    ' or '.
                    $oldLevel.
                    ' spaces/tabs.'
                );
            }

            return $newLevel;
        }

        return false;
    }

    /**
     * Indent and return the new level.
     *
     * @param null $level
     *
     * @return int
     */
    public function indent($level = null)
    {
        $level = $level ?: $this->getLevel();
        array_push($this->indentStack, $level);

        return $level;
    }

    /**
     * Runs all passed scanners once on the input string.
     *
     * The first scan that returns valid tokens will stop the scanning and
     * yields these tokens. If you want to continuously scan on something, rather
     * use the `loopScan`-method
     *
     * @param array|string $scanners the scanners to run
     * @param array        $options  options to be passed for the scanner
     *
     * @throws LexerException
     *
     * @return iterable the generator yielding all tokens found
     */
    public function scan($scanners, array $options = [])
    {
        $scanners = $this->filterScanners($scanners);

        foreach ($scanners as $scanner) {
            /** @var ScannerInterface $scanner */
            $success = false;
            foreach ($scanner->scan($this, $options) as $token) {
                if (!($token instanceof TokenInterface)) {
                    $this->throwException(
                        'Scanner '.get_class($scanner).' generated a result that is not a '.TokenInterface::class
                    );
                }

                yield $token;
                $success = true;
            }

            if ($success) {
                return;
            }
        }
    }

    /**
     * Continuously scans with all scanners passed as the first argument.
     *
     * If the second argument is true, it will throw an exception if none of the scanners
     * produced any valid tokens. The reading also stops when the end of the input as been reached.
     *
     * @param      $scanners
     * @param bool $required
     *
     * @throws LexerException
     *
     * @return iterable
     */
    public function loopScan($scanners, $required = false)
    {
        if ($this->reader->hasLength() && $this->scanners === null) {
            $this->scanners = $this->filterScanners($scanners);
        }

        while ($this->reader->hasLength()) {
            $success = false;

            foreach ($this->scan($scanners) as $token) {
                $success = true;

                yield $token;
            }

            if (!$success) {
                break;
            }
        }

        if ($this->reader->hasLength() && $required) {
            $this->throwException(
                'Unexpected '.$this->reader->peek(20)
            );
        }
    }

    public function createCurrentSourceLocation()
    {
        return new SourceLocation(
            $this->getOption('path'),
            $this->reader->getLine(),
            $this->reader->getOffset()
        );
    }

    /**
     * Creates a new instance of a token.
     *
     * The token automatically receives line/offset/level information through this method.
     *
     * @param string $className the class name of the token
     *
     * @return TokenInterface the token
     */
    public function createToken($className)
    {
        if (!is_subclass_of($className, TokenInterface::class)) {
            $this->throwException(
                "$className is not a valid token sub-class"
            );
        }

        return new $className(
            $this->createCurrentSourceLocation(),
            $this->level,
            str_repeat((string) $this->getIndentStyle(), (int) $this->getIndentWidth())
        );
    }

    /**
     * Quickly scans for a token by a single regular expression pattern.
     *
     * If the pattern matches, this method will yield a new token. If not, it will yield nothing
     *
     * All named capture groups are converted to `set*()`-methods, e.g.
     * `(?:<name>[a-z]+)` will automatically call `setName(<matchedValue>)` on the token.
     *
     * This method could be written without generators, but the way its designed is easier to use
     * in scanners as you can simply return it's value without having to check for it to be null.
     *
     *
     * @param class-string $className
     * @param string       $pattern
     * @param string|null  $modifiers
     *
     * @return iterable
     */
    public function scanToken($className, $pattern, $modifiers = null)
    {
        if (!$this->reader->match($pattern, $modifiers)) {
            return;
        }

        $data = $this->reader->getMatchData();

        $token = $this->createToken($className);
        $this->reader->consume();

        foreach ($data as $key => $value) {
            $method = 'set'.ucfirst($key);

            if (method_exists($token, $method)) {
                call_user_func([$token, $method], $value);
            }
        }

        yield $this->endToken($token);
    }

    /**
     * @param TokenInterface $token
     *
     * @return TokenInterface
     */
    public function endToken(TokenInterface $token)
    {
        $token->getSourceLocation()->setOffsetLength(
            $this->reader->getOffset() - $token->getSourceLocation()->getOffset()
        );

        return $token;
    }

    /**
     * Load a scan class name as a scanner instance (use cache if yet loaded in the state).
     *
     * @param string $scanner scanner class name
     *
     * @return ScannerInterface
     */
    private function loadScanner($scanner)
    {
        if (!isset($this->scanners[$scanner])) {
            $this->scanners[$scanner] = new $scanner();
        }

        return $this->scanners[$scanner];
    }

    /**
     * Filters and validates the passed scanners.
     *
     * This method makes sure that all scanners given are turned into their respective instances.
     *
     * @param array<ScannerInterface|string> $scanners list of scanner instances/class names.
     *
     * @return array
     */
    private function filterScanners($scanners)
    {
        $scannerInstances = [];
        $scanners = is_array($scanners) ? $scanners : [$scanners];

        foreach ($scanners as $key => $scanner) {
            if (!is_a($scanner, ScannerInterface::class, true)) {
                throw new \InvalidArgumentException(
                    "The passed scanner with key `$key` doesn't seem to be either a valid ".ScannerInterface::class.
                    ' instance or extended class'
                );
            }

            $scannerInstances[] = $scanner instanceof ScannerInterface
                ? $scanner
                : $this->loadScanner($scanner);
        }

        return $scannerInstances;
    }

    /**
     * Throws a lexer-exception.
     *
     * The current line and offset of the exception
     * get automatically appended to the message
     *
     * @param string     $message  A meaningful error message
     * @param int        $code     Error code
     * @param \Throwable $previous Source error
     *
     * @throws LexerException
     */
    public function throwException($message, $code = 0, $previous = null)
    {
        throw new LexerException(
            $this->createCurrentSourceLocation(),
            LexerException::message($message, [
                'near'   => $this->reader->peek(20),
                'path'   => $this->getOption('path'),
                'line'   => $this->reader->getLine(),
                'offset' => $this->reader->getOffset(),
            ]),
            $code,
            $previous
        );
    }
}