j84reginato/my-eval

View on GitHub
src/Parsing/Parser.php

Summary

Maintainability
D
1 day
Test Coverage
<?php

declare(strict_types=1);

namespace MyEval\Parsing;

use LucidFrame\Console\ConsoleTable;
use MyEval\Exceptions\DelimeterMismatchException;
use MyEval\Exceptions\DivisionByZeroException;
use MyEval\Exceptions\ExponentialException;
use MyEval\Exceptions\NullOperandException;
use MyEval\Exceptions\SyntaxErrorException;
use MyEval\Exceptions\UnexpectedOperatorException;
use MyEval\Exceptions\UnknownOperatorException;
use MyEval\Lexing\Token;
use MyEval\Lexing\TokenType;
use MyEval\Parsing\Nodes\Node;
use MyEval\Parsing\Nodes\Operand\BooleanNode;
use MyEval\Parsing\Nodes\Operand\ConstantNode;
use MyEval\Parsing\Nodes\Operand\FloatNode;
use MyEval\Parsing\Nodes\Operand\IntegerNode;
use MyEval\Parsing\Nodes\Operand\RationalNode;
use MyEval\Parsing\Nodes\Operand\VariableNode;
use MyEval\Parsing\Nodes\Operator\AbstractExpressionNode;
use MyEval\Parsing\Nodes\Operator\AbstractOperatorNode;
use MyEval\Parsing\Nodes\Operator\CloseBraceNode;
use MyEval\Parsing\Nodes\Operator\CloseParenthesisNode;
use MyEval\Parsing\Nodes\Operator\FunctionNode;
use MyEval\Parsing\Nodes\Operator\InfixExpressionNode;
use MyEval\Parsing\Nodes\Operator\OpenBraceNode;
use MyEval\Parsing\Nodes\Operator\OpenParenthesisNode;
use MyEval\Parsing\Nodes\Operator\PostfixExpressionNode;
use MyEval\Parsing\Nodes\Operator\TerminatorNode;
use MyEval\Parsing\Nodes\Operator\TernaryExpressionNode;
use MyEval\Parsing\Nodes\Operator\UnmappedNode;
use MyEval\Parsing\Operations\OperationBuilder;
use MyEval\Parsing\Traits\Numeric;

use function get_class;

/**
 * Mathematical expression parser, based on the shunting yard algorithm.
 *
 * Parse a token string into an abstract syntax tree (AST).
 *
 * As the parser loops over the individual tokens, two stacks are kept up to date.
 * One stack ($operatorStack) consists of hitherto unhandled tokens corresponding to ''operators'' (unary and binary
 * operators, function applications and parenthesis) and a stack of parsed sub-expressions (the $operandStack).
 *
 * If the current token is a terminal token (number, variable or constant), a corresponding node is pushed onto the
 * operandStack. Otherwise, the precedence of the current token is compared to the top element(t) on the operatorStack,
 * and as long as the current token has lower precedence, we keep popping operators from the stack to construct more
 * complicated subexpressions together with the top items on the operandStack.
 *
 * Once the token list is empty, we pop the remaining operators as above, and if the formula was well-formed, the only
 * thing remaining on the operandStack is a completely parsed AST, which we return.
 */
class Parser
{
    use Numeric;

    /**
     * @var array $tokens Token[] list of tokens to process.
     */
    protected array $tokens;

    /**
     * @var Stack $operandStack Stack of operands waiting to process.
     */
    protected Stack $operandStack;

    /**
     * @var Stack $operatorStack Stack of operators waiting to process.
     */
    protected Stack $operatorStack;

    /**
     * @var Node|null
     */
    private static Node|null $lastNode = null;

    /**
     * @param bool $allowImplicitMultiplication Determine if the parser allows implicit multiplication.
     * @param bool $simplifyingParser           Determine if apply a simplification of the given expression.
     * @param bool $debugMode                   Determine if enable/disable debug mode.
     */
    public function __construct(
        private bool $allowImplicitMultiplication = true,
        private bool $simplifyingParser = true,
        private bool $debugMode = false
    ) {
    }

    /**
     * Parse list of tokens.
     *
     * @param Token[] $tokens Array of input tokens.
     *
     * @return Node AST representing the parsed expression.
     * @throws SyntaxErrorException
     * @throws DelimeterMismatchException
     * @throws DivisionByZeroException
     * @throws UnknownOperatorException
     * @throws UnexpectedOperatorException
     * @throws NullOperandException
     * @throws ExponentialException
     */
    public function parse(array $tokens): Node
    {
        // Filter away any whitespace.
        $tokens = $this->filterTokens($tokens);

        // Insert missing implicit multiplication tokens.
        if ($this->allowImplicitMultiplication) {
            $tokens = $this->parseImplicitMultiplication($tokens);
        }

        $this->tokens = $tokens;

        // Perform the actual parsing.
        return $this->shuntingYard($tokens);
    }

    /**
     * Remove Whitespace from the token list.
     *
     * @param array $tokens Input list of tokens.
     *
     * @return Token[]
     */
    protected function filterTokens(array $tokens): array
    {
        $filteredTokens = array_filter($tokens, static function (Token $token) {
            return $token->type !== TokenType::WHITESPACE;
        });

        // Return the array values only, because array_filter preserves the keys.
        return array_values($filteredTokens);
    }

    /**
     * Insert multiplication tokens where needed (taking care of implicit multiplication).
     *
     * @param array $tokens Input list of tokens (Token[]).
     *
     * @return Token[]
     */
    protected function parseImplicitMultiplication(array $tokens): array
    {
        $result    = [];
        $lastToken = null;
        foreach ($tokens as $token) {
            if (Token::canFactorsInImplicitMultiplication($lastToken, $token)) {
                $result[] = new Token('*', TokenType::MULTIPLICATION_OPERATOR);
            }
            $lastToken = $token;
            $result[]  = $token;
        }

        return $result;
    }

    /**
     * Implementation of the shunting yard parsing algorithm.
     *
     * @param array $tokens Token[] array of tokens to process.
     *
     * @return Node AST of the parsed expression.
     * @throws SyntaxErrorException
     * @throws DelimeterMismatchException
     * @throws DivisionByZeroException
     * @throws UnknownOperatorException
     * @throws NullOperandException
     * @throws UnexpectedOperatorException
     * @throws ExponentialException
     */
    private function shuntingYard(array $tokens): Node
    {
        $this->operatorStack = new Stack();
        $this->operandStack  = new Stack();

        // Remember the last token handled, this is done to recognize unary operators.
        self::$lastNode = null;

        // Loop over the tokens.
        foreach ($tokens as $token) {
            $debug[] = [
                'token'          => (string)$token,
                'rpn_output'     => (string)$this->operandStack,
                'operator_stack' => (string)$this->operatorStack,
            ];
            $this->processToken($token);
        }

        $this->printTable($debug ?? []);

        // Pop remaining operators.
        while (!$this->operatorStack->isEmpty()) {
            /** @var InfixExpressionNode|TernaryExpressionNode $node */
            $node = $this->operatorStack->pop();
            $node = $this->populateNode($node);
            $this->operandStack->push($node);
        }

        // Stack should be empty here.
        if ($this->operandStack->count() > 1) {
            throw new SyntaxErrorException();
        }

        return $this->operandStack->pop();
    }

    /**
     * Process current token.
     *
     * @param Token $token
     *
     * @return void
     * @throws DivisionByZeroException
     * @throws DelimeterMismatchException
     * @throws SyntaxErrorException
     * @throws UnknownOperatorException
     * @throws UnexpectedOperatorException
     * @throws NullOperandException
     * @throws ExponentialException
     */
    private function processToken(Token $token): void
    {
        $node = Node::factory($token);

        switch (get_class($node)) {
            // Operands
            case IntegerNode::class:
            case RationalNode::class:
            case FloatNode::class:
            case BooleanNode::class:
            case VariableNode::class:
            case ConstantNode::class:
                $this->handleOperand($node);
                break;

            // Operators
            case InfixExpressionNode::class:
                $this->handleInfixOperator($node, $token, self::$lastNode);
                break;

            case PostfixExpressionNode::class:
                $this->handlePostfixOperator($node);
                break;

            case TernaryExpressionNode::class:
                $this->handleTernaryOperator($node);
                break;

            case FunctionNode::class:
                $this->handleFunctionOperator($node);
                break;

            case OpenParenthesisNode::class:
                $this->handleOpenParenthesis($node);
                break;

            case CloseParenthesisNode::class:
                $this->handleCloseParenthesis();
                break;

            case OpenBraceNode::class:
                $this->handleOpenBrace($node);
                break;

            case CloseBraceNode::class:
                $this->handleCloseBrace();
                break;

            case TerminatorNode::class:
            case UnmappedNode::class:
            default:
                break;
        }

        // Remember the current token (if it hasn't been nulled, for example being a unary +).
        if (
            !$node instanceof UnmappedNode &&
            !$node instanceof CloseParenthesisNode &&
            !(
                $node instanceof InfixExpressionNode &&
                $this->isUnary($node, self::$lastNode) &&
                $token->type === TokenType::ADDITION_OPERATOR
            )
        ) {
            self::$lastNode = $node;
        }
    }

    /**
     * Push terminal tokens on the operandStack.
     *
     * @param Node $node
     *
     * @return void
     */
    private function handleOperand(Node $node): void
    {
        $this->operandStack->push($node);
    }

    /**
     * @param InfixExpressionNode $node
     * @param Token               $token
     * @param Node|null           $lastNode
     *
     * @return void
     * @throws DivisionByZeroException
     * @throws DelimeterMismatchException
     * @throws SyntaxErrorException
     * @throws UnknownOperatorException
     * @throws UnexpectedOperatorException
     * @throws NullOperandException
     * @throws ExponentialException
     */
    private function handleInfixOperator(InfixExpressionNode $node, Token $token, ?Node $lastNode): void
    {
        // Check for unary minus and unary plus.
        if ($this->isUnary($node, $lastNode)) {
            // Unary +, just ignore it.
            if ($token->type === TokenType::ADDITION_OPERATOR) {
                return;
            }
            // Unary -, replace the token.
            if ($token->type === TokenType::SUBTRACTION_OPERATOR) {
                $node = new InfixExpressionNode('~', null, null);
            }
        }

        // Pop operators with higher priority.
        while ($node->lowerPrecedenceThan($this->operatorStack->peek())) {
            /** @var InfixExpressionNode|TernaryExpressionNode $popped */
            $popped = $this->operatorStack->pop();
            $popped = $this->populateNode($popped);
            $this->operandStack->push($popped);
        }

        $this->operatorStack->push($node);
    }

    /**
     * Handle postfix operators.
     *
     * @param PostfixExpressionNode $node
     *
     * @return void
     * @throws SyntaxErrorException
     */
    private function handlePostfixOperator(PostfixExpressionNode $node): void
    {
        $operand = $this->operandStack->pop();
        if ($operand === null) {
            throw new SyntaxErrorException();
        }
        $this->operandStack->push(new FunctionNode($node->operator, $operand));
    }

    /**
     * Push ternary tokens on the operandStack.
     *
     * @param TerminatorNode $node
     *
     * @return void
     */
    private function handleTernaryOperator(Node $node): void
    {
        $this->operatorStack->push($node);
    }

    /**
     * Push function applications onto the operatorStack.
     *
     * @param FunctionNode $node
     *
     * @return void
     */
    private function handleFunctionOperator(FunctionNode $node): void
    {
        $this->operatorStack->push($node);
    }

    /**
     * Push open parenthesis `(` onto the operatorStack.
     *
     * @param OpenParenthesisNode $node
     *
     * @return void
     */
    private function handleOpenParenthesis(OpenParenthesisNode $node): void
    {
        $this->operatorStack->push($node);
    }

    /**
     * Handle a closing parenthesis.
     *
     * Popping operators off the operator stack until we find a matching opening parenthesis.
     *
     * @throws DelimeterMismatchException
     * @throws SyntaxErrorException
     * @throws DelimeterMismatchException
     * @throws DivisionByZeroException
     * @throws UnknownOperatorException
     * @throws UnexpectedOperatorException
     * @throws NullOperandException
     * @throws ExponentialException
     */
    protected function handleCloseParenthesis(): void
    {
        // Flag, checking for mismatching parentheses.
        $clean = false;

        // Pop operators off the operatorStack until its empty, or we find an opening parenthesis, building
        // subexpressions on the operandStack as we go.
        /** @var InfixExpressionNode|TernaryExpressionNode $popped */
        while ($popped = $this->operatorStack->pop()) {
            // ok, we have our matching opening parenthesis
            if ($popped instanceof OpenParenthesisNode) {
                $clean = true;
                break;
            }

            $node = $this->populateNode($popped);
            $this->operandStack->push($node);
        }

        // Throw an error if the parenthesis couldn't be matched.
        if (!$clean) {
            throw new DelimeterMismatchException();
        }

        // Check to see if the parenthesis pair was in fact part of a function application.
        // If so, create the corresponding FunctionNode and push it onto the operandStack.
        $previous = $this->operatorStack->peek();
        if ($previous instanceof FunctionNode) {
            /** @var FunctionNode $node */
            $node    = $this->operatorStack->pop();
            $operand = $this->operandStack->pop();
            $node->setOperand($operand);
            $this->operandStack->push($node);
        }
    }

    /**
     * Push open brace `{` onto the operatorStack.
     *
     * @param OpenBraceNode $node
     *
     * @return void
     */
    private function handleOpenBrace(OpenBraceNode $node): void
    {
        $this->operatorStack->push($node);
    }

    /**
     * Handle a closing brace.
     *
     * Popping operators off the operator stack until we find a matching opening brace.
     *
     * @throws DelimeterMismatchException
     * @throws SyntaxErrorException
     * @throws DelimeterMismatchException
     * @throws DivisionByZeroException
     * @throws UnknownOperatorException
     * @throws UnexpectedOperatorException
     * @throws NullOperandException
     * @throws ExponentialException
     */
    protected function handleCloseBrace(): void
    {
        // Flag, checking for mismatching brace.
        $clean = false;

        // Pop operators off the operatorStack until its empty, or we find an opening brace.
        /** @var InfixExpressionNode|TernaryExpressionNode $popped */
        while ($popped = $this->operatorStack->pop()) {
            if ($popped instanceof OpenBraceNode) {
                $clean = true;
                break;
            }

            $node = $this->populateNode($popped);
            $this->operandStack->push($node);
        }

        // Throw an error if the brace couldn't be matched.
        if (!$clean) {
            throw new DelimeterMismatchException('{');
        }
    }

    /**
     * Populate node (ExpressionNode or IfNode) with operands.
     *
     * @param AbstractOperatorNode $node
     *
     * @return Node
     * @throws DelimeterMismatchException
     * @throws DivisionByZeroException
     * @throws ExponentialException
     * @throws NullOperandException
     * @throws SyntaxErrorException
     * @throws UnexpectedOperatorException
     * @throws UnknownOperatorException
     */
    protected function populateNode(AbstractOperatorNode $node): Node
    {
        if ($node instanceof FunctionNode) {
            throw new DelimeterMismatchException($node->operator);
        }

        if ($node instanceof OpenParenthesisNode) {
            throw new DelimeterMismatchException($node->operator);
        }

        if (!$node instanceof AbstractExpressionNode) {
            throw new SyntaxErrorException();
        }

        if ($node->operator === '~') {
            return $this->populateUnaryNode();
        }

        $right = $this->operandStack->pop();
        $left  = $this->operandStack->pop();

        if ($right === null || $left === null) {
            throw new SyntaxErrorException();
        }

        $node->setLeft($left);
        $node->setRight($right);

        if ($node instanceof TernaryExpressionNode) {
            $node->setCondition($this->operandStack->pop());
        }

        return $this->simplifyingParser
            ? (new OperationBuilder())->simplify($node)
            : $node;
    }

    /**
     * Populate unary node with operands.
     *
     * @return Node
     * @throws DivisionByZeroException
     * @throws SyntaxErrorException
     * @throws UnknownOperatorException
     * @throws UnexpectedOperatorException
     * @throws NullOperandException
     * @throws ExponentialException
     */
    private function populateUnaryNode(): Node
    {
        $left = $this->operandStack->pop();

        if ($left === null) {
            throw new SyntaxErrorException();
        }

        if ($this->simplifyingParser && $this->isNumeric($left)) {
            return $this->simplifyUnary($left);
        }

        $node = new InfixExpressionNode('-', $left, null);

        return $this->simplifyingParser
            ? (new OperationBuilder())->simplify($node)
            : $node;
    }

    /**
     * @throws DivisionByZeroException
     */
    private function simplifyUnary(Node $left): Node
    {
        return match (get_class($left)) {
            IntegerNode::class  => new IntegerNode(-$left->value),
            RationalNode::class => new RationalNode(-$left->getNumerator(), $left->getDenominator()),
            FloatNode::class    => new FloatNode(-$left->value),
            default             => $left
        };
    }

    /**
     * Determine if $node is in fact a unary operator.
     *
     * If $node can be a unary operator (i.e. is a '+' or '-' node), **and** this is the first node we parse or the
     * previous node was a SubExpressionNode, i.e. an opening parenthesis, or the previous node was already a unary
     * minus, this means that the current node is in fact a unary '+' or '-' and we return true, otherwise return false.
     *
     * @param Node      $node     Current node.
     * @param Node|null $lastNode Previous node handled by the Parser.
     *
     * @return bool
     */
    protected function isUnary(Node $node, ?Node $lastNode): bool
    {
        if (!$node instanceof InfixExpressionNode) {
            return false;
        }

        if (!($node->canBeUnary())) {
            return false;
        }

        // Unary if it is the first token.
        if ($this->operatorStack->isEmpty() && $this->operandStack->isEmpty()) {
            return true;
        }

        // Or if the previous token was '(' or '{'.
        if ($lastNode instanceof OpenParenthesisNode || $lastNode instanceof OpenBraceNode) {
            return true;
        }

        // Or if the previous token was ';'.
        if ($lastNode instanceof TerminatorNode) {
            return true;
        }

        // Or last node was already a unary minus.
        if ($lastNode instanceof InfixExpressionNode && $lastNode->operator === '~') {
            return true;
        }

        return false;
    }

    /**
     * @param bool $flag
     *
     * @return void
     */
    public function allowImplicitMultiplication(bool $flag): void
    {
        $this->allowImplicitMultiplication = $flag;
    }

    /**
     * @param bool $flag
     *
     * @return void
     */
    public function setSimplifying(bool $flag): void
    {
        $this->simplifyingParser = $flag;
    }

    /**
     * @param bool $flag
     *
     * @return void
     */
    public function setDebugMode(bool $flag): void
    {
        $this->debugMode = $flag;
    }

    /**
     * Print a table showing the result of Reverse Polish Notation by extending the shunting yard algorithm.
     * Just for debug.
     *
     * @param array $debug
     *
     * @return void
     */
    private function printTable(array $debug): void
    {
        $debug[] = [
            'token'          => '',
            'rpn_output'     => (string)$this->operandStack,
            'operator_stack' => (string)$this->operatorStack,
        ];

        if ($this->debugMode) {
            echo "\n";
            $tbl = new ConsoleTable();
            $tbl->setHeaders(['Token', 'RPN Output', 'Operator stack']);
            foreach ($debug ?? [] as $item) {
                $tbl->addRow([$item['token'], $item['rpn_output'], $item['operator_stack']]);
            }
            $tbl->display();
            echo "\n";
        }
    }
}