src/Parser/Grammar/Primi.peg

Summary

Maintainability
Test Coverage
<?php

declare(strict_types=1);

namespace Smuuf\Primi\Parser\Compiled;

use \Smuuf\Primi\Parser\GrammarHelpers;
use \Smuuf\Primi\Handlers\Kinds\StarredExpression;

use \hafriedlander\Peg\Parser;

class PrimiParser extends Parser\Packrat {

    /**
     * List of reserved words (keywords and literal names) that must not be
     * usable as variable names.
     *
     * NOTE(review): this constant is not referenced anywhere in the visible
     * part of this class - the `*__finalise` hooks delegate to
     * GrammarHelpers::isReservedWord() instead. Confirm whether this list is
     * still needed and whether it is kept in sync with that helper.
     *
     * NOTE(review): the grammar also uses "class" and "elif" as keywords, but
     * they are not listed here - confirm whether that is intentional.
     */
    private const RESERVED_WORDS = [
        'false', 'true', 'null', 'if', 'else', 'return', 'for', 'and', 'or',
        'function', 'break', 'continue', 'while', 'try', 'catch', 'not', 'in',
        'import'
    ];

    /**
     * Reject a matched "Mutable" node whose text is a reserved word.
     *
     * Replacing the result with false tells the parser that this rule did
     * not match, so that it should try other parser rules.
     *
     * @param mixed $result Match result of the rule, passed by reference.
     *     Its 'text' entry is the text checked against the reserved words.
     */
    protected function Mutable__finalise(&$result) {

        $matchedText = $result['text'];

        // Anything that is not a reserved word is left untouched.
        if (!GrammarHelpers::isReservedWord($matchedText)) {
            return;
        }

        $result = \false;

    }

    /**
     * Reject a matched "Variable" node whose name is a reserved word.
     *
     * Replacing the result with false tells the parser that this rule did
     * not match, so that it should try other parser rules.
     *
     * @param mixed $result Match result of the rule, passed by reference.
     *     Its 'text' entry is the name checked against the reserved words.
     */
    protected function Variable__finalise(&$result) {

        // Keep the result as it is unless the matched name is reserved.
        $result = GrammarHelpers::isReservedWord($result['text'])
            ? \false
            : $result;

    }

    /**
     * Replace the matched stars of a starred expression with a constant
     * describing how many stars were present.
     *
     * One star maps to StarredExpression::STARS_ONE, two stars to
     * StarredExpression::STARS_TWO and anything else (including a missing
     * 'stars' match) to StarredExpression::STARS_NONE.
     *
     * @param mixed $result Match result of the rule, passed by reference.
     *     Its 'stars' entry is overwritten with the constant.
     */
    protected function StarredExpression__finalise(&$result) {

        // Length of the matched stars text; zero if nothing was matched.
        $count = \strlen($result['stars']['text'] ?? '');

        if ($count === 1) {
            $result['stars'] = StarredExpression::STARS_ONE;
        } elseif ($count === 2) {
            $result['stars'] = StarredExpression::STARS_TWO;
        } else {
            $result['stars'] = StarredExpression::STARS_NONE;
        }

    }

    /**
     * Replace the matched stars of a (possibly starred) parameter name with
     * a constant describing how many stars were present.
     *
     * One star maps to StarredExpression::STARS_ONE, two stars to
     * StarredExpression::STARS_TWO and anything else (including no stars at
     * all) to StarredExpression::STARS_NONE.
     *
     * @param mixed $result Match result of the rule, passed by reference.
     *     Its 'stars' entry is overwritten with the constant.
     */
    protected function MaybeStarredVariableName__finalise(&$result) {

        $starsText = $result['stars']['text'] ?? '';
        $length = \strlen($starsText);

        $result['stars'] = $length === 1
            ? StarredExpression::STARS_ONE
            : ($length === 2
                ? StarredExpression::STARS_TWO
                : StarredExpression::STARS_NONE);

    }

/*!* Primi

#
# Strings.
#

# String content is either:
# a) backslash-followed-by-whatever,
# b) whatever-but-not-the-used-opening-quote-char
StringLiteral: quote:/['"]/ core:StringInside "$quote"
StringInside: / ( \\. | [^{$quote}\\] )*+ /

#
# F-Strings
#

FStringExpr: core:CondExpr
# Either: "backslash followed by whatever" or "{{" or "}}", or "whatever unless { or } or ending quotes"
FStringTxt: / ( \\. | \{\{ | \}\} | [^\{\}{$quote}] )*+ /
FStringInside: parts:FStringTxt ( "{" parts:FStringExpr "}" parts:FStringTxt )*
FStringLiteral: "f" quote:/['"]/ core:FStringInside "$quote"

#
# Others.
#

NumberLiteral: / -?\d[\d_]*(\.[\d_]+)? /

# Literals for true|false must NOT be followed by any possible characters representing variable name.
BoolLiteral: / \b(true|false)\b /

# Literal for null must NOT be followed by any possible characters representing variable name.
NullLiteral: "null" !VariableName
RegexLiteral: "rx" quote:/['"]/ core:StringInside "$quote"

Literal: ( skip:StringLiteral ) | skip:NumberLiteral | skip:BoolLiteral | ( &"n" skip:NullLiteral ) | ( &"rx" skip:RegexLiteral ) | ( &"f" skip:FStringLiteral )
VariableName: / (?:[a-zA-Z_][a-zA-Z0-9_]*) /
Variable: core:VariableName
AnonymousFunction:
    "function" __ "(" __ params:ParameterList? __ ")" __ body:Block | "(" __ params:ParameterList? __ ")" __ "=>" __ body:Block

DictItem: __ key:Expression __ ":" __ value:Expression __
DictDefinition:
    "{" __ ( items:DictItem ( COMMA items:DictItem )* )? __ COMMA? "}"
ListDefinition:
    "[" __ ( items:Expression ( COMMA items:Expression )* )? __ COMMA? "]"
# Tuple literals: "()" or "(1,)" or "(1, ...)" or "(1, ..., x,)"
TupleDefinition:
    ( "(" __ ")" ) | ( "(" __ items:Expression COMMA ")" ) | ( "(" __ ( items:Expression ( COMMA items:Expression )* )* __ COMMA?  ")" )

AbstractLiteral: skip:Literal | &/ [\[\{\(] / ( skip:ListDefinition | skip:DictDefinition | skip:TupleDefinition )
AbstractValue: skip:AbstractLiteral | skip:Variable

AddOperator: "+" | "-"
MultiplyOperator: "*" | "/"
PowerOperator: "**"
AssignmentOperator: "="
# These comparison operators don't have to be surrounded by whitespace: Eg. '1==inside' is valid.
ComparisonOperator: "==" | "!=" | ">=" | "<=" | ">" | "<"
# These comparison operators must be surrounded by whitespace: Eg. '1ininside' is not valid.
ComparisonOperatorWithWhitespace: "in"  | "not in"
AndOperator: "and"
OrOperator: "or"
NegationOperator: "!"

Expression: skip:AnonymousFunction | skip:Assignment | skip:CondExpr
Assignment: left:Mutable __ AssignmentOperator __ right:Expression

#
# Accessing variables and their items or attributes - for writing.
#

Mutable: skip:VariableVector | skip:VariableName !( __ ',') | skip:Targets
VariableVector: core:Variable ( ( vector:VectorAttr | vector:VectorItem )+ vector:VectorItemNoIndex? | vector:VectorItemNoIndex )
VectorItem: "[" __ index:Expression __ "]"
VectorItemNoIndex: "[" __ "]"
VectorAttr: "." attr:VariableName

#
# Accessing variables and their items or attributes - for reading.
#

Chain: &/[\[\(\.]/ ( core: AttrAccess | core:Dereference | core:Invocation ) chain:Chain?
Dereference: "[" __ key:Expression __ "]"
Invocation: "(" __ args:ArgumentList? __ ")"
AttrAccess: "." attr:VariableName

# The order of how these rules are nested is very important, as it defines
# their priority. We also need that they all are left-associative.
CondExpr:
    true:LogicalOr ( _ "if" _ "(" __ cond:Expression __ ")" _ "else" _ false:LogicalOr )?
LogicalOr: operands:LogicalAnd ( _ ops:OrOperator _ operands:LogicalAnd )*
LogicalAnd: operands:Comparison ( _ ops:AndOperator _ operands:Comparison )*
Comparison: operands:Addition ( ( __ ops:ComparisonOperator __ | _ ops:ComparisonOperatorWithWhitespace _ ) operands:Addition )*
Addition: operands:Multiplication ( __ ops:AddOperator __ operands:Multiplication )*
Multiplication: operands:Exponentiation ( __ ops:MultiplyOperator __ operands:Exponentiation )*
Exponentiation: operand:Negation ( __ PowerOperator __ factor:Exponentiation )?
Negation: ( nots:NegationOperator )* core:Operand
Operand: ( ( "(" __ core:Expression __ ")" | core:AbstractValue ) chain:Chain? ) | skip:AbstractValue

# Allow argument lists
# 1. Without named argument
# 2. With named arguments, which must be placed after any non-named arguments (this one is handled in ArgumentList node postprocessing).
StarredExpression: stars:"*"{1,2} expr:Expression
Argument: ( (argKey:VariableName __ ":" )? __ argVal:Expression ) | ( argVal:StarredExpression )
ArgumentList: args:Argument ( COMMA args:Argument )*

# Optionally starred parameters: Zero, one or two asterisks, then the parameter name and an optional default value.
MaybeStarredVariableName: stars:"*"{0,2} param:VariableName ( __ "=" __ default:AbstractValue )?
ParameterList: skip:MaybeStarredVariableName ( COMMA skip:MaybeStarredVariableName )*
FunctionDefinition: "function" [ function:VariableName __ "(" __ params:ParameterList? __ ")" __ body:Block
ClassDefinition: "class" _ cls:VariableName ( "(" parent:VariableName ")" )? __ def:Block

# Targets: One or more comma-separated variable names (used by assignments and 'for' loops).
Targets: t:VariableName ( COMMA t:VariableName )*

# Import statement:
# 'import a.b.c: d, e' (import specific symbols from module)
# 'import a.b.c' (import module as a whole)
ImportStatement:
    "import" _ ( module: ( "."* VariableName ("." VariableName)* ) ) > ( ":" > symbols: VariableName ( > "," > symbols: VariableName )* )?

IfStatement:
    "if" __ "(" __ cond:Expression __ ")" __ block:Block
    ( __ "elif" __ "(" __ elifCond:Expression __ ")" __ elifBlock:Block )*
    ( __ "else" __ elseBlock:Block )?

ForStatement: "for" __ "(" __ targets:Targets __ "in" __ left:Expression __ ")" __ ( right:Block )
WhileStatement: "while" __ "(" __ left:Expression __ ")" __ ( right:Block )
TryStatement: "try" __ main:Block __ "catch" __ onerror:Block

# &/[rbc]/ - Don't even try any of the subrules if this doesn't start with expected letters.
CommandStatements: &/[rbc]/ ( skip:ReturnStatement | skip:BreakStatement | skip:ContinueStatement )
# &SEP in the second branch matches a 'return' followed by separator, without consuming the
# separator - which is needed for the Program rule to finish parsing properly.
ReturnStatement: ( "return" [ ( subject:Expression )? ) | ( "return" &SEP )
BreakStatement: "break"
ContinueStatement: "continue"

# &/[iwftc]/ - No sub-rules will even be tried if the string does not start with an expected letter.
BlockStatements:
    &/[iwftc]/ (
        skip:IfStatement
        | skip:WhileStatement
        | skip:ForStatement
        | skip:TryStatement
        | skip:FunctionDefinition
        | skip:ClassDefinition
        | skip:ImportStatement
    )

# !/[\s\};]/ Ensures that we won't try to match further if any of those symbols are at the beginning of the substring we're matching.
Statement: !/[\s\};]/ ( skip:BlockStatements | skip:CommandStatements | skip:Expression )
Block: "{" __ ( skip:Program )? __ "}"

# Optional whitespace.
__: / \s* / COMMENT? / \s* /

# Mandatory whitespace.
_: / \s+ / COMMENT? / \s* /

COMMA: __ "," __
COMMENT: ( / \s* \/\/ [^\n]* / )*

# Matches newline, takes care of optional // comments at EOL.
ENDL: / \s* (?:\/\/[^\n]*)?\n /

# Statement separator: Newline or semicolon.
SEP: ( ";" | ENDL ) / $ /?

Program: __ ( __ stmts:Statement SEP )* __? / $ /?

*/

}