src/Parser/Grammar/Primi.peg
<?php
declare(strict_types=1);
namespace Smuuf\Primi\Parser\Compiled;
use \Smuuf\Primi\Parser\GrammarHelpers;
use \Smuuf\Primi\Handlers\Kinds\StarredExpression;
use \hafriedlander\Peg\Parser;
class PrimiParser extends Parser\Packrat {
/**
 * Words that act as keywords or keyword-like literals in the grammar below
 * and therefore should not be usable as variable names.
 *
 * NOTE(review): nothing in the visible part of this class reads this
 * constant - the Mutable/Variable finalisers delegate the check to
 * GrammarHelpers::isReservedWord() instead. Confirm whether this list is
 * still needed and whether it has to be kept in sync with that helper.
 *
 * NOTE(review): the grammar also uses "elif" and "class" as keywords, but
 * they are not listed here - verify whether that is intentional.
 */
private const RESERVED_WORDS = [
'false', 'true', 'null', 'if', 'else', 'return', 'for', 'and', 'or',
'function', 'break', 'continue', 'while', 'try', 'catch', 'not', 'in',
'import'
];
/**
 * Finaliser for the Mutable rule.
 *
 * Rejects a match whose text is a reserved word, so that a reserved word is
 * not parsed as a variable name. Replacing the result with false tells the
 * parser that it should try other parser rules.
 *
 * @param mixed $result Matched node (passed by reference); overwritten with
 *                      false when the matched text is a reserved word.
 */
protected function Mutable__finalise(&$result) {

	$isReserved = GrammarHelpers::isReservedWord($result['text']);
	if (!$isReserved) {
		// Ordinary name - keep the match untouched.
		return;
	}

	$result = \false;

}
/**
 * Finaliser for the Variable rule.
 *
 * A variable must not share its name with a reserved word. When it does,
 * the result is replaced with false, which tells the parser that it should
 * try other parser rules.
 *
 * @param mixed $result Matched node (passed by reference); overwritten with
 *                      false when the matched text is a reserved word.
 */
protected function Variable__finalise(&$result) {

	// Nothing to do for names that are not reserved.
	if (!GrammarHelpers::isReservedWord($result['text'])) {
		return;
	}

	$result = \false;

}
/**
 * Finaliser for the StarredExpression rule.
 *
 * Replaces the raw 'stars' sub-match with the StarredExpression::STARS_*
 * constant corresponding to the number of matched star characters.
 *
 * @param mixed $result Matched node (passed by reference); its 'stars' item
 *                      is overwritten.
 */
protected function StarredExpression__finalise(&$result) {

	// A missing 'stars' text counts as zero stars.
	$count = \strlen($result['stars']['text'] ?? '');

	if ($count === 1) {
		$result['stars'] = StarredExpression::STARS_ONE;
	} elseif ($count === 2) {
		$result['stars'] = StarredExpression::STARS_TWO;
	} else {
		// Any other length is treated as "no stars".
		$result['stars'] = StarredExpression::STARS_NONE;
	}

}
/**
 * Finaliser for the MaybeStarredVariableName rule (function parameters).
 *
 * Replaces the raw 'stars' sub-match with the StarredExpression::STARS_*
 * constant corresponding to the number of stars in front of the parameter
 * name.
 *
 * @param mixed $result Matched node (passed by reference); its 'stars' item
 *                      is overwritten.
 */
protected function MaybeStarredVariableName__finalise(&$result) {

	$rawStars = $result['stars']['text'] ?? '';
	$starCount = \strlen($rawStars);

	// Start from "no stars" and override only for exactly one or two stars.
	$kind = StarredExpression::STARS_NONE;
	if ($starCount === 1) {
		$kind = StarredExpression::STARS_ONE;
	} elseif ($starCount === 2) {
		$kind = StarredExpression::STARS_TWO;
	}

	$result['stars'] = $kind;

}
/*!* Primi
#
# Strings.
#
# String content is either:
# a) backslash-followed-by-whatever,
# b) whatever-but-not-the-used-opening-quote-char
StringLiteral: quote:/['"]/ core:StringInside "$quote"
StringInside: / ( \\. | [^{$quote}\\] )*+ /
#
# F-Strings
#
FStringExpr: core:CondExpr
# Either: "backslash followed by whatever" or "{{" or "}}", or "whatever unless { or } or ending quotes"
FStringTxt: / ( \\. | \{\{ | \}\} | [^\{\}{$quote}] )*+ /
FStringInside: parts:FStringTxt ( "{" parts:FStringExpr "}" parts:FStringTxt )*
FStringLiteral: "f" quote:/['"]/ core:FStringInside "$quote"
#
# Others.
#
NumberLiteral: / -?\d[\d_]*(\.[\d_]+)? /
# Literals for true|false must NOT be followed by any possible characters representing variable name.
BoolLiteral: / \b(true|false)\b /
# Literal for null must NOT be followed by any possible characters representing variable name.
NullLiteral: "null" !VariableName
RegexLiteral: "rx" quote:/['"]/ core:StringInside "$quote"
Literal: ( skip:StringLiteral ) | skip:NumberLiteral | skip:BoolLiteral | ( &"n" skip:NullLiteral ) | ( &"rx" skip:RegexLiteral ) | ( &"f" skip:FStringLiteral )
VariableName: / (?:[a-zA-Z_][a-zA-Z0-9_]*) /
Variable: core:VariableName
AnonymousFunction:
"function" __ "(" __ params:ParameterList? __ ")" __ body:Block | "(" __ params:ParameterList? __ ")" __ "=>" __ body:Block
DictItem: __ key:Expression __ ":" __ value:Expression __
DictDefinition:
"{" __ ( items:DictItem ( COMMA items:DictItem )* )? __ COMMA? "}"
ListDefinition:
"[" __ ( items:Expression ( COMMA items:Expression )* )? __ COMMA? "]"
# Tuple literals: "()" or "(1,)" or "(1, ...)" or "(1, ..., x,)"
TupleDefinition:
( "(" __ ")" ) | ( "(" __ items:Expression COMMA ")" ) | ( "(" __ ( items:Expression ( COMMA items:Expression )* )* __ COMMA? ")" )
AbstractLiteral: skip:Literal | &/ [\[\{\(] / ( skip:ListDefinition | skip:DictDefinition | skip:TupleDefinition )
AbstractValue: skip:AbstractLiteral | skip:Variable
AddOperator: "+" | "-"
MultiplyOperator: "*" | "/"
PowerOperator: "**"
AssignmentOperator: "="
# These comparison operators don't have to be surrounded by whitespace: Eg. '1==inside' is valid
ComparisonOperator: "==" | "!=" | ">=" | "<=" | ">" | "<"
# These comparison operators must be surrounded by whitespace: Eg. '1ininside' is not valid.
ComparisonOperatorWithWhitespace: "in" | "not in"
AndOperator: "and"
OrOperator: "or"
NegationOperator: "!"
Expression: skip:AnonymousFunction | skip:Assignment | skip:CondExpr
Assignment: left:Mutable __ AssignmentOperator __ right:Expression
#
# Accessing variables and their items or attributes - for writing.
#
Mutable: skip:VariableVector | skip:VariableName !( __ ',') | skip:Targets
VariableVector: core:Variable ( ( vector:VectorAttr | vector:VectorItem )+ vector:VectorItemNoIndex? | vector:VectorItemNoIndex )
VectorItem: "[" __ index:Expression __ "]"
VectorItemNoIndex: "[" __ "]"
VectorAttr: "." attr:VariableName
#
# Accessing variables and their items or attributes - for reading.
#
Chain: &/[\[\(\.]/ ( core: AttrAccess | core:Dereference | core:Invocation ) chain:Chain?
Dereference: "[" __ key:Expression __ "]"
Invocation: "(" __ args:ArgumentList? __ ")"
AttrAccess: "." attr:VariableName
# The order of how these rules are nested is very important, as it defines
# their priority. We also need that they all are left-associative.
CondExpr:
true:LogicalOr ( _ "if" _ "(" __ cond:Expression __ ")" _ "else" _ false:LogicalOr )?
LogicalOr: operands:LogicalAnd ( _ ops:OrOperator _ operands:LogicalAnd )*
LogicalAnd: operands:Comparison ( _ ops:AndOperator _ operands:Comparison )*
Comparison: operands:Addition ( ( __ ops:ComparisonOperator __ | _ ops:ComparisonOperatorWithWhitespace _ ) operands:Addition )*
Addition: operands:Multiplication ( __ ops:AddOperator __ operands:Multiplication )*
Multiplication: operands:Exponentiation ( __ ops:MultiplyOperator __ operands:Exponentiation )*
Exponentiation: operand:Negation ( __ PowerOperator __ factor:Exponentiation )?
Negation: ( nots:NegationOperator )* core:Operand
Operand: ( ( "(" __ core:Expression __ ")" | core:AbstractValue ) chain:Chain? ) | skip:AbstractValue
# Allow argument lists
# 1. Without named argument
# 2. With named arguments, which must be placed after any non-named arguments (this one is handled in ArgumentList node postprocessing).
StarredExpression: stars:"*"{1,2} expr:Expression
Argument: ( (argKey:VariableName __ ":" )? __ argVal:Expression ) | ( argVal:StarredExpression )
ArgumentList: args:Argument ( COMMA args:Argument )*
# Optionally starred parameters: zero, one or two asterisks before the name, optionally followed by a default value.
MaybeStarredVariableName: stars:"*"{0,2} param:VariableName ( __ "=" __ default:AbstractValue )?
ParameterList: skip:MaybeStarredVariableName ( COMMA skip:MaybeStarredVariableName )*
FunctionDefinition: "function" [ function:VariableName __ "(" __ params:ParameterList? __ ")" __ body:Block
ClassDefinition: "class" _ cls:VariableName ( "(" parent:VariableName ")" )? __ def:Block
# Targets: one or more comma-separated variable names (used by Mutable and ForStatement).
Targets: t:VariableName ( COMMA t:VariableName )*
# Import statement:
# 'import a.b.c: d, e' (import specific symbols from module)
# 'import a.b.c' (import module as a whole)
ImportStatement:
"import" _ ( module: ( "."* VariableName ("." VariableName)* ) ) > ( ":" > symbols: VariableName ( > "," > symbols: VariableName )* )?
IfStatement:
"if" __ "(" __ cond:Expression __ ")" __ block:Block
( __ "elif" __ "(" __ elifCond:Expression __ ")" __ elifBlock:Block )*
( __ "else" __ elseBlock:Block )?
ForStatement: "for" __ "(" __ targets:Targets __ "in" __ left:Expression __ ")" __ ( right:Block )
WhileStatement: "while" __ "(" __ left:Expression __ ")" __ ( right:Block )
TryStatement: "try" __ main:Block __ "catch" __ onerror:Block
# &/[rbc]/ - Don't even try any of the subrules if this doesn't start with expected letters.
CommandStatements: &/[rbc]/ ( skip:ReturnStatement | skip:BreakStatement | skip:ContinueStatement )
# &SEP in the second branch matches a 'return' followed by separator, without consuming the
# separator - which is needed for the Program rule to finish parsing properly.
ReturnStatement: ( "return" [ ( subject:Expression )? ) | ( "return" &SEP )
BreakStatement: "break"
ContinueStatement: "continue"
# &/[iwftc]/ - No sub-rules will even be tried if the string does not start with an expected letter.
BlockStatements:
&/[iwftc]/ (
skip:IfStatement
| skip:WhileStatement
| skip:ForStatement
| skip:TryStatement
| skip:FunctionDefinition
| skip:ClassDefinition
| skip:ImportStatement
)
# !/[\s\};]/ Ensures that we won't try to match further if any of those symbols are at the beginning of the substring we're matching.
Statement: !/[\s\};]/ ( skip:BlockStatements | skip:CommandStatements | skip:Expression )
Block: "{" __ ( skip:Program )? __ "}"
# Optional whitespace.
__: / \s* / COMMENT? / \s* /
# Mandatory whitespace.
_: / \s+ / COMMENT? / \s* /
COMMA: __ "," __
COMMENT: ( / \s* \/\/ [^\n]* / )*
# Matches newline, takes care of optional // comments at EOL.
ENDL: / \s* (?:\/\/[^\n]*)?\n /
# Statement separator: Newline or semicolon.
SEP: ( ";" | ENDL ) / $ /?
Program: __ ( __ stmts:Statement SEP )* __? / $ /?
*/
}