compiler/src/main/scala/amyc/parsing/Parser.scala
package amyc
package parsing
import scala.language.implicitConversions
import amyc.ast.NominalTreeModule.*
import amyc.utils.*
import Tokens.*
import TokenKinds.*
import amyc.ast.NominalTreeModule
import amyc.{core, parsing}
import amyc.parsing.Parser.literal
import amyc.parsing.keywords.Keyword
import scallion.{~, *}
import scala.annotation.targetName
// The parser for Amy
object Parser extends Pipeline[Iterator[Token], Program] with Parsers:
type Token = amyc.parsing.Token
type Kind = amyc.parsing.TokenKind
import Implicits._
import keywords.*
override def getKind(token: Token): TokenKind = TokenKind.of(token)
// ==============================================================================================
// ================================== HELPER FUNCTIONS ==========================================
// ==============================================================================================
inline def inBrace[A](inline syntax: Syntax[A]): Syntax[A] =
"{" ~>~ syntax ~<~ "}"
inline def inParenthesis[A](inline syntax: Syntax[A]): Syntax[A] =
"(" ~>~ syntax ~<~ ")"
implicit inline def kw(inline k: Keyword): Syntax[Token] = elem(KeywordKind(k.toString))
implicit inline def delimiter(inline string: String): Syntax[Token] = elem(DelimiterKind(string))
// ==============================================================================================
// ========================================== EOF ===============================================
// ==============================================================================================
lazy val eof: Syntax[Token] = elem(EOFKind)
// ==============================================================================================
// ====================================== OPERATORS =============================================
// ==============================================================================================
lazy val l0_op: Syntax[String] = accept(IdentifierKind("^[^*/%+\\-:<>=!&|^a-zA-Z$_].*$".r)) {
case IdentifierToken(value) => value
}
lazy val l1_op : Syntax[String] = accept(IdentifierKind("^[*/%].*$".r)){
case IdentifierToken(value) => value
}
lazy val l2_op : Syntax[String] = accept(IdentifierKind("^[+-].*$".r)) {
case IdentifierToken(value) => value
}
lazy val l3_op: Syntax[String] = accept(IdentifierKind("^:.*$".r)) {
case IdentifierToken(value) => value
}
lazy val l4_op: Syntax[String] = accept(IdentifierKind("^[<>].*$".r)) {
case IdentifierToken(value) => value
}
lazy val l5_op: Syntax[String] = accept(IdentifierKind("^[=!].*$".r)) {
case IdentifierToken(value) => value
}
lazy val l6_op: Syntax[String] = accept(IdentifierKind("^&.*$".r)) {
case IdentifierToken(value) => value
}
lazy val l7_op: Syntax[String] = accept(IdentifierKind("^\\|.*$".r)) {
case IdentifierToken(value) => value
}
lazy val l8_op: Syntax[String] = accept(IdentifierKind("^\\^.*$".r)) {
case IdentifierToken(value) => value
}
lazy val l9_op: Syntax[String] = accept(IdentifierKind("^[a-zA-Z$_].*$".r)) {
case IdentifierToken(value) => value
}
@targetName("plus") lazy val plus: Syntax[String] = elem(OperatorKind("+")) map (_ => "+")
@targetName("minus") lazy val minus: Syntax[String] = elem(OperatorKind("-")) map (_ => "-")
@targetName("not") lazy val not: Syntax[String] = elem(OperatorKind("!")) map (_ => "!")
// ===============================================================================================
// ======================================= MODIFIERS =============================================
// ===============================================================================================
lazy val modifier: Syntax[String] = accept(ModifierKind){
case ModifierToken(value) => value
}
// ==============================================================================================
// ================================== TOP LEVEL - PROGRAM =======================================
// ==============================================================================================
// An entire program (the starting rule for any Amy file).
/**
*/
lazy val program: Syntax[Program] =
many1(many1(module_def) ~<~ eof) map { ms =>
Program(ms.flatten.toList).setPos(ms.head.head)
}
// ==============================================================================================
// ====================================== AMY MODULE ============================================
// ==============================================================================================
// A module (i.e., a collection of definitions and an initializer expression)
lazy val module_def: Syntax[ModuleDef] =
(`module` ~ identifier ~
many(definition) ~
opt(expr) ~
`end` ~ identifier) map { case obj ~ id ~ defs ~ body ~ _ ~ id1 =>
if id == id1 then ModuleDef(id, defs.toList, body).setPos(obj)
else
throw AmycFatalError(s"Begin and end module names do not match: $id and $id1")
}
// ==============================================================================================
// ==================================== DEFINITIONS =============================================
// ==============================================================================================
/**
*/
lazy val definition: Syntax[ClassOrFunDef] =
funDef.up[ClassOrFunDef] | abstractClassDef.up[ClassOrFunDef] | caseClassDef
.up[ClassOrFunDef]
/**
*/
lazy val abstractClassDef: Syntax[AbstractClassDef] =
(`abstract` ~ `class` ~ identifier) map { case _ ~ _ ~ className =>
AbstractClassDef(className)
}
/**
*/
lazy val caseClassDef: Syntax[CaseClassDef] =
(`case` ~ `class` ~ identifier ~ inParenthesis(
parameters
) ~ ":" ~ identifier) map {
case _ ~ _ ~ className ~ params ~ _ ~ superClassName =>
CaseClassDef(className, params, superClassName)
}
/**
*/
lazy val funDef: Syntax[FunDef] =
(many(modifier) ~<~ `fn` ~ (identifier | plus | minus | not) ~ inParenthesis(parameters) ~<~ ":" ~ typeTree ~ opt("=" ~>~ inBrace(expr))) map {
case mods ~ funcName ~ params ~ tpe ~ Some(expr) =>
if mods.toList.contains("native") then
throw AmycFatalError(s"native function cannot have a body")
FunDef(funcName, params, tpe, expr).mods(mods.toList)
case mods ~ funcName ~ params ~ tpe ~ None =>
if ! mods.toList.contains("native") then
throw AmycFatalError(s"non-native function $funcName must have a body")
FunDef(funcName, params, tpe, EmptyExpr()).mods(mods.toList)
}
// ==============================================================================================
// ===================================== IDENTIFIERS ============================================
// ==============================================================================================
// An identifier.
val identifier: Syntax[String] = accept(IdentifierKind(".*".r)) {
case IdentifierToken(name) => name
}
// A QualifiedName (identifier.identifier)
def qualifiedName: Syntax[QualifiedName | Name | FunRef] =
(identifier ~ opt(("." | "::") ~ identifier)) map {
case id ~ Some(DelimiterToken(".") ~ id2) => QualifiedName(Some(id), id2)
case id ~ Some(DelimiterToken("::") ~ id2) =>
FunRef(QualifiedName(Some(id), id2))
case id ~ None => id
}
// ==============================================================================================
// ======================================== TYPES ===============================================
// ==============================================================================================
// A type expression.
lazy val typeTree: Syntax[TypeTree] = recursive {
identifierType | functionType
}
// A user-defined type (such as `List`).
lazy val identifierType: Syntax[TypeTree] =
qualifiedName map {
case qn: QualifiedName => ClassTypeTree(qn)
case id: Name => ClassTypeTree(QualifiedName(None, id))
}
lazy val functionType: Syntax[TypeTree] =
("(" ~>~ repsep(typeTree, ",") ~<~ ")" ~<~ "=>" ~ typeTree) map {
case params ~ rte => FunctionTypeTree(params.toList, rte)
}
// ==============================================================================================
// ===================================== PARAMETERS =============================================
// ==============================================================================================
// A list of parameter definitions.
lazy val parameters: Syntax[List[ParamDef]] =
repsep(parameterDef, ",").map(_.toList)
// A parameter definition, i.e., an identifier along with the expected type.
lazy val parameterDef: Syntax[ParamDef] =
(identifier ~<~ ":" ~ typeTree) map { case name ~ tpe =>
ParamDef(name, tpe)
}
lazy val args: Syntax[Seq[Expr]] =
repsep(expr, ",")
// ==============================================================================================
// =================================== PATTERN MATCHING =========================================
// ==============================================================================================
lazy val matchCase: Syntax[MatchCase] =
(`case` ~>~ pattern ~<~ "=>" ~ expr) map { case pattern ~ expr =>
MatchCase(pattern, expr)
}
// ==============================================================================================
// ======================================== PATTERNS ============================================
// ==============================================================================================
lazy val patterns: Syntax[Seq[Pattern]] = recursive {
repsep(pattern, ",")
}
// A pattern as part of a mach case.
lazy val pattern: Syntax[Pattern] =
idOrCaseClassPattern.up[Pattern] | literalPattern.up[Pattern] | wildPattern
.up[Pattern]
// TODO HR : Restrict return type
lazy val literalPattern: Syntax[Pattern] =
(literal map (LiteralPattern(_))) | unitPattern
// TODO HR : Restrict return type
lazy val unitPattern: Syntax[Pattern] =
("(" ~ ")") map (_ => LiteralPattern(new UnitLiteral))
lazy val wildPattern: Syntax[WildcardPattern] =
wildcard map (_ => WildcardPattern())
lazy val idOrCaseClassPattern: Syntax[IdPattern | CaseClassPattern] =
(qualifiedName ~ opt(inParenthesis(patterns))) map {
// Only an identifier
case (id: Name) ~ None => IdPattern(id)
// Case class wih simple name or qualified name
case (id: Name) ~ Some(patterns) =>
CaseClassPattern(QualifiedName(None, id), patterns.toList)
case (qn: QualifiedName) ~ Some(patterns) =>
CaseClassPattern(qn, patterns.toList)
case QualifiedName(Some(id), id2) ~ None =>
throw AmycFatalError(
s"Cannot have qualified name $id.$id2 without parameters"
)
}
// ============================================================================================
// ====================================== EXPRESSIONS =========================================
// ============================================================================================
// Entry-point to an expression
lazy val expr: Syntax[Expr] = recursive {
sequenceExpression | valDefinitionExpression
}
// ------------------------- First level expressions ----------------------------
// Val definitions
lazy val valDefinitionExpression: Syntax[Expr] =
(`val` ~>~ parameterDef ~<~ "=" ~ simpleExpression ~<~ ";" ~ expr) map {
case param ~ assign ~ rhs => Let(param, assign, rhs)
}
// Sequence definition
lazy val sequenceExpression: Syntax[Expr] =
(simpleExpression ~ opt(";" ~>~ expr)) map {
case expr ~ None => expr
case lhs ~ Some(rhs) => Sequence(lhs, rhs)
}
// ----------------------- Second level expressions ------------------------------
lazy val ifExpression: Syntax[Expr] =
(`if` ~>~ inParenthesis(expr) ~ inBrace(expr) ~<~ `else` ~ inBrace(
expr
)) map { case cond ~ trueBranch ~ falseBranch =>
Ite(cond, trueBranch, falseBranch)
}
lazy val simpleExpression: Syntax[Expr] = recursive {
(ifOrBinary ~ many(`match` ~>~ inBrace(many1(matchCase)))) map {
case expr ~ Seq() => expr
case scrut ~ s =>
s.foldLeft(scrut)((a, b) => Match(a, b.toList))
}
}
lazy val ifOrBinary: Syntax[Expr] = ifExpression | binaryExpression
lazy val binaryExpression: Syntax[Expr] =
operators(termExpression)(
// Defines the different operators, by decreasing priority.
l0_op is LeftAssociative,
l1_op is LeftAssociative,
l2_op | plus | minus is LeftAssociative,
l3_op is LeftAssociative,
l4_op is LeftAssociative,
l5_op is LeftAssociative,
l6_op is LeftAssociative,
l7_op is LeftAssociative,
l8_op is LeftAssociative,
l9_op is LeftAssociative,
) {
case (lhs, op, rhs) => InfixCall(lhs, op, rhs)
}
// ------------------------- Third Level expressions ------------------------------
lazy val termExpression: Syntax[Expr] = recursive {
(opt(minus | not) ~ factor) map {
case None ~ expr => expr
case Some("-") ~ expr => Neg(expr)
case Some("!") ~ expr => Not(expr)
case Some(op) ~ _ =>
throw AmycFatalError(s"Unary operator '$op' is not defined !")
}
}
// --------------------- Fourth Level expressions --------------------------
lazy val factor: Syntax[Expr] =
errorExpression | literal.up[Expr] | variableOrCall | valExpressionOrUnit
lazy val errorExpression: Syntax[Expr] =
(`error` ~>~ termExpression) map { x => Error(x) }
lazy val variableOrCall: Syntax[Expr] =
(qualifiedName ~ opt(inParenthesis(args))) map {
case (qn: QualifiedName) ~ Some(args) =>
Call(qn, args.toList)
case (id: Name) ~ Some(args) =>
Call(QualifiedName(None, id), args.toList)
case (id: Name) ~ None => Variable(id)
case (fr: FunRef) ~ None => fr
case (_: FunRef) ~ Some(_) =>
throw AmycFatalError(s"Cannot reference to a function with parameters")
case QualifiedName(Some(id), id2) ~ None =>
throw AmycFatalError(s"Call to $id.$id2 is missing the parameters")
}
lazy val valExpressionOrUnit: Syntax[Expr] =
inParenthesis(opt(expr)) map {
case Some(expr) => expr
case None => new UnitLiteral
}
// ==============================================================================================
// ===================================== LITERALS ===============================================
// ==============================================================================================
// A literal expression.
lazy val literal: Syntax[Literal[Boolean | Int | String]] =
accept(LiteralKind) {
case BoolLitToken(value) => BooleanLiteral(value)
case IntLitToken(value) => IntLiteral(value)
case StringLitToken(value) => StringLiteral(value)
case tk =>
throw AmycFatalError(
s"""Token of type 'LiteralKind' not defined in the Parser.
|Token is : $tk
|Need to add it a new match case in syntax 'literal'.""".stripMargin
)
}
override val name = "Parser"
override def run(tokens: Iterator[Token])(using core.Context): Program =
val parser = Parser(program)
parser(tokens) match
case Parsed(result, _) => result
case UnexpectedEnd(_) => reporter.fatal("Unexpected end of input.")
case UnexpectedToken(token, rest) =>
reporter.fatal {
s"Unexpected token: $token, possible kinds: ${rest.first.map(_.toString).mkString(", ")}"
}