project-eutopia/keisan

View on GitHub
lib/keisan/parser.rb

Summary

Maintainability
B
5 hrs
Test Coverage
module Keisan
  class Parser
    KEYWORDS = %w(let puts break continue).freeze

    attr_reader :tokens, :components

    def initialize(string: nil, tokens: nil)
      if string.nil? && tokens.nil?
        raise Exceptions::InternalError.new("Invalid arguments")
      end

      if !string.nil?
        @tokens = Tokenizer.new(string).tokens
      else
        raise Exceptions::InternalError.new("Invalid argument: tokens = #{tokens}") if tokens.nil? || !tokens.is_a?(Array)
        @tokens = tokens
      end

      @components = []

      if multi_line?
        parse_multi_line!
      elsif @tokens.first&.is_a?(Tokens::Word) && KEYWORDS.include?(@tokens.first.string)
        parse_keyword!
      else
        parse_components!
        remove_unary_identity!
      end
    end

    def ast
      @ast ||= AST::Builder.new(parser: self).ast
    end

    private

    def multi_line?
      @tokens.any? {|token| token.is_a?(Tokens::LineSeparator)}
    end

    def parse_multi_line!
      line_parsers = Util.array_split(@tokens) {|token| token.is_a?(Tokens::LineSeparator)}.map {|tokens| self.class.new(tokens: tokens)}
      @components = []
      line_parsers.each.with_index do |line_parser, i|
        @components += line_parser.components
        if i < line_parsers.count - 1
          @components << Parsing::LineSeparator.new
        end
      end
    end

    def parse_keyword!
      keyword = tokens.first.string
      arguments = if tokens[1].is_a?(Tokens::Group)
                    Util.array_split(tokens[1].sub_tokens) {|token| token.is_a?(Tokens::Comma)}.map {|argument_tokens|
                      Parsing::Argument.new(argument_tokens)
                    }
                  else
                    Parsing::Argument.new(tokens[1..-1])
                  end
      @components = [
        Parsing::Function.new(keyword, arguments)
      ]
    end

    def parse_components!
      @unparsed_tokens = tokens.dup

      # Components will store the elements (numbers, variables, bracket components, function calls)
      # and the operators in between
      while @unparsed_tokens.count > 0
        token = @unparsed_tokens.shift
        add_token_to_components!(token)
      end
    end

    def remove_unary_identity!
      @components = @components.select do |component|
        !component.is_a?(Parsing::Operator) || !(component.node_class <= AST::UnaryIdentity)
      end
    end

    def is_start_of_line?
      @components.empty? || @components.last.is_a?(Parsing::LineSeparator)
    end

    # Elements are groups of tokens separated by (non-unary) operators
    # The following are basic elements:
    # number
    # variable
    # function (word + round group)
    # list (square group)
    #
    # Additionally these can be modified by having any number of unary operators in front,
    # and any number of indexing groups (square groups) at the back
    #
    def add_token_to_components!(token)
      if token.is_a?(Tokens::LineSeparator)
        @components << Parsing::LineSeparator.new
      elsif is_start_of_line? || @components[-1].is_a?(Parsing::Operator)
        # Expect an element or a unary operator
        if token.type == :operator
          # Here it must be a unary operator
          add_unary_operator_to_components!(token)
        else
          # Here it must be an element
          add_element_to_components!(token)
        end

      elsif @components[-1].is_a?(Parsing::Element)
        # A word followed by a "round group" is actually a function: e.g. sin(x)
        if @components[-1].is_a?(Parsing::Variable) && token.type == :group && token.group_type == :round
          add_function_to_components!(token)
        # Here it is a postfix Indexing (access elements by index)
        elsif token.type == :group && token.group_type == :square
          add_indexing_to_components!(token)
        elsif token.type == :dot
          @components << Parsing::Dot.new
        elsif token.type == :operator
          add_operator_to_components!(token)
        else
          # Concatenation is multiplication
          @components << Parsing::Times.new
          add_token_to_components!(token)
        end

      elsif @components[-1].is_a?(Parsing::Dot)
        # Expect a word
        case token.type
        when :word
          @components[-1] = Parsing::DotWord.new(token.string)
        else
          raise Exceptions::ParseError.new("A word must follow a dot, received #{token.string}")
        end

      elsif @components[-1].is_a?(Parsing::DotWord)
        add_token_after_dot_word!(token)

      else
        raise Exceptions::ParseError.new("Token cannot be parsed, #{token.string}")
      end
    end

    def add_token_after_dot_word!(token)
      case token.type
      when :group
        case token.group_type
        when :round
          # Here it is a method call
          name = @components[-1].name
          @components[-1] = Parsing::DotOperator.new(name, arguments_from_group(token))
        when :square
          # Here we are indexing after method call
          add_indexing_to_components!(token)
        else
          raise Exceptions::ParseError.new("Cannot take curly braces after function call")
        end
      when :dot
        # Chaining method calls
        @components << Parsing::Dot.new
      when :operator
        # End of method call, move on to operator
        add_operator_to_components!(token)
      else
        raise Exceptions::ParseError.new("Expected arguments to dot operator, received #{token.string}")
      end
    end

    def add_unary_operator_to_components!(token)
      case token.operator_type
      when :+
        @components << Parsing::UnaryPlus.new
      when :-
        @components << Parsing::UnaryMinus.new
      when :"~"
        @components << Parsing::BitwiseNot.new
      when :"~~"
        @components << Parsing::BitwiseNotNot.new
      when :"!"
        @components << Parsing::LogicalNot.new
      when :"!!"
        @components << Parsing::LogicalNotNot.new
      else
        raise Exceptions::ParseError.new("Unhandled unary operator type #{token.operator_type}")
      end
    end

    def add_element_to_components!(token)
      case token
      when Tokens::Number
        @components << Parsing::Number.new(token.value)
      when Tokens::String
        @components << Parsing::String.new(token.value)
      when Tokens::Null
        @components << Parsing::Null.new
      when Tokens::Word
        @components << Parsing::Variable.new(token.string)
      when Tokens::Boolean
        @components << Parsing::Boolean.new(token.value)
      when Tokens::Group
        add_group_element_components!(token)
      else
        raise Exceptions::ParseError.new("Unhandled operator type #{token.operator_type}")
      end
    end

    def add_group_element_components!(token)
      case token.group_type
      when :round
        @components << Parsing::RoundGroup.new(token.sub_tokens)
      when :square
        @components << Parsing::List.new(arguments_from_group(token))
      when :curly
        # A hash either has a colon, or is empty
        if token.sub_tokens.any? {|token| token.is_a?(Tokens::Colon)} || token.sub_tokens.empty?
          @components << Parsing::Hash.new(Util.array_split(token.sub_tokens) {|token| token.is_a?(Tokens::Comma)})
        else
          @components << Parsing::CurlyGroup.new(token.sub_tokens)
        end
      else
        raise Exceptions::ParseError.new("Unhandled group type #{token.group_type}")
      end
    end

    def add_operator_to_components!(token)
      case token.operator_type
      # Assignment
      when :"="
        add_assignment_to_components!(token)
      else
        @components << operator_to_component(token.operator_type)
      end
    end

    OPERATOR_TO_PARSING_CLASS = {
      :+    => Parsing::Plus,
      :-    => Parsing::Minus,
      :*    => Parsing::Times,
      :/    => Parsing::Divide,
      :**   => Parsing::Exponent,
      :%    => Parsing::Modulo,
      :"&"  => Parsing::BitwiseAnd,
      :"|"  => Parsing::BitwiseOr,
      :"^"  => Parsing::BitwiseXor,
      :<<   => Parsing::BitwiseLeftShift,
      :>>   => Parsing::BitwiseRightShift,
      :"==" => Parsing::LogicalEqual,
      :"!=" => Parsing::LogicalNotEqual,
      :"&&" => Parsing::LogicalAnd,
      :"||" => Parsing::LogicalOr,
      :">"  => Parsing::LogicalGreaterThan,
      :"<"  => Parsing::LogicalLessThan,
      :">=" => Parsing::LogicalGreaterThanOrEqualTo,
      :"<=" => Parsing::LogicalLessThanOrEqualTo
    }.freeze

    def operator_to_component(operator)
      if klass = OPERATOR_TO_PARSING_CLASS[operator]
        klass.new
      else
        raise Exceptions::ParseError.new("Unhandled operator type #{operator}")
      end
    end

    def add_assignment_to_components!(token)
      if compound_operator = token.compound_operator
        @components << Parsing::CompoundAssignment.new(compound_operator)
      else
        @components << Parsing::Assignment.new
      end
    end

    def add_function_to_components!(token)
      @components[-1] = Parsing::Function.new(@components[-1].name, arguments_from_group(token))
    end

    def add_indexing_to_components!(token)
      # Have an indexing
      @components << Parsing::Indexing.new(arguments_from_group(token))
    end

    def arguments_from_group(token)
      if token.sub_tokens.empty?
        []
      else
        Util.array_split(token.sub_tokens) {|sub_token| sub_token.is_a?(Tokens::Comma)}.map do |sub_tokens|
          Parsing::Argument.new(sub_tokens)
        end
      end
    end
  end
end