smortex/yaml-sort

View on GitHub
lib/yaml/sort/parser.ra

Summary

Maintainability
Test Coverage
# frozen_string_literal: true

# vim:set syntax=racc:

# Racc grammar for the token stream built by #scan (see the "inner" section).
# The semantic actions build Scalar, Dictionary, List, Item and Alias objects;
# those classes are not defined in this file.
class Yaml::Sort::Parser
# Terminal symbols. COMMENT is deliberately absent: #scan attaches comments to
# the following token and drops the COMMENT tokens before parsing starts.
token
  START_OF_DOCUMENT END_OF_DOCUMENT
  VALUE KEY UNINDENT ITEM ANCHOR ALIAS
# NOTE(review): the precedence on ITEM presumably resolves the conflict between
# the bare "ITEM" alternative and "ITEM value" in list_item -- confirm against
# racc's conflict report.
prechigh
  left ITEM
preclow
rule
  # A document is a single value after the start marker; the end marker is optional.
  document: START_OF_DOCUMENT value END_OF_DOCUMENT { result = val[1] }
          | START_OF_DOCUMENT value                 { result = val[1] }

  # A value is a scalar, a dictionary or list (each closed by UNINDENT), or an
  # alias that is looked up through the shared @anchors table.
  value: VALUE               { result = Scalar.new(val[0]) }
       | dictionary UNINDENT { result = val[0] }
       | list UNINDENT       { result = val[0] }
       | ALIAS               { result = Alias.new(@anchors, val[0]) }

  # Left-recursive: the first item creates the Dictionary, later ones are added to it.
  dictionary: dictionary dictionary_item { val[0].add_item(*val[1]); result = val[0] }
            | dictionary_item            { result = Dictionary.create(*val[0]) }

  # Yields a [key, value] pair. With an anchor, the value is stored in @anchors
  # under the anchor token's :value and an Alias is put in its place.
  dictionary_item: KEY ANCHOR value { @anchors[val[1][:value]] = val[2]; result = [Scalar.new(val[0]), Alias.new(@anchors, val[1])] }
                 | KEY value        { result = [Scalar.new(val[0]), val[1]] }

  # Left-recursive: the first item creates the List, later ones are added to it.
  list: list list_item { val[0].add_item(*val[1]); result = val[0] }
      | list_item      { result = List.create(*val[0]) }

  # Yields an [item, value] pair. Anchors are handled as in dictionary_item;
  # a bare ITEM gets a Scalar with an empty string value.
  list_item: ITEM ANCHOR value { @anchors[val[1][:value]] = val[2]; result = [Item.new(val[0]),  Alias.new(@anchors, val[1])] }
           | ITEM value        { result = [Item.new(val[0]), val[1]] }
           | ITEM              { result = [Item.new(val[0]), Scalar.new({value: ''})] }
end

---- header

require 'strscan'

---- inner

# Split +text+ into the token list consumed by the generated parser.
#
# A "---" document start is prepended when +text+ does not begin with one.
# All scanner state (@lines, @tokens, @lineno, @position, @indent_stack,
# @anchors) is reset on every call. The scanner alternates between two modes:
# structure mode (document markers, comments, list items, keys) and value mode
# (anchors, aliases, quoted and plain scalars), switched by +scan_value+.
#
# COMMENT tokens are not returned: their text is collected and stored under
# :comment on the next token that is not an UNINDENT, then they are removed.
#
# text - String with the YAML source.
#
# Returns the Array of [type, details] tokens (also kept in @tokens).
# Raises Racc::ParseError on unexpected content or on a quoted scalar that is
# not terminated before the end of the input.
def scan(text)
  text = "---\n#{text}" unless text.start_with?("---\n")

  scan_value = false

  @lines = text.lines
  s = StringScanner.new(text)
  @tokens = []
  @lineno = 1
  @fakelineno = 0
  @position = 0
  @indent_stack = []
  @anchors = {}

  until s.eos?
    if scan_value
      @position += s.matched_size if s.scan(/[[:blank:]]*/)
      case
      when s.scan(/&[[:alnum:]]+/)
        emit(:ANCHOR, s.matched[1..-1])
      when s.scan(/\*[[:alnum:]]+/)
        emit(:ALIAS, s.matched[1..-1])
      when s.scan(/"/)
        # Double-quoted scalar: a backslash escapes the next character
        # (including a newline), so an escaped quote does not end the value.
        match = s.matched
        loop do
          chunk = s.scan_until(/"|\\/)
          # scan_until returns nil when neither a quote nor a backslash is left.
          raise_unterminated_string if chunk.nil?
          match += chunk
          break unless match[-1] == "\\"

          escaped = s.scan(/.|\n/)
          # A backslash as the very last character has nothing to escape.
          raise_unterminated_string if escaped.nil?
          match += escaped
        end
        emit(:VALUE, match)
      when s.scan(/'/)
        # Single-quoted scalar: a doubled quote ('') is an escaped quote.
        match = s.matched
        loop do
          chunk = s.scan_until(/'/)
          raise_unterminated_string if chunk.nil?
          match += chunk
          break unless s.match?(/'/)
          match += s.scan(/'/)
        end
        emit(:VALUE, match)
      when s.match?(/\S+/)
        # Plain scalar: take the rest of the line, then every following line
        # that is blank or indented deeper than the current indentation.
        match = s.scan_until(/$/)

        while s.match?("\n" + last_indent_value + " ") || s.match?(/\n[[:blank:]]*(?=\n)/)
          # The continuation may be the last line of input without a trailing
          # newline, hence the \z alternative in the lookahead.
          match += s.scan(/\n[^\n]*(?=\n|\z)/)
        end
        emit(:VALUE, match)
      end
      s.scan(/[[:blank:]]*/)
      scan_value = false
    else
      case
      when s.scan(/---/)                    then emit(:START_OF_DOCUMENT, s.matched)
      when s.scan(/\n[[:blank:]]*(?=\n)/)
        # Ignore empty lines
        @lineno += 1
        @position = 0
      when s.scan(/\n[[:blank:]]*#.*/)      then emit(:COMMENT, s.matched)
      when s.scan(/\n([[:blank:]]*-) */)    then emit(:ITEM, s.matched, indent: s.captures[0])
      when s.scan(/\n[[:blank:]]*\.\.\./)   then emit(:END_OF_DOCUMENT, s.matched)
      when s.scan(/\n?([[:blank:]]*)(.+?:)(?=[ \n])/)
        indent = s.captures[0]
        # A key on the same line as a list item ("- key: value") is indented
        # one level deeper than that item.
        indent = last_indent_value + indent + " " unless s.matched.start_with?("\n")
        emit(:KEY, s.matched, indent: indent, value: s.captures[1])
        scan_value = true if s.rest

      when s.scan(/\n\z/)
        # Done
      when s.match?(/./)
        scan_value = true
      else
        message = if @lines[@lineno - 1][@position] == "\n"
                  <<~MESSAGE
                    #{@filename}:#{@lineno + 1}: unexpected content
                    #{@lines[@lineno].chomp}
                    ^#{"~" * (@lines[@lineno].chomp.length - 1)}
                    MESSAGE
                  else
                  <<~MESSAGE
                    #{@filename}:#{@lineno}: unexpected content
                    #{@lines[@lineno - 1].chomp}
                    #{" " * @position}^
                    MESSAGE
                  end
        raise(Racc::ParseError, message)
      end
    end
  end

  # Close every indentation level that is still open.
  unindent

  # Attach pending comments to the next significant token.
  comment = []
  @tokens.each do |token|
    if token[0] == :COMMENT
      comment << token[1][:value] + "\n"
    elsif comment.any? && token[0] != :UNINDENT
      token[1][:comment] = comment
      comment = []
    end
  end

  @tokens = @tokens.delete_if { |itm| itm[0] == :COMMENT }

  @tokens
end

# Raise a parse error pointing at the current scanner position, used when a
# quoted scalar runs into the end of the input.
def raise_unterminated_string
  message = <<~MESSAGE
    #{@filename}:#{@lineno}: unterminated quoted string
    #{@lines[@lineno - 1].to_s.chomp}
    #{" " * @position}^
  MESSAGE
  raise(Racc::ParseError, message)
end

# Append a token to @tokens and advance the line/column bookkeeping.
#
# token  - Symbol token type (:KEY, :VALUE, :ITEM, ...).
# match  - String of scanned text. A leading "\n" moves to the next line,
#          resets the column to 0 and is not counted in the token length.
# length - Integer token width; defaults to the length of +match+.
# indent - String indentation of the token, or nil. Any "-" in it is replaced
#          by a space IN PLACE. When given, deeper levels on @indent_stack are
#          closed with UNINDENT tokens first and the level is then pushed.
# value  - token value; defaults to +match+ without its leading newline.
#
# Returns the updated @position.
# Raises Racc::ParseError for a '<<:' key, which cannot be sorted.
def emit(token, match, length: nil, indent: nil, value: nil)
  indent.gsub!("-", " ") if indent
  if token && length.nil?
    raise "length must be explicitly passed when match is not a String (#{match.class.name})" unless match.is_a?(String)
    length = match.length
  end

  if match.start_with?("\n")
    @lineno += 1
    match = match[1..-1]
    length -= 1
    @position = 0
  end

  if indent
    unindent(indent)
    @indent_stack.push(indent) unless @indent_stack.last == indent
  end

  value ||= match

  if token == :KEY && value == '<<:'
    # Point the caret at the key itself: start at the current column and skip
    # only the blanks that are part of this match. Using +indent+ here would
    # count the indentation twice for a key on the same line as a list item,
    # and made the "~" repeat count negative.
    leading = match[0, [length - value.length, 0].max]
    message = <<~MESSAGE
      #{@filename}:#{@lineno}: '<<:' references are not sortable:
      #{@lines[@lineno - 1].chomp}
      #{" " * @position}#{leading}^#{"~" * (value.length - 1)}
    MESSAGE
    raise(Racc::ParseError, message)
  end

  exvalue = {
    value: value,
    lineno: @lineno,
    position: @position,
    length: length,
    indent: indent,
  }
  @tokens << [token, exvalue]

  # Multi-line values span several source lines.
  @lineno += match.count("\n")

  @position += length
end

# Innermost indentation currently open, or an empty string when no
# indentation level has been pushed yet.
def last_indent_value
  innermost = @indent_stack.last
  innermost ? innermost : ""
end

# Close indentation levels by popping @indent_stack and appending one
# :UNINDENT token per popped level.
#
# new_indent - String indentation to return to. Only levels strictly longer
#              than it are closed; with nil every open level is closed.
def unindent(new_indent = nil)
  until @indent_stack.empty?
    break unless new_indent.nil? || @indent_stack.last.length > new_indent.length

    closed = @indent_stack.pop
    details = { value: closed, lineno: @lineno, position: 0, length: closed.length, indent: nil }
    @tokens << [:UNINDENT, details]
  end
end

# Hand the next scanned token to the parser and remember it in
# @current_token for error reporting. Returns nil once @tokens is empty.
def next_token
  upcoming = @tokens.shift
  @current_token = upcoming
end

# Scan +text+ and run the generated parser over the resulting tokens.
#
# text     - String with the YAML source.
# filename - name used in error messages; "<stdin>" when not given.
#
# Returns whatever do_parse returns.
def parse(text, filename: nil)
  @filename = filename
  @filename ||= "<stdin>"
  scan(text)
  do_parse
end

# Error callback: always raises Racc::ParseError.
#
# When a token is current, the message names its type and shows the source
# line with a caret/tilde underline below the token; otherwise it reports an
# unexpected end of file. The three arguments are not used.
def on_error(error_token_id, error_value, value_stack)
  unless @current_token
    raise(Racc::ParseError, "#{@filename}:#{@lineno} unexpected end-of-file")
  end

  kind = @current_token[0]
  details = @current_token[1]
  line_number = details[:lineno]
  source_line = @lines[line_number - 1].chomp
  underline = "^" + "~" * [details[:length] - 1, 0].max

  raise(Racc::ParseError, <<~MESSAGE)
    #{@filename}:#{line_number} unexpected #{kind}
    #{source_line}
    #{" " * details[:position]}#{underline}
  MESSAGE
end

# Replace every value stored in @anchors with the result of calling #sort on
# it. Returns @anchors.
def sort_anchors!
  @anchors.transform_values! { |anchored| anchored.sort }
end