lib/rdoc/rd/block_parser.ry

Summary

Maintainability
Test Coverage
# Racc grammar for the block-level structure of an RD document: headings,
# paragraphs, verbatim sections, includes and the four kinds of lists.
# The tokens are produced by next_token in the inner section of this file
# and the actions build RDoc::Markup nodes.
class BlockParser

  # Precedence declarations, lowest first.  next_token in this file never
  # returns DUMMY; it exists only so that rules tagged with = DUMMY below
  # get the lowest declared precedence, below every line token listed here.
  preclow
    nonassoc DUMMY
    left     ITEMLISTLINE
             ENUMLISTLINE
             DESCLISTLINE
             METHODLISTLINE
             STRINGLINE
  prechigh

  # Terminal symbols used by the rules.
  token STRINGLINE
        ITEMLISTLINE
        ENUMLISTLINE
        DESCLISTLINE
        METHODLISTLINE
        WHITELINE
        SUBTREE
        HEADLINE
        INCLUDE
        INDENT
        DEDENT
        DUMMY

  rule
    # A document is a sequence of blocks.  A token stream without any block
    # raises ParseError.
    document : blocks { result = RDoc::Markup::Document.new(*val[0]) }
             |        { raise ParseError, "file empty" }
             ;

    # Every block yields an Array of markup nodes, so blocks is the
    # concatenation of those Arrays.
    blocks : blocks block { result = val[0].concat val[1] }
           | block        { result = val[0] }
           ;

    # A top-level block.  A WHITELINE becomes a BlankLine node and a SUBTREE
    # contributes the parts of the already parsed sub-document.
    block : textblock { result = val }
          | verbatim  { result = val }
          | lists
          | headline  { result = val }
          | include   { result = val }
          | WHITELINE { result = [RDoc::Markup::BlankLine.new] }
          | SUBTREE   { result = val[0].parts }
          ;

    # A heading.  The title text is run through the inline parser.
    headline : HEADLINE {
      # val[0] is like [level, title]
      title = @inline_parser.parse(val[0][1])
      result = RDoc::Markup::Heading.new(val[0][0], title)
    }
    ;

    # An include directive whose target was not parsed as a subtree.
    include : INCLUDE {
      result = RDoc::Markup::Include.new val[0], @include_path
    }
    ;

    # A paragraph built from consecutive STRINGLINE tokens.
    textblock : textblockcontent = DUMMY {
      # val[0] is Array of String
      result = paragraph val[0]
    }
    ;

    # Collects the lines of a paragraph with trailing whitespace removed.
    textblockcontent : textblockcontent STRINGLINE { result << val[1].rstrip }
                     | STRINGLINE { result = [val[0].rstrip] }
                     ;

    # An indented verbatim section.
    verbatim : INDENT verbatimcontent DEDENT {
      # val[1] is Array of String
      content = cut_off val[1]
      result = RDoc::Markup::Verbatim.new(*content)

      # inform to lexer.
      @in_verbatim = false
    }
    ;

    # Verbatim content that directly follows lists inside an indented
    # region; used only by the last alternative of the lists rule.
    verbatim_after_lists : verbatimcontent {
      # val[0] is Array of String
      content = cut_off val[0]
      result = RDoc::Markup::Verbatim.new(*content)

      # inform to lexer.
      @in_verbatim = false
    }
    ;

    # Lines of a verbatim section, including nested indentation and blank
    # lines.  The first line switches @in_verbatim on, which makes the lexer
    # return list-marker lines as plain STRINGLINE tokens.
    verbatimcontent : verbatimcontent STRINGLINE {
      result << val[1]
    } | verbatimcontent INDENT verbatimcontent DEDENT {
      result.concat val[2]
    } | verbatimcontent WHITELINE {
      result << "\n"
    } | STRINGLINE {
      result = val
      # inform to lexer.
      @in_verbatim = true
    }
    ;

    # Any one of the four list kinds.
    list : itemlist
         | enumlist
         | desclist
         | methodlist
         ;

    # One or more adjacent lists, optionally wrapped in an indented region
    # and optionally followed there by verbatim content.  The value is an
    # Array of nodes.
    lists : lists2 = DUMMY {
      result = val[0]
    } | INDENT lists2 DEDENT {
      result = val[1]
    } | INDENT lists2 verbatim_after_lists DEDENT {
      result = val[1].push(val[2])
    }
    ;

    # Accumulates adjacent lists into an Array.
    lists2 : lists2 list { result = val[0] << val[1] }
           | list { result = [val[0]] }
           ;

    # Bullet list.
    itemlist : itemlistitems  = DUMMY {
      result = RDoc::Markup::List.new :BULLET, *val[0]
    }
    ;

    itemlistitems : itemlistitems itemlistitem { result.push(val[1]) }
                  | itemlistitem { result = val }
                  ;

    # One bullet item: its first paragraph followed by any further blocks.
    itemlistitem : first_textblock_in_itemlist other_blocks_in_list DEDENT {
      result = RDoc::Markup::ListItem.new nil, val[0], *val[1]
    }
    ;

    # Numbered list.
    enumlist :  enumlistitems  = DUMMY {
      result = RDoc::Markup::List.new :NUMBER, *val[0]
    }
    ;

    enumlistitems : enumlistitems enumlistitem { result.push(val[1]) }
                  | enumlistitem { result = val }
                  ;

    # One numbered item: its first paragraph followed by any further blocks.
    enumlistitem : first_textblock_in_enumlist other_blocks_in_list DEDENT {
      result = RDoc::Markup::ListItem.new nil, val[0], *val[1]
    }
    ;

    # Description list.
    desclist : desclistitems  = DUMMY {
      result = RDoc::Markup::List.new :NOTE, *val[0]
    }
    ;

    desclistitems : desclistitems desclistitem { result.push(val[1]) }
                  | desclistitem { result = val }
                  ;

    # One description item.  The term is stripped and run through the inline
    # parser.
    desclistitem : DESCLISTLINE description_part DEDENT {
      term = @inline_parser.parse val[0].strip

      result = RDoc::Markup::ListItem.new term, *val[1]
    }
    ;

    # Method list.
    methodlist : methodlistitems  = DUMMY {
      result = RDoc::Markup::List.new :LABEL, *val[0]
    }
    ;

    methodlistitems : methodlistitems methodlistitem { result.push(val[1]) }
                    | methodlistitem { result = val }
                    ;

    # One method item.  The stripped method line is wrapped in tt tags and
    # used as the label.
    methodlistitem : METHODLISTLINE description_part DEDENT {
      result = RDoc::Markup::ListItem.new "<tt>#{val[0].strip}</tt>", *val[1]
    }
    ;

    # Body of a description or method item, as an Array of nodes.  Leading
    # blank lines are skipped and the body may be empty.
    description_part : whitelines textblock blocks_in_list {
      result = [val[1]].concat(val[2])
    } | whitelines textblock {
      result = [val[1]]
    } | whitelines INDENT blocks_in_list DEDENT {
      result = val[2]
    } | whitelines {
      result = []
    }
    ;

    # Concatenation of the Arrays produced by block_in_list.
    blocks_in_list : blocks_in_list block_in_list { result.concat val[1] }
                   | block_in_list
                   ;

    # A block inside a list item.  Unlike at the top level, a WHITELINE
    # contributes no node here.
    block_in_list : textblock { result = val }
                  | verbatim  { result = val }
                  | lists
                  | WHITELINE { result = [] }
                  ;

    # Zero or more blank lines.
    whitelines  : whitelines2
                |
                ;

    whitelines2 : WHITELINE whitelines2
                | WHITELINE
                ;

    # First paragraph of a bullet item: the rest of the marker line plus any
    # continuation lines.
    first_textblock_in_itemlist : ITEMLISTLINE textblockcontent {
      result = paragraph [val[0]].concat(val[1])
    } | ITEMLISTLINE {
      result = paragraph [val[0]]
    }
    ;

    # First paragraph of a numbered item: the rest of the marker line plus
    # any continuation lines.
    first_textblock_in_enumlist : ENUMLISTLINE textblockcontent {
      result = paragraph [val[0]].concat(val[1])
    } | ENUMLISTLINE {
      result = paragraph [val[0]]
    }
    ;

    # Blocks that follow the first paragraph of a bullet or numbered item,
    # as an Array of nodes.  A leading WHITELINE is dropped and the whole
    # part may be empty.
    other_blocks_in_list : verbatim blocks_in_list {
      result = [val[0]].concat(val[1])
    } | lists blocks_in_list     { result.concat val[1] }
      | WHITELINE blocks_in_list { result = val[1] }
      | verbatim                 { result = val }
      | lists
      | WHITELINE                { result = [] }
      |                          { result = [] }
      ;
end

---- inner

# :stopdoc:

# Maps the head mark that starts a headline (one to four "=" or one to two
# "+", as matched in next_token) to the heading level given to
# RDoc::Markup::Heading.  Frozen because it is a shared lookup table that
# must never be modified while parsing.
MARK_TO_LEVEL = {
  '='    => 1,
  '=='   => 2,
  '==='  => 3,
  '====' => 4,
  '+'    => 5,
  '++'   => 6,
}.freeze

# :startdoc:

##
# Footnotes for this document.  The list is reset at the start of every
# #parse and filled by #add_footnote, which appends a paragraph followed by
# a blank line for each footnote.

attr_reader :footnotes

##
# Labels for items in this document.  A Hash whose keys are the labels
# recorded by #add_label; it is reset at the start of every #parse.

attr_reader :labels

##
# Path to find included files in.  An Array of directories that is searched
# in order when an included file is looked up.

attr_accessor :include_path

##
# Creates a new RDoc::RD::BlockParser.  Use #parse to parse an rd-format
# document.
#
# The parser starts with an empty include path; assign #include_path before
# parsing a document that includes other files.

def initialize
  # inline markup parser; it is handed this block parser on construction
  @inline_parser = RDoc::RD::InlineParser.new self
  @include_path = []

  # for testing
  @footnotes = []
  @labels    = {}
end

##
# Parses +src+, an Array of source lines, and returns an
# RDoc::Markup::Document.
#
# A +false+ end-of-source marker is appended to +src+ itself.  When the
# document produced footnotes, a rule and the footnotes are inserted in
# front of the last part of the document.

def parse src
  @src = src
  @src.push false # end-of-source marker recognized by next_token

  # results collected for this document
  @footnotes = []
  @labels    = {}

  # lexer position: index (line no.) into @src
  @i = 0

  # indentation bookkeeping: the stack of indent pieces and their
  # concatenation
  @indent_stack   = []
  @current_indent = @indent_stack.join("")

  # lazily created RDoc::RD::BlockParser for included sources
  @subparser = nil

  # name of the part the lexer is in, and the lines kept for it
  @in_part      = nil
  @part_content = []

  @in_verbatim = false

  @yydebug = true

  document = do_parse
  return document if @footnotes.empty?

  # Put the footnotes, introduced by a rule, before the final part.
  parts    = document.parts
  trailing = parts.pop

  parts.push RDoc::Markup::Rule.new(1)
  parts.concat @footnotes
  parts.push trailing

  document
end

##
# Returns the next token from the document as a two-element Array of token
# symbol and value.  <tt>[false, false]</tt> signals the end of the input.
#
# Lines outside an RD part are skipped first.  Inside an RD part the line at
# the current index is classified by its leading marker; most branches go
# through #if_current_indent_equal, which yields the token only when the
# line's indentation equals the current one and otherwise returns an INDENT
# or DEDENT token without consuming the line.

def next_token # :nodoc:
  # preprocessing
  # if it is not in RD part
  # => method
  while @in_part != "rd"
    line = @src[@i]
    @i += 1 # next line

    case line
    # src end
    when false
      return [false, false]
    # RD part begin
    when /^=begin\s*(?:\bRD\b.*)?\s*$/
      if @in_part # if in non-RD part
        @part_content.push(line)
      else
        @in_part = "rd"
        return [:WHITELINE, "=begin\n"] # <= for textblockand
      end
    # non-RD part begin
    when /^=begin\s+(\w+)/
      part = $1
=begin # not imported to RDoc
      if @in_part # if in non-RD part
        @part_content.push(line)
      else
        @in_part = part if @tree.filter[part] # if filter exists
#  p "BEGIN_PART: #{@in_part}" # DEBUG
      end
=end
      @in_part = part
    # non-RD part end
    when /^=end(?:$|[\s\0\C-d\C-z])/
      if @in_part # if in non-RD part
        # Leave the non-RD part.  Without this reset @in_part keeps the
        # part name forever and every later "=begin" is treated as content
        # of that part, so following RD parts would never be parsed.  The
        # collected lines are dropped because filters are not imported.
        @part_content.clear
        @in_part = nil
=begin # not imported to RDoc
#  p "END_PART: #{@in_part}" # DEBUG
        # make Part-in object
        part = RDoc::RD::Part.new(@part_content.join(""), @tree, "r")
        @part_content.clear
        # call filter, part_out is output(Part object)
        part_out = @tree.filter[@in_part].call(part)

        if @tree.filter[@in_part].mode == :rd # if output is RD formatted
          subtree = parse_subtree(part_out.to_a)
        else # if output is target formatted
          basename = Tempfile.create(["rdtmp", ".#{@in_part}"], @tree.tmp_dir) do |tmpfile|
            tmpfile.print(part_out)
            File.basename(tmpfile.path)
          end
          subtree = parse_subtree(["=begin\n", "<<< #{basename}\n", "=end\n"])
        end
        @in_part = nil
        return [:SUBTREE, subtree]
=end
      end
    else
=begin # not imported to RDoc
      if @in_part # if in non-RD part
        @part_content.push(line)
      end
=end
    end
  end

  # Inside an RD part: classify the current line.
  @current_indent = @indent_stack.join("")
  line = @src[@i]
  case line
  when false
    # end of source: unwind open indentation before reporting the end
    if_current_indent_equal("") do
      [false, false]
    end
  when /^=end/
    # end of the RD part
    if_current_indent_equal("") do
      @in_part = nil
      [:WHITELINE, "=end"] # MUST CHANGE??
    end
  when /^\s*$/
    # blank lines are tokens regardless of indentation
    @i += 1 # next line
    return [:WHITELINE, ':WHITELINE']
  when /^\#/  # comment line
    @i += 1 # next line
    self.next_token()
  when /^(={1,4})(?!=)\s*(?=\S)/, /^(\+{1,2})(?!\+)\s*(?=\S)/
    # headline: the mark selects the level, the rest is the title
    rest = $'                    # '
    rest.strip!
    mark = $1
    if_current_indent_equal("") do
      return [:HEADLINE, [MARK_TO_LEVEL[mark], rest]]
    end
  when /^<<<\s*(\S+)/
    # include: .rd and .rb files are parsed into a subtree, any other file
    # is passed on by name
    file = $1
    if_current_indent_equal("") do
      suffix = file[-3 .. -1]
      if suffix == ".rd" or suffix == ".rb"
        subtree = parse_subtree(get_included(file))
        [:SUBTREE, subtree]
      else
        [:INCLUDE, file]
      end
    end
  when /^(\s*)\*(\s*)/
    # bullet item; the marker width and following space become a new
    # indentation level
    rest = $'                   # '
    newIndent = $2
    if_current_indent_equal($1) do
      if @in_verbatim
        [:STRINGLINE, line]
      else
        @indent_stack.push("\s" + newIndent)
        [:ITEMLISTLINE, rest]
      end
    end
  when /^(\s*)(\(\d+\))(\s*)/
    # numbered item such as (1)
    rest = $'                     # '
    mark = $2
    newIndent = $3
    if_current_indent_equal($1) do
      if @in_verbatim
        [:STRINGLINE, line]
      else
        @indent_stack.push("\s" * mark.size + newIndent)
        [:ENUMLISTLINE, rest]
      end
    end
  when /^(\s*):(\s*)/
    # description item
    rest = $'                    # '
    newIndent = $2
    if_current_indent_equal($1) do
      if @in_verbatim
        [:STRINGLINE, line]
      else
        @indent_stack.push("\s#{$2}")
        [:DESCLISTLINE, rest]
      end
    end
  when /^(\s*)---(?!-|\s*$)/
    # method item: three dashes followed by the method description
    indent = $1
    rest = $'
    /\s*/ === rest
    term = $'
    new_indent = $&
    if_current_indent_equal(indent) do
      if @in_verbatim
        [:STRINGLINE, line]
      else
        @indent_stack.push("\s\s\s" + new_indent)
        [:METHODLISTLINE, term]
      end
    end
  when /^(\s*)/
    # any other line is plain text
    if_current_indent_equal($1) do
      [:STRINGLINE, line]
    end
  else
    raise "[BUG] parsing error may occurred."
  end
end

##
# Compares +indent+, the leading whitespace of the current line, with the
# current indentation.
#
# When both are equal the line is consumed and the result of the given block
# is returned.  When +indent+ is deeper, the additional whitespace is pushed
# on the indent stack and an INDENT token is returned.  Otherwise one level
# is popped from the stack and a DEDENT token is returned.  In the last two
# cases the line is not consumed.
#
# NOTE(review): only the first tab of +indent+ is expanded to eight spaces;
# confirm whether further tabs are meant to stay unexpanded.

def if_current_indent_equal(indent)
  expanded = indent.sub(/\t/, "\s" * 8)

  if expanded == @current_indent
    @i += 1 # next line
    return yield
  end

  if expanded.start_with?(@current_indent)
    deeper = expanded[@current_indent.size .. -1]
    @indent_stack.push(deeper)
    return [:INDENT, ":INDENT"]
  end

  @indent_stack.pop
  [:DEDENT, ":DEDENT"]
end
private :if_current_indent_equal

##
# Cuts off excess whitespace in +src+, an Array of lines.
#
# The leading whitespace of the first line is removed from that line and
# from every following non-blank line.  Blank lines are kept only when a
# non-blank line follows them.  A non-blank line that does not start with
# that whitespace raises a RuntimeError.  +src+ is consumed in the process.

def cut_off(src)
  first = src.shift
  /^\s*/ =~ first
  leading, remainder = $&, $'

  prefix  = /^#{Regexp.quote(leading)}/
  kept    = [remainder]
  pending = [] # blank lines waiting for a following non-blank line

  while current = src.shift
    if /^(\s*)$/ =~ current
      pending << current
    elsif prefix =~ current
      kept.concat(pending)
      pending.clear
      kept << $'
    else
      raise "[BUG]: probably Parser Error while cutting off.\n"
    end
  end

  kept
end
private :cut_off

# Sets +term+ on +parent+ by calling +set_term_without_document_struct+ on
# it.  The variant that used the document structure of a tree is disabled.
#
# NOTE(review): no caller of this helper is visible in this file -- confirm
# whether it is still needed.
def set_term_to_element(parent, term)
#  parent.set_term_under_document_struct(term, @tree.document_struct)
  parent.set_term_without_document_struct(term)
end
private :set_term_to_element

##
# Raises a ParseError when invalid formatting is found.
#
# The message shows the current line together with the line before and
# after it.  A neighbouring line that does not exist (before the first
# line, past the end of the source, or the +false+ end-of-source marker) is
# shown as empty, so an error at the very start or end of a document is
# still reported as a ParseError.
#
# The arguments are not used.

def on_error(et, ev, _values)
  prv, cur, nxt = format_line_num(@i, @i+1, @i+2)

  context = [@i - 1, @i, @i + 1].map do |index|
    # a negative index would wrap around to the end of the source
    line = index < 0 ? nil : @src[index]
    line.is_a?(String) ? line.chomp : ''
  end

  raise ParseError, <<Msg

RD syntax error: line #{@i+1}:
  #{prv}  |#{context[0]}
  #{cur}=>|#{context[1]}
  #{nxt}  |#{context[2]}

Msg
end

##
# Current line number: the zero-based index into the source lines at which
# #next_token reads its next line.

def line_index
  @i
end

##
# Parses subtree +src+, an Array of lines, with a separate
# RDoc::RD::BlockParser and returns the resulting document.  The sub-parser
# is created on first use and reused afterwards.
#
# NOTE(review): the sub-parser is created with its own empty include path;
# confirm whether it should inherit the include path of this parser.

def parse_subtree src
  @subparser ||= RDoc::RD::BlockParser.new

  @subparser.parse src
end
private :parse_subtree

##
# Looks +file+ up in the directories of the include path, in order, and
# returns the lines of the first file that exists.  Returns an empty Array
# when no directory contains +file+.

def get_included(file)
  @include_path.each do |dir|
    candidate = File.join dir, file

    return File.readlines(candidate) if File.exist?(candidate)
  end

  []
end
private :get_included

##
# Formats +line_numbers+ as Strings that are right-aligned to the width of
# the longest number

def format_line_num(*line_numbers)
  width    = line_numbers.map { |number| number.to_s.length }.max
  template = "%#{width}d"

  line_numbers.map { |number| sprintf(template, number) }
end
private :format_line_num

##
# Joins the +content+ of every element of +values+ into a single String

def content values
  values.map(&:content).join
end

##
# Creates a paragraph for +value+, an Array of lines.  The lines are run
# through #cut_off, joined with single spaces and stripped of trailing
# whitespace before the inline parser turns them into the paragraph parts.

def paragraph value
  joined = cut_off(value).join(' ')
  parts  = @inline_parser.parse joined.rstrip

  RDoc::Markup::Paragraph.new(*parts)
end

##
# Adds footnote +content+ to the document and returns the number of the
# footnote.  Each footnote is stored as a paragraph, which starts with a
# link back to its mark, followed by a blank line.

def add_footnote content
  # two entries are stored per footnote, numbering starts at one
  number = @footnotes.length / 2 + 1

  back_link = "{^#{number}}[rdoc-label:footmark-#{number}:foottext-#{number}]"

  @footnotes.push RDoc::Markup::Paragraph.new(back_link, ' ', *content)
  @footnotes.push RDoc::Markup::BlankLine.new

  number
end

##
# Records +label+ as a label of the document and returns it

def add_label label
  @labels.store label, true

  label
end

# :stopdoc:

---- header
class RDoc::RD

##
# RD format parser for headings, paragraphs, lists, verbatim sections that
# exist as blocks.

---- footer
end