lib/volt/server/html_parser/sandlebars_parser.rb

Summary

Maintainability
C
1 day
Test Coverage
require 'strscan'

module Volt
  # Error type raised by the parser (see SandlebarsParser#raise_parse_error)
  # when the template cannot be parsed. It adds no behavior of its own on top
  # of RuntimeError.
  class HTMLParseError < RuntimeError; end

  # Parses html and bindings
  # based on http://ejohn.org/files/htmlparser.js
  #
  # Takes the html and a handler object. The handler may implement any of the
  # following methods, which are called as each construct is seen:
  #
  #   comment(text)
  #   text(text)
  #   binding(contents)
  #   start_tag(tag_name, attributes, unary)
  #   start_section(tag_name, attributes, unary)
  #   end_tag(tag_name)
  #
  # comment, text, binding, start_tag and end_tag are only invoked when the
  # handler responds to them. start_section is invoked (without a separate
  # respond_to? check) whenever the handler responds to start_tag and a
  # section tag such as <:Body> is seen.
  #
  # Parsing happens in the constructor. Malformed input (unclosed binding,
  # comment, script or style) raises HTMLParseError.
  #
  # This is not a full html parser, but should cover most common cases.
  class SandlebarsParser
    # Builds a lookup hash mapping every element of +array+ to true.
    def self.truth_hash(array)
      array.each_with_object({}) { |value, hash| hash[value] = true }
    end

    # regex matchers
    START_TAG  = /^<([-!\:A-Za-z0-9_]+)((?:\s+[\w\-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>/
    END_TAG    = /^<\/([-!\:A-Za-z0-9_]+)[^>]*>/
    ATTRIBUTES = /([-\:A-Za-z0-9_]+)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/

    # Types of elements
    BLOCK      = truth_hash(%w(a address applet blockquote button center dd del dir div dl dt fieldset form frameset hr iframe ins isindex li map menu noframes noscript object ol p pre script table tbody td tfoot th thead tr ul))
    EMPTY      = truth_hash(%w(area base basefont br col frame hr img input isindex link meta param embed))
    INLINE     = truth_hash(%w(abbr acronym applet b basefont bdo big br button cite code del dfn em font i iframe img input ins kbd label map object q s samp script select small span strike strong sub sup textarea tt u var))
    # Elements that are implicitly closed when a sibling of the same name
    # opens. "options" is kept for backward compatibility; "option" is the
    # actual HTML element name.
    CLOSE_SELF = truth_hash(%w(colgroup dd dt li option options p td tfoot th thead tr))
    SPECIAL    = truth_hash(%w(script style))

    FILL_IN_ATTRIBUTES = truth_hash(%w(checked compact declare defer disabled ismap multiple nohref noresize noshade nowrap readonly selected))

    # html      - String of markup to parse
    # handler   - object receiving the parse callbacks (see class comment)
    # file_path - optional path, only used to decorate error messages
    #
    # Parsing runs immediately; HTMLParseError may be raised from here.
    def initialize(html, handler, file_path = nil)
      @html      = StringScanner.new(html)
      @handler   = handler
      @file_path = file_path

      @stack = []

      parse
    end

    # Name of the innermost open element, or nil when nothing is open.
    def last
      @stack.last
    end

    # Main loop: consumes the scanner one token at a time, dispatching to the
    # handler, then closes whatever is still open at the end of the input.
    def parse
      loop do
        if last && SPECIAL[last]
          # In a script or style tag, just look for the first end. The search
          # is case-insensitive so that </SCRIPT> closes <script>.
          close_tag = "</#{last}>"
          body      = @html.scan_until(Regexp.new(Regexp.escape(close_tag), Regexp::IGNORECASE))
          raise_parse_error("unclosed #{last} tag") unless body

          special_tag(close_tag, body)
        elsif @html.scan(/\<\!--/)
          # start comment
          comment = @html.scan_until(/--\>/)
          raise_parse_error('unclosed comment') unless comment

          # drop the trailing "-->"
          comment = comment[0..-4]

          @handler.comment(comment) if @handler.respond_to?(:comment)
        elsif (tag = @html.scan(START_TAG))
          tag_name = @html[1]
          rest     = @html[2]
          unary    = @html[3]

          start_tag(tag, tag_name, rest, unary)
        elsif @html.scan(END_TAG)
          tag_name = @html[1]

          end_tag(tag_name, tag_name)
        elsif @html.scan(/\{\{\{(.*?)\}\}\}([^\}]|$)/)
          # Anything between {{{ and }}} is escaped and not processed (treated
          # as text). Read both captures before repositioning the scanner.
          escaped   = @html[1]
          lookahead = @html[2]

          # Give back the look-ahead character (empty when we hit the end of
          # the input). The scanner position is in bytes, so rewind by the
          # character's byte size rather than by one.
          @html.pos -= lookahead.bytesize

          text(escaped)
        elsif @html.scan(/\{\{/)
          # We are in text mode and matched the start of a binding
          start_binding
        elsif (chunk = @html.scan(/\{/))
          # A single { outside of a binding
          text(chunk)
        elsif (chunk = @html.scan(/(?:[^\<\{]+)/))
          # matched text up until the next html tag
          text(chunk)
        else
          # Nothing left
          break
        end
      end

      end_tag(nil, nil)
    end

    # Forwards a text chunk to the handler, if it accepts text.
    def text(text)
      @handler.text(text) if @handler.respond_to?(:text)
    end

    # Finds the end of a binding. Called with the scanner positioned right
    # after an opening {{. Nested {{ }} pairs are counted so the binding ends
    # at the matching close. A newline or the end of the input before the
    # binding is closed raises HTMLParseError.
    def start_binding
      contents   = ''
      open_count = 1

      # scan until we reach a {{ or }}
      loop do
        contents << @html.scan_until(/(\{\{|\}\}|\n|\Z)/)

        match = @html[1]
        if match == '}}'
          # close
          open_count -= 1
          break if open_count == 0
        elsif match == '{{'
          # open more
          open_count += 1
        elsif match == "\n" || @html.eos?
          # Starting new tag, should be closed before this
          # or end of doc before closed binding
          raise_parse_error("unclosed binding: {#{contents.strip}")
        else
          #:nocov:
          fail 'should not reach here'
          #:nocov:
        end
      end

      # drop the trailing "}}"
      contents = contents[0..-3]
      @handler.binding(contents) if @handler.respond_to?(:binding)
    end

    # Raises HTMLParseError with +error+ followed by the current line number
    # and, when known, the file path.
    def raise_parse_error(error)
      # pre_match is nil when the most recent scan failed (e.g. an unclosed
      # comment); fall back to everything consumed so far.
      consumed    = @html.pre_match || @html.string.byteslice(0, @html.pos)
      line_number = consumed.count("\n") + 1

      error_str = "#{error} on line: #{line_number}"
      error_str += " of #{@file_path}" if @file_path

      raise HTMLParseError, error_str
    end

    # Handles an opening tag.
    #
    # tag      - the full matched tag text (emitted verbatim for doctype)
    # tag_name - the raw tag name as written
    # rest     - the attribute portion of the tag
    # unary    - "/" when the tag was written self-closing, otherwise empty
    def start_tag(tag, tag_name, rest, unary)
      # Section tags look like <:Body>: a colon followed by a capital letter.
      section_tag = tag_name[0] == ':' && tag_name[1] =~ /[A-Z]/

      tag_name = section_tag ? tag_name.underscore : tag_name.downcase

      # handle doctype so we get it output exactly the same way
      if tag_name == '!doctype'
        text(tag)
        return
      end

      # Some tags close themselves when a new one of themselves is reached.
      # ex, a tr will close the previous tr
      end_tag(nil, tag_name) if CLOSE_SELF[tag_name] && last == tag_name

      # Void elements are always unary; otherwise honor a trailing "/".
      unary = EMPTY[tag_name] || !unary.to_s.strip.empty?

      # Section tags are also unary
      @stack.push(tag_name) unless unary || section_tag

      return unless @handler.respond_to?(:start_tag)

      attributes = {}

      # Take the rest string and extract the attributes, filling in any
      # "fill in" attribute values if not provided.
      rest.scan(ATTRIBUTES) do |name, double_quoted, single_quoted, bare|
        attributes[name] = double_quoted || single_quoted || bare || FILL_IN_ATTRIBUTES[name] || ''
      end

      if section_tag
        @handler.start_section(tag_name, attributes, unary)
      else
        @handler.start_tag(tag_name, attributes, unary)
      end
    end

    # Closes open elements.
    #
    # tag      - unused; retained so existing call sites keep working
    # tag_name - name of the element to close, or nil to close everything
    #
    # With a tag_name, the closest open element of that name is closed along
    # with everything opened inside it. A close tag that matches nothing on
    # the stack is ignored. Names are compared lower-cased because start_tag
    # stores them that way.
    def end_tag(tag, tag_name)
      if tag_name
        new_size = @stack.rindex(tag_name.downcase)

        # Stray close tag: nothing to do.
        return unless new_size
      else
        # If no tag name is provided, close all the way up
        new_size = 0
      end

      if @handler.respond_to?(:end_tag)
        (@stack.size - 1).downto(new_size) do |index|
          @handler.end_tag(@stack[index])
        end
      end

      @stack = @stack[0...new_size]
    end

    # Emits the body of a script/style element as text and closes it.
    #
    # close_tag - the closing tag text, e.g. "</script>"
    # body      - everything scanned up to and including the closing tag
    def special_tag(close_tag, body)
      # strip the closing tag off the end
      body = body[0...-close_tag.size]

      # unwrap single-line html comments and CDATA sections
      body = body.gsub(/\<\!--(.*?)--\>/, '\\1').gsub(/\<\!\[CDATA\[(.*?)\]\]\>/, '\\1')

      text(body)

      end_tag(last, last)
    end
  end
end