under-os/under-os

View on GitHub
gems/under-os-ui/lib/under_os/parser/html.rb

Summary

Maintainability
A
3 hrs
Test Coverage
class UnderOs::Parser::HTML
  def parse(html)
    html = html.strip.gsub(/<\!--[\s\S]*?-->/, '').gsub(/>\s+/, '>').gsub(/\s+</, '<')

    [].tap do |top|
      @top   = top
      @stack = []
      @node  = nil
      i      = 0

      while i < html.size
        @chunk = html.slice(i, html.size)

        i += open_tag || close_tag || plain_text
      end

      # closing all the missing tags
      while node = @stack.shift
        node.delete(:children)
        node.delete(:text)
        @top << node if ! @top.include?(node)
      end
    end
  end

  def open_tag
    if m = @chunk.match(/\A<([a-z]+)([^>]*)>/)
      @node = {tag: m[1], attrs: parse_attrs_in(m[2])}

      if parent = @stack.last
        parent[:children] ||= []
        parent[:children] << @node
        parent.delete(:text) # it can have either text or children
      else
        @top << @node
      end

      @stack << @node

      m[0].size
    end
  end

  def close_tag
    if m = @chunk.match(/\A<\/([a-z]+)>/)
      while node = @stack.pop
        if node[:tag] != m[1]
          if @stack.size > 0
            @stack.last[:children] += node[:children] || []
            node.delete(:children)
            node.delete(:text)
          end
        else
          break
        end
      end

      @node = @stack.last

      m[0].size
    end
  end

  def plain_text
    if m = @chunk.match(/\A([^<]+)/)
      @stack.last[:text] = m[1] if @stack.last

      m[0].size
    end
  end

  def parse_attrs_in(string)
    merge_data_attrs({}.tap do |hash|
      string.scan(/([a-z][a-z_\-\d]+)=('|")(.+?)(\2)/).each do |match|
        value = match[0] == match[2] ? true : match[2]
        value = true  if value == 'true'
        value = false if value == 'false'
        hash[match[0].to_sym] = value
      end
    end)
  end

  def merge_data_attrs(hash)
    hash.keys.each do |key|
      if key.to_s.starts_with?('data-')
        hash[:data] ||= {}

        value = hash.delete(key)
        key   = key.to_s.gsub(/^data\-/, '').camelize

        hash[:data][key.to_sym] = value
      end
    end

    hash
  end
end