metanorma/html2doc

View on GitHub
lib/html2doc/lists.rb

Summary

Maintainability
A
45 mins
Test Coverage
require "uuidtools"
require "htmlentities"
require "nokogiri"

class Html2Doc
  def style_list(elem, level, liststyle, listnumber)
    return unless liststyle

    if elem["style"]
      elem["style"] += ";"
    else
      elem["style"] = ""
    end
    elem["style"] += "mso-list:#{liststyle} level#{level} lfo#{listnumber};"
  end

  def list_add1(elem, liststyles, listtype, level)
    if %i[ul ol].include? listtype
      list_add(elem.xpath(".//ul") - elem.xpath(".//ul//ul | .//ol//ul"),
               liststyles, :ul, level + 1)
      list_add(elem.xpath(".//ol") - elem.xpath(".//ul//ol | .//ol//ol"),
               liststyles, :ol, level + 1)
    else
      list_add(elem.xpath(".//ul") - elem.xpath(".//ul//ul | .//ol//ul"),
               liststyles, listtype, level + 1)
      list_add(elem.xpath(".//ol") - elem.xpath(".//ul//ol | .//ol//ol"),
               liststyles, listtype, level + 1)
    end
  end

  def list_add(xpath, liststyles, listtype, level)
    xpath.each do |l|
      level == 1 and l["seen"] = true and @listnumber += 1
      l["id"] ||= UUIDTools::UUID.random_create
      (l.xpath(".//li") - l.xpath(".//ol//li | .//ul//li")).each do |li|
        style_list(li, level, liststyles[listtype], @listnumber)
        list_add1(li, liststyles, listtype, level)
      end
      list_add_tail(l, liststyles, listtype, level)
    end
  end

  def list_add_tail(list, liststyles, listtype, level)
    list.xpath(".//ul[not(ancestor::li/ancestor::*/@id = '#{list['id']}')] | "\
               ".//ol[not(ancestor::li/ancestor::*/@id = '#{list['id']}')]")
      .each do |li|
      list_add1(li.parent, liststyles, listtype, level - 1)
    end
  end

  def list2para(list)
    return if list.xpath("./li").empty?

    list2para_position(list)
    list.xpath("./li").each do |l|
      l.name = "p"
      l["class"] ||= "MsoListParagraphCxSpMiddle"
      next unless l.first_element_child&.name == "p"

      l["style"] ||= ""
      l["style"] += (l.first_element_child["style"]&.sub(/mso-list[^;]+;/, "") || "")
      l.first_element_child.replace(l.first_element_child.children)
    end
    list.replace(list.children)
  end

  def list2para_position(list)
    list.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
    list.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
    list.xpath("./li/p").each do |p|
      p["class"] ||= "MsoListParagraphCxSpMiddle"
    end
  end

  TOPLIST = "[not(ancestor::ul) and not(ancestor::ol)]".freeze

  def lists1(docxml, liststyles, style)
    case style
    when :ul then list_add(docxml.xpath("//ul[not(@class)]#{TOPLIST}"),
                           liststyles, :ul, 1)
    when :ol then list_add(docxml.xpath("//ol[not(@class)]#{TOPLIST}"),
                           liststyles, :ol, 1)
    else
      list_add(docxml.xpath("//ol[@class = '#{style}']#{TOPLIST} | "\
                            "//ul[@class = '#{style}']#{TOPLIST}"),
               liststyles, style, 1)
    end
  end

  def lists_unstyled(docxml, liststyles)
    liststyles.has_key?(:ul) and
      list_add(docxml.xpath("//ul#{TOPLIST}[not(@seen)]"),
               liststyles, :ul, 1)
    liststyles.has_key?(:ol) and
      list_add(docxml.xpath("//ol#{TOPLIST}[not(@seen)]"),
               liststyles, :ul, 1)
    docxml.xpath("//ul[@seen] | //ol[@seen]").each do |l|
      l.delete("seen")
    end
  end

  def lists(docxml, liststyles)
    return if liststyles.nil?

    @listnumber = 0
    liststyles.each_key { |k| lists1(docxml, liststyles, k) }
    lists_unstyled(docxml, liststyles)
    liststyles.has_key?(:ul) and docxml.xpath("//ul").each { |u| list2para(u) }
    liststyles.has_key?(:ol) and docxml.xpath("//ol").each { |u| list2para(u) }
  end
end