# lib/html_sanitizer/scrubber/cleanup.rb
# Copyright (C) 2012-2024 Zammad Foundation, https://zammad-foundation.org/
class HtmlSanitizer
  module Scrubber
    # Normalises whitespace inside text nodes: runs of whitespace are
    # collapsed, and text that directly follows a <div>/<p>, or that is the
    # first child of one, is stripped.
    class Cleanup < Base

      # Cleans up a single node. Anything that is not exactly a
      # Nokogiri::XML::Text is skipped, as is text whose direct parent is
      # <pre> or <code>.
      def scrub(node)
        return if !node.instance_of?(Nokogiri::XML::Text)

        parent_tag = node.parent&.name
        return if %w[pre code].include?(parent_tag)

        update_node_content(node)
      end

      private

      # Reads the node's text, runs the clean-up steps in order (collapse
      # whitespace, strip after a block sibling, strip as leading child) and
      # writes the result back to the node.
      def update_node_content(node)
        text = node.content
        return if !text

        collapsed = remove_space_if_needed(text)
        after_previous = strip_if_needed_previous(node, collapsed)

        node.content = strip_if_needed_next(node, after_previous)
      end

      # Collapses every run of whitespace in the given text. Text that is
      # exactly one space or one newline is returned untouched.
      def remove_space_if_needed(content)
        return content if space_or_nl?(content)

        # https://github.com/zammad/zammad/issues/4223
        # A run of whitespace containing two consecutive newlines is kept as a
        # blank line ("\n\n") so paragraphs stay readable; every other run of
        # whitespace becomes a single space character.
        content.gsub(%r{[[:space:]]+}) do |whitespace_run|
          if whitespace_run.include?("\n\n")
            "\n\n"
          else
            ' '
          end
        end
      end

      # Strips the text on both sides when the node's previous sibling is a
      # <div> or <p>; otherwise the text is returned unchanged.
      def strip_if_needed_previous(node, content)
        sibling = node.previous
        return content.strip if sibling && div_or_p?(sibling)

        content
      end

      # Strips the text on both sides when the node is the first child of a
      # <div> or <p> and is followed either by nothing or by a <div>, <p> or
      # <br>. Text that is exactly one space or one newline is never stripped.
      def strip_if_needed_next(node, content)
        parent = node.parent
        return content if !parent || node.previous

        following = node.next
        return content if following && !%w[div p br].include?(following.name)
        return content if !div_or_p?(parent) || space_or_nl?(content)

        content.strip
      end

      # True when the string is exactly a single space or a single newline.
      def space_or_nl?(string)
        case string
        when ' ', "\n" then true
        else false
        end
      end

      # True when the node's tag name is "div" or "p".
      def div_or_p?(node)
        tag = node.name
        %w[div p].include?(tag)
      end
    end
  end
end