zammad/zammad

View on GitHub
lib/html_sanitizer/scrubber/wipe.rb

Summary

Maintainability
C
7 hrs
Test Coverage
# Copyright (C) 2012-2024 Zammad Foundation, https://zammad-foundation.org/

class HtmlSanitizer
  module Scrubber
    class Wipe < Base
      def initialize # rubocop:disable Lint/MissingSuper
        @direction = :bottom_up
      end

      def scrub(node)
        return STOP if clear_tags_allowlist(node)
        return STOP if remove_unsafe_src(node)

        clear_css_classes(node)
        move_attrs_to_css(node)
        clear_style(node)
        remove_invalid_links(node)
        remove_attributes_not_in_allowlist(node)
      end

      private

      def remove_attributes_not_in_allowlist(node)
        node.each do |attribute, _value| # rubocop:disable Style/HashEachMethods
          attribute_name = attribute.downcase
          next if attributes_allowlist[:all].include?(attribute_name) || attributes_allowlist[node.name]&.include?(attribute_name)

          node.delete(attribute)
        end
      end

      def remove_invalid_links(node)
        %w[href style].each do |attribute_name|
          next if !node[attribute_name]

          href = cleanup_target(node[attribute_name])
          next if !href.match?(%r{(javascript|livescript|vbscript):}i)

          node.delete(attribute_name)
        end
      end

      def clear_style(node)
        return if !node['style']

        style = clear_style_pairs(node)
          .each_with_object('') do |elem, memo|
            memo << "#{elem};" if clear_style_pair_valid?(node, elem)
          end

        node['style'] = style

        node.delete('style') if style.blank?
      end

      def clear_style_pairs(node)
        node['style'].downcase.gsub(%r{\t|\n|\r}, '').split(';')
      end

      def clear_style_pair_valid?(node, pair)
        prop = pair.split(':')
        return if prop.first.blank?

        return if !clear_style_allowed?(node, prop)
        return if clear_style_blocked?(node, pair)

        true
      end

      def clear_style_allowed?(node, prop)
        return if css_properties_allowlist.exclude?(node.name)
        return if css_properties_allowlist[node.name].exclude?(prop.first.strip)

        true
      end

      def clear_style_blocked?(node, pair)
        css_values_blocklist[node.name]&.include?(pair.gsub(%r{[[:space:]]}, '').strip)
      end

      def move_attrs_to_css(node)
        attributes_2_css.each do |key|
          next if !node[key]

          value = node[key]
          node.delete(key)
          next if value.blank?
          next if node_has_css?(node, key)

          node_set_style(node, key, value)
        end
      end

      def node_has_css?(node, key)
        return false if node['style'].blank?
        return false if node['style'].split(';').blank?

        node['style'].split(';').filter_map { |attr| attr.split(':')&.first&.strip }.include?(key)
      end

      def node_init_style(node)
        if node['style'].blank?
          node['style'] = ''
        else
          node['style'] += ';'
        end
      end

      def node_set_style(node, key, value)
        node_init_style(node)

        value += 'px' if !value.match?(%r{%|px|em}i)
        node['style'] += "#{key}:#{value}"
      end

      def clear_css_classes(node)
        return if !node['class']

        classes = node['class'].gsub(%r{\t|\n|\r}, '').split
        class_new = ''
        classes.each do |local_class|
          next if classes_allowlist.exclude?(local_class.to_s.strip)

          if class_new != ''
            class_new += ' '
          end
          class_new += local_class
        end
        if class_new == ''
          node.delete('class')
        else
          node['class'] = class_new
        end
      end

      def remove_unsafe_src(node)
        return if !node['src']

        src = cleanup_target(CGI.unescape(node['src']))

        return if src !~ %r{(javascript|livescript|vbscript):}i && !src.downcase.start_with?('http', 'ftp', '//')

        node.remove
        true
      end

      def clear_tags_allowlist(node)
        return if tags_allowlist.include?(node.name)

        node.before(node.children)
        node.remove
        true
      end

      def tags_allowlist
        @tags_allowlist ||= Rails.configuration.html_sanitizer_tags_allowlist
      end

      def attributes_allowlist
        @attributes_allowlist ||= Rails.configuration.html_sanitizer_attributes_allowlist
      end

      def css_properties_allowlist
        @css_properties_allowlist ||= Rails.configuration.html_sanitizer_css_properties_allowlist
      end

      def css_values_blocklist
        @css_values_blocklist ||= Rails.application.config.html_sanitizer_css_values_blocklist
      end

      # We allowlist yahoo_quoted because Yahoo Mail marks quoted email content using
      # <div class='yahoo_quoted'> and we rely on this class to identify quoted messages
      def classes_allowlist
        %w[js-signatureMarker yahoo_quoted]
      end

      def attributes_2_css
        %w[width height]
      end

      def cleanup_target(string, **options)
        cleaned_string = string.utf8_encode(fallback: :read_as_sanitized_binary)
        cleaned_string = cleaned_string.gsub(%r{[[:space:]]}, '') if !options[:keep_spaces]
        cleaned_string = cleaned_string.strip
                                       .delete("\t\n\r\u0000")
                                       .gsub(%r{/\*.*?\*/}, '')
                                       .gsub(%r{<!--.*?-->}, '')

        sanitize_attachment_disposition(cleaned_string)
      end

      def sanitize_attachment_disposition(url)
        @fqdn ||= Setting.get('fqdn')
        uri = URI(url)

        if uri.host == @fqdn && uri.query.present?
          params = CGI.parse(uri.query || '')
                      .tap { |p| p.merge!('disposition' => 'attachment') if p.include?('disposition') }
          uri.query = URI.encode_www_form(params)
        end

        uri.to_s
      rescue
        url
      end
    end
  end
end