mysociety/alaveteli

View on GitHub
app/models/foi_attachment.rb

Summary

Maintainability
B
4 hrs
Test Coverage
# == Schema Information
# Schema version: 20230717201410
#
# Table name: foi_attachments
#
#  id                    :integer          not null, primary key
#  content_type          :text
#  filename              :text
#  charset               :text
#  display_size          :text
#  url_part_number       :integer
#  within_rfc822_subject :text
#  incoming_message_id   :integer
#  hexdigest             :string(32)
#  created_at            :datetime
#  updated_at            :datetime
#  prominence            :string           default("normal")
#  prominence_reason     :text
#  masked_at             :datetime
#

# models/foi_attachment.rb:
# An attachment to an email (IncomingMessage)
#
# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
# Email: hello@mysociety.org; WWW: http://www.mysociety.org/
# This is the type which is used to send data about attachments to the view

require 'digest'

class FoiAttachment < ApplicationRecord
  include Rails.application.routes.url_helpers
  include LinkToHelper
  include MessageProminence

  MissingAttachment = Class.new(StandardError)

  belongs_to :incoming_message, inverse_of: :foi_attachments
  has_one :info_request, through: :incoming_message, source: :info_request
  has_one :raw_email, through: :incoming_message, source: :raw_email

  has_one_attached :file, service: :attachments

  validates_presence_of :content_type
  validates_presence_of :filename
  validates_presence_of :display_size

  before_validation :ensure_filename!, only: [:filename]
  before_destroy :delete_cached_file!

  scope :binary, -> { where.not(content_type: AlaveteliTextMasker::TextMask) }

  delegate :expire, :log_event, to: :info_request
  delegate :metadata, to: :file_blob, allow_nil: true

  admin_columns exclude: %i[url_part_number within_rfc822_subject hexdigest],
                include: %i[metadata]

  BODY_MAX_TRIES = 3
  BODY_MAX_DELAY = 5

  # rubocop:disable Layout/LineLength
  CONTENT_TYPE_NAMES = {
    # Plain Text
    "text/plain" => 'Text file',
    'application/rtf' => 'RTF file',

    # Binary Documents
    'application/pdf' => 'PDF file',

    # Images
    'image/tiff' => 'TIFF image',

    # Word Processing
    'application/vnd.ms-word' => 'Word document',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => 'Word document',

    # Presentation
    'application/vnd.ms-powerpoint' => 'PowerPoint presentation',
    'application/vnd.openxmlformats-officedocument.presentationml.presentation' => 'PowerPoint presentation',

    # Spreadsheet
    'application/vnd.ms-excel' => 'Excel spreadsheet',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => 'Excel spreadsheet'
  }.freeze
  # rubocop:enable Layout/LineLength

  def delete_cached_file!
    @cached_body = nil
    file.purge if file.attached?
  end

  def body=(d)
    self.hexdigest ||= Digest::MD5.hexdigest(d)

    ensure_filename!
    if file.attached?
      file_blob.upload(StringIO.new(d.to_s), identify: false)
      file_blob.save

    else
      file.attach(
        io: StringIO.new(d.to_s),
        filename: filename,
        content_type: content_type
      )
    end

    @cached_body = d.force_encoding("ASCII-8BIT")
    update_display_size!
  end

  def body
    return @cached_body if @cached_body

    begin
      return file.download if masked?
    rescue ActiveStorage::FileNotFoundError
      # file isn't in storage and has gone missing, rescue to allow the masking
      # job to run and rebuild the stored file or even the whole attachment.
    end

    if persisted?
      FoiAttachmentMaskJob.unlock!(self)
      FoiAttachmentMaskJob.perform_now(self)
      return body unless destroyed?
    end

    load_attachment_from_incoming_message!.body if destroyed?
  end

  # body as UTF-8 text, with scrubbing of invalid chars if needed
  def body_as_text
    convert_string_to_utf8(body, 'UTF-8')
  end

  # for text types, the scrubbed UTF-8 text. For all other types, the
  # raw binary
  def default_body
    text_type? ? body_as_text.string : body
  end

  # return the body as it is in the raw email, unmasked without censor rules
  # applied
  def unmasked_body
    MailHandler.attachment_body_for_hexdigest(
      raw_email.mail,
      hexdigest: hexdigest
    )

  rescue MailHandler::MismatchedAttachmentHexdigest
    begin
      attributes = MailHandler.attempt_to_find_original_attachment_attributes(
        raw_email.mail,
        body: file.download
      ) if file.attached?

    rescue ActiveStorage::FileNotFoundError
      raise MissingAttachment, "attachment missing from storage (ID=#{id})"
    end

    unless attributes
      raise MissingAttachment, "attachment missing in raw email (ID=#{id})"
    end

    update(hexdigest: attributes[:hexdigest])
    attributes[:body]
  end

  def masked?
    file.attached? && masked_at.present? && masked_at < Time.zone.now
  end

  def main_body_part?
    self == incoming_message.get_main_body_text_part
  end

  # Returns HTML, of extra comment to put by attachment
  def extra_note
    return unless content_type == 'message/delivery-status'

    dsn = DeliveryStatusNotification.new(body)
    return unless dsn.status && dsn.message

    "DSN: #{dsn.status} #{dsn.message}"
  end

  # Called by controller so old filenames still work
  def old_display_filename
    filename = self.filename

    # Convert weird spaces (e.g. \n) to normal ones
    filename = filename.gsub(/\s/, " ")
    # Remove slashes, they mess with URLs
    filename.gsub(/\//, "-")
  end

  # TODO: changing this will break existing URLs, so have a care - maybe
  # make another old_display_filename see above
  def display_filename
    filename = self.filename
    unless incoming_message.nil?
      filename = info_request.apply_censor_rules_to_text(filename)
    end
    # Sometimes filenames have e.g. %20 in - no point butchering that
    # (without unescaping it, this would remove the % and leave 20s in there)
    filename = CGI.unescape(filename)

    # Remove weird spaces
    filename = filename.gsub(/\s+/, " ")
    # Remove non-alphabetic characters
    filename = filename.gsub(/[^A-Za-z0-9.]/, " ")
    # Remove spaces near dots
    filename = filename.gsub(/\s*\.\s*/, ".")
    # Compress adjacent spaces down to a single one
    filename = filename.gsub(/\s+/, " ")
    filename.strip
  end

  def ensure_filename!
    if filename.blank?
      calc_ext = AlaveteliFileTypes.mimetype_to_extension(content_type)
      calc_ext = "bin" unless calc_ext
      if !within_rfc822_subject.nil?
        computed = within_rfc822_subject + "." + calc_ext
      else
        computed = "attachment." + calc_ext
      end
      self.filename = computed
    end
  end

  def filename=(filename)
    filename.try(:delete!, "\0")
    calc_ext = AlaveteliFileTypes.mimetype_to_extension(content_type)
    # Put right extension on if missing
    if !filename.nil? && !filename.match(/\.#{calc_ext}$/) && calc_ext
      computed = filename + "." + calc_ext
    else
      computed = filename
    end
    write_attribute('filename', computed)
  end

  # Size to show next to the download link for the attachment
  def update_display_size!
    s = body.size

    if s > 1024 * 1024
      self.display_size = format("%.1f", s.to_f / 1024 / 1024) + 'M'
    else
      self.display_size = (s / 1024).to_s + 'K'
    end
  end

  # Whether this type has a "View as HTML"
  def has_body_as_html?
    AttachmentToHTML.extractable?(self)
  end

  # Name of type of attachment type - only valid for things that
  # has_body_as_html?
  def name_of_content_type
    CONTENT_TYPE_NAMES[content_type]
  end

  # For "View as HTML" of attachment
  def body_as_html(dir, opts = {})
    attachment_url = opts.fetch(:attachment_url, nil)
    to_html_opts = opts.merge(tmpdir: dir, attachment_url: attachment_url)
    AttachmentToHTML.to_html(self, to_html_opts)
  end

  def cached_urls
    [
      request_path(info_request)
    ]
  end

  def load_attachment_from_incoming_message
    IncomingMessage.get_attachment_by_url_part_number_and_filename!(
      incoming_message.get_attachments_for_display,
      url_part_number,
      display_filename
    )
  end

  def update_and_log_event(event: {}, **params)
    return false unless update(params)

    log_event(
      'edit_attachment',
      event.merge(
        attachment_id: id,
        old_prominence: prominence_previously_was,
        prominence: prominence,
        old_prominence_reason: prominence_reason_previously_was,
        prominence_reason: prominence_reason
      )
    )
  end

  private

  def load_attachment_from_incoming_message!
    attachment = load_attachment_from_incoming_message
    return attachment if attachment

    raise MissingAttachment, "attachment couldn't be reloaded using " \
      "url_part_number and display_filename attributes"
  end

  def text_type?
    AlaveteliTextMasker::TextMask.include?(content_type)
  end
end