lib/autoheathen/email_processor.rb

Summary

Maintainability
B
4 hrs
Test Coverage
require 'mail'
require 'yaml'
require 'logger'
require 'haml'
require 'filemagic/ext'
require_relative 'config'
require 'heathen'

module AutoHeathen
  class EmailProcessor
    include AutoHeathen::Config

    # The only email headers that are kept when a mail is forwarded on to LEG_wikilex.
    # Frozen so the whitelist cannot be altered by accident at runtime.
    ONWARD_HEADERS = %w[Date From To Subject Content-Type Content-Transfer-Encoding Mime-Version].freeze

    attr_reader :cfg, :logger

    # Constructs the processor.
    # The built-in defaults listed below, the optional config file and the cfg
    # overrides are handed to load_config; whatever it returns becomes the
    # effective configuration (NOTE(review): presumably later sources take
    # precedence - confirm in AutoHeathen::Config).
    # @param cfg [Hash] configuration overrides; recognised keys and their defaults:
    #    deliver:          true                             If false, email will not actually be sent (useful for testing)
    #    language:         'en'                             Language handed to the converter for every attachment
    #    from:             'autoheathen'                    Who to say the response email is from
    #    cc_blacklist:     nil                              Array of email addresses to excise from the CC list of responses
    #                                                       - used to avoid infinite loops in autoheathen
    #    email:            nil                              Email to send response to (not read anywhere in this file)
    #    verbose:          false                            If true the logger level is set to DEBUG, otherwise INFO
    #    mail_host:        'localhost'                      SMTP relay host for outgoing mail
    #    mail_port:        25                               SMTP relay port for outgoing mail
    #    logger:           nil                              Optional logger object (its level is overwritten, see verbose)
    #    text_template:    'config/autoheathen.text.haml'   Template for the text part of the response email
    #    html_template:    'config/autoheathen.html.haml'   Template for the HTML part of the response email
    # @param config_file optional configuration file, passed through to load_config
    def initialize(cfg = {}, config_file = nil)
      defaults = {
        deliver:       true,
        language:      'en',
        from:          'autoheathen',
        cc_blacklist:  nil,
        email:         nil,
        verbose:       false,
        mail_host:     'localhost',
        mail_port:     25,
        logger:        nil,
        text_template: 'config/autoheathen.text.haml',
        html_template: 'config/autoheathen.html.haml',
      }
      @cfg = load_config(defaults, config_file, cfg)

      # Fall back to a logger without a log device when none was configured
      @logger = @cfg[:logger] || Logger.new(nil)
      @logger.level = if @cfg[:verbose]
                        Logger::DEBUG
                      else
                        Logger::INFO
                      end
    end

    # Return-to-sender entry point: processes the email and sends the result
    # back to the address(es) found in its From header.
    # @param email the decoded mail message (must respond to #from)
    # @return whatever #process returns
    def process_rts(email)
      sender = email.from
      process(email, sender, true)
    end

    # Converts every attachment of the email with Heathen::Converter, delivers
    # the outcome as configured and logs a per-attachment summary.
    # @param email the decoded mail message; it must respond to has_attachments?,
    #   attachments, from and subject
    #   (NOTE(review): presumably a Mail::Message - confirm against callers)
    # @param mail_to recipient(s) of the resulting mail
    # @param is_rts [Boolean] true to reply with the converted files (deliver_rts),
    #   false to forward the original mail with its attachments replaced (deliver_onward)
    # @return [Array<Hash>, nil] one hash per attachment with the keys :orig_filename,
    #   :orig_content, :filename, :content and :error (false on success, otherwise
    #   the failure message); nil when the email has no attachments, in which case
    #   nothing is delivered
    def process(email, mail_to, is_rts = false)
      unless email.has_attachments?
        logger.info "From: #{email.from} Subject: (#{email.subject}) Files: no attachments"
        return
      end

      logger.info "From: #{email.from} Subject: (#{email.subject}) Files: #{email.attachments.map(&:filename).join(',')}"

      # Convert the attachments; a failure is recorded for that attachment
      # only and does not stop the remaining ones from being converted
      documents = email.attachments.map do |attachment|
        begin
          heathen = Heathen::Converter.new(logger: logger)
          lang = @cfg[:language]
          payload = attachment.body.decoded
          operation = get_action(payload.content_type)
          logger.info "    convert #{attachment.filename} using action: #{operation}"
          converted = heathen.convert(operation, payload, lang)
          new_name = Heathen::Filename.suggest(attachment.filename, converted.mime_type)
          { orig_filename: attachment.filename, orig_content: payload, filename: new_name, content: converted, error: false }
        rescue StandardError => e
          # payload is still nil here if the failure happened before decoding
          { orig_filename: attachment.filename, orig_content: payload, filename: nil, content: nil, error: e.message }
        end
      end

      # Deliver the results
      is_rts ? deliver_rts(email, documents, mail_to) : deliver_onward(email, documents, mail_to)

      # Summarise the processing
      logger.info "Results of conversion"
      documents.each do |doc|
        name = doc[:orig_filename]
        logger.info(doc[:content].nil? ? "  #{name} was not converted (#{doc[:error]}) " : "  #{name} was converted successfully")
      end

      documents
    end

    # Forwards the original email to mail_to, with every successfully converted
    # document taking the place of its source attachment. Attachments that
    # could not be converted are re-attached in their original form.
    # The email object is modified in place and then sent through #deliver.
    # @param email the decoded mail message to forward
    # @param documents [Array<Hash>] conversion results as built by #process
    # @param mail_to recipient(s) of the forwarded mail
    def deliver_onward(email, documents, mail_to)
      logger.info "Sending response mail to #{mail_to}"

      # Re-address the mail: no CCing, just send to the recipient
      email.cc []
      email.to mail_to

      # Don't prepend yet another Fwd:
      current_subject = email.subject
      fwd_prefix = current_subject.to_s.start_with?('Fwd:') ? '' : 'Fwd: '
      email.subject "#{fwd_prefix}#{current_subject}"

      # NOTE(review): Return-Path is not listed in ONWARD_HEADERS, so the value
      # set here looks like it is removed again by the header filter below -
      # confirm whether that is intended.
      email.return_path(email.from) unless email.return_path

      # Something weird goes on with Sharepoint, where the doc is dropped on the
      # floor, so remove every header that is not explicitly whitelisted
      email.message_id = nil # make sure of message_id too
      allowed = ONWARD_HEADERS.map(&:downcase)
      header_names = email.header.map(&:name)
      header_names.reject { |n| allowed.include?(n.downcase) }.each do |n|
        email.header[n] = nil
      end
      email.received = nil # make sure of received

      # Replace attachments with converted files, preserving the original
      # attachment wherever conversion failed
      email.parts.delete_if(&:attachment?)
      documents.each do |doc|
        name, payload = if doc[:content]
                          [doc[:filename], doc[:content]]
                        else
                          [doc[:orig_filename], doc[:orig_content]]
                        end
        email.add_file filename: name, content: payload
      end

      email.delivery_method :smtp, address: @cfg[:mail_host], port: @cfg[:mail_port]
      deliver email
    end

    # Sends the converted documents back in a newly built reply mail.
    # Only successfully converted documents are attached; the text and HTML
    # bodies are rendered from the configured Haml templates, which receive
    # the locals to, documents and cfg.
    # @param email the original mail message (read for cc, to, subject,
    #   return_path and the X-Received header)
    # @param documents [Array<Hash>] conversion results as built by #process
    # @param mail_to recipient(s) of the reply
    def deliver_rts email, documents, mail_to
      logger.info "Sending response mail to #{mail_to}"
      mail = Mail.new
      mail.from @cfg[:from]
      mail.to mail_to

      # CCs to the original email will get a copy of the converted files as well.
      # Original recipients and blacklisted addresses are removed to prevent an
      # autoheathen infinite loop. Array() keeps this working when the original
      # mail has no To header or no blacklist is configured.
      if email.cc
        mail.cc(Array(email.cc) - Array(email.to) - Array(@cfg[:cc_blacklist]))
      end

      # Don't prepend yet another Re: (to_s covers mails without a subject)
      subject = email.subject.to_s
      mail.subject "#{'Re: ' unless subject.start_with? 'Re:'}#{subject}"

      # Construct received path
      # TODO: is this in the right order?
      #rcv = "by localhost(autoheathen); #{Time.now.strftime '%a, %d %b %Y %T %z'}"
      #[email.received,rcv].flatten.each { |rec| mail.received rec.to_s }
      mail.return_path email.return_path if email.return_path
      mail.header['X-Received'] = email.header['X-Received'] if email.header['X-Received']

      documents.each do |doc|
        next if doc[:content].nil?
        mail.add_file filename: doc[:filename], content: doc[:content]
      end

      # Mail evaluates the text_part/html_part blocks in the context of the part
      # being built, so this processor's methods and instance variables are not
      # reachable from inside them. The lambda is created out here, where self
      # is still the processor, and only called from within the blocks.
      render_template = lambda do |template_key|
        template = Haml::Template.new { read_file @cfg[template_key] }
        template.render(Object.new, to: mail_to, documents: documents, cfg: @cfg)
      end
      mail.text_part do
        body(render_template.call(:text_template))
      end
      mail.html_part do
        content_type 'text/html; charset=UTF-8'
        body(render_template.call(:html_template))
      end

      mail.delivery_method :smtp, address: @cfg[:mail_host], port: @cfg[:mail_port]
      deliver mail
    end

    # Sends the mail unless delivery is switched off in the configuration.
    # Keeping the actual send in one place allows it to be stubbed out in RSpec.
    # @param mail the message to send (must respond to deliver! and to)
    def deliver(mail)
      unless @cfg[:deliver]
        return logger.debug("Files would have been emailed to #{mail.to}, but #{self.class.name} is configured not to")
      end

      mail.deliver!
      logger.debug("Files were emailed to #{mail.to}")
    end

    # Reads a file, trying the name as given first (absolute, or relative to the
    # current working directory) and then relative to the project base directory,
    # i.e. the directory two levels above the one containing this source file
    # (symlinks resolved).
    # @param filename [String] path of the file to read
    # @return [String] the file's contents
    # @raise [SystemCallError] (e.g. Errno::ENOENT) if the file cannot be read
    #   from either location
    def read_file filename
      return File.read(filename) if File.exist?(filename)

      # Only core File methods are used so that this does not depend on
      # 'pathname' having been loaded by some other library.
      base_dir = File.expand_path('../../..', File.realpath(__FILE__))
      File.read File.expand_path(filename, base_dir)
    end

    # Conversion action for every supported non-image MIME type
    CONVERSION_ACTIONS = {
      'application/pdf' => 'ocr',
      'text/html' => 'pdf',
      'application/zip' => 'pdf',
      'application/msword' => 'pdf',
      'application/vnd.oasis.opendocument.text' => 'pdf',
      'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => 'pdf',
      'application/vnd.ms-excel' => 'pdf',
      'application/vnd.ms-office' => 'pdf',
      'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => 'pdf',
      'application/vnd.ms-powerpoint' => 'pdf',
      'application/vnd.openxmlformats-officedocument.presentationml.presentation' => 'pdf',
    }.freeze

    # Returns the correct conversion action based on the content type.
    # Parameters such as "; charset=utf-8" are ignored, surrounding whitespace
    # is stripped and the type is matched case-insensitively. Any image/* type
    # that has no explicit entry is sent for OCR.
    # @param content_type [String] MIME type, optionally followed by parameters
    # @return [String] the action name, 'ocr' or 'pdf'
    # @raise [RuntimeError] if there is no conversion action for the content type
    #   (this includes a nil or empty content type)
    def get_action content_type
      ct = content_type.to_s.sub(/;.*/m, '').strip.downcase
      op = CONVERSION_ACTIONS[ct]
      op ||= 'ocr' if ct.start_with?('image/')
      raise "Conversion from #{ct} is not supported" unless op
      op
    end

  end
end