SysMO-DB/seek

View on GitHub
lib/seek/content_type_detection.rb

Summary

Maintainability
A
25 mins
Test Coverage
module Seek
  # Predicates that classify a content blob by its MIME type, its size, or by
  # sniffing the first part of its file contents.
  #
  # Every public predicate takes an optional +blob+ argument that defaults to
  # +self+, so the module can be mixed into the blob class itself or used as a
  # helper against another blob. Depending on the predicate, the blob must
  # respond to +content_type+, +filesize+, +filepath+ and +asset+.
  #
  # +mime_extensions+ is not defined here; it is presumably supplied by
  # Seek::MimeTypes and is expected to return an Array of file extensions for a
  # MIME type -- confirm against that module.
  module ContentTypeDetection
    include Seek::MimeTypes

    # Upper bounds (in bytes, exclusive) used by the size predicates below.
    MAX_EXTRACTABLE_SPREADSHEET_SIZE = 20 * 1024 * 1024
    MAX_SIMULATABLE_SIZE = 5 * 1024 * 1024

    # Extension lists are frozen so that no caller can accidentally grow them
    # in place; combine them with Array#+ when a larger list is needed.
    PDF_CONVERTABLE_FORMAT = %w[doc docx ppt pptx odt odp rtf txt].freeze
    IMAGE_VIEWABLE_FORMAT = %w[gif jpeg png jpg bmp svg].freeze

    # True when the blob's MIME type maps to either the xls or xlsx extension.
    def is_excel?(blob = self)
      is_xls?(blob) || is_xlsx?(blob)
    end

    # True when the blob is an Excel file whose size is known and below
    # MAX_EXTRACTABLE_SPREADSHEET_SIZE.
    def is_extractable_spreadsheet?(blob = self)
      within_size_limit(blob) && is_excel?(blob)
    end

    # True when the blob's size is known and below MAX_SIMULATABLE_SIZE.
    def is_in_simulatable_size_limit?(blob = self)
      filesize_below?(blob, MAX_SIMULATABLE_SIZE)
    end

    # True when the blob's MIME type maps to the xlsx extension.
    def is_xlsx?(blob = self)
      mime_extensions(blob.content_type).include?('xlsx')
    end

    # True when the blob's MIME type maps to the xls extension.
    def is_xls?(blob = self)
      mime_extensions(blob.content_type).include?('xls')
    end

    # True when "begin name" appears (case-insensitively) near the start of
    # the file; scanning stops once roughly 25000 characters have been read.
    def is_jws_dat?(blob = self)
      check_content(blob, 'begin name', 25000)
    end

    # True when "<sbml" appears (case-insensitively) near the start of the file.
    def is_sbml?(blob = self)
      check_content(blob, '<sbml')
    end

    # True when both "<graph" and "<node" appear near the start of the file.
    # Each marker is searched for in a separate pass over the file.
    def is_xgmml?(blob = self)
      check_content(blob, '<graph') && check_content(blob, '<node')
    end

    # True when the blob's MIME type maps to the pdf extension.
    def is_pdf?(blob = self)
      mime_extensions(blob.content_type).include?('pdf')
    end

    # True when the top-level part of the MIME type is "image"
    # (e.g. "image/png"). A nil content type yields false.
    def is_image?(blob = self)
      blob.content_type.try(:split, '/').try(:first) == 'image'
    end

    # Truthy when the blob's extension is one of PDF_CONVERTABLE_FORMAT and
    # PDF conversion is enabled in Seek::Config.
    def is_pdf_convertable?(blob = self)
      !(PDF_CONVERTABLE_FORMAT & mime_extensions(blob.content_type)).empty? && Seek::Config.pdf_conversion_enabled
    end

    # True when the blob's extension is pdf, one of IMAGE_VIEWABLE_FORMAT, or
    # (only while PDF conversion is enabled) one of PDF_CONVERTABLE_FORMAT.
    def is_viewable_format?(blob = self)
      # Build a fresh array with +; appending with << would modify the shared
      # constant on every call.
      viewable = IMAGE_VIEWABLE_FORMAT + ['pdf']
      viewable += PDF_CONVERTABLE_FORMAT if Seek::Config.pdf_conversion_enabled
      !(viewable & mime_extensions(blob.content_type)).empty?
    end

    # Truthy when the blob's asset is downloadable and the blob either has a
    # viewable format or a file already exists at blob.filepath('pdf')
    # (presumably a previously converted PDF copy -- confirm with the blob class).
    def is_content_viewable?(blob = self)
      blob.asset.is_downloadable_asset? && (blob.is_viewable_format? || File.exist?(blob.filepath('pdf')))
    end

    private

    # True when the blob's size is known and below
    # MAX_EXTRACTABLE_SPREADSHEET_SIZE.
    def within_size_limit(blob)
      filesize_below?(blob, MAX_EXTRACTABLE_SPREADSHEET_SIZE)
    end

    # Shared size check: false when the size is unknown (nil), otherwise
    # whether it is strictly below +limit+.
    def filesize_below?(blob, limit)
      size = blob.filesize
      !size.nil? && size < limit
    end

    # Scans the file at blob.filepath line by line for +str+.
    #
    # Each line is downcased before comparison, so +str+ must be given in
    # lower case. Scanning stops after the line on which the running character
    # count reaches +max_length+, so slightly more than +max_length+
    # characters may be inspected.
    #
    # Returns true on the first matching line, false when no match is found
    # in the scanned prefix. Read errors (missing file, permissions, invalid
    # encoding, ...) are logged and reported as false rather than raised.
    def check_content(blob, str, max_length = 1500)
      char_count = 0
      filepath = blob.filepath
      begin
        # Block form guarantees the handle is closed on every exit path,
        # including the early return and the break below.
        File.open(filepath, 'r') do |file|
          file.each_line do |line|
            char_count += line.length
            return true if line.downcase.include?(str)
            break if char_count >= max_length
          end
        end
      rescue StandardError => e
        # StandardError only: interrupts, exits and out-of-memory conditions
        # must propagate instead of being logged and ignored.
        Rails.logger.error("Error reading content_blob contents #{e.class.name}:#{e.message}")
      end
      false
    end
  end
end