# hackedteam/rcs-common
# lib/rcs-common/gridfs.rb
require 'digest/md5'
require 'rbconfig'
require 'rcs-common/mongoid'

module RCS
  module Common
    module GridFS
      BSON = Moped::BSON if Mongoid::VERSION < '4.0.0'

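      # A read-only, stream-like handle over a stored GridFS file. Instances
      # are returned by Bucket#get; a minimal usage sketch (assuming file_id
      # refers to an existing file):
      #
      #   file = bucket.get(file_id)
      #   file.read(1024) # => the first 1024 bytes
      #   file.eof?       # => true once the declared length is consumed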
      class ReadOnlyFile
        attr_reader :attributes, :bucket, :file_position

        def initialize(bucket, attributes)
          @attributes = attributes
          @bucket = bucket
          @last_chunk_num = (@attributes[:length].to_f / @attributes[:chunk_size]).ceil - 1
          rewind
        end

        def method_missing(name, *args)
          raise NoMethodError.new(name.inspect) unless @attributes.has_key?(name)
          @attributes[name]
        end

        def respond_to_missing?(name, include_private = false)
          @attributes.has_key?(name) || super
        end

        def read(bytes_to_read = nil)
          data = ''

          return data if @file_position >= @attributes[:length]
          return data if bytes_to_read and bytes_to_read <= 0

          # Serve any bytes left over in the chunk loaded by a previous read.
          if @current_chunk[:n]
            chunk_size = @attributes[:chunk_size]
            offset = @file_position % chunk_size
            # A zero offset means the chunk was fully consumed: point past its
            # end so the loop below fetches the next chunk instead.
            offset = chunk_size if offset == 0
            data = @current_chunk[:data][offset..-1] || ''
          end

          if bytes_to_read.nil? or bytes_to_read > data.bytesize
            loop do
              break unless read_next_chunk
              data << @current_chunk[:data]

              break if bytes_to_read and bytes_to_read <= data.bytesize
            end
          end

          # Trim to the requested byte count (nil means read to the end).
          data = data.byteslice(0, bytes_to_read) if bytes_to_read
          @file_position += data.bytesize
          data
        end

        def rewind
          @current_chunk = {n: nil, data: nil}
          @file_position = 0
        end

        def eof?
          @file_position >= @attributes[:length]
        end

        def id
          @attributes[:_id]
        end

        def file_length
          @attributes[:length]
        end

        alias :content :read
        alias :tell :file_position
        alias :position :file_position
        alias :pos :file_position

        private

        def read_next_chunk
          chunk_num = @current_chunk[:n] ? @current_chunk[:n] + 1 : 0
          return nil if chunk_num > @last_chunk_num

          chunk = bucket.chunks_collection.find(files_id: @attributes[:_id], n: chunk_num).first
          # chunk may be nil in case of corrupted data,
          # e.g. a declared length different from the actual length
          return nil unless chunk

          @current_chunk = {n: chunk['n'], data: chunk['data'].data}
        end
      end

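      # A GridFS bucket backed by a named Mongoid session. A usage sketch,
      # assuming a configured :default session (the 'evidence' name is
      # illustrative):
      #
      #   bucket  = Bucket.new('evidence')
      #   file_id = bucket.put("payload bytes", filename: 'dump.bin')
      #   bucket.get(file_id).read # => "payload bytes"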
      class Bucket
        attr_reader :name, :mongoid_session_name

        DEFAULT_NAME          = 'fs'
        DEFAULT_CONTENT_TYPE  = 'application/octet-stream'
        DEFAULT_CHUNK_SIZE    = 262144 # 256 KiB
        BINARY_ENCODING       = 'BINARY'

        def initialize(name = DEFAULT_NAME, options = {})
          @name                 = name.to_s.downcase.strip
          @name                 = DEFAULT_NAME if @name.empty?
          @mongoid_session_name = options[:mongoid_session_name] || :default
          # Create the indexes lazily on the first write unless lazy: false.
          @setup_on_write       = options[:lazy].nil? ? true : options[:lazy]
          @osx                  = RbConfig::CONFIG['host_os'] =~ /darwin/

          setup unless @setup_on_write
        end

        def session_options
          # Allow unsafe writes on OS X.
          # @see https://github.com/mongoid/mongoid/issues/3582

          @osx ? {safe: false} : {}
        end

        def session
          Mongoid.session(mongoid_session_name).with(session_options)
        end

        def files_collection
          session[:"#{name}.files"]
        end

        def chunks_collection
          session[:"#{name}.chunks"]
        end

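        # Stores content as a new GridFS file and returns its ObjectId, or nil
        # for nil/empty content. A sketch of the accepted attributes, all of
        # them optional:
        #
        #   bucket.put(data, filename: 'a.txt', content_type: 'text/plain',
        #                    metadata: {source: 'agent'})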
        def put(content, attrs = {}, options = {})
          return if content.nil?

          file = {}

          file[:_id]         = BSON::ObjectId.new
          file[:length]      = content.bytesize
          file[:chunkSize]   = DEFAULT_CHUNK_SIZE

          return if file[:length].zero?

          file[:filename]    = attrs[:filename]
          file[:contentType] = attrs[:content_type] || attrs[:contentType] || DEFAULT_CONTENT_TYPE
          file[:aliases]     = attrs[:aliases] || []
          file[:aliases]     = [file[:aliases]].flatten
          file[:metadata]    = attrs[:metadata] || {}
          file[:metadata]    = {} if file[:metadata].blank?
          file[:uploadDate]  = attrs[:upload_date] || attrs[:uploadDate] || Time.now.utc

          file[:md5] = write(file[:_id], content, options)

          files_collection.insert(file)

          file[:_id]
        end

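        # Asks the server for the md5 of the stored chunks via MongoDB's
        # filemd5 command.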
        def md5(file_id)
          doc = session.command(filemd5: file_id, root: name)
          doc['md5'] if doc.respond_to?(:[])
        end

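        # Appends data to an existing file, rewriting only its last (partial)
        # chunk, and returns [file_id, new_length]. A sketch of the lookup
        # options ('trace.log' is illustrative):
        #
        #   bucket.append(file_id, more_bytes) # locate by ObjectId
        #   bucket.append('trace.log', line, filename: true,
        #                 create: {filename: 'trace.log'}) # by name, created on first use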
        def append(file_id, data, options = {})
          attributes = if options[:filename]
            files_collection.find(filename: file_id).first
          else
            file_id = objectid(file_id)
            files_collection.find(_id: file_id).first
          end

          if !attributes and options[:create]
            file_attributes = options[:create].respond_to?(:[]) ? options[:create] : {}
            new_file_id = put(data, file_attributes, options)
            return [new_file_id, data.bytesize]
          end

          raise("File not found: #{file_id}") unless attributes

          attributes = attributes.to_h.symbolize_keys

          file_id = objectid(attributes[:_id])

          length, chunk_size = attributes[:length], attributes[:chunkSize]

          chunk_offset = length / chunk_size # number of complete chunks stored
          offset = length % chunk_size       # bytes in the trailing partial chunk

          # The last chunk is partial: prepend its bytes so it is rewritten whole.
          if offset > 0
            data = chunks_collection.find(files_id: file_id, n: chunk_offset).first['data'].data + data
          end

          chunkerize(data) do |chunk_data, chunk_num|
            chunks_collection.find(files_id: file_id, n: chunk_num + chunk_offset).upsert('$set' => {data: binary(chunk_data)})
          end

          new_length = length - offset + data.bytesize
          update = {length: new_length}
          # Recompute the md5 server-side unless disabled (md5: false), so a
          # skipped digest does not overwrite the stored one with nil.
          update[:md5] = md5(file_id) if options[:md5] != false

          files_collection.find(_id: file_id).update('$set' => update)

          [file_id, new_length]
        end

        # Equivalent to #get(id).read
        def content(file_id)
          file_id = objectid(file_id)

          # Sort by chunk number so the pieces are concatenated in order.
          chunks_collection.find(files_id: file_id, n: {'$gte' => 0}).sort(n: 1).inject("") do |data, chunk|
            data << chunk['data'].data
          end
        end

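        # Fetches the file document and wraps it in a ReadOnlyFile for
        # streaming reads; returns nil when the id is unknown.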
        def get(file_id, options = {})
          file_id = objectid(file_id)
          attributes = files_collection.find(_id: file_id).first

          return unless attributes

          attributes = attributes.to_h.symbolize_keys
          attributes[:bucket] = self
          attributes[:chunk_size] = attributes[:chunkSize]
          attributes[:content_type] = attributes[:contentType]
          attributes[:upload_date] = attributes[:uploadDate]

          ReadOnlyFile.new(self, attributes)
        end

        def delete(file_id)
          file_id = objectid(file_id)

          files_collection.find(_id: file_id).remove
          chunks_collection.find(files_id: file_id).remove_all

          return nil
        end

        def drop
          [files_collection, chunks_collection].map(&:drop)
          # The indexes went with the collections: re-arm setup for the next write.
          @setup_on_write = true
        end

        alias :remove :delete

        private

        def objectid(id)
          id.respond_to?(:generation_time) ? id : BSON::ObjectId.from_string(id.to_s)
        end

        def setup
          chunks_collection.indexes.create({files_id: 1, n: 1}, {unique: true})
          # This is an optional index (not required by the GridFS spec)
          files_collection.indexes.create({filename: 1}, {background: true})
          nil
        end

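        # Slices data into DEFAULT_CHUNK_SIZE pieces and yields each one with
        # its chunk number: e.g. 600_000 bytes yield chunks of 262144, 262144
        # and 75712 bytes, numbered 0, 1 and 2.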
        def chunkerize(data)
          offset = 0
          chunk_num = 0

          loop do
            chunk_data = data.byteslice(offset..(offset + DEFAULT_CHUNK_SIZE - 1))
            break if chunk_data.nil?
            chunk_data_size = chunk_data.bytesize
            offset += chunk_data_size
            break if chunk_data_size == 0
            yield(chunk_data, chunk_num)
            break if chunk_data_size < DEFAULT_CHUNK_SIZE
            chunk_num += 1
          end
        end

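        # Writes data as numbered chunks and returns the md5 hex digest of the
        # whole content (or nil when disabled with md5: false).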
        def write(file_id, data, options = {})
          # #setup returns nil, so the indexes are created only once per bucket.
          @setup_on_write = setup if @setup_on_write

          md5 = Digest::MD5.new if options[:md5] != false

          chunkerize(data) do |chunk_data, chunk_num|
            chunks_collection.insert(files_id: file_id, n: chunk_num, data: binary(chunk_data))
            md5.update(chunk_data) if md5
          end

          md5.hexdigest if md5
        end

        if Mongoid::VERSION < '4.0.0'
          def binary(data)
            data.force_encoding(BINARY_ENCODING) if data.respond_to?(:force_encoding)
            BSON::Binary.new(:generic, data)
          end
        else
          def binary(data)
            data.force_encoding(BINARY_ENCODING) if data.respond_to?(:force_encoding)
            BSON::Binary.new(data, :generic)
          end
        end
      end
    end
  end
end
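
# A minimal end-to-end sketch, assuming Mongoid is configured with a :default
# session (see rcs-common/mongoid):
#
#   bucket  = RCS::Common::GridFS::Bucket.new
#   file_id = bucket.put("hello world", filename: 'greeting.txt')
#   bucket.get(file_id).read(5) # => "hello"
#   bucket.delete(file_id)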