hackedteam/rcs-db

View on GitHub
lib/rcs-intelligence/infer.rb

Summary

Maintainability
A
2 hrs
Test Coverage
require 'rcs-common/trace'
require 'date'

module RCS
  module Intelligence
    class Position

      # Infer the home and the office of a given target.
      class Infer
        include Tracer
        attr_reader :target, :from, :to

        # Minimum number of distinct week days that must carry position data
        # before any place is inferred (see #group_and_count_positions_within).
        MIN_DAYS = 3
        # Width of one time slot, expressed in hours. Slots are identified by
        # their starting hour: 0, 0.5, 1, ... 23.5.
        HALF_HOUR = 0.5
        # Hour ranges whose slots are counted when looking for the home.
        # Frozen so the shared array cannot be altered by accident.
        HOME_TIMESLOT = [(22..23.5), (0..6)].freeze
        # Hour ranges whose slots are counted when looking for the office.
        WORKDAY_TIMESLOT = [(8..19)].freeze

        # Build an inference for +target+ over a single week.
        # +week_or_datetime+ is handed to #week_bounds; the two values it
        # returns become +from+ and +to+.
        def initialize(target, week_or_datetime)
          @target = target
          first_day, last_day = week_bounds(week_or_datetime)
          @from = first_day
          @to = last_day
        end

        # Find the first (Monday) and the last (Sunday) day of an ISO 8601
        # commercial week.
        #
        # week_or_datetime - either an object responding to #to_date (Date,
        #                    Time, DateTime), in which case the week that day
        #                    belongs to is used, or a commercial week number
        #                    (converted with #to_i) taken in the current UTC
        #                    commercial year.
        #
        # Returns a two-element array of 'YYYYMMDD' strings: [monday, sunday].
        # Raises ArgumentError (from Date.commercial) when the week number
        # does not exist in the selected year.
        def week_bounds(week_or_datetime)
          if week_or_datetime.respond_to?(:to_date)
            date = week_or_datetime.to_date
            year = date.cwyear
            week = date.cweek
          else
            # A bare week number is a commercial week, so it must be paired
            # with the commercial year: around New Year it differs from the
            # calendar year (2021-01-01 belongs to week 53 of 2020, and
            # week 53 does not even exist in 2021).
            year = Time.now.utc.to_date.cwyear
            week = week_or_datetime.to_i
          end

          monday = Date.commercial(year, week, 1)
          sunday = monday + 6

          [monday, sunday].map { |day| day.strftime('%Y%m%d') }
        end

        # Yield, one at a time, the aggregates returned by the
        # Aggregate.target(target).positions.between(day: from..to) query.
        # Aggregates whose +info+ cannot be enumerated are skipped.
        def each_aggregate
          weekly_positions = Aggregate.target(target).positions.between(day: from..to)

          weekly_positions.each do |aggregate|
            yield(aggregate) if aggregate.info.respond_to?(:each)
          end
        end

        # Floor the datetime to the previous half hour. Example:
        # 2013-07-29 22:49:28 UTC => 2013-07-29 22:30:00 UTC
        # 2013-07-29 22:10:08 UTC => 2013-07-29 22:00:00 UTC
        #
        # datetime - a Time-like object: it must answer #min and #sec and
        #            support subtracting a number of seconds.
        #
        # Returns the floored value; the result of the subtraction is
        # returned, the argument itself is not reassigned or mutated here.
        def normalize_datetime(datetime)
          seconds_past_slot = (datetime.min % 30) * 60 + datetime.sec
          # Drop the fractional second as well. Otherwise two instants of the
          # same half hour floor to different values, and a range walk such as
          # `while start <= stop` can lose its last slot.
          seconds_past_slot += datetime.subsec if datetime.respond_to?(:subsec)
          datetime - seconds_past_slot
        end

        # Split an interval into the half-hour slots it touches, grouped by
        # week day.
        #
        # interval - object answering #[] with 'start' and 'end' entries; both
        #            values get a number of seconds added to them, so they are
        #            presumably Time objects (confirm against the caller).
        # timezone - offset in hours (converted with #to_i) applied to both
        #            ends before they are floored with #normalize_datetime.
        #
        # Returns nil when the interval cannot be indexed, when either end is
        # missing or when start comes after end. Otherwise returns a hash
        # whose keys are cwdays (eg. Mon is 1, Tue is 2, etc.) and whose values
        # are slot lists, eg. [20, 20.5, 21]: 20 stands for the slot from 8pm
        # to 8:30pm, 20.5 for the one from 8:30pm to 9pm.
        def normalize_interval(interval, timezone = 0)
          return unless interval.respond_to?(:[])

          first_seen = interval['start']
          last_seen = interval['end']
          offset = timezone.to_i * 3600

          return if !first_seen || !last_seen
          return if first_seen > last_seen

          cursor = normalize_datetime(first_seen + offset)
          limit = normalize_datetime(last_seen + offset)

          slots_by_day = {}

          # Walk from the first slot to the last one, half an hour at a time.
          until cursor > limit
            slot = cursor.min == 0 ? cursor.hour : cursor.hour + HALF_HOUR
            (slots_by_day[cursor.to_date.cwday] ||= []) << slot
            cursor += 30 * 60
          end

          slots_by_day
        end

        # Collect, for every week day and every half-hour slot, the positions
        # of the aggregates seen in that slot. Intervals that
        # #normalize_interval rejects are ignored. The result looks like:
        # {
        #   1 => {
        #       14 => [{:latitude=>45.47354920000001, :longitude=>9.232277999999999, :radius=>30}],
        #       14.5 => [{:latitude=>45.47354920000001, :longitude=>9.232277999999999, :radius=>30}]
        #   }
        # }
        def week_distribution
          distribution = {}

          each_aggregate do |aggregate|
            aggregate.info.each do |raw_interval|
              slots_by_day = normalize_interval(raw_interval, aggregate.data['timezone'])
              next unless slots_by_day

              slots_by_day.each do |day, slots|
                day_bucket = (distribution[day] ||= {})

                slots.each do |slot|
                  (day_bucket[slot] ||= []) << aggregate.position
                end
              end
            end
          end

          distribution
        end

        # Count how many half-hour slots of the week distribution, restricted
        # to the given hour ranges, each position appears in.
        #
        # Returns an empty list when fewer than MIN_DAYS week days have data.
        # Otherwise returns a list of [POSITION, FREQUENCY] couples sorted by
        # ascending frequency. Example:
        # [
        #   [{:latitude=>45.47354920000001, :longitude=>9.232277999999999, :radius=>30}, 4],
        #   [{:latitude=>45.4806173, :longitude=>9.221273499999999, :radius=>30}, 42]
        # ]
        def group_and_count_positions_within(ranges)
          distribution = week_distribution
          return [] if distribution.size < MIN_DAYS

          frequencies = {}

          distribution.each_value do |slots|
            ranges.each do |range|
              slots.each do |slot, positions|
                next unless range.include?(slot)

                positions.each do |position|
                  frequencies[position] = frequencies.fetch(position, 0) + 1
                end
              end
            end
          end

          frequencies.sort_by { |_position, count| count }
        end

        # Returns the most visited place in the given ranges, or nil when no
        # place was counted or when the best frequency is not above 35% of
        # the half-hour slots the ranges span over seven days. Whenever a
        # candidate exists, a debug trace reports it.
        # Example: {:latitude=>45.4806173, :longitude=>9.221273499999999, :radius=>30}
        def most_visited_place(ranges)
          slots_per_day = ranges.inject(0) { |total, range| total + range.step(HALF_HOUR).size }
          threshold = (35.0 / 100.0) * (slots_per_day * 7) # 35%

          # The list is sorted by ascending frequency: the winner comes last.
          top = group_and_count_positions_within(ranges).last
          return unless top

          place, frequency = top
          message = "Infer: the most visited place of #{target.name} within #{from} and #{to} (#{ranges.inspect}) is #{place}, freq: #{frequency}"
          frequent_enough = frequency > threshold
          message << " [not enough]" unless frequent_enough
          trace :debug, message

          place if frequent_enough
        end

        # The inferred home: the most visited place during HOME_TIMESLOT,
        # or nil when #most_visited_place finds none.
        def home
          night_hours = HOME_TIMESLOT
          most_visited_place(night_hours)
        end

        # The inferred office: the most visited place during
        # WORKDAY_TIMESLOT, or nil when #most_visited_place finds none.
        def office
          working_hours = WORKDAY_TIMESLOT
          most_visited_place(working_hours)
        end
      end
    end
  end
end