davidcelis/recommendable

View on GitHub
lib/recommendable/helpers/calculations.rb

Summary

Maintainability
A
3 hrs
Test Coverage
module Recommendable
  module Helpers
    module Calculations
      class << self
        # Calculate a numeric similarity value that can fall between -1.0 and 1.0.
        # A value of 1.0 indicates that both users have rated the same items in
        # the same ways. A value of -1.0 indicates that both users have rated the
        # same items in opposite ways.
        #
        # @param [Fixnum, String] user_id the ID of the first user
        # @param [Fixnum, String] other_user_id the ID of another user
        # @return [Float] the numeric similarity between this user and the passed user
        # @note Similarity values are asymmetrical. `Calculations.similarity_between(user_id, other_user_id)` will not necessarily equal `Calculations.similarity_between(other_user_id, user_id)`
        def similarity_between(user_id, other_user_id)
          user_id = user_id.to_s
          other_user_id = other_user_id.to_s

          similarity = liked_count = disliked_count = 0
          in_common = Recommendable.config.ratable_classes.each do |klass|
            liked_set = Recommendable::Helpers::RedisKeyMapper.liked_set_for(klass, user_id)
            other_liked_set = Recommendable::Helpers::RedisKeyMapper.liked_set_for(klass, other_user_id)
            disliked_set = Recommendable::Helpers::RedisKeyMapper.disliked_set_for(klass, user_id)
            other_disliked_set = Recommendable::Helpers::RedisKeyMapper.disliked_set_for(klass, other_user_id)

            results = Recommendable.redis.pipelined do
              # Agreements
              Recommendable.redis.sinter(liked_set, other_liked_set)
              Recommendable.redis.sinter(disliked_set, other_disliked_set)

              # Disagreements
              Recommendable.redis.sinter(liked_set, other_disliked_set)
              Recommendable.redis.sinter(disliked_set, other_liked_set)

              Recommendable.redis.scard(liked_set)
              Recommendable.redis.scard(disliked_set)
            end

            # Agreements
            similarity += results[0].size
            similarity += results[1].size

            # Disagreements
            similarity -= results[2].size
            similarity -= results[3].size

            liked_count += results[4]
            disliked_count += results[5]
          end

          similarity / (liked_count + disliked_count).to_f
        end

        # Used internally to update the similarity values between this user and all
        # other users. This is called by the background worker.
        def update_similarities_for(user_id)
          user_id = user_id.to_s # For comparison. Redis returns all set members as strings.
          similarity_set = Recommendable::Helpers::RedisKeyMapper.similarity_set_for(user_id)

          # Only calculate similarities for users who have rated the items that
          # this user has rated
          relevant_user_ids = Recommendable.config.ratable_classes.inject([]) do |memo, klass|
            liked_set = Recommendable::Helpers::RedisKeyMapper.liked_set_for(klass, user_id)
            disliked_set = Recommendable::Helpers::RedisKeyMapper.disliked_set_for(klass, user_id)

            item_ids = Recommendable.redis.sunion(liked_set, disliked_set)

            unless item_ids.empty?
              sets = item_ids.map do |id|
                liked_by_set = Recommendable::Helpers::RedisKeyMapper.liked_by_set_for(klass, id)
                disliked_by_set = Recommendable::Helpers::RedisKeyMapper.disliked_by_set_for(klass, id)

                [liked_by_set, disliked_by_set]
              end

              memo | Recommendable.redis.sunion(*sets.flatten)
            else
              memo
            end
          end

          similarity_values = relevant_user_ids.map { |id| similarity_between(user_id, id) }
          Recommendable.redis.pipelined do
            relevant_user_ids.zip(similarity_values).each do |id, similarity_value|
              next if id == user_id # Skip comparing with self.
              Recommendable.redis.zadd(similarity_set, similarity_value, id)
            end
          end

          if knn = Recommendable.config.nearest_neighbors
            length = Recommendable.redis.zcard(similarity_set)
            kfn = Recommendable.config.furthest_neighbors || 0

            Recommendable.redis.zremrangebyrank(similarity_set, kfn, length - knn - 1)
          end

          true
        end

        # Used internally to update this user's prediction values across all
        # recommendable types. This is called by the background worker.
        #
        # @private
        def update_recommendations_for(user_id)
          user_id = user_id.to_s

          nearest_neighbors = Recommendable.config.nearest_neighbors || Recommendable.config.user_class.count
          Recommendable.config.ratable_classes.each do |klass|
            rated_sets = [
              Recommendable::Helpers::RedisKeyMapper.liked_set_for(klass, user_id),
              Recommendable::Helpers::RedisKeyMapper.disliked_set_for(klass, user_id),
              Recommendable::Helpers::RedisKeyMapper.hidden_set_for(klass, user_id),
              Recommendable::Helpers::RedisKeyMapper.bookmarked_set_for(klass, user_id)
            ]
            temp_set = Recommendable::Helpers::RedisKeyMapper.temp_set_for(Recommendable.config.user_class, user_id)
            similarity_set  = Recommendable::Helpers::RedisKeyMapper.similarity_set_for(user_id)
            recommended_set = Recommendable::Helpers::RedisKeyMapper.recommended_set_for(klass, user_id)
            most_similar_user_ids, least_similar_user_ids = Recommendable.redis.pipelined do
              Recommendable.redis.zrevrange(similarity_set, 0, nearest_neighbors - 1)
              Recommendable.redis.zrange(similarity_set, 0, nearest_neighbors - 1)
            end

            # Get likes from the most similar users
            sets_to_union = most_similar_user_ids.inject([]) do |sets, id|
              sets << Recommendable::Helpers::RedisKeyMapper.liked_set_for(klass, id)
            end

            # Get dislikes from the least similar users
            sets_to_union = least_similar_user_ids.inject(sets_to_union) do |sets, id|
              sets << Recommendable::Helpers::RedisKeyMapper.disliked_set_for(klass, id)
            end

            return if sets_to_union.empty?

            # SDIFF rated items so they aren't recommended
            Recommendable.redis.sunionstore(temp_set, *sets_to_union)
            item_ids = Recommendable.redis.sdiff(temp_set, *rated_sets)
            scores = item_ids.map { |id| [predict_for(user_id, klass, id), id] }
            Recommendable.redis.pipelined do
              scores.each do |s|
                Recommendable.redis.zadd(recommended_set, s[0], s[1])
              end
            end

            Recommendable.redis.del(temp_set)

            if number_recommendations = Recommendable.config.recommendations_to_store
              length = Recommendable.redis.zcard(recommended_set)
              Recommendable.redis.zremrangebyrank(recommended_set, 0, length - number_recommendations - 1)
            end
          end

          true
        end

        # Predict how likely it is that a user will like an item. This probability
        # is not based on percentage. 0.0 indicates that the user will neither like
        # nor dislike the item. Values that approach Infinity indicate a rising
        # likelihood of liking the item while values approaching -Infinity
        # indicate a rising probability of disliking the item.
        #
        # @param [Fixnum, String] user_id the user's ID
        # @param [Class] klass the item's class
        # @param [Fixnum, String] item_id the item's ID
        # @return [Float] the probability that the user will like the item
        def predict_for(user_id, klass, item_id)
          user_id = user_id.to_s
          item_id = item_id.to_s

          liked_by_set = Recommendable::Helpers::RedisKeyMapper.liked_by_set_for(klass, item_id)
          disliked_by_set = Recommendable::Helpers::RedisKeyMapper.disliked_by_set_for(klass, item_id)
          similarity_sum = 0.0

          similarity_sum += similarity_total_for(user_id, liked_by_set)
          similarity_sum -= similarity_total_for(user_id, disliked_by_set)

          liked_by_count, disliked_by_count = Recommendable.redis.pipelined do
            Recommendable.redis.scard(liked_by_set)
            Recommendable.redis.scard(disliked_by_set)
          end
          prediction = similarity_sum / (liked_by_count + disliked_by_count).to_f
          prediction.finite? ? prediction : 0.0
        end

        def similarity_total_for(user_id, set)
          similarity_set = Recommendable::Helpers::RedisKeyMapper.similarity_set_for(user_id)
          ids = Recommendable.redis.smembers(set)
          similarity_values = Recommendable.redis.pipelined do
            ids.each do |id|
              Recommendable.redis.zscore(similarity_set, id)
            end
          end
          similarity_values.map(&:to_f).reduce(&:+).to_f
        end

        def update_score_for(klass, id)
          score_set = Recommendable::Helpers::RedisKeyMapper.score_set_for(klass)
          liked_by_set = Recommendable::Helpers::RedisKeyMapper.liked_by_set_for(klass, id)
          disliked_by_set = Recommendable::Helpers::RedisKeyMapper.disliked_by_set_for(klass, id)
          liked_by_count, disliked_by_count = Recommendable.redis.pipelined do
            Recommendable.redis.scard(liked_by_set)
            Recommendable.redis.scard(disliked_by_set)
          end

          return 0.0 unless liked_by_count + disliked_by_count > 0

          z = 1.96
          n = liked_by_count + disliked_by_count
          phat = liked_by_count / n.to_f

          begin
            score = (phat + z*z/(2*n) - z * Math.sqrt((phat*(1-phat)+z*z/(4*n))/n))/(1+z*z/n)
          rescue Math::DomainError
            score = 0
          end

          Recommendable.redis.zadd(score_set, score, id)
          true
        end
      end
    end
  end
end