Asymmetrik/akin

View on GitHub
lib/similarity.service.js

Summary

Maintainability
A
1 hr
Test Coverage
'use strict';

const _ = require('lodash'),
    Promise = require('bluebird'),

    ModelService = require('./model.service'),

    UserSimilarity = ModelService.model.UserSimilarity;

/**
 * How many users will be processed at a time
 */
let concurrency = 2;

/**
 * Guarantees the same unique value is generated for two user IDs
 * no matter which order they are used in
 */
const getUserSimilarityKey = (user1, user2) => {
    return _.gt(user1 + '', user2 + '') ?
            user1 + '-' + user2 :
            user2 + '-' + user1;
};

/**
 *
 */
const getUserSimilarityNumber = (userWeightRow1, user1Items, userWeightRow2, user2Items) => {

    var user1 = userWeightRow1.user,
        user2 = userWeightRow2.user;

    if( _.isEqual(user1, user2) ) {
        // users have similarity of 1 compared to themselves. skip unnecessary processing
        return 1;
    }

    var matrixSum = 0;

    _.forEach(_.keys(user1Items), (potentialItemIdMatch) => {
        if( _.has(user2Items, potentialItemIdMatch) ) {

            // Multiply the values where the users have weights for the same item
            var u1weight = user1Items[potentialItemIdMatch][0].weight,
                u2weight = user2Items[potentialItemIdMatch][0].weight;

            // log.debug('User %s and %s have weights of %s and %s for item %s', user1, user2, u1weight, u2weight, potentialItemIdMatch);
            matrixSum += (u1weight * u2weight);

        }
        else {
            // log.debug('User %s does not have a score for item %s, so skipping', user2, potentialItemIdMatch);
        }
    });

    var similarity = matrixSum / ( userWeightRow1.rowWeight * userWeightRow2.rowWeight );

    return similarity || 0;
};

/**
 *
 */
const calculateUserSimilarity = (allUserSimilarities, user1) => {

    return new Promise((resolve, reject) => {

        var currentUserItemWeights = null;
        // log.debug('Getting item weights for %s', user1);

        // 1. Find User's Item Weights
        ModelService.getItemWeightsForUser(user1)
            // 2. Get all item IDs for this user
            .then( (userItemWeight) => {
                userItemWeight = _.isEmpty(userItemWeight) ? {} :  userItemWeight;
                // log.debug('Calculating similarity for %s based on item weights: %j', user1, userItemWeight);
                currentUserItemWeights = userItemWeight;
                return _.map(userItemWeight.itemWeights, 'item');
            })
            // 3. Get a cursor for all other user item weights for that user's items
            .then( ModelService.getCursorForUserItemWeightsForItems )
            // 4. Iterate through the user item weights to calculate similarity
            .then( (otherUserItemWeightsCursor) => {

                var userSimilarities = [];
                // log.debug('Got item weights cursor for %s', user1);

                var user1Items = _.groupBy(currentUserItemWeights.itemWeights, 'item');

                otherUserItemWeightsCursor.on('data', (otherUserItemWeight) => {
                    var user2 = otherUserItemWeight.user;
                    // log.debug('Found user %s with %s item weights', user2, _.size(otherUserItemWeight.itemWeights));
                    var userMatchKey = getUserSimilarityKey(user1, user2);
                    if( allUserSimilarities[userMatchKey] ) {
                        // log.debug('Skipping calculation of user similarity because it already exists for %s', userMatchKey);
                        return;
                    }

                    // indicate to other promises that this is being processed to reduce
                    allUserSimilarities[userMatchKey] = true;

                    var user2Items = _.groupBy(otherUserItemWeight.itemWeights, 'item');

                    var similarity = getUserSimilarityNumber(currentUserItemWeights, user1Items, otherUserItemWeight, user2Items);

                    // log.debug('Found cosine similarity between %s and %s to be %s', user1, user2, similarity);

                    userSimilarities.push({
                        users: [user1, user2],
                        similarity: similarity
                    });
                });

                otherUserItemWeightsCursor.on('end', () => {
                    // log.info('Found %s similar users to %s', userSimilarities.length, user1);
                    // Resolve all of this user's similarities to save at once
                    resolve(userSimilarities);
                });

            })
            .catch( (err) => {
                reject(err);
            });


    });

};

/**
 *
 */
const calculateAndSaveUserSimilarity = (allUserSimilarities, userId) => {

    return calculateUserSimilarity(allUserSimilarities, userId)
        .then((usersSimilarities) => {
            if( _.isEmpty(usersSimilarities) ) {
                return Promise.resolve();
            }

            return UserSimilarity.insertMany(usersSimilarities)
                    .then(() => {
                        return Promise.resolve();
                    });
        });
};

/**
 *
 */
const calculateUserSimilarities = (allUserIds) => {
    const allUserSimilarities = {};
    return Promise.map(allUserIds, calculateAndSaveUserSimilarity.bind(this, allUserSimilarities), { concurrency: concurrency });
};

/**
 *
 */
const recalculateUserSimilarities = () => {

    return ModelService.dropUserSimilarities()
        .then(ModelService.getAllUserIdsWithActivity)
        .then( calculateUserSimilarities );

};

/**
 * Updates the concurrency level used for calculating user-to-user similarities. Default: 2
 * @param {number} newConcurrency - the number of concurrent users whose similarity will be calculated against other users
 */
const setConcurrency = (newConcurrency) => {
    concurrency = newConcurrency;
};

module.exports = {
    recalculateUserSimilarities,
    setConcurrency
};