nelsonic/arana

View on GitHub
lib/recorder.js

Summary

Maintainability
F
3 days
Test Coverage
var es    = require('esta');
var aguid = require('aguid');
var url   = require('url');
var url_transform = require('./url_transform');

/**
 * determine_record_type works out which kind of record a scraped page
 * represents by looking at which fields are present (truthy) on it.
 *
 * Checks are made in priority order: `pcount` means an org, `stars` means
 * a repo and `state` means an issue. Anything else is treated as a person.
 * A field holding a falsy value (0, '', undefined) counts as absent.
 *
 * @param {Object} data - the scraped page data
 * @returns {String} one of 'org', 'repo', 'issue' or 'person'
 */
function determine_record_type(data) {
  if (data.pcount) {
    return 'org';
  }
  if (data.stars) {
    return 'repo';
  }
  if (data.state) {
    return 'issue';
  }
  return 'person'; // nothing matched, so fall back to the default
}


/**
 * add_people merges a scraped list of people into the profile record the
 * page belongs to, e.g. when the url is /followers the entries are added
 * to the followers object on that profile.
 *
 * The list is stored on the record under `method` as an object keyed by
 * entry, whose value is the time (ms since epoch) at which the entry was
 * first recorded. Entries that are already present keep their original
 * timestamp. When no record exists yet, `data` itself becomes the record.
 *
 * @param {Object} data - scraped page data; `data.url` is used to derive
 *   the record id and `data.entries` lists the people found on the page
 *   (a missing `entries` is treated as an empty list).
 * @param {String} method - field to store the people under,
 *   e.g. 'followers' or 'following'.
 * @param {Function} callback - called with the response of es.update.
 */
function add_people(data, method, callback) {
  data = url_transform(data, method);
  var record = {
    id:    aguid(data.url),
    index: 'github',
    type:  determine_record_type(data)
  };
  // a page that lists nobody may carry no entries at all
  var entries = data.entries || [];
  var has_own = Object.prototype.hasOwnProperty;

  // fetch the profile/org record if it exists:
  es.read(record, function (res) {
    var doc;
    if (res.found) {
      doc = res._source;
      if (!doc[method]) {
        console.log("NO *EXISTING* Followers")
      }
    }
    else { // no profile record found so create one from the scraped data
      console.log('NOT FOUND');
      doc = data;
      doc.id    = record.id;
      doc.index = record.index;
      doc.type  = record.type;
      delete doc.entries; // the entries are stored under doc[method] instead
    }

    doc[method] = doc[method] || {}; // empty if not exist
    var people = doc[method];
    var now = Date.now(); // one timestamp for everyone seen on this page

    entries.forEach(function (person) {
      // own-property check: O(1) and safe for names such as "constructor"
      if (!has_own.call(people, person)) {
        people[person] = now; // first time we have seen this person
      }
    });

    es.update(doc, function (res2) {
      return callback(res2);
    });
  });
}

/**
 * add_repos attaches a scraped list of repositories to the profile/org
 * record identified by the (transformed) page url.
 *
 * If the record already exists, only repos whose `name` is not yet in its
 * `repos` list are appended. Otherwise a new record is built from every
 * field of `data`, with `entries` stored as `repos`.
 *
 * @param {Object} data - scraped page data with `url` and `entries`
 * @param {Function} callback - called with the response of es.update
 */
function add_repos(data, callback){
  data = url_transform(data);
  var target = {
    id:    aguid(data.url),
    url:   data.url,
    index: 'github',
    type:  determine_record_type(data)
  };

  function done(update_res) {
    return callback(update_res);
  }

  // look up the profile/org record first
  es.read(target, function (read_res) {
    if (!read_res.found) {
      // nothing stored yet: build the record from the scraped data
      target.repos = [];
      Object.keys(data).forEach(function (field) {
        target[field] = data[field];
      });
      delete target.entries; // the entries are kept as "repos"
      data.entries.forEach(function (repo) {
        target.repos.push(repo);
      });
      es.update(target, done);
      return;
    }

    var doc = read_res._source;
    doc.repos = doc.repos || []; // start an empty list if there is none
    var known_names = doc.repos.map(function (repo) {
      return repo.name;
    });
    data.entries.forEach(function (repo) {
      var is_new = known_names.indexOf(repo.name) === -1;
      if (is_new) {
        doc.repos.push(repo);
      }
    });
    es.update(doc, done);
  });
}

/**
 * add list of issues to the repo record
 *
 * The repo record is looked up by the (transformed) page url. Issues whose
 * `url` is not already stored are appended to the record's `issues` list,
 * and every other field of `data` (e.g. the number of open & closed issues)
 * is copied onto the record. If the repo record does not exist yet, a new
 * one of type 'repo' is created.
 *
 * The raw `entries` list is never written to the record: its content is
 * stored as `issues`. `data` itself is left unmodified by this function.
 *
 * @param {Object} data - scraped issues page; `data.url` is used to derive
 *   the record id and `data.entries` lists the issues (a missing `entries`
 *   is treated as an empty list).
 * @param {Function} callback - called with the response of es.update.
 */
function add_issues(data, callback){
  data = url_transform(data); // url of the repo
  console.log('add_issues URL transformed to: ' + data.url + " > "+aguid(data.url));
  var record = {
    id:    aguid(data.url),
    url:   data.url,
    index: 'github',
    type:  'repo'
  };
  var entries = data.entries || [];

  // fetch the repo record if it exists:
  es.read(record, function (res) {
    var doc;
    if (res.found) {
      doc = res._source;
      doc.issues = doc.issues || []; // empty if not exist
    }
    else {
      console.log("add_issues Record: "+data.url+" NOT FOUND!")
      doc = record;
      doc.issues = [];
    }

    // index the urls that were stored before this page was scraped so each
    // membership check is a constant-time lookup
    var stored_urls = Object.create(null);
    doc.issues.forEach(function (issue) {
      stored_urls[issue.url] = true;
    });
    entries.forEach(function (issue) {
      if (!stored_urls[issue.url]) {
        doc.issues.push(issue);
      }
    });

    // transfer all meta-data to the repo record (adds number of open &
    // closed issues), skipping `entries` which has been saved as `issues`
    Object.keys(data).forEach(function (k) {
      if (k !== 'entries') {
        doc[k] = data[k];
      }
    });

    es.update(doc, function (res2) {
      return callback(res2);
    });
  });
}

/**
 * add list of Labels to the repo record
 *
 * Looks the repo record up by the (transformed) page url and replaces its
 * `labels` list with a copy of `data.entries`. If the record does not
 * exist, a minimal one of type 'repo' is saved with just the labels.
 *
 * @param {Object} data - scraped labels page with `url` and `entries`
 * @param {Function} callback - called with the response of es.update
 */
function add_labels(data, callback){
  data = url_transform(data); // url of the repo
  var fresh_record = {
    id:    aguid(data.url),
    url:   data.url,
    index: 'github',
    type:  'repo'
  };
  // fetch the repo record if it exists:
  es.read(fresh_record, function (res) {
    // keep every field of the stored record when there is one
    var doc = res.found ? res._source : fresh_record;
    doc.labels = []; // the previous list is always discarded
    data.entries.forEach(function (label) {
      doc.labels.push(label);
    });
    es.update(doc, function (update_res) {
      return callback(update_res);
    });
  });
}


/**
 * add list of Milestones to the repo record
 *
 * Looks the repo record up by the (transformed) page url and replaces its
 * `milestones` list with a copy of `data.entries`. If the record does not
 * exist, a minimal one of type 'repo' is saved with just the milestones.
 *
 * @param {Object} data - scraped milestones page with `url` and `entries`
 * @param {Function} callback - called with the response of es.update
 */
function add_milestones(data, callback){
  data = url_transform(data); // url of the repo
  var fresh_record = {
    id:    aguid(data.url),
    url:   data.url,
    index: 'github',
    type:  'repo'
  };
  // fetch the repo record if it exists:
  es.read(fresh_record, function (res) {
    // keep every field of the stored record when there is one
    var doc = res.found ? res._source : fresh_record;
    doc.milestones = []; // the previous list is always discarded
    data.entries.forEach(function (milestone) {
      doc.milestones.push(milestone);
    });
    es.update(doc, function (update_res) {
      return callback(update_res);
    });
  });
}

/**
 * recorder is a general purpose record saver
 */
module.exports = function save_record_to_elasticsearch (data, callback) {
  // console.log(" - - - - - - - - - - - - - - - URL:")
  // console.log(data.url);
  // if the page scraped was a followers page

  if(data.url.match(/^https:\/\/github\.com\/[\w-]+\/followers/g)) {
    return add_people(data, 'followers', callback)
  } // see: https://github.com/nelsonic/arana/issues/4
  else if(data.url.match(/^https:\/\/github\.com\/[\w-]+\/following/g)) {
    return add_people(data, 'following', callback)
  }
  else if(data.url.match(/tab=repositories/)){
    return add_repos(data, callback);
  } //
  else if(data.pcount) {
    return add_repos(data, callback);
  }
  else if(data.url.match('/issues') && data.open) { // matches issues listing
    return add_issues(data, callback);
  }
  else if(data.url.match('/labels')) { // matches issues listing
    return add_labels(data, callback);
  }
  else if(data.url.match('/milestones')) {
    return add_milestones(data, callback);
  }
  else {
    // console.log(data);
    data = url_transform(data); // strip domain to save memory
    data.id    = aguid(data.url)
    data.index = 'github'
    data.type  = determine_record_type(data);
    // console.log(" - - - - >> Saving Data:")
    // console.log(data);

    es.update(data, function(res) {
      return callback(res);
    })
  }

}


// The internal helpers are attached to the exported recorder function so
// that the functional tests can call them directly.
// (url_transform is not re-exported here; it is required from ./url_transform)
var recorder = module.exports;
recorder.record_type    = determine_record_type;
recorder.add_people     = add_people;
recorder.add_repos      = add_repos;
recorder.add_issues     = add_issues;
recorder.add_labels     = add_labels;
recorder.add_milestones = add_milestones;

// if follower is not already a key in followers Object
// we add it with a value of today's timestamp

// unfollowed