alexlangberg/node-miningcompany

View on GitHub
example.js

Summary

Maintainability
A
0 mins
Test Coverage
'use strict';

var Miningcompany = require('./lib/miningcompany.js');

// get headlines from frontpage of cnn
var maps = [
  {
    url: 'http://www.cnn.com',
    targets: 'h3'
  },
  {
    url: 'http://www.sitethatwillobviouslyfail.com',
    targets: 'h1'
  }
];

// trip every 10 seconds
var options = {
  schedule: {
    second: [0, 10, 20, 30, 40, 50]
  }
};

var company = new Miningcompany(maps, options);

company.on('open', function() {
  console.log('Miningcompany open!');
})
.on('shut', function() {
  console.log('Miningcompany closed!');
})
.on('cart', function(cart, s, v) {

  // prepare your custom cart
  var finalCart = {
    uuid: cart.uuid,
    start: cart.started,
    finished: cart.finished,
    results: []
  };

  // use validator to check that cart has a valid UUID
  console.log('UUID: ' + v.isUUID(cart.uuid));

  // go through each result, we ignore errors in the cart
  cart.results.forEach(function(result) {
    var finalResult = {
      url: result.map.url,
      headlines: []
    };

    // bind $ to the cheerio instance of this result and find all hits
    var $ = result.dom;
    var hits = $(result.map.targets);

    // go through each hit. Note that "each" is a cheerio function!
    hits.each(function() {

      // get link of each headline
      var href = company.getClosestHref(result.map.url, $(this));

      // get text using cheerio
      var text = $(this).text();

      // clean text using underscore.string
      text = s(text).replaceAll(' ', ' ')
        .unescapeHTML()
        .stripTags()
        .clean()
        .value();

      finalResult.headlines.push({
        text: text,
        href: href
      });
    });

    finalCart.results.push(finalResult);
  });

  // show cart
  console.log(finalCart);

  // show 3 first headlines of first result of cart
  console.log(finalCart.results[0].headlines[0]);
  console.log(finalCart.results[0].headlines[1]);
  console.log(finalCart.results[0].headlines[2]);
})
.open();

// shut down after 35 seconds
setTimeout(function() {
  company.shut();
}, 35000);