18F/domain-scan

View on GitHub
lambda/headless/lambda_handler.js

Summary

Maintainability
A
2 hrs
Test Coverage
'use strict';

/**
* Node-based Lambda handler for headless browser scan functions.
*/

exports.handler = (event, context, callback) => {
  var start_time = new Date().getTime() / 1000;

  // Tell Lambda to shut it down after the callback executes,
  // even if the container still has stuff (e.g. Chrome) running.
  context.callbackWaitsForEmptyEventLoop = false;

  var domain = event.domain;
  var options = event.options || {};
  var name = event.scanner;
  var environment = event.environment || {};

  // Log all events for debugging purposes.
  console.log(event);

  // TODO: error handling around these.
  var base = require("./scanners/headless/base");
  var scanner = require("./scanners/" + name);

  base.scan(
    domain, environment, options,
    getBrowser, scanner,
    function(err, data) {
      // We capture start and end times locally as well, but it's
      // useful to know the start/end from Lambda's vantage point.
      var end_time = new Date().getTime() / 1000;
      var duration = end_time - start_time;

      var response = {
        lambda: {
          log_group_name: context.logGroupName,
          log_stream_name: context.logStreamName,
          request_id: context.awsRequestId,
          memory_limit: context.memoryLimitInMB,
          start_time: start_time,
          end_time: end_time,
          measured_duration: duration
        }
      }

      if (err) {
        if (err instanceof Error)
          response.error = err.stack;
        else
          response.error = err;
      }

      if (data === undefined) {
        response.data = null;
        if (!err)
          response.error = "Data came back undefined for some reason.";
      }
      else
        response.data = data;

      // TODO: JSON datetime sanitization, like the Python handler does.
      callback(null, response);
    }
  );
};


const puppeteer = require('puppeteer');
var path = require('path');
var fs = require('fs');
var tar = require('tar');

const setupLocalChrome = () => {
  return new Promise((resolve, reject) => {
    fs.createReadStream(localChromePath)
    .on('error', (err) => reject(err))
    .pipe(tar.x({
      C: setupChromePath,
    }))
    .on('error', (err) => reject(err))
    .on('end', () => resolve());
  });
};

const localChromePath = path.join('headless_shell.tar.gz');
const setupChromePath = path.join(path.sep, 'tmp');
const executablePath = path.join(
    setupChromePath,
    'headless_shell'
);


const chromeOptions = [
  // Resolves error:
  //    error when launch(); No usable sandbox! Update your kernel
  '--no-sandbox',

  // Resolves error:
  //    error when launch(); Failed to load libosmesa.so
  '--disable-gpu',

  // Resolves error: freeze when newPage()
  '--single-process'
];


// Async function to load Chrome from the Lambda container.
var getBrowser = (() => {
  let browser;
  return async () => {
    // idempotence
    if (typeof browser === 'undefined' || !await isBrowserAvailable(browser)) {

      // extract .tar.gz to tmp/
      await setupLocalChrome();

      // load the browser from its extracted path
      browser = await puppeteer.launch({
        headless: true,
        executablePath: executablePath,
        args: chromeOptions,
        ignoreHTTPSErrors: true,

        // puppeteer 1.1.0 broke dumpio. Should be fixed soon.
        // dumpio: true
      });

      var version = await browser.version();
      console.log("Launched: " + version);
    }
    return browser;
  };
})();

const isBrowserAvailable = async (browser) => {
  try {
    await browser.version();
  } catch (e) {
    console.log(e); // not opened etc.
    return false;
  }
  return true;
};


/** local test run **/

if (process.env.TEST_LOCAL) {
  var domain = process.argv[2];

  var event = {
    domain: domain,
    scanner: "third_parties",
    options: {},
    environment: {url: "https://" + domain + "/"}
  }
  var context = {}
  var callback = function(err, data) {
    console.log("Done:\n");

    if (err) {
      console.log("Error:")
      if (err instanceof Error)
        console.log(err.stack);
      else
        console.log(error);
    }

    if (data) {
      console.log("Data:")
      console.log(data);
    }
  }

  exports.handler(event, context, callback);
}