ezpaarse-project/ezpaarse

View on GitHub
lib/logparser.js

Summary

Maintainability
D
2 days
Test Coverage
/* eslint global-require: 0 */
'use strict';

var URL     = require('url');
var moment  = require('moment');
var formats = require('../resources/auto-formats.js');
var extractors = {
  'ezproxy': require('./proxyformats/ezproxy.js'),
  'squid':   require('./proxyformats/squid.js'),
  'apache':  require('./proxyformats/apache.js')
};

/**
 * Create a log parser
 * @param  {Object} options -> [proxy] the proxy name (ex: ezproxy)
 *                             [format] the proxy format (ex: %h %u "%r")
 *                             [dateFormat] the date format (default: 'DD/MMM/YYYY:HH:mm:ss Z')
 *                             [logger] a winston-like object for logging
 * @return {Object} the parser
 */
module.exports = function logParser(options) {
  options            = options || {};
  options.dateFormat = options.dateFormat || 'DD/MMM/YYYY:HH:mm:ss Z';

  var parser = {};
  var extractor;
  var autoDetect = false;

  if (options.proxy) {
    if (typeof extractors[options.proxy] === 'function') {
      extractor = extractors[options.proxy](options.format, options.laxist);
    } else {
      extractor = null;
    }
  }

  /**
   * Try standard formats on the given line
   * Parser options are overriden
   * @param  {String} line the log line to parse
   * @return {Object}      the resulting EC or null
   */
  parser.guess = function (line) {
    autoDetect = true;

    for (var proxy in formats) {
      var proxyFormats = formats[proxy];

      for (var i = 0, l = proxyFormats.length; i < l; i++) {
        var autoFormat = proxyFormats[i];

        if (!autoFormat.format) { continue; }
        if (typeof extractors[proxy] !== 'function') { continue; }

        options.proxy      = proxy;
        options.format     = autoFormat.format;
        options.dateFormat = autoFormat.dateFormat || 'DD/MMM/YYYY:HH:mm:ss Z';

        extractor = extractors[proxy](autoFormat.format);
        if (!extractor) { continue; }

        var ec = parser.extract(line);

        if (ec) {
          if (options.logger) {
            options.logger?.info('Format found: ' + options.format + ' (' + proxy + ')');
          }
          return ec;
        }
      }
    }

    extractor = undefined;
    return null;
  };

  /**
   * Use the extractor on the given line to create an EC
   * @param  {String}  line the log line to parse
   * @param  {Boolean} opt  ignoreUrl: set to true to prevent parsing of the url
   *                        ignoreDate: set to true to prevent parsing of the date
   * @return {Object} the resulting EC or null
   */
  parser.extract = function (line, opt) {
    if (!extractor || !line) { return null; }

    var match = extractor.regexp.exec(line);
    if (!match) { return null; }

    var ec = {};

    for (var i = 0, l = extractor.properties.length; i < l; i++) {
      let value = match[i + 1];
      if (/^".*"$/.test(value)) {
        value = value.substr(1, value.length - 2);
      }
      if (value && value !== '-') {
        ec[extractor.properties[i]] = value;
      }
    }

    if (!opt || !opt.ignoreUrl) {
      this.parseUrl(ec);
    }
    if (!opt || !opt.ignoreDate) {
      this.parseDate(ec);
    }

    return ec;
  };

  /**
   * Parse a given log line
   * Try to guess the format if it's not provided
   * @param  {String} line the log line to parse
   * @return {Object}      the resulting EC or null
   */
  parser.parse = function (line) {
    if (!line) { return null; }

    if (extractor === undefined) {
      if (options.logger) { options.logger?.info('Trying to guess line format'); }
      parser.guess(line);
    }

    if (extractor) { return parser.extract(line); }

    return null;
  };

  parser.parseUrl = function (ec) {
    if (ec.url) {
      ec.url = ec.url.replace(/^\/http\//, 'http://');

      if (ec.url.charAt(0) !== '/') {
        // bibliopam urls must be normalized
        if (!/^https?:\/\//.test(ec.url)) { ec.url = 'http://' + ec.url; }
        try {
          ec.domain = URL.parse(ec.url).hostname;
        } catch (err) {
          ec.domain = '';
        }
      }
    }
  };

  parser.parseDate = function (ec, opt) {
    const ignoreFormat = opt && opt.ignoreFormat;

    if (ec.timestamp || ec.epoch) {
      const date   = moment.unix(ec.timestamp || ec.epoch);
      ec.timestamp = ec.timestamp || ec.epoch;
      ec.datetime  = date.format();
      ec.date      = date.format('YYYY-MM-DD');
    } else if (ec.datetime) {
      const date   = moment(ec.datetime, ignoreFormat ? null : options.dateFormat);
      ec.datetime  = date.format();
      ec.timestamp = date.unix();
      ec.date      = date.format('YYYY-MM-DD');
    }
  };

  parser.getFields = function () {
    if (!extractor || !extractor.properties) {
      return false;
    }

    return extractor.properties.slice();
  };

  parser.getRegexp = function (literal) {
    if (!extractor || !extractor.regexp) { return null; }
    return literal ? extractor.regexp.toString() : extractor.regexp;
  };

  parser.getFormat = function () {
    return options.format || null;
  };

  parser.getDateFormat = function () {
    return options.dateFormat || null;
  };

  parser.getProxy = function () {
    return options.proxy || null;
  };

  parser.autoDetect = function () {
    return autoDetect;
  };

  return parser;
};