haraka/haraka-plugin-uribl

View on GitHub
index.js

Summary

Maintainability
F
4 days
Test Coverage
// Look up URLs in SURBL

const url       = require('url');
const dns       = require('dns');
const net       = require('net');
const tlds      = require('haraka-tld');

const net_utils = require('haraka-net-utils');
const utils     = require('haraka-utils');

// Default regexps to extract the URIs from the message
// All three carry the /g flag, so exec() is stateful (it advances lastIndex);
// extract_urls loops on exec() until it returns null.

// scheme://[user@]a.b.c.d where each of the four parts is decimal or 0x-hex
const numeric_ip = /\w{3,16}:\/+(\S+@)?(\d+|0[xX][0-9A-Fa-f]+)\.(\d+|0[xX][0-9A-Fa-f]+)\.(\d+|0[xX][0-9A-Fa-f]+)\.(\d+|0[xX][0-9A-Fa-f]+)/gi;
// Bare host names ending in a listed TLD (or any two-letter TLD), optionally
// preceded by a URL-encoded %2F, %3D or %40 (also in %25-double-encoded form).
// Capture group 1 is the host name.
// Declared with `let` because exports.register replaces it when haraka-tld
// provides a TLD list.
let schemeless = /(?:%(?:25)?(?:2F|3D|40))?((?:www\.)?[a-zA-Z0-9][a-zA-Z0-9\-.]{0,250}\.(?:aero|arpa|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel|xxx|[a-zA-Z]{2}))(?!\w)/gi;
// scheme://[user@]host.tld — group 1 is the URI up to the end of the host,
// group 2 is the host alone. Also replaced by exports.register.
let schemed    = /(\w{3,16}:\/+(?:\S+@)?([a-zA-Z0-9][a-zA-Z0-9\-.]+\.(?:aero|arpa|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel|xxx|[a-zA-Z]{2})))(?!\w)/gi;

// Lower-cased domains that must never be looked up; filled by
// exports.load_uribl_exludes and read by check_excludes_list.
const excludes = {};

exports.register = function () {

  // Override regexps if top_level_tlds file is present
  if (tlds.top_level_tlds && Object.keys(tlds.top_level_tlds).length) {
    this.logdebug('Building new regexps from TLD file');
    const re_schemeless = `(?:%(?:25)?(?:2F|3D|40))?((?:www\\.)?[a-zA-Z0-9][a-zA-Z0-9\\-.]{0,250}\\.(?:${Object.keys(tlds.top_level_tlds).join('|')}))(?!\\w)`;
    schemeless = new RegExp(re_schemeless, 'gi');
    const re_schemed = `(\\w{3,16}:\\/+(?:\\S+@)?([a-zA-Z0-9][a-zA-Z0-9\\-.]+\\.(?:${Object.keys(tlds.top_level_tlds).join('|')})))(?!\\w)`;
    schemed = new RegExp(re_schemed, 'gi');
  }

  this.load_uribl_ini()
  this.load_uribl_exludes()

  if (this.zones.length === 0) {
    this.logerror('aborting: no zones configured');
  }
  else {
    this.register_hook('lookup_rdns', 'lookup_remote_ip');
    this.register_hook('helo'       , 'lookup_ehlo')
    this.register_hook('ehlo'       , 'lookup_ehlo')
    this.register_hook('mail'       , 'lookup_mailfrom')
    this.register_hook('data'       , 'enable_body_parsing')
    this.register_hook('data_post'  , 'lookup_header_zones')
  }
}

exports.load_uribl_ini = function () {
  const plugin = this
  this.cfg = this.config.get('uribl.ini', () => {
    plugin.load_uribl_ini()
  })

  this.zones = Object.keys(this.cfg).filter(a => a !== 'main')

  // defaults
  if (!this.cfg.main.max_uris_per_list) {
    this.cfg.main.max_uris_per_list = 20;
  }
}

exports.load_uribl_exludes = function () {
  this.config.get('uribl.excludes', 'list').forEach(domain => {
    excludes[domain.toLowerCase()] = 1;
  });
}

/**
 * Tests whether a host, or any parent domain of it, is on the exclude list.
 *
 * Candidates are built from the right: for 'www.example.com' the checks are
 * 'com', 'example.com', then 'www.example.com'.
 *
 * @param {string} host - host name, expected lower-cased to match the keys
 *   stored in the excludes table.
 * @returns {boolean} true if any candidate is present in excludes.
 */
function check_excludes_list (host) {
  const labels = host.split('.').reverse();
  // The accumulator must live outside the loop: re-declaring it per iteration
  // would drop the suffix built so far, so only the bare TLD could ever match.
  let check = '';
  for (const label of labels) {
    check = check ? `${label}.${check}` : label;
    if (excludes[check]) return true;
  }
  return false;
}

// IS: IPv6 compatible (maybe; if the BL supports IPv6 requests)
exports.do_lookups = function (connection, next, hosts, type) {
  const plugin = this;

  // Store the results in the correct place based on the lookup type
  const results = connection?.transaction?.results || connection?.results;
  if (!results) return next();

  if (typeof hosts === 'string') hosts = [ hosts ];

  if (!hosts || !hosts.length) {
    connection.logdebug(plugin, `(${type}) no items found for lookup`);
    results.add(plugin, {skip: type});
    return next();
  }

  connection.logdebug(plugin, `(${type}) found ${hosts.length} items for lookup` );
  utils.shuffle(hosts);

  let j;
  const queries = {};
  for (let host of hosts) {
    host = host.toLowerCase();
    connection.logdebug(plugin, `(${type}) checking: ${host}`);
    // Make sure we have a valid TLD
    if (!net.isIPv4(host) && !net.isIPv6(host) && !tlds.top_level_tlds[(host.split('.').reverse())[0]]) {
      continue;
    }
    // Check the exclusion list
    if (check_excludes_list(host)) {
      results.add(plugin, {skip: `excluded domain:${host}`});
      continue;
    }
    // Loop through the zones
    for (j=0; j < plugin.zones.length; j++) {
      const zone = plugin.zones[j];
      if (zone === 'main') continue;  // skip config
      if (!plugin.cfg[zone] || (plugin.cfg[zone] && !/^(?:1|true|yes|enabled|on)$/i.test(plugin.cfg[zone][type]))) {
        results.add(plugin, {skip: `${type} unsupported for ${zone}` });
        continue;
      }
      // Convert in-addr.arpa into bare IPv4/v6 lookup
      const arpa = host.split(/\./).reverse();
      if (arpa.shift() === 'arpa'){
        const ip_format = arpa.shift();
        if ( ip_format === 'in-addr') {
          if (arpa.length < 4) continue; // Only full IP addresses
          host = arpa.join('.');
        }
        else if ( ip_format === 'ip6') {
          if (arpa.length < 32) continue; // Only full IP addresses
          host = arpa.join('.');
        }
      }
      let lookup;

      // Handle zones that do not allow IP queries (e.g. Spamhaus DBL)
      if (net.isIPv4(host)) {
        if (/^(?:1|true|yes|enabled|on)$/i.test(plugin.cfg[zone].no_ip_lookups)) {
          results.add(plugin, {skip: `IP (${host}) not supported for ${zone}` });
          continue;
        }
        // Skip any private IPs
        if (net_utils.is_private_ip(host)) {
          results.add(plugin, {skip: 'private IP' });
          continue;
        }
        // Reverse IP for lookup
        lookup = host.split(/\./).reverse().join('.');
      }
      else if (net.isIPv6(host)) {
        if (/^(?:1|true|yes|enabled|on)$/i.test(plugin.cfg[zone].not_ipv6_compatible) || /^(?:1|true|yes|enabled|on)$/i.test(plugin.cfg[zone].no_ip_lookups)) {
          results.add(plugin, {skip: `IP (${host}) not supported for ${zone}` });
          continue;
        }
        // Skip any private IPs
        if (net_utils.is_private_ip(host)) {
          results.add(plugin, {skip: 'private IP' });
          continue;
        }
        // Reverse IP for lookup
        lookup = net_utils.ipv6_reverse(host);
      }
      // Handle zones that require host to be stripped to a domain boundary
      else if (/^(?:1|true|yes|enabled|on)$/i.test(plugin.cfg[zone].strip_to_domain)) {
        lookup = (tlds.split_hostname(host, 3))[1];
      }
      // Anything else..
      else {
        lookup = host;
      }

      if (!lookup) continue;
      if (plugin.cfg[zone].dqs_key) {
        lookup = `${lookup}.${plugin.cfg[zone].dqs_key}`;
      }
      if (!queries[zone]) queries[zone] = {};
      if (Object.keys(queries[zone]).length > plugin.cfg.main.max_uris_per_list) {
        connection.logwarn(plugin, `discarding lookup ${lookup} for zone ${zone} maximum query limit reached`);
        results.add(plugin, {skip: `max query limit for ${zone}` });
        continue;
      }
      queries[zone][lookup] = 1;
    }
  }

  // Flatten object into array for easier querying
  const queries_to_run = [];
  for (j=0; j < Object.keys(queries).length; j++) {
    for (const query of Object.keys(queries[Object.keys(queries)[j]])) {
      // host/domain, zone
      queries_to_run.push( [ query, Object.keys(queries)[j] ] );
    }
  }

  if (!queries_to_run.length) {
    results.add(plugin, {skip: `${type} (no queries)` });
    return next();
  }

  utils.shuffle(queries_to_run); // Randomize the order

  // Perform the lookups
  let pending_queries = 0;

  let called_next = false;
  function nextOnce (code, msg) {
    if (called_next) return;
    called_next = true;
    next(code, msg);
  }

  function conclude_if_no_pending () {
    if (pending_queries !== 0) return;
    results.add(plugin, {pass: type});
    nextOnce();
  }

  queries_to_run.forEach(query => {
    let lookup = query.join('.');
    // Add root dot if necessary
    if (lookup[lookup.length-1] !== '.') {
      lookup = `${lookup}.`;
    }

    pending_queries++;
    dns.resolve4(lookup, (err, addrs) => {

      pending_queries--;
      connection.logdebug(plugin, `${lookup} => (${(err) ? err : addrs.join(', ')})`);

      if (err) return conclude_if_no_pending();

      let skip = false;
      function do_reject (msg) {
        if (skip) return;
        if (called_next) return;
        if (!msg) msg = `${query[0]} blacklisted in ${query[1]}`;

        // Check for custom message
        if (plugin.cfg[query[1]] && plugin.cfg[query[1]].custom_msg) {
          msg = plugin.cfg[query[1]].custom_msg
            .replace(/\{uri\}/g,  query[0])
            .replace(/\{zone\}/g, query[1]);
        }
        results.add(plugin, {fail: [type, query[0], query[1]].join('/') });
        nextOnce(DENY, msg);
      }

      // Optionally validate first result against a regexp
      if (plugin.cfg[query[1]] && plugin.cfg[query[1]].validate) {
        const re = new RegExp(plugin.cfg[query[1]].validate);
        if (!re.test(addrs[0])) {
          connection.logwarn(plugin, `ignoring result (${addrs[0]}) for: ${lookup} as it did not match validation rule`);
          skip = true;
        }
      }

      // Check for optional bitmask
      if (plugin.cfg[query[1]] && plugin.cfg[query[1]].bitmask) {
        // A bitmask zone should only return a single result
        // We only support a bitmask of up to 128 in a single octet
        const last_octet = Number((addrs[0].split('.'))[3]);
        const bitmask = Number(plugin.cfg[query[1]].bitmask);
        if ((last_octet & bitmask) > 0) {
          connection.loginfo(plugin, `found ${query[0]} in zone ${query[1]} (${addrs.join(',')}; bitmask=${bitmask})`);
          do_reject();
        }
        else {
          connection.logdebug(plugin, `ignoring result (${addrs[0]}) for: ${lookup} as the bitmask did not match`);
          skip = true;
        }
      }
      else {
        connection.loginfo(plugin, `found ${query[0]} in zone ${query[1]} (${addrs.join(',')})`);
        do_reject();
      }

      conclude_if_no_pending();
    });
  });

  conclude_if_no_pending();
}

/**
 * Wraps a hook callback so it fires at most once and is forced to fire when
 * the lookups take too long.
 *
 * The deadline is (cfg.main.timeout, default 30) minus 2 seconds. When it
 * expires, an `err: "<type> timeout"` result is added to connection.results
 * and next is called with no arguments.
 *
 * @param {object} plugin - plugin instance; cfg.main.timeout is read from it.
 * @param {object} connection - used for logging and result storage.
 * @param {function} next - the callback to guard.
 * @param {string} type - label used in the timeout result.
 * @returns {function} (code, msg) wrapper around next.
 */
function getTimedNext (plugin, connection, next, type) {
  const timeout_secs = (plugin.cfg.main?.timeout || 30) - 2;
  let already_called = false;

  const timer = setTimeout(() => {
    connection.logdebug(plugin, 'timeout');
    connection.results.add(plugin, {err: `${type} timeout` });
    callNextOnce();
  }, timeout_secs * 1000);

  function callNextOnce (code, msg) {
    // Always disarm the timer, even on repeat calls
    clearTimeout(timer);
    if (already_called) return;
    already_called = true;
    next(code, msg);
  }

  return callNextOnce;
}

exports.lookup_remote_ip = function (next, connection) {
  const plugin = this;

  const timedNext = getTimedNext(plugin, connection, next, 'rdns')

  dns.reverse(connection.remote.ip, (err, rdns) => {
    if (err) {
      switch (err.code) {
        case dns.NXDOMAIN:
        case dns.NOTFOUND:
          break;
        default:
          connection.results.add(plugin, {err });
      }
      return timedNext();
    }
    // console.log(`lookup_remote_ip, ${connection.remote.ip} resolves to ${rdns}`)
    plugin.do_lookups(connection, timedNext, rdns, 'rdns');
  })
}

exports.lookup_ehlo = function (next, connection, helo) {
  const timedNext = getTimedNext(this, connection, next, 'helo')

  // Handle IP literals
  let literal;
  if ((literal = net_utils.get_ipany_re('^\\[(?:IPv6:)?', '\\]$','').exec(helo))) {
    this.do_lookups(connection, timedNext, literal[1], 'helo');
  }
  else {
    this.do_lookups(connection, timedNext, helo, 'helo');
  }
}

exports.lookup_mailfrom = function (next, connection, params) {
  const timedNext = getTimedNext(this, connection, next, 'envfrom')
  this.do_lookups(connection, timedNext, params[0].host, 'envfrom');
}

exports.enable_body_parsing = (next, connection) => {
  if (connection?.transaction) {
    connection.transaction.parse_body = true;
  }
  next();
}

exports.lookup_header_zones = function (next, connection) {

  const email_re = /<?[^@]+@([^> ]+)>?/;
  const plugin = this;
  const trans = connection.transaction;
  const timedNext = getTimedNext(this, connection, next, 'ms, typeg')

  // From header
  function do_from_header (cb) {
    const from = trans.header.get_decoded('from');
    const fmatch = email_re.exec(from);
    if (fmatch) {
      return plugin.do_lookups(connection, cb, fmatch[1], 'from');
    }
    cb();
  }

  // Reply-To header
  function do_replyto_header (cb) {
    const replyto = trans.header.get('reply-to');
    const rmatch = email_re.exec(replyto);
    if (rmatch) {
      return plugin.do_lookups(connection, cb, rmatch[1], 'replyto');
    }
    cb();
  }

  // Message-Id header
  function do_msgid_header (cb) {
    const msgid = trans.header.get('message-id');
    const mmatch = /@([^>]+)>/.exec(msgid);
    if (mmatch) {
      return plugin.do_lookups(connection, cb, mmatch[1], 'msgid');
    }
    cb();
  }

  // Body
  function do_body (cb) {
    const urls = {};
    extract_urls(urls, trans.body, connection, plugin);
    plugin.do_lookups(connection, cb, Object.keys(urls), 'body');
  }

  const chain = [ do_from_header, do_replyto_header, do_msgid_header, do_body ];
  function chain_caller (code, msg) {
    if (code) return timedNext(code, msg);

    if (!chain.length) return timedNext();

    const next_in_chain = chain.shift();
    next_in_chain(chain_caller);
  }
  chain_caller();
}

/**
 * Collects the host names of all URIs found in a body part and, recursively,
 * in its child parts.
 *
 * @param {object} urls - output map, host name => parsed URI object. Only the
 *   keys are unique; a later match for the same host overwrites the value.
 * @param {object} body - body part with optional `bodytext` (string) and
 *   `children` (array of body parts).
 * @param {object} connection - used to log parse errors.
 * @param {object} self - plugin instance, passed through to the logger.
 */
function extract_urls (urls, body, connection, self) {
  if (!body) return;

  // A part without text of its own may still contain child parts with text,
  // so an empty bodytext must not stop the recursion below.
  if (body.bodytext) {
    // Run one of the module-level /g regexps over the text and record the
    // host name of every match. `group` selects the capture to parse and
    // `prefix` is prepended so that bare host names parse as URLs.
    const scan = (re, group, prefix) => {
      re.lastIndex = 0; // shared /g regexp: always start from the beginning
      let match;
      while ((match = re.exec(body.bodytext))) {
        const text = match[group];
        try {
          const uri = url.parse(`${prefix}${text}`);
          // Don't reverse the IPs here; we do it in the lookup.
          // A URI that parsed without a host gives nothing to look up.
          if (uri.hostname) urls[uri.hostname] = uri;
        }
        catch (error) {
          connection.logerror(self, `parse error: ${text} ${error.message}`);
        }
      }
    };

    scan(numeric_ip, 0, '');        // extract numeric URIs
    scan(schemeless, 1, 'http://'); // match plain hostname.tld
    scan(schemed,    1, '');        // match scheme:// URI
  }

  // TODO: URIHASH
  // TODO: MAILHASH

  for (const child of body.children || []) {
    extract_urls(urls, child, connection, self);
  }
}