# data/env.py
#
# Code-quality report summary (captured alongside this file):
#   Maintainability: A (1 hr)
#   Test Coverage: (no value captured)
import os
import sys
import yaml

# Directory containing this module; every data path below is built from it.
DATA_DIR = os.path.dirname(__file__)

# App-level metadata, loaded from meta.yml one directory above DATA_DIR.
# The file is opened in a `with` block so the handle is closed as soon as
# parsing finishes instead of being left for the garbage collector.
with open(os.path.join(DATA_DIR, "../meta.yml")) as meta_file:
  META = yaml.safe_load(meta_file)

# The DOMAINS environment variable overrides the URL configured in meta.yml.
DOMAINS = os.environ.get("DOMAINS", META["data"]["domains_url"])

# domain-scan paths (MUST be set in env).
# NOTE: both fall back to None when the variable is unset, so nothing here
# enforces the "MUST" -- code using these values has to check for None.
SCAN_COMMAND = os.environ.get("DOMAIN_SCAN_PATH", None)
GATHER_COMMAND = os.environ.get("DOMAIN_GATHER_PATH", None)


# post-processing and uploading information
PARENTS_DATA = os.path.join(DATA_DIR, "./output/parents")
PARENTS_RESULTS = os.path.join(DATA_DIR, "./output/parents/results")
SUBDOMAIN_DATA = os.path.join(DATA_DIR, "./output/subdomains")
SUBDOMAIN_DATA_GATHERED = os.path.join(DATA_DIR, "./output/subdomains/gather")
SUBDOMAIN_DATA_SCANNED = os.path.join(DATA_DIR, "./output/subdomains/scan")

DB_DATA = os.path.join(DATA_DIR, "./db.json")
BUCKET_NAME = META["bucket"]
AWS_REGION = META["aws_region"]

# DAP source data
ANALYTICS_URL = META["data"]["analytics_url"]

# a11y source data
A11Y_CONFIG = META["a11y"]["config"]
A11Y_REDIRECTS = META["a11y"]["redirects"]

### Parent domain scanning information
# Comma-separated scanner names; the SCANNERS environment variable overrides
# the default. The string is split on "," as-is (no whitespace stripping).
scanner_string = os.environ.get("SCANNERS", "pshtt,sslyze,analytics")
SCANNERS = scanner_string.split(",")

### subdomain gathering/scanning information
# Kept as a single comma-separated string (not split into a list).
GATHER_SUFFIXES = os.environ.get("GATHER_SUFFIXES", ".gov,.fed.us")

# names and options must be in corresponding order
GATHERER_NAMES = [
  "censys-snapshot", "rdns-snapshot",
  "dap", "eot2016", "other", "dotgov"
]
GATHERER_OPTIONS = [
  "--censys-snapshot=%s" % META["data"]["censys_snapshot_url"],
  "--rdns-snapshot=%s" % META["data"]["rdns_snapshot_url"],
  "--dap=%s" % META["data"]["analytics_subdomains_url"],
  "--eot2016=%s" % META["data"]["eot_subdomains_url"],
  "--other=%s" % META["data"]["other_subdomains_url"],
  "--dotgov=%s" % DOMAINS
]

# Run these scanners over *all* (which is a lot) discovered subdomains.
SUBDOMAIN_SCANNERS = ["pshtt", "sslyze"]

# Used if --lambda is enabled during the scan.
LAMBDA_WORKERS = 900

# Quick and dirty CLI options parser.
def options():
  """Parse ``--key`` / ``--key=value`` flags from ``sys.argv[1:]``.

  Arguments that do not start with ``--`` are ignored. A flag given without
  ``=`` is treated as ``True``. Keys and values are lower-cased, and the
  values ``"true"`` / ``"false"`` (in any case) become the booleans
  ``True`` / ``False``; every other value stays a lower-cased string. When
  a flag is repeated, the last occurrence wins.

  Returns:
    dict mapping each option name (without the leading ``--``) to a bool
    or a str.
  """
  parsed = {}
  for arg in sys.argv[1:]:
    if not arg.startswith("--"):
      continue

    # Strip only the leading "--" and split on the FIRST "=" so that keys
    # containing "--" and values containing "=" (e.g. URLs with query
    # strings) are kept whole instead of being truncated or raising.
    key, separator, value = arg[2:].partition("=")
    if not separator:
      # Bare flag such as "--lambda": presence means enabled.
      value = "true"

    key = key.lower()
    value = value.lower()

    if value == "true":
      value = True
    elif value == "false":
      value = False
    parsed[key] = value

  return parsed