fedspendingtransparency/usaspending-api

View on GitHub
usaspending_api/references/management/commands/load_bureau_title_lookup.py

Summary

Maintainability
A
0 mins
Test Coverage
A
100%
import csv
import logging

from django.core.management.base import BaseCommand
from django.db import transaction
from django.utils.text import slugify

from usaspending_api.common.retrieve_file_from_uri import RetrieveFileFromUri
from usaspending_api.references.models.bureau_title_lookup import BureauTitleLookup

logger = logging.getLogger("script")


class Command(BaseCommand):

    help = (
        "Loads a CSV from OMB and uses it to create a lookup table (bureau_title_lookup) from "
        "federal accounts to bureau titles"
    )

    def add_arguments(self, parser):
        parser.add_argument(
            "-p",
            "--path",
            help="the path to the csv spreadsheet to load",
            default="https://max.omb.gov/maxportal/assets/public/treasury/FMS_GWA_EXPORT_APPN.csv",
        )

    @transaction.atomic
    def handle(self, *args, **options):
        path = options["path"]
        logger.info("Downloading Bureau Title Lookups")
        logger.info(f"Source: {path}")
        bureau_title_lookups = self.download_and_read_csv(path)

        logger.info("Deleting all existing Bureau Title Lookup records in website")
        deletes = BureauTitleLookup.objects.all().delete()
        logger.info(f"Deleted {deletes[0]:,} records")

        logger.info("Loading new Bureau Title Lookup records into database")
        lookup_count = len(BureauTitleLookup.objects.bulk_create(bureau_title_lookups))
        logger.info(f"Loaded: {lookup_count} unique Bureau Title Lookup records")

    def download_and_read_csv(self, path):
        unique_map = {}

        with RetrieveFileFromUri(path).get_file_object(text=True) as f:
            reader = csv.reader(f)
            for row in reader:

                # Removes extra white spaces and pad with zeroes
                aid = row[0].strip().zfill(3)
                main_acct = row[1].strip().zfill(4)
                bureau_title = row[23].strip()
                federal_account_code = f"{aid}-{main_acct}"
                type = row[44].strip().upper()

                # Rows with a type of "DUMMY" or an Agency id of "999" represent
                # test bureaus that should not be ingested.
                if type == "DUMMY" or aid == "999":
                    continue

                bureau_title_lookup = BureauTitleLookup(
                    **{
                        "federal_account_code": federal_account_code,
                        "bureau_title": bureau_title,
                        "bureau_slug": slugify(bureau_title),
                    }
                )

                # Use unique string and map to remove duplicates
                unique_string = f"{federal_account_code}|{bureau_title}"
                if unique_string not in unique_map:
                    unique_map[unique_string] = bureau_title_lookup

        return list(unique_map.values())