usaspending_api/etl/management/load_base.py from fedspendingtransparency/usaspending-api

usaspending_api/etl/management/load_base.py
Summary

Maintainability

2 hrs
Test Coverage

76%
Issues
Coverage
"""
Code for loaders in management/commands to inherit from or share.
"""
import logging
from decimal import Decimal, InvalidOperation

import dateutil
import dateutil.parser
from django.core.management.base import BaseCommand
from django.db import connections

from usaspending_api.common.long_to_terse import LONG_TO_TERSE_LABELS
from usaspending_api.etl.broker_etl_helpers import PhonyCursor


# Lists to store for update_awards and update_procurement_awards
# NOTE(review): nothing in the visible part of this module appends to or reads
# these lists; presumably the loaders that import this module fill them with
# award ids -- confirm against callers.
award_update_id_list = []
award_contract_update_id_list = []

# Module-wide logger, obtained under the name "script".
logger = logging.getLogger("script")


class Command(BaseCommand):
    """
    Base management command for loaders that read from the data broker.

    ``handle`` obtains a cursor -- a real one from the ``data_broker`` Django
    connection, or a ``PhonyCursor`` when ``--test`` is given -- and hands it to
    ``self.handle_loading``. This class does not define ``handle_loading``
    itself; it has to be supplied by whatever inherits from this class.
    """

    def add_arguments(self, parser):
        """Register the ``--test`` flag on the command's argument parser."""
        parser.add_argument(
            "--test",
            action="store_true",
            dest="test",
            default=False,
            help="Runs the submission loader in test mode and uses stored data rather than pulling from a database",
        )

    def handle(self, *args, **options):
        """
        Acquire a broker cursor and delegate the real work to ``handle_loading``.

        The cursor is passed as the ``db_cursor`` keyword argument, alongside
        the positional arguments and parsed options this method received.

        Raises:
            Exception: whatever the connection attempt raised; it is logged at
                CRITICAL level and then re-raised unchanged.
        """
        if options["test"]:
            # Test mode: use stored data instead of a live broker connection
            db_cursor = PhonyCursor()
        else:
            # Grab data broker database connections
            try:
                db_cursor = connections["data_broker"].cursor()
            except Exception as err:
                logger.critical("Could not connect to database. Is DATA_BROKER_DATABASE_URL set?")
                # Log the error text itself so the failure reason lands in the log
                logger.critical("%s", err)
                raise

        self.handle_loading(*args, db_cursor=db_cursor, **options)

        # Done!
        logger.info("FINISHED")


def format_date(date_string):
    """
    Parse a date string and return only its calendar date.

    Args:
        date_string: text to be interpreted by ``dateutil.parser.parse``.

    Returns:
        A ``datetime.date``, or ``None`` when the input cannot be parsed
        (wrong type, unrecognised format, or a numeric component too large
        to convert).
    """
    try:
        return dateutil.parser.parse(date_string).date()
    except (TypeError, ValueError, OverflowError):
        # OverflowError is what dateutil raises for oversized numeric parts;
        # treat it as "unparseable" like the other failures.
        return None


def load_data_into_model(model_instance, data, **kwargs):
    """
    Loads data into a model instance
    Data should be a row, a dict of field -> value pairs

    For every field reported by ``model_instance._meta.get_fields()`` a value is
    looked up from the following sources, and the first that supplies one wins:
        1. value_map
        2. field_map
        3. data itself
    Each source is consulted first under the field's broker (terse) name taken
    from LONG_TO_TERSE_LABELS, then under the model field name. In addition, a
    field named 'data_source' is pre-set to "DBR" unless value_map has an entry
    for it; any of the sources above may still overwrite that value.

    Keyword args:
        field_map - A map of field columns to data columns. This is so you can map
                    a field in the data to a different field in the model. For instance,
                    model.tas_rendering_label = data['tas'] could be set up like this:
                    field_map = {'tas_rendering_label': 'tas'}
                    When the entry is keyed by the broker name and the mapped column
                    is absent from data, a warning is logged and the lookup falls
                    through to data. When it is keyed by the model field name, a
                    missing column raises KeyError.
        value_map - Want to force or override a value? Specify the field name for the
                    instance and the data you want to load. Example:
                    {'update_date': timezone.now()}
                    A callable value is handed to store_value together with data,
                    which calls it with data as the only argument.
                    If a column is present in both value_map and field_map, value_map
                    takes precedence.
        save - Defaults to False, but when set to true will call model_instance.save()
               at the end
        reverse -   Field names matching this regex should be reversed
                    (multiplied by -1) before saving.
        as_dict - If true, the values are collected in a new dict which is returned;
                  the model instance's attributes are not set.

    Returns:
        The populated dict when as_dict is true, otherwise model_instance.
    """
    # Treat an omitted (or None) map as empty so membership tests below are safe
    field_map = kwargs.get("field_map") or {}
    value_map = kwargs.get("value_map") or {}
    save = kwargs.get("save", False)
    as_dict = kwargs.get("as_dict", False)
    reverse = kwargs.get("reverse")

    # Grab all the field names from the meta class of the model instance
    fields = [field.name for field in model_instance._meta.get_fields()]
    target = {} if as_dict else model_instance

    for field in fields:
        # Let's handle the data source field here for all objects
        if field == "data_source" and field not in value_map:
            store_value(target, field, "DBR", reverse)

        # If our field is the 'long form' field, we need to get what it maps to
        # in the broker so we can map the data properly
        broker_field = LONG_TO_TERSE_LABELS[field] if field in LONG_TO_TERSE_LABELS else field

        stored = False

        # 1. Forced / overridden values; the broker name is checked first
        for key in (broker_field, field):
            if key in value_map:
                store_value(target, field, value_map[key], reverse, data)
                stored = True
                break

        # 2. Columns renamed through field_map
        if not stored:
            if broker_field in field_map:
                source_column = field_map[broker_field]
                try:
                    raw_value = data[source_column]
                except KeyError:
                    # Not fatal: fall through to the direct lookup in data below
                    logger.warning("column {} missing from data".format(source_column))
                else:
                    store_value(target, field, raw_value, reverse)
                    stored = True
            elif field in field_map:
                # NOTE: a missing column propagates as KeyError on this path
                store_value(target, field, data[field_map[field]], reverse)
                stored = True

        # 3. Same-named columns in the row itself
        if not stored:
            if broker_field in data:
                store_value(target, field, data[broker_field], reverse)
            elif field in data:
                store_value(target, field, data[field], reverse)

    if save:
        model_instance.save()

    # target is the dict when as_dict is true, otherwise model_instance itself
    return target


def store_value(model_instance_or_dict, field, value, reverse=None, data=None):
    """
    Normalise ``value`` and store it under ``field`` on a model instance or dict.

    Steps, in this order:
        1. A string bound for a field whose name ends in "date" is parsed and
           reduced to a ``date``; if it cannot be parsed the string is kept.
        2. A callable value is replaced by ``value(data)`` when ``data`` is truthy.
        3. When ``reverse`` matches the field name the value is converted to
           ``Decimal`` and negated; a value that cannot be converted (``None``,
           non-numeric text, ...) is stored unchanged.

    Args:
        model_instance_or_dict: where to store the value. A dict is assigned by
            key; anything else is written with ``setattr``.
        field: attribute name / dict key to write.
        value: the raw value, or a callable taking ``data``.
        reverse: optional pattern object exposing ``search``; a match against
            ``field`` triggers the sign flip.
        data: optional source row passed to callable values.
    """
    # turn datetimes into dates
    if field.endswith("date") and isinstance(value, str):
        try:
            value = dateutil.parser.parse(value).date()
        except (TypeError, ValueError, OverflowError):
            # Best effort: leave the original string in place
            pass

    # handles the value_map containing a function
    if callable(value) and data:
        value = value(data)

    if reverse and reverse.search(field):
        try:
            value = -1 * Decimal(value)
        except (TypeError, ValueError, InvalidOperation):
            # Decimal() signals non-numeric text with InvalidOperation and
            # unsupported types with TypeError; keep the value as it is
            pass

    if isinstance(model_instance_or_dict, dict):
        model_instance_or_dict[field] = value
    else:
        setattr(model_instance_or_dict, field, value)