fedspendingtransparency/usaspending-api

View on GitHub
usaspending_api/submissions/management/commands/load_dabs_submission_window_schedule.py

Summary

Maintainability
A
0 mins
Test Coverage
C
79%
import logging
import csv
from datetime import datetime, timezone

from django.core.management.base import BaseCommand
from django.db import connections, transaction

from usaspending_api.etl.broker_etl_helpers import dictfetchall
from usaspending_api.submissions.models import DABSSubmissionWindowSchedule
from usaspending_api.common.retrieve_file_from_uri import RetrieveFileFromUri

logger = logging.getLogger("script")

"""
NOTE: "Reveal Date" (dabs_submission_window_schedule.submission_reveal_data) is stored as the 0th
second of the day (in UTC time) on which queries surfacing data submitted for this submission
window should allow the data to be displayed. i.e. if the current date (now() in UTC) is greater
than or equal to this date and time, show the data related to this submission. If not, don't show it.

The reveal date is originally set to a future date '9999-12-31'. When a schedules respective
submission_due_date (submission_due_date for monthly and certification_deadline for quarterly) has
been passed its reveal date is set to the current datetime by the 'reveal_dabs_submission_window_schedule'
command. Any reveal dates on schedules with a 'due date' in the past are not updated.

It is logically intended to be the "next day" after the DABS submission deadline communicated to
Agency Submitters. That deadline is exactly midnight Pacific Time on the schedule communicated to
Agency Submitters as part of the DAIMS documentation.

For simplification, Broker data in the submission_window_schedule table only stores the UTC date
part (with zeroed-time) of the above deadline.

Therefore we take the "reveal date" here to be the same date and time of the given deadline in the
broker table, since that achieves the logical intention of revealing data "the next day" after the
deadline communicated to Agency Submitters.
"""


# SQL to create Month Period Schedules using broker table
# Use all periods after Period 9, Year 2020 from table
# Submission Due Date comes from 'publish_deadline' column
MONTH_SCHEDULE_SQL = """
select
    year * 1000 + period * 10 + 0 as id,
    make_date(year, period, 1) - interval '3 months' as period_start_date,
    make_date(year, period, 1) - interval '2 months' - interval '1 day' as period_end_date,
    period_start as submission_start_date,
    certification_deadline as certification_due_date,
    publish_deadline as submission_due_date,
    year as submission_fiscal_year,
    (period + 2) / 3 as submission_fiscal_quarter,
    period as submission_fiscal_month,
    false as is_quarter
from
    submission_window_schedule
where
    period_start >= '2020-05-15'
    and period != 1
"""

# SQL to create Quarter Period Schedules using broker table
# Only use periods 3, 6, 9, and 12 from table
# Submission Due Date comes from 'certification_deadline' column
QUARTER_SCHEDULE_SQL = """
select
    year * 1000 + period * 10 + 1 as id,
    make_date(year, period, 1) - interval '5 months' as period_start_date,
    make_date(year, period, 1) - interval '2 months' - interval '1 day' as period_end_date,
    period_start as submission_start_date,
    certification_deadline as certification_due_date,
    certification_deadline as submission_due_date,
    year as submission_fiscal_year,
    (period + 2) / 3 as submission_fiscal_quarter,
    period as submission_fiscal_month,
    true as is_quarter
from
    submission_window_schedule
where
    period % 3 = 0
"""


FUTURE_DATE = datetime.max.replace(tzinfo=timezone.utc)


class Command(BaseCommand):
    """
    This command will clear and repopulate the dabs_submission_window_schedule table.
    If a csv file is provided with the --file argument, rows will be created based on
    it. If no file is provided, rows will be generated based off of the broker. In
    production, the broker method should be used.
    """

    help = "Update DABS Submission Window Schedule table based on a file or the broker"

    def add_arguments(self, parser):
        parser.add_argument(
            "--file", help="The file containing schdules. If not provided, schedules are generated based on broker."
        )

    @transaction.atomic()
    def handle(self, *args, **options):

        file_path = options["file"]

        if file_path:
            logger.info("Input file provided. Reading schedule from file.")
            incoming_schedule_objs = self.read_schedules_from_csv(file_path)
        else:
            logger.info("No input file provided. Generating schedule from broker.")
            incoming_schedule_objs = self.generate_schedules_from_broker()

        logger.info("Loading existing DABS Submission Window Schedules")
        existing_schedules = list(DABSSubmissionWindowSchedule.objects.all())
        existing_schedule_lookup = {}
        for schedule in existing_schedules:
            existing_schedule_lookup[schedule.id] = schedule

        for incoming_schedule in incoming_schedule_objs:
            # If an incoming schedule has a matching existing schedule, use the existing
            # schedule's submission_reveal_date
            existing_schedule = existing_schedule_lookup.get(int(incoming_schedule.id))
            if existing_schedule:
                incoming_schedule.submission_reveal_date = existing_schedule.submission_reveal_date
            elif incoming_schedule.submission_reveal_date is None:
                # Setting a default of FUTURE_DATE if no 'submission_reveal_date' is found
                incoming_schedule.submission_reveal_date = FUTURE_DATE

            incoming_schedule.parse_dates_fields(timezone.utc)

            # Hide future submission windows by setting the reveal date to a distant future
            # date. The command 'reveal_dabs_submission_window_schedules` is used to set the
            # reveal date when it is ready.
            if incoming_schedule.submission_due_date > datetime.utcnow().replace(tzinfo=timezone.utc):
                incoming_schedule.submission_reveal_date = FUTURE_DATE

        logger.info("Deleting existing DABS Submission Window Schedule")
        DABSSubmissionWindowSchedule.objects.all().delete()

        logger.info("Inserting DABS Submission Window Schedule into website")
        DABSSubmissionWindowSchedule.objects.bulk_create(incoming_schedule_objs)

        logger.info("DABS Submission Window Schedule loader finished successfully!")

    def generate_schedules_from_broker(self):

        logger.info("Creating broker cursor")
        broker_cursor = connections["data_broker"].cursor()

        logger.info("Running MONTH_SCHEDULE_SQL")
        broker_cursor.execute(MONTH_SCHEDULE_SQL)

        logger.info("Getting month schedule values from cursor")
        month_schedule_values = dictfetchall(broker_cursor)

        logger.info("Running QUARTER_SCHEDULE_SQL")
        broker_cursor.execute(QUARTER_SCHEDULE_SQL)

        logger.info("Getting quarter schedule values from cursor")
        quarter_schedule_values = dictfetchall(broker_cursor)

        submission_schedule_objs = [DABSSubmissionWindowSchedule(**values) for values in month_schedule_values]
        submission_schedule_objs += [DABSSubmissionWindowSchedule(**values) for values in quarter_schedule_values]

        return submission_schedule_objs

    def read_schedules_from_csv(self, file_path):

        logger.info("Reading from file: {}".format(file_path))

        with RetrieveFileFromUri(file_path).get_file_object(True) as file:
            csv_reader = csv.DictReader(file)
            submission_schedule_objs = [DABSSubmissionWindowSchedule(**values) for values in csv_reader]
            return submission_schedule_objs