fedspendingtransparency/usaspending-api

View on GitHub
usaspending_api/etl/submission_loader_helpers/submission_ids.py

Summary

Maintainability
A
0 mins
Test Coverage
A
100%
from django.conf import settings
from usaspending_api.common.helpers.sql_helpers import execute_sql_to_named_tuple


def get_new_or_updated_submission_ids(since_datetime=None):
    """
    Identifies Broker submissions that need to be created or updated in USAspending and returns a
    list of their ids.
    """
    since_sql = f"and s.updated_at >= ''{since_datetime}''::timestamp" if since_datetime else ""
    sql = f"""
        select
            bs.submission_id
        from
            dblink(
                '{settings.DATA_BROKER_DBLINK_NAME}',
                '
                    select
                        s.submission_id,
                        (
                            select  max(updated_at)
                            from    publish_history
                            where   submission_id = s.submission_id
                        )::timestamptz as published_date,
                        (
                            select  max(updated_at)
                            from    certify_history
                            where   submission_id = s.submission_id
                        )::timestamptz as certified_date,
                        coalesce(s.cgac_code, s.frec_code) as toptier_code,
                        s.reporting_start_date,
                        s.reporting_end_date,
                        s.reporting_fiscal_year,
                        s.reporting_fiscal_period,
                        s.is_quarter_format
                    from
                        submission as s
                    where
                        s.is_fabs is false
                        and s.publish_status_id in (2, 3)
                        {since_sql}
                '
            ) as bs (
                submission_id integer,
                published_date timestamp with time zone,
                certified_date timestamp with time zone,
                toptier_code text,
                reporting_start_date date,
                reporting_end_date date,
                reporting_fiscal_year integer,
                reporting_fiscal_period integer,
                is_quarter_format boolean
            )
            left outer join submission_attributes sa on
                sa.submission_id = bs.submission_id
                and sa.published_date is not distinct from bs.published_date
                and sa.certified_date is not distinct from bs.certified_date
                and sa.toptier_code is not distinct from bs.toptier_code
                and sa.reporting_period_start is not distinct from bs.reporting_start_date
                and sa.reporting_period_end is not distinct from bs.reporting_end_date
                and sa.reporting_fiscal_year is not distinct from bs.reporting_fiscal_year
                and sa.reporting_fiscal_period is not distinct from bs.reporting_fiscal_period
                and sa.quarter_format_flag is not distinct from bs.is_quarter_format
        where
            sa.submission_id is null
    """

    rows = execute_sql_to_named_tuple(sql)
    return [r.submission_id for r in rows]