fedspendingtransparency/usaspending-api

View on GitHub
usaspending_api/etl/operations/federal_account/update_agency.py

Summary

Maintainability
A
0 mins
Test Coverage
A
100%
from usaspending_api.common.helpers.sql_helpers import execute_dml_sql


DOD_AID = "097"  # DOD's agency identifier.
DOD_SUBSUMED_AIDS = ("017", "021", "057")  # Air Force, Army, and Navy are to be reported under DOD.


# 1. If agency_id is for a DOD subsumed agency (Army, Navy, Air Force), remap it to 097 (DOD).
# 2. If agency_id is a valid CGAC toptier agency, map it to toptier agency using the agency_id.
# 3. If fr_entity_code is a valid FREC toptier agency, map it to toptier agency using the fr_entity_code.
# 4. If agency_id is a shared toptier agency (011, 033, etc) and does not have a toptier FREC, use
#    the associated_cgac_code to chose the toptier code.
# 5. In cases where we have multiple TAS with conflicting agencies, agency selection priority is
#    determined by the number of File A submissions for the agency followed by the number of TAS
#    associated with the agency.  In the event of ties, the smallest agency code value is chosen.  The
#    tiebreaker is largely irrelevant as it is just used to ensure deterministic results.
FEDERAL_ACCOUNT_PARENT_AGENCY_MAPPING = f"""
            select distinct on (taa.agency_id, taa.main_account_code)
                taa.agency_id as agency_identifier,
                taa.main_account_code,
                coalesce(
                    aid_cgac.toptier_agency_id,
                    aid_frec.toptier_agency_id,
                    aid_association.toptier_agency_id
                ) as parent_toptier_agency_id,
                coalesce(
                    aid_cgac.toptier_code,
                    aid_frec.toptier_code,
                    aid_association.toptier_code
                )::int as toptier_code_sorter
            from
                treasury_appropriation_account as taa
                left outer join toptier_agency as aid_cgac on
                    aid_cgac.toptier_code = case
                        when taa.agency_id in {DOD_SUBSUMED_AIDS} then '{DOD_AID}'
                        else taa.agency_id
                    end
                left outer join toptier_agency as aid_frec on
                    aid_frec.toptier_code = taa.fr_entity_code
                left outer join frec on
                    frec.frec_code = taa.fr_entity_code
                left outer join toptier_agency as aid_association on
                    aid_association.toptier_code = frec.associated_cgac_code
                left outer join appropriation_account_balances aab on
                    aab.treasury_account_identifier = taa.treasury_account_identifier
            group by
                taa.agency_id,
                taa.main_account_code,
                parent_toptier_agency_id,
                toptier_code_sorter
            order by
                taa.agency_id,
                taa.main_account_code,
                count(distinct aab.appropriation_account_balances_id) desc,
                count(distinct taa.treasury_account_identifier) desc,
                toptier_code_sorter
"""


def update_federal_account_agency():

    sql = f"""
        with
        federal_account_parent_agency_mapping as (
            {FEDERAL_ACCOUNT_PARENT_AGENCY_MAPPING}
        )
        update
            federal_account as fa
        set
            parent_toptier_agency_id = fapam.parent_toptier_agency_id
        from
            federal_account_parent_agency_mapping as fapam
        where
            fapam.agency_identifier = fa.agency_identifier and
            fapam.main_account_code = fa.main_account_code and
            fapam.parent_toptier_agency_id is distinct from fa.parent_toptier_agency_id
    """

    return execute_dml_sql(sql)