fedspendingtransparency/usaspending-api

View on GitHub
usaspending_api/search/delta_models/subaward_search.py

Summary

Maintainability
A
0 mins
Test Coverage
A
100%
SUBAWARD_SEARCH_COLUMNS = {
    # Broker Subaward Table Meta
    "broker_created_at": {"delta": "TIMESTAMP", "postgres": "TIMESTAMP"},
    "broker_updated_at": {"delta": "TIMESTAMP", "postgres": "TIMESTAMP"},
    "broker_subaward_id": {"delta": "LONG NOT NULL", "postgres": "BIGINT NOT NULL"},
    # Prime Award Fields (from Broker)
    "unique_award_key": {"delta": "STRING", "postgres": "TEXT"},
    "award_piid_fain": {"delta": "STRING", "postgres": "TEXT"},
    "parent_award_id": {"delta": "STRING", "postgres": "TEXT"},
    "award_amount": {"delta": "NUMERIC(23,2)", "postgres": "NUMERIC(23,2)"},
    "action_date": {"delta": "DATE", "postgres": "DATE"},
    "fy": {"delta": "STRING", "postgres": "TEXT"},
    "awarding_agency_code": {"delta": "STRING", "postgres": "TEXT"},
    "awarding_agency_name": {"delta": "STRING", "postgres": "TEXT"},
    "awarding_sub_tier_agency_c": {"delta": "STRING", "postgres": "TEXT"},
    "awarding_sub_tier_agency_n": {"delta": "STRING", "postgres": "TEXT"},
    "awarding_office_code": {"delta": "STRING", "postgres": "TEXT"},
    "awarding_office_name": {"delta": "STRING", "postgres": "TEXT"},
    "funding_agency_code": {"delta": "STRING", "postgres": "TEXT"},
    "funding_agency_name": {"delta": "STRING", "postgres": "TEXT"},
    "funding_sub_tier_agency_co": {"delta": "STRING", "postgres": "TEXT"},
    "funding_sub_tier_agency_na": {"delta": "STRING", "postgres": "TEXT"},
    "funding_office_code": {"delta": "STRING", "postgres": "TEXT"},
    "funding_office_name": {"delta": "STRING", "postgres": "TEXT"},
    "awardee_or_recipient_uniqu": {"delta": "STRING", "postgres": "TEXT"},
    "awardee_or_recipient_uei": {"delta": "STRING", "postgres": "TEXT"},
    "awardee_or_recipient_legal": {"delta": "STRING", "postgres": "TEXT"},
    "dba_name": {"delta": "STRING", "postgres": "TEXT"},
    "ultimate_parent_unique_ide": {"delta": "STRING", "postgres": "TEXT"},
    "ultimate_parent_uei": {"delta": "STRING", "postgres": "TEXT"},
    "ultimate_parent_legal_enti": {"delta": "STRING", "postgres": "TEXT"},
    "legal_entity_country_code": {"delta": "STRING", "postgres": "TEXT"},
    "legal_entity_country_name": {"delta": "STRING", "postgres": "TEXT"},
    "legal_entity_state_code": {"delta": "STRING", "postgres": "TEXT"},
    "legal_entity_state_name": {"delta": "STRING", "postgres": "TEXT"},
    "legal_entity_zip": {"delta": "STRING", "postgres": "TEXT"},
    "legal_entity_county_code": {"delta": "STRING", "postgres": "TEXT"},
    "legal_entity_county_name": {"delta": "STRING", "postgres": "TEXT"},
    "legal_entity_congressional": {"delta": "STRING", "postgres": "TEXT"},
    "legal_entity_foreign_posta": {"delta": "STRING", "postgres": "TEXT"},
    "legal_entity_city_name": {"delta": "STRING", "postgres": "TEXT"},
    "legal_entity_address_line1": {"delta": "STRING", "postgres": "TEXT"},
    "business_types": {"delta": "STRING", "postgres": "TEXT"},
    "place_of_perform_country_co": {"delta": "STRING", "postgres": "TEXT"},
    "place_of_perform_country_na": {"delta": "STRING", "postgres": "TEXT"},
    "place_of_perform_state_code": {"delta": "STRING", "postgres": "TEXT"},
    "place_of_perform_state_name": {"delta": "STRING", "postgres": "TEXT"},
    "place_of_performance_zip": {"delta": "STRING", "postgres": "TEXT"},
    "place_of_perform_county_code": {"delta": "STRING", "postgres": "TEXT"},
    "place_of_perform_county_name": {"delta": "STRING", "postgres": "TEXT"},
    "place_of_perform_congressio": {"delta": "STRING", "postgres": "TEXT"},
    "place_of_perform_city_name": {"delta": "STRING", "postgres": "TEXT"},
    "place_of_perform_street": {"delta": "STRING", "postgres": "TEXT"},
    "award_description": {"delta": "STRING", "postgres": "TEXT"},
    "naics": {"delta": "STRING", "postgres": "TEXT"},
    "naics_description": {"delta": "STRING", "postgres": "TEXT"},
    "cfda_numbers": {"delta": "STRING", "postgres": "TEXT"},
    "cfda_titles": {"delta": "STRING", "postgres": "TEXT"},
    # Subaward Fields (from Broker)
    "subaward_type": {"delta": "STRING", "postgres": "TEXT"},
    "subaward_report_year": {"delta": "SHORT NOT NULL", "postgres": "SMALLINT NOT NULL"},
    "subaward_report_month": {"delta": "SHORT NOT NULL", "postgres": "SMALLINT NOT NULL"},
    "subaward_number": {"delta": "STRING", "postgres": "TEXT"},
    "subaward_amount": {"delta": "NUMERIC(23,2)", "postgres": "NUMERIC(23,2)"},
    "sub_action_date": {"delta": "DATE", "postgres": "DATE"},
    "sub_awardee_or_recipient_uniqu": {"delta": "STRING", "postgres": "TEXT"},
    "sub_awardee_or_recipient_uei": {"delta": "STRING", "postgres": "TEXT"},
    "sub_awardee_or_recipient_legal_raw": {"delta": "STRING", "postgres": "TEXT"},
    "sub_dba_name": {"delta": "STRING", "postgres": "TEXT"},
    "sub_ultimate_parent_unique_ide": {"delta": "STRING", "postgres": "TEXT"},
    "sub_ultimate_parent_uei": {"delta": "STRING", "postgres": "TEXT"},
    "sub_ultimate_parent_legal_enti_raw": {"delta": "STRING", "postgres": "TEXT"},
    "sub_legal_entity_country_code_raw": {"delta": "STRING", "postgres": "TEXT"},
    "sub_legal_entity_country_name_raw": {"delta": "STRING", "postgres": "TEXT"},
    "sub_legal_entity_state_code": {"delta": "STRING", "postgres": "TEXT"},
    "sub_legal_entity_state_name": {"delta": "STRING", "postgres": "TEXT"},
    "sub_legal_entity_zip": {"delta": "STRING", "postgres": "TEXT"},
    "sub_legal_entity_county_code": {"delta": "STRING", "postgres": "TEXT"},
    "sub_legal_entity_county_name": {"delta": "STRING", "postgres": "TEXT"},
    "sub_legal_entity_congressional_raw": {"delta": "STRING", "postgres": "TEXT"},
    "sub_legal_entity_foreign_posta": {"delta": "STRING", "postgres": "TEXT"},
    "sub_legal_entity_city_name": {"delta": "STRING", "postgres": "TEXT"},
    "sub_legal_entity_address_line1": {"delta": "STRING", "postgres": "TEXT"},
    "sub_business_types": {"delta": "STRING", "postgres": "TEXT"},
    "sub_place_of_perform_country_co_raw": {"delta": "STRING", "postgres": "TEXT"},
    "sub_place_of_perform_country_na": {"delta": "STRING", "postgres": "TEXT"},
    "sub_place_of_perform_state_code": {"delta": "STRING", "postgres": "TEXT"},
    "sub_place_of_perform_state_name": {"delta": "STRING", "postgres": "TEXT"},
    "sub_place_of_performance_zip": {"delta": "STRING", "postgres": "TEXT"},
    "sub_place_of_perform_county_code": {"delta": "STRING", "postgres": "TEXT"},
    "sub_place_of_perform_county_name": {"delta": "STRING", "postgres": "TEXT"},
    "sub_place_of_perform_congressio_raw": {"delta": "STRING", "postgres": "TEXT"},
    "sub_place_of_perform_city_name": {"delta": "STRING", "postgres": "TEXT"},
    "sub_place_of_perform_street": {"delta": "STRING", "postgres": "TEXT"},
    "subaward_description": {"delta": "STRING", "postgres": "TEXT"},
    "sub_high_comp_officer1_full_na": {"delta": "STRING", "postgres": "TEXT"},
    "sub_high_comp_officer1_amount": {"delta": "NUMERIC(23,2)", "postgres": "NUMERIC(23,2)"},
    "sub_high_comp_officer2_full_na": {"delta": "STRING", "postgres": "TEXT"},
    "sub_high_comp_officer2_amount": {"delta": "NUMERIC(23,2)", "postgres": "NUMERIC(23,2)"},
    "sub_high_comp_officer3_full_na": {"delta": "STRING", "postgres": "TEXT"},
    "sub_high_comp_officer3_amount": {"delta": "NUMERIC(23,2)", "postgres": "NUMERIC(23,2)"},
    "sub_high_comp_officer4_full_na": {"delta": "STRING", "postgres": "TEXT"},
    "sub_high_comp_officer4_amount": {"delta": "NUMERIC(23,2)", "postgres": "NUMERIC(23,2)"},
    "sub_high_comp_officer5_full_na": {"delta": "STRING", "postgres": "TEXT"},
    "sub_high_comp_officer5_amount": {"delta": "NUMERIC(23,2)", "postgres": "NUMERIC(23,2)"},
    # Additional Prime Award Fields (from Broker)
    "prime_id": {"delta": "INTEGER", "postgres": "INTEGER"},
    "internal_id": {"delta": "STRING", "postgres": "TEXT"},
    "date_submitted": {"delta": "TIMESTAMP", "postgres": "TIMESTAMP"},
    "report_type": {"delta": "STRING", "postgres": "TEXT"},
    "transaction_type": {"delta": "STRING", "postgres": "TEXT"},
    "program_title": {"delta": "STRING", "postgres": "TEXT"},
    "contract_agency_code": {"delta": "STRING", "postgres": "TEXT"},
    "contract_idv_agency_code": {"delta": "STRING", "postgres": "TEXT"},
    "grant_funding_agency_id": {"delta": "STRING", "postgres": "TEXT"},
    "grant_funding_agency_name": {"delta": "STRING", "postgres": "TEXT"},
    "federal_agency_name": {"delta": "STRING", "postgres": "TEXT"},
    "treasury_symbol": {"delta": "STRING", "postgres": "TEXT"},
    "dunsplus4": {"delta": "STRING", "postgres": "TEXT"},
    "recovery_model_q1": {"delta": "BOOLEAN", "postgres": "BOOLEAN"},
    "recovery_model_q2": {"delta": "BOOLEAN", "postgres": "BOOLEAN"},
    "compensation_q1": {"delta": "BOOLEAN", "postgres": "BOOLEAN"},
    "compensation_q2": {"delta": "BOOLEAN", "postgres": "BOOLEAN"},
    "high_comp_officer1_full_na": {"delta": "STRING", "postgres": "TEXT"},
    "high_comp_officer1_amount": {"delta": "NUMERIC(23,2)", "postgres": "NUMERIC(23,2)"},
    "high_comp_officer2_full_na": {"delta": "STRING", "postgres": "TEXT"},
    "high_comp_officer2_amount": {"delta": "NUMERIC(23,2)", "postgres": "NUMERIC(23,2)"},
    "high_comp_officer3_full_na": {"delta": "STRING", "postgres": "TEXT"},
    "high_comp_officer3_amount": {"delta": "NUMERIC(23,2)", "postgres": "NUMERIC(23,2)"},
    "high_comp_officer4_full_na": {"delta": "STRING", "postgres": "TEXT"},
    "high_comp_officer4_amount": {"delta": "NUMERIC(23,2)", "postgres": "NUMERIC(23,2)"},
    "high_comp_officer5_full_na": {"delta": "STRING", "postgres": "TEXT"},
    "high_comp_officer5_amount": {"delta": "NUMERIC(23,2)", "postgres": "NUMERIC(23,2)"},
    # Additional Subaward Fields (from Broker)
    "sub_id": {"delta": "INTEGER", "postgres": "INTEGER"},
    "sub_parent_id": {"delta": "INTEGER", "postgres": "INTEGER"},
    "sub_federal_agency_id": {"delta": "STRING", "postgres": "TEXT"},
    "sub_federal_agency_name": {"delta": "STRING", "postgres": "TEXT"},
    "sub_funding_agency_id": {"delta": "STRING", "postgres": "TEXT"},
    "sub_funding_agency_name": {"delta": "STRING", "postgres": "TEXT"},
    "sub_funding_office_id": {"delta": "STRING", "postgres": "TEXT"},
    "sub_funding_office_name": {"delta": "STRING", "postgres": "TEXT"},
    "sub_naics": {"delta": "STRING", "postgres": "TEXT"},
    "sub_cfda_numbers": {"delta": "STRING", "postgres": "TEXT"},
    "sub_dunsplus4": {"delta": "STRING", "postgres": "TEXT"},
    "sub_recovery_subcontract_amt": {"delta": "STRING", "postgres": "TEXT"},
    "sub_recovery_model_q1": {"delta": "BOOLEAN", "postgres": "BOOLEAN"},
    "sub_recovery_model_q2": {"delta": "BOOLEAN", "postgres": "BOOLEAN"},
    "sub_compensation_q1": {"delta": "BOOLEAN", "postgres": "BOOLEAN"},
    "sub_compensation_q2": {"delta": "BOOLEAN", "postgres": "BOOLEAN"},
    # USAS Links (and associated derivations)
    "award_id": {"delta": "LONG", "postgres": "BIGINT"},
    "prime_award_group": {"delta": "STRING", "postgres": "TEXT"},
    "prime_award_type": {"delta": "STRING", "postgres": "TEXT"},
    "piid": {"delta": "STRING", "postgres": "TEXT"},
    "fain": {"delta": "STRING", "postgres": "TEXT"},
    "latest_transaction_id": {"delta": "LONG", "postgres": "BIGINT"},
    "last_modified_date": {"delta": "DATE", "postgres": "DATE"},
    "awarding_agency_id": {"delta": "INTEGER", "postgres": "INTEGER"},
    "awarding_toptier_agency_name": {"delta": "STRING", "postgres": "TEXT"},
    "awarding_toptier_agency_abbreviation": {"delta": "STRING", "postgres": "TEXT"},
    "awarding_subtier_agency_name": {"delta": "STRING", "postgres": "TEXT"},
    "awarding_subtier_agency_abbreviation": {"delta": "STRING", "postgres": "TEXT"},
    "funding_agency_id": {"delta": "INTEGER", "postgres": "INTEGER"},
    "funding_subtier_agency_abbreviation": {"delta": "STRING", "postgres": "TEXT"},
    "funding_subtier_agency_name": {"delta": "STRING", "postgres": "TEXT"},
    "funding_toptier_agency_abbreviation": {"delta": "STRING", "postgres": "TEXT"},
    "funding_toptier_agency_name": {"delta": "STRING", "postgres": "TEXT"},
    "cfda_id": {"delta": "INTEGER", "postgres": "INTEGER"},
    "cfda_number": {"delta": "STRING", "postgres": "TEXT"},
    "cfda_title": {"delta": "STRING", "postgres": "TEXT"},
    # USAS Derived Fields
    "sub_fiscal_year": {"delta": "INTEGER NOT NULL", "postgres": "INTEGER NOT NULL"},
    "sub_total_obl_bin": {"delta": "STRING NOT NULL", "postgres": "TEXT NOT NULL"},
    "sub_awardee_or_recipient_legal": {"delta": "STRING", "postgres": "TEXT"},
    "sub_ultimate_parent_legal_enti": {"delta": "STRING", "postgres": "TEXT"},
    "business_type_code": {"delta": "STRING", "postgres": "TEXT"},
    "business_categories": {"delta": "ARRAY<STRING>", "postgres": "TEXT[]"},
    "treasury_account_identifiers": {"delta": "ARRAY<INTEGER>", "postgres": "INTEGER[]"},
    "pulled_from": {"delta": "STRING", "postgres": "TEXT"},
    "type_of_contract_pricing": {"delta": "STRING", "postgres": "TEXT"},
    "type_set_aside": {"delta": "STRING", "postgres": "TEXT"},
    "extent_competed": {"delta": "STRING", "postgres": "TEXT"},
    "product_or_service_code": {"delta": "STRING", "postgres": "TEXT"},
    "product_or_service_description": {"delta": "STRING", "postgres": "TEXT"},
    "legal_entity_congressional_current": {"delta": "STRING", "postgres": "TEXT"},
    "sub_legal_entity_country_code": {"delta": "STRING", "postgres": "TEXT"},
    "sub_legal_entity_country_name": {"delta": "STRING", "postgres": "TEXT"},
    "sub_legal_entity_zip5": {"delta": "STRING", "postgres": "TEXT"},
    "sub_legal_entity_city_code": {"delta": "STRING", "postgres": "TEXT"},
    "sub_legal_entity_congressional": {"delta": "STRING", "postgres": "TEXT"},
    "sub_legal_entity_congressional_current": {"delta": "STRING", "postgres": "TEXT"},
    "place_of_performance_congressional_current": {"delta": "STRING", "postgres": "TEXT"},
    "place_of_perform_scope": {"delta": "STRING", "postgres": "TEXT"},
    "sub_place_of_perform_country_co": {"delta": "STRING", "postgres": "TEXT"},
    "sub_place_of_perform_country_name": {"delta": "STRING", "postgres": "TEXT"},
    "sub_place_of_perform_zip5": {"delta": "STRING", "postgres": "TEXT"},
    "sub_place_of_perform_city_code": {"delta": "STRING", "postgres": "TEXT"},
    "sub_place_of_perform_congressio": {"delta": "STRING", "postgres": "TEXT"},
    "sub_place_of_performance_congressional_current": {"delta": "STRING", "postgres": "TEXT"},
    "legal_entity_state_fips": {"delta": "STRING", "postgres": "TEXT"},
    "place_of_perform_state_fips": {"delta": "STRING", "postgres": "TEXT"},
    "legal_entity_county_fips": {"delta": "STRING", "postgres": "TEXT"},
    "place_of_perform_county_fips": {"delta": "STRING", "postgres": "TEXT"},
    "pop_county_name": {"delta": "STRING", "postgres": "TEXT"},
}
SUBAWARD_SEARCH_POSTGRES_VECTORS = {
    "keyword_ts_vector": ["sub_awardee_or_recipient_legal", "product_or_service_description", "subaward_description"],
    "award_ts_vector": ["award_piid_fain", "subaward_number"],
    "recipient_name_ts_vector": ["sub_awardee_or_recipient_legal"],
}
SUBAWARD_SEARCH_DELTA_COLUMNS = {k: v["delta"] for k, v in SUBAWARD_SEARCH_COLUMNS.items()}
SUBAWARD_SEARCH_POSTGRES_COLUMNS = {
    **{k: v["postgres"] for k, v in SUBAWARD_SEARCH_COLUMNS.items()},
    **{col: "TSVECTOR" for col in SUBAWARD_SEARCH_POSTGRES_VECTORS},
}


subaward_search_create_sql_string = rf"""
    CREATE OR REPLACE TABLE {{DESTINATION_TABLE}} (
        {", ".join([f'{key} {val}' for key, val in SUBAWARD_SEARCH_DELTA_COLUMNS.items()])}
    )
    USING DELTA
    LOCATION 's3a://{{SPARK_S3_BUCKET}}/{{DELTA_LAKE_S3_PATH}}/{{DESTINATION_DATABASE}}/{{DESTINATION_TABLE}}'
"""

subaward_search_load_sql_string = fr"""
    WITH location_summary AS (
        SELECT
            UPPER(feature_name) as feature_name,
            state_alpha,
            county_numeric,
            UPPER(county_name) as county_name,
            census_code,
            ROW_NUMBER() OVER (PARTITION BY UPPER(feature_name), state_alpha ORDER BY UPPER(feature_name), state_alpha, county_sequence, coalesce(date_edited, date_created) DESC, id DESC) as row_num
        FROM
            global_temp.ref_city_county_state_code
        WHERE
            feature_class = 'Populated Place'
            AND COALESCE(feature_name, '') <>  ''
            AND COALESCE(state_alpha, '') <> ''
    ),
    recipient_summary AS (
        SELECT
          legal_business_name AS recipient_name,
          uei,
          duns,
          ROW_NUMBER() OVER(PARTITION BY uei ORDER BY uei, duns NULLS LAST, legal_business_name NULLS LAST) AS row
        FROM
            rpt.recipient_lookup AS rlv
    ),
    tas_summary AS (
        SELECT
            faba.award_id,
            SORT_ARRAY(COLLECT_SET(CAST(taa.treasury_account_identifier AS INTEGER))) AS treasury_account_identifiers
        FROM
            global_temp.treasury_appropriation_account AS taa
        INNER JOIN
            int.financial_accounts_by_awards AS faba
                ON taa.treasury_account_identifier = faba.treasury_account_id
        WHERE
            faba.award_id IS NOT NULL
        GROUP BY
            faba.award_id
    ),
    state_fips AS (
        SELECT
            fips,
            code as state_code
        FROM
            global_temp.state_data
        GROUP BY
            fips,
            state_code
    ),
    county_fips AS (
        SELECT
            state_numeric,
            county_numeric,
            county_name,
            state_alpha
        FROM
            global_temp.ref_city_county_state_code
        GROUP BY
            state_numeric,
            county_numeric,
            county_name,
            state_alpha
    ),
    cd_city_grouped_rownum AS (
      SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY(city_name, state_abbreviation) ORDER BY city_name, state_abbreviation ASC) AS row_num
      FROM global_temp.cd_city_grouped
    ),
    cd_city_grouped_distinct AS (
        SELECT
            city_name,
            state_abbreviation,
            congressional_district_no
        FROM cd_city_grouped_rownum
        WHERE row_num = 1
    )
    INSERT OVERWRITE {{DESTINATION_DATABASE}}.{{DESTINATION_TABLE}}
    (
        {", ".join([key for key in SUBAWARD_SEARCH_DELTA_COLUMNS])}
    )
    SELECT
        -- Broker Subaward Table Meta
        bs.created_at AS broker_created_at,
        bs.updated_at AS broker_updated_at,
        CAST(bs.id AS LONG) AS broker_subaward_id,

        -- Prime Award Fields (from Broker)
        bs.unique_award_key,
        bs.award_id AS award_piid_fain,
        bs.parent_award_id,
        CAST(bs.award_amount AS NUMERIC(23,2)),
        CAST(bs.action_date AS DATE),
        bs.fy,
        bs.awarding_agency_code,
        bs.awarding_agency_name,
        bs.awarding_sub_tier_agency_c,
        bs.awarding_sub_tier_agency_n,
        bs.awarding_office_code,
        bs.awarding_office_name,
        bs.funding_agency_code,
        bs.funding_agency_name,
        bs.funding_sub_tier_agency_co,
        bs.funding_sub_tier_agency_na,
        bs.funding_office_code,
        bs.funding_office_name,
        bs.awardee_or_recipient_uniqu,
        bs.awardee_or_recipient_uei,
        UPPER(bs.awardee_or_recipient_legal) AS awardee_or_recipient_legal,
        bs.dba_name,
        bs.ultimate_parent_unique_ide,
        bs.ultimate_parent_uei,
        bs.ultimate_parent_legal_enti,
        bs.legal_entity_country_code,
        bs.legal_entity_country_name,
        bs.legal_entity_state_code,
        bs.legal_entity_state_name,
        bs.legal_entity_zip,
        bs.legal_entity_county_code,
        bs.legal_entity_county_name,
        bs.legal_entity_congressional,
        bs.legal_entity_foreign_posta,
        bs.legal_entity_city_name,
        bs.legal_entity_address_line1,
        bs.business_types,
        bs.place_of_perform_country_co,
        bs.place_of_perform_country_na,
        bs.place_of_perform_state_code,
        bs.place_of_perform_state_name,
        bs.place_of_performance_zip,
        bs.place_of_performance_county_code AS place_of_perform_county_code,
        bs.place_of_performance_county_name AS place_of_perform_county_name,
        bs.place_of_perform_congressio,
        bs.place_of_perform_city_name,
        bs.place_of_perform_street,
        bs.award_description,
        bs.naics,
        bs.naics_description,
        bs.assistance_listing_numbers AS cfda_numbers,
        bs.assistance_listing_titles AS cfda_titles,

        -- Subaward Fields (from Broker)
        bs.subaward_type,
        CAST(bs.subaward_report_year AS SHORT),
        CAST(bs.subaward_report_month AS SHORT),
        UPPER(bs.subaward_number) AS subaward_number,
        COALESCE(CAST(bs.subaward_amount AS NUMERIC(23,2)), 0.00),
        CAST(bs.sub_action_date AS DATE),
        UPPER(bs.sub_awardee_or_recipient_uniqu) AS sub_awardee_or_recipient_uniqu,
        UPPER(bs.sub_awardee_or_recipient_uei) AS sub_awardee_or_recipient_uei,
        UPPER(bs.sub_awardee_or_recipient_legal) AS sub_awardee_or_recipient_legal_raw,
        UPPER(bs.sub_dba_name) AS sub_dba_name,
        UPPER(bs.sub_ultimate_parent_unique_ide) AS sub_ultimate_parent_unique_ide,
        UPPER(bs.sub_ultimate_parent_uei) AS sub_ultimate_parent_uei,
        UPPER(bs.sub_ultimate_parent_legal_enti) AS sub_ultimate_parent_legal_enti_raw,
        UPPER(bs.sub_legal_entity_country_code) AS sub_legal_entity_country_code_raw,
        bs.sub_legal_entity_country_name AS sub_legal_entity_country_name_raw,
        UPPER(bs.sub_legal_entity_state_code) AS sub_legal_entity_state_code,
        UPPER(bs.sub_legal_entity_state_name) AS sub_legal_entity_state_name,
        bs.sub_legal_entity_zip,
        bs.sub_legal_entity_county_code,
        UPPER(bs.sub_legal_entity_county_name) AS sub_legal_entity_county_name,
        UPPER(bs.sub_legal_entity_congressional) AS sub_legal_entity_congressional_raw,
        bs.sub_legal_entity_foreign_posta,
        UPPER(bs.sub_legal_entity_city_name) AS sub_legal_entity_city_name,
        UPPER(bs.sub_legal_entity_address_line1) AS sub_legal_entity_address_line1,
        UPPER(bs.sub_business_types) AS sub_business_types,
        UPPER(bs.sub_place_of_perform_country_co) AS sub_place_of_perform_country_co_raw,
        bs.sub_place_of_perform_country_na,
        UPPER(bs.sub_place_of_perform_state_code) AS sub_place_of_perform_state_code,
        UPPER(bs.sub_place_of_perform_state_name) AS sub_place_of_perform_state_name,
        bs.sub_place_of_performance_zip,
        bs.sub_place_of_performance_county_code AS sub_place_of_perform_county_code,
        UPPER(bs.sub_place_of_performance_county_name) AS sub_place_of_perform_county_name,
        UPPER(bs.sub_place_of_perform_congressio) AS sub_place_of_perform_congressio_raw,
        UPPER(bs.sub_place_of_perform_city_name) AS sub_place_of_perform_city_name,
        UPPER(bs.sub_place_of_perform_street) AS sub_place_of_perform_street,
        UPPER(bs.subaward_description) AS subaward_description,
        bs.sub_high_comp_officer1_full_na,
        CAST(bs.sub_high_comp_officer1_amount AS NUMERIC(23,2)),
        bs.sub_high_comp_officer2_full_na,
        CAST(bs.sub_high_comp_officer2_amount AS NUMERIC(23,2)),
        bs.sub_high_comp_officer3_full_na,
        CAST(bs.sub_high_comp_officer3_amount AS NUMERIC(23,2)),
        bs.sub_high_comp_officer4_full_na,
        CAST(bs.sub_high_comp_officer4_amount AS NUMERIC(23,2)),
        bs.sub_high_comp_officer5_full_na,
        CAST(bs.sub_high_comp_officer5_amount AS NUMERIC(23,2)),

        -- Additional Prime Award Fields (from Broker)
        bs.prime_id,
        UPPER(bs.internal_id) AS internal_id,
        CAST(bs.date_submitted AS TIMESTAMP),
        bs.report_type,
        bs.transaction_type,
        bs.program_title,
        bs.contract_agency_code,
        bs.contract_idv_agency_code,
        bs.grant_funding_agency_id,
        bs.grant_funding_agency_name,
        bs.federal_agency_name,
        bs.treasury_symbol,
        bs.dunsplus4,
        CAST(bs.recovery_model_q1 AS BOOLEAN),
        CAST(bs.recovery_model_q2 AS BOOLEAN),
        CAST(bs.compensation_q1 AS BOOLEAN),
        CAST(bs.compensation_q2 AS BOOLEAN),
        bs.high_comp_officer1_full_na,
        CAST(bs.high_comp_officer1_amount AS NUMERIC(23,2)),
        bs.high_comp_officer2_full_na,
        CAST(bs.high_comp_officer2_amount AS NUMERIC(23,2)),
        bs.high_comp_officer3_full_na,
        CAST(bs.high_comp_officer3_amount AS NUMERIC(23,2)),
        bs.high_comp_officer4_full_na,
        CAST(bs.high_comp_officer4_amount AS NUMERIC(23,2)),
        bs.high_comp_officer5_full_na,
        CAST(bs.high_comp_officer5_amount AS NUMERIC(23,2)),

        -- Additional Subaward Fields (from Broker)
        bs.sub_id,
        bs.sub_parent_id,
        bs.sub_federal_agency_id,
        bs.sub_federal_agency_name,
        bs.sub_funding_agency_id,
        bs.sub_funding_agency_name,
        bs.sub_funding_office_id,
        bs.sub_funding_office_name,
        bs.sub_naics,
        bs.sub_assistance_listing_numbers AS sub_cfda_numbers,
        bs.sub_dunsplus4,
        bs.sub_recovery_subcontract_amt,
        CAST(bs.sub_recovery_model_q1 AS BOOLEAN),
        CAST(bs.sub_recovery_model_q2 AS BOOLEAN),
        CAST(bs.sub_compensation_q1 AS BOOLEAN),
        CAST(bs.sub_compensation_q2 AS BOOLEAN),

        -- USAS Links (and associated derivations)
        a.id AS award_id,
        CASE
          WHEN bs.subaward_type = 'sub-grant' THEN 'grant'
          WHEN bs.subaward_type = 'sub-contract' THEN 'procurement'
          ELSE NULL
        END AS prime_award_group,
        a.type AS prime_award_type,
        CASE
            WHEN bs.subaward_type = 'sub-contract' THEN bs.award_id
            ELSE NULL
        END AS piid,
        CASE
          WHEN bs.subaward_type = 'sub-grant' THEN bs.award_id
          ELSE NULL
        END AS fain,
        a.latest_transaction_id,
        a.last_modified_date,

        a.awarding_agency_id,
        taa.name AS awarding_toptier_agency_name,
        taa.abbreviation AS awarding_toptier_agency_abbreviation,
        saa.name AS awarding_subtier_agency_name,
        saa.abbreviation AS awarding_subtier_agency_abbreviation,
        a.funding_agency_id,
        sfa.abbreviation AS funding_subtier_agency_abbreviation,
        sfa.name AS funding_subtier_agency_name,
        tfa.abbreviation AS funding_toptier_agency_abbreviation,
        tfa.name AS funding_toptier_agency_name,

        cfda.id AS cfda_id,
        cfda.program_number AS cfda_number,
        cfda.program_title AS cfda_title,

        -- USAS Derived Fields
        YEAR(CAST(bs.sub_action_date AS DATE) + interval '3 months') AS sub_fiscal_year,
        CASE
              WHEN CAST(bs.subaward_amount AS NUMERIC(23,2)) IS NULL THEN NULL
              WHEN COALESCE(CAST(bs.subaward_amount AS NUMERIC(23,2)), 0.00) < 1000000.0 THEN '<1M'         -- under $1 million
              WHEN COALESCE(CAST(bs.subaward_amount AS NUMERIC(23,2)), 0.00) = 1000000.0 THEN '1M'          -- $1 million
              WHEN COALESCE(CAST(bs.subaward_amount AS NUMERIC(23,2)), 0.00) < 25000000.0 THEN '1M..25M'     -- under $25 million
              WHEN COALESCE(CAST(bs.subaward_amount AS NUMERIC(23,2)), 0.00) = 25000000.0 THEN '25M'         -- $25 million
              WHEN COALESCE(CAST(bs.subaward_amount AS NUMERIC(23,2)), 0.00) < 100000000.0 THEN '25M..100M'   -- under $100 million
              WHEN COALESCE(CAST(bs.subaward_amount AS NUMERIC(23,2)), 0.00) = 100000000.0 THEN '100M'        -- $100 million
              WHEN COALESCE(CAST(bs.subaward_amount AS NUMERIC(23,2)), 0.00) < 500000000.0 THEN '100M..500M'  -- under $500 million
              WHEN COALESCE(CAST(bs.subaward_amount AS NUMERIC(23,2)), 0.00) = 500000000.0 THEN '500M'        -- $500 million
              ELSE '>500M'                               --  over $500 million
        END AS sub_total_obl_bin,
        UPPER(COALESCE(recipient_lookup.recipient_name, bs.sub_awardee_or_recipient_legal)) AS sub_awardee_or_recipient_legal,
        UPPER(COALESCE(parent_recipient_lookup.recipient_name, bs.sub_ultimate_parent_legal_enti)) AS sub_ultimate_parent_legal_enti,
        NULL AS business_type_code,
        COALESCE(tn.business_categories, array()) AS business_categories,
        tas.treasury_account_identifiers,
        fpds.pulled_from,
        fpds.type_of_contract_pricing,
        fpds.type_set_aside,
        fpds.extent_competed,
        fpds.product_or_service_code,
        psc.description AS product_or_service_description,

        LATEST_CURRENT_CD.recipient_location_congressional_code_current AS legal_entity_congressional_current,
        COALESCE(UPPER(bs.sub_legal_entity_country_code), 'USA') AS sub_legal_entity_country_code,
        rcc.country_name AS sub_legal_entity_country_name,
        LEFT(COALESCE(bs.sub_legal_entity_zip, ''), 5) AS sub_legal_entity_zip5,
        rec.census_code AS sub_legal_entity_city_code,
        LPAD(CAST(CAST(REGEXP_EXTRACT(UPPER(bs.sub_legal_entity_congressional), '^[A-Z]*(\\d+)(?:\\.\\d+)?$', 1) AS SHORT) AS STRING), 2, '0') AS sub_legal_entity_congressional,
        (CASE
            WHEN (
                UPPER(bs.sub_legal_entity_country_code) <> 'USA'
            ) THEN NULL
            ELSE COALESCE(rl_cd_state_grouped.congressional_district_no, rl_zips.congressional_district_no, rl_cd_zips_grouped.congressional_district_no, rl_cd_city_grouped.congressional_district_no, rl_cd_county_grouped.congressional_district_no)
        END) AS sub_legal_entity_congressional_current,

        LATEST_CURRENT_CD.pop_congressional_code_current AS place_of_performance_congressional_current,
        fabs.place_of_performance_scope AS place_of_perform_scope,
        COALESCE(UPPER(bs.sub_place_of_perform_country_co), 'USA') AS sub_place_of_perform_country_co,
        pcc.country_name AS sub_place_of_perform_country_name,
        LEFT(COALESCE(sub_place_of_performance_zip, ''), 5) AS sub_place_of_perform_zip5,
        pop.census_code AS sub_place_of_perform_city_code,
        LPAD(CAST(CAST(REGEXP_EXTRACT(UPPER(bs.sub_place_of_perform_congressio), '^[A-Z]*(\\d+)(?:\\.\\d+)?$', 1) AS SHORT) AS STRING), 2, '0') AS sub_place_of_perform_congressio,
        (CASE
            WHEN (
                UPPER(bs.sub_place_of_perform_country_co) <> 'USA'
            ) THEN NULL
            ELSE COALESCE(pop_cd_state_grouped.congressional_district_no, pop_zips.congressional_district_no, pop_cd_zips_grouped.congressional_district_no, pop_cd_city_grouped.congressional_district_no, pop_cd_county_grouped.congressional_district_no)
        END) AS sub_place_of_performance_congressional_current,
        rl_state_fips.fips AS legal_entity_state_fips,
        pop_state_fips.fips AS place_of_perform_state_fips,
        CONCAT(rl_state_fips.fips, rl_county_fips.county_numeric) AS legal_entity_county_fips,
        CONCAT(pop_state_fips.fips, pop_county_fips.county_numeric) AS place_of_perform_county_fips,
        UPPER(COALESCE(fpds.place_of_perform_county_na, fabs.place_of_perform_county_na)) AS pop_county_name
    FROM
        raw.subaward AS bs
    LEFT OUTER JOIN
        int.awards AS a
            ON a.generated_unique_award_id = bs.unique_award_key
    LEFT OUTER JOIN
        int.transaction_normalized AS tn
            ON tn.id = a.latest_transaction_id
    LEFT OUTER JOIN
        int.transaction_fpds AS fpds
            ON fpds.transaction_id = a.latest_transaction_id
    LEFT OUTER JOIN
        int.transaction_fabs AS fabs
            ON fabs.transaction_id = a.latest_transaction_id
    LEFT OUTER JOIN
        global_temp.agency AS aa
            ON aa.id = a.awarding_agency_id
    LEFT OUTER JOIN
        global_temp.toptier_agency AS taa
            ON taa.toptier_agency_id = aa.toptier_agency_id
    LEFT OUTER JOIN
        global_temp.subtier_agency AS saa
            ON saa.subtier_agency_id = aa.subtier_agency_id
    LEFT OUTER JOIN
        global_temp.agency AS fa
            ON fa.id = a.funding_agency_id
    LEFT OUTER JOIN
        global_temp.toptier_agency AS tfa
            ON tfa.toptier_agency_id = fa.toptier_agency_id
    LEFT OUTER JOIN
        global_temp.subtier_agency AS sfa
            ON sfa.subtier_agency_id = fa.subtier_agency_id
    LEFT OUTER JOIN
        tas_summary AS tas
            ON tas.award_id = a.id
    LEFT OUTER JOIN
        recipient_summary AS recipient_lookup
            ON (recipient_lookup.uei = UPPER(bs.sub_awardee_or_recipient_uei)
                AND bs.sub_awardee_or_recipient_uei IS NOT NULL AND recipient_lookup.row = 1)
    LEFT OUTER JOIN
        recipient_summary AS parent_recipient_lookup
            ON (parent_recipient_lookup.uei = UPPER(bs.sub_ultimate_parent_uei)
                AND bs.sub_ultimate_parent_uei IS NOT NULL AND parent_recipient_lookup.row = 1)
    LEFT OUTER JOIN
        int.transaction_current_cd_lookup AS LATEST_CURRENT_CD
            ON a.latest_transaction_id = LATEST_CURRENT_CD.transaction_id
    LEFT OUTER JOIN
        location_summary AS pop
            ON (pop.feature_name = UPPER(bs.sub_place_of_perform_city_name)
                AND pop.state_alpha = UPPER(bs.sub_place_of_perform_state_code)
                AND pop.row_num = 1)
    LEFT OUTER JOIN
        location_summary AS rec
            ON (rec.feature_name = UPPER(bs.sub_legal_entity_city_name)
                AND rec.state_alpha = UPPER(bs.sub_legal_entity_state_code)
                AND rec.row_num = 1)
    LEFT OUTER JOIN
        global_temp.ref_country_code AS pcc
            ON (pcc.country_code = UPPER(bs.sub_place_of_perform_country_co)
                AND bs.sub_place_of_perform_country_co IS NOT NULL)
    LEFT OUTER JOIN
        global_temp.ref_country_code AS rcc
            ON (rcc.country_code = UPPER(bs.sub_legal_entity_country_code)
                AND bs.sub_legal_entity_country_code IS NOT NULL)
    -- Congressional District '90' represents multiple congressional districts
    LEFT OUTER JOIN
        global_temp.cd_state_grouped pop_cd_state_grouped ON (
            pop_cd_state_grouped.state_abbreviation=UPPER(bs.sub_place_of_perform_state_code)
            AND pop_cd_state_grouped.congressional_district_no <> '90'
        )
    LEFT OUTER JOIN
        global_temp.cd_state_grouped rl_cd_state_grouped ON (
            rl_cd_state_grouped.state_abbreviation=UPPER(bs.sub_legal_entity_state_code)
            AND rl_cd_state_grouped.congressional_district_no <> '90'
        )
    LEFT OUTER JOIN
        raw.zips pop_zips ON (
            pop_zips.zip5=LEFT(COALESCE(sub_place_of_performance_zip, ''), 5)
            AND pop_zips.zip_last4=RIGHT(COALESCE(sub_place_of_performance_zip, ''), 4)
        )
    LEFT OUTER JOIN
        raw.zips rl_zips ON (
            rl_zips.zip5=LEFT(COALESCE(bs.sub_legal_entity_zip, ''), 5)
            AND rl_zips.zip_last4=RIGHT(COALESCE(bs.sub_legal_entity_zip, ''), 4)
        )
    LEFT OUTER JOIN
        global_temp.cd_zips_grouped pop_cd_zips_grouped ON (
            pop_cd_zips_grouped.zip5=LEFT(COALESCE(sub_place_of_performance_zip, ''), 5)
            AND pop_cd_zips_grouped.state_abbreviation=UPPER(bs.sub_place_of_perform_state_code)
        )
    LEFT OUTER JOIN
        global_temp.cd_zips_grouped rl_cd_zips_grouped ON (
            rl_cd_zips_grouped.zip5=LEFT(COALESCE(bs.sub_legal_entity_zip, ''), 5)
            AND rl_cd_zips_grouped.state_abbreviation=UPPER(bs.sub_legal_entity_state_code)
        )
    LEFT OUTER JOIN
        cd_city_grouped_distinct pop_cd_city_grouped ON (
            pop_cd_city_grouped.city_name=UPPER(bs.sub_place_of_perform_city_name)
            AND pop_cd_city_grouped.state_abbreviation=UPPER(bs.sub_place_of_perform_state_code)
        )
    LEFT OUTER JOIN
        cd_city_grouped_distinct rl_cd_city_grouped ON (
            rl_cd_city_grouped.city_name=UPPER(bs.sub_legal_entity_city_name)
            AND rl_cd_city_grouped.state_abbreviation=UPPER(bs.sub_legal_entity_state_code)
        )
    LEFT OUTER JOIN
        global_temp.cd_county_grouped pop_cd_county_grouped ON (
            pop_cd_county_grouped.county_number=LPAD(CAST(CAST(REGEXP_EXTRACT(pop.county_numeric, '^[A-Z]*(\\d+)(?:\\.\\d+)?$', 1) AS SHORT) AS STRING), 3, '0')
            AND pop_cd_county_grouped.state_abbreviation=UPPER(bs.sub_place_of_perform_state_code)
        )
    LEFT OUTER JOIN
        global_temp.cd_county_grouped rl_cd_county_grouped ON (
            rl_cd_county_grouped.county_number=LPAD(CAST(CAST(REGEXP_EXTRACT(rec.county_numeric, '^[A-Z]*(\\d+)(?:\\.\\d+)?$', 1) AS SHORT) AS STRING), 3, '0')
            AND rl_cd_county_grouped.state_abbreviation=UPPER(bs.sub_legal_entity_state_code)
        )

    LEFT OUTER JOIN
        global_temp.psc
            ON fpds.product_or_service_code = psc.code
    LEFT OUTER JOIN
        global_temp.references_cfda AS cfda
            ON cfda.program_number = split(bs.assistance_listing_numbers, ',')[0]
    LEFT OUTER JOIN state_fips AS pop_state_fips
        ON pop_state_fips.state_code = bs.place_of_perform_state_code
    LEFT OUTER JOIN state_fips AS rl_state_fips
        ON rl_state_fips.state_code = bs.legal_entity_state_code
    LEFT OUTER JOIN county_fips AS pop_county_fips
        ON UPPER(pop_county_fips.county_name) = UPPER(COALESCE(fpds.place_of_perform_county_na, fabs.place_of_perform_county_na))
            AND pop_county_fips.state_alpha = bs.place_of_perform_state_code
    LEFT OUTER JOIN county_fips AS rl_county_fips
        ON UPPER(rl_county_fips.county_name) = UPPER(COALESCE(fpds.legal_entity_county_name, fabs.legal_entity_county_name))
            AND rl_county_fips.state_alpha = bs.legal_entity_state_code
    -- Subaward numbers are crucial for identifying subawards and so those without subaward numbers won't be surfaced.
    WHERE bs.subaward_number IS NOT NULL
"""