analysis-oakland-2014-10-27/acs/aggregate.py from codeforamerica/resident-web-use-research

analysis-oakland-2014-10-27/acs/aggregate.py
Summary

Maintainability

45 mins
Test Coverage

Issues
from __future__ import division
from csv import DictReader, reader, writer

bayarea_tracts = dict()

with open('tracts-bayarea.txt') as file:
    rows = DictReader(file, dialect='excel-tab')
    
    for row in rows:
        tract_geoid = row['State FIPS'] + row['County FIPS'] + row['Tract']
        bayarea_tracts[tract_geoid] = {
            'housing units': row['H00010001'],
            'total population': row['P0010001'],
            }
        
        if row['P0080001'] != '0':
            total = float(row['P0080001'])
            white = float(row['P0080003'])
            black = float(row['P0080004'])
            asian = float(row['P0080006'])
            other = total - white - black - asian
        
            bayarea_tracts[tract_geoid].update({
                'white percent': 100 * white / total,
                'black percent': 100 * black / total,
                'asian percent': 100 * asian / total,
                'other race percent': 100 * other / total,
                })
        
        if row['P0090001'] != '0':
            total = float(row['P0090001'])
            latin = float(row['P0090002'])
            other = total - latin
        
            bayarea_tracts[tract_geoid].update({
                'latin percent': 100 * latin / total,
                'nonlatin percent': 100 * other / total,
                })
        
        '''
        H1. HOUSING UNITS [1]
            Universe: Housing units
            Total H00010001 42 9

        P0010001 - TOTAL POPULATION
        
        P8. RACE [71]
            Universe: Total population
            Total: P0080001 03 9
            Population of one race: P0080002 03 9
                White alone P0080003 03 9
                Black or African American alone P0080004 03 9
                American Indian and Alaska Native alone P0080005 03 9
                Asian alone P0080006 03 9
                Native Hawaiian and Other Pacific Islander alone P0080007 03 9
                Some Other Race alone P0080008 03 9
                Two or More Races: P0080009 03 9

        P12. SEX BY AGE [49]
            Universe: Total population
            Total: P0120001 04 9
                Male: P0120002 04 9
                    Under 5 years P0120003 04 9
                    5 to 9 years P0120004 04 9
                    10 to 14 years P0120005 04 9
                    15 to 17 years P0120006 04 9
                    18 and 19 years P0120007 04 9
                    20 years P0120008 04 9
                    21 years P0120009 04 9
                    22 to 24 years P0120010 04 9
                    25 to 29 years P0120011 04 9
                    30 to 34 years P0120012 04 9
                    35 to 39 years P0120013 04 9
                    40 to 44 years P0120014 04 9
                    45 to 49 years P0120015 04 9
                    50 to 54 years P0120016 04 9
                    55 to 59 years P0120017 04 9
                    60 and 61 years P0120018 04 9
                    62 to 64 years P0120019 04 9
                    65 and 66 years P0120020 04 9
                    67 to 69 years P0120021 04 9
                    70 to 74 years P0120022 04 9
                    75 to 79 years P0120023 04 9
                    80 to 84 years P0120024 04 9
                    85 years and over P0120025 04 9
                Female: P0120026 04 9
                    Under 5 years P0120027 04 9
                    5 to 9 years P0120028 04 9
                    10 to 14 years P0120029 04 9
                    15 to 17 years P0120030 04 9
                    18 and 19 years P0120031 04 9
                    20 years P0120032 04 9
                    21 years P0120033 04 9
                    22 to 24 years P0120034 04 9
                    25 to 29 years P0120035 04 9
                    30 to 34 years P0120036 04 9
                    35 to 39 years P0120037 04 9
                    40 to 44 years P0120038 04 9
                    45 to 49 years P0120039 04 9
                    50 to 54 years P0120040 04 9
                    55 to 59 years P0120041 04 9
                    60 and 61 years P0120042 04 9
                    62 to 64 years P0120043 04 9
                    65 and 66 years P0120044 04 9
                    67 to 69 years P0120045 04 9
                    70 to 74 years P0120046 04 9
                    75 to 79 years P0120047 04 9
                    80 to 84 years P0120048 04 9
                    85 years and over P0120049 04 9

        '''

acs_tracts, acs_area_ids = dict(), set()

with open('g20125ca.txt') as file:
    ''' ACSSF CA080000003021     060019223053000401000                                                                                                                                    08000US060019223053000401000            Census Tract 4010, Oakland city, Oakland CCD, Alameda County, California                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
    '''
    for line in file:
        summary_level = line[178:181]
        
        if summary_level != '140':
            # not a tract
            continue
        
        state_fips = line[25:27]
        county_fips = line[27:30]
        tract_id = line[40:46]
        tract_geoid = state_fips + county_fips + tract_id
        acs_area_id = line[13:20]
    
        acs_tracts[tract_geoid] = acs_area_id
        acs_area_ids.add(acs_area_id)

#
# quick idiot check for missing bay area tracts
#
if (set(bayarea_tracts.keys()) - set(acs_tracts.keys())):
    raise Exception()

acs_rows = dict()

with open('Sequence_Number_and_Table_Number_Lookup.txt') as file:
    for row in DictReader(file, dialect='excel'):
        if row['Table Title'] == 'MEDIAN HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2012 INFLATION-ADJUSTED DOLLARS)':
            income_column = int(row['Start Position']) - 1
            print '{0} ({1})'.format(income_column, row['Start Position'])

with open('e20125ca0058000.txt') as file:
    rows = reader(file, dialect='excel')
    
    for row in rows:
        acs_area_id = row[5]
        med_income = row[income_column]
        acs_rows[acs_area_id] = None if (med_income == '.') else int(med_income)

#
# quick idiot check for missing ACS area IDs
#
if (acs_area_ids - set(acs_rows.keys())):
    raise Exception()

#
# Now look up the actual stuff
#
with open('aggregated.csv', 'w') as file:
    out = writer(file, dialect='excel')
    out.writerow(('geoid', 'total population', 'housing units', 'median income',
                  'white percent', 'black percent', 'asian percent',
                  'other race percent', 'latin percent', 'non latin percent'))
    
    for (tract_id, sf1_data) in sorted(bayarea_tracts.items()):
        acs_area_id = acs_tracts[tract_id]
        args = tract_id, acs_area_id, acs_rows[acs_area_id], sf1_data['total population'], sf1_data['housing units']

        print 'Tract {0} ({1}) ${2}, {3} people, {4} homes'.format(*args), sf1_data

        row = (
            tract_id, sf1_data['total population'], sf1_data['housing units'],
            acs_rows[acs_area_id], sf1_data.get('white percent', None), sf1_data.get('black percent', None),
            sf1_data.get('asian percent', None), sf1_data.get('other race percent', None),
            sf1_data.get('latin percent', None), sf1_data.get('non latin percent', None)
            )
        out.writerow(row)