bdurand/us_geo

View on GitHub
data/lib/us_geo_data/gnis.rb

Summary

Maintainability
A
3 hrs
Test Coverage
# frozen_string_literal: true

module USGeoData
  # Splits the USGS GNIS "names with federal codes" file into smaller CSV files:
  # one each for counties, county subdivisions, and places, plus a file linking
  # places to the counties they appear in.
  #
  # The helpers +foreach+, +data_file+ and +processed_file+ are not defined in
  # this class; presumably they come from the included Processor module.
  class Gnis
    include Processor

    # Maps a Census class code to the kinds of entity a record with that code is
    # exported as. A code may map to more than one kind (e.g. "C5" is both a
    # place and a subdivision). Codes that are not listed are skipped entirely.
    #
    # See https://www.census.gov/library/reference/code-lists/class-codes.html
    # Ignoring C9 since these tend to be ghost towns
    FIPS_CLASSIFICATIONS = {
      "C1" => [:place],
      "C2" => [:place],
      "C3" => [:place],
      "C4" => [:place],
      "C5" => [:place, :subdivision],
      "C6" => [:place, :subdivision],
      "C7" => [:county, :place],
      "M1" => [:place],
      "M2" => [:place],
      "U1" => [:place],
      "U2" => [:place],
      "H1" => [:county],
      "H4" => [:county],
      "H5" => [:county],
      "H6" => [:county],
      "T1" => [:subdivision],
      "T2" => [:subdivision],
      "T5" => [:subdivision],
      "T9" => [:subdivision],
      "Z1" => [:subdivision],
      "Z2" => [:subdivision],
      "Z3" => [:subdivision],
      "Z5" => [:subdivision]
    }.freeze

    # Names of the generated files (resolved to full paths via +processed_file+).
    COUNTIES_FILE = "gnis_counties.csv"
    SUBDIVISIONS_FILE = "gnis_subdivisions.csv"
    PLACES_FILE = "gnis_places.csv"
    PLACE_COUNTIES_FILE = "gnis_place_counties.csv"

    # Parse out the data from the USGS names with federal codes file into more manageable chunks.
    #
    # Reads the pipe-delimited GNIS data file row by row and appends each row to
    # the output file(s) selected by its Census class code:
    #
    # * counties file: every row whose class code is a county code
    # * subdivisions file: subdivision rows with a county sequence of 1
    # * places file: place rows with a county sequence of 1
    # * place counties file: every place row (one place/county pair per row)
    #
    # @return the value returned by +foreach+
    def preprocess
      outputs = [COUNTIES_FILE, SUBDIVISIONS_FILE, PLACES_FILE, PLACE_COUNTIES_FILE]

      open_gnis_outputs(outputs) do |counties_csv, subdivisions_csv, places_csv, place_counties_csv|
        counties_csv << ["GNIS ID", "GEOID", "Name", "Short Name", "State", "FIPS Class", "Latitude", "Longitude"]
        subdivisions_csv << ["GNIS ID", "GEOID", "Name", "State", "FIPS Class", "County GEOID", "Latitude", "Longitude"]
        places_csv << ["GNIS ID", "GEOID", "Name", "State", "FIPS Class", "County GEOID", "Latitude", "Longitude"]
        place_counties_csv << ["Place GEOID", "County GEOID"]

        foreach(data_file(USGeoData::GNIS_DATA_FILE), col_sep: "|", quote_char: nil) do |row|
          fips_class_code = row["census_class_code"]
          gnis_id = row["feature_id"].to_i
          name = row["feature_name"]
          state_fips = row["state_numeric"]
          state_code = lookup_state_code(row["state_name"])
          place_geoid = "#{state_fips}#{row["census_code"]}"
          county_geoid = "#{state_fips}#{row["county_numeric"]}"
          lat = row["prim_lat_dec"]
          lng = row["prim_long_dec"]
          county_num = row["county_sequence"].to_i

          if county?(fips_class_code)
            # Fall back to the feature name when the row has no county name.
            county_name = row["county_name"].to_s
            county_name = name if county_name.empty?
            counties_csv << [gnis_id, county_geoid, name, county_name, state_code, fips_class_code, lat, lng]
          end

          if subdivision?(fips_class_code) && county_num == 1
            # Subdivision identifiers include the county code. This is kept in its
            # own variable so that it does not leak into the place rows written
            # below for class codes that are both a place and a subdivision.
            subdivision_geoid = "#{state_fips}#{row["county_numeric"]}#{row["census_code"]}"
            subdivisions_csv << [gnis_id, subdivision_geoid, name, state_code, fips_class_code, county_geoid, lat, lng]
          end

          if place?(fips_class_code)
            # Only the row with county sequence 1 defines the place itself; every
            # row contributes a place/county pair.
            if county_num == 1
              places_csv << [gnis_id, place_geoid, name, state_code, fips_class_code, county_geoid, lat, lng]
            end
            place_counties_csv << [place_geoid, county_geoid]
          end
        end
      end
    end

    private

    # Opens each named processed file for writing and yields one CSV writer per
    # file, in the same order as +file_names+. Every file that was successfully
    # opened is closed when the block exits, including when opening a later file
    # or the block itself raises.
    #
    # @param file_names [Array<String>] names passed to +processed_file+
    # @return the value of the block
    def open_gnis_outputs(file_names)
      files = []
      file_names.each { |file_name| files << File.open(processed_file(file_name), "w") }
      yield(*files.map { |file| CSV.new(file) })
    ensure
      files.each(&:close)
    end

    # Translates a state name into its code using the states data file. The
    # lookup table is built on first use and memoized in +@states+; names are
    # compared case-insensitively by upcasing both sides.
    #
    # @param state_name [String] state name as it appears in the GNIS file
    # @return [String, nil] the matching "Code" value, or nil if the name is unknown
    def lookup_state_code(state_name)
      unless @states
        @states = {}
        foreach(data_file(USGeoData::STATES_FILE)) do |row|
          @states[row["Name"].upcase] = row["Code"]
        end
      end
      @states[state_name.upcase]
    end

    # @param fips_class_code [String, nil] Census class code
    # @return [Boolean, nil] true if the code is exported as a county; nil if the code is not listed
    def county?(fips_class_code)
      FIPS_CLASSIFICATIONS[fips_class_code]&.include?(:county)
    end

    # @param fips_class_code [String, nil] Census class code
    # @return [Boolean, nil] true if the code is exported as a county subdivision; nil if the code is not listed
    def subdivision?(fips_class_code)
      FIPS_CLASSIFICATIONS[fips_class_code]&.include?(:subdivision)
    end

    # @param fips_class_code [String, nil] Census class code
    # @return [Boolean, nil] true if the code is exported as a place; nil if the code is not listed
    def place?(fips_class_code)
      FIPS_CLASSIFICATIONS[fips_class_code]&.include?(:place)
    end
  end
end