MakeYourLaws/MakeYourLaws

View on GitHub
app/models/fec/committee.rb

Summary

Maintainability
C
1 day
Test Coverage
# A committee record loaded from the FEC's fixed-width "committee master" files.
#
# Records are keyed by the 9-character +fec_id+. .update! downloads each election
# cycle's master file into FILES_DIR and creates or updates one record per line.
class Fec::Committee < ActiveRecord::Base
  self.table_name = 'fec_committees' # use namespaced table
  self.inheritance_column = :_type_disabled # disable STI
  FILES_DIR = Rails.root.join('db', 'data', 'fec', 'committee_master_files')

  # Two-digit election cycle years whose master files are fetched by .update!.
  # New years' "master" files may or may not include previous years' orgs. Bah.
  CYCLE_YEARS = %w(80 82 84 86 88 90 92 94 96 98 00 02 04 06 08 10 12).freeze

  # String values that are stored as nil by the before_validation clean-up.
  EMPTY_PLACEHOLDERS = %w(. NONE UNK 0).freeze

  has_paper_trail
  # attr_protected :id, :created_at, :fec_id

  # source: ftp://ftp.fec.gov/FEC/cm12.zip - updated weekly
  #                                                                      length start end
  validates :fec_id, uniqueness: true, length: { is: 9 }, presence: true #  9    0   8
  validates :name, presence: true, length: { maximum: 90 }              # 90    9  98
  validates :treasurer_name, length: { maximum: 38 }                    # 38   99 136
  validates :street_1, :street_2, length: { maximum: 34 }               # 34  137 170 street 1
  #                                                                       34  171 204 street 2
  validates :city, length: { maximum: 18 }                              # 18  205 222
  validates :state, length: { maximum: 2 }                              #  2  223 224
  validates :zip, length: { maximum: 5 }                                #  5  225 229
  validates :designation, allow_nil: true,                              #  1  230 230
                          inclusion: { in: Fec::CommitteeDesignation::TYPES.keys }
  validates :type, inclusion: { in: Fec::CommitteeType::TYPES.keys }    #  1  231 231
  validates :party, length: { maximum: 3 }                              #  3  232 234
  validates :filing_frequency, allow_nil: true,                         #  1  235 235
                               inclusion: { in: Fec::CommitteeFilingFrequency::TYPES.keys }
  validates :interest_group_category,                                   #  1  236 236
    allow_nil: true, inclusion: { in: Fec::CommitteeInterestGroupCategory::TYPES.keys }
  validates :connected_organization_name, length: { maximum: 38 }       # 38  237 274

  # if committee type H S or P
  validates :candidate_id, length: { is: 9 }, allow_nil: true           #  9  275 283

  # not very reliable, since the name is often partial
  belongs_to :connected_organization, foreign_key: 'connected_organization_name',
    primary_key: 'name', class_name: 'Fec::Committee'

  belongs_to :candidate, primary_key: 'fec_id', class_name: 'Fec::Candidate'

  # Normalize every String attribute before validation: drop characters that are
  # invalid in the string's encoding, trim surrounding whitespace, and replace
  # blank or placeholder values (EMPTY_PLACEHOLDERS) with nil.
  before_validation do
    cleaned = attributes.each_with_object({}) do |(attr, value), changes|
      next unless value.is_a?(String)

      # Filter invalid characters *before* stripping: String#strip can raise
      # ArgumentError when the string contains an invalid byte sequence.
      tidy = value.chars.select(&:valid_encoding?).join.strip
      tidy = nil if tidy.blank? || EMPTY_PLACEHOLDERS.include?(tidy)
      changes[attr] = tidy if value != tidy
    end
    # will only update the changed ones
    self.attributes = cleaned
  end

  # Build (without saving) the record described by one fixed-width master-file line.
  #
  # An existing record with the same fec_id (columns 0..8) is reused, otherwise a
  # new one is initialized. The remaining columns are copied by position; see the
  # start/end offsets next to the validations.
  #
  # @param line [String] one line of a committee master file
  # @param year [Integer, nil] four-digit year of the file the line came from
  # @return [Fec::Committee] the unsaved record - meant for batch import
  def self.new_from_line(line, year = nil)
    rec = where(fec_id: line[0..8]).first_or_initialize
    rec.name                        = line[9..98]
    rec.treasurer_name              = line[99..136]
    rec.street_1                    = line[137..170]
    rec.street_2                    = line[171..204]
    rec.city                        = line[205..222]
    rec.state                       = line[223..224]
    rec.zip                         = line[225..229]
    rec.designation                 = line[230]
    rec.type                        = line[231]
    rec.party                       = line[232..234]
    rec.filing_frequency            = line[235]
    rec.interest_group_category     = line[236]
    rec.connected_organization_name = line[237..274]
    rec.candidate_id                = line[275..283]

    # Keep the most recent year seen. compact ignores a missing side, so a nil
    # +year+ never wipes out a year that is already stored.
    rec.last_update_year = [year, rec.last_update_year].compact.max
    rec # not saved - meant for batch import
  end

  # Build the record for +line+ (see .new_from_line) and save it, raising if the
  # save fails.
  #
  # @param line [String] one line of a committee master file
  # @param year [Integer, nil] four-digit year of the file the line came from
  def self.update_from_line!(line, year = nil)
    new_from_line(line, year).save!
  end

  # @return [Time, nil] the latest updated_at among all records, nil if there is none
  def self.last_updated
    maximum(:updated_at)
  end

  # Download the committee master file of every cycle in CYCLE_YEARS and import
  # the ones that look newer than what is already loaded.
  #
  # Shells out to +wget+ and +unzip+ inside FILES_DIR. Each line of an imported
  # file is saved individually through .update_from_line!.
  def self.update!
    CYCLE_YEARS.each do |year|
      zip_path = File.join(FILES_DIR, "cm#{year}.zip")
      prev_mtime = File.exist?(zip_path) ? File.mtime(zip_path) : nil

      # cycles 80..98 are fetched from a "/19YY" directory, later ones from the root
      remote_dir = year.to_i >= 80 ? "/19#{year}" : ''
      # wget -N preserves the ftp server's date
      `cd #{FILES_DIR} && wget -N ftp://ftp.fec.gov/FEC#{remote_dir}/cm#{year}.zip`
      mtime = File.mtime(zip_path)

      # Skip the import when neither the zip nor the table looks out of date.
      updated = last_updated
      next unless prev_mtime.nil? || updated.nil? || updated < mtime || prev_mtime < mtime

      filename = File.join(FILES_DIR, "fec_commitees_#{year}_#{mtime.to_date}.dta")
      `cd #{FILES_DIR} && unzip -u -j -o #{zip_path}`
      # the name of the data file inside the archive depends on the cycle
      dataname = case year.to_i
                 when 80..87 then "FOIACM.D#{year}"
                 when 88..97 then 'FOIACM.DTA'
                 else 'foiacm.dta'
                 end
      File.rename File.join(FILES_DIR, dataname), filename

      # Rows are saved one at a time. A batched alternative that was left disabled:
      #   batch << new_from_line(line), and for every ~1000 rows
      #   import batch, :on_duplicate_key_update => (column_names -
      #     %w(id fec_id created_at)).map(&:to_sym)
      cycle_year = four_digit_year(year)
      # File.foreach closes the file for us, even if a save raises
      File.foreach(filename) { |line| update_from_line!(line, cycle_year) }
    end
  end

  # Expand a two-digit cycle year: 80 and above map to 19YY, anything lower to 20YY.
  def self.four_digit_year(two_digit_year)
    yy = two_digit_year.to_i
    yy >= 80 ? 1900 + yy : 2000 + yy
  end
  private_class_method :four_digit_year
end