jupyter/limpa-candidatos-2018.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"import glob\n",
"import json\n",
"import os\n",
"\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"path = '../data/consulta_cand_2018'"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"files = glob.glob(os.path.join(path,'*.csv'))"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"labels_final = [\n",
" 'ano_eleicao',\n",
" 'num_turno',\n",
" 'descricao_eleicao',\n",
" 'sigla_uf',\n",
" 'sigla_ue',\n",
" 'descricao_ue',\n",
" 'codigo_cargo',\n",
" 'descricao_cargo',\n",
" 'nome_candidato',\n",
" 'sequencial_candidato',\n",
" 'numero_candidato',\n",
" 'cpf_candidato',\n",
" 'nome_urna_candidato',\n",
" 'cod_situacao_candidatura',\n",
" 'des_situacao_candidatura',\n",
" 'numero_partido',\n",
" 'sigla_partido',\n",
" 'nome_partido', \n",
" 'codigo_legenda',\n",
" 'sigla_legenda',\n",
" 'composicao_legenda',\n",
" 'nome_legenda',\n",
" 'codigo_ocupacao',\n",
" 'descricao_ocupacao',\n",
" 'data_nascimento',\n",
" 'num_titulo_eleitoral_candidato',\n",
" 'idade_data_eleicao',\n",
" 'codigo_sexo',\n",
" 'descricao_sexo',\n",
" 'cod_grau_instrucao',\n",
" 'descricao_grau_instrucao',\n",
" 'codigo_estado_civil',\n",
" 'descricao_estado_civil',\n",
" 'codigo_cor_raca',\n",
" 'descricao_cor_raca',\n",
" 'codigo_nacionalidade',\n",
" 'descricao_nacionalidade',\n",
" 'sigla_uf_nascimento',\n",
" 'codigo_municipio_nascimento',\n",
" 'nome_municipio_nascimento',\n",
" 'despesa_max_campanha',\n",
" 'cod_sit_tot_turno',\n",
" 'desc_sit_tot_turno',\n",
" 'nm_email',\n",
" 'datahora_geracao',\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>DT_GERACAO</th>\n",
" <th>HH_GERACAO</th>\n",
" <th>ANO_ELEICAO</th>\n",
" <th>CD_TIPO_ELEICAO</th>\n",
" <th>NM_TIPO_ELEICAO</th>\n",
" <th>NR_TURNO</th>\n",
" <th>CD_ELEICAO</th>\n",
" <th>DS_ELEICAO</th>\n",
" <th>DT_ELEICAO</th>\n",
" <th>TP_ABRANGENCIA</th>\n",
" <th>...</th>\n",
" <th>DS_COR_RACA</th>\n",
" <th>CD_OCUPACAO</th>\n",
" <th>DS_OCUPACAO</th>\n",
" <th>NR_DESPESA_MAX_CAMPANHA</th>\n",
" <th>CD_SIT_TOT_TURNO</th>\n",
" <th>DS_SIT_TOT_TURNO</th>\n",
" <th>ST_REELEICAO</th>\n",
" <th>ST_DECLARAR_BENS</th>\n",
" <th>NR_PROTOCOLO_CANDIDATURA</th>\n",
" <th>NR_PROCESSO</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20/08/2018</td>\n",
" <td>15:14:58</td>\n",
" <td>2018</td>\n",
" <td>2</td>\n",
" <td>ELEIÇÃO ORDINÁRIA</td>\n",
" <td>1</td>\n",
" <td>297</td>\n",
" <td>Eleições Gerais Estaduais 2018</td>\n",
" <td>07/10/2018</td>\n",
" <td>ESTADUAL</td>\n",
" <td>...</td>\n",
" <td>PARDA</td>\n",
" <td>125</td>\n",
" <td>ADMINISTRADOR</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>#NULO#</td>\n",
" <td>N</td>\n",
" <td>S</td>\n",
" <td>-1</td>\n",
" <td>6009339520186140000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20/08/2018</td>\n",
" <td>15:14:58</td>\n",
" <td>2018</td>\n",
" <td>2</td>\n",
" <td>ELEIÇÃO ORDINÁRIA</td>\n",
" <td>1</td>\n",
" <td>297</td>\n",
" <td>Eleições Gerais Estaduais 2018</td>\n",
" <td>07/10/2018</td>\n",
" <td>ESTADUAL</td>\n",
" <td>...</td>\n",
" <td>BRANCA</td>\n",
" <td>131</td>\n",
" <td>ADVOGADO</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>#NULO#</td>\n",
" <td>N</td>\n",
" <td>S</td>\n",
" <td>-1</td>\n",
" <td>6004896220186140000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>20/08/2018</td>\n",
" <td>15:14:58</td>\n",
" <td>2018</td>\n",
" <td>2</td>\n",
" <td>ELEIÇÃO ORDINÁRIA</td>\n",
" <td>1</td>\n",
" <td>297</td>\n",
" <td>Eleições Gerais Estaduais 2018</td>\n",
" <td>07/10/2018</td>\n",
" <td>ESTADUAL</td>\n",
" <td>...</td>\n",
" <td>PARDA</td>\n",
" <td>125</td>\n",
" <td>ADMINISTRADOR</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>#NULO#</td>\n",
" <td>N</td>\n",
" <td>N</td>\n",
" <td>-1</td>\n",
" <td>6003735620186140000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>20/08/2018</td>\n",
" <td>15:14:58</td>\n",
" <td>2018</td>\n",
" <td>2</td>\n",
" <td>ELEIÇÃO ORDINÁRIA</td>\n",
" <td>1</td>\n",
" <td>297</td>\n",
" <td>Eleições Gerais Estaduais 2018</td>\n",
" <td>07/10/2018</td>\n",
" <td>ESTADUAL</td>\n",
" <td>...</td>\n",
" <td>PARDA</td>\n",
" <td>278</td>\n",
" <td>VEREADOR</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>#NULO#</td>\n",
" <td>N</td>\n",
" <td>S</td>\n",
" <td>-1</td>\n",
" <td>6008698520186140000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20/08/2018</td>\n",
" <td>15:14:58</td>\n",
" <td>2018</td>\n",
" <td>2</td>\n",
" <td>ELEIÇÃO ORDINÁRIA</td>\n",
" <td>1</td>\n",
" <td>297</td>\n",
" <td>Eleições Gerais Estaduais 2018</td>\n",
" <td>07/10/2018</td>\n",
" <td>ESTADUAL</td>\n",
" <td>...</td>\n",
" <td>PRETA</td>\n",
" <td>931</td>\n",
" <td>ESTUDANTE, BOLSISTA, ESTAGIÁRIO E ASSEMELHADOS</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>#NULO#</td>\n",
" <td>N</td>\n",
" <td>N</td>\n",
" <td>-1</td>\n",
" <td>6010716220186140000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 58 columns</p>\n",
"</div>"
],
"text/plain": [
" DT_GERACAO HH_GERACAO ANO_ELEICAO CD_TIPO_ELEICAO NM_TIPO_ELEICAO \\\n",
"0 20/08/2018 15:14:58 2018 2 ELEIÇÃO ORDINÁRIA \n",
"1 20/08/2018 15:14:58 2018 2 ELEIÇÃO ORDINÁRIA \n",
"2 20/08/2018 15:14:58 2018 2 ELEIÇÃO ORDINÁRIA \n",
"3 20/08/2018 15:14:58 2018 2 ELEIÇÃO ORDINÁRIA \n",
"4 20/08/2018 15:14:58 2018 2 ELEIÇÃO ORDINÁRIA \n",
"\n",
" NR_TURNO CD_ELEICAO DS_ELEICAO DT_ELEICAO \\\n",
"0 1 297 Eleições Gerais Estaduais 2018 07/10/2018 \n",
"1 1 297 Eleições Gerais Estaduais 2018 07/10/2018 \n",
"2 1 297 Eleições Gerais Estaduais 2018 07/10/2018 \n",
"3 1 297 Eleições Gerais Estaduais 2018 07/10/2018 \n",
"4 1 297 Eleições Gerais Estaduais 2018 07/10/2018 \n",
"\n",
" TP_ABRANGENCIA ... DS_COR_RACA CD_OCUPACAO \\\n",
"0 ESTADUAL ... PARDA 125 \n",
"1 ESTADUAL ... BRANCA 131 \n",
"2 ESTADUAL ... PARDA 125 \n",
"3 ESTADUAL ... PARDA 278 \n",
"4 ESTADUAL ... PRETA 931 \n",
"\n",
" DS_OCUPACAO NR_DESPESA_MAX_CAMPANHA \\\n",
"0 ADMINISTRADOR -1 \n",
"1 ADVOGADO -1 \n",
"2 ADMINISTRADOR -1 \n",
"3 VEREADOR -1 \n",
"4 ESTUDANTE, BOLSISTA, ESTAGIÁRIO E ASSEMELHADOS -1 \n",
"\n",
" CD_SIT_TOT_TURNO DS_SIT_TOT_TURNO ST_REELEICAO ST_DECLARAR_BENS \\\n",
"0 -1 #NULO# N S \n",
"1 -1 #NULO# N S \n",
"2 -1 #NULO# N N \n",
"3 -1 #NULO# N S \n",
"4 -1 #NULO# N N \n",
"\n",
" NR_PROTOCOLO_CANDIDATURA NR_PROCESSO \n",
"0 -1 6009339520186140000 \n",
"1 -1 6004896220186140000 \n",
"2 -1 6003735620186140000 \n",
"3 -1 6008698520186140000 \n",
"4 -1 6010716220186140000 \n",
"\n",
"[5 rows x 58 columns]"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"dfs_candidatos = [] \n",
"for file in files:\n",
" df = pd.read_csv(file, sep=';', encoding='latin-1')\n",
" df.rename(columns={\n",
" 'ANO_ELEICAO': 'ano_eleicao',\n",
" 'NR_TURNO': 'num_turno',\n",
" 'DS_ELEICAO': 'descricao_eleicao',\n",
" 'SG_UF': 'sigla_uf',\n",
" 'SG_UE': 'sigla_ue',\n",
" 'NM_UE': 'descricao_ue',\n",
" 'CD_CARGO': 'codigo_cargo',\n",
" 'DS_CARGO': 'descricao_cargo',\n",
" 'NM_CANDIDATO': 'nome_candidato',\n",
" 'SQ_CANDIDATO': 'sequencial_candidato',\n",
" 'NR_CANDIDATO': 'numero_candidato',\n",
" 'NR_CPF_CANDIDATO': 'cpf_candidato',\n",
" 'NM_URNA_CANDIDATO': 'nome_urna_candidato',\n",
" 'CD_SITUACAO_CANDIDATURA': 'cod_situacao_candidatura',\n",
" 'DS_SITUACAO_CANDIDATURA': 'des_situacao_candidatura',\n",
" 'NR_PARTIDO': 'numero_partido',\n",
" 'SG_PARTIDO': 'sigla_partido',\n",
" 'NM_PARTIDO': 'nome_partido', \n",
" 'TP_AGREMIACAO': 'codigo_legenda',\n",
" 'SQ_COLIGACAO': 'sigla_legenda',\n",
" 'DS_COMPOSICAO_COLIGACAO': 'composicao_legenda',\n",
" 'NM_COLIGACAO': 'nome_legenda',\n",
" 'CD_OCUPACAO': 'codigo_ocupacao',\n",
" 'DS_OCUPACAO': 'descricao_ocupacao',\n",
" 'DT_NASCIMENTO': 'data_nascimento',\n",
" 'NR_TITULO_ELEITORAL_CANDIDATO': 'num_titulo_eleitoral_candidato',\n",
" 'NR_IDADE_DATA_POSSE': 'idade_data_eleicao',\n",
" 'CD_GENERO': 'codigo_sexo',\n",
" 'DS_GENERO': 'descricao_sexo',\n",
" 'CD_GRAU_INSTRUCAO': 'cod_grau_instrucao',\n",
" 'DS_GRAU_INSTRUCAO': 'descricao_grau_instrucao',\n",
" 'CD_ESTADO_CIVIL': 'codigo_estado_civil',\n",
" 'DS_ESTADO_CIVIL': 'descricao_estado_civil',\n",
" 'CD_COR_RACA': 'codigo_cor_raca',\n",
" 'DS_COR_RACA': 'descricao_cor_raca',\n",
" 'CD_NACIONALIDADE': 'codigo_nacionalidade',\n",
" 'DS_NACIONALIDADE': 'descricao_nacionalidade',\n",
" 'SG_UF_NASCIMENTO': 'sigla_uf_nascimento',\n",
" 'CD_MUNICIPIO_NASCIMENTO': 'codigo_municipio_nascimento',\n",
" 'NM_MUNICIPIO_NASCIMENTO': 'nome_municipio_nascimento',\n",
" 'NR_DESPESA_MAX_CAMPANHA': 'despesa_max_campanha',\n",
" 'CD_SIT_TOT_TURNO': 'cod_sit_tot_turno',\n",
" 'DS_SIT_TOT_TURNO': 'desc_sit_tot_turno',\n",
" 'NM_EMAIL': 'nm_email',\n",
" }, inplace=True)\n",
" df['datahora_geracao'] = df.DT_GERACAO + ' ' + df.HH_GERACAO\n",
" df = df[labels_final]\n",
" df.desc_sit_tot_turno = df.desc_sit_tot_turno.str.replace('#NULO#', '')\n",
"\n",
" dfs_candidatos.append(df)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"df_candidatos_2018 = pd.concat(dfs_candidatos)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(27887, 45)\n"
]
}
],
"source": [
"print(df_candidatos_2018.shape)\n",
"df_candidatos_2018.to_csv('../data/candidatos-2018-limpo.csv', index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}