Snippets.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\cappe\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3058: DtypeWarning: Columns (5) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" interactivity=interactivity, compiler=compiler, result=result)\n"
]
}
],
"source": [
"df = pd.read_csv(\"farmacie.csv\", sep=\";\", encoding=\"ISO-8859-1\")\n",
"df = df[[\"INDIRIZZO\", \"CAP\", \"DESCRIZIONECOMUNE\", \"FRAZIONE\", \"SIGLAPROVINCIA\", \"DESCRIZIONEPROVINCIA\", \"DESCRIZIONEREGIONE\"]]\n",
"df.columns = [\"address\", \"cap\", \"municipality\", \"hamlet\", \"province_code\", \"province\", \"region\"]\n",
"lower = [\n",
" \"municipality\", \"hamlet\", \"province\", \"region\"\n",
"]\n",
"for key in lower:\n",
" df[key]= df[key].apply(lambda x: \" \".join([\n",
" y.capitalize()\n",
" for y in x.split(\" \")\n",
" ]) if not pd.isna(x) else \"\")\n",
"\n",
"df = df[df.address.str.count(\",\")==1]\n",
"df = df[df.hamlet==\"-\"]\n",
"\n",
"\n",
"values = []\n",
"for val in df.address:\n",
" address, house_number = val.split(\",\")\n",
" values.append({\n",
" \"address\":\" \".join([\n",
" e.capitalize()\n",
" for e in address.strip().split(\" \")\n",
" ]),\n",
" \"house_number\":house_number.strip()\n",
" })\n",
"\n",
"df = df.drop(columns=[\"address\", \"hamlet\"]).reset_index(drop=True)\n",
"df = pd.concat([\n",
" pd.DataFrame(values),\n",
" df\n",
"], axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>address</th>\n",
" <th>house_number</th>\n",
" <th>cap</th>\n",
" <th>municipality</th>\n",
" <th>province_code</th>\n",
" <th>province</th>\n",
" <th>region</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>Via Buggianese</td>\n",
" <td>108</td>\n",
" <td>51019</td>\n",
" <td>Ponte Buggianese</td>\n",
" <td>PT</td>\n",
" <td>Pistoia</td>\n",
" <td>Toscana</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>Via San Francesco Da Paola</td>\n",
" <td>10</td>\n",
" <td>10123</td>\n",
" <td>Torino</td>\n",
" <td>TO</td>\n",
" <td>Torino</td>\n",
" <td>Piemonte</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>Piazza Vittorio Veneto</td>\n",
" <td>10</td>\n",
" <td>10123</td>\n",
" <td>Torino</td>\n",
" <td>TO</td>\n",
" <td>Torino</td>\n",
" <td>Piemonte</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>Piazza Vittorio Veneto</td>\n",
" <td>10</td>\n",
" <td>10123</td>\n",
" <td>Torino</td>\n",
" <td>TO</td>\n",
" <td>Torino</td>\n",
" <td>Piemonte</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>Piazza Vittorio Veneto</td>\n",
" <td>10</td>\n",
" <td>10123</td>\n",
" <td>Torino</td>\n",
" <td>TO</td>\n",
" <td>Torino</td>\n",
" <td>Piemonte</td>\n",
" </tr>\n",
" <tr>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>38855</td>\n",
" <td>Via Piersanti Mattarella</td>\n",
" <td>11/B</td>\n",
" <td>96012</td>\n",
" <td>Avola</td>\n",
" <td>SR</td>\n",
" <td>Siracusa</td>\n",
" <td>Sicilia</td>\n",
" </tr>\n",
" <tr>\n",
" <td>38856</td>\n",
" <td>Via Anna Frank</td>\n",
" <td>9/11</td>\n",
" <td>20121</td>\n",
" <td>Bollate</td>\n",
" <td>MI</td>\n",
" <td>Milano</td>\n",
" <td>Lombardia</td>\n",
" </tr>\n",
" <tr>\n",
" <td>38857</td>\n",
" <td>Contrada Bagni</td>\n",
" <td>38</td>\n",
" <td>84020</td>\n",
" <td>Colliano</td>\n",
" <td>SA</td>\n",
" <td>Salerno</td>\n",
" <td>Campania</td>\n",
" </tr>\n",
" <tr>\n",
" <td>38858</td>\n",
" <td>Via Dei Mille</td>\n",
" <td>3</td>\n",
" <td>20020</td>\n",
" <td>Cesate</td>\n",
" <td>MI</td>\n",
" <td>Milano</td>\n",
" <td>Lombardia</td>\n",
" </tr>\n",
" <tr>\n",
" <td>38859</td>\n",
" <td>Via Carlo Maria Maggi</td>\n",
" <td>38</td>\n",
" <td>20855</td>\n",
" <td>Lesmo</td>\n",
" <td>MB</td>\n",
" <td>Monza - Brianza</td>\n",
" <td>Lombardia</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>38860 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" address house_number cap municipality \\\n",
"0 Via Buggianese 108 51019 Ponte Buggianese \n",
"1 Via San Francesco Da Paola 10 10123 Torino \n",
"2 Piazza Vittorio Veneto 10 10123 Torino \n",
"3 Piazza Vittorio Veneto 10 10123 Torino \n",
"4 Piazza Vittorio Veneto 10 10123 Torino \n",
"... ... ... ... ... \n",
"38855 Via Piersanti Mattarella 11/B 96012 Avola \n",
"38856 Via Anna Frank 9/11 20121 Bollate \n",
"38857 Contrada Bagni 38 84020 Colliano \n",
"38858 Via Dei Mille 3 20020 Cesate \n",
"38859 Via Carlo Maria Maggi 38 20855 Lesmo \n",
"\n",
" province_code province region \n",
"0 PT Pistoia Toscana \n",
"1 TO Torino Piemonte \n",
"2 TO Torino Piemonte \n",
"3 TO Torino Piemonte \n",
"4 TO Torino Piemonte \n",
"... ... ... ... \n",
"38855 SR Siracusa Sicilia \n",
"38856 MI Milano Lombardia \n",
"38857 SA Salerno Campania \n",
"38858 MI Milano Lombardia \n",
"38859 MB Monza - Brianza Lombardia \n",
"\n",
"[38860 rows x 7 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(\"addresses.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>address</th>\n",
" <th>house_number</th>\n",
" <th>cap</th>\n",
" <th>municipality</th>\n",
" <th>province_code</th>\n",
" <th>province</th>\n",
" <th>region</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>Via Buggianese</td>\n",
" <td>108</td>\n",
" <td>51019</td>\n",
" <td>Ponte Buggianese</td>\n",
" <td>PT</td>\n",
" <td>Pistoia</td>\n",
" <td>Toscana</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>Via San Francesco Da Paola</td>\n",
" <td>10</td>\n",
" <td>10123</td>\n",
" <td>Torino</td>\n",
" <td>TO</td>\n",
" <td>Torino</td>\n",
" <td>Piemonte</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>Piazza Vittorio Veneto</td>\n",
" <td>10</td>\n",
" <td>10123</td>\n",
" <td>Torino</td>\n",
" <td>TO</td>\n",
" <td>Torino</td>\n",
" <td>Piemonte</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>Piazza Vittorio Veneto</td>\n",
" <td>10</td>\n",
" <td>10123</td>\n",
" <td>Torino</td>\n",
" <td>TO</td>\n",
" <td>Torino</td>\n",
" <td>Piemonte</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>Piazza Vittorio Veneto</td>\n",
" <td>10</td>\n",
" <td>10123</td>\n",
" <td>Torino</td>\n",
" <td>TO</td>\n",
" <td>Torino</td>\n",
" <td>Piemonte</td>\n",
" </tr>\n",
" <tr>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>38855</td>\n",
" <td>Via Piersanti Mattarella</td>\n",
" <td>11/B</td>\n",
" <td>96012</td>\n",
" <td>Avola</td>\n",
" <td>SR</td>\n",
" <td>Siracusa</td>\n",
" <td>Sicilia</td>\n",
" </tr>\n",
" <tr>\n",
" <td>38856</td>\n",
" <td>Via Anna Frank</td>\n",
" <td>9/11</td>\n",
" <td>20121</td>\n",
" <td>Bollate</td>\n",
" <td>MI</td>\n",
" <td>Milano</td>\n",
" <td>Lombardia</td>\n",
" </tr>\n",
" <tr>\n",
" <td>38857</td>\n",
" <td>Contrada Bagni</td>\n",
" <td>38</td>\n",
" <td>84020</td>\n",
" <td>Colliano</td>\n",
" <td>SA</td>\n",
" <td>Salerno</td>\n",
" <td>Campania</td>\n",
" </tr>\n",
" <tr>\n",
" <td>38858</td>\n",
" <td>Via Dei Mille</td>\n",
" <td>3</td>\n",
" <td>20020</td>\n",
" <td>Cesate</td>\n",
" <td>MI</td>\n",
" <td>Milano</td>\n",
" <td>Lombardia</td>\n",
" </tr>\n",
" <tr>\n",
" <td>38859</td>\n",
" <td>Via Carlo Maria Maggi</td>\n",
" <td>38</td>\n",
" <td>20855</td>\n",
" <td>Lesmo</td>\n",
" <td>MB</td>\n",
" <td>Monza - Brianza</td>\n",
" <td>Lombardia</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>38860 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" address house_number cap municipality \\\n",
"0 Via Buggianese 108 51019 Ponte Buggianese \n",
"1 Via San Francesco Da Paola 10 10123 Torino \n",
"2 Piazza Vittorio Veneto 10 10123 Torino \n",
"3 Piazza Vittorio Veneto 10 10123 Torino \n",
"4 Piazza Vittorio Veneto 10 10123 Torino \n",
"... ... ... ... ... \n",
"38855 Via Piersanti Mattarella 11/B 96012 Avola \n",
"38856 Via Anna Frank 9/11 20121 Bollate \n",
"38857 Contrada Bagni 38 84020 Colliano \n",
"38858 Via Dei Mille 3 20020 Cesate \n",
"38859 Via Carlo Maria Maggi 38 20855 Lesmo \n",
"\n",
" province_code province region \n",
"0 PT Pistoia Toscana \n",
"1 TO Torino Piemonte \n",
"2 TO Torino Piemonte \n",
"3 TO Torino Piemonte \n",
"4 TO Torino Piemonte \n",
"... ... ... ... \n",
"38855 SR Siracusa Sicilia \n",
"38856 MI Milano Lombardia \n",
"38857 SA Salerno Campania \n",
"38858 MI Milano Lombardia \n",
"38859 MB Monza - Brianza Lombardia \n",
"\n",
"[38860 rows x 7 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from random_italian_person import RandomItalianPerson"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Andrea Aiello è nata/o a Soveria Mannelli (CZ) il 1928-09-09. Ora vive a Cumiana (TO) in Strada Torino 24.'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"RandomItalianPerson().describe()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame([\n",
" RandomItalianPerson().data\n",
" for _ in range(5)\n",
" ])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"from tabulate import tabulate"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-----------------------+------------+-----------+-----------+-------+----------------------+------------------+----------------+-------------+-----------------------+-------------+-------------------+----------------+-------+----------------+-----------------+------------------+\n",
"| region | province | surname | name | sex | birth_municipality | birth_province | birth_region | birth_cap | birth_province_code | birthdate | address | house_number | cap | municipality | province_code | codice_fiscale |\n",
"+=======================+============+===========+===========+=======+======================+==================+================+=============+=======================+=============+===================+================+=======+================+=================+==================+\n",
"| Marche | Macerata | Di Felice | Giacomina | F | Mosciano Sant'angelo | Teramo | Abruzzo | 64023 | TE | 1945-09-18 | Viale De Amicis | 76 | 62020 | Colmurano | MC | DFLGMN45P58F764B |\n",
"+-----------------------+------------+-----------+-----------+-------+----------------------+------------------+----------------+-------------+-----------------------+-------------+-------------------+----------------+-------+----------------+-----------------+------------------+\n",
"| Friuli Venezia Giulia | Udine | Galli | Imen | F | Isola Dovarese | Cremona | Lombardia | 26031 | CR | 1942-03-10 | Via Udine | 2 | 33020 | Verzegnis | UD | GLLMNI42C50E356T |\n",
"+-----------------------+------------+-----------+-----------+-------+----------------------+------------------+----------------+-------------+-----------------------+-------------+-------------------+----------------+-------+----------------+-----------------+------------------+\n",
"| Abruzzo | Pescara | Rosso | Eva | F | Cellarengo | Asti | Piemonte | 14010 | AT | 2001-12-31 | Via G. Fonzi | 58 | 65010 | Spoltore | PE | RSSVEA01T71C438U |\n",
"+-----------------------+------------+-----------+-----------+-------+----------------------+------------------+----------------+-------------+-----------------------+-------------+-------------------+----------------+-------+----------------+-----------------+------------------+\n",
"| Emilia Romagna | Bologna | Grasso | Emanuele | M | Caposele | Avellino | Campania | 83040 | AV | 1942-08-27 | Via G. Massarenti | 223/5 | 40138 | Bologna | BO | GRSMNL42M27B674L |\n",
"+-----------------------+------------+-----------+-----------+-------+----------------------+------------------+----------------+-------------+-----------------------+-------------+-------------------+----------------+-------+----------------+-----------------+------------------+\n",
"| Sicilia | Palermo | Pastorino | Lenuta | F | Borzonasca | Genova | Liguria | 16041 | GE | 1972-09-05 | Via Montalbo | 124 | 90142 | Palermo | PA | PSTLNT72P45B067T |\n",
"+-----------------------+------------+-----------+-----------+-------+----------------------+------------------+----------------+-------------+-----------------------+-------------+-------------------+----------------+-------+----------------+-----------------+------------------+\n"
]
}
],
"source": [
"print(tabulate(df.values, df.columns, tablefmt=\"grid\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}