LucaCappelletti94/random_italian_person

View on GitHub
Snippets.ipynb

Summary

Maintainability
Test Coverage
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def capitalize_words(text):\n",
    "    \"\"\"Capitalize every space-separated word of ``text``.\n",
    "\n",
    "    The rest of each word is lower-cased (``str.capitalize``). Splitting on a\n",
    "    single space keeps repeated spaces exactly where they were.\n",
    "    \"\"\"\n",
    "    return \" \".join(word.capitalize() for word in text.split(\" \"))\n",
    "\n",
    "\n",
    "def load_addresses(path=\"farmacie.csv\"):\n",
    "    \"\"\"Build the address table from the semicolon-separated pharmacy CSV.\n",
    "\n",
    "    Only rows whose address holds exactly one comma (street, house number)\n",
    "    and whose hamlet field is \"-\" are kept.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    path : str\n",
    "        Location of the ISO-8859-1 encoded CSV file.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        Columns address, house_number, cap, municipality, province_code,\n",
    "        province, region, with the index renumbered from 0.\n",
    "    \"\"\"\n",
    "    columns = {\n",
    "        \"INDIRIZZO\": \"address\",\n",
    "        \"CAP\": \"cap\",\n",
    "        \"DESCRIZIONECOMUNE\": \"municipality\",\n",
    "        \"FRAZIONE\": \"hamlet\",\n",
    "        \"SIGLAPROVINCIA\": \"province_code\",\n",
    "        \"DESCRIZIONEPROVINCIA\": \"province\",\n",
    "        \"DESCRIZIONEREGIONE\": \"region\",\n",
    "    }\n",
    "    raw = pd.read_csv(\n",
    "        path,\n",
    "        sep=\";\",\n",
    "        encoding=\"ISO-8859-1\",\n",
    "        usecols=list(columns),\n",
    "        # Read every field as text: no mixed-type DtypeWarning, and postal\n",
    "        # codes that start with 0 keep their leading zeros.\n",
    "        dtype=str,\n",
    "        # Treat only empty fields as missing; with the default NA markers the\n",
    "        # province code \"NA\" (Napoli) would be parsed as NaN.\n",
    "        keep_default_na=False,\n",
    "        na_values=[\"\"],\n",
    "    )\n",
    "    pharmacies = raw.rename(columns=columns)\n",
    "\n",
    "    # A missing address compares as False here, so it is filtered out too.\n",
    "    one_comma = pharmacies[\"address\"].str.count(\",\") == 1\n",
    "    no_hamlet = pharmacies[\"hamlet\"] == \"-\"\n",
    "    pharmacies = pharmacies[one_comma & no_hamlet]\n",
    "\n",
    "    # Exactly one comma per row, so every split yields [street, house number].\n",
    "    parts = pharmacies[\"address\"].str.split(\",\")\n",
    "    table = {\n",
    "        \"address\": parts.str[0].str.strip().map(capitalize_words),\n",
    "        \"house_number\": parts.str[1].str.strip(),\n",
    "        \"cap\": pharmacies[\"cap\"],\n",
    "        \"municipality\": pharmacies[\"municipality\"],\n",
    "        \"province_code\": pharmacies[\"province_code\"],\n",
    "        \"province\": pharmacies[\"province\"],\n",
    "        \"region\": pharmacies[\"region\"],\n",
    "    }\n",
    "    for name in (\"municipality\", \"province\", \"region\"):\n",
    "        # Missing names become \"\" instead of NaN.\n",
    "        table[name] = table[name].fillna(\"\").map(capitalize_words)\n",
    "\n",
    "    # All series share the filtered index, so they line up row by row.\n",
    "    return pd.DataFrame(table).reset_index(drop=True)\n",
    "\n",
    "\n",
    "df = load_addresses()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>address</th>\n",
       "      <th>house_number</th>\n",
       "      <th>cap</th>\n",
       "      <th>municipality</th>\n",
       "      <th>province_code</th>\n",
       "      <th>province</th>\n",
       "      <th>region</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>Via Buggianese</td>\n",
       "      <td>108</td>\n",
       "      <td>51019</td>\n",
       "      <td>Ponte Buggianese</td>\n",
       "      <td>PT</td>\n",
       "      <td>Pistoia</td>\n",
       "      <td>Toscana</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>Via San Francesco Da Paola</td>\n",
       "      <td>10</td>\n",
       "      <td>10123</td>\n",
       "      <td>Torino</td>\n",
       "      <td>TO</td>\n",
       "      <td>Torino</td>\n",
       "      <td>Piemonte</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>Piazza Vittorio Veneto</td>\n",
       "      <td>10</td>\n",
       "      <td>10123</td>\n",
       "      <td>Torino</td>\n",
       "      <td>TO</td>\n",
       "      <td>Torino</td>\n",
       "      <td>Piemonte</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>Piazza Vittorio Veneto</td>\n",
       "      <td>10</td>\n",
       "      <td>10123</td>\n",
       "      <td>Torino</td>\n",
       "      <td>TO</td>\n",
       "      <td>Torino</td>\n",
       "      <td>Piemonte</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>Piazza Vittorio Veneto</td>\n",
       "      <td>10</td>\n",
       "      <td>10123</td>\n",
       "      <td>Torino</td>\n",
       "      <td>TO</td>\n",
       "      <td>Torino</td>\n",
       "      <td>Piemonte</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>38855</td>\n",
       "      <td>Via Piersanti Mattarella</td>\n",
       "      <td>11/B</td>\n",
       "      <td>96012</td>\n",
       "      <td>Avola</td>\n",
       "      <td>SR</td>\n",
       "      <td>Siracusa</td>\n",
       "      <td>Sicilia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>38856</td>\n",
       "      <td>Via Anna Frank</td>\n",
       "      <td>9/11</td>\n",
       "      <td>20121</td>\n",
       "      <td>Bollate</td>\n",
       "      <td>MI</td>\n",
       "      <td>Milano</td>\n",
       "      <td>Lombardia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>38857</td>\n",
       "      <td>Contrada Bagni</td>\n",
       "      <td>38</td>\n",
       "      <td>84020</td>\n",
       "      <td>Colliano</td>\n",
       "      <td>SA</td>\n",
       "      <td>Salerno</td>\n",
       "      <td>Campania</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>38858</td>\n",
       "      <td>Via Dei  Mille</td>\n",
       "      <td>3</td>\n",
       "      <td>20020</td>\n",
       "      <td>Cesate</td>\n",
       "      <td>MI</td>\n",
       "      <td>Milano</td>\n",
       "      <td>Lombardia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>38859</td>\n",
       "      <td>Via Carlo Maria Maggi</td>\n",
       "      <td>38</td>\n",
       "      <td>20855</td>\n",
       "      <td>Lesmo</td>\n",
       "      <td>MB</td>\n",
       "      <td>Monza - Brianza</td>\n",
       "      <td>Lombardia</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>38860 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                          address house_number    cap      municipality  \\\n",
       "0                  Via Buggianese          108  51019  Ponte Buggianese   \n",
       "1      Via San Francesco Da Paola           10  10123            Torino   \n",
       "2          Piazza Vittorio Veneto           10  10123            Torino   \n",
       "3          Piazza Vittorio Veneto           10  10123            Torino   \n",
       "4          Piazza Vittorio Veneto           10  10123            Torino   \n",
       "...                           ...          ...    ...               ...   \n",
       "38855    Via Piersanti Mattarella         11/B  96012             Avola   \n",
       "38856              Via Anna Frank         9/11  20121           Bollate   \n",
       "38857              Contrada Bagni           38  84020          Colliano   \n",
       "38858              Via Dei  Mille            3  20020            Cesate   \n",
       "38859       Via Carlo Maria Maggi           38  20855             Lesmo   \n",
       "\n",
       "      province_code         province     region  \n",
       "0                PT          Pistoia    Toscana  \n",
       "1                TO           Torino   Piemonte  \n",
       "2                TO           Torino   Piemonte  \n",
       "3                TO           Torino   Piemonte  \n",
       "4                TO           Torino   Piemonte  \n",
       "...             ...              ...        ...  \n",
       "38855            SR         Siracusa    Sicilia  \n",
       "38856            MI           Milano  Lombardia  \n",
       "38857            SA          Salerno   Campania  \n",
       "38858            MI           Milano  Lombardia  \n",
       "38859            MB  Monza - Brianza  Lombardia  \n",
       "\n",
       "[38860 rows x 7 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_csv(\"addresses.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>address</th>\n",
       "      <th>house_number</th>\n",
       "      <th>cap</th>\n",
       "      <th>municipality</th>\n",
       "      <th>province_code</th>\n",
       "      <th>province</th>\n",
       "      <th>region</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>Via Buggianese</td>\n",
       "      <td>108</td>\n",
       "      <td>51019</td>\n",
       "      <td>Ponte Buggianese</td>\n",
       "      <td>PT</td>\n",
       "      <td>Pistoia</td>\n",
       "      <td>Toscana</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>Via San Francesco Da Paola</td>\n",
       "      <td>10</td>\n",
       "      <td>10123</td>\n",
       "      <td>Torino</td>\n",
       "      <td>TO</td>\n",
       "      <td>Torino</td>\n",
       "      <td>Piemonte</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>Piazza Vittorio Veneto</td>\n",
       "      <td>10</td>\n",
       "      <td>10123</td>\n",
       "      <td>Torino</td>\n",
       "      <td>TO</td>\n",
       "      <td>Torino</td>\n",
       "      <td>Piemonte</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>Piazza Vittorio Veneto</td>\n",
       "      <td>10</td>\n",
       "      <td>10123</td>\n",
       "      <td>Torino</td>\n",
       "      <td>TO</td>\n",
       "      <td>Torino</td>\n",
       "      <td>Piemonte</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>Piazza Vittorio Veneto</td>\n",
       "      <td>10</td>\n",
       "      <td>10123</td>\n",
       "      <td>Torino</td>\n",
       "      <td>TO</td>\n",
       "      <td>Torino</td>\n",
       "      <td>Piemonte</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>38855</td>\n",
       "      <td>Via Piersanti Mattarella</td>\n",
       "      <td>11/B</td>\n",
       "      <td>96012</td>\n",
       "      <td>Avola</td>\n",
       "      <td>SR</td>\n",
       "      <td>Siracusa</td>\n",
       "      <td>Sicilia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>38856</td>\n",
       "      <td>Via Anna Frank</td>\n",
       "      <td>9/11</td>\n",
       "      <td>20121</td>\n",
       "      <td>Bollate</td>\n",
       "      <td>MI</td>\n",
       "      <td>Milano</td>\n",
       "      <td>Lombardia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>38857</td>\n",
       "      <td>Contrada Bagni</td>\n",
       "      <td>38</td>\n",
       "      <td>84020</td>\n",
       "      <td>Colliano</td>\n",
       "      <td>SA</td>\n",
       "      <td>Salerno</td>\n",
       "      <td>Campania</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>38858</td>\n",
       "      <td>Via Dei  Mille</td>\n",
       "      <td>3</td>\n",
       "      <td>20020</td>\n",
       "      <td>Cesate</td>\n",
       "      <td>MI</td>\n",
       "      <td>Milano</td>\n",
       "      <td>Lombardia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>38859</td>\n",
       "      <td>Via Carlo Maria Maggi</td>\n",
       "      <td>38</td>\n",
       "      <td>20855</td>\n",
       "      <td>Lesmo</td>\n",
       "      <td>MB</td>\n",
       "      <td>Monza - Brianza</td>\n",
       "      <td>Lombardia</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>38860 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                          address house_number    cap      municipality  \\\n",
       "0                  Via Buggianese          108  51019  Ponte Buggianese   \n",
       "1      Via San Francesco Da Paola           10  10123            Torino   \n",
       "2          Piazza Vittorio Veneto           10  10123            Torino   \n",
       "3          Piazza Vittorio Veneto           10  10123            Torino   \n",
       "4          Piazza Vittorio Veneto           10  10123            Torino   \n",
       "...                           ...          ...    ...               ...   \n",
       "38855    Via Piersanti Mattarella         11/B  96012             Avola   \n",
       "38856              Via Anna Frank         9/11  20121           Bollate   \n",
       "38857              Contrada Bagni           38  84020          Colliano   \n",
       "38858              Via Dei  Mille            3  20020            Cesate   \n",
       "38859       Via Carlo Maria Maggi           38  20855             Lesmo   \n",
       "\n",
       "      province_code         province     region  \n",
       "0                PT          Pistoia    Toscana  \n",
       "1                TO           Torino   Piemonte  \n",
       "2                TO           Torino   Piemonte  \n",
       "3                TO           Torino   Piemonte  \n",
       "4                TO           Torino   Piemonte  \n",
       "...             ...              ...        ...  \n",
       "38855            SR         Siracusa    Sicilia  \n",
       "38856            MI           Milano  Lombardia  \n",
       "38857            SA          Salerno   Campania  \n",
       "38858            MI           Milano  Lombardia  \n",
       "38859            MB  Monza - Brianza  Lombardia  \n",
       "\n",
       "[38860 rows x 7 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from random_italian_person import RandomItalianPerson"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Andrea Aiello è nata/o a Soveria Mannelli (CZ) il 1928-09-09. Ora vive a Cumiana (TO) in Strada Torino 24.'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "RandomItalianPerson().describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the attribute dictionaries of five freshly generated people,\n",
    "# then turn them into a table with one row per person.\n",
    "people = []\n",
    "for _ in range(5):\n",
    "    people.append(RandomItalianPerson().data)\n",
    "df = pd.DataFrame(people)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tabulate import tabulate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-----------------------+------------+-----------+-----------+-------+----------------------+------------------+----------------+-------------+-----------------------+-------------+-------------------+----------------+-------+----------------+-----------------+------------------+\n",
      "| region                | province   | surname   | name      | sex   | birth_municipality   | birth_province   | birth_region   |   birth_cap | birth_province_code   | birthdate   | address           | house_number   |   cap | municipality   | province_code   | codice_fiscale   |\n",
      "+=======================+============+===========+===========+=======+======================+==================+================+=============+=======================+=============+===================+================+=======+================+=================+==================+\n",
      "| Marche                | Macerata   | Di Felice | Giacomina | F     | Mosciano Sant'angelo | Teramo           | Abruzzo        |       64023 | TE                    | 1945-09-18  | Viale De Amicis   | 76             | 62020 | Colmurano      | MC              | DFLGMN45P58F764B |\n",
      "+-----------------------+------------+-----------+-----------+-------+----------------------+------------------+----------------+-------------+-----------------------+-------------+-------------------+----------------+-------+----------------+-----------------+------------------+\n",
      "| Friuli Venezia Giulia | Udine      | Galli     | Imen      | F     | Isola Dovarese       | Cremona          | Lombardia      |       26031 | CR                    | 1942-03-10  | Via Udine         | 2              | 33020 | Verzegnis      | UD              | GLLMNI42C50E356T |\n",
      "+-----------------------+------------+-----------+-----------+-------+----------------------+------------------+----------------+-------------+-----------------------+-------------+-------------------+----------------+-------+----------------+-----------------+------------------+\n",
      "| Abruzzo               | Pescara    | Rosso     | Eva       | F     | Cellarengo           | Asti             | Piemonte       |       14010 | AT                    | 2001-12-31  | Via G. Fonzi      | 58             | 65010 | Spoltore       | PE              | RSSVEA01T71C438U |\n",
      "+-----------------------+------------+-----------+-----------+-------+----------------------+------------------+----------------+-------------+-----------------------+-------------+-------------------+----------------+-------+----------------+-----------------+------------------+\n",
      "| Emilia Romagna        | Bologna    | Grasso    | Emanuele  | M     | Caposele             | Avellino         | Campania       |       83040 | AV                    | 1942-08-27  | Via G. Massarenti | 223/5          | 40138 | Bologna        | BO              | GRSMNL42M27B674L |\n",
      "+-----------------------+------------+-----------+-----------+-------+----------------------+------------------+----------------+-------------+-----------------------+-------------+-------------------+----------------+-------+----------------+-----------------+------------------+\n",
      "| Sicilia               | Palermo    | Pastorino | Lenuta    | F     | Borzonasca           | Genova           | Liguria        |       16041 | GE                    | 1972-09-05  | Via Montalbo      | 124            | 90142 | Palermo        | PA              | PSTLNT72P45B067T |\n",
      "+-----------------------+------------+-----------+-----------+-------+----------------------+------------------+----------------+-------------+-----------------------+-------------+-------------------+----------------+-------+----------------+-----------------+------------------+\n"
     ]
    }
   ],
   "source": [
    "print(tabulate(df.values, df.columns, tablefmt=\"grid\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}