prateekiiest/titanic_survival_exploration

View on GitHub
Features/extracting_features.ipynb

Summary

Maintainability
Test Coverage
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Braund, Mr. Owen Harris</td>\n",
       "      <td>male</td>\n",
       "      <td>22.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>A/5 21171</td>\n",
       "      <td>7.2500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
       "      <td>female</td>\n",
       "      <td>38.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>PC 17599</td>\n",
       "      <td>71.2833</td>\n",
       "      <td>C85</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Heikkinen, Miss. Laina</td>\n",
       "      <td>female</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>STON/O2. 3101282</td>\n",
       "      <td>7.9250</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
       "      <td>female</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>113803</td>\n",
       "      <td>53.1000</td>\n",
       "      <td>C123</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Allen, Mr. William Henry</td>\n",
       "      <td>male</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>373450</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Moran, Mr. James</td>\n",
       "      <td>male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>330877</td>\n",
       "      <td>8.4583</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>McCarthy, Mr. Timothy J</td>\n",
       "      <td>male</td>\n",
       "      <td>54.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>17463</td>\n",
       "      <td>51.8625</td>\n",
       "      <td>E46</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Palsson, Master. Gosta Leonard</td>\n",
       "      <td>male</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>349909</td>\n",
       "      <td>21.0750</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)</td>\n",
       "      <td>female</td>\n",
       "      <td>27.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>347742</td>\n",
       "      <td>11.1333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>Nasser, Mrs. Nicholas (Adele Achem)</td>\n",
       "      <td>female</td>\n",
       "      <td>14.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>237736</td>\n",
       "      <td>30.0708</td>\n",
       "      <td>NaN</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>11</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Sandstrom, Miss. Marguerite Rut</td>\n",
       "      <td>female</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>PP 9549</td>\n",
       "      <td>16.7000</td>\n",
       "      <td>G6</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>12</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Bonnell, Miss. Elizabeth</td>\n",
       "      <td>female</td>\n",
       "      <td>58.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>113783</td>\n",
       "      <td>26.5500</td>\n",
       "      <td>C103</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Saundercock, Mr. William Henry</td>\n",
       "      <td>male</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>A/5. 2151</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>14</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Andersson, Mr. Anders Johan</td>\n",
       "      <td>male</td>\n",
       "      <td>39.0</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>347082</td>\n",
       "      <td>31.2750</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>15</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Vestrom, Miss. Hulda Amanda Adolfina</td>\n",
       "      <td>female</td>\n",
       "      <td>14.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>350406</td>\n",
       "      <td>7.8542</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>16</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>Hewlett, Mrs. (Mary D Kingcome)</td>\n",
       "      <td>female</td>\n",
       "      <td>55.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>248706</td>\n",
       "      <td>16.0000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>17</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Rice, Master. Eugene</td>\n",
       "      <td>male</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>382652</td>\n",
       "      <td>29.1250</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>18</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>Williams, Mr. Charles Eugene</td>\n",
       "      <td>male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>244373</td>\n",
       "      <td>13.0000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>19</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Vander Planke, Mrs. Julius (Emelia Maria Vande...</td>\n",
       "      <td>female</td>\n",
       "      <td>31.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>345763</td>\n",
       "      <td>18.0000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>20</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Masselmani, Mrs. Fatima</td>\n",
       "      <td>female</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2649</td>\n",
       "      <td>7.2250</td>\n",
       "      <td>NaN</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>Fynney, Mr. Joseph J</td>\n",
       "      <td>male</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>239865</td>\n",
       "      <td>26.0000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>22</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>Beesley, Mr. Lawrence</td>\n",
       "      <td>male</td>\n",
       "      <td>34.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>248698</td>\n",
       "      <td>13.0000</td>\n",
       "      <td>D56</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>23</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>McGowan, Miss. Anna \"Annie\"</td>\n",
       "      <td>female</td>\n",
       "      <td>15.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>330923</td>\n",
       "      <td>8.0292</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>24</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Sloper, Mr. William Thompson</td>\n",
       "      <td>male</td>\n",
       "      <td>28.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>113788</td>\n",
       "      <td>35.5000</td>\n",
       "      <td>A6</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Palsson, Miss. Torborg Danira</td>\n",
       "      <td>female</td>\n",
       "      <td>8.0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>349909</td>\n",
       "      <td>21.0750</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>26</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...</td>\n",
       "      <td>female</td>\n",
       "      <td>38.0</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>347077</td>\n",
       "      <td>31.3875</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>27</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Emir, Mr. Farred Chehab</td>\n",
       "      <td>male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2631</td>\n",
       "      <td>7.2250</td>\n",
       "      <td>NaN</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>28</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>Fortune, Mr. Charles Alexander</td>\n",
       "      <td>male</td>\n",
       "      <td>19.0</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>19950</td>\n",
       "      <td>263.0000</td>\n",
       "      <td>C23 C25 C27</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>29</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>O'Dwyer, Miss. Ellen \"Nellie\"</td>\n",
       "      <td>female</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>330959</td>\n",
       "      <td>7.8792</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Todoroff, Mr. Lalio</td>\n",
       "      <td>male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>349216</td>\n",
       "      <td>7.8958</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>861</th>\n",
       "      <td>862</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>Giles, Mr. Frederick Edward</td>\n",
       "      <td>male</td>\n",
       "      <td>21.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>28134</td>\n",
       "      <td>11.5000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>862</th>\n",
       "      <td>863</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Swift, Mrs. Frederick Joel (Margaret Welles Ba...</td>\n",
       "      <td>female</td>\n",
       "      <td>48.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>17466</td>\n",
       "      <td>25.9292</td>\n",
       "      <td>D17</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>863</th>\n",
       "      <td>864</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Sage, Miss. Dorothy Edith \"Dolly\"</td>\n",
       "      <td>female</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>2</td>\n",
       "      <td>CA. 2343</td>\n",
       "      <td>69.5500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>864</th>\n",
       "      <td>865</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>Gill, Mr. John William</td>\n",
       "      <td>male</td>\n",
       "      <td>24.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>233866</td>\n",
       "      <td>13.0000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>865</th>\n",
       "      <td>866</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>Bystrom, Mrs. (Karolina)</td>\n",
       "      <td>female</td>\n",
       "      <td>42.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>236852</td>\n",
       "      <td>13.0000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>866</th>\n",
       "      <td>867</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>Duran y More, Miss. Asuncion</td>\n",
       "      <td>female</td>\n",
       "      <td>27.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>SC/PARIS 2149</td>\n",
       "      <td>13.8583</td>\n",
       "      <td>NaN</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>867</th>\n",
       "      <td>868</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>Roebling, Mr. Washington Augustus II</td>\n",
       "      <td>male</td>\n",
       "      <td>31.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>PC 17590</td>\n",
       "      <td>50.4958</td>\n",
       "      <td>A24</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>868</th>\n",
       "      <td>869</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>van Melkebeke, Mr. Philemon</td>\n",
       "      <td>male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>345777</td>\n",
       "      <td>9.5000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>869</th>\n",
       "      <td>870</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Johnson, Master. Harold Theodor</td>\n",
       "      <td>male</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>347742</td>\n",
       "      <td>11.1333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>870</th>\n",
       "      <td>871</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Balkic, Mr. Cerin</td>\n",
       "      <td>male</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>349248</td>\n",
       "      <td>7.8958</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>871</th>\n",
       "      <td>872</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Beckwith, Mrs. Richard Leonard (Sallie Monypeny)</td>\n",
       "      <td>female</td>\n",
       "      <td>47.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>11751</td>\n",
       "      <td>52.5542</td>\n",
       "      <td>D35</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>872</th>\n",
       "      <td>873</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>Carlsson, Mr. Frans Olof</td>\n",
       "      <td>male</td>\n",
       "      <td>33.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>695</td>\n",
       "      <td>5.0000</td>\n",
       "      <td>B51 B53 B55</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>873</th>\n",
       "      <td>874</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Vander Cruyssen, Mr. Victor</td>\n",
       "      <td>male</td>\n",
       "      <td>47.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>345765</td>\n",
       "      <td>9.0000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>874</th>\n",
       "      <td>875</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>Abelson, Mrs. Samuel (Hannah Wizosky)</td>\n",
       "      <td>female</td>\n",
       "      <td>28.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>P/PP 3381</td>\n",
       "      <td>24.0000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>875</th>\n",
       "      <td>876</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Najib, Miss. Adele Kiamie \"Jane\"</td>\n",
       "      <td>female</td>\n",
       "      <td>15.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2667</td>\n",
       "      <td>7.2250</td>\n",
       "      <td>NaN</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>876</th>\n",
       "      <td>877</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Gustafsson, Mr. Alfred Ossian</td>\n",
       "      <td>male</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7534</td>\n",
       "      <td>9.8458</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>877</th>\n",
       "      <td>878</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Petroff, Mr. Nedelio</td>\n",
       "      <td>male</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>349212</td>\n",
       "      <td>7.8958</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>878</th>\n",
       "      <td>879</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Laleff, Mr. Kristo</td>\n",
       "      <td>male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>349217</td>\n",
       "      <td>7.8958</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>879</th>\n",
       "      <td>880</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)</td>\n",
       "      <td>female</td>\n",
       "      <td>56.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>11767</td>\n",
       "      <td>83.1583</td>\n",
       "      <td>C50</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>880</th>\n",
       "      <td>881</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>Shelley, Mrs. William (Imanita Parrish Hall)</td>\n",
       "      <td>female</td>\n",
       "      <td>25.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>230433</td>\n",
       "      <td>26.0000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>881</th>\n",
       "      <td>882</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Markun, Mr. Johann</td>\n",
       "      <td>male</td>\n",
       "      <td>33.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>349257</td>\n",
       "      <td>7.8958</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>882</th>\n",
       "      <td>883</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Dahlberg, Miss. Gerda Ulrika</td>\n",
       "      <td>female</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7552</td>\n",
       "      <td>10.5167</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>883</th>\n",
       "      <td>884</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>Banfield, Mr. Frederick James</td>\n",
       "      <td>male</td>\n",
       "      <td>28.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>C.A./SOTON 34068</td>\n",
       "      <td>10.5000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>884</th>\n",
       "      <td>885</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Sutehall, Mr. Henry Jr</td>\n",
       "      <td>male</td>\n",
       "      <td>25.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>SOTON/OQ 392076</td>\n",
       "      <td>7.0500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>885</th>\n",
       "      <td>886</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Rice, Mrs. William (Margaret Norton)</td>\n",
       "      <td>female</td>\n",
       "      <td>39.0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>382652</td>\n",
       "      <td>29.1250</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>886</th>\n",
       "      <td>887</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>Montvila, Rev. Juozas</td>\n",
       "      <td>male</td>\n",
       "      <td>27.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>211536</td>\n",
       "      <td>13.0000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>887</th>\n",
       "      <td>888</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Graham, Miss. Margaret Edith</td>\n",
       "      <td>female</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>112053</td>\n",
       "      <td>30.0000</td>\n",
       "      <td>B42</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>888</th>\n",
       "      <td>889</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Johnston, Miss. Catherine Helen \"Carrie\"</td>\n",
       "      <td>female</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>W./C. 6607</td>\n",
       "      <td>23.4500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>889</th>\n",
       "      <td>890</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Behr, Mr. Karl Howell</td>\n",
       "      <td>male</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>111369</td>\n",
       "      <td>30.0000</td>\n",
       "      <td>C148</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>890</th>\n",
       "      <td>891</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Dooley, Mr. Patrick</td>\n",
       "      <td>male</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>370376</td>\n",
       "      <td>7.7500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>891 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     PassengerId  Survived  Pclass  \\\n",
       "0              1         0       3   \n",
       "1              2         1       1   \n",
       "2              3         1       3   \n",
       "3              4         1       1   \n",
       "4              5         0       3   \n",
       "5              6         0       3   \n",
       "6              7         0       1   \n",
       "7              8         0       3   \n",
       "8              9         1       3   \n",
       "9             10         1       2   \n",
       "10            11         1       3   \n",
       "11            12         1       1   \n",
       "12            13         0       3   \n",
       "13            14         0       3   \n",
       "14            15         0       3   \n",
       "15            16         1       2   \n",
       "16            17         0       3   \n",
       "17            18         1       2   \n",
       "18            19         0       3   \n",
       "19            20         1       3   \n",
       "20            21         0       2   \n",
       "21            22         1       2   \n",
       "22            23         1       3   \n",
       "23            24         1       1   \n",
       "24            25         0       3   \n",
       "25            26         1       3   \n",
       "26            27         0       3   \n",
       "27            28         0       1   \n",
       "28            29         1       3   \n",
       "29            30         0       3   \n",
       "..           ...       ...     ...   \n",
       "861          862         0       2   \n",
       "862          863         1       1   \n",
       "863          864         0       3   \n",
       "864          865         0       2   \n",
       "865          866         1       2   \n",
       "866          867         1       2   \n",
       "867          868         0       1   \n",
       "868          869         0       3   \n",
       "869          870         1       3   \n",
       "870          871         0       3   \n",
       "871          872         1       1   \n",
       "872          873         0       1   \n",
       "873          874         0       3   \n",
       "874          875         1       2   \n",
       "875          876         1       3   \n",
       "876          877         0       3   \n",
       "877          878         0       3   \n",
       "878          879         0       3   \n",
       "879          880         1       1   \n",
       "880          881         1       2   \n",
       "881          882         0       3   \n",
       "882          883         0       3   \n",
       "883          884         0       2   \n",
       "884          885         0       3   \n",
       "885          886         0       3   \n",
       "886          887         0       2   \n",
       "887          888         1       1   \n",
       "888          889         0       3   \n",
       "889          890         1       1   \n",
       "890          891         0       3   \n",
       "\n",
       "                                                  Name     Sex   Age  SibSp  \\\n",
       "0                              Braund, Mr. Owen Harris    male  22.0      1   \n",
       "1    Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
       "2                               Heikkinen, Miss. Laina  female  26.0      0   \n",
       "3         Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   \n",
       "4                             Allen, Mr. William Henry    male  35.0      0   \n",
       "5                                     Moran, Mr. James    male   NaN      0   \n",
       "6                              McCarthy, Mr. Timothy J    male  54.0      0   \n",
       "7                       Palsson, Master. Gosta Leonard    male   2.0      3   \n",
       "8    Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)  female  27.0      0   \n",
       "9                  Nasser, Mrs. Nicholas (Adele Achem)  female  14.0      1   \n",
       "10                     Sandstrom, Miss. Marguerite Rut  female   4.0      1   \n",
       "11                            Bonnell, Miss. Elizabeth  female  58.0      0   \n",
       "12                      Saundercock, Mr. William Henry    male  20.0      0   \n",
       "13                         Andersson, Mr. Anders Johan    male  39.0      1   \n",
       "14                Vestrom, Miss. Hulda Amanda Adolfina  female  14.0      0   \n",
       "15                    Hewlett, Mrs. (Mary D Kingcome)   female  55.0      0   \n",
       "16                                Rice, Master. Eugene    male   2.0      4   \n",
       "17                        Williams, Mr. Charles Eugene    male   NaN      0   \n",
       "18   Vander Planke, Mrs. Julius (Emelia Maria Vande...  female  31.0      1   \n",
       "19                             Masselmani, Mrs. Fatima  female   NaN      0   \n",
       "20                                Fynney, Mr. Joseph J    male  35.0      0   \n",
       "21                               Beesley, Mr. Lawrence    male  34.0      0   \n",
       "22                         McGowan, Miss. Anna \"Annie\"  female  15.0      0   \n",
       "23                        Sloper, Mr. William Thompson    male  28.0      0   \n",
       "24                       Palsson, Miss. Torborg Danira  female   8.0      3   \n",
       "25   Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...  female  38.0      1   \n",
       "26                             Emir, Mr. Farred Chehab    male   NaN      0   \n",
       "27                      Fortune, Mr. Charles Alexander    male  19.0      3   \n",
       "28                       O'Dwyer, Miss. Ellen \"Nellie\"  female   NaN      0   \n",
       "29                                 Todoroff, Mr. Lalio    male   NaN      0   \n",
       "..                                                 ...     ...   ...    ...   \n",
       "861                        Giles, Mr. Frederick Edward    male  21.0      1   \n",
       "862  Swift, Mrs. Frederick Joel (Margaret Welles Ba...  female  48.0      0   \n",
       "863                  Sage, Miss. Dorothy Edith \"Dolly\"  female   NaN      8   \n",
       "864                             Gill, Mr. John William    male  24.0      0   \n",
       "865                           Bystrom, Mrs. (Karolina)  female  42.0      0   \n",
       "866                       Duran y More, Miss. Asuncion  female  27.0      1   \n",
       "867               Roebling, Mr. Washington Augustus II    male  31.0      0   \n",
       "868                        van Melkebeke, Mr. Philemon    male   NaN      0   \n",
       "869                    Johnson, Master. Harold Theodor    male   4.0      1   \n",
       "870                                  Balkic, Mr. Cerin    male  26.0      0   \n",
       "871   Beckwith, Mrs. Richard Leonard (Sallie Monypeny)  female  47.0      1   \n",
       "872                           Carlsson, Mr. Frans Olof    male  33.0      0   \n",
       "873                        Vander Cruyssen, Mr. Victor    male  47.0      0   \n",
       "874              Abelson, Mrs. Samuel (Hannah Wizosky)  female  28.0      1   \n",
       "875                   Najib, Miss. Adele Kiamie \"Jane\"  female  15.0      0   \n",
       "876                      Gustafsson, Mr. Alfred Ossian    male  20.0      0   \n",
       "877                               Petroff, Mr. Nedelio    male  19.0      0   \n",
       "878                                 Laleff, Mr. Kristo    male   NaN      0   \n",
       "879      Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)  female  56.0      0   \n",
       "880       Shelley, Mrs. William (Imanita Parrish Hall)  female  25.0      0   \n",
       "881                                 Markun, Mr. Johann    male  33.0      0   \n",
       "882                       Dahlberg, Miss. Gerda Ulrika  female  22.0      0   \n",
       "883                      Banfield, Mr. Frederick James    male  28.0      0   \n",
       "884                             Sutehall, Mr. Henry Jr    male  25.0      0   \n",
       "885               Rice, Mrs. William (Margaret Norton)  female  39.0      0   \n",
       "886                              Montvila, Rev. Juozas    male  27.0      0   \n",
       "887                       Graham, Miss. Margaret Edith  female  19.0      0   \n",
       "888           Johnston, Miss. Catherine Helen \"Carrie\"  female   NaN      1   \n",
       "889                              Behr, Mr. Karl Howell    male  26.0      0   \n",
       "890                                Dooley, Mr. Patrick    male  32.0      0   \n",
       "\n",
       "     Parch            Ticket      Fare        Cabin Embarked  \n",
       "0        0         A/5 21171    7.2500          NaN        S  \n",
       "1        0          PC 17599   71.2833          C85        C  \n",
       "2        0  STON/O2. 3101282    7.9250          NaN        S  \n",
       "3        0            113803   53.1000         C123        S  \n",
       "4        0            373450    8.0500          NaN        S  \n",
       "5        0            330877    8.4583          NaN        Q  \n",
       "6        0             17463   51.8625          E46        S  \n",
       "7        1            349909   21.0750          NaN        S  \n",
       "8        2            347742   11.1333          NaN        S  \n",
       "9        0            237736   30.0708          NaN        C  \n",
       "10       1           PP 9549   16.7000           G6        S  \n",
       "11       0            113783   26.5500         C103        S  \n",
       "12       0         A/5. 2151    8.0500          NaN        S  \n",
       "13       5            347082   31.2750          NaN        S  \n",
       "14       0            350406    7.8542          NaN        S  \n",
       "15       0            248706   16.0000          NaN        S  \n",
       "16       1            382652   29.1250          NaN        Q  \n",
       "17       0            244373   13.0000          NaN        S  \n",
       "18       0            345763   18.0000          NaN        S  \n",
       "19       0              2649    7.2250          NaN        C  \n",
       "20       0            239865   26.0000          NaN        S  \n",
       "21       0            248698   13.0000          D56        S  \n",
       "22       0            330923    8.0292          NaN        Q  \n",
       "23       0            113788   35.5000           A6        S  \n",
       "24       1            349909   21.0750          NaN        S  \n",
       "25       5            347077   31.3875          NaN        S  \n",
       "26       0              2631    7.2250          NaN        C  \n",
       "27       2             19950  263.0000  C23 C25 C27        S  \n",
       "28       0            330959    7.8792          NaN        Q  \n",
       "29       0            349216    7.8958          NaN        S  \n",
       "..     ...               ...       ...          ...      ...  \n",
       "861      0             28134   11.5000          NaN        S  \n",
       "862      0             17466   25.9292          D17        S  \n",
       "863      2          CA. 2343   69.5500          NaN        S  \n",
       "864      0            233866   13.0000          NaN        S  \n",
       "865      0            236852   13.0000          NaN        S  \n",
       "866      0     SC/PARIS 2149   13.8583          NaN        C  \n",
       "867      0          PC 17590   50.4958          A24        S  \n",
       "868      0            345777    9.5000          NaN        S  \n",
       "869      1            347742   11.1333          NaN        S  \n",
       "870      0            349248    7.8958          NaN        S  \n",
       "871      1             11751   52.5542          D35        S  \n",
       "872      0               695    5.0000  B51 B53 B55        S  \n",
       "873      0            345765    9.0000          NaN        S  \n",
       "874      0         P/PP 3381   24.0000          NaN        C  \n",
       "875      0              2667    7.2250          NaN        C  \n",
       "876      0              7534    9.8458          NaN        S  \n",
       "877      0            349212    7.8958          NaN        S  \n",
       "878      0            349217    7.8958          NaN        S  \n",
       "879      1             11767   83.1583          C50        C  \n",
       "880      1            230433   26.0000          NaN        S  \n",
       "881      0            349257    7.8958          NaN        S  \n",
       "882      0              7552   10.5167          NaN        S  \n",
       "883      0  C.A./SOTON 34068   10.5000          NaN        S  \n",
       "884      0   SOTON/OQ 392076    7.0500          NaN        S  \n",
       "885      5            382652   29.1250          NaN        Q  \n",
       "886      0            211536   13.0000          NaN        S  \n",
       "887      0            112053   30.0000          B42        S  \n",
       "888      2        W./C. 6607   23.4500          NaN        S  \n",
       "889      0            111369   30.0000         C148        C  \n",
       "890      0            370376    7.7500          NaN        Q  \n",
       "\n",
       "[891 rows x 12 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "data = pd.read_csv('../titanic_data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',\n",
      "       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],\n",
      "      dtype='object')\n"
     ]
    }
   ],
   "source": [
    "#display features\n",
    "print data.columns "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Extracting features as Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "passenger_id = data['PassengerId']\n",
    "survival_status = data['Survived']\n",
    "name = data['Name']\n",
    "pclass = data['Pclass']\n",
    "sex = data['Sex']\n",
    "age = data['Age']\n",
    "sibling_spouse_count = data['SibSp']\n",
    "parent_children_count = data['Parch']\n",
    "ticket_no = data['Ticket']\n",
    "fare = data['Fare']\n",
    "cabin = data['Cabin']\n",
    "embark_port = data['Embarked']"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}