frontierspaper.ipynb
{
"cells": [
{
"cell_type": "markdown",
"id": "6359b8c0-9758-486b-ac4f-0e35935c1a39",
"metadata": {},
"source": [
"# Calculations for the Frontiers 2021 paper\n",
"\n",
"(computed post-acceptance)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "8736adb0-2ace-4805-aaba-7d640dd8f7d9",
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"import pandas as pd\n",
"import re "
]
},
{
"cell_type": "markdown",
"id": "1d9a076a-022b-4713-bf2b-2c27800f3b9f",
"metadata": {},
"source": [
"## 1. Recompute Table 1 percentages"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c1b91646-84aa-48e8-ba83-a53f10662645",
"metadata": {},
"outputs": [],
"source": [
"table_dict = { \"males\": { 1: 209, 2: 133 },\n",
" \"females\": { 1: 215, 2: 213 },\n",
" \"dutch\": { 1: 22, 2: 27 },\n",
" \"no answer 1\": { 1: 402, 2: 319 },\n",
" \"primary\": { 1: 5, 2: 4 },\n",
" \"lower\": { 1: 0, 2: 0 },\n",
" \"school\": { 1: 56, 2: 33 },\n",
" \"intermediate\": { 1: 103, 2: 68 },\n",
" \"higher\": { 1: 137, 2: 124 },\n",
" \"university\": { 1: 40, 2: 58 },\n",
" \"no answer 2\": { 1: 11, 2: 19 },\n",
" \"yes\": { 1: 25, 2: 27 },\n",
" \"no\": { 1: 308, 2: 240 },\n",
" \"no answer 3\": { 1: 91, 2: 79 },\n",
" \"i think\": { 1: 334, 2: 264 },\n",
" \"i want\": { 1: 14, 2: 10 },\n",
" \"something\": { 1: 36, 2: 31 },\n",
" \"others think\": { 1: 14, 2: 6 },\n",
" \"other reasons\": { 1: 24, 2: 29 },\n",
" \"no answer 4\": { 1: 2, 2: 6 },\n",
" \"never\": { 1: 164, 2: 187 },\n",
" \"now and then\": { 1: 36, 2: 24 },\n",
" \"daily\": { 1: 224, 2: 135 },\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "34d1479a-fc27-4c0c-b2c0-cc2ef310dbc7",
"metadata": {},
"outputs": [],
"source": [
"def pretty_print(results):\n",
" results_list = []\n",
" for key in results:\n",
" if results[key][1] + results[key][2] == 0:\n",
" print(f\"pretty_print: skipping unused key value: {key}\")\n",
" continue\n",
" percentage = round(100 * results[key][1] / (results[key][1] + results[key][2]), 1)\n",
" results_list.append({\"key\": key, \n",
" \"dropout:N\": results[key][1],\n",
" \"dropout:%\": percentage,\n",
" \"completer:N\": results[key][2],\n",
" \"completer:%\": 100 - percentage,\n",
" })\n",
" return pd.DataFrame(results_list)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "32b1fbee-aa53-4e62-9e7d-1255d635df19",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pretty_print: skipping unused key value: lower\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>key</th>\n",
" <th>dropout:N</th>\n",
" <th>dropout:%</th>\n",
" <th>completer:N</th>\n",
" <th>completer:%</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>males</td>\n",
" <td>209</td>\n",
" <td>61.1</td>\n",
" <td>133</td>\n",
" <td>38.9</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>females</td>\n",
" <td>215</td>\n",
" <td>50.2</td>\n",
" <td>213</td>\n",
" <td>49.8</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>dutch</td>\n",
" <td>22</td>\n",
" <td>44.9</td>\n",
" <td>27</td>\n",
" <td>55.1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>no answer 1</td>\n",
" <td>402</td>\n",
" <td>55.8</td>\n",
" <td>319</td>\n",
" <td>44.2</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>primary</td>\n",
" <td>5</td>\n",
" <td>55.6</td>\n",
" <td>4</td>\n",
" <td>44.4</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>school</td>\n",
" <td>56</td>\n",
" <td>62.9</td>\n",
" <td>33</td>\n",
" <td>37.1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>intermediate</td>\n",
" <td>103</td>\n",
" <td>60.2</td>\n",
" <td>68</td>\n",
" <td>39.8</td>\n",
" </tr>\n",
" <tr>\n",
" <td>7</td>\n",
" <td>higher</td>\n",
" <td>137</td>\n",
" <td>52.5</td>\n",
" <td>124</td>\n",
" <td>47.5</td>\n",
" </tr>\n",
" <tr>\n",
" <td>8</td>\n",
" <td>university</td>\n",
" <td>40</td>\n",
" <td>40.8</td>\n",
" <td>58</td>\n",
" <td>59.2</td>\n",
" </tr>\n",
" <tr>\n",
" <td>9</td>\n",
" <td>no answer 2</td>\n",
" <td>11</td>\n",
" <td>36.7</td>\n",
" <td>19</td>\n",
" <td>63.3</td>\n",
" </tr>\n",
" <tr>\n",
" <td>10</td>\n",
" <td>yes</td>\n",
" <td>25</td>\n",
" <td>48.1</td>\n",
" <td>27</td>\n",
" <td>51.9</td>\n",
" </tr>\n",
" <tr>\n",
" <td>11</td>\n",
" <td>no</td>\n",
" <td>308</td>\n",
" <td>56.2</td>\n",
" <td>240</td>\n",
" <td>43.8</td>\n",
" </tr>\n",
" <tr>\n",
" <td>12</td>\n",
" <td>no answer 3</td>\n",
" <td>91</td>\n",
" <td>53.5</td>\n",
" <td>79</td>\n",
" <td>46.5</td>\n",
" </tr>\n",
" <tr>\n",
" <td>13</td>\n",
" <td>i think</td>\n",
" <td>334</td>\n",
" <td>55.9</td>\n",
" <td>264</td>\n",
" <td>44.1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14</td>\n",
" <td>i want</td>\n",
" <td>14</td>\n",
" <td>58.3</td>\n",
" <td>10</td>\n",
" <td>41.7</td>\n",
" </tr>\n",
" <tr>\n",
" <td>15</td>\n",
" <td>something</td>\n",
" <td>36</td>\n",
" <td>53.7</td>\n",
" <td>31</td>\n",
" <td>46.3</td>\n",
" </tr>\n",
" <tr>\n",
" <td>16</td>\n",
" <td>others think</td>\n",
" <td>14</td>\n",
" <td>70.0</td>\n",
" <td>6</td>\n",
" <td>30.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>17</td>\n",
" <td>other reasons</td>\n",
" <td>24</td>\n",
" <td>45.3</td>\n",
" <td>29</td>\n",
" <td>54.7</td>\n",
" </tr>\n",
" <tr>\n",
" <td>18</td>\n",
" <td>no answer 4</td>\n",
" <td>2</td>\n",
" <td>25.0</td>\n",
" <td>6</td>\n",
" <td>75.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>19</td>\n",
" <td>never</td>\n",
" <td>164</td>\n",
" <td>46.7</td>\n",
" <td>187</td>\n",
" <td>53.3</td>\n",
" </tr>\n",
" <tr>\n",
" <td>20</td>\n",
" <td>now and then</td>\n",
" <td>36</td>\n",
" <td>60.0</td>\n",
" <td>24</td>\n",
" <td>40.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>21</td>\n",
" <td>daily</td>\n",
" <td>224</td>\n",
" <td>62.4</td>\n",
" <td>135</td>\n",
" <td>37.6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" key dropout:N dropout:% completer:N completer:%\n",
"0 males 209 61.1 133 38.9\n",
"1 females 215 50.2 213 49.8\n",
"2 dutch 22 44.9 27 55.1\n",
"3 no answer 1 402 55.8 319 44.2\n",
"4 primary 5 55.6 4 44.4\n",
"5 school 56 62.9 33 37.1\n",
"6 intermediate 103 60.2 68 39.8\n",
"7 higher 137 52.5 124 47.5\n",
"8 university 40 40.8 58 59.2\n",
"9 no answer 2 11 36.7 19 63.3\n",
"10 yes 25 48.1 27 51.9\n",
"11 no 308 56.2 240 43.8\n",
"12 no answer 3 91 53.5 79 46.5\n",
"13 i think 334 55.9 264 44.1\n",
"14 i want 14 58.3 10 41.7\n",
"15 something 36 53.7 31 46.3\n",
"16 others think 14 70.0 6 30.0\n",
"17 other reasons 24 45.3 29 54.7\n",
"18 no answer 4 2 25.0 6 75.0\n",
"19 never 164 46.7 187 53.3\n",
"20 now and then 36 60.0 24 40.0\n",
"21 daily 224 62.4 135 37.6"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pretty_print(table_dict)"
]
},
{
"cell_type": "markdown",
"id": "a9651871-6a09-4467-bd85-b75eeb36fd7f",
"metadata": {},
"source": [
"## 2. Read data\n",
"\n",
"There are 791 records in the file. 21 need to be removed because of missing information. The fuction `count_missing` identifies these."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c17d7a0d-13b6-4497-b199-e4bdf84dfe52",
"metadata": {},
"outputs": [],
"source": [
"DATAFILE = \"/home/erikt/projects/e-mental-health/usb/releases/20200320/liwc+intake123+dropout.csv\""
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "9f66923c-9570-4576-9815-b4105dcba786",
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv(DATAFILE)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "6a8d7d83-7958-4c73-a329-b542b958bc10",
"metadata": {},
"outputs": [],
"source": [
"def count_missing(data):\n",
" missings = []\n",
" for idx, row in data.iterrows():\n",
" missing = 0\n",
" if pd.isna(row[\"1-geslacht\"]) and pd.isna(row[\"1-geslacht0\"]) and pd.isna(row[\"1-geslachtt0\"]):\n",
" missing += 1\n",
" if pd.isna(row[\"3-national0\"]) or row[\"3-national0\"] == \"EMPTY\":\n",
" missing += 1\n",
" if pd.isna(row[\"4-opleidng\"]) and (pd.isna(row[\"8-opleidng0\"]) or row[\"8-opleidng0\"] == \"EMPTY\") and pd.isna(row[\"4-opleidngt0\"]):\n",
" missing += 1\n",
" if pd.isna(row[\"19-behdrink\"]) and pd.isna(row[\"25-behdrinkt0\"]):\n",
" missing += 1\n",
" if pd.isna(row[\"7-aanleid\"]) and pd.isna(row[\"12-aanleid0\"]) and pd.isna(row[\"7-aanleidt0\"]):\n",
" missing += 1\n",
" if pd.isna(row[\"13-roken\"]) and pd.isna(row[\"13-rokent0\"]):\n",
" missing += 1\n",
" missings.append(missing)\n",
" return missings"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "9fd0091e-f34e-485d-aebd-19412bcf1c30",
"metadata": {},
"outputs": [],
"source": [
"def count_values(missings):\n",
" return pd.DataFrame([{\"count\": x} for x in missings])[\"count\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "221cf056-7b41-4845-97cc-3168d35d1c3c",
"metadata": {},
"outputs": [],
"source": [
"missings = count_missing(data)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "a3394fee-ca1e-4eb5-ac60-db8b5d7b89ec",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1 721\n",
"2 49\n",
"4 21\n",
"Name: count, dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"count_values(missings)"
]
},
{
"cell_type": "markdown",
"id": "e63cc0a2-494b-4525-ad54-0f92a027c45a",
"metadata": {},
"source": [
"## 3. Count gender"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "4f101e34-3b2c-4f5f-9249-22db6df41935",
"metadata": {},
"outputs": [],
"source": [
"def count_gender(data, missings):\n",
" genders = { \"man\": {1: 0, 2: 0}, \"vrouw\": {1: 0, 2: 0} }\n",
" for idx, row in data.iterrows():\n",
" if missings[idx] < 3:\n",
" gender = math.nan\n",
" if not pd.isna(row[\"1-geslacht\"]):\n",
" gender = row[\"1-geslacht\"]\n",
" if not pd.isna(row[\"1-geslacht0\"]):\n",
" if not pd.isna(gender):\n",
" print(\"cannot happen: duplicate gender value\")\n",
" gender = row[\"1-geslacht0\"]\n",
" if not pd.isna(row[\"1-geslachtt0\"]):\n",
" if not pd.isna(gender):\n",
" print(\"cannot happen: duplicate gender value\")\n",
" gender = row[\"1-geslachtt0\"]\n",
" if pd.isna(gender):\n",
" print(\"cannot happen: missing gender value\")\n",
" genders[gender][row[\"dropout\"]] += 1\n",
" return genders"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "defe790b-fa4f-47f9-85ab-d2fda6bd7395",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>key</th>\n",
" <th>dropout:N</th>\n",
" <th>dropout:%</th>\n",
" <th>completer:N</th>\n",
" <th>completer:%</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>man</td>\n",
" <td>209</td>\n",
" <td>61.1</td>\n",
" <td>133</td>\n",
" <td>38.9</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>vrouw</td>\n",
" <td>215</td>\n",
" <td>50.2</td>\n",
" <td>213</td>\n",
" <td>49.8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" key dropout:N dropout:% completer:N completer:%\n",
"0 man 209 61.1 133 38.9\n",
"1 vrouw 215 50.2 213 49.8"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pretty_print(count_gender(data, missings))"
]
},
{
"cell_type": "markdown",
"id": "d5e6956a-c269-4383-a5d0-1261054651b9",
"metadata": {},
"source": [
"## 4. Count nationalities\n",
"\n",
"Value NaN stands for \"Not a Number\" and signals missing data"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "4b10190c-91a7-4b80-adf9-a3cf5d1a8227",
"metadata": {},
"outputs": [],
"source": [
"def count_nationalities(data, missings):\n",
" nationalities = { \"nederlands\": {1: 0, 2: 0}, \"EMPTY\": {1: 0, 2: 0}, math.nan: {1: 0, 2: 0}}\n",
" for idx, row in data.iterrows():\n",
" if missings[idx] < 3:\n",
" nationality = math.nan\n",
" if not pd.isna(row[\"3-national0\"]):\n",
" nationality = row[\"3-national0\"]\n",
" nationalities[nationality][row[\"dropout\"]] += 1\n",
" return nationalities"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "95674d65-5ae6-4c5d-8002-948b3e3f4b63",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pretty_print: skipping unused key value: EMPTY\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>key</th>\n",
" <th>dropout:N</th>\n",
" <th>dropout:%</th>\n",
" <th>completer:N</th>\n",
" <th>completer:%</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>nederlands</td>\n",
" <td>22</td>\n",
" <td>44.9</td>\n",
" <td>27</td>\n",
" <td>55.1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>402</td>\n",
" <td>55.8</td>\n",
" <td>319</td>\n",
" <td>44.2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" key dropout:N dropout:% completer:N completer:%\n",
"0 nederlands 22 44.9 27 55.1\n",
"1 NaN 402 55.8 319 44.2"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pretty_print(count_nationalities(data, missings))"
]
},
{
"cell_type": "markdown",
"id": "23590783-9bf7-43e0-9a2f-2adccbb6cfc5",
"metadata": {},
"source": [
"## 5. Count educations\n",
"\n",
"Key value \"MULTIWORD\" represents a phrase removed by the anonymization process. It is unknown why some key fields have been filled with numbers (in this case: 3)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "9223bae4-0071-4e99-8a48-afd92d30f84e",
"metadata": {},
"outputs": [],
"source": [
"def count_educations(data, missings):\n",
" educations = { \"hbo\": {1: 0, 2: 0}, \"mbo\": {1: 0, 2: 0}, \"wo\": {1: 0, 2: 0}, \"lbo/mavo\": {1: 0, 2: 0}, \"havo/vwo\": {1: 0, 2: 0}, \n",
" \"MULTIWORD\": {1: 0, 2: 0}, \"lbo/mavo/vmbo\": {1: 0, 2: 0}, \"basisschool\": {1: 0, 2: 0}, \"EMPTY\": {1: 0, 2: 0}, \"3\": {1: 0, 2: 0}, }\n",
" for idx, row in data.iterrows():\n",
" if missings[idx] < 3:\n",
" education = math.nan\n",
" if not pd.isna(row[\"4-opleidng\"]):\n",
" education = row[\"4-opleidng\"]\n",
" if not pd.isna(row[\"8-opleidng0\"]):\n",
" if not pd.isna(education):\n",
" print(f\"cannot happen: duplicate education value: {education} and {row['8-opleidng0']}\")\n",
" education = row[\"8-opleidng0\"]\n",
" if not pd.isna(row[\"4-opleidngt0\"]):\n",
" if not pd.isna(education):\n",
" print(f\"cannot happen: duplicate education value: {education} and {row['4-opleidngt0']}\")\n",
" education = row[\"4-opleidngt0\"]\n",
" if pd.isna(education):\n",
" print(\"cannot happen: missing education value\")\n",
" educations[education][row[\"dropout\"]] += 1\n",
" return educations"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "cdd21daf-994e-4df6-882d-fcf89025e4e7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pretty_print: skipping unused key value: EMPTY\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>key</th>\n",
" <th>dropout:N</th>\n",
" <th>dropout:%</th>\n",
" <th>completer:N</th>\n",
" <th>completer:%</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>hbo</td>\n",
" <td>137</td>\n",
" <td>52.5</td>\n",
" <td>124</td>\n",
" <td>47.5</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>mbo</td>\n",
" <td>103</td>\n",
" <td>60.2</td>\n",
" <td>68</td>\n",
" <td>39.8</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>wo</td>\n",
" <td>40</td>\n",
" <td>40.8</td>\n",
" <td>58</td>\n",
" <td>59.2</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>lbo/mavo</td>\n",
" <td>65</td>\n",
" <td>65.0</td>\n",
" <td>35</td>\n",
" <td>35.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>havo/vwo</td>\n",
" <td>56</td>\n",
" <td>62.9</td>\n",
" <td>33</td>\n",
" <td>37.1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>MULTIWORD</td>\n",
" <td>14</td>\n",
" <td>42.4</td>\n",
" <td>19</td>\n",
" <td>57.6</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>lbo/mavo/vmbo</td>\n",
" <td>4</td>\n",
" <td>50.0</td>\n",
" <td>4</td>\n",
" <td>50.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>7</td>\n",
" <td>basisschool</td>\n",
" <td>5</td>\n",
" <td>55.6</td>\n",
" <td>4</td>\n",
" <td>44.4</td>\n",
" </tr>\n",
" <tr>\n",
" <td>8</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>100.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" key dropout:N dropout:% completer:N completer:%\n",
"0 hbo 137 52.5 124 47.5\n",
"1 mbo 103 60.2 68 39.8\n",
"2 wo 40 40.8 58 59.2\n",
"3 lbo/mavo 65 65.0 35 35.0\n",
"4 havo/vwo 56 62.9 33 37.1\n",
"5 MULTIWORD 14 42.4 19 57.6\n",
"6 lbo/mavo/vmbo 4 50.0 4 50.0\n",
"7 basisschool 5 55.6 4 44.4\n",
"8 3 0 0.0 1 100.0"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pretty_print(count_educations(data, missings))"
]
},
{
"cell_type": "markdown",
"id": "76a44ed9-40b7-46b0-b1a1-b103e910b477",
"metadata": {},
"source": [
"## 6. Count previous treatments"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "47277603-0ab4-4efc-a126-ecc88f815789",
"metadata": {},
"outputs": [],
"source": [
"def count_treatments(data, missings):\n",
" treatments = { \"nee\": {1: 0, 2: 0}, \"ja\": {1: 0, 2: 0}, \"EMPTY\": {1: 0, 2: 0}, math.nan: {1: 0, 2: 0},\n",
" \"ja ik heb deze internetbehandeling al eens gevolgd.\": {1: 0, 2: 0}, \"MULTIWORD\": {1: 0, 2: 0}, }\n",
" for idx, row in data.iterrows():\n",
" if missings[idx] < 3:\n",
" treatment = math.nan\n",
" if not pd.isna(row[\"19-behdrink\"]):\n",
" treatment = row[\"19-behdrink\"]\n",
" if not pd.isna(row[\"25-behdrinkt0\"]):\n",
" if not pd.isna(treatment):\n",
" print(f\"cannot happen: duplicate treatment value: {reason} and {row['25-behdrinkt0']}\")\n",
" treatment = row[\"25-behdrinkt0\"]\n",
" if not pd.isna(row[\"26-behversl0\"]):\n",
" if not pd.isna(treatment):\n",
" print(f\"cannot happen: duplicate treatment value: {reason} and {row['25-behversl0']}\")\n",
" treatment = row[\"26-behversl0\"]\n",
"\n",
" treatments[treatment][row[\"dropout\"]] += 1\n",
" return treatments"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "0e1698b8-0593-4507-b55d-376e77a2e909",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pretty_print: skipping unused key value: nan\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>key</th>\n",
" <th>dropout:N</th>\n",
" <th>dropout:%</th>\n",
" <th>completer:N</th>\n",
" <th>completer:%</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>nee</td>\n",
" <td>308</td>\n",
" <td>56.2</td>\n",
" <td>240</td>\n",
" <td>43.8</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>ja</td>\n",
" <td>12</td>\n",
" <td>50.0</td>\n",
" <td>12</td>\n",
" <td>50.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>EMPTY</td>\n",
" <td>91</td>\n",
" <td>53.5</td>\n",
" <td>79</td>\n",
" <td>46.5</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>ja ik heb deze internetbehandeling al eens gev...</td>\n",
" <td>13</td>\n",
" <td>50.0</td>\n",
" <td>13</td>\n",
" <td>50.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>MULTIWORD</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>100.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" key dropout:N dropout:% \\\n",
"0 nee 308 56.2 \n",
"1 ja 12 50.0 \n",
"2 EMPTY 91 53.5 \n",
"3 ja ik heb deze internetbehandeling al eens gev... 13 50.0 \n",
"4 MULTIWORD 0 0.0 \n",
"\n",
" completer:N completer:% \n",
"0 240 43.8 \n",
"1 12 50.0 \n",
"2 79 46.5 \n",
"3 13 50.0 \n",
"4 2 100.0 "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pretty_print(count_treatments(data, missings))"
]
},
{
"cell_type": "markdown",
"id": "96890e53-53ff-4f29-86d8-d7d920487e03",
"metadata": {},
"source": [
"## 7. Count reasons for treatment"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "75ff3960-0de9-49a6-90ec-ffb55e9b326b",
"metadata": {},
"outputs": [],
"source": [
"def count_reasons(data, missings):\n",
" reasons = { \"ik vind zelf dat ik teveel drink\": {1: 0, 2: 0}, \n",
" \"anderen vinden dat ik teveel drink\": {1: 0, 2: 0},\n",
" \"ik wil advies over mijn alcoholgebruik\": {1: 0, 2: 0},\n",
" \"er is iets vervelends gebeurd en daarom wil ik iets aan mijn drinken doen\": {1: 0, 2: 0},\n",
" \"1\": {1: 0, 2: 0}, \n",
" \"2\": {1: 0, 2: 0}, \n",
" \"3\": {1: 0, 2: 0}, \n",
" \"4\": {1: 0, 2: 0}, \n",
" \"MULTIWORD\": {1: 0, 2: 0}, \n",
" \"EMPTY\": {1: 0, 2: 0}, }\n",
" for idx, row in data.iterrows():\n",
" if missings[idx] < 3:\n",
" reason = math.nan\n",
" if not pd.isna(row[\"7-aanleid\"]):\n",
" reason = row[\"7-aanleid\"]\n",
" if not pd.isna(row[\"12-aanleid0\"]):\n",
" if not pd.isna(reason):\n",
" print(f\"cannot happen: duplicate education value: {reason} and {row['12-aanleid0']}\")\n",
" reason = row[\"12-aanleid0\"]\n",
" if not pd.isna(row[\"7-aanleidt0\"]):\n",
" if not pd.isna(reason):\n",
" print(f\"cannot happen: duplicate education value: {reason} and {row['7-aanleidt0']}\")\n",
" reason = row[\"7-aanleidt0\"]\n",
" if pd.isna(reason):\n",
" print(\"cannot happen: missing reason value\")\n",
" reasons[reason][row[\"dropout\"]] += 1\n",
" return reasons"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "6943b797-27b4-436b-93d0-7d785217b5ed",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>key</th>\n",
" <th>dropout:N</th>\n",
" <th>dropout:%</th>\n",
" <th>completer:N</th>\n",
" <th>completer:%</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>ik vind zelf dat ik teveel drink</td>\n",
" <td>334</td>\n",
" <td>55.9</td>\n",
" <td>264</td>\n",
" <td>44.1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>anderen vinden dat ik teveel drink</td>\n",
" <td>14</td>\n",
" <td>70.0</td>\n",
" <td>6</td>\n",
" <td>30.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>ik wil advies over mijn alcoholgebruik</td>\n",
" <td>14</td>\n",
" <td>58.3</td>\n",
" <td>10</td>\n",
" <td>41.7</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>er is iets vervelends gebeurd en daarom wil ik...</td>\n",
" <td>36</td>\n",
" <td>53.7</td>\n",
" <td>31</td>\n",
" <td>46.3</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>100.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>40.0</td>\n",
" <td>3</td>\n",
" <td>60.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>100.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>50.0</td>\n",
" <td>1</td>\n",
" <td>50.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>8</td>\n",
" <td>MULTIWORD</td>\n",
" <td>19</td>\n",
" <td>43.2</td>\n",
" <td>25</td>\n",
" <td>56.8</td>\n",
" </tr>\n",
" <tr>\n",
" <td>9</td>\n",
" <td>EMPTY</td>\n",
" <td>2</td>\n",
" <td>25.0</td>\n",
" <td>6</td>\n",
" <td>75.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" key dropout:N dropout:% \\\n",
"0 ik vind zelf dat ik teveel drink 334 55.9 \n",
"1 anderen vinden dat ik teveel drink 14 70.0 \n",
"2 ik wil advies over mijn alcoholgebruik 14 58.3 \n",
"3 er is iets vervelends gebeurd en daarom wil ik... 36 53.7 \n",
"4 1 1 100.0 \n",
"5 2 2 40.0 \n",
"6 3 1 100.0 \n",
"7 4 1 50.0 \n",
"8 MULTIWORD 19 43.2 \n",
"9 EMPTY 2 25.0 \n",
"\n",
" completer:N completer:% \n",
"0 264 44.1 \n",
"1 6 30.0 \n",
"2 10 41.7 \n",
"3 31 46.3 \n",
"4 0 0.0 \n",
"5 3 60.0 \n",
"6 0 0.0 \n",
"7 1 50.0 \n",
"8 25 56.8 \n",
"9 6 75.0 "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pretty_print(count_reasons(data, missings))"
]
},
{
"cell_type": "markdown",
"id": "ac8f93bf-05dc-443b-a3cb-276b43f4386c",
"metadata": {},
"source": [
"## 8. Count smoking behaviour"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "2cbe5227-4b5d-4fd5-95a4-86613ba1065d",
"metadata": {},
"outputs": [],
"source": [
"def count_smokers(data, missings):\n",
" smokers = { \"ja af en toe\": {1: 0, 2: 0}, \"ja dagelijks\": {1: 0, 2: 0}, \"nee helemaal niet\": {1: 0, 2: 0}, \n",
" math.nan: {1: 0, 2: 0}, \"MULTIWORD\": {1: 0, 2: 0}, \"ja\": {1: 0, 2: 0}, \"nee\": {1: 0, 2: 0}, }\n",
" for idx, row in data.iterrows():\n",
" if missings[idx] < 3:\n",
" smoker = math.nan\n",
" if not pd.isna(row[\"13-roken\"]):\n",
" smoker = row[\"13-roken\"]\n",
" if not pd.isna(row[\"19-tabak0\"]):\n",
" if not pd.isna(smoker):\n",
" print(\"cannot happen: duplicate smoker value\")\n",
" smoker = row[\"19-tabak0\"]\n",
" if not pd.isna(row[\"13-rokent0\"]):\n",
" if not pd.isna(smoker):\n",
" print(\"cannot happen: duplicate smoker value\")\n",
" smoker = row[\"13-rokent0\"]\n",
" smokers[smoker][row[\"dropout\"]] += 1\n",
" return smokers"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "5a120e95-af90-4aa5-8ed3-53f9458f20ca",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pretty_print: skipping unused key value: nan\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>key</th>\n",
" <th>dropout:N</th>\n",
" <th>dropout:%</th>\n",
" <th>completer:N</th>\n",
" <th>completer:%</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>ja af en toe</td>\n",
" <td>31</td>\n",
" <td>60.8</td>\n",
" <td>20</td>\n",
" <td>39.2</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>ja dagelijks</td>\n",
" <td>210</td>\n",
" <td>62.3</td>\n",
" <td>127</td>\n",
" <td>37.7</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>nee helemaal niet</td>\n",
" <td>159</td>\n",
" <td>48.3</td>\n",
" <td>170</td>\n",
" <td>51.7</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>MULTIWORD</td>\n",
" <td>2</td>\n",
" <td>50.0</td>\n",
" <td>2</td>\n",
" <td>50.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>ja</td>\n",
" <td>21</td>\n",
" <td>46.7</td>\n",
" <td>24</td>\n",
" <td>53.3</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>nee</td>\n",
" <td>1</td>\n",
" <td>25.0</td>\n",
" <td>3</td>\n",
" <td>75.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" key dropout:N dropout:% completer:N completer:%\n",
"0 ja af en toe 31 60.8 20 39.2\n",
"1 ja dagelijks 210 62.3 127 37.7\n",
"2 nee helemaal niet 159 48.3 170 51.7\n",
"3 MULTIWORD 2 50.0 2 50.0\n",
"4 ja 21 46.7 24 53.3\n",
"5 nee 1 25.0 3 75.0"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pretty_print(count_smokers(data, missings))"
]
},
{
"cell_type": "markdown",
"id": "64bd44dc-6390-4432-aa30-a81813ea3cc3",
"metadata": {},
"source": [
"## 9. Inspecting metadata field names"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "9d7b4320-e7c5-4422-a02f-7dad9b25b1dd",
"metadata": {},
"outputs": [],
"source": [
"def find_columns(data, column_name):\n",
" for column in data.columns:\n",
" if re.search(column_name, column, flags=re.IGNORECASE):\n",
" print(column)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "6a740654-aa76-42d9-af1e-cc48a8296f45",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"19-tabak0\n",
"19-tabakjr0\n",
"20-tabak300\n",
"20-tabakdg0\n",
"20-tabakhv0\n",
"21-tabak0h\n"
]
}
],
"source": [
"find_columns(data, \"tab\")"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "abd9ab66-5bd9-4452-807b-676fead32a84",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ja 45\n",
"EMPTY 21\n",
"nee 4\n",
"Name: 19-tabak0, dtype: int64"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[\"19-tabak0\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "0b2b6ac0-6ca4-4b5a-b27e-0dc58c70ff05",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"file\n",
"nbr of mails\n",
"Number of matches\n",
"number count\n",
"1 function\n",
"2 pronoun\n",
"3 ppron\n",
"4 i\n",
"5 we\n",
"6 you\n",
"7 shehe\n",
"8 they\n",
"9 ipron\n",
"10 article\n",
"11 prep\n",
"12 auxverb\n",
"13 adverb\n",
"14 conj\n",
"15 negate\n",
"20 verb\n",
"21 adj\n",
"22 compare\n",
"23 interrog\n",
"24 number\n",
"25 quant\n",
"30 affect\n",
"31 posemo\n",
"32 negemo\n",
"33 anx\n",
"34 anger\n",
"35 sad\n",
"40 social\n",
"41 family\n",
"42 friend\n",
"43 female\n",
"44 male\n",
"50 cogproc\n",
"51 insight\n",
"52 cause\n",
"53 discrep\n",
"54 tentat\n",
"55 certain\n",
"56 differ\n",
"60 percept\n",
"61 see\n",
"62 hear\n",
"63 feel\n",
"70 bio\n",
"71 body\n",
"72 health\n",
"73 sexual\n",
"74 ingest\n",
"80 drives\n",
"81 affiliation\n",
"82 achieve\n",
"83 power\n",
"84 reward\n",
"85 risk\n",
"90 focuspast\n",
"91 focuspresent\n",
"92 focusfuture\n",
"100 relativ\n",
"101 motion\n",
"102 space\n",
"103 time\n",
"110 work\n",
"111 leisure\n",
"112 home\n",
"113 money\n",
"114 relig\n",
"115 death\n",
"120 informal\n",
"121 swear\n",
"122 netspeak\n",
"123 assent\n",
"124 nonflu\n",
"125 filler\n",
"counselor\n",
"dropout\n",
"0-title\n",
"1-geslacht\n",
"1-geslacht0\n",
"1-geslachtt0\n",
"10-dag1\n",
"10-dag1t0\n",
"10-dag2\n",
"10-dag2t0\n",
"10-dag3\n",
"10-dag3t0\n",
"10-dag4\n",
"10-dag4t0\n",
"10-dag5\n",
"10-dag5t0\n",
"10-dag6\n",
"10-dag6t0\n",
"10-dag7\n",
"10-dag7t0\n",
"10-dagb0\n",
"10-typisch\n",
"10-typischt0\n",
"10-week\n",
"10-weekt0\n",
"11-dagritme0\n",
"11-terugbl\n",
"11-terugblt0\n",
"12-aanleid0\n",
"12-medi0\n",
"12-medi0t0\n",
"12-medi1\n",
"12-medi1t0\n",
"12-medi2\n",
"12-medi2t0\n",
"12-medi3\n",
"12-medi3t0\n",
"12-medi4\n",
"12-medi4t0\n",
"12-medi5\n",
"12-medi5t0\n",
"12-medicijn\n",
"12-medicijnt0\n",
"12-medidos0\n",
"12-medidos0t0\n",
"12-medidos1\n",
"12-medidos1t0\n",
"12-medidos2\n",
"12-medidos2t0\n",
"12-medidos3\n",
"12-medidos3t0\n",
"12-medidos4\n",
"12-medidos4t0\n",
"12-medidos5\n",
"12-medidos5t0\n",
"12-mediln0\n",
"12-mediln0t0\n",
"12-mediln1\n",
"12-mediln1t0\n",
"12-mediln2\n",
"12-mediln2t0\n",
"12-mediln3\n",
"12-mediln3t0\n",
"12-mediln4\n",
"12-mediln4t0\n",
"12-mediln5\n",
"12-mediln5t0\n",
"12-medivr0\n",
"12-medivr0t0\n",
"12-medivr1\n",
"12-medivr1t0\n",
"12-medivr2\n",
"12-medivr2t0\n",
"12-medivr3\n",
"12-medivr3t0\n",
"12-medivr4\n",
"12-medivr4t0\n",
"12-medivr5\n",
"12-medivr5t0\n",
"12-verhaal0\n",
"13-hoeinfo0\n",
"13-rodag\n",
"13-rodagt0\n",
"13-roken\n",
"13-rokent0\n",
"13-romaand\n",
"13-romaandt0\n",
"13-rovroe\n",
"13-rovroet0\n",
"14-drugs\n",
"14-drugs0\n",
"14-drugs0t0\n",
"14-drugs1\n",
"14-drugs1t0\n",
"14-drugs2\n",
"14-drugs2t0\n",
"14-drugs3\n",
"14-drugs3t0\n",
"14-drugs4\n",
"14-drugs4t0\n",
"14-drugs5\n",
"14-drugs5t0\n",
"14-drugsln0\n",
"14-drugsln0t0\n",
"14-drugsln1\n",
"14-drugsln1t0\n",
"14-drugsln2\n",
"14-drugsln2t0\n",
"14-drugsln3\n",
"14-drugsln3t0\n",
"14-drugsln4\n",
"14-drugsln4t0\n",
"14-drugsln5\n",
"14-drugsln5t0\n",
"14-drugst0\n",
"14-drugsvk0\n",
"14-drugsvk0t0\n",
"14-drugsvk1\n",
"14-drugsvk2\n",
"14-drugsvk3\n",
"14-drugsvk4\n",
"14-drugsvk5\n",
"15-dag10\n",
"15-dag20\n",
"15-dag30\n",
"15-dag40\n",
"15-dag50\n",
"15-dag60\n",
"15-dag70\n",
"15-gokken\n",
"15-gokkent0\n",
"15-problgok\n",
"15-problgokt0\n",
"15-week0\n",
"16-eetdrang\n",
"16-eetdrangt0\n",
"16-gebleven0\n",
"16-gewgebdg0\n",
"16-gewgebhv0\n",
"16-hoggebdg0\n",
"16-hoggebhv0\n",
"17-depres\n",
"17-diarree\n",
"17-epilep\n",
"17-geheug\n",
"17-ghqa1t0\n",
"17-ghqa2t0\n",
"17-ghqa3t0\n",
"17-ghqa4t0\n",
"17-ghqa5t0\n",
"17-ghqa6t0\n",
"17-ghqa7t0\n",
"17-ghqb1t0\n",
"17-ghqb2t0\n",
"17-ghqb3t0\n",
"17-ghqb4t0\n",
"17-ghqb5t0\n",
"17-ghqb6t0\n",
"17-hartklp\n",
"17-hoofdp\n",
"17-hyper\n",
"17-maag\n",
"17-moe\n",
"17-probleem0\n",
"17-seksuel\n",
"17-transpi\n",
"17-trillen\n",
"18-maaltd\n",
"18-verlang10\n",
"18-verlang20\n",
"18-verlang30\n",
"18-verlang40\n",
"18-verlang50\n",
"18-verlangtot0\n",
"19-behdrink\n",
"19-canna0\n",
"19-cannajr0\n",
"19-coca0\n",
"19-cocajr0\n",
"19-deprest0\n",
"19-diarreet0\n",
"19-drugs0\n",
"19-drugsjr0\n",
"19-epilept0\n",
"19-geheugt0\n",
"19-ghb0\n",
"19-ghbjr0\n",
"19-goTo3\n",
"19-gok0\n",
"19-gokjr0\n",
"19-hartklpt0\n",
"19-hoofdpt0\n",
"19-hypert0\n",
"19-map10t0\n",
"19-map1t0\n",
"19-map2t0\n",
"19-map3t0\n",
"19-map4t0\n",
"19-map5t0\n",
"19-map6t0\n",
"19-map7t0\n",
"19-map8t0\n",
"19-map9t0\n",
"19-maptotaalt0\n",
"19-opiat0\n",
"19-opiatjr0\n",
"19-seksuelt0\n",
"19-slaap0\n",
"19-slaapjr0\n",
"19-speed0\n",
"19-speedjr0\n",
"19-tabak0\n",
"19-tabakjr0\n",
"19-transpit0\n",
"19-xtc0\n",
"19-xtcjr0\n",
"2-leeftijd\n",
"2-leeftijd0\n",
"2-leeftijdt0\n",
"2-medi0\n",
"2-medi00\n",
"2-medi10\n",
"2-medi20\n",
"2-medi30\n",
"2-medi40\n",
"2-medi50\n",
"2-medicijnr0\n",
"2-medidos00\n",
"2-medidos10\n",
"2-medidos20\n",
"2-medidos30\n",
"2-medidos40\n",
"2-medidos50\n",
"2-mediln00\n",
"2-mediln10\n",
"2-mediln20\n",
"2-mediln30\n",
"2-mediln40\n",
"2-mediln50\n",
"2-medivr00\n",
"2-medivr10\n",
"2-medivr20\n",
"2-medivr30\n",
"2-medivr40\n",
"2-medivr50\n",
"20-canna300\n",
"20-cannadg0\n",
"20-cannahv0\n",
"20-coca300\n",
"20-cocadg0\n",
"20-cocahv0\n",
"20-drugs300\n",
"20-drugsdg0\n",
"20-drugshv0\n",
"20-eq1t0\n",
"20-eq2t0\n",
"20-eq3t0\n",
"20-eq4t0\n",
"20-eq5t0\n",
"20-eq6t0\n",
"20-ghb300\n",
"20-ghbdg0\n",
"20-ghbhv0\n",
"20-gok300\n",
"20-gokdg0\n",
"20-gokhv0\n",
"20-insult\n",
"20-opiat300\n",
"20-opiatdg0\n",
"20-opiathv0\n",
"20-slaap300\n",
"20-slaapdg0\n",
"20-slaaphv0\n",
"20-speed300\n",
"20-speeddg0\n",
"20-speedhv0\n",
"20-tabak300\n",
"20-tabakdg0\n",
"20-tabakhv0\n",
"20-xtc300\n",
"20-xtcdg0\n",
"20-xtchv0\n",
"21-canna0h\n",
"21-coca0h\n",
"21-delirium\n",
"21-drugs0h\n",
"21-eetdrang0\n",
"21-eqtht0\n",
"21-ghb0h\n",
"21-gok0h\n",
"21-opiat0h\n",
"21-slaap0h\n",
"21-speed0h\n",
"21-tabak0h\n",
"21-xtc0h\n",
"22-dass10t0\n",
"22-dass11t0\n",
"22-dass12t0\n",
"22-dass13t0\n",
"22-dass1t0\n",
"22-dass2t0\n",
"22-dass3t0\n",
"22-dass4t0\n",
"22-dass5t0\n",
"22-dass6t0\n",
"22-dass7t0\n",
"22-dass8t0\n",
"22-dass9t0\n",
"22-medi0\n",
"22-medi00\n",
"22-medi10\n",
"22-medi20\n",
"22-medi30\n",
"22-medi40\n",
"22-medi50\n",
"22-medicijnr0\n",
"22-medidos00\n",
"22-medidos10\n",
"22-medidos20\n",
"22-medidos30\n",
"22-medidos40\n",
"22-medidos50\n",
"22-mediln00\n",
"22-mediln10\n",
"22-mediln20\n",
"22-mediln30\n",
"22-mediln40\n",
"22-mediln50\n",
"22-medivr00\n",
"22-medivr10\n",
"22-medivr20\n",
"22-medivr30\n",
"22-medivr40\n",
"22-medivr50\n",
"22-ontwen\n",
"23-diarree0\n",
"23-doofgev0\n",
"23-eetlust0\n",
"23-epilep0\n",
"23-geheug0\n",
"23-goTo2\n",
"23-hartklp0\n",
"23-hoofdp0\n",
"23-hyper0\n",
"23-klachttot0\n",
"23-kortad0\n",
"23-ltgeslacht1\n",
"23-maag0\n",
"23-maphss0\n",
"23-missel0\n",
"23-moe0\n",
"23-pijnbor0\n",
"23-pijngewr0\n",
"23-psych\n",
"23-psytijd0\n",
"23-psytijd1\n",
"23-psytijd2\n",
"23-psytijd3\n",
"23-psytijd4\n",
"23-psytijd5\n",
"23-psyvoor0\n",
"23-psyvoor1\n",
"23-psyvoor2\n",
"23-psyvoor3\n",
"23-psyvoor4\n",
"23-psyvoor5\n",
"23-psywaar0\n",
"23-psywaar1\n",
"23-psywaar2\n",
"23-psywaar3\n",
"23-psywaar4\n",
"23-psywaar5\n",
"23-seksuel0\n",
"23-spierp0\n",
"23-transpi0\n",
"23-trillen0\n",
"24-maaltdt0\n",
"24-mindpres\n",
"24-telaat\n",
"24-ziekmeld\n",
"24-zwanger0\n",
"25-behdrinkt0\n",
"25-maaltd0\n",
"25-rcq1\n",
"25-rcq10\n",
"25-rcq11\n",
"25-rcq12\n",
"25-rcq2\n",
"25-rcq3\n",
"25-rcq4\n",
"25-rcq5\n",
"25-rcq6\n",
"25-rcq7\n",
"25-rcq8\n",
"25-rcq9\n",
"25-rcqact\n",
"25-rcqcon\n",
"25-rcqpre\n",
"26-behamb0\n",
"26-behkli0\n",
"26-behtoel0\n",
"26-behversl0\n",
"26-bereik\n",
"26-insultt0\n",
"27-deliriumt0\n",
"27-insult0\n",
"27-toelich\n",
"28-delirium0\n",
"28-ontwent0\n",
"29-ontwen0\n",
"29-psycht0\n",
"29-psytijd0t0\n",
"29-psytijd1t0\n",
"29-psytijd2t0\n",
"29-psytijd3t0\n",
"29-psytijd4t0\n",
"29-psytijd5t0\n",
"29-psyvoor0t0\n",
"29-psyvoor1t0\n",
"29-psyvoor2t0\n",
"29-psyvoor3t0\n",
"29-psyvoor4t0\n",
"29-psyvoor5t0\n",
"29-psywaar0t0\n",
"29-psywaar1t0\n",
"29-psywaar2t0\n",
"29-psywaar3t0\n",
"29-psywaar4t0\n",
"29-psywaar5t0\n",
"3-national0\n",
"3-woonsit\n",
"3-woonsitt0\n",
"30-mindprest0\n",
"30-psych0\n",
"30-psychr0\n",
"30-psytijd00\n",
"30-psytijd10\n",
"30-psytijd20\n",
"30-psytijd30\n",
"30-psytijd40\n",
"30-psytijd50\n",
"30-psyvoor00\n",
"30-psyvoor10\n",
"30-psyvoor20\n",
"30-psyvoor30\n",
"30-psyvoor40\n",
"30-psyvoor50\n",
"30-psywaar00\n",
"30-psywaar10\n",
"30-psywaar20\n",
"30-psywaar30\n",
"30-psywaar40\n",
"30-psywaar50\n",
"30-telaatt0\n",
"30-ziekmeldt0\n",
"31-angst0\n",
"31-dass10\n",
"31-dass100\n",
"31-dass110\n",
"31-dass120\n",
"31-dass130\n",
"31-dass140\n",
"31-dass150\n",
"31-dass160\n",
"31-dass170\n",
"31-dass180\n",
"31-dass190\n",
"31-dass20\n",
"31-dass200\n",
"31-dass210\n",
"31-dass30\n",
"31-dass40\n",
"31-dass50\n",
"31-dass60\n",
"31-dass70\n",
"31-dass80\n",
"31-dass90\n",
"31-dasstot0\n",
"31-depres0\n",
"31-rcq10t0\n",
"31-rcq11t0\n",
"31-rcq12t0\n",
"31-rcq1t0\n",
"31-rcq2t0\n",
"31-rcq3t0\n",
"31-rcq4t0\n",
"31-rcq5t0\n",
"31-rcq6t0\n",
"31-rcq7t0\n",
"31-rcq8t0\n",
"31-rcq9t0\n",
"31-rcqactt0\n",
"31-rcqcont0\n",
"31-rcqpret0\n",
"31-stress0\n",
"32-bereikt0\n",
"32-suicide10\n",
"32-suicide20\n",
"33-halluci0\n",
"33-toelicht0\n",
"34-mdoel\n",
"34-wanen0\n",
"35-justitie0\n",
"35-mhfddoelt0\n",
"36-eq10\n",
"36-eq20\n",
"36-eq30\n",
"36-eq40\n",
"36-eq50\n",
"36-eq60\n",
"36-maanvult0\n",
"36-mdenkent0\n",
"36-mdoodt0\n",
"36-mfamt0\n",
"36-mgezndht0\n",
"36-mlastt0\n",
"36-mminlvnt0\n",
"36-mpolitiet0\n",
"36-mprblmt0\n",
"36-mvinwrkt0\n",
"37-dh1t0\n",
"37-dh2t0\n",
"37-dh3t0\n",
"37-dh4t0\n",
"37-dh5t0\n",
"37-eqth0\n",
"37-re1t0\n",
"37-re2t0\n",
"37-tr1t0\n",
"37-tr2t0\n",
"37-tr3t0\n",
"37-tr4t0\n",
"37-tr5t0\n",
"37-tr6t0\n",
"37-tr7t0\n",
"37-tr8t0\n",
"38-basaal0\n",
"38-beperking0\n",
"38-mateicn10\n",
"38-mateicn100\n",
"38-mateicn110\n",
"38-mateicn120\n",
"38-mateicn130\n",
"38-mateicn140\n",
"38-mateicn150\n",
"38-mateicn160\n",
"38-mateicn170\n",
"38-mateicn180\n",
"38-mateicn190\n",
"38-mateicn20\n",
"38-mateicn30\n",
"38-mateicn40\n",
"38-mateicn50\n",
"38-mateicn60\n",
"38-mateicn70\n",
"38-mateicn80\n",
"38-mateicn90\n",
"38-mopmerkt0\n",
"38-relation0\n",
"39-doel\n",
"4-cultherk0\n",
"4-opleidng\n",
"4-opleidngt0\n",
"40-hfddoel0\n",
"41-benniet0\n",
"41-mailadres0\n",
"41-smsnr0\n",
"41-telnr0\n",
"42-vragen10\n",
"42-vragen20\n",
"42-vragen30\n",
"42-vragen40\n",
"43-toelich0\n",
"5-burstaat0\n",
"5-dagb\n",
"5-dagbt0\n",
"6-dagritme\n",
"6-dagritmet0\n",
"6-kind0\n",
"6-kindn0\n",
"6-kindthuis0\n",
"7-aanleid\n",
"7-aanleidt0\n",
"7-verhaal\n",
"7-verhaalt0\n",
"7-woonsit0\n",
"8-hoeinfo\n",
"8-hoeinfot0\n",
"8-opleidng0\n",
"9-inkomst0\n"
]
}
],
"source": [
"for column in data.columns:\n",
" print(column)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "07ac00bd-6537-490e-b874-6a9d3416688c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "python37",
"language": "python",
"name": "python37"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}