e-mental-health/data-processing

View on GitHub
tactus-analysis.ipynb

Summary

Maintainability
Test Coverage
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tactus analysis\n",
    "\n",
    "Analysis of Tactus AdB data for data paper (2020)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import re\n",
    "import sys\n",
    "import xml.etree.ElementTree as ET\n",
    "sys.path.insert(1, '/home/erikt/project/e-mental-health/data-processing')\n",
    "import tactus2table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "DATADIR = \"/home/erikt/projects/e-mental-health/usb/tmp/20190917/\"\n",
    "INTAKE = \"Intake\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. How many clients have a therapy start date in their profile?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "FILEPATTERN = r\"^A.*z$\"\n",
    "QUERY1 = \"./Treatment/StartDate\"\n",
    "STARTDATE = \"STARTDATE\"\n",
    "NONE = \"NONE\"\n",
    "\n",
    "def query1(root,query):\n",
    "    try: text = root.findall(query)[0].text\n",
    "    except: text = None\n",
    "    if text is None: text = NONE\n",
    "    elif text.strip() == \"\": text = NONE \n",
    "    else: text = STARTDATE\n",
    "    return(text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'STARTDATE': 923, 'NONE': 1060}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "files = sorted(os.listdir(DATADIR))\n",
    "\n",
    "data = {}\n",
    "for inFileName in files:\n",
    "    if re.search(FILEPATTERN,inFileName):\n",
    "        root = tactus2table.readRootFromFile(DATADIR+inFileName)\n",
    "        text1 = query1(root,QUERY1)\n",
    "        if text1 in data: data[text1] += 1\n",
    "        else: data[text1] = 1\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. How many clients sent an email to the counselor?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "FILEPATTERN = r\"^A.*z$\"\n",
    "QUERY2 = \"./Messages/Message/Sender\"\n",
    "CLIENT = \"CLIENT\"\n",
    "CLIENTMAIL = \"CLIENTMAIL\"\n",
    "NOCLIENTMAIL = \"NOCLIENTMAIL\"\n",
    "\n",
    "def query2(root,query):\n",
    "    clientMail = NOCLIENTMAIL\n",
    "    for node in root.findall(query): \n",
    "        text = node.text.strip()\n",
    "        if text == CLIENT:\n",
    "            clientMail = CLIENTMAIL\n",
    "            break\n",
    "    return(clientMail)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'CLIENTMAIL': 1125, 'NOCLIENTMAIL': 858}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "files = sorted(os.listdir(DATADIR))\n",
    "\n",
    "data = {CLIENTMAIL:0,NOCLIENTMAIL:0}\n",
    "for inFileName in files:\n",
    "    if re.search(FILEPATTERN,inFileName):\n",
    "        root = tactus2table.readRootFromFile(DATADIR+inFileName)\n",
    "        text2 = query2(root,QUERY2)\n",
    "        data[text2] += 1\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. How many of the intake forms have the title Vragenlijst?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "FILEPATTERN = r\"^A.*z$\"\n",
    "QUERY3 = \"./Intake/Questionnaire/Title\"\n",
    "\n",
    "def query3(root,query):\n",
    "    try: text = root.findall(query)[0].text.strip()\n",
    "    except: text = NONE\n",
    "    return(text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Intake': 1937, 'Vragenlijst': 46}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "files = sorted(os.listdir(DATADIR))\n",
    "\n",
    "data = {}\n",
    "for inFileName in files:\n",
    "    if re.search(FILEPATTERN,inFileName):\n",
    "        root = tactus2table.readRootFromFile(DATADIR+inFileName)\n",
    "        text3 = query3(root,QUERY3)\n",
    "        if text3 in data: data[text3] += 1\n",
    "        else: data[text3] = 1\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Combination of 1, 2 and 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'STARTDATE CLIENTMAIL Intake': 770,\n",
       " 'NONE NOCLIENTMAIL Intake': 716,\n",
       " 'STARTDATE NOCLIENTMAIL Intake': 131,\n",
       " 'NONE CLIENTMAIL Intake': 320,\n",
       " 'NONE CLIENTMAIL Vragenlijst': 14,\n",
       " 'NONE NOCLIENTMAIL Vragenlijst': 10,\n",
       " 'STARTDATE CLIENTMAIL Vragenlijst': 21,\n",
       " 'STARTDATE NOCLIENTMAIL Vragenlijst': 1}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = {}\n",
    "for inFileName in files:\n",
    "    if re.search(FILEPATTERN,inFileName):\n",
    "        root = tactus2table.readRootFromFile(DATADIR+inFileName)\n",
    "        text1 = query1(root,QUERY1)\n",
    "        text2 = query2(root,QUERY2)\n",
    "        text3 = query3(root,QUERY3)\n",
    "        text4 = \" \".join([text1,text2,text3])\n",
    "        if text4 in data: data[text4] += 1\n",
    "        else: data[text4] = 1\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Count Intake Question sets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "QUERY5 = \"./Intake/Questionnaire/Content/question\"\n",
    "FILEPATTERN = r\"^A.*z$\"\n",
    "\n",
    "def getAnswerIds(root):\n",
    "    answerIds = []\n",
    "    lastQuestionNbr = \"0\"\n",
    "    for questionNode in root.findall(QUERY5):\n",
    "        try:\n",
    "            try: questionNbr = questionNode.findall(\"./questionNumber\")[0].text.strip()\n",
    "            except: questionNbr = lastQuestionNbr\n",
    "            for answerNode in questionNode.findall(\"./answer\"):\n",
    "                answerIds.append(questionNbr+\"-\"+answerNode.attrib[\"ID\"])\n",
    "            lastQuestionNbr = questionNbr\n",
    "        except: pass\n",
    "    return(answerIds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "clients: 1014; answers: 259; answer fields: ['1-geslacht0', '2-leeftijd0', '3-national0']\n",
      "clients: 881; answers: 140; answer fields: ['1-geslacht', '2-leeftijd', '3-woonsit']\n",
      "clients: 46; answers: 27; answer fields: ['1-geslacht0', '2-medi0', '2-medicijnr0']\n",
      "clients: 42; answers: 236; answer fields: ['1-geslachtt0', '2-leeftijdt0', '3-woonsitt0']\n"
     ]
    }
   ],
   "source": [
    "files = sorted(os.listdir(DATADIR))\n",
    "\n",
    "answerIdStrings = {}\n",
    "for inFileName in files:\n",
    "    if re.search(FILEPATTERN,inFileName):\n",
    "        root = tactus2table.readRootFromFile(DATADIR+inFileName)\n",
    "        answerIds = getAnswerIds(root)\n",
    "        answerIdString = \" \".join(answerIds)\n",
    "        if not answerIdString in answerIdStrings: answerIdStrings[answerIdString] = 1\n",
    "        else: answerIdStrings[answerIdString] += 1\n",
    "\n",
    "answerIdStrings = {k:answerIdStrings[k] for k in sorted(answerIdStrings.keys(),key=lambda key:answerIdStrings[key],reverse=True)}\n",
    "for key in answerIdStrings:\n",
    "    print(\"clients: {0}; answers: {1}; answer fields: {2}\".format(answerIdStrings[key],len(key.split()),key.split()[0:3]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Combination of 4 and 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'STARTDATE CLIENTMAIL Intake 2-leeftijd': 683,\n",
       " 'NONE NOCLIENTMAIL Intake 2-leeftijd0': 667,\n",
       " 'NONE NOCLIENTMAIL Intake 2-leeftijd': 48,\n",
       " 'STARTDATE NOCLIENTMAIL Intake 2-leeftijd0': 6,\n",
       " 'NONE CLIENTMAIL Intake 2-leeftijd0': 292,\n",
       " 'STARTDATE NOCLIENTMAIL Intake 2-leeftijd': 122,\n",
       " 'STARTDATE CLIENTMAIL Intake 2-leeftijd0': 49,\n",
       " 'NONE CLIENTMAIL Intake 2-leeftijd': 28,\n",
       " 'NONE CLIENTMAIL Vragenlijst 2-medi0': 14,\n",
       " 'STARTDATE CLIENTMAIL Intake 2-leeftijdt0': 38,\n",
       " 'NONE NOCLIENTMAIL Vragenlijst 2-medi0': 10,\n",
       " 'STARTDATE CLIENTMAIL Vragenlijst 2-medi0': 21,\n",
       " 'NONE NOCLIENTMAIL Intake 2-leeftijdt0': 1,\n",
       " 'STARTDATE NOCLIENTMAIL Intake 2-leeftijdt0': 3,\n",
       " 'STARTDATE NOCLIENTMAIL Vragenlijst 2-medi0': 1}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "QUESTION2 = 1\n",
    "\n",
    "data = {}\n",
    "for inFileName in files:\n",
    "    if re.search(FILEPATTERN,inFileName):\n",
    "        root = tactus2table.readRootFromFile(DATADIR+inFileName)\n",
    "        answerId = getAnswerIds(root)[QUESTION2]\n",
    "        text1 = query1(root,QUERY1)\n",
    "        text2 = query2(root,QUERY2)\n",
    "        text3 = query3(root,QUERY3)\n",
    "        text4 = \" \".join([text1,text2,text3,answerId])\n",
    "        if text4 in data: data[text4] += 1\n",
    "        else: data[text4] = 1\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Count all the questionnaires"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "QUERY7 = \".//Questionnaire/Title\"\n",
    "FILEPATTERN = r\"^A.*z$\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Intake': 1937,\n",
       " 'Voordelen, nadelen': 774,\n",
       " 'Drink Wijzer': 594,\n",
       " 'Lijst tussenmeting': 519,\n",
       " 'Doel stellen': 588,\n",
       " 'Lijst motivatie': 447,\n",
       " 'Anders denken': 453,\n",
       " 'Anders doen': 421,\n",
       " 'Beslissingen': 399,\n",
       " 'Actieplan': 360,\n",
       " 'Lijst nameting': 335,\n",
       " 'Lijst 3 maanden': 251,\n",
       " 'Lijst half jaar': 189,\n",
       " 'Vragenlijst': 46,\n",
       " 'Tussenmeting': 31,\n",
       " 'Drop out': 7,\n",
       " 'Lijst 9 maanden': 7}"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "files = sorted(os.listdir(DATADIR))\n",
    "\n",
    "titles = {}\n",
    "for inFileName in files:\n",
    "    if re.search(FILEPATTERN,inFileName):\n",
    "        root = tactus2table.readRootFromFile(DATADIR+inFileName)\n",
    "        for title in root.findall(QUERY7):\n",
    "            titleText = re.sub(r\" RCT-.\",\"\",title.text.strip())\n",
    "            if titleText in titles: titles[titleText] += 1\n",
    "            else: titles[titleText] = 1\n",
    "titles"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Combination of 7 and 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AdB0718.xml.gz\n",
      "AdB1102.xml.gz\n",
      "AdB1343.xml.gz\n",
      "AdB1612.xml.gz\n",
      "AdB1771.xml.gz\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'2-leeftijd Actieplan': 296,\n",
       " '2-leeftijd Anders denken': 381,\n",
       " '2-leeftijd Anders doen': 356,\n",
       " '2-leeftijd Beslissingen': 336,\n",
       " '2-leeftijd Doel stellen': 426,\n",
       " '2-leeftijd Drink Wijzer': 509,\n",
       " '2-leeftijd Intake': 881,\n",
       " '2-leeftijd Lijst 3 maanden': 208,\n",
       " '2-leeftijd Lijst half jaar': 153,\n",
       " '2-leeftijd Lijst motivatie': 407,\n",
       " '2-leeftijd Lijst nameting': 272,\n",
       " '2-leeftijd Lijst tussenmeting': 468,\n",
       " '2-leeftijd Voordelen, nadelen': 670,\n",
       " '2-leeftijd0 Actieplan': 29,\n",
       " '2-leeftijd0 Anders denken': 31,\n",
       " '2-leeftijd0 Anders doen': 28,\n",
       " '2-leeftijd0 Beslissingen': 27,\n",
       " '2-leeftijd0 Doel stellen': 115,\n",
       " '2-leeftijd0 Drink Wijzer': 38,\n",
       " '2-leeftijd0 Intake': 1014,\n",
       " '2-leeftijd0 Lijst 3 maanden': 14,\n",
       " '2-leeftijd0 Lijst half jaar': 12,\n",
       " '2-leeftijd0 Lijst motivatie': 30,\n",
       " '2-leeftijd0 Lijst nameting': 28,\n",
       " '2-leeftijd0 Lijst tussenmeting': 39,\n",
       " '2-leeftijd0 Voordelen, nadelen': 46,\n",
       " '2-leeftijdt0 Actieplan': 26,\n",
       " '2-leeftijdt0 Anders denken': 29,\n",
       " '2-leeftijdt0 Anders doen': 28,\n",
       " '2-leeftijdt0 Beslissingen': 27,\n",
       " '2-leeftijdt0 Doel stellen': 31,\n",
       " '2-leeftijdt0 Drink Wijzer': 33,\n",
       " '2-leeftijdt0 Drop out': 7,\n",
       " '2-leeftijdt0 Intake': 42,\n",
       " '2-leeftijdt0 Lijst 3 maanden': 21,\n",
       " '2-leeftijdt0 Lijst 9 maanden': 7,\n",
       " '2-leeftijdt0 Lijst half jaar': 19,\n",
       " '2-leeftijdt0 Lijst nameting': 23,\n",
       " '2-leeftijdt0 Tussenmeting': 31,\n",
       " '2-leeftijdt0 Voordelen, nadelen': 38,\n",
       " '2-medi0 Actieplan': 9,\n",
       " '2-medi0 Anders denken': 12,\n",
       " '2-medi0 Anders doen': 9,\n",
       " '2-medi0 Beslissingen': 9,\n",
       " '2-medi0 Doel stellen': 16,\n",
       " '2-medi0 Drink Wijzer': 14,\n",
       " '2-medi0 Lijst 3 maanden': 8,\n",
       " '2-medi0 Lijst half jaar': 5,\n",
       " '2-medi0 Lijst motivatie': 10,\n",
       " '2-medi0 Lijst nameting': 12,\n",
       " '2-medi0 Lijst tussenmeting': 12,\n",
       " '2-medi0 Voordelen, nadelen': 20,\n",
       " '2-medi0 Vragenlijst': 46}"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "TARGETKEY = \"2-medi0 Lijst half jaar\"\n",
    "\n",
    "files = sorted(os.listdir(DATADIR))\n",
    "\n",
    "titles = {}\n",
    "for inFileName in files:\n",
    "    if re.search(FILEPATTERN,inFileName):\n",
    "        root = tactus2table.readRootFromFile(DATADIR+inFileName)\n",
    "        answerId = getAnswerIds(root)[QUESTION2]\n",
    "        for title in root.findall(QUERY7):\n",
    "            titleText = answerId+\" \"+re.sub(r\" RCT-.\",\"\",title.text.strip())\n",
    "            if titleText in titles: titles[titleText] += 1\n",
    "            else: titles[titleText] = 1\n",
    "            if titleText == TARGETKEY: print(inFileName)\n",
    "{key:titles[key] for key in sorted(titles.keys())}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 9. Get the therapy steps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'': 903,\n",
       " 'Afsluiting': 1,\n",
       " 'Afsluiting#Na 3 maanden#Na half jaar': 1,\n",
       " 'Doel stellen': 133,\n",
       " 'Doel stellen#Actieplan#Afsluiting#Na 3 maanden#Na half jaar': 8,\n",
       " 'Doel stellen#Afsluiting': 7,\n",
       " 'Doel stellen#Afsluiting#Na 3 maanden#Na half jaar': 4,\n",
       " 'Doel stellen#Gewoontes doorbreken#Actieplan#Afsluiting#Na 3 maanden#Na half jaar': 1,\n",
       " 'Doel stellen#Gewoontes doorbreken#Afsluiting': 1,\n",
       " 'Gewoontes doorbreken': 1,\n",
       " 'Intake': 1,\n",
       " 'Voordelen, nadelen': 163,\n",
       " 'Voordelen, nadelen#Afsluiting': 10,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden': 114,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Afsluiting': 7,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Anders doen': 1,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren': 48,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Afsluiting': 5,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten': 35,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Afsluiting': 3,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Afsluiting#Na 3 maanden': 1,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen': 32,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen#Afsluiting': 7,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen#Anders denken#Anders doen#Beslissingen#Actieplan#Afsluiting#Na 3 maanden#Na half jaar': 1,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen#Anders denken#Beslissingen#Actieplan#Afsluiting#Na 3 maanden#Na half jaar': 1,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen#Gewoontes doorbreken': 23,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen#Gewoontes doorbreken#Afsluiting': 1,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen#Gewoontes doorbreken#Anders denken': 26,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen#Gewoontes doorbreken#Anders denken#Afsluiting': 4,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen#Gewoontes doorbreken#Anders denken#Anders doen': 23,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen#Gewoontes doorbreken#Anders denken#Anders doen#Afsluiting': 1,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen#Gewoontes doorbreken#Anders denken#Anders doen#Beslissingen': 29,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen#Gewoontes doorbreken#Anders denken#Anders doen#Beslissingen#Actieplan': 32,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen#Gewoontes doorbreken#Anders denken#Anders doen#Beslissingen#Actieplan#Afsluiting': 39,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen#Gewoontes doorbreken#Anders denken#Anders doen#Beslissingen#Actieplan#Afsluiting#Na 3 maanden': 66,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen#Gewoontes doorbreken#Anders denken#Anders doen#Beslissingen#Actieplan#Afsluiting#Na 3 maanden#Na half jaar': 234,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen#Gewoontes doorbreken#Anders denken#Anders doen#Beslissingen#Actieplan#Afsluiting#Na 3 maanden#Na half jaar#Na 9 maanden': 9,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen#Gewoontes doorbreken#Anders denken#Anders doen#Beslissingen#Actieplan#Afsluiting#Na half jaar': 5,\n",
       " 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten#Doel stellen#Gewoontes doorbreken#Anders denken#Anders doen#Beslissingen#Afsluiting': 2}"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "FILEPATTERN = r\"^AdB.*z$\"\n",
    "QUERY9 = \"./Treatment/TreatmentSteps/TreatmentStep/Title\"\n",
    "\n",
    "files = sorted(os.listdir(DATADIR))\n",
    "\n",
    "titleDict = {}\n",
    "for inFileName in files:\n",
    "    if re.search(FILEPATTERN,inFileName):\n",
    "        root = tactus2table.readRootFromFile(DATADIR+inFileName)\n",
    "        titles = []\n",
    "        for title in root.findall(QUERY9): titles.append(title.text.strip())\n",
    "        titleString = \"#\".join(titles)\n",
    "        if titleString in titleDict: titleDict[titleString] += 1\n",
    "        else: titleDict[titleString] = 1\n",
    "{titleString:titleDict[titleString] for titleString in sorted(titleDict.keys())}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 10. Get therapy completion information"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(774, 354)"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "QUERY10 = \"./Treatment/TreatmentSteps/TreatmentStep\"\n",
    "TITLE = \"./Title\"\n",
    "ADVANTAGES = \"Voordelen, nadelen\"\n",
    "ACTIONPLAN = \"Actieplan\"\n",
    "SUBMISSIONDATE = \"./SubmissionDate\"\n",
    "APPROVALDATE = \"./ApprovalDate\"\n",
    "\n",
    "def checkTreatmentStep(root,targetTitle):\n",
    "    treatmentStepFinished = False\n",
    "    for treatmentStep in root.findall(QUERY10): \n",
    "        for title in treatmentStep.findall(TITLE):\n",
    "            if title.text.strip() == targetTitle:\n",
    "                treatmentStepFinished = True\n",
    "                for date in treatmentStep.findall(SUBMISSIONDATE): \n",
    "                    if date.text == None: treatmentStepFinished = False\n",
    "                for date in treatmentStep.findall(APPROVALDATE): \n",
    "                    if date.text == None: treatmentStepFinished = False\n",
    "    return(treatmentStepFinished)\n",
    "\n",
    "nbrOfAdvantagesStarted = 0\n",
    "nbrOfActionPlanFinished = 0\n",
    "for inFileName in files:\n",
    "    if re.search(FILEPATTERN,inFileName):\n",
    "        root = tactus2table.readRootFromFile(DATADIR+inFileName)\n",
    "        titles = []\n",
    "        if checkTreatmentStep(root,ADVANTAGES): \n",
    "            nbrOfAdvantagesStarted += 1\n",
    "            if checkTreatmentStep(root,ACTIONPLAN): nbrOfActionPlanFinished += 1\n",
    "nbrOfAdvantagesStarted,nbrOfActionPlanFinished"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get the text of questions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "QUERY6 = \"./Intake/Questionnaire/Content/question\"\n",
    "\n",
    "for inFileName in [\"AdB0174.xml.gz\"]:\n",
    "    if re.search(FILEPATTERN,inFileName):\n",
    "        root = tactus2table.readRootFromFile(DATADIR+inFileName)\n",
    "        lastQuestionNbr = \"0\"\n",
    "        for questionNode in root.findall(QUERY5):\n",
    "            try:\n",
    "                try: \n",
    "                    questionNbr = questionNode.findall(\"./questionNumber\")[0].text.strip()\n",
    "                    questionTitle = re.sub(r\"\\s+\",\" \",questionNode.findall(\"./title\")[0].text.strip())\n",
    "                except: \n",
    "                    questionNbr = lastQuestionNbr\n",
    "                    questionTitle = \"\"\n",
    "                answerNodes = questionNode.findall(\"./answer\")\n",
    "                for a in range(0,len(answerNodes)):\n",
    "                    try: answerTitle = re.sub(r\"\\s+\",\" \",answerNodes[a].findall(\"./title\")[0].text.strip())\n",
    "                    except: answerTitle = \"\"\n",
    "                    if answerTitle != \"\": \n",
    "                        if a == 0 and questionNbr != lastQuestionNbr: print(questionNbr,questionTitle)\n",
    "                        print(questionNbr+\"-\"+answerNodes[a].attrib[\"ID\"],end=\" \")\n",
    "                        print(answerTitle)\n",
    "                    else: \n",
    "                        print(questionNbr+\"-\"+answerNodes[a].attrib[\"ID\"],end=\" \")\n",
    "                        print(questionTitle)\n",
    "                lastQuestionNbr = questionNbr\n",
    "            except: pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}