e-mental-health/data-processing

View on GitHub
xml2csv.ipynb

Summary

Maintainability
Test Coverage
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Convert Tactus XML files to CSV files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gzip\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import os\n",
    "import pandas as pd\n",
    "import re\n",
    "import sys\n",
    "import xml.etree.ElementTree as ET\n",
    "import warnings\n",
    "\n",
    "from nltk import word_tokenize\n",
    "from IPython.display import clear_output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column names shared by the output tables.\n",
    "CLIENT = \"client\"\n",
    "COUNSELOR = \"counselor\"\n",
    "# Input directory; entries whose name matches FILEPATTERN (via re.search) are read.\n",
    "DATADIR = \"../usb/tmp/20190917/\"\n",
    "FILEPATTERN = \"AdB.*xml.gz\"\n",
    "# Output directory; it is concatenated directly with file names, so it must end with a slash.\n",
    "OUTDIR = \"../usb/releases/20201018/\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Patterns that inExcludedCounselors() searches for, ignoring case.\n",
    "TACTUSMAIL = \"alcoholdebaas@tactus.nl\"\n",
    "EMAIL = \"Email\"\n",
    "\n",
    "def dictToString(data):\n",
    "    \"\"\"Serialize a dict of strings as 'key:value ' pairs in sorted key order.\n",
    "\n",
    "    Every pair, including the last one, is followed by one space; an empty\n",
    "    dict gives an empty string.\n",
    "    \"\"\"\n",
    "    pairs = [key + \":\" + data[key] + \" \" for key in sorted(data)]\n",
    "    return \"\".join(pairs)\n",
    "\n",
    "def inExcludedCounselors(counselorString):\n",
    "    \"\"\"Tell whether a serialized counselor record is to be excluded.\n",
    "\n",
    "    A record is excluded when it contains no 'Email:' field or when it\n",
    "    matches TACTUSMAIL; both searches ignore case. The result is truthy\n",
    "    (True or a match object) for excluded records and None otherwise.\n",
    "    \"\"\"\n",
    "    if not re.search(EMAIL + \":\", counselorString, flags=re.IGNORECASE):\n",
    "        return True\n",
    "    return re.search(TACTUSMAIL, counselorString, flags=re.IGNORECASE)\n",
    "\n",
    "def fileNameToId(fileName):\n",
    "    \"\"\"Return the part of fileName that precedes its first '.' or '-'.\"\"\"\n",
    "    stem, _, _ = fileName.partition(\".\")\n",
    "    clientId, _, _ = stem.partition(\"-\")\n",
    "    return clientId\n",
    "\n",
    "def readGzippedXmlFile(inFileName):\n",
    "    \"\"\"Read a gzip-compressed XML file and return the root of its element tree.\n",
    "\n",
    "    The file is opened in a with-block so that it is closed even when\n",
    "    reading fails. Errors raised while reading or parsing are propagated.\n",
    "    \"\"\"\n",
    "    with gzip.open(inFileName) as inFile:\n",
    "        inFileContent = inFile.read()\n",
    "    return ET.fromstring(inFileContent)\n",
    "\n",
    "def cleanupText(text):\n",
    "    \"\"\"Trim surrounding whitespace and flatten the text to a single line.\n",
    "\n",
    "    Every remaining newline character is replaced by a literal backslash\n",
    "    followed by the letter n, padded with one space on each side.\n",
    "    \"\"\"\n",
    "    return text.strip().replace(\"\\n\", \" \\\\n \")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def showValueFrequencies(answerDataDf):\n",
    "    \"\"\"Count how many clients share each number of rows in answerDataDf.\n",
    "\n",
    "    Returns a dict that maps a per-client row count to the number of\n",
    "    clients having that many rows, ordered from the most to the least\n",
    "    common row count.\n",
    "    \"\"\"\n",
    "    clients = sorted(set(answerDataDf[CLIENT]))\n",
    "    rowsOfClient = answerDataDf.groupby(CLIENT).groups\n",
    "    rowCounts = [len(rowsOfClient[client]) for client in clients]\n",
    "    clientsPerRowCount = {}\n",
    "    for rowCount in rowCounts:\n",
    "        clientsPerRowCount[rowCount] = clientsPerRowCount.get(rowCount, 0) + 1\n",
    "    # sorted() is stable, so equally common row counts keep their first-seen order.\n",
    "    ranked = sorted(clientsPerRowCount.items(), key=lambda item: item[1], reverse=True)\n",
    "    return dict(ranked)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "def squeal(text=None):\n",
    "    \"\"\"Clear the output of the current cell and print text, unless it is None.\"\"\"\n",
    "    clear_output(wait=True)\n",
    "    if text is None:\n",
    "        return\n",
    "    print(text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Process tag AssignedCounselor (counselors.csv.gz)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Path of the counselor elements, evaluated relative to the root element of each file.\n",
    "COUNSELORQUERY = \"./AssignedCounselor\"\n",
    "# Name of the counselor table; it is appended to OUTDIR when the table is saved.\n",
    "OUTFILENAME = \"counselors.csv.gz\"\n",
    "\n",
    "def getCounselorData():\n",
    "    \"\"\"Collect the assigned counselor of every client file in DATADIR.\n",
    "\n",
    "    Each file whose name matches FILEPATTERN is parsed and the child\n",
    "    elements of its counselor element (COUNSELORQUERY) are serialized with\n",
    "    dictToString(). Every distinct serialization gets an integer id; id 0\n",
    "    is reserved for the empty serialization, which is also what a file\n",
    "    without counselor data receives.\n",
    "\n",
    "    Returns:\n",
    "        (clientList, counselorDict): a list of (client id, counselor id)\n",
    "        tuples, one per file, and a dict mapping each serialized counselor\n",
    "        record to its id.\n",
    "    \"\"\"\n",
    "    clientList = []\n",
    "    counselorDict = {\"\": 0}\n",
    "    for inFileName in sorted(os.listdir(DATADIR)):\n",
    "        if not re.search(FILEPATTERN, inFileName):\n",
    "            continue\n",
    "        root = readGzippedXmlFile(DATADIR + inFileName)\n",
    "        # Start empty for every file: otherwise a file without a counselor element\n",
    "        # would reuse the data of the previous file (or fail on the very first file).\n",
    "        counselorData = {}\n",
    "        for counselor in root.findall(COUNSELORQUERY):\n",
    "            # When a file holds several counselor elements, the last one wins.\n",
    "            counselorData = {}\n",
    "            for field in counselor:\n",
    "                # Child elements without text (text is None) are skipped.\n",
    "                if field.text is not None:\n",
    "                    counselorData[field.tag.strip()] = field.text.strip()\n",
    "        counselorString = dictToString(counselorData)\n",
    "        if counselorString not in counselorDict:\n",
    "            counselorDict[counselorString] = len(counselorDict)\n",
    "        clientList.append((fileNameToId(inFileName), counselorDict[counselorString]))\n",
    "    return (clientList, counselorDict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# File name under which visualizeCounselorDf() saves its bar chart.\n",
    "PLOTFILENAME = \"AssignedCounselor.png\"\n",
    "\n",
    "def clientDictToCounselorDf(clientDict):\n",
    "    \"\"\"Build a one-column data frame from a dict of client id -> counselor id.\n",
    "\n",
    "    The frame is indexed by client id (index name CLIENT) and its single\n",
    "    column is named COUNSELOR.\n",
    "    \"\"\"\n",
    "    oneRowDf = pd.DataFrame(clientDict, index=[0])\n",
    "    counselorDf = oneRowDf.transpose().rename(columns={0: COUNSELOR})\n",
    "    counselorDf.index.name = CLIENT\n",
    "    return counselorDf\n",
    "\n",
    "def saveCounselorDf(counselorDf, outFileName=OUTFILENAME):\n",
    "    \"\"\"Write counselorDf as CSV to the file outFileName inside OUTDIR.\"\"\"\n",
    "    outPath = OUTDIR + outFileName\n",
    "    counselorDf.to_csv(outPath)\n",
    "    \n",
    "def visualizeCounselorDf(counselorDf, counselorDict, plotFileName=PLOTFILENAME):\n",
    "    \"\"\"Plot the number of clients per counselor as a bar chart and save it.\n",
    "\n",
    "    Counselors for which inExcludedCounselors() is truthy are left out.\n",
    "\n",
    "    Args:\n",
    "        counselorDf: frame with a COUNSELOR column holding counselor ids.\n",
    "        counselorDict: maps serialized counselor records to counselor ids.\n",
    "        plotFileName: file the figure is saved to. The default is bound when\n",
    "            this function is defined, so a later reassignment of the global\n",
    "            PLOTFILENAME (the Intake section does one) cannot redirect the plot.\n",
    "    \"\"\"\n",
    "    counselorGroups = counselorDf.groupby(COUNSELOR).groups\n",
    "    # Invert the mapping: counselor id -> serialized counselor record.\n",
    "    counselorDictT = {counselorId: record for record, counselorId in counselorDict.items()}\n",
    "    x = [c for c in sorted(counselorGroups.keys()) if not inExcludedCounselors(counselorDictT[c])]\n",
    "    y = [len(counselorGroups[g]) for g in x]\n",
    "    plt.figure(figsize=(10,5))\n",
    "    matplotlib.rc(\"font\",**{\"size\":12})\n",
    "    plt.bar(x,y)\n",
    "    plt.xlabel(\"counselor id\")\n",
    "    plt.title(f\"Number of clients per counselor (average: {round(np.average(y),1)}; standard deviation: {round(np.std(y))})\")\n",
    "    plt.savefig(plotFileName)\n",
    "    plt.show()\n",
    "    \n",
    "def showExcludedCounselors(counselorDf, counselorDict):\n",
    "    \"\"\"Print id, client count and data of every excluded counselor record.\n",
    "\n",
    "    A record is excluded when inExcludedCounselors() is truthy for it.\n",
    "    Records that no client refers to are reported with frequency 0 instead\n",
    "    of raising KeyError; this can happen for the empty record with id 0 that\n",
    "    getCounselorData() registers up front.\n",
    "    \"\"\"\n",
    "    counselorGroups = counselorDf.groupby(COUNSELOR).groups\n",
    "    for record, counselorId in counselorDict.items():\n",
    "        if inExcludedCounselors(record):\n",
    "            frequency = len(counselorGroups.get(counselorId, []))\n",
    "            print(f\"id: {counselorId}; frequency: {frequency}; data: {record}\")\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of client-counselor pairs: total: 1983; one per client: 1983\n"
     ]
    }
   ],
   "source": [
    "clientList, counselorDict = getCounselorData()\n",
    "# Later pairs with the same client id overwrite earlier ones.\n",
    "clientDict = dict(clientList)\n",
    "counselorDf = clientDictToCounselorDf(clientDict)\n",
    "saveCounselorDf(counselorDf, outFileName=OUTFILENAME)\n",
    "print(f\"number of client-counselor pairs: total: {len(clientList)}; one per client: {len(clientDict.keys())}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlIAAAFUCAYAAADvbtLzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3deZwmVX3v8c8XRtlHBUYUF0ZBUFBBwbiiKGrcN0wkIIZEJUpMXG5yJQkaIiJ4zVVvgmiIRA1LEBNBr6g3QUWjaHSMQoIggg7KpoOyw4jguX+c01BT9PJ0dQ/dT8/n/Xo9r+7n1KlTp+rUU8+vTp2qJ6UUJEmSNHsbLXQFJEmSxpWBlCRJ0kAGUpIkSQMZSEmSJA1kICVJkjSQgZQkSdJABlKzlGRlkpLkKQtdl64ky5OcnuS6Vr+VA8rYp837wPZ+Ua6r5keSg5Pctp6X8bIk5yXxWKN5k+SjSc5aj+WvTnL4LOdZL8fLJGcn+fA8lLPO8X1DkmSjJN9L8sL1Uf5YHdzah6ck+V+99Ae29H0WqGqLweuBJwJPAe4P/GQeyvxJK+s/5qGsO9heG4Yky4C/Bv6ylPLrha7PYpRk7yT/kuSyJLck+UGSI5Js0smzWZLPJPlxkrVJfprkjCS7jlD+45Oc0+a7MsnRSTae53V4ZRIfSLj+vAx4y2xmSHJbkoN7yedQj+dXzFO9ZlOfeyd5f5Lzk9yU5Kq23z98mnkObt8TMwbMSbZK8vdJft7K/1ySHSemt+PPEcBfr4+TurEKpJq1wB8n2WGhKzLfktxjDrM/DDi/lPJfpZSrSim3z7U+pZTbW1m/mmtZ4yTVXNpigzHDdnopsCnw6bupOlNaxG36ZOAS4ABgV+CtwKHA+zt5CvBvwG8DuwDPB5YBX0iy6VQFJ3lQm+/7wJ7Uk60/AI6a97UYQ0nuudB1GEUp5RellOvnoZxb2/F8IU5q7g88BHg78FjqPrw58MUk9+lnbicJ7wK+MmL5JwL7Ai+ndiYE+Lckm3XynAFsAzxv4DpMrZQyNi/go8AXqD0kJ3fSH0g92OzT3q9s75/Sm/9i4IjO+wL8EfBx4Cbgx60h7gWcDNwA/BDYrzPPRNmvbHW5peXZv7es7Vp917RyvgY8tTN9n1bO84GvUgPE10+x3vcAjgEuB24Fvgcc0Jm+upU18Tp7mm24I/DPwC+Am4HzgBf06vTAqbbjLNbrWdQPwc2tvs/tbffua3WnHf8FuLptjx8CfzrNukws64XAN9s8/w08o5dvp1butcA1wL8Cj+pMPxi4DXg68J22jZ87xTKXAX9J/fL7ZWuTv+1Mvz9walvWLcDZwF6T1PmBvXJvAw7ubfffBj7TtuEPJ6Z35nkNcEFb71+07f3AzvQ927re2Nrrk8AO/fXulfk84Ntt3X4GHAds0fsMnkX93KwGfg1sNsW2OgM4vpf2kFaPK9p6/RdwUGf6a4HrgE17872V+vncaC5tOtPy27ybAce3elzTtsHRwMW9fPsD323bfzXw3u62msNx7i3Az2fIs3vbR3afJs+7gMsmtllL+0PqsW5W9ZxqX+vsz93XR9s8z6Lu/79o2/LLwG/0yi3UwPFE6vHkMuDPenm25s5j9E+BdwIfA87q5Bl1WX8MnNLyfLyzLc+h7vM/oH7uVgOHz7BNfpv6nbK2zf8i7nq8nHI/BZa3ffCAXrnbt333me392cCHR11X7vp9UKY69gBPaG15S6vfKcB9O9OPaOv4YuDC1gZnAw+bh/18m1afF/bSN6cex/enHW9mKGfnVs6zO2n3ae3ZP2Z+DDh1rnW/Sx3mu8D1+eLOg/je1AP4Xi19LoHUVcDvth3+uLZDfY56IN4J+Nu282zTK/sK4EDqGeI7gduBx7Q8m1GDh38B9mrl/EVr2Ef0duoLqYHAQ+h9uXbq+R7g58BvtZ3mz9v679umr6AeaL4C3A/Yeopy
7kc9EJ1Fjdp3bB+Q5032Qetvx1mu17nAc6g9ZR8Brgfu0/I8puV5WavTipb+6Va3Pdqynw78zjT7w8SyfgC8AHgEcEJrr/u3PNu1Nv4g8KjWXn/btufEcg9u2/ObbZkPnZg2yTI/Rg0wDmrb7wnAm9u0UIP877bt+6jWLtcA2062jTvlThZI/ZB6sN6J+qV4G7Bzy7Nne/8qYIe2rNd02m5XagD1V8DD2/RPABfRghR6gRTw6Fbm+9o8z6UGLyf2PoPXA6dTv4AeBWw8xba6BnhNL+1RwBvavDtSA7LbgKe36feifgZf0ZvvfOBdc23TmZbf5v0b6ufkRa3so6lfWBd38hzc1u+gVvZTqSclJ/byFGDlLI9z7wB+PM30rdr6XgZsPk2+LwP/0Evbkbt+2Rc6x8VJyplyXwPuSQ3OCvWzfD/gXm2+l3JnL9puwIepX/7b9Jb9U2oAvWOnrH07eU6nHruf0co5qe2D3UBq1GX9vLX/jtRj02bUk6HPtn3iicC3qAHOlIEU9Rh2e9s3dqEey37EusfLUfbTU4DP9cr+n9RhFRMnDWezbiA17bpS9/PbgDdOtMkUx/f7te14SqvfU6j78Fc6yzqCejz9fNsPdqeeaP17r87T7kNTbMOHtvn6J74fAU7oHG9mCqR+j3qitHEv/d+7262lHQr8dDb1HGld5rvA9fnqbtT24Tq7/T+XQOr9nfcrWlq3h+E+Le0FvbKP7JV9Du0gSj2AXgYs6+X54sTyOjv1QTOs8+bUQOXQXvrpwBcn2zbTlHUk9YM96dnoJB+0dbbjLNfrZZ3p27W035ysvTr5zp3Nh7GzrFd30pYBl060D/VA8I3efKH2KL2ps14F2HuG5e3U8r18iun7tum7dtI2Aa4E3j7ZNu7kmyyQektn+sbUM/Y/aO9fSv1yXz7NZ+XUXtom1C+Il3TWuxtInQh8szfPi6kByQ6dcq8FtpxhW927rcOkPXu9vJ8C/r7z/lTgzM77vVpZu8x3m/aXD2xB/by9upfnG6wbSK0GXtfL89S2zIkThpdST5QeMIt9+hHUL7c3TDLt3dTguFBPaHaaoayLaMFnJ22LNv9vddIunGx5nekz7WuvpPV6zFCfjajB54GdtAL8TS/fBcDRvc/cszrT70kNfqY83k2zrBN6+V7Ttul9OmmPbHmnC6ROAr7WS3sD6x4vR9lPn0P97N+vk+e/Jta/vT+bXkAwwrrecTzppO3Dusf3I6nH83t28kz0dD61sw630TmxBF5BPSZs2kmbdh+apM4bU4Ozb7Juj+mrWvtv0d5/dLp2bnn+HLhikvRP0DmOtLSJXsM59xx3X+M4RmrCW4EnJ3nRHMs5d+KfUsoa6lnGeZ20a6jR7n1783299/5r1LMDgMdRo/1rk9w48aL2pD2sN983Z6jfTtQDR/9a8Zc7yxvVnsA5pZSbZjnfhNms13cn/iml/JS6Xbebofz3A3+e5D+SvDvJU0es1x1tUUq5jbpNu22xZ6++N1CDlX6dvzXDch7b/v7rFNN3o16S+V6nPr+k9lLNtq1g3W14O7UnbGIb/hu1x+pHSU5NckiSbTvzPg54aW+9f04ds9Rf7279J9vPQu3hmnBBKeXGGeo+MTZhbTcxyeZJjmmDTn/R6vU8ak/HhI8Bz04y8Zl7FTXA+35n3Qa16QjLn/i8faNXztc7Zaxo+d/bq8PnOmVQSjm9lPLwUsrlU2+mder2MOq+dWop5dhJsryH2hPydGrbn55kq1HKnk6r42TLmzDTvjapJA9JcmKSi5NcTw0Q78W6bQ2d/by5gjv384n97pxOfW/lru066rL6x9tdqfvzNZ3y/5saOE5n126dmq/23o+yn/4b9XN9QFuPx1IDuX+casGzWNeZ7EYN9G6dSCilnEtd9+7x6or23XjHe+ox4b6d+Wbah7r135i6fjtTT7h/3dJ3ofaG7z+H76iZTByPNps21ywtm8/C7k6llIuS/B31LO25vckTg+nS
S59ssOlkA6n7aYXZDczfiBpVv3SSaTf33q+vHWZ9mM163TpJnmm3YSnlI0k+Tz1LezrwuSSnl1JeOaSynWV+gXq22Nc9WN5eSlk7SZ75dpd9sx1YJts2/W14x35YSrkxyV7UwcrPBF4H/K8k+5ZSvt3ynUgdW9f38zmtwWj77NWtvlv30t9D7eV6C3UQ9E3A/6Z+EUz41zb/AUk+QB0rcURn+lzadJTl0+o+lYm2eiPwpUmmXzbNvJNK8kjql+qnqIPC76KUcjV1u/wgyTnUdjwQ+NAUxV5JPfHp2q4zbSQj7GtT+Uyr7x9SL1XdSg02+oO8p9zPZ2HUZd2dx9sZ99NSyu1JTqaeLLy3/f1WKeWCacoddV3ny2TtAwNuVmsD/P+J2vP1tFJK97PyROrx4tvJHYfHjdp8t7X8X5uk2CuBbZNsXNa9yWo7aq9s19bUk/pfzLbu0xnnHimo4z+2Bw7ppU9Ez9tPJLSz2wfM47Kf0Hv/JGp3O8Aq6vXf60spF/des7319GLqpYZ+78zTqAPyZuPbwJOSbDHL+SbM13pNfDDvcht2KeXKUspHSimvAl4NHJhk+Qzl3dEWqbfc/wbrtsVuwGWT1HnNJGVN5z/b32dPMf18YJt0bktPvY398dzZVj9rf7fvzLcHdw36Z1TqXZVfKaW8ndrbeCXtzJa63o8GLplkva+ZosjzmXw/K23abOr2K+o693vinkq9UeS0dvb7Q+qZ6TrrRb3Z4yDqSdK9qJf7JsylTWda/sXU/fOJvfnu2MdaD+tPqJca+8u/eLYBeZLHUXv+TqPecDJdELfOrNQexql8DXhW73bv51BPer4zmzrOsK/dCnecEND+34baa3NMKeX/tV7atdy1Z38mE5/jJ3XKvie1t2c+lvU94BFJ7t0pbzfuGlhPNt+TemlP7r0fdT/9GLB7kscAv8P0vVGjruutTHJ87TkfeEL37sUku1PXfbbfLTNKsjl1HOyu1EuH/Uf0nEEdq7VH5/Vpao/+Hky9z36N2knyjM6y7k097vZ7CR8FfKfM852LYx1ItZ3xGOBNvfRbqBv3fybZPcme1J3zl/O4+FcnOSDJzkneQT3wvrdNO5k68PDMJM9OfVDb45P8WZKXzGYhpZSbqYNfj0zyW215f049q37XLOt8HLXNP5Xkya2L+AVJ+j16U5mv9bqaOi7h2UnuN3H7a5JjkzwvyY7tYPYy6hfWDTOUd1ib7xHUgZ0r2roCHEs9oHwq9Zk9K5M8JclRSfoHwmmVUi6mboPjUp+ds2OSxyV5Y8vyReqlg1Pa9p3oot+01QvqF/WlwBFJHp768L73MX0PyF0keXGSNyfZM8mDgZcAD+LOL553UcfbnJTkN1pbPz3J/0ny0CmKfQ/w2CTva3V7DnVw7MmllB/Ppn7NZ6mBWNf3gRe3Ou1KvTtu+7vMWbfbY6knS58ppXTPIOfSptMuv11S+Dvgne2zsXOSo6jbsttGf0F9DMtfJHlkkl2SvKT1kgOQ5KVJLkwy5Qlc6uXrL1B7oo4Gtmufift18uyT5PXtWPbgJE+m3nn7a+odiBP5vpDk6E7xH6R+Kf59kt1Sh0EcSR0DOnLPzAj72o/a3xclWZFkS+qYnTXAa9s2fCK1J+KWUZcLd3zmPg18oO2/u1IHV3cvac5lWadQjy8nte37BOAfRpj3fcAT2z63c5KXAv+jl2ek/bRdSvxOW+69W92nMuq6/gh4epLtM/Vl2GOpdw5+tO3DT6H2Yv97KeXfZ1j/dbT9fLKet4npWwH/jzpA/hXAryf287RHFJRSri2l/Hf3RR2PeVN7f3Mr6w1JLpwou5RyEfXz88EkT0uyB7VdL6fe7NO1D3DmbNZtJGUeB1yt7xeTDDyjfkn9mN7gZepZ5pepXbk/oH4pTzbY/JW98iYbpLeWdvcRdw4EPog6CHAtdaft38K6DfVANvHIgsupA8Qn7uzbh0kGHU+x3tM+/mCqbTNFWTu3elxHPTM9lxHv
2pvLevW3K7UL+0ctfXVL+wC1K/YW6mWLM4HdplmXiWW9iDtv2f8enYGpLd8O1ABoTctzKXWw6EPa9IPpPQZghrY4kjrY+FbqZZzuDQv9xx98mc7jD1qex7f63tK2/95MPth8ypslqD0rX2zrtJa6jx/Wy/8o6gHmmrasi6mBw9ZTrTfrPv5gTWvruzz+YMRt9VDqZfIHddIeRD2g3kTt1fgr6p2WZ08y/3fadnjxJNMGtekoy+fOxx9c39rxOOr4vf/qlfUS6tipm1ve79JuKujUoTDNXXtte5bJXr395WzqZ6K7rrv2ylpNe/RAJ+0J1LE8a6k3mhzNXe9uKkx/194o+9r7qb2thTsff/A06v69lhrA7sdox+CzuutBPeac1tpsTVuH/uMPBi2rpT+mteMvqQPB92e0xx/sz52PQfkP6slt/3g57X7ayffGNu/pkyznbNa9a2+UdX0OdRjGrRP7EjM//uBapnj8Qa8+T6G3X4+wD00se7LXwdPM91Hu+p1/BJ3PR0vbCvh77nysz+fp3YzBJMej+XqlLUAaO6lPRv8S9YMx63EpWv+SnADcUEp504yZF7EkXwSuKaXst9B1mU+td/Ji6t2Nk40/kZaEJMcBKaVMOgZxLsZ2sLmksfBnwO8n2aiMyc/EJHkU9bLi16kDeA+i3vww6iXwcfIC4B8NorSUpY4TvIza0zz/5dsjpXFlj5TWhza27cPUcVEbUZ+Rc1Qp5YwFrZikRclASpIkaaCxvmtPkiRpIRlISZIkDbQgg8233XbbsnLlyoVYtCRJ0qx8+9vfvrqUsmKyaQsSSK1cuZJVq1YtxKIlSZJmJcmlU03z0p4kSdJABlKSJEkDGUhJkiQNZCAlSZI0kIGUJEnSQAZSkiRJAxlISZIkDWQgJUmSNJCBlCRJ0kAGUpIkSQMZSEmSJA20IL+1p/Gx8rAzZ8yz+pjn3w01kSRp8bFHSpIkaSADKUmSpIEMpCRJkgYykJIkSRrIQEqSJGkgAylJkqSBDKQkSZIGMpCSJEkayEBKkiRpIAMpSZKkgQykJEmSBjKQkiRJGshASpIkaaCRAqkkK5N8Nsk1Sa5KcmySZW3aHkm+neTm9neP9VtlSZKkxWHUHqnjgJ8B9wf2AJ4GHJrknsCngJOA+wAfAz7V0iVJkpa0UQOphwCnlVLWllKuAj4P7AbsAywD3l9K+WUp5W+AAM9YH5WVJElaTEYNpN4P7J9k8yQPAJ7LncHUeaWU0sl7XkuXJEla0kYNpL5CDY6uBy4DVgFnAFsC1/XyXgds1S8gySFJViVZtWbNmuE1liRJWiRmDKSSbETtffoksAWwLXU81LuBG4HlvVmWAzf0yymlHF9K2auUsteKFSvmWm9JkqQFN0qP1NbAg4Fj2zionwMfAZ4HnA88Okk6+R/d0iVJkpa0ZTNlKKVcneRHwOuT/DX1ct7vUsdCnQ3cDvxxkg8Br22zfXH9VHfDsPKwM2fMs/qY598NNZEkSdMZdYzUy4DnAGuAi4FfAW8updwKvAR4FXAt8PvAS1q6JEnSkjZjjxRAKeW71EcdTDbtO8Ce81gnSZKksTBSIKW583KdJElLj7+1J0mSNJCBlCRJ0kAGUpIkSQMZSEmSJA1kICVJkjSQgZQkSdJABlKSJEkDGUhJkiQNZCAlSZI0kIGUJEnSQAZSkiRJAxlISZIkDWQgJUmSNJCBlCRJ0kAGUpIkSQMZSEmSJA1kICVJkjSQgZQkSdJABlKSJEkDGUhJkiQNZCAlSZI0kIGUJEnSQAZSkiRJAxlISZIkDWQgJUmSNJCBlCRJ0kAGUpIkSQMZSEmSJA1kICVJkjSQgZQkSdJABlKSJEkDGUhJkiQNZCAlSZI0kIGUJEnSQAZSkiRJAxlISZIkDWQgJUmSNJCBlCRJ0kAGUpIkSQMZSEmSJA1kICVJkjSQgZQkSdJABlKSJEkDGUhJkiQNZCAlSZI0kIGUJEnSQAZSkiRJAxlISZIkDWQg
JUmSNNDIgVSS/ZNckOSmJJck2bul75vkwiQ3J/lSkh3WX3UlSZIWj2WjZEryLODdwCuAbwL3b+nbAp8EXgP8X+BI4OPAE9ZHZSWta+VhZ86YZ/Uxz78baiJJG6aRAingr4B3lFK+0d5fDpDkEOD8Uson2vsjgKuTPLyUcuF8V1aSJGkxmfHSXpKNgb2AFUkuTnJZkmOTbAbsBpw7kbeUchNwSUuXJEla0kYZI7UdcA/g5cDewB7AY4DDgS2B63r5rwO26heS5JAkq5KsWrNmzZwqLUmStBiMEkjd0v7+bSnlylLK1cB7gecBNwLLe/mXAzf0CymlHF9K2auUsteKFSvmUmdJkqRFYcZAqpRyDXAZULrJ7e/5wO4TiUm2AHZs6ZIkSUvaqIPNPwL8UZLPA78C3gx8BjgdeE+S/YAzgbcD543jQPNR7n4C74CSJEl3GvU5UkcC3wIuAi4AvgMcVUpZA+wHHAVcAzwe2H891FOSJGnRGalHqpTyK+DQ9upPOwt4+DzXS5IkadHzJ2IkSZIGMpCSJEkayEBKkiRpIAMpSZKkgQykJEmSBjKQkiRJGshASpIkaSADKUmSpIEMpCRJkgYa9bf2JGnBjPJbmP4OpqSFYI+UJEnSQAZSkiRJAxlISZIkDWQgJUmSNJCBlCRJ0kAGUpIkSQMZSEmSJA1kICVJkjSQgZQkSdJABlKSJEkDGUhJkiQNZCAlSZI0kIGUJEnSQAZSkiRJAxlISZIkDWQgJUmSNJCBlCRJ0kAGUpIkSQMZSEmSJA1kICVJkjSQgZQkSdJABlKSJEkDGUhJkiQNZCAlSZI0kIGUJEnSQAZSkiRJAy1b6ApIkoZZediZM+ZZfczz74aaSBsue6QkSZIGMpCSJEkayEBKkiRpIAMpSZKkgQykJEmSBjKQkiRJGshASpIkaSADKUmSpIF8IOcGZpQH+IEP8ZMkaRT2SEmSJA1kICVJkjSQl/a0JPkbZJKku4M9UpIkSQMZSEmSJA00q0AqycOSrE1yUiftgCSXJrkpyRlJtp7/akqSJC0+s+2R+gDwrYk3SXYD/g44CNgOuBk4bt5qJ0mStIiNPNg8yf7AtcA5wE4t+UDg/5ZSvtLyvA24IMlWpZQb5ruykiRJi8lIPVJJlgPvAN7Sm7QbcO7Em1LKJcCtwM6TlHFIklVJVq1Zs2Z4jSVJkhaJUS/tHQmcUEq5rJe+JXBdL+06YKt+AaWU40spe5VS9lqxYsXsaypJkrTIzHhpL8kewDOBx0wy+UZgeS9tOeBlPUmStOSNMkZqH2Al8OMkUHuhNk6yK/B5YPeJjEkeCmwCXDTfFZU2FD5MVJLGxyiB1PHAqZ33f0INrF4P3Bf4epK9gf+kjqP6pAPNJUnShmDGQKqUcjP1sQYAJLkRWFtKWQOsSfI64GRgG+As4PfWU10lSZIWlVn/1l4p5Yje+1OAU+arQpIkSePCn4iRJEkayEBKkiRpoFlf2pOmMsrdZuAdZ5KkpcMeKUmSpIEMpCRJkgYykJIkSRrIQEqSJGkgAylJkqSBvGtPC8bflJMkjTt7pCRJkgYykJIkSRrIS3tLgJfIJElaGPZISZIkDWQgJUmSNJCBlCRJ0kAGUpIkSQMZSEmSJA1kICVJkjSQgZQkSdJABlKSJEkDGUhJkiQNZCAlSZI0kIGUJEnSQAZSkiRJAy3pHy32x3wlSdL6ZI+UJEnSQAZSkiRJAxlISZIkDWQgJUmSNJCBlCRJ0kAGUpIkSQMZSEmSJA1kICVJkjSQgZQkSdJABlKSJEkDGUhJkiQNtKR/a09aDPzNR0lauuyRkiRJGshASpIkaSAv7UnSLHipVlKXPVKSJEkDGUhJkiQNZCAlSZI0kIGUJEnSQAZSkiRJAxlISZIkDWQgJUmSNJCBlCRJ0kA+kFPChyxKkoaxR0qSJGmgGQOpJJskOSHJpUluSPLdJM/tTN83yYVJbk7ypSQ7rN8qS5IkLQ6j9EgtA34CPA24F3A4
cFqSlUm2BT4JvA3YGlgFfHw91VWSJGlRmXGMVCnlJuCITtJnkvwI2BPYBji/lPIJgCRHAFcneXgp5cL5r64kSdLiMesxUkm2A3YGzgd2A86dmNaCrktauiRJ0pI2q0AqyT2Ak4GPtR6nLYHretmuA7aaZN5DkqxKsmrNmjVD6ytJkrRojBxIJdkIOBG4FXhDS74RWN7Luhy4oT9/KeX4UspepZS9VqxYMbC6kiRJi8dIgVSSACcA2wH7lVJ+1SadD+zeybcFsGNLlyRJWtJG7ZH6IPAI4IWllFs66acDj0yyX5JNgbcD5znQXJIkbQhGeY7UDsAfAHsAVyW5sb0OLKWsAfYDjgKuAR4P7L8+KyxJkrRYjPL4g0uBTDP9LODh81kpSZKkceBPxEiSJA1kICVJkjSQgZQkSdJAM46RkiTdPVYeduaMeVYf8/y7oSaSRmWPlCRJ0kAGUpIkSQN5aU9jw8sei5vto/VhlP0K3Le0cOyRkiRJGshASpIkaSADKUmSpIEMpCRJkgYykJIkSRrIu/akDYh31knS/LJHSpIkaSADKUmSpIEMpCRJkgYykJIkSRrIQEqSJGkg79qTpA2Ad2xK64c9UpIkSQMZSEmSJA1kICVJkjSQgZQkSdJABlKSJEkDGUhJkiQNZCAlSZI0kIGUJEnSQD6QU9KSM5uHT/qgSklzYY+UJEnSQAZSkiRJA3lpT5qlUS4FgZeDJGlDYI+UJEnSQAZSkiRJA3lpT5K0wfKuTc2VPVKSJEkDGUhJkiQN5KU9SXc773zcsHk5TUuJPVKSJEkDGUhJkiQN5KU9SdJdePnt7uXl7vFlj5QkSdJABlKSJEkDGUhJkiQNZCAlSZI0kIGUJEnSQN61J0laMrzbUHc3e6QkSZIGMpCSJEkayEt7krSe+JDFpWW2lw3X52XG2ZQ92/1woet9d9VlvtgjJUmSNNC8BFJJtk5yepKbklya5ID5KFeSJGkxm69Lex8AbgW2A/YAzkxybinl/HkqX5K0SHkJc8M2Dpff1qc590gl2QLYD3hbKeXGUspXgU8DB821bEmSpMVsPi7t7QzcVkq5qJN2LrDbPJQtSZK0aKWUMrcCkr2BT5RS7tdJey1wYClln07aIcAh7e0uwPYqxvwAAAdxSURBVPfntOA7bQtcPU9laeHZnkuHbbm02J5Li+05OzuUUlZMNmE+xkjdCCzvpS0HbugmlFKOB46fh+WtI8mqUspe812uFobtuXTYlkuL7bm02J7zZz4u7V0ELEvysE7a7oADzSVJ0pI250CqlHIT8EngHUm2SPJk4MXAiXMtW5IkaTGbrwdyHgpsBvwM+Cfg9Xfjow/m/XKhFpTtuXTYlkuL7bm02J7zZM6DzSVJkjZU/kSMJEnSQAZSkiRJA41tIOXv+42vJG9IsirJL5N8tDdt3yQXJrk5yZeS7LBA1dSIkmyS5IT2ObwhyXeTPLcz3TYdM0lOSnJlkuuTXJTkNZ1ptucYSvKwJGuTnNRJO6B9bm9KckaSrReyjuNqbAMp1v19vwOBDybxaerj4QrgncA/dBOTbEu9A/RtwNbAKuDjd3vtNFvLgJ8ATwPuBRwOnJZkpW06to4GVpZSlgMvAt6ZZE/bc6x9APjWxJv2ffl31J9z2w64GThuYao23sZysHn7fb9rgEdO/DRNkhOBy0sphy1o5TSyJO8EHlhKObi9PwQ4uJTypPZ+C+qTdx9TSrlwwSqqWUtyHvBXwDbYpmMtyS7A2cAbgXtje46dJPsDLwO+B+xUSnllkndRg+UDWp4dgQuAbUopN0xdmvrGtUfK3/dbmnajtiNwxzPKLsF2HStJtqN+Rs/HNh1bSY5LcjNwIXAl8Flsz7GTZDnwDuAtvUn9tryEepVn57uvdkvDuAZSWwLX99KuA7ZagLpo/mxJbccu23WMJLkHcDLwsdZDYZuOqVLKodR22pt6Oe+X2J7j6EjghFLKZb1023KejGsgNdLv+2ns2K5jLMlG
1F80uBV4Q0u2TcdYKeX2UspXgQcCr8f2HCtJ9gCeCbxvksm25TyZjx8tXgh3/L5fKeUHLc3f9xt/5wO/O/Gmjb/YEdt10UsS4ATqoNXnlVJ+1SbZpkvDMu5sN9tzfOwDrAR+XD+ibAlsnGRX4PPU700AkjwU2IT6/apZGMseKX/fb7wlWZZkU2Bj6od60yTLgNOBRybZr01/O3Ceg1jHwgeBRwAvLKXc0km3TcdMkvsm2T/Jlkk2TvKbwO8AX8D2HDfHUwPdPdrrQ8CZwG9SL8G/MMneLSB+B/BJB5rP3lgGUs1C/r6f5uZw4BbgMOCV7f/DSylrgP2Ao6h3ZT4e2H+hKqnRtOcI/QH1QH1Vkhvb60DbdCwV6mW8y6ht9tfAm0opn7Y9x0sp5eZSylUTL+rlvLWllDXt+/J11IDqZ9SxUYcuYHXH1lg+/kCSJGkxGOceKUmSpAVlICVJkjSQgZQkSdJABlKSJEkDGUhJkiQNZCAlSZI0kIGUpA1CkpVJSnv463yU96Ekb5tmekmy03wsS9LiNa4/ESNJC6qU8rqFroOkhWePlCTNYL56sSQtPQZSkuYkyYOSfDLJmiQ/T3JsS98oyeFJLk3ysyT/mORebdo+SS7rlbM6yTPb/0ckOa3Nc0OS85Ps1cn71iSXt2nfT7JvZ5mHJbmk1eW0JFtPUe/tk3w6yS+SXJzktZ1pRyT55yQnJbkeOHiS+T+a5J2d93+a5MokVyT5/blsU0njw0BK0mBJNgY+A1xK/ZX5BwCntskHt9fTgYdSf3n+2FkU/6JW1r2BT0/Mm2QX4A3A40opW1F/gHV1m+ePgJcATwO2p/4e3AemKP9U6u/JbQ+8HHhXkmd0pr8Y+Oe2/JOnq2iS5wB/AjwLeBjwzNFXU9I4M5CSNBe/QQ1E/rSUclMpZW0p5att2oHAe0spPyyl3Aj8GbD/LC6TfbWU8tlSyu3AicDuLf12YBNg1yT3KKWsLqVc0qa9DviLUsplpZRfAkcAL+8vM8mDgCcDb211/i7wYeBVnWxfL6WcUUr5dSnllhnq+tvAR0op/11KuaktV9IGwEBK0lw8CLi0lHLbJNO2p/ZUTbiUeoPLdiOWfVXn/5uBTZMsK6VcDLyJGqz8LMmpSbZv+XYATk9ybZJrgQuogVd/mdsDvyil3NCr3wM6738yYj0nyuvmv3SqjJKWFgMpSXPxE+DBU/QyXUENbCY8GLgN+ClwE7D5xIR2iXDFqAstpZxSSnlKK78A7+7U57mllHt3XpuWUi6fpG5bJ9mqV79uvjJqfYArqUFltyxJGwADKUlz8U1qEHFMki2SbJrkyW3aPwFvTvKQJFsC7wI+3nqvLqL2MD0/yT2Aw6mX62aUZJckz0iyCbAWuAX4dZv8IeCoJDu0vCuSvLhfRinlJ8A5wNGtzo8GXg2cNGgrwGnAwUl2TbI58JcDy5E0ZgykJA3Wxi+9ENgJ+DF18PYr2uR/oI5t+grwI2rQ80dtvuuAQ6njki6n9lCtcxffNDYBjgGupl7+uy91/BXA/6EOTP/XJDcA3wAeP0U5v0MdIH8FcDrwl6WUs0aswzpKKZ8D3g98Ebi4/ZW0AUgps+m9liRJ0gR7pCRJkgYykJIkSRrIQEqSJGkgAylJkqSBDKQkSZIGMpCSJEkayEBKkiRpIAMpSZKkgQykJEmSBvr/uvmsI2qa/jwAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 720x360 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Bar chart of the number of clients per counselor, excluded counselors left out.\n",
    "visualizeCounselorDf(counselorDf,counselorDict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# List the counselor records that inExcludedCounselors() filters out of the chart.\n",
    "showExcludedCounselors(counselorDf,counselorDict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* id: 0; frequency: 763; data: \n",
    "* id: 3; frequency: 254; data: Geen Hulpverlener \n",
    "* id: 19; frequency: 1; data: Aangehouden cliënten \n",
    "* id: 45; frequency: 1; data: Niet Gestart "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Process tag Intake (clients.csv.gz)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Names used as XML tag names and/or as keys of the answer records built by getAnswerDataList().\n",
    "TITLE = \"title\"\n",
    "TITLECAPS = \"Title\"\n",
    "QUESTIONNUMBER = \"questionNumber\"\n",
    "ANSWER = \"answer\"\n",
    "ANSWERID = \"answerId\"\n",
    "ANSWERTEXT = \"answerText\"\n",
    "ANSWERTITLE = \"answerTitle\"\n",
    "# ID is the XML attribute read from each answer element; INTAKE is the value stored under TREATMENTTITLE.\n",
    "ID = \"ID\"\n",
    "INTAKE = \"Intake\"\n",
    "# ElementTree paths: \"./x\" selects direct children, \".//x\" selects descendants at any depth.\n",
    "QUERYINTAKE = \"./Intake\"\n",
    "QUERYQUESTION = \".//question\"\n",
    "QUERYTITLE = \"./\"+TITLE\n",
    "QUERYQUESTIONNUMBER = \"./\"+QUESTIONNUMBER\n",
    "QUERYANSWER = \"./\"+ANSWER\n",
    "QUERYANSWERTEXT = \"./\"+ANSWERTEXT\n",
    "# NOTE: OUTFILENAME and PLOTFILENAME rebind the names that the AssignedCounselor section set earlier.\n",
    "OUTFILENAME = \"clients.csv.gz\"\n",
    "PLOTFILENAME = \"Intake.png\"\n",
    "# Default plot title of visualizeAnswerDataDf().\n",
    "TITLESTRING = \"Number of questions per client in Intake forms\"\n",
    "TREATMENTTITLE = \"treatmentTitle\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getAnswerDataList(querySection=QUERYINTAKE):\n",
    "    \"\"\"Extract one record per answer from the questionnaires in all client files.\n",
    "\n",
    "    Args:\n",
    "        querySection: ElementTree path of the sections to read, relative to\n",
    "            the root element of each file.\n",
    "\n",
    "    Returns:\n",
    "        A list of dicts. Every dict has the keys CLIENT, TREATMENTTITLE\n",
    "        (always INTAKE) and ANSWERID; TITLE, QUESTIONNUMBER, ANSWERTEXT and\n",
    "        ANSWERTITLE are present only when the corresponding element exists\n",
    "        and has text.\n",
    "\n",
    "    Raises:\n",
    "        KeyError: if an answer element has no ID attribute.\n",
    "    \"\"\"\n",
    "    def firstText(element, query):\n",
    "        # Stripped text of the first match of query below element, or None\n",
    "        # when nothing matches or the match has no text.\n",
    "        match = element.find(query)\n",
    "        if match is None or match.text is None:\n",
    "            return None\n",
    "        return match.text.strip()\n",
    "\n",
    "    answerDataList = []\n",
    "    for inFileName in sorted(os.listdir(DATADIR)):\n",
    "        if not re.search(FILEPATTERN, inFileName):\n",
    "            continue\n",
    "        root = readGzippedXmlFile(DATADIR + inFileName)\n",
    "        clientId = fileNameToId(inFileName)\n",
    "        for section in root.findall(querySection):\n",
    "            for question in section.findall(QUERYQUESTION):\n",
    "                questionDict = {CLIENT: clientId, TREATMENTTITLE: INTAKE}\n",
    "                for key, query in ((TITLE, QUERYTITLE), (QUESTIONNUMBER, QUERYQUESTIONNUMBER)):\n",
    "                    text = firstText(question, query)\n",
    "                    if text is not None:\n",
    "                        questionDict[key] = text\n",
    "                for answer in question.findall(QUERYANSWER):\n",
    "                    # Every answer starts from a copy of the data of its question.\n",
    "                    answerDict = dict(questionDict)\n",
    "                    answerDict[ANSWERID] = answer.attrib[ID]\n",
    "                    for key, query in ((ANSWERTEXT, QUERYANSWERTEXT), (ANSWERTITLE, QUERYTITLE)):\n",
    "                        text = firstText(answer, query)\n",
    "                        if text is not None:\n",
    "                            answerDict[key] = text\n",
    "                    answerDataList.append(answerDict)\n",
    "    return answerDataList"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# removeRareDataValues() replaces an answer text by REMOVED when one of its tokens\n",
    "# was counted fewer than DATAFREQUENCYTHRESHOLD times for the same answer key.\n",
    "DATAFREQUENCYTHRESHOLD = 5\n",
    "REMOVED = \"REMOVED\"\n",
    "\n",
    "def makeAnswerId(answer):\n",
    "    \"\"\"Combine the question number and the answer id of a record into one key.\n",
    "\n",
    "    The key has the form '<questionNumber>-<answerId>'; a part that is\n",
    "    missing from the record is replaced by the empty string.\n",
    "    \"\"\"\n",
    "    return answer.get(QUESTIONNUMBER, \"\") + \"-\" + answer.get(ANSWERID, \"\")\n",
    "\n",
    "def countAnswerTexts(answerDataList):\n",
    "    \"\"\"Count the whitespace-separated tokens of the answer texts per answer key.\n",
    "\n",
    "    Returns a dict that maps every makeAnswerId() key to a dict of token\n",
    "    frequencies; a key whose records have no answer text maps to an empty dict.\n",
    "    \"\"\"\n",
    "    answerTextFreqs = {}\n",
    "    for answerData in answerDataList:\n",
    "        tokenFreqs = answerTextFreqs.setdefault(makeAnswerId(answerData), {})\n",
    "        # A record without answer text contributes no tokens.\n",
    "        for token in answerData.get(ANSWERTEXT, \"\").split():\n",
    "            tokenFreqs[token] = tokenFreqs.get(token, 0) + 1\n",
    "    return answerTextFreqs\n",
    "\n",
    "def removeRareDataValues(answerDataList, answerFreqs):\n",
    "    \"\"\"Replace answer texts that contain rare tokens by the REMOVED marker.\n",
    "\n",
    "    A token is rare when answerFreqs lists it fewer than\n",
    "    DATAFREQUENCYTHRESHOLD times under the makeAnswerId() key of the record.\n",
    "    The records are modified in place and the same list is returned.\n",
    "    \"\"\"\n",
    "    for answerData in answerDataList:\n",
    "        answerId = makeAnswerId(answerData)\n",
    "        answerText = answerData.get(ANSWERTEXT, \"\")\n",
    "        if answerText == \"\":\n",
    "            continue\n",
    "        rareTokens = [token for token in answerText.split()\n",
    "                      if answerFreqs[answerId][token] < DATAFREQUENCYTHRESHOLD]\n",
    "        if rareTokens:\n",
    "            answerData[ANSWERTEXT] = REMOVED\n",
    "    return answerDataList"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def answerDataListToDf(answerDataList):\n",
    "    \"\"\"Turn the list of answer records into a data frame, one row per record.\"\"\"\n",
    "    answerDataDf = pd.DataFrame(answerDataList)\n",
    "    return answerDataDf\n",
    "\n",
    "def saveAnswerDataDf(answerDataDf, outFileName=OUTFILENAME):\n",
    "    \"\"\"Write answerDataDf as CSV to the file outFileName inside OUTDIR.\n",
    "\n",
    "    index=None is passed to to_csv, presumably to leave out the row index\n",
    "    (TODO confirm against the pandas version in use).\n",
    "    \"\"\"\n",
    "    outPath = OUTDIR + outFileName\n",
    "    answerDataDf.to_csv(outPath, index=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def visualizeAnswerDataDf(answerDataDf, titleString=TITLESTRING, plotFileName=PLOTFILENAME):\n",
    "    \"\"\"Draw a bar chart with the number of rows per client and save it.\n",
    "\n",
    "    Args:\n",
    "        answerDataDf: frame with a CLIENT column.\n",
    "        titleString: title of the plot.\n",
    "        plotFileName: file the figure is saved to.\n",
    "    \"\"\"\n",
    "    clients = sorted(set(answerDataDf[CLIENT]))\n",
    "    rowsOfClient = answerDataDf.groupby(CLIENT).groups\n",
    "    rowCounts = [len(rowsOfClient[client]) for client in clients]\n",
    "    plt.figure(figsize=(16, 5))\n",
    "    matplotlib.rc(\"font\", size=20)\n",
    "    plt.bar(clients, rowCounts)\n",
    "    plt.title(titleString)\n",
    "    plt.xlabel(\"client id\")\n",
    "    # Hide the x ticks and their labels.\n",
    "    plt.tick_params(axis='x', which='both', bottom=False, labelbottom=False)\n",
    "    plt.savefig(plotFileName)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First answer ids that normalize_answer_id() knows; each selects the suffix that is stripped.\n",
    "GESLACHT = \"geslacht\"\n",
    "GESLACHT0 = \"geslacht0\"\n",
    "GESLACHTT0 = \"geslachtt0\"\n",
    "# Pattern for answer ids that normalize_answer_id() treats as non-questions.\n",
    "NONQUESTIONS = \"^(goTo[0-9]|ltgeslacht1|doel)$\"\n",
    "# Answer id that is allowed to stay unchanged by the normalization.\n",
    "EXCEPTIONANSWERID = \"mdoel\"\n",
    "# Column names produced by normalize_all_answer_ids().\n",
    "ANSWERIDORIGINAL = \"answerId_original\"\n",
    "ANSWERIDNORMALIZED = \"answerId_normalized\"\n",
    "\n",
    "def normalize_answer_id(answer_id, first_answer_id):\n",
    "    \"\"\"Map an answer id onto the naming scheme of the GESLACHT variant.\n",
    "\n",
    "    first_answer_id is the first answer id of the client and selects the\n",
    "    suffix to strip: nothing for GESLACHT, a trailing '0' for GESLACHT0\n",
    "    (ids ending in '0h' get '-ignore' appended instead) and a trailing 't0'\n",
    "    for GESLACHTT0. Non-question ids yield the empty string.\n",
    "\n",
    "    The notebook is stopped with sys.exit() when first_answer_id is unknown\n",
    "    or when an id that should have changed did not change.\n",
    "    \"\"\"\n",
    "    if first_answer_id not in (GESLACHT, GESLACHT0, GESLACHTT0):\n",
    "        sys.exit(f\"unknown first answer id: {first_answer_id}!\")\n",
    "    if re.search(\"^(goTo[0-9]|ltgeslacht1|doel)$\", answer_id):\n",
    "        return \"\"\n",
    "    if first_answer_id == GESLACHT:\n",
    "        return answer_id\n",
    "    if first_answer_id == GESLACHTT0:\n",
    "        new_answer_id = re.sub(\"t0$\", \"\", answer_id)\n",
    "    elif re.search(\"0h$\", answer_id):\n",
    "        new_answer_id = re.sub(\"0h$\", \"0h-ignore\", answer_id)\n",
    "    else:\n",
    "        new_answer_id = re.sub(\"0$\", \"\", answer_id)\n",
    "    unchanged = new_answer_id == answer_id\n",
    "    if unchanged and answer_id != EXCEPTIONANSWERID and not re.search(NONQUESTIONS, answer_id):\n",
    "        sys.exit(f\"first answer id {first_answer_id} did not change {answer_id}!\")\n",
    "    return new_answer_id\n",
    "\n",
    "def normalize_all_answer_ids(df):\n",
    "    \"\"\"Add normalized answer ids to df while keeping the original ones.\n",
    "\n",
    "    A row whose client id differs from that of the previous row starts a new\n",
    "    client; its answer id selects the naming scheme that normalize_answer_id()\n",
    "    applies to the rows of that client.\n",
    "\n",
    "    df is modified in place: column ANSWERID is renamed to ANSWERIDORIGINAL\n",
    "    and column ANSWERIDNORMALIZED is added. The same frame is returned.\n",
    "    \"\"\"\n",
    "    answer_ids_normalized = []\n",
    "    previous_client_id = \"\"\n",
    "    first_answer_id = \"\"\n",
    "    for client_id, answer_id in zip(df[CLIENT], df[ANSWERID]):\n",
    "        if client_id != previous_client_id:\n",
    "            previous_client_id, first_answer_id = client_id, answer_id\n",
    "        answer_ids_normalized.append(normalize_answer_id(answer_id, first_answer_id))\n",
    "    df.rename(columns={ANSWERID: ANSWERIDORIGINAL}, inplace=True)\n",
    "    df[ANSWERIDNORMALIZED] = answer_ids_normalized\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def remove_text_from_alcohol_intake(answer_data_list):\n",
    "    \"\"\"Strip the leading label from the answer texts of 'dag<digit>' and 'week' answers.\n",
    "\n",
    "    For records whose answer id starts with 'dag' plus a digit or with\n",
    "    'week', everything up to the last colon on the first line of the answer\n",
    "    text (plus the spaces after it) is removed, as is a leading plus-minus\n",
    "    sign. Records without an answer text are passed through unchanged\n",
    "    instead of raising KeyError.\n",
    "\n",
    "    The input records are updated in place; the returned list holds copies\n",
    "    of the updated records.\n",
    "    \"\"\"\n",
    "    new_answer_data_list = []\n",
    "    for row in answer_data_list:\n",
    "        is_intake_row = re.search(\"^dag[0-9]\", row[\"answerId\"]) or re.search(\"^week\", row[\"answerId\"])\n",
    "        if is_intake_row and \"answerText\" in row:\n",
    "            row[\"answerText\"] = re.sub(\"^.*: *\", \"\", row[\"answerText\"])\n",
    "            row[\"answerText\"] = re.sub(\"^± *\", \"\", row[\"answerText\"])\n",
    "        new_answer_data_list.append(dict(row))\n",
    "    return new_answer_data_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "answerDataList = getAnswerDataList()\n",
    "# Assign the cleaned records back to answerDataList so that the next steps use them.\n",
    "answerDataList = remove_text_from_alcohol_intake(answerDataList)\n",
    "answerTextFreqs = countAnswerTexts(answerDataList)\n",
    "answerDataList = removeRareDataValues(answerDataList,answerTextFreqs)\n",
    "answerDataDf = answerDataListToDf(answerDataList)\n",
    "answerDataDfNormalized = normalize_all_answer_ids(answerDataDf)\n",
    "saveAnswerDataDf(answerDataDfNormalized,outFileName=OUTFILENAME)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA7gAAAFOCAYAAACsbbF2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3debgkZXnw/+8NwyIYFnEQl+gICuZ1jQ4KuDDIG7eooEAki4JGcENFlp8EJE5UjInEDVQElDESXzCoKIJoIgyguDAQJcQo66goy8AsLMOw3r8/6mlO01T3qe5zzpxzar6f6+qruqueraqequ67a4vMRJIkSZKk2W696W6AJEmSJEmTwQBXkiRJktQKBriSJEmSpFYwwJUkSZIktYIBriRJkiSpFQxwJUmSJEmtYIAraVJFxMKIyIhYNN1tmQ4RsWNEnBURt0TEA2VZLJzuds02ZbllRMyb7rZockTEon7bQxvXd0TsX+Zp8XS3ZTYY1D+mU0T8ZUT8OCJu7+qnC6a7XZL6M8CV1rKuL/GMiEvHSXvquhwszjYR8VRgMfBqYEvgFuAm4I5pbNaMEhELyp8ge053W6R+ZnM/jYilUxEoloB9YUQ8ZzLLncki4q+BrwI7ARtR7c9vAu6ZznZJGswAV5pez42I1093IzRpDgQ2AS4CtsrMrTNzm8w8dprbNZMsAD4IjBc4/Lq87p3qBmlGmGnrewHN+ukgq6jm6beT0aAZYH+qZbLOBLjAwWX4SWCTsj/fJjMvns5GSRpsznQ3QBIfiogzM/OB6W6IJuzpZfi1zFw5rS2Z5TLzadPdBq09bVzfmflN4JvT3Q5NSGef/qXMvG9aWyKpMY/gStPnAmA11RfoX01zWzQ5HlGGnpIsSbOf+3RpFjLAlabPjcDx5f3CiBjqjIrxbsoSEfM6aWqmLS7T9o+IzSLinyPimoi4KyKujYgPRcTGXel3j4jvlRsn3RkRF0bEixu0cb2IeF9E/KLkuzUivh0Rz2+Q740R8R8RsSwi7omIP0TE6RHxgj55Hry5Vcl/UET8LCJWlvGNT6sr+f82Ii6IiOURsSYirouIEyPiKTXpl5blvKCMOqVr/SxtWm8p63Glnt+Xeq+NiE9ExBb9blozaF13pVkwXnsi4hkR8aUyr2vKsvtRRLw9Ijbok2friPh4RFxR1vGaiPhdRFxc+tGTuttIdYojwH5dy+hhfblB/94uIr5Qls+aiFhR+uVbI2L9Pnm6+/0jSp/5den3N0fEaVFdR91v+ewREedExE0RcW/pG7+OiP8XEW/ol69PWb39ddZtJ6W8nSLiX8s2sCaqfcRlEfGPEbHDEOWMt74fGRFHRsQlEbGq1HVVRHwmIv64T56h1/ew/XSceep7k6kYu052QUQ8Kqpt/LqIuDuqbf+kiHhsk3qG0T0PEfHEUs/1pd7rIuLYiNisbj6AXcuoU3qWx9Ke9C+JiE9HxE9Lf7ynLO9zI2LvEdu9XkR8rtS3IiJ2rkkz9P6rT111+9PruuZ3UU/6jSLikDK/q0r/+nVZp9v0qeMhfSMi/jqq75tby/g9y/ju7TUi4l0R8V8RcUdE3BARX46IJ3SV+9Qy7vqyDK6IiAMGzOuTI+LzEXFlaffqiPhN2Xb+LiIe3XS5STNKZvry5WstvoBFQAKnAVtRXaeVwAE1aU8t0xbVTMvymtennnmdNDXTFpdp7wN+Vd7fQXXjjE653y5p3wk8ANzf1dYE7gZeWFP2wjL9y8A3yvt7gZVdee8D3tCn3X8E/EdX2gd66r0fOGices/sqmdFef+chutnE+B7XfXd09P2u4A9evJcQvWHRWf5rSqfbwQuGaJv/Alwc1ddd1Ad5U/gKuCQ8n5x03XdlWZBSbO0z/SDyrLt1H17WX6dz+dTXYPWnedJwB961uvyss46495e0v5xWR53dC3HG3tef9ykf1PdxOuurjQreWjf
/Q9g0wH9/j3AZeX9mq5lnMCtwHY1eY/pSpPAbT1tuHHI/UB3f52N20kA/9SzTFaV5dL5vKgnz6IyfuEw+zOq7WJpV5p7u/pRlj5Xty8aen0zZD8dZxntT832WqZ15udvut7fWdrXadt1wJbD9Kuesgct5z3KvHf68r1d0y4BNujK8wb6798eso8DHtnTJ27r6ZcJfKFPu2v7B9XldP9Wpt0EPHsy9l8Dll+nD9zYlX9Z17hPd6Wd29W3Ov2rextYDuw0qG8An2Fsm11ehnv2bK+LqH4zJNV3b3f/v7a0YyfGtuOVPHQ/fHhNG57b09Z7uvJ3Xq8Ytv/58jUTXtPeAF++1rVX15f4aeVz5wvst8BGPWmnOsBdSRXgvqiM3xB4K2M/do4uX3ofBbYoaZ4EXFym/6ym7IVdZd9HFUQ/okzbDvh+mb6a+kDim2X6pcDLgI3L+C2Bo0p77qfnB21XvbeXHxnvoPygAbYGNmu4fk5g7IfK2zrrBNie6kdS54fo9gOW6/4j9IsNgP8p+a8BXlLGrwe8hirw7QQ/i5uu6640C+gT4FLdSKfzY/Rw4NFd/eHlwJXU/DAFvsRY8P1iYL0yfiPgGcCHKT/UatbTw/p0k/5d+lDnx91iYIeuOg9kLEA4ecD6WUEVPLwcWL8s4xcDvyvTv1azfDs/nj/aWT5l2lxgL+CLQ67vznKYrdvJ4V3r6LPAk7qmPZZq2zmqJ88ihgxwgc3Lukrga8CzgPXLtG0ZC3xupOyjJrq+h+mn4yyj/anZXsu0pV1t+y9g5zJ+DvBaxgKNfx6h3k7Zg5bzCuAHwDO6tp+3MLb9vHPA9tN3/0b1B+G/U+1THtU1fgvgXaXfJbBPTd6H9Q9gY+DbjH1H1u13R9p/NVyW433PfpexQHafrr45H7i8q28+uidfp2/cThWI/j1j37GbAVvX7CduB/66zFeUPnxDmX5CWe9nAdt2lfN5xv6o2aqnDeeVaT8B/rRnHc6nurHWzqP2f1++pvM17Q3w5Wtde/HwAHczxv5Jf29P2qkOcO8FnlIz/Ytd5X+pZvqTGPt3+Ik90xZ25T2qJu/GjB01Prln2v8t438FbN5nvo4oab4zoN4DR1w38xgLZN5WM30T4Ooy/V8HLNf9R6j7jYz9O79DzfQXd83f4qbruivNAmoCXKof/EvLtJf3ybsdVVB/L/DYrvG/LPlqjzL2Kauznh7Wp5v0766+eTU1R2Sogtws/fMpPdM662d1n36/F2N/bmzYNf4vyvj/HaVfjbMcZuN28ujSHxL46BD5FjF8gPuRMv6rA8rtBBmHTcb6HqafjjO/+9dtr2VaZ5u7kZ7Ao0w/tEy/doR6O2UPWs5X0POHapl+XJl+Xs20zvLcfwLLpLOfO3+8/kF1lsL5ZdyV9HzXlDQj778atrfv9ywP3Sc/rG7gMVSBbwIf6tM3Bm5DPdvrfgOWZ1LdsXtOz/T1qP6ATOBNPdM6ZzK8YNT16cvXTH15Da40zTLzNuCfy8e/i4hN12L1/56ZV9eM/8+u9//YOzEzf0MVYEB1pK7OauBTNXnXAP9SPu4VEdE1eb8yPCkzV/Up99/KcLeov9byVqoji6N4HdUPghuBk3snZuZqxtbV6/vUP6rOtWnfyMxf19R9EXDhJNbXsYDqD4srMvN7dQky8xqqf/nnMHadMVRHTKA6YjflSl/Zq3z8ZFkfvU4Gfk91hKPf9X5n9On3nSNFGwHd11p35nPziNhk6IYPNhu3k72p/uxZQXWUfip15vVfBqT5ahn+WZ/pw67vtenEzLy1ZvyZZfjkKfpO+ERm3j2g3n779Yk6qwx3GrT/jIitqI4wL6A6EvrizKx73NICRt9/TVRn/7Kkru7MvInqyCpUf5LVuR/4RIO6rge+UjO++7v62Oy503NWT2c4v3zsXadrdf8trU0GuNLMcBzVtUWPobpebG357z7jby7DNYwFsr1uKsMt+0xfkpl39pl2QRluATy5a/wuZfiB
iLix7kV1fRhUP7C36lPvqI9zeG4ZXpSZ9/dJc14Zbgo0vonOEHVfMCDNoGmj6izzp/Zb5mW5d9J139DnnDL8p4j4bETsFhGPYOpsS3XKKoz9aHuI8oNucfn43Lo0jPWh3rz3Mtb3u/v1T6mOxDwW+HFEHBgRT+7NP6LZuJ3sVIbnZ+ZdI5YxrqhuHtW5gc45A+b10yVN7c2mGH59r021baP6k6Zji2mod+TlERFzorpJ37nlRkh3d920aUVJtvGAOh5H1fd3pApMF5Rgsc5E9l8T1dm/1O6Lis73xfZ9/qi4OjNvaVDXL7P+UYI3d72/ok/eft/Vnf33v0bEx6K6YVzjm3FJM5nPwZVmgMxcHREfpfqhdnhEfG7AkZnJdEOf8Z3g7qbMzHHS9PtC/H2f8b3T5lLdJAPG/klu+oOu7mjasoZ568wtw0Ftv74m/WTolPWHAWkGtWtUnWW+EdUfLOPpXub/BDyP6prBd5bXfRFxCdU1oifl5D4PuHt5N1lH/dbP7QPyrinDB/t1Zq6IiDdSXTLwLOALAOWH8/epTuMf9c+H2biddPpJ3RG1ydR9ZGnrBun7HV0fan2vZbVty8w1XQftp6Jt/ZZJZ3mM9PswIh5JdZO+XbpG30XV3zoBWqf/bArUBXedu/6uoLrJ0aDvwonsvyZqmO+LYOzU/m5Nt8Pa7+rMvL+rn4z3fd7bjw6n+pN2F+D95bUmIn5MdR31oqn8A0uaSh7BlWaOL1Dd9GRLquuv1kWdfdLrMjMavJbWlNHvyOswNh4/SWt0lvm3Gi7zhZ2MmXl3Zu4B7Ex16vZPqE757Hy+MiKePUXtXqvrKDPPoTqKeiDVzY7+AGwDvAlYHBEnrsXmzJTtZKp1/0bZssF8zpuuhupBR1MFTLdQnV7+mMzcJDO3zsxtgMd3pY26AqiOLN5O9V34uYgY9Ft15P3XJJrIvmjatsNyavyLqE7t/wzVzc42BHYDPgdc0f0IImk2McCVZohyPVTneraDGzx/rvPF2O/LdfM+49eWxzWc1v0PdudUqidOfnMa6bRlUP3dX/gTOQrWr+6my63bg6eaRtfzi3v06w8TXuaZ+ZPMfH9m7kz1o/QvqY7uzaXmWuYJ6F7eTdbRZK4fMnNVZp6UmW/IzMcDTwdOKpMPiIg/H6HY2biddOp/0lqqB6ZvXjWcfcrw3Zn5r5l5c8/0JkdZLwFeRXW086+AL/Zcg95tOreFYb4vkvqj1dMqK/+Zme/NzOdSHWV+G9UlGdtS3UlZmnUMcKWZ5RSqR8T8EdVdUAfpnPrZ7x/WHSerUSOaP+CGPLuW4Uqqx3d0/LgMXzllrRrssjJ8wYC2v7QM76S6a+Vk1/2SAWl27TO++zTgYftDZ5k/KyIe3ydNY5l5Z2aeRnWkE+B5PdeedU5T7PeDdZBrGZvX3eoSlKM9C8rHy+rSTJbM/GVmHkh15Br6r59BZuN20pnfBVN5zXVmXsdYALO253Ui/bStmiyTzv7nv/pM/79NKsrMH1I9Hu0uqjsOn9AnyJ3U/deQOvuXXQcE4J3viysHXGs/Y2Tmisw8ETiyjBplnyZNOwNcaQYpN31ZWD6+k8F3N+zcIGqP3gkRsRFw8KQ2bnibAu/tHVnadkj5eEbPNb6LyvDlEfGKQYVHxFTcFOYbVD/itmIsQOuucxOq65agutvxZJ5e9u9l+PqIeGpN3bvQJ/jNzDuoHpUB9f1hK6rnG9f5AdWp8esDHx/UwN5lHhEbDkjeuXYrqE576+jcuXPoG+eUvvKN8vG9fQLDt1KdBpmMLdMJGWc+YWxeNxqh+Nm4nZxBNc9bUj2/cyotKsPDBgUwUZnMmzGN3E9brMky6Vwv+8zeCeX63KOaVpaZ51M94/Zuqv3xp2uSjbz/mgRnlOHTqd/vPgZ4e/n4tUmue0IiYr2IGHSd9UT2adK0M8CVZp6vUj1f9BGM/ftbp/OFeUBEvLn8ICYink51DdOgUx/XhlXAhyPivZ2jPBGx
LfAt4E+obmbyse4MmXkuVQATwDcj4vCIePBGQRHxqIjYMyK+TbNHKwwlq8cfda6l/Fi5W25nuW4PnE31OJHVVM/nnEynU633jajuGPuiUu965dTXbzD2A7NOpz98ICJe2/nxEhE7UT1KojZIK3eSPYgqIPzLiDgzIp7TmR4RG0TE/Ij4Zx56FBGqa7Q+GhE7doLAEmg8n+rO4ACXZOaKrjz/U4YvqgvkG/go1dHzxwFnR8QOpd6NIuIAqmvJAL5YHg8yGd4REd+LiL+KiAf/dIqILSLiSMaOGNc+pmQcs3E7uQX4h/LxiIg4PiIePE0zIh4bEYdExGQEvx+jOnL/aODiiPiL7qPGEfHEiDiQ6mjanpNQX8dE+2kbdZbJ6yOi3yUP/1GGn4iIB49sRsSOVMFo3R29+8rM71M9Guwe4N0RcWzP9InsvyYkq0e3nVs+fiki9o7y6KOIeB7VDei2pDoLoS44n06bAVdHxFER8cyudq8XEbsDx5R0o+zTpOmXM+BhvL58rUsvxh5mf9qANHsx9vD2pLqbYW+aDRi7qU9SPcR+VXl/K9U/ykk58NWTd3GZtn+f+heU6UsHtLG2DMYeTP9lqh/hSfXjZEVXW+8D9u1T7qZUd+DtpH2g5L2tZ5mc0qfehy2rIdfPJlQ/TLJP29cAewyzTIao+/9QPfahU9ftVMF0AldRHdFLYHFN3i2pTm/vbucd5f1vgL8ZtE6BN1MdKenkX1360X3dy70nz8qedXprWV6dccuAZ9X026u71u3NVEeflwJP6ErXKWNeTVs7py520qzoqfc/gU1HWT+lHUn1aJLOuIN7+t4dPX0igS8Mua47/XW2bidBdX1ed10rGdsHPawOxvZ9C2vKG7S+n0L150/3crmFsW2j89pvMtb3MP10nGW0P/2319p6my6TcertlD3Uci7T53XS1Ex7GmP7iHup7h68FPhhV5ptqbb7Tj13MbYfWg28rF8bxukfe5Y6EzimZvrQ+6+Gy3K85TWX6nTs7vnt3gaXAzsP0zeG3V4btPFhZVAdhe/edu6pWV7XNO3rvnzNtJdHcKWZ6RuMc/1gVv9c/xnVaVlLqX6E3Un1I+F5wC+mtIXjS6objhwC/C/VEcQVwHeAXbK6TvPhmaprOF8HvJpqOfyBKujs/OD8GtWPmXdPSaMzV1Nd7/dW4CKqH0qbUAWJJwPPzMxvTVHdvwSeU+q5gWqeb6QKJHak+rHUL+8KqruXnki1zNaj+sFyHNXzGq/vl7fkP4XqkRGfojpScz/Vv/y3UgUKH+Thz/3dA/hH4EelzkdS/VC6nOrI29Mz8/Keeu4Fdge+QvUDeUuqmxU9iYaPJsnMs6hOgTyJqu9vQrWefkh1KuPLc3Kvd/sq1aNLTqfqy/dSzesNwLeB12bm20Yse7ZuJ5mZ76M6bf50qnX5CKog4zKqI+3H9C9hqLquBv6U6rKN86mWz+ZUP8Yvp+rzf071GKdJMRn9tG0y81dU3znnUv2RsQ3V8nhCV5prgedTrYubqU4dXgn8G7BjVkdkR6n7TKqb190PHBkRH+yZPsr+a8IycxnVXeMPA5ZQ7Rs2pPpD8lNU+8Af9y9h2txGte/4FPAzqj8l/ojqN8QlVKeSPyczB35vSDNVZOZ0t0GS1EBE7E91I7ILMnPB9LZGExERC6l+dH85M/ef3tZIktQeHsGVJEmSJLWCAa4kSZIkqRUMcCVJkiRJrWCAK0mSJElqBW8yJUmSJElqhVbe6v7Rj350zps3b7qbIUmSJEmaApdeeuktmTm3d3wrA9x58+axZMmS6W6GJEmSJGkKRMRv6sZ7Da4kSZIkqRUMcCVJkiRJrWCAK0mSJElqBQNcSZIkSVIrGOBKkiRJklrBAFeSJEmS1AoGuJIkSZKkVjDAlSRJkiS1ggGuJEmSJKkVDHAlSZIkSa1ggCtJkiRJagUD3Gk074izh07T/bku/7wjzn7w1Z2mN1+/8f3KqEs/qD3defrNV7+y
6saPN6331aSNdZ+btL1JumGWb7929NZRtz761Tte/n7ljbd8mpTXdFq/cgcZb1027QOD2to7brz2DkrT265B8zHettt0G+jXpvG2xybjeuscND918zeozYPmrUmbm+5zhtkOx1uv/cptkn9QmvE0WZfjbY/DLvvxtoUm28CgvOPtM4bdl/VrS93nQXUOmpdBfWrYfcSg+sfrI033U4PKGrQee/OPty7G2wcM2/+a9LtBZQ+a30F1DVqfg9rQb3kMSt9vOQwqa7z56ze/TeppUsd4+Zr0vyb7j7rP/fpbk8/92j7R7XGYft9kextvHzRsP6oru0lZs5UBriRJkiSpFQxwJUmSJEmt0CjAjYitIuKtEfHNiLg6Iu6KiFUR8cOI+NuIWK8n/byIyAGv0wbUtV9E/Cwi7ih1LI6IV090RiVJkiRJ7TanYbp9gM8DNwDnA78FHgO8HjgZeGVE7JOZ2ZPvF8CZNeVdUVdJRBwLHApcD5wEbAjsC5wVEe/OzOMbtleSJEmStI5pGuBeCbwWODszH+iMjIgjgZ8Be1EFu1/vyffzzFzYpIKI2IUquL0G2DEzV5TxHwcuBY6NiO9k5tKGbZYkSZIkrUManaKcmedl5lndwW0ZfyNwQvm4YIJteXsZHtMJbksdS4HPAhsBb55gHZIkSZKklpqMm0zdW4b31Ux7XES8LSKOLMNnDSjnpWV4bs207/akkSRJkiTpIZqeolwrIuYAbyof6wLTPyuv7jyLgf0y87dd4zYFHg/ckZk31JRzVRluP5H2SpIkSZLaa6JHcD8GPAM4JzO/1zV+NfBh4HnAluW1K9UNqhYAPyhBbcfmZbiqTz2d8Vv0a0hEHBgRSyJiybJly4adD0mSJEnSLDdygBsR76G6KdSvgDd2T8vMmzPz7zPzssxcWV4XAi8Dfgo8BXjrBNr9MJl5YmbOz8z5c+fOncyiJUmSJEmzwEgBbkQcBHwa+CWwW2Yub5IvM++jeqwQwEu6JnWO0G5Ovc74lUM2VZIkSZK0jhg6wI2Ig4HjqJ5lu1u5k/IwOucPP3iKcmbeCfweeGREPLYmz1PL8Moh65IkSZIkrSOGCnAj4v3AJ4GfUwW3N49Q505leG3P+PPK8BU1eV7Zk0aSJEmSpIdoHOBGxNFUN5W6FNg9M28ZkPa5EfGwsiNid+B95eOpPZM7z9M9KiK27MozD3gXcDdwStP2SpIkSZLWLY0eExQR+wEfAu4HLgLeExG9yZZm5qLy/hPAUyPiYuD6Mu5ZjD3H9ujMvLg7c2ZeHBGfAA4BLo+IM4ANgTcAjwLenZlLm8+aJEmSJGld0vQ5uE8uw/WBg/ukuQBYVN5/BXgdsCPV6cUbADcBXwOOz8yL6grIzEMj4r+pjtgeCDwAXAZ8PDO/07CtkiRJkqR1UKMANzMXAgubFpqZXwS+OEqDylHgRaPklSRJkiStu0Z+Dq4kSZIkSTOJAa4kSZIkqRUMcCVJkiRJrWCAK0mSJElqBQNcSZIkSVIrGOBKDc074uy1mm+2auP8tnGeZrI2Lu95R5w9qfPVxmW0Llmb629QXVPZDvvocGbj8pqNbda6wQBXkiRJktQKBriSJEmSpFYwwJUkSZIktYIBriRJkiSpFQxwJUmSJEmtYIArSZIkSWoFA1xJkiRJUisY4EqSJEmSWsEAV5IkSZLUCga4kiRJkqRWMMCVJEmSJLWCAa4kSZIkqRUMcCVJkiRJrWCAK0mSJElqBQNcSZIkSVIrGOBKkiRJklrBAFeSJEmS1AoGuJIkSZKkVjDAlSRJkiS1ggGuJEmSJKkVDHAlSZIkSa1ggCtJkiRJagUDXEmSJElSKxjgSpIkSZJawQBXkiRJktQKBriSJEmSpFYwwJUkSZIktYIBriRJkiSpFQxwJUmSJEmt0CjAjYitIuKtEfHNiLg6Iu6KiFUR8cOI+NuIqC0nInaJiHMiYnnJc3lEHBwR6w+o69URsbiUf0dE/DQi9ht1BiVJkiRJ
64Y5DdPtA3weuAE4H/gt8Bjg9cDJwCsjYp/MzE6GiNgD+DqwBjgdWA68Bvgk8MJS5kNExEHAccCtwKnAPcDewKKIeGZmHjbCPEqSJEmS1gFNA9wrgdcCZ2fmA52REXEk8DNgL6pg9+tl/GbAScD9wILMXFLGHw2cB+wdEftm5mldZc0DjqUKhOdn5tIy/kPAJcChEfH1zPzxqDMrSZIkSWqvRqcoZ+Z5mXlWd3Bbxt8InFA+LuiatDcwFzitE9yW9GuAD5SP7+ip5i3ARsDxneC25FkBfLR8fHuT9kqSJEmS1j2TcZOpe8vwvq5xLy3Dc2vSXwisBnaJiI0a5vluTxpJkiRJkh5iQgFuRMwB3lQ+dgemO5Thlb15MvM+4Dqq06O3bZjnBuBO4AkRsclE2ixJkiRJaqeJHsH9GPAM4JzM/F7X+M3LcFWffJ3xW4yQZ/O6iRFxYEQsiYgly5YtG9xqSZIkSVLrjBzgRsR7gEOBXwFvnLQWjSgzT8zM+Zk5f+7cudPdHEmSJEnSWjZSgFse5/Np4JfAbpm5vCfJwKOtXeNXjpCn3xFeSZIkSdI6bOgANyIOpnpW7RVUwe2NNcl+XYbb1+SfAzyZ6qZU1zbM81hgU+D6zFw9bJslSZIkSe03VIAbEe8HPgn8nCq4vblP0vPK8BU1014CbAJcnJl3N8zzyp40kiRJkiQ9ROMANyKOprqp1KXA7pl5y4DkZwC3APtGxPyuMjYGPlI+fr4nzynA3cBBETGvK8+WwJHl4wlIkiRJklRjTpNEEbEf8CHgfuAi4D0R0ZtsaWYuAsjM2yLiAKpAd3FEnAYsB15L9TigM4DTuzNn5nURcTjwGWBJRJwO3APsDTwB+JfM/PEoMylJkiRJar9GAS7VNbMA6wMH90lzAbCo8yEzz4yIXYGjgL2AjYGrgUOAz2Rm9haQmcdFxFLgMKrn665HdSOrD2Tmlxu2VZIkSZK0DmoU4GbmQmDhsIVn5o+AVw2Z5yzgrGHrkiRJkiSt20Z+Dq4kSZIkSTOJAa4kSZIkqRUMcCVJkiRJrWCAK0mSJElqBQNcSZIkSVIrGOBKkiRJklrBAFeSJEmS1AoGuJIkSZKkVjDAlSRJkiS1ggGuJEmSJKkVDHAlSa8EmOEAABMuSURBVJIkSa1ggCtJkiRJagUDXEmSJElSKxjgSpIkSZJawQBXkiRJktQKBriSJEmSpFYwwJUkSZIktYIBriRJkiSpFQxwJUmSJEmtYIArSZIkSWoFA1xJkiRJUisY4EqSJEmSWsEAV5IkSZLUCga4kiRJkqRWMMCVJEmSJLWCAa4kSZIkqRUMcCVJkiRJrWCAK0mSJElqBQNcSZIkSVIrGOBKkiRJklrBAFeSJEmS1AoGuJIkSZKkVjDAlSRJkiS1ggGuJEmSJKkVDHAlSZIkSa3QOMCNiL0j4riIuCgibouIjIhT+6SdV6b3e502oJ79IuJnEXFHRKyKiMUR8epRZk6SJEmStO6YM0TaDwDPBu4Argee1iDPL4Aza8ZfUZc4Io4FDi3lnwRsCOwLnBUR787M44doryRJkiRpHTJMgPs+qsDzamBX4PwGeX6emQubFB4Ru1AFt9cAO2bmijL+48ClwLER8Z3MXDpEmyVJkiRJ64jGpyhn5vmZeVVm5hS15e1leEwnuC31LgU+C2wEvHmK6pYkSZIkzXJTfZOpx0XE2yLiyDJ81oC0Ly3Dc2umfbcnjSRJkiRJDzHMKcqj+LPyelBELAb2y8zfdo3bFHg8cEdm3lBTzlVluP0UtVOSJEmSNMtN1RHc1cCHgecBW5ZX57rdBcAPSlDbsXkZrupTXmf8Fv0qjIgDI2JJRCxZtmzZBJouSZIkSZqNpiTAzcybM/PvM/OyzFxZXhcCLwN+CjwFeOsk13liZs7PzPlz586dzKIlSZIkSbPAVF+D+xCZeR9wcvn4kq5JnSO0m1OvM37lVLRLkiRJkjT7rdUAt+icP/zgKcqZeSfwe+CR
EfHYmjxPLcMrp7htkiRJkqRZajoC3J3K8Nqe8eeV4Stq8ryyJ40kSZIkSQ8xJQFuRDw3Ih5WdkTsDryvfDy1Z/IJZXhURGzZlWce8C7gbuCUSW+sJEmSJKkVGj8mKCL2BPYsH7cpw50jYlF5f0tmHlbefwJ4akRcDFxfxj2LsefYHp2ZF3eXn5kXR8QngEOAyyPiDGBD4A3Ao4B3Z+bSpu2VJEmSJK1bhnkO7nOA/XrGbVteAL8BOgHuV4DXATtSnV68AXAT8DXg+My8qK6CzDw0Iv6b6ojtgcADwGXAxzPzO0O0VZIkSZK0jmkc4GbmQmBhw7RfBL44SoMycxGwaJS8kiRJkqR113TcZEqSJEmSpEk3zCnKmiTzjjibpR/784d8rtMvTW/6Tnl14+vq7n3f255hyhjvc/f7ThubzHuTcvu1uS5NXZvGa0f3cq0rp1/a8coc5vMgvWkHzc+gdvd73zvPg5Z3d19q0t7uMpv0l1H6wnjrebxlMqhPDGrHoP4yXlv76W1Tv+XSr8669xPp2/3mZ9h+0K+cfu0eps3D6rc/HGWbbFLGMN8D420H/ZZLk7aPsk8ab/12t3W8bWTYuobZl42yPU7WNtu0/U2Xb++ya/qd1a/c8b736+pqup8aNG3YfcMw0+vS987HeOt7InV11zlMeeNt171p6r4Hht1H1+XvLbtpHePN06h9vC5dv2nj1THZ5Yy6jfQzzDqsa1uTZQfNfsd1vx/vd/ZM5hFcSZIkSVIrGOBKkiRJklrBAFeSJEmS1AoGuJIkSZKkVjDAlSRJkiS1ggGuJEmSJKkVDHAlSZIkSa1ggCtJkiRJagUDXEmSJElSKxjgSpIkSZJawQBXkiRJktQKBriSJEmSpFYwwJUkSZIktYIBriRJkiSpFQxwJUmSJEmtYIArSZIkSWoFA1xJkiRJUisY4EqSJEmSWsEAV5IkSZLUCga4kiRJkqRWMMCVJEmSJLWCAa4kSZIkqRUMcCVJkiRJrWCAK0mSJElqBQNcSZIkSVIrGOBKkiRJklrBAFeSJEmS1AoGuJIkSZKkVjDAlSRJkiS1ggGuJEmSJKkVDHAlSZIkSa1ggCtJkiRJagUDXEmSJElSKzQKcCNi74g4LiIuiojbIiIj4tRx8uwSEedExPKIuCsiLo+IgyNi/QF5Xh0RiyNiVUTcERE/jYj9hp0pSZIkSdK6Z07DdB8Ang3cAVwPPG1Q4ojYA/g6sAY4HVgOvAb4JPBCYJ+aPAcBxwG3AqcC9wB7A4si4pmZeVjDtkqSJEmS1kFNT1F+H7A9sBnwjkEJI2Iz4CTgfmBBZv5tZh4OPAf4MbB3ROzbk2cecCxVIDw/M9+Vme8DngVcAxwaETs3nSlJkiRJ0rqnUYCbmedn5lWZmQ2S7w3MBU7LzCVdZayhOhIMDw+S3wJsBByfmUu78qwAPlo+vr1JWyVJkiRJ66apuMnUS8vw3JppFwKrgV0iYqOGeb7bk0aSJEmSpIeZigB3hzK8sndCZt4HXEd17e+2DfPcANwJPCEiNpncpkqSJEmS2mIqAtzNy3BVn+md8VuMkGfzPtOJiAMjYklELFm2bFmjhkqSJEmS2qM1z8HNzBMzc35mzp87d+50N0eSJEmStJZNRYA73tHWzviVI+Tpd4RXkiRJkrSOm4oA99dluH3vhIiYAzwZuA+4tmGexwKbAtdn5urJbaokSZIkqS2mIsA9rwxfUTPtJcAmwMWZeXfDPK/sSSNJkiRJ0sNMRYB7BnALsG9EzO+MjIiNgY+Uj5/vyXMKcDdwUETM68qzJXBk+XjCFLRVkiRJktQSc5okiog9gT3Lx23KcOeIWFTe35KZhwFk5m0RcQBVoLs4Ik4DlgOvpXoc0BnA6d3lZ+Z1EXE48BlgSUScDtwD7A08AfiXzPzxaLMoSZIkSVoXNApwgecA+/WM25axZ9n+BjisMyEzz4yIXYGjgL2AjYGrgUOAz2Rm9laQmcdFxNJSzpuoji7/
EvhAZn656QxJkiRJktZNjQLczFwILBym4Mz8EfCqIfOcBZw1TB5JkiRJkqBFz8GVJEmSJK3bDHAlSZIkSa1ggCtJkiRJagUDXEmSJElSKxjgSpIkSZJawQBXkiRJktQKBriSJEmSpFYwwJUkSZIktYIBriRJkiSpFQxwJUmSJEmtYIArSZIkSWoFA1xJkiRJUisY4EqSJEmSWsEAV5IkSZLUCga4kiRJkqRWMMCVJEmSJLWCAa4kSZIkqRUMcCVJkiRJrWCAK0mSJElqBQNcSZIkSVIrGOBKkiRJklrBAFeSJEmS1AoGuJIkSZKkVjDAlSRJkiS1ggGuJEmSJKkVDHAlSZIkSa1ggCtJkiRJagUDXEmSJElSKxjgSpIkSZJawQBXkiRJktQKBriSJEmSpFYwwJUkSZIktYIBriRJkiSpFQxwJUmSJEmtYIArSZIkSWqFKQ1wI2JpRGSf14198uwSEedExPKIuCsiLo+IgyNi/alsqyRJkiRpdpuzFupYBXyqZvwdvSMiYg/g68Aa4HRgOfAa4JPAC4F9pq6ZkiRJkqTZbG0EuCszc+F4iSJiM+Ak4H5gQWYuKeOPBs4D9o6IfTPztKlsrCRJkiRpdppJ1+DuDcwFTusEtwCZuQb4QPn4julomCRJkiRp5lsbR3A3ioi/AZ4I3AlcDlyYmff3pHtpGZ5bU8aFwGpgl4jYKDPvnrLWSpIkSZJmpbUR4G4DfKVn3HUR8ebMvKBr3A5leGVvAZl5X0RcBzwd2Bb43ylpqSRJkiRp1prqU5RPAXanCnI3BZ4JfAGYB3w3Ip7dlXbzMlzVp6zO+C3qJkbEgRGxJCKWLFu2bKLtliRJkiTNMlMa4GbmP2TmeZl5U2auzswrMvPtwCeARwALJ7GuEzNzfmbOnzt37mQVK0mSJEmaJabrJlMnlOFLusZ1jtBuTr3O+JVT0iJJkiRJ0qw2XQFu5xziTbvG/boMt+9NHBFzgCcD9wHXTm3TJEmSJEmz0XQFuDuVYXewel4ZvqIm/UuATYCLvYOyJEmSJKnOlAW4EfEnEbFpzfh5wPHl46ldk84AbgH2jYj5Xek3Bj5SPn5+ShorSZIkSZr1pvIxQW8ADo2IC4HfALcD2wF/DmwMnAMc20mcmbdFxAFUge7iiDgNWA68luoRQmcAp09heyVJkiRJs9hUBrjnUwWmfwq8kOp625XAD6mei/uVzMzuDJl5ZkTsChwF7EUVCF8NHAJ8pje9JEmSJEkdUxbgZuYFwAUj5PsR8KrJb5EkSZIkqc2m6yZTkiRJkiRNKgNcSZIkSVIrGOBKkiRJklrBAFeSJEmS1AoGuJIkSZKkVjDAlSRJkiS1ggGuJEmSJKkVDHAlSZIkSa1ggCtJkiRJagUDXEmSJElSKxjgSpIkSZJawQBXkiRJktQKBriSJEmSpFYwwJUkSZIktYIBrqRG5h1x9oyob223Yyq1aV5ms971MNXrxfWuptZGX5mKOpqWOR3bWhu3vzbOkzQRBriSJEmSpFYwwJUkSZIktYIBriRJkiSpFQxwJUmSJEmtYIArSZIkSWoFA1xJkiRJUisY4EqSJEmSWsEAV5IkSZLUCga4kiRJkqRWMMCVJEmSJLWCAa4kSZIkqRUMcCVJkiRJrWCAK0mSJElqBQNcSZIkSVIrGOBKkiRJklrBAFeSJEmS1AoGuJIkSZKkVjDAlSRJkiS1ggGuJEmSJKkVZlyAGxFPiIgvRcQfIuLuiFgaEZ+KiC2nu22SJEmSpJlrznQ3oFtEbAdcDGwNfAv4FfB84L3AKyLihZl56zQ2UZIkSZI0Q820I7ifowpu35OZe2bmEZn5UuCTwA7AMdPaOkmSJEnSjDVjAtxy9PZlwFLgsz2TPwjcCbwxIjZdy02TJEmSJM0CMybABXYrw+9n5gPdEzLzduBHwCbATmu7YZIkSZKkmW8mBbg7lOGVfaZfVYbbr4W2SJIkSZJmmZkU4G5ehqv6TO+M32IttEWSJEmS
NMtEZk53GwCIiBOBA4ADMvPkmunHAEcCR2bmP9ZMPxA4sHzcAfj1FDZXkiRJkjR9npSZc3tHzqTHBHWO0G7eZ3pn/Mq6iZl5InDiZDdKkiRJkjQ7zKRTlDtHXPtdY/vUMux3ja4kSZIkaR02k05R3g64muoxQdt130k5Iv4IuAEIYOvMvHNaGilJkiRJmrFmzBHczLwG+D4wD3hXz+R/ADYFvmJwK0ma6SJiXkRkRCzqGb+ojJ83LQ0bQUQsjoih/g0v87h4ipokSVJfM+kaXIB3AhcDn4mI3YH/BV5A9YzcK4GjprFtkiTNWiVIvSAzF0x3WyRJmioz5gguPHgUdz6wiCqwPRTYDvg0sFNm3jp9rZMkacL+DvgT4PfT3ZAhvImqzZIkzXgz7Qgumfk74M3T3Q5JkiZbZt5AdU+JWSMzfzvdbZAkqakZdQRXkqSZLiKeHxGnR8TvI+LuiLghIr4fEX/RIG/fa3Aj4gURcUZE3BgR90TE7yLiCxHxuJq0i0s5cyLiyIi4qrTldxHxTxGxYVfa/buuod215Ou8FjZoc+01uBGxYUQcHRHXlLqvi4iPRMRG45UpSdJUmXFHcCVJmqki4gDg88D9wLeBq4CtqS6veSfwtRHLfQvVs9zvLuX+jurxeG8FXhMRO/U5kvpV4MXAd4HbgFcB/19pU+dsqJ9T3azxg8BvqC4D6lg8YnuDal73AK4Bjgc2BN4CPHOUMiVJmgwGuJIkNRAR/wf4HFUg+eLM/J+e6U8YsdztgROoHpO3a2b+vmva7lRPGPg08Lqa7NsBT8/M5SX9UcAvgDdFxN9l5o2Z+XPg5xHxQWBpZi4cpZ09/pIquP0JsFtmrin1fxC4ZBLKlyRpJJ6iLElSM++g+mP4w73BLUBmXj+BcjcA3tsd3JYyf0B1RPc15Znwvd7fCW5L+juBf6P6fp8/Ynua6BwdPrIT3Jb6lwMfnsJ6JUkayCO4kiQ1s1MZfneSy925DHeNiB1rpm8NrA9sD1zaM21JTfrfleGWk9O8Ws8FHgB+WDNt8RTWK0nSQAa4kiQ1s0UZTvYjfrYqw8PHSffI3hGZubIm3X1luP5EGjWOzYHlmXlvzbQbp7BeSZIGMsCVJKmZTjD5eOBXk1juqjLcPDNvm8Ryp9Iq4FERsUFNkLvNdDRIkiTwGlxJkpr6SRm+corKffEkl9vrASbvqO5lVL8hXlQzbcEk1SFJ0tAMcCVJaubzVKf/Hl3uqPwQo95FmeoRO/cCnyx3VO4td8OImIzg91bgjyehHIBTyvCYiNi4MzIiHgV8YJLqkCRpaJ6iLElSA5n5y4h4J9Ujff4rIr5F9RzcrYAdqR4ftNsI5f6qPAf3S8D/RMS5wJVUd1Z+ItWR3WXA0yY4Cz8A9o2Is6iOwN4LXJiZF45Q1v8D3gC8FriiLIsNgL2pHhO03QTbKknSSAxwJUlqKDNPiogrgMOoTsXdE7gFuBw4eQLlnhoRvwAOpQqSXwbcCfwBOAM4fWItB+C9QAK7A6+iOovrH4ChA9zMzIjYBzgC2B84CLiB6sjuh4A1/XNLkjR1IjOnuw2SJEmSJE2Y1+BKkiRJklrBAFeSJEmS1AoGuJIkSZKkVjDAlSRJkiS1ggGuJEmSJKkVDHAlSZIkSa1ggCtJkiRJagUDXEmSJElSKxjgSpIkSZJawQBXkiRJktQK/z9gAWzXBTh1vgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 1152x360 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "visualizeAnswerDataDf(answerDataDf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{259: 1014, 140: 881, 27: 46, 236: 42}"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "showValueFrequencies(answerDataDf)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Process tag Treatment (treatments.csv.gz)\n",
    "\n",
    "Reuses the helper functions defined for the Intake section."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# XPath, relative to the XML root, passed as querySection when extracting treatments\n",
    "QUERYTREATMENT = \"./Treatment\"\n",
    "# XPath used to find the TreatmentStep descendants inside each matched section\n",
    "QUERYTREATMENTSTEP = \".//TreatmentStep\"\n",
    "# file name handed to saveAnswerDataDf for this section's table\n",
    "OUTFILENAME = \"treatments.csv.gz\"\n",
    "# NOTE(review): TITLESTRING and PLOTFILENAME are presumably read as globals by the\n",
    "# shared plotting helper -- confirm against its definition\n",
    "TITLESTRING = \"Number of questions per client in treatment steps\"\n",
    "# dict key under which each answer row stores the title of its treatment step\n",
    "TREATMENTTITLE = \"treatmentTitle\"\n",
    "PLOTFILENAME = \"Treatment.png\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _firstStrippedText(element, query):\n",
    "    \"\"\"Return the stripped text of the first match of query under element.\n",
    "\n",
    "    Returns None when query matches nothing or when the first match has no text.\n",
    "    \"\"\"\n",
    "    matches = element.findall(query)\n",
    "    if len(matches) == 0 or matches[0].text is None:\n",
    "        return(None)\n",
    "    return(matches[0].text.strip())\n",
    "\n",
    "def getAnswerDataListTreatments(querySection=QUERYINTAKE):\n",
    "    \"\"\"Collect one dict per answer found in the treatment steps of all client files.\n",
    "\n",
    "    Reads every file in DATADIR whose name matches FILEPATTERN, visits each element\n",
    "    matching querySection under the XML root and, inside it, every treatment step,\n",
    "    question and answer.\n",
    "\n",
    "    Args:\n",
    "        querySection: XPath query, relative to the XML root, selecting the sections\n",
    "            to scan. NOTE(review): the default is the Intake query; pass the\n",
    "            Treatment query explicitly to extract treatment data.\n",
    "\n",
    "    Returns:\n",
    "        A list of dicts, one per answer. Each dict always holds the keys CLIENT,\n",
    "        TREATMENTTITLE and ANSWERID; the keys TITLE, QUESTIONNUMBER, ANSWERTEXT and\n",
    "        ANSWERTITLE are present only when the matching element exists and has text.\n",
    "\n",
    "    Raises:\n",
    "        KeyError: if an answer element has no ID attribute.\n",
    "    \"\"\"\n",
    "    answerDataList = []\n",
    "    for inFileName in sorted(os.listdir(DATADIR)):\n",
    "        if not re.search(FILEPATTERN,inFileName):\n",
    "            continue\n",
    "        clientId = fileNameToId(inFileName)\n",
    "        root = readGzippedXmlFile(DATADIR+inFileName)\n",
    "        for section in root.findall(querySection):\n",
    "            for treatmentStep in section.findall(QUERYTREATMENTSTEP):\n",
    "                # steps without a usable title are kept, with an empty title\n",
    "                treatmentTitle = _firstStrippedText(treatmentStep,\"./\"+TITLECAPS)\n",
    "                if treatmentTitle is None:\n",
    "                    treatmentTitle = \"\"\n",
    "                for question in treatmentStep.findall(QUERYQUESTION):\n",
    "                    answerDict = {CLIENT:clientId,TREATMENTTITLE:treatmentTitle}\n",
    "                    # optional question fields: the key is left out when the element is missing.\n",
    "                    # Only the missing-element and missing-text cases are tolerated; any other\n",
    "                    # error now propagates instead of being silently swallowed.\n",
    "                    for key,query in ((TITLE,QUERYTITLE),(QUESTIONNUMBER,QUERYQUESTIONNUMBER)):\n",
    "                        text = _firstStrippedText(question,query)\n",
    "                        if text is not None:\n",
    "                            answerDict[key] = text\n",
    "                    for answer in question.findall(QUERYANSWER):\n",
    "                        # copy so every answer row carries its own question-level fields\n",
    "                        answerDictCopy = dict(answerDict)\n",
    "                        answerDictCopy[ANSWERID] = answer.attrib[ID]\n",
    "                        for key,query in ((ANSWERTEXT,QUERYANSWERTEXT),(ANSWERTITLE,QUERYTITLE)):\n",
    "                            text = _firstStrippedText(answer,query)\n",
    "                            if text is not None:\n",
    "                                answerDictCopy[key] = text\n",
    "                        answerDataList.append(answerDictCopy)\n",
    "    return(answerDataList)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the treatments table: extract the answers from the Treatment sections, then pass\n",
    "# them through the shared helpers (defined elsewhere in this notebook).\n",
    "# NOTE(review): answerDataList and answerDataDf are reassigned here, replacing the values\n",
    "# left by the previous section -- run the cells in order.\n",
    "answerDataList = getAnswerDataListTreatments(querySection=QUERYTREATMENT)\n",
    "# presumably counts how often each answer text occurs -- confirm in countAnswerTexts\n",
    "answerTextFreqs = countAnswerTexts(answerDataList)\n",
    "# presumably drops or masks infrequent values based on those counts -- confirm in removeRareDataValues\n",
    "answerDataList = removeRareDataValues(answerDataList,answerTextFreqs)\n",
    "answerDataDf = answerDataListToDf(answerDataList)\n",
    "# OUTFILENAME is set in this section's constants cell\n",
    "saveAnswerDataDf(answerDataDf,outFileName=OUTFILENAME)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA7gAAAFOCAYAAACsbbF2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3debwcVZ338e+XLRDQABpGFOUqI4zr42gQRYGoM+4CCjPqjEr0AdxQUOAxgg5xQ1QUFxwVUKKjDiCCwgRFRwyoqBBQUVEWISoCsoQEISRsv+ePc4pb6XT3rd5ud9f9vF+vflV31TlVp06dqq5frY4IAQAAAAAw7jYYdgEAAAAAAOgHAlwAAAAAQC0Q4AIAAAAAaoEAFwAAAABQCwS4AAAAAIBaIMAFAAAAANQCAS4wg9leZDtsLx52WYbB9s62z7Z9i+37c10sGna5xk2ut7A9MeyyoD9sL261PtRxedtekOdp6bDLAgDoDQEu0EZpJy9sXzJF2q/O5GBx3Nh+rKSlkl4qaStJt0j6q6Q7hliskWJ7fj4IsvewywK0Ms7tNJd7ke0th12WbtjespiHYZdlug2r3dk+JE93YjqnC4wTAlyguqfafsWwC4G+OVDSbEk/kvSQiNgmIh4WEccOuVyjZL6koyRNtQN3Rf7cM+gCYSSM2vKer2rttJ1VSvP0p34UqANH5c9YBrhK5S7mYaaZr97bXTcOydOdmObpAmNjo2EXABgz77f9rYi4f9gFQc+ekLunRcTKoZZkzEXEPwy7DJg+dVzeEXGmpDOHXQ4AQO84gwtUc76k1UpB0b8NuSzoj81yl0uSAQAAaoIAF6jmRknH5++LbHd09cNUD2WxPVGkaTJsaR62wPaDbX/U9h9s32X7Gtvvt71pKf3zbJ+bH5x0p+0LbO9WoYwb2H6H7V/lfLfaPsv20yvke63t79u+2fbdtq+3fartXVrkeeDhVjn/QbYvsr0y93/KVOVtmP7/tX2+7RW219i+1vYJtv++SfrluZ7n514nl5bP8qrTzeN6eJ7OX/J0r7H9iXxfWtOH1rRb1qU086cqj+0n2v5Sntc1ue5+YvtNtjdukWcb2x+z/Zu8jNfY/rPtC3M72r5cRk1edrhfqY7Wa8sV2vcOtr+Q62eN7dtyu9zf9oYt8pTb/Wa5zVyR2/1Ntk9xuo+6Vf3sZfsc23+1fU9uG1fY/m/br2yVr8W4Gtvr2K0neXzPsP2VvA6scdpGXGr7w7Z36mA8Uy3vLWwfYfti26vytK6y/Wnbj2yRp+Pl3Wk7nWKeWj5kKtdX5PVya6d1/Frba53W/RNtb1tlOqVxLva624BrG8q9uDFtrpNZto+0fZntv+X+WzaMe8L2Z3L9rc7pLrH9LtubtyjPdrYPs/3dvKxW277d9i9sv69xGjnPUknXln431v2ictrS8h3I/9g4trs8vj1sn277Oqftwqpc7m/ZfqPtDXK6RXm62+esP2yY5tIB1MlWto/z5Lb7Oqf/vJbtver8AAMVEXz48GnxkbRYUkg6RdJDlO7TCkkHNEn71TxscZNhkT8TLaYzUaRpMmxpHvYOSb/P3++QdHdpvGfltG+RdL+k+0plDUlrJT2rybgX5eFflnRG/n6PpJWlvPdKemWLcj9I0vdLae9vmO59kg6aYrrfKk3ntvz9KRWXz2xJ55amd3dD2e+StFdDnouVDlgU9bcq/75R0sUdtI3HSbqpNK07lM7yh6SrJL0zf19adVmX0szPaZa3GH5Qrtti2n/L9Vf8/qGk2Q15tpd0fcNyXZGXWdHvTTntI3N93FGqxxsbPo+s0r6VHuJ1VynNSq3bdr8vafM27f7tki7N39eU6jgk3SpphyZ5P1RKE5JubyjDjR1uB8rtdRzXE0v6SEOdrMr1Uvxe3JBnce6/qJPtmdJ6sbyU5p5SO4rc5pptizpe3uqwnU5RRwvUZH3Nw4r5
eU3p+525fEXZrpW0VQdt6lO5fEX+mxvK/akmy+IYST/X+tu6LUtpX6F12/qdWnd9u0zS3zUpz+mlNGtzXZe3MVdL2q4hzxm53A+sVw2fw5os377/j415uzuwNJ1ied3R0G/TnPawPO5iuaxomOYZfa6TQ/Nyj1wf5bw3SXpcL/PDh88gP0MvAB8+o/xRKcDNvxfl33+SNKsh7aAD3JVKOwbPzv03kbR//tMKSe9V2lk4WnmHRymouTAPv6jJuBeVxn2v0s7HZnnYDpK+V/pzaxZInJmHXyLp+aU/4q0kHZnLc1/jn2hpun9T2pl4s3JAJmkbSQ+uuHw+r8kdkjcWy0TSjkpBXvEHu2Obel3QRbvYWNJvc/4/SNo9999A0svyn3+x87m06rIupZmf0yxvMmxvTQZth0t6aKk9vEDSlXn4FxryfSn3v0rSbpI2yP1nSXqipA9I2rvFclqvTVdp37kNFTs3SyXtVJrmgZoMEE5qs3xuUwoeXiBpw1zHu0n6cx5+WpP6LXYAjy7qJw+bK2kfSV/scHkX9TCu68nhpWX0WUnbl4Ztq7TuHNmQZ7E6DHAlzcnLKiSdJunJkjbMwx4j6WuaDIS2bMjb1fLupJ1OUUcL1GR9zcOWl8r2C0nPzP03krSnJg84fLSL6bb9b2hYFn/L03qlpE3ysO0lbZy/75zb0j2SPijpEbn/hpKeqXRwLySd22QaH5D0NkmP1eS2YWNJe0i6KOdb0iTfRDEPU8xnsXwH8T82lu1O6QDt3/I4vqh1DxpuLemFkr5eLOsm7XF+m3H3o05WKr1Z4KWlNrGHpGvy8N8Uba+X+eHDZxCfoReAD59R/mj9APfBSkdzQ9LBDWkHHeDeI+nvmwz/Ymn8X2oyfHtNnqV7VMOwRaW8RzbJu6kmj7af1DDsn3L/30ua02K+FuY0/9Nmugd2uWwmNBnIvLHJ8NmaPPr8lTb1uqCLab9Wk2cUdmoyfLfS/C2tuqxLaearSYCrtOO1PA97QYu8OygF9fdI2rbU//Kcr+lZxhbjKpbTem26Svsutc2r1XBGOQ8vjvbf39i2S8tndYt2v48mD25sUur/r7n/77ppV1PUwziuJw/N7SEkHd1BvsXqPMD9YO7/9Tbj/U5Oc1hD/66WdyftdIr5XdBsfc3DinXuRqUnrjcOPzQPv6aL6bb9b2hYFiHp+W3S/Vgttod5+NaavIpjXgdl3FrpoN39TZb5RFG2KcZRLN9B/I+NZbuT9PSc/w7l4LNivqI9zm+Tph91cr/ygYiG4Tsp/feFpNf0Oj98+Aziw3XwQAci4nZJH80/393qfqYB+UZEXN2k//+Wvn+4cWBE/FEpwJDSmbpmVkv6ZJO8ayR9PP/cx7ZLg/fL3RMjYlWL8X4td5/j5vda3qp0ZrEbL1c6yn6jpJMaB0bEak0uq1e0mH639s3dMyLiiibT/pGkC/o4vcJ8pR2930TEuc0SRMQfJP1M6ezS/NKg23O3o3sFu5Xbyj7553F5eTQ6SdJflC6h3bfJcEk6vUW7P0tpZ2qWpPK91sV8zrE9u+OCtzeO68m+Sgd7blM6SzdIxbx+vE2ar+fuP7cY3unynk4nRMStTfp/K3cfPeD/hMsi4nvNBtjeQdKzlM66fbFZmohYoRTUSK3rv1W+C5XW0107KXATg/gfG9d2V2yrNla6Baqf+lEnP4qIHzf2zP95p+ef5e32IOcH6AivCQI69xmlSxT/Tum+nfX+jAfk1y3635S7azS5A9Dor0qXnm3VYviyiLizxbDzc3dLSY9WujxJmtzReY/tw1vkLcxW+sO7qaH/soi4d4q8rTw1d38UEfe1SHNe7m6udNT58i6n1Wra57dJc76k3fs0vUJR54+1fWObdHNyt/wQkXMk7SLpI/mhKadL+llE3NXnMhYeUyrHD5sliIj784NR/l2Tddro4hZ577F9k9J6WG7XP1e6t2xbST+1/VlJ34+Iazueg/WN43ryjNz94QCXtfID
a7bLP89x64eobZK7TR9wo86X93RqWjalgzSFLZXOmA/CT9sMK9rZFpKuW/cYyzq2yN316t/pQWlvyuPaTmm72ejhlUraWl//x8a83V2VP49V2lYdr3QA4oqIaDUfU+pjnSxtM5nzld4oUd5uD2R+gG4Q4AIdiojVto9WekjI4bb/s82ZmX66oUX/Irj7a5s/kSJN06frat0dtHbD5mpyx704E7je0zVbaHY27eaKeZuZm7vtyn5dk/T9UIzr+jZp2pWrW0Wdz1La4ZpKuc4/IulpSvcMviV/7rV9sdI9oidGf98HXK7vKsuo1fL5W5u8a3L3gXYdEbfZfq3SLQNPlvQFScoHBL6ndPljuwMT7YzjelK0kz/1MI4qylcGbFMhfauz6x0t72nWtGwRsaYUUA6ybO3aQVH/G6nzbYNsH6Z0xUsxI/cpnfW/O/+eo3Qpfq9nqPv9Pza27S4i7rP9b0pXADxG0ifyZ4Xt8yT9l6SzuwgO+1UnVbZ3D2y3Bzg/QMe4RBnozheUHj6xldL9VzNRsf14eUS4wmd5k3G0OvPaiU2nTlIbRZ1/u2KdLyoyRsTaiNhL6WEzH1W6jDlKv6+0/X8GVO5pXUYRcY7SWdQDlR6wcr2kh0l6naSltk+YxuKMynoyaOX9ia0qzOfEsAo6xtq1g6L+f1WxnS0oMtp+gtIBMCu9Du8JSg/s2zoiHhYRD9PkJaktTw0PyVi3u4hYpnTG8zWSvqJ0YGxrpUt/vy1pSRe31wytTgY0P0DHCHCBLkTEWk3ez3aI7YdOkaXYMWm1oz+nRf/p0u6ys/Kw8hmEv+buo/pfnEqKsrSb/nal772cBWs17ar1VvbApabl9z42aNUeeq7ziPhZRLwrIp6pdIDm1Upn9+aqyb3MPSjXd5Vl1M/lo4hYFREnRsQrI+IRSjvtJ+bBB9h+SRejHcf1pJj+9tM0HWl48zqTFfXf6nLTdvZR2h88NyLeFhGXN7nto8pZ4WEY+3YXEXdFxNciYr+I2EHp7OeHlQ5AvkjpsvFO9KtOqmzv1ttuD2B+gI4R4ALdO1npFTEPUnoKajvFpZ/btRi+c78K1aV5bR7Is0furlR67UChuB/sRQMrVXuX5u4ubcr+3Ny9U9J6D4Pqw7Tb3WO7R4v+5cuAO20PRZ0/2fYj2ky7koi4MyJOUTrTKUlPa3hIzv25281Zm2s0Oa/PaZbA9gaafBDWpc3S9EveaT9Q6cy11Hr5tDOO60kxv/NtbzaoieR7nIsd6+me117a6bAVl2v2WvainW1te5cO8xbboV80G5i3Cc9oNkyTdV88WG5a1bHdRcS1EXGEpFNzr8ZtVdvp9rFO2m0ji2FTbrcrzA/QdwS4QJciPfRlUf75FrV/Om3xYI29GgfYniXpkL4WrnObSzq4sWcu2zvzz9Mb7p1ZnLsvsP3CdiO3PYiHwpyh9Ef/EE0GaOVpzlZ6/6eUnnbcz8s8v5G7r8gPbGqc9q5qEfxGxB1Kr3mQmreHhyi9F7KZHyhdGr+hpI+1K2BjndvepFVaScXDh6zJB49Ik0/FrHr/6ANyWzkj/zy4RWC4v6RHKO3kf6PJ8I5NMZ/S5LzO6mL047ienK40z1tJ+o8BjL9sce4e1u4AjJOO21QbXbfTEdCXskfE7zV5MOOjtlveL2p7s9xmC8UzJJ7UIsuRSgdym7m99H1Y9b84d8eq3fWwraoy3cW520ud7JH/yxrzPFaTT0/+Rqn/ILe9QEcIcIHefF3pybybafJsYTOn5e4Btl9f7Fzke5/OUe9PpuzVKkkfsH1wcZbH9mOU7pl5nNJDNo4pZ4iI7yoFMJZ0pu3DbT/wwAnbW9ve2/ZZSg+a6KtIr40o7qU8xvaBpXrdUdISpdc6rFZ6J2A/naq03GcpPaXy2Xm6G+RLX8/Qujt+jYr28B7be9reKOd/htLrMpruKETEPZIOUgoIX237W7afUgy3vbHtebY/qnXPIkrSb2wfbXvnYkck79w8XenJ
4JJ0cUTcVsrz29x9drNAvoKjlc6eP1zp3qud8nRn2T5A0qdzui9Ger1RP7zZ9rm2/832AwedbG9p+whNnjFu+pqlKYzjenKLpPflnwttH2/7gcsWbW9r+522+xH8HqN05v6hki60/a/ls8a2H2X7QKWzPnv3YXqFXtvpMBVlf10f7k18u9L7SXeX9APbz85XScj2hraflJfzNVr3gOz3c/cltt9dHIyyPdf2xyS9W+lVVeuJ9GC64mF7r++x/N0a13b3Yts/tX2A7QduIbA9O28f/z33atxWFdN9tVvf5tKPOrld0hm2X2yns/O2d1N6MvKsXI7TSum7nR+g/2IEXsbLh8+ofpSOgoakU9qkKV4EX3wWN0mzsSYf6lO87H5V/n6r0pm8UD7x1ZB3aR62oMX05+fhy9uUsek4NPmi+i8r7YSH0lMzbyuV9V5Jr2ox3s2VnsBbpL0/5729oU5ObjHd9eqqw+UzW+nJuNGi7Gsk7dVJnXQw7ccrvdqimNbflILpUHpVwjvz96VN8m6ldHl7uZx35O9/VHpAR8tlqrQjubaUf3VuR/eW670hz8qGZXprrq+i382Sntyk3V5dWrY3KZ19Xi5pu1K6YhwTTcr6MqUj90Wa2xqm+7+SNu9m+eRyhKT5pX6HNLS9OxraREj6QofLumiv47qeWNJxDdNaqclt0HrT0OS2b1GT8bVb3n+vdPCnXC+3aHLdKD779WN5d9JOp6ijBWq9vjadbtU6mWK6ry/lvUtp/V8u6dgqy6LJ+F6kddf1Nbn+y+tcSNq+Id83G9rnitwNpXvz27WH95XyFleoLJd0SNXlqx7+x8a13SkFluWyrW6o91A6ULtRQ77nloavVbqqZ7ka9lP6UCeHluZvtdJ/XJHnJkmP78f88OEziA9ncIHenaEp7kOJdObtn5UuK12utMG/U2mn4WmSfjXQEk4tJP2LUlD2O6UziLdJ+h9Ju0a6T3P9TOkezpdLeqlSPVyvFHQWf/ynKe3AvW0ghY5YrbRDt7+kHyn9oc5W2kk8SdKTIuLbA5r25ZKekqdzg9I836gUSOys9MfeKu9tSu+aPEGpzjZQCjg/o/Reweta5c35T1Z6r+8nlY6i3yfpwXkcSyUdlYeX7aX0oI+f5GluobTTe5nS0f4nRMRlDdO5R9LzlF7v8BelwHz7/Kn0mrmIOFvp0scTldr+bKXl9GOlS8tfEK3fLduNr0s6QOks+++UDiZtobSMzpK0Z0S8sctxj+t6EhHxDqUze6cqLcvNlHaOL1U60/6hPk3rakn/qHTbxg+V6meO0s71ZUpt/iVKr3Hqi36002HJ6/IBki5SqqNHKpV7qgcXthrfdyTtqHTVyqVKy3hLpYMpFyqt60+LdAVM2SuVniVRrDNW2lbsFxGtbpkovF/Su5SWrzVZ99N2yfKYtrvzJL1W6cDZr5W2iw9S2o5/X+mp7y+LhndgR8R5kl6u9C7au5Ru89he6Unx5XS91smtkp6u9D/zV6Xt3fVK2/Kn5P/AnucHGARHxLDLAAC1Y3uB0oPIzo+I+cMtDXphe5HSQYMvR+n1KgBQN7aXKj0I6vURsXi4pQG6wxlcAAAAAEAtEOACAAAAAGqBABcAAAAAUAsEuAAAAACAWuAhUwAAAACAWhjpx+d366EPfWhMTEwMuxgAAAAAgAG45JJLbomIuY39axngTkxMaNmyZcMuBgAAAABgAGw3vtNbEvfgAgAAAABqggAXAAAAAFALBLgAAAAAgFogwAUAAAAA1AIBLgAAAACgFghwAQAAAAC1QIALAAAAAKgFAlwAAAAAQC0Q4AIAAAAAaoEAFwAAAABQCwS4AAAAAIBaIMAFAAzdxMIlwy7CtBmVeR2VcmCwWi1nlv/6qJN6YrnOPAS4AAAAAIBaIMAFAAAAANQCAS4AAAAAoBYIcAEAAAAAtUCACwAAAACoBQJcAAAAAEAtEOACAAAAAGqBABcAAAAAUAsEuAAAAACAWiDABQAA
AADUAgEuAAAAAKAWCHABAAAAALVAgAsAAAAAqAUCXAAAAABALRDgAgAAAABqgQAXAAAAAFALBLgAAAAAgFogwAXG3MTCJcMuAgC0xDYKADCdCHABAAAAALVAgAsAAAAAqAUCXAAAGnBZbe+ow9HXzTJiuQ4G9dpf1OfMRoALAAAAAKgFAlwAAAAAQC0Q4AIAAAAAaoEAFwAAAABQCwS4AAAAAIBaIMAFAAAAANQCAS4wjXhsPQAAADA4BLgAAAAAgFogwAUAAAAA1AIBLgAAmDbcqgEAGCQCXAAAAABALRDgAgAAAABqgQAXAABMu2Feqjwql0mPSjn6qZN5quP8zwQsN4w6AlwAAAAAQC0Q4AIAAAAAaoEAFwAAAABQCwS4AAAAAIBaIMAFAHSMh4wAADD6ZuL/NQEuAAAAAKAWCHABAAAAALVAgAsAAAAAqIWOA1zbz7N9pu0bba+1fb3tc22/uEnaXW2fY3uF7btsX2b7ENsbthn/S20vtb3K9h22f257v07LCQAAAACYWToKcG1/VNL/Spon6SxJH5e0RNJcSfMb0u4l6QJJu0s6U9LxkjaRdJykU1qM/yBJZ0t6oqSvSjpR0sMlLbZ9bCdlBQAAAADMLBtVTWj7AEmHS/qypAMj4u6G4RuXvj9YKTi9T9L8iFiW+79X0nmS9rX9qog4pZRnQtKxklZImhcRy3P/90u6WNKhtr8ZET/tfDYBAAAAAHVX6Qyu7VmSPiTpT2oS3EpSRNxT+rmv0lndU4rgNqdZI+k9+eebG0bxBkmzJB1fBLc5z22Sjs4/31SlvAAAAACAmafqGdx/VgpYPynpftsvUbqMeI2ki5qcVX1u7n63ybgukLRa0q62Z0XE2gp5vtOQBgAAAACAdVQNcHfO3TWSfqEU3D7A9gWS9o2Im3OvnXL3ysYRRcS9tq+V9ARJj5H0uwp5brB9p6TtbM+OiNUVyw0AAAAAmCGqPmRqm9w9XFJI2k3SgyQ9WdL3lB4k9Y1S+jm5u6rF+Ir+W3aRZ06zgbYPtL3M9rKbb765WRIAAAAAQI1VDXCLdPdK2jMifhwRd0TEryW9XNJ1kvaw/cxBFLKKiDghIuZFxLy5c+cOqxgAAAAAgCGpGuCuzN1flB8AJUn5cuFz88+n527bs62l/itL/armaXWGFwAAAAAwg1UNcK/I3ZUtht+Wu5s1pN+xMaHtjSQ9Wuls8DVNptEsz7aSNpd0HfffAgAAAACaqRrg/kDp3tvH226Wp3jo1LW5e17uvrBJ2t0lzZZ0YekJylPleVFDGgAAAAAA1lEpwI2IP0o6W9KjJB1cHmb7+ZJeoHR2t3jFz+mSbpH0KtvzSmk3lfTB/PNzDZM5WdJaSQfZnijl2UrSEfnn56uUFwAAAAAw81R9TZAkvVXSP0r6RH4P7i+ULjXeW9J9kvaPiFWSFBG32z5AKdBdavsUSSsk7an0OqDTJZ1aHnlEXGv7cEmflrTM9qmS7pa0r6TtJH28yft2AQAAAACQ1EGAGxHX2X6apP9QClR3l3S70pndD0fERQ3pv2V7D0lHStpH0qaSrpb0TkmfjohoMo3P2F4u6TBJr1M6w3y5pPdExJc7nz0AAAAAwEzRyRlcRcTNkt6WP1XS/0TSizucxtlKQTMAAAAAAJVVfcgUAAAAAAAjjQAXAAAAAFALBLgAAAAAgFogwAUAAAAA1AIBLgAAAACgFghwAQAAAAC1QIALjJmJhUuGXQQAAABgJBHgYqQQvAEAAADoFgEuAGDGGfWDaaNevpmMZQOg39iu9BcBLgAAI4gdHgAAOkeACwAAAACoBQJcAAAAAEAtEOACAAAAAGqBABcAAAAAUAsEuAAAAACAWiDABQAAAADUAgEuAAAARgavyALQCwJcAAAAAEAtEOACAAAAAGqBABcAAAAAUAsEuAAAAACAWiDABQAAAADUAgEuAAAAAKAWCHABAAAAALVAgAsAAAAAqAUCXAAAAABALRDgAgAAAABq
gQAXAAAAAFALBLgAAAAAgFogwAUAAAAA1AIBLgAAAACgFghwAQAAAAC1QIALAAAAAKgFAlwAAAAAQC0Q4AIAAAAAaoEAFwAAAABQCwS4AAAAAIBaIMAFAAAAANQCAS6AoZpYuGTYRRiamTzvqI52Mr2obwAYbwS4AAAAAIBaIMAFAAAAANQCAS4AAAAAoBYIcAEAAAAAtUCACwAAAACoBQJcAAAAAEAtEOACAAAAAGqBABcAAAAAUAsEuAAAAACAWiDABQAAAADUAgEuAKAjEwuXDLsIAGYQtjnDRf1j3BDgAgAAAABqgQAXqICjl72Zjvqbqctops73OGJZAQAweAS4AIAZgyATAIB6I8AFAAAAANQCAS4wQji7BIwH1lUA3WL7gWGaCe2v6wDX9mtsR/7s3yLNS20vtb3K9h22f257vynGu5/ti3L6VTn/S7stJwAAAABgZugqwLX9SEnHS7qjTZqDJJ0t6YmSvirpREkPl7TY9rEt8hwrabGkbXP6r0p6kqSz8/gAAAAAAGiq4wDXtiWdLOlWSZ9vkWZC0rGSVkiaFxFvjYh3SHqypD9IOtT2Mxvy7Crp0Dz8yRHxjoh4q6Sn5fEcm8cLAAAAAMB6ujmD+3ZJz5X0ekl3tkjzBkmzJB0fEcuLnhFxm6Sj8883NeQpfn8opyvyLJf02Ty+13dRXgAAAADADNBRgGv7cZKOkfSpiLigTdLn5u53mwz7TkOaXvIAAAAAACCpgwDX9kaS/kvSnyQdMUXynXL3ysYBEXGD0pnf7WzPzuPeXNIjJN2Rhze6Knd3rFpeAAAAAMDM0skZ3P+Q9I+SFkTEXVOknZO7q1oMX9WQrmr6LVtN0PaBtpfZXnbzzTdPUTwAAAAAQN1UCnBt76J01vbjEfHTwRapOxFxQkTMi4h5c+fOHXZxAAAAAADTbMoAN1+a/BWly43fW3G8jWdoGzWesa2afmXF6QPAjHiZOXpHOwEAoD6qnMHdQune18dJWmM7io+ko3KaE3O/T+bfV+TuevfM2t5W0uaSrouI1ZIUEXdK+oukLfLwRo/N3fXu6QUAAAAAQJI2qpBmraQvthj2VKX7cn+sFNQWly+fJ+lZkl5Y6ld4USlN2XmSXpvznFwxDwAAAFuwAZ4AABGaSURBVAAAkiqcwY2IuyJi/2YfSWflZF/O/U7Nv09WCowPsj1RjMv2Vpp8AvPnGyZV/D4ypyvyTEh6ax5fY+A79rg0DgAAAAD6o6P34FYVEddKOlzS1pKW2f6s7eMkXSZpBzV5WFVEXCjpE3n4ZbaPs/1ZScvyeA6LiOWDKC8AAAAwzjhpAiRVLlHuSkR8xvZySYdJep1SMH25pPdExJdb5DnU9q+VztgeKOl+SZdK+lhE/M+gygoAAAAAGH89BbgRsUjSojbDz5Z0dofjXCxpcQ/FAgAAAADMQAO5RBkAAAAAgOlGgAsAAAAAqAUC3BHU7CEBVftVHWf5d7thxe+q05ruBxw0K1u7Mkw1b1XKX6W+ujHouutm3qqk76UddjP9bteFTsvaavyt2tB0riO9tPlmw6Yqf69l7rSOGstUZR1tVSf9XK/6ta73Y7zt5q8f61mveplGlbbSyfBW45uObe4g1vcqw6bK0+v6PsyHGVUte7/L2Ov/U9X1vNNtXCdl6Kcq02lXxl7+y3s1HdPpdzk73c/odd9s3B9YRoALAAAAAKgFAlwAAAAAQC0Q4AIAAAAAaoEAFwAAAABQCwS4AAAAAIBaIMAFAAAAANQCAS4AAADQB+P+ehWgDghwAQAAAAC1QIALAAAAAKgFAlwAfcOlWQAAABgmAtwaGUZwUceAppd5qpp3uuutyvSmSlMe3ph23NvBKJR/EGVoNs7paN+DHl+RbxSWmzS8ckzndFtNq5MydFPeUVvWvZjJ8zCxcEkt5n9c9FrXM3VZzfR2Wqd5J8AFAAAAANQCAS4AAAAAoBYIcAEAAAAAtUCACwAAAACoBQJcAAAAAEAtEOAC
AAAAAGqBABcAAAAAUAsEuAAAAACAWiDABQAAAADUAgEuAAAAAKAWCHABAAAAALVAgIuOTCxc0lH/VmkmFi6plKeTcvRStn5Mf9g6qZNR0GmbGVYZ+plvmBrXv07Sd9t/FJZxv/WrvFXH00sb7XSZD9Kwpz8I0zlP/dq+j8py6GYfoNf9hmEatXJ3s/3pdR563ecbZB0Ocv9xlLbDMwkBLgAAAACgFghwMXY4AgYAAGYq9oOA9ghwAQAAAAC1QIALAAAAAKgFAlzUCpftAAAAADMXAS4AAAAAoBYIcAEAAAAAtUCAi65wKTAAAACAUUOACwAAAACoBQJcAAAAAEAtEOACAAAAmBK3qGEcEOACAAAAfUIQCAwXAS4AAAAAoBYIcAEAAAAAtUCACwAAAACoBQJcAAAAAEAtEOACAAAAAGqBABcAAAAAUAsEuAAAAACAWiDABQAAAADUAgEuAAAAAKAWCHABAAAAALVAgAsAAAAAqAUCXAAAAABALRDgAgAAAABqgQAXADDWJhYuGXYRADTBuglgGAhwAQAAAAC1QIA7Ivp1lLPX8Yzi0dZ+l2mU5rHbskwsXNI07yjN26AV81qe55k0/+jMKLYNtvuD0Wp+Wm03gUEYdlsb9vT7gXW2uW7qZKbVIwEuAAAAAKAWKgW4th9ie3/bZ9q+2vZdtlfZ/rHt/2u76Xhs72r7HNsrcp7LbB9ie8M203qp7aV5/HfY/rnt/bqdQQCja6YdUcTMQduujrqqt8blOwrLexTKAGBwqp7B/RdJJ0raRdLPJX1S0jclPVHSSZJOs+1yBtt7SbpA0u6SzpR0vKRNJB0n6ZRmE7F9kKSz83i/mqf5cEmLbR/byYxhPPAnAwAAAKBfNqqY7kpJe0paEhH3Fz1tHyHpIkn7SHqFUtAr2w9WCk7vkzQ/Ipbl/u+VdJ6kfW2/KiJOKY1rQtKxklZImhcRy3P/90u6WNKhtr8ZET/tdmYBAAAAAPVV6QxuRJwXEWeXg9vc/0ZJn88/55cG7StprqRTiuA2p18j6T3555sbJvMGSbMkHV8EtznPbZKOzj/fVKW8aK3xgTydnEGt89nWcZk3HixQD/1YJt2su51Od6a3nVGa/+luM4NWpSyDKu9MeTDdMOdtVOu1Dsu+023/dM3ndExnnP/DxqXso1RnvejHQ6buyd17S/2em7vfbZL+AkmrJe1qe1bFPN9pSAMAAAAAwDp6CnBtbyTpdflnOTDdKXevbMwTEfdKulbp8ujHVMxzg6Q7JW1ne3YvZQYAAABmslE7Uzdq5cF46/UM7jFKD4Q6JyLOLfWfk7urWuQr+m/ZRZ45zQbaPtD2MtvLbr755valHnNsBIajDvXey3t3Z6p2816HehnGO1TrUG9V1GE+6zAPGIxxaBvjUMa6ou77g3rsTtcBru23SzpU0u8lvbZvJepSRJwQEfMiYt7cuXOHXRwAAAAAwDTrKsDNr/P5lKTLJT0nIlY0JGl7trXUf2UXeVqd4QUAAAAAzGAdB7i2D5H0GUm/UQpub2yS7Irc3bFJ/o0kPVrpoVTXVMyzraTNJV0XEas7LTMAYHpxWRUA9IbtKNCdjgJc2++SdJykXyoFtze1SHpe7r6wybDdJc2WdGFErK2Y50UNaTDDjMpGfqbfwzodr+2YLnVZJoVxmZ9RKGe3r00atmGXd9jTn2la1TfLYXpMVc8sh+Gp+7Mx+qmX14OOs8oBru33Kj1U6hJJz4uIW9okP13SLZJeZXteaRybSvpg/vm5hjwnS1or6SDbE6U8W0k6Iv/8vAAAAAAAaGKjKols7yfp/ZLuk/QjSW+33ZhseUQslqSIuN32AUqB7lLbp0haIWlPpdcBnS7p1HLmiLjW9uGSPi1pme1TJd0taV9J20n6eET8tJuZrLOJhUu0/JiXDLsYAMbMTDmKCyBhnQcwU1Q9g/vo3N1Q0iGSjmryWVDOEBHfkrSHpAsk7SPpbZLukfROSa+KiGicSER8RikI
/q3S+3UPlHSjpAURcVgH8zXSBnFpxTj/cU1H2ZtNY5zrDL0Zp2U/KmUdlXIAQF2N0mXR7Dc1N8g66GXc/SxXHZZzpTO4EbFI0qJORx4RP5H04g7znC3p7E6nBQAAAACY2bp+Dy4AAAAAAKOEABcAgBmiDpeeAZ2gzQMzDwEuAAAAAKAWCHABAAAAALVAgAsAAAAAqAUCXAAAAABALRDgAgAAAABqgQAXAAAAAFALBLg1NIqPxK9Spl7KPYrzXNU4l70TM2U+G/Vjvke97oZRvlGvk0bjVl60x/JMBlkP1PFgUK/do+7GBwEuAAAAAKAWCHABAAAAALVAgFsDw7pkYpwv1RhE2VuNs9fLswddz1ONf9jLedCX+A7rErti2KCmP+zlhsEZ9mXv49S2RrGsw9zed2Ni4ZKRLFcnOi1/N/M76nU06FvFqoynk/GPen02areejNu81AEBLgAAAIBaI9CcOQhwAQAAAAC1QICLvuHIGAAAAKYb+6AoI8AFAAAAANQCAS4AAAAAoBYIcAEAAAAAtUCACwAAAACoBQJcAAAAAEAtEOACAAAAAGqBABcAAAAAUAsEuAAAAACAWiDAxUgY1xd091LucZ3nTvVzPotxNXb7Pf5+p+2XcWwz/SpzN+Op0k6mu07HZRmOSzmnU9U66Xe6TvQ6znL+fo5rEPlapRvm/8KgxjeT1sd+zOsw62uY+wYzqZ1MhQAXAAAAAFALBLgAAAAAgFogwAUAAAAA1AIBLgAAAACgFghwAQAAAAC1QIALAAAAAKgFAlygBng0PAAAAECACwAAAACoCQJcAAAAAEAtEOACAAAAAGqBABcAAAAAUAsEuAAAAACAWiDABQAAAADUAgEuAAAAAKAWCHABAAAAALVAgAsAAAAAqAUCXAAAAABALRDgAgAAAABqgQAXAAAAAFALBLgAAAAAgFogwAUAAAAA1AIBLgAAAACgFghwAQAAAAC1QIALAAAAAKgFAlwAAAAAY21i4ZJhFwEjggAXAAAAAFALBLgAAAAAgFogwAUAoKa4ZA8AMNMQ4AIAAAAAaoEAFwAAAABQCyMX4NrezvaXbF9ve63t5bY/aXurYZcNAAAAADC6Nhp2Acps7yDpQknbSPq2pN9LerqkgyW90PazIuLWIRYRAAAAADCiRu0M7n8qBbdvj4i9I2JhRDxX0nGSdpL0oaGWDgAAAAAwskYmwM1nb58vabmkzzYMPkrSnZJea3vzaS4aAAAAAGAMOCKGXQZJku39JZ0o6YSIeGOT4ecqBcD/FBE/aDeuefPmxbJlywZT0D6o+tqG5ce8pOM8zfL2mr+b10yQn2U3E/P3c9n1mn/c6m7c87Psxjc/y25887PsZm5+lv3g8zfW8SiyfUlEzGvsPzJncJUuQZakK1sMvyp3d5yGsgAAAAAAxswoBbhzcndVi+FF/y2noSwAAAAAgDEzSpconyDpAEkHRMRJTYZ/SNIRko6IiA83GX6gpAPzz50kXTHA4gIAAAAAhmf7iJjb2HOUXhNUnKGd02J40X9ls4ERcYKkE/pdKAAAAADAeBilS5SLM66t7rF9bO62ukcXAAAAADCDjdIlyjtIulrpNUE7RMT9pWEPknSDJEvaJiLuHEohAQAAAAAja2TO4EbEHyR9T9KEpLc2DH6fpM0l/RfBLQBg1NmesB22Fzf0X5z7TwylYF2wvdR2R0fD8zwuHVCRAABoaZTuwZWkt0i6UNKnbT9P0u8k7SLpOUqXJh85xLIBADC2cpB6fkTMH3ZZAAAYlJE5gys9cBZ3nqTFSoHtoZJ2kPQpSc+IiFuHVzoAAHr2bkmPk/SXYRekA69TKjMAACNv1M7gKiL+LOn1wy4HAAD9FhE3KD1TYmxExJ+GXQYAAKoaqTO4AACMOttPt32q7b/YXmv7Btvfs/2vFfK2vAfX9i62T7d9o+27bf/Z9hdsP7xJ2qV5PBvZPsL2Vbksf7b9EdublNIuKN1Du0fOV3wWVShz03tw
bW9i+722/5Cnfa3tD9qeNdU4AQAYlJE7gwsAwKiyfYCkz0m6T9JZkq6StI3S7TVvkXRal+N9g9K73Nfm8f5Z6fV4+0t6me1ntDiT+nVJu0n6jqTbJb1Y0v/LZSquhvql0sMaj5L0R6XbgApLuyyvleZ1L0l/kHS8pE0kvUHSk7oZJwAA/UCACwBABbYfL+k/lQLJ3SLitw3Dt+tyvDtK+rzSa/L2iIi/lIY9T+kNA5+S9PIm2XeQ9ISIWJHTHynpV5JeZ/vdEXFjRPxS0i9tHyVpeUQs6qacDV6tFNz+TNJzImJNnv5Rki7uw/gBAOgKlygDAFDNm5UODH+gMbiVpIi4rofxbizp4HJwm8f5A6Uzui/L74Rv9K4iuM3p75T0NaX/93ldlqeK4uzwEUVwm6e/QtIHBjhdAADa4gwuAADVPCN3v9Pn8T4zd/ewvXOT4dtI2lDSjpIuaRi2rEn6P+fuVv0pXlNPlXS/pB83GbZ0gNMFAKAtAlwAAKrZMnf7/Yqfh+Tu4VOk26KxR0SsbJLu3tzdsJdCTWGOpBURcU+TYTcOcLoAALRFgAsAQDVFMPkISb/v43hX5e6ciLi9j+MdpFWStra9cZMg92HDKBAAABL34AIAUNXPcvdFAxrvbn0eb6P71b+zupcq7UM8u8mw+X2aBgAAHSPABQCgms8pXf773vxE5XV0+xRlpVfs3CPpuPxE5cbxbmK7H8HvrZIe2YfxSNLJufsh25sWPW1vLek9fZoGAAAd4xJlAAAqiIjLbb9F6ZU+v7D9baX34D5E0s5Krw96Thfj/X1+D+6XJP3W9nclXan0ZOVHKZ3ZvVnSP/Q4Cz+Q9CrbZyudgb1H0gURcUEX4/pvSa+UtKek3+S62FjSvkqvCdqhx7ICANAVAlwAACqKiBNt/0bSYUqX4u4t6RZJl0k6qYfxftX2ryQdqhQkP1/SnZKul3S6pFN7K7kk6WBJIel5kl6sdBXX+yR1HOBGRNj+F0kLJS2QdJCkG5TO7L5f0prWuQEAGBxHxLDLAAAAAABAz7gHFwAAAABQCwS4AAAAAIBaIMAFAAAAANQCAS4AAAAAoBYIcAEAAAAAtUCACwAAAACoBQJcAAAAAEAtEOACAAAAAGqBABcAAAAAUAsEuAAAAACAWvj/K8Z1MzFFUkQAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 1152x360 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "visualizeAnswerDataDf(answerDataDf,titleString=TITLESTRING,plotFileName=PLOTFILENAME)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{8: 174,\n",
       " 369: 141,\n",
       " 9: 76,\n",
       " 304: 56,\n",
       " 337: 55,\n",
       " 108: 45,\n",
       " 142: 41,\n",
       " 210: 39,\n",
       " 224: 27,\n",
       " 179: 26,\n",
       " 189: 26,\n",
       " 151: 20,\n",
       " 199: 19,\n",
       " 682: 9,\n",
       " 645: 8,\n",
       " 467: 7,\n",
       " 742: 7,\n",
       " 217: 6,\n",
       " 422: 5,\n",
       " 552: 5,\n",
       " 182: 4,\n",
       " 223: 3,\n",
       " 429: 3,\n",
       " 631: 3,\n",
       " 434: 3,\n",
       " 37: 3,\n",
       " 135: 2,\n",
       " 214: 2,\n",
       " 297: 2,\n",
       " 428: 2,\n",
       " 689: 2,\n",
       " 394: 2,\n",
       " 488: 2,\n",
       " 192: 2,\n",
       " 228: 2,\n",
       " 172: 2,\n",
       " 654: 2,\n",
       " 530: 2,\n",
       " 203: 2,\n",
       " 387: 1,\n",
       " 279: 1,\n",
       " 556: 1,\n",
       " 290: 1,\n",
       " 663: 1,\n",
       " 559: 1,\n",
       " 88: 1,\n",
       " 340: 1,\n",
       " 254: 1,\n",
       " 165: 1,\n",
       " 516: 1,\n",
       " 427: 1,\n",
       " 213: 1,\n",
       " 550: 1,\n",
       " 29: 1,\n",
       " 335: 1,\n",
       " 233: 1,\n",
       " 154: 1,\n",
       " 35: 1,\n",
       " 205: 1,\n",
       " 349: 1,\n",
       " 268: 1,\n",
       " 384: 1,\n",
       " 272: 1,\n",
       " 139: 1,\n",
       " 144: 1}"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "showValueFrequencies(answerDataDf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'answer': 179958,\n",
       " 'answerText': 179958,\n",
       " 'title': 108423,\n",
       " 'question': 65265,\n",
       " 'questionNumber': 62285,\n",
       " 'Title': 12180,\n",
       " 'TreatmentStep': 6821,\n",
       " 'SubmissionDate': 6821,\n",
       " 'ApprovalDate': 6821,\n",
       " 'ApprovingCounselor': 6821,\n",
       " 'FirstName': 6154,\n",
       " 'LastName': 6154,\n",
       " 'Questionnaire': 5375,\n",
       " 'Type': 5375,\n",
       " 'Content': 5375,\n",
       " 'Status': 1983,\n",
       " 'StartDate': 1983,\n",
       " 'EndDate': 1983,\n",
       " 'TreatmentSteps': 1983}"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "inFileNames = sorted(os.listdir(DATADIR))\n",
    "tags= {}\n",
    "for inFileName in inFileNames:\n",
    "    if re.search(FILEPATTERN,inFileName):\n",
    "        root = readGzippedXmlFile(DATADIR+inFileName)\n",
    "        for section in root.findall(QUERYTREATMENT):\n",
    "            for tag in section.findall(\".//*\"):\n",
    "                if not tag.tag in tags: tags[tag.tag] = 0\n",
    "                tags[tag.tag] += 1\n",
    "{tag:tags[tag] for tag in sorted(tags.keys(),key=lambda t:tags[t],reverse=True)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "179958"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(answerDataList)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "QUERYTREATMENTSTEP = \"./TreatmentSteps/TreatmentStep\"\n",
    "QUERYTITLE = \"./Title\"\n",
    "QUERYANSWER = \".//\"+ANSWER"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Most common treatment step order:\n",
    "\n",
    "1. Voordelen, nadelen\n",
    "2. Alcoholschrift bijhouden\n",
    "3. Situaties analyseren\n",
    "4. Meten en weten\n",
    "5. Doel stellen\n",
    "6. Gewoontes doorbreken\n",
    "7. Anders denken\n",
    "8. Anders doen\n",
    "9. Beslissingen\n",
    "10. Actieplan\n",
    "11. Afsluiting\n",
    "12. Na 3 maanden\n",
    "13. Na half jaar\n",
    "14. Na 9 maanden"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Voordelen, nadelen': 922,\n",
       " 'Alcoholschrift bijhouden': 749,\n",
       " 'Doel stellen': 689,\n",
       " 'Situaties analyseren': 627,\n",
       " 'Meten en weten': 574,\n",
       " 'Gewoontes doorbreken': 497,\n",
       " 'Anders denken': 472,\n",
       " 'Anders doen': 442,\n",
       " 'Afsluiting': 419,\n",
       " 'Beslissingen': 418,\n",
       " 'Actieplan': 396,\n",
       " 'Na 3 maanden': 326,\n",
       " 'Na half jaar': 264,\n",
       " 'Na 9 maanden': 9,\n",
       " 'Intake': 1}"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "inFileNames = sorted(os.listdir(DATADIR))\n",
    "titles= {}\n",
    "for inFileName in inFileNames:\n",
    "    if re.search(FILEPATTERN,inFileName):\n",
    "        root = readGzippedXmlFile(DATADIR+inFileName)\n",
    "        questionCount = 0\n",
    "        for section in root.findall(QUERYTREATMENT):\n",
    "            questionCountAll = len(section.findall(QUERYANSWER))\n",
    "            for treatmentStep in section.findall(QUERYTREATMENTSTEP):\n",
    "                questionCount += len(treatmentStep.findall(QUERYANSWER))\n",
    "                for title in treatmentStep.findall(QUERYTITLE):\n",
    "                    titleText = cleanupText(title.text) # +\" \"+str(questionCountAll)+\" \"+str(questionCount)\n",
    "                    if not titleText in titles: titles[titleText] = 0\n",
    "                    titles[titleText] += 1\n",
    "{titleText:titles[titleText] for titleText in sorted(titles.keys(),key=lambda t:titles[t],reverse=True)}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Process tag Diary (diaries.csv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "MINIMALDUPLICATEVALUES = 5\n",
    "OUTFILENAME = \"diaries.csv.gz\"\n",
    "QUERYDIARY = \"./Diary\"\n",
    "QUERYDIARYENTRY = \"./DiaryEntries/DiaryEntry\"\n",
    "REMOVED = \"REMOVED\"\n",
    "SEP = \"_\"\n",
    "UNKNOWN = \"-\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getAllTextFields(tag,prefix=\"\"):\n",
    "    textDict = {}\n",
    "    for child in tag.findall(\"./*\"):\n",
    "        if prefix == \"\": key = child.tag\n",
    "        else: key = prefix+SEP+child.tag\n",
    "        if child.text != None:\n",
    "            childText = cleanupText(child.text)\n",
    "            if childText != \"\" and childText != \"\" and childText != UNKNOWN:\n",
    "                textDict[key] = childText\n",
    "        textDict.update(getAllTextFields(child,prefix=key))\n",
    "    return(textDict)\n",
    "\n",
    "def readDiaries():\n",
    "    inFileNames = sorted(os.listdir(DATADIR))\n",
    "    dataList = []\n",
    "    for inFileName in inFileNames:\n",
    "        if re.search(FILEPATTERN,inFileName):\n",
    "            root = readGzippedXmlFile(DATADIR+inFileName)\n",
    "            for section in root.findall(QUERYDIARY):\n",
    "                for diaryEntry in section.findall(QUERYDIARYENTRY):\n",
    "                    clientDictData = {CLIENT:fileNameToId(inFileName)}\n",
    "                    diaryEntryDict = getAllTextFields(diaryEntry)\n",
    "                    clientDictData.update(diaryEntryDict)\n",
    "                    dataList.append(clientDictData)\n",
    "    return(dataList)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "def countValues(listOfDict):\n",
    "    countsDict = {}\n",
    "    for i in range(0,len(listOfDict)):\n",
    "        for dictKey in listOfDict[i].keys():\n",
    "            if not dictKey in countsDict: countsDict[dictKey] = {}\n",
    "            for token in word_tokenize(listOfDict[i][dictKey]):\n",
    "                if not token in countsDict[dictKey]: countsDict[dictKey][token] = 0\n",
    "                countsDict[dictKey][token] += 1\n",
    "    return(countsDict)\n",
    "\n",
    "def anonymize(listOfDict,countsDict,minimalDuplicateValues=MINIMALDUPLICATEVALUES):\n",
    "    for i in range(0,len(listOfDict)):\n",
    "        for dictKey in listOfDict[i].keys():\n",
    "            tokenizedTextList = word_tokenize(listOfDict[i][dictKey])\n",
    "            for j in range(0,len(tokenizedTextList)):\n",
    "                if countsDict[dictKey][tokenizedTextList[j]] < minimalDuplicateValues and dictKey != CLIENT:\n",
    "                    tokenizedTextList[j] = REMOVED\n",
    "            listOfDict[i][dictKey] = \" \".join(tokenizedTextList)\n",
    "    return(listOfDict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of entries in list: 122330\n"
     ]
    }
   ],
   "source": [
    "dataList = readDiaries()\n",
    "countsDict = countValues(dataList)\n",
    "dataListAnonymized = anonymize(dataList,countsDict)\n",
    "saveAnswerDataDf(answerDataListToDf(dataListAnonymized),outFileName=OUTFILENAME)\n",
    "print(f\"Number of entries in list: {len(dataListAnonymized)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABtwAAAJlCAYAAABZtVZWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nOzdd7htd10n/vcHQkgvJCEFgQAqEEK/gMKM9N6UIipKGSAIjgb5ZRAjM8KAgKAwCCJNTKgOIr2YKIhYaDdSE4gMkFDSAyQkpEE+vz/WOt6dk332Oeeuc+9NyOv1PPtZ+7vWt6299+F5bt58v6u6OwAAAAAAAMDWudaOngAAAAAAAABcnQncAAAAAAAAYAKBGwAAAAAAAEwgcAMAAAAAAIAJBG4AAAAAAAAwgcANAAAAAAAAJhC4AQDAT6CqOqWquqo+tjXX+cnhu77mGL/nrqpjVrjutwAAANuIwA0AAAAAAAAmELgBAADsQFV1j5mVSU/Y0fOBqwor8gAAuDrZaUdPAAAA2P66+9AdPQe2D981S/wWAABg27HCDQAAAAAAACYQuAEAAAAAAMAEAjcAALiaqaqdquq3quoTVfX9qrqgqk6qqhdX1YFr7GPhs5HGMR5QVa+oqk9W1TlVdVlVnT+O9dqqus0qY1zp2WRV9aCqeldVfbOqLh3nv2tVfW+s929rnP+/j/XPqqqd19JmhX72r6r/WVX/OvZ16Xj8aFX9dlXtuqDtc2fu79AaPL6q/mn8vC6uqq9W1cur6vpz2h9aVZ3kH2dO/9VMn0uvjy1rd4XvbuznT8fv5fzx2i+uVH/B/Vyrqh5TVe+sqlOr6qKxvy9V1Z9V1U+v0v46VfWUqjquqk6rqkvG3+apVfWZ8XN4wKI+Vul/6fM4ZizfoaqOHe/v4qo6s6reV1X3X0efD6yqN1fV16rqwvH1H1X1hqq6/Trnc3hVvWb8zi8cr91uK+9156p6UlW9d/xbuWh8fb2q/na8tsdW9LvNfwtLv+vx9dzx3O3G7+rU8Xdx1vhd3X2FPj42/m3ceDx19zl/F11Vh673MwAAgG3FM9wAAOBqpKr2TfJ3Se687NItx9cTqupBGzDUnyQ5cs75PWfGekpVPae7X7iG/qqqXpfkKcsvdPdFVfWmJL+T5Oer6rDuPmlBR3dMshSGHNvdl65h/Hn9/FqS12S4p1kHJLnn+Dqyqh7e3Seu0t2uST6UZHmg9NNJnpHk0VV19+7+2tbMdSXjd/32JHtN7OfGSd6V5A7LLu2S5Fbj62lV9bvd/ao57a+f5Pgkt112aeckuye5UZJNSX47G/Dv0BoC3Ncluc7M6esneWiSh1bVK5Mc2d29QvvrJfnrJPedc/lnxteTqurFSY5eqZ+Z/v5bkr/IcL+TVNWdkrwjyaFzLt9kfD0iw+/2/0wdb874k34Lc/o7IsmrcsXv6oAM39VDqurp3f2aDZk8AADsQAI3AAC4mqiqayV5b7aEbZ9N8vIkJyXZO8nDkzw9yTuT7DZxuJ2SfDvJe5J8KsnXk1yc5JAMwcl/T7Jfkj+qqm9099tX6e/IDGHMp5K8OsmXM4RUdxmvvzZD4JYkT07yzAV9PXnm/evXeD9XUFVPTPLGsfidJH+e5ItJTkuyf5IHJXlakpsl+fuqukN3n7Ggy9cnuWuStyb5vxk+u0MyBEz3T3KDJH+Z5B4zbb6T5NZJ7jQzl+dk+I5nXbjCmDfKEBr9KMkfJvlYkouSHJbklAVzvYKqOiTJJ5IcnOSyJG9O8vdjH5Xh+z4yw2fxyqr6QXcfu6ybV2ZL2HZ8kreM7X+Q5HrjnO6V5H5rndcCt0vya0m+n+QlSf5lnOfdkvxehu/vt5OcleQFyxuPK8M+niE46gzh0nuSfCPD/d8myW+N4zw7w+/+eQvmsynJY5OckeHv8ZNJfpwhsPruem6sqjaNc9tlPPWhDIHqV8c+b5Tk
vyZ51Hr6Xcf4G/FbmHW/DH/jX84QDn4hw/+2PDDJszIElK+oqo9293/MtHtihqD2uAx/R5vHc8t9Z6tuFAAAtoFa5f+oBwAAXEVU1ZOzJWD6uyQP6+7LltW5f5IPJrn2eOqfuvsec/o6JcN2bStdv2mSU7v7xyvMZZ8MwcCtM4RxP9Pdly+rc49ccbvEtyf59eX1Zup/PEOYcG6SQ+atXKuq3ZKcnmFF19y5r2a8txMzhBpvTvKU7r5kTr2fT/KRDMHgX3b3k5ddf26GoGvJE7v7mGV1rpUhgLr3eOq23f2FZXXukS2f05X6mDOvU7Jlq70zkty1u7+xhvorfdfHZQhGvpXkvt198pw6S+HH3ZJ8L8mNu/sH47VdkpyfYQXTu7v7EQvmsl93n7vo/ha0nf3H67cz3Pe3ltW5UYbA6JAklya5xfLPpqpem+SIJOcleWB3f2LOWDsleVuSR2cINH92Tj+z8zkpyS9s7b2N/e2c5OQMK9s6yRO6+00r1N0pyQHdffoKczq2u58wp90p2Ya/hfH6oRnCyyXHJXn48r+xqnpckqWw7uXdfaWQfbX5AgDAVYlnuAEAwNXHfx+PF2cIZi5bXqG7j8uwkmqS7v76SmHbeP37GVZjJclNc+WtBJc7P8lvrhS2jZa2ldsvyS+tUOeXs2X7xK1a3ZbkqAxh27eSHDEvbEuSMYh59Vj89VrwPLck75kXlI33+6czp+Y+s2qCZy8K21ZTVXfOllVnT5sXsCRJd1+Y5DfH4r654gqr62XLdoEfWzTelEBqmWcuD9vG/r+Z4ftNhtVTT529XlU/lS0rpf5gXtg29vOjDCscL82wIusJq8zn6Rtwb4/Nlm0k/2ylsG1pfsvDtqk26Lew3MVJHr/C39hbMgTGycb/XQAAwHYncAMAgKuBqjowW0KtD66yveEbtsH4e1XVTarqVlV1eFUdnmHlz5Lbr9R29L7uPn+VOn+b5Jzx/ZWe9bbs/HczbJ25NZbCvHd398Wr1P3YeLxukjsuqPeWBdc+M/P+pquMtx6XZXjW1xRLq9HOy7B94Yq6+0sZVh8mw+qmJedkCFaS5FfGFVDb0veSvHvB9Xdm2G4yufIWlg/JlnBw4TaoY4D2xbF4twVVv93d/7SorzV62Mz7l25Af+u1Eb+F5f6hu89coY/Lk5wwFjfy7wIAAHYIz3ADAICrh9vMvP/UKnU/m2Flzs5TBqyqn03yu0kenOSGq1Tff5Xrn19tvO6+pKqOybBC6V5VddPu/vrMfG6Z4TlpSfLmlVamLTJuOXjQWPydqvqdRfWXOXjBta8suDb7HK+9Vqy1fv/R3RdN7GPpeYB7J7m8qtba7j8/i+6+tKrenCEM/fkkp1bVOzNsx/np7j514hyX++y4Am2u7r6sqj6b5J5Jbl1V1VuepXDnmarnbs39zrHqb3uNlgLdr3b3jng22eTfwhyL/i6SLX8bG/l3AQAAO4QVbgAAcPWw38z7uStGloxhxHcX1VlNVT02yZcybB23WtiWDM85W+R7axz6dRmeX1VJnrTs2lOW1dsa19/Kdkmy24JrF650Ydk2mtdeqd5WWOtnusjWfh7LP4tnZFjl1xl+q0/NsPrulKr6VlW9pqpWWwW5Vgt//8vq7JQrhjkbdb+zNuJ7SJIDxuNpG9Tfem2Lz2bFv4vR0t+G/zYBAMDVnhVuAADAFVTVzyR5Y4at9y5M8rIkf5fk/yU5b2llWVXdNMnXlpqt0u2Kz4Ob1d1fraqPJrl3kidU1f/q7h9X1c5JfmOs9m/dfdJ67mnG7L+BXp3kL9bR9ttbOea2sqbPdBVLn8d3kjxgHe2uEKR09w+T/EZVvTDJYzKsLrtThiD2pzIEcE+tqpd39zMnz3rrLd1vZ9iitRfUnXXpgmsb8T1cFWzIbwEAAK6pBG4AAHD1cO7M+wMXVayqnZJcb8JYT8yW7Sgf0d3Hr1Bv3wljLPKaDIHbIRm2s3xfhueuLW1bubWr25Lk7NnC
+Cyqa7Kzk9w8wzaCJ85svbhVuvvLSZ6b5LlVdZ0kmzJ8d0/NsNLsd6vqxO7+ywnDLPz9L6vzoySzzw5c+v4ryRndfYXfww52dobVpIfswPE37LcAAADXNLZtAACAq4cvzLy/yyp1b59pz2+79Xj87oKwLRnClG3hvUnOGN8/ednxvAxbFW6tb2TLdpv/dUI/G2lHBhsnjMc9ktxhIzvu7su6+xPd/awk95m59CsTu779GCrPNQZ9S9tXfnFZcHTCzPtfmDiPjbZ5PP5MVd1gB4y/zX4LEwj9AAC42hC4AQDA1UB3n5nk82PxwVV10ILqT15wbS2WwoxdqmruvxnG80+dOM5c3X1Zhi0tk+RBVXW3DCvekuQt3X3RhL4vzxDoJcmtq2o9W+dtK7P3c93tPPa7Zt7/j201SHd/JluedXbAorprsG+GVXMreVSSfcb3ywPj92XLFpDPXOn3vYO8d+b9UTtg/O3yW1inpb+N7f13AQAA63ZV+scFAACw2KvG4y5J3jiu5LmCqrp/kidNHOc/xuNuSX55hTovzZZVRNvC65NcnuTaSf4mW54R9/oN6PuFSS4Z3x9bVXdcVLmqblhVUz/TRU6bef+z23CcK+nujyf56Fh8TFU9Z1H9qtq5qp5UVQfOnLtpVd1rlXZ3zpYtSL8+Zc6jl1XVT80Z50ZJ/mQsXprktbPXu/vrSY4di3dN8upVVstdq6oeVVWHbcCcV/O2bPlsfqeqfmOlilW1U1UdvJGDb8RvYRtY+tv46apa7TmRAACwQ3mGGwAAXH28McnjMmyF+MAkn6yqlyf5cobnYz08ydOTfDPDtnBbu5LoTUl+O0PI9caqun2Sv8+wQunmGVa2/UKSf0nyX7b2Zhbp7lOq6rgM97kULHy6uz+/oNla+/5/VfXkDPd5/SSfqKq3J/lAklMyBH37J7lNkvsnuUeSTyaZ8tyxRfP5dlWdkuTQJE+qqhMzbO+3FAr+sLu/uS3GHv16kk9leH7Y86vqF5Mck+RzSS5IsmeGIPCuSX4xw/MBfybJmWP7GyX5SFV9LcMqrU9n+A1enOHzvXuSp411O8mfT5zv55PcMskJVfWSJP86nr9bkt/Llt/987v7G3PaH5lhy8TbZfgt37Oq3pDkM0m+n2T3JDdJ8nMZVtIdkuS+SU6aOO+FuvuyqnpMkn/OEKq/qap+JUMQ99UMK/NumOE+H5PkZUn+zwZPY+pvYaP9c4bVrfsn+YuqelO2bAmbJF8bV8QCAMAOJ3ADAICrie6+fPwP4B9OcucMocGbl1U7I8kjk7x7wjgnVNXRSV6UZNckzxpfs/4hQ3Bx4taOswavzRC4LdmI1W1Jku5+S1WdnyFE2z9DkPm4BU3O26ixV/C8JH+VZO9cOdj7pwyh3zbR3adX1c8lees4zh3H10ouyZYwcNbNkjxzlXbP6O6PbOVUl3wuycsz/B7+ZIU6r0ryR/MudPcFVXX3DJ/zozIESC9ZMN6Pk1y41bNdh+7ePM7tnRlCrweNr+1iA38LG+U1GcLaAzOEo8u3sb1JhpAcAAB2OIEbAABcjXT3d8dnmj01yW9kWOlz7STfyvB8qpd195lTd1/r7hdX1WeSPCPDSp+9k5yb5EsZ/mP8sUluPGmQ1X0gyTkZArEfJHn7Rnbe3e+rqkOTPDFDsHfbcazKsJrvqxlW+3woycc2cuw5czmmqk7PEC5syrBKa+dtOeay8U/LsNLrPkl+NcMKpkMyrPa6IMOKtS9kCFrf093fn2n+zxlWXd4vyV0yrHg7MMNqqAsyfI4fSfK6FVacbc18j62qL2b4fd49yUFJzs+wEvFV3X3cKu3PT/LoqrpTksdnWLH5UxlWiv4wyXcy/NY/muTd3X3GRsx7Lbr701X1sxl+lw/P8LvcL0Pw950k/57k/RlCuW0x/pTfwkbP5cyq2pQh8L93hv/N2S1btpgFAICrjOruHT0HAACAK6mqG2ZYvXKtDGHN8tUtXINU1dI/Xo/t
7ifsyLkAAAAsd60dPQEAAIAVPClb/s3y2h05EQAAAFhE4AYAAFzlVNVeSZ4+Fj/Z3f++I+cDAAAAi3iGGwAAcJVQVTdIsmuGZ2n9rwzPMUuS5++wSQEAAMAaCNwAAICrircmufuyc2/v7g/tiMkAAADAWlV3r16L/7T//vv3oYceuqOnAQAAP3FOPvnkXHDBBamqXPe6181+++2X61//+rnWteyET3LCCSckSfbbb7/4NxkAALCjnHDCCed09wHLz1vhtk6HHnpoNm/evKOnAQAAAAAAwHZWVafOO+//KgoAAAAAAAATCNwAAAAAAABgAoEbAAAAAAAATCBwAwAAAAAAgAkEbgAAAAAAADCBwA0AAAAAAAAmWHPgVlU3qqpnVNX7q+qbVXVJVf2gqj5fVS+uqoNXab9zVT2rqj5XVRdU1fer6hNVdURV1RrGv8849llVdXFVfa2qXlFVB66h7UFj3a+Nbc8c+7r3Wu8fAAAAAAAA5qnuXr1S1Q2TnJpkNhg7P8nuSa49lr+X5JHd/Y9z2u+V5KNJ7jie+mGSnZLsPJY/kOSXuvtHK4z/B0leMBYvT3JBkr3G8tlJ7tXdX1qh7W3GsfebmfceGcLGTnJ0d7947o3PsWnTpt68efNaqwMAAAAAAPAToqpO6O5Ny8+vdYXbUqj2wSSPTnK97t47yW5JHpTkG0n2TfKeqjpoTvvXZwjbvpvkoRkCr92SPCHJxUkekuR5K0z8QdkStv1pkn3GsQ9P8rkkByR5b1Vdd07bXZO8L0PY9tkkh49t9x37qiQvrKr7rfFzAAAAAAAAgCtYa+D2vSS37+6HdPc7u/t7SdLdl3b3hzOEbhdnWHX21NmGVXX7JL88Fp/Y3R/owY+7+9gkzx6v/W5VXX/O2C8cj+/u7qO6+wfj2CdmCO8uSHLTJEfMafvUJDce6zx0bJPuPr+7j0ryngyh24vW+DkAAAAAAADAFawpcOvu87r78wuufyXJJ8fiHZdd/rXxeHJ3v29O89clOS/JrkkeMXuhqm6V5LZj8aVzxv12krePxcfO6Xvp3Nu6+ztzri/1eYequvmc6wAAAAAAALDQWle4rcW54/Hay87fczweP69Rd1+U5J/H4r1WaHtekk+tMO5x4/HOVbXH0smq2jNbwr/jrtRq8Mmx7yS59wp1AAAAAAAAYEUbErhV1U5J7jYWvzRzvpLcYiyeuKCLk8bjYcvOL5W/3N2Xr9J2dqwkueV4bsWxxz5PXmFsAAAAAAAAWNVGrXD7rSQHJbk8ybEz5/dKsvv4/rQF7ZeuHbzs/MHLri9qu7z9wSvUWevYAAAAAAAAsKrJgVtV3SbJi8biq7r7pJnLu8+8v2hBNz8cj3ssO7/Ufi1tl7efOvZ/qqojqmpzVW0+++yzF3QFAAAAAADANc2kwK2qDk7yniS7Jjkhye9txKSuarr7dd29qbs3HXDAATt6OgAAAAAAAFyFbHXgVlXXS3J8kpsk+WqSB3f3xcuqXTjzftcF3e02Hi9Yof1a2i5vP3VsAAAAAAAAWNVWBW5VtXeS45IcnuSbSe7T3WfOqXp+tgRfhyzocuna6cvOn7bs+qK2y9uftkKdtY4NAAAAAAAAq1p34FZVuyf5UJJNSc7IELZ9c17d7u4kXx6Lt1rQ7WHj8aRl55fKt6yqlea61HZ2rCT5ynhuxbHHPm++wtgAAAAAAACwqnUFblW1a5L3J7lrknMzhG1fXaXZP47H+67Q5y5J/utY/MgKbfdOcqcV+r/fePxUd//nNpLd/YMkmxeNneQuY9/zxgYAAAAAAIBVrTlwq6qdk7wryT2TfD/J/br7xDU0fft4vEVVPWTO9adkCL0uSvLu2QvdfVKSz4/F/zFnTock+dWx+NY5fb9tPD62qg6ec/2o8XhCd5+84h0AAAAAAADACtYUuFXVtTOEVw9I8oMkD+zuf19L2+7+bJJ3jMVjqupBS31W1eOS/PF47eXdfdacLo4ej4+sqpdU1Z5j
+8MyrLbbM8nXk7x+TtvXJjl1rPOBsU2qas+qekmSRywbAwAAAAAAANalhsesrVKp6heS/NNYvDjJeQuqf6u7r7D9Y1XtleSjSe44nvphkmsnue5Y/kCSX+ruH60w/nOSPH8s/jjJhUn2GsvnJLlnd39phba3zbBd5H7jqfOT7JEhbOwkR3f3ixfczxVs2rSpN2/evHpFAAAAAAAAfqJU1QndvWn5+bVuKTlbb5ckBy54HbC8cXefn+G5b8/OsEVkJ7kkySeTPDXJw1YK28b2L8jwHLYPJvlehqDu60n+LMnhK4VtY9vPJzl8rPv1se25Y1/3XU/YBgAAAAAAAMutaYUbW1jhBgAAVw2HPvuDOeXFD97R0wAAAOAaZOoKNwAAAAAAAGAOgRsAAAAAAABMIHADAAAAAACACQRuAAAAAAAAMIHADQAAAAAAACYQuAEAAAAAAMAEAjcAAAAAAACYQOAGAAAAAAAAEwjcAAAAAAAAYAKBGwAAAAAAAEwgcAMAAAAAAIAJBG4AAAAAAAAwgcANAAAAAAAAJhC4AQAAAAAAwAQCNwAAAAAAAJhA4AYAAAAAAAATCNwAAAAAAABgAoEbAAAAAAAATCBwAwAAAAAAgAkEbgAAAAAAADCBwA0AAAAAAAAmELgBAAAAAADABAI3AAAAAAAAmEDgBgAAAAAAABMI3AAAAAAAAGACgRsAAAAAAABMIHADAAAAAACACQRuAAAAAAAAMIHADQAAAAAAACYQuAEAAAAAAMAEAjcAAAAAAACYQOAGAAAAAAAAEwjcAAAAAAAAYAKBGwAAAAAAAEwgcAMAAAAAAIAJBG4AAAAAAAAwgcANAAAAAAAAJhC4AQAAAAAAwAQCNwAAAAAAAJhA4AYAAAAAAAATCNwAAAAAAABgAoEbAAAAAAAATCBwAwAAAAAAgAkEbgAAAAAAADCBwA0AAAAAAAAmELgBAAAAAADABAI3AAAAAAAAmGDNgVtV7VlVD6uq51fVh6vqnKrq8XWLBe2Omam32uuv5rT/2BravWqVuR9UVa+oqq9V1cVVdWZVvb+q7r3W+wcAAAAAAIB5dlpH3XsnefdWjHFekjMXXL9OkuuN7/99Qb3zk1y04NpcVXWbJB9Nst9M3f2TPCTJg6vq6O5+8YJxAQAAAAAAYEXr3VLyrCQfSvK8JEespUF3H9ndB630SvKCseqlSd62oKtF/Rw9r0FV7ZrkfRnCts8mOby7906yb5I/TVJJXlhV91vLvQAAAAAAAMBy61nh9v7ufs9SoaoO3aA5PH48frC7z92gPpc8NcmNk1yQ5KHd/Z0k6e7zkxxVVTdL8otJXpTk+A0eGwAAAAAAgGuANa9w6+4fb/TgVXXbJLcdi8dsdP9JHjse37YUti3z0vF4h6q6+TYYHwAAAAAAgJ9w691ScqMtrW47O8mHN7LjqtozyR3H4nErVPtkhmfMJcMz6gAAAAAAAGBddljgVlU75Yor0C5bpclRVXVaVV1aVWdX1Ueq6mlVtcsK9W+Z4RltSXLivArdfXmSk8fiYeuZPwAAAAAAACQ7doXbA5Ncf3x/zBrq3yrJ9ZJcmGT/JPdK8uokn66qG82pf/DM+9MW9Lt07eAFdQAAAAAAAGCuHRm4LW0n+YXu/tyCeh9L8rgMgdiu3b1vkgOTHJ3kkiS3TvKhqtp5WbvdZ95ftKD/H47HPVaqUFVHVNXmqtp89tlnL+gKAAAAAACAa5odErhV1fWSPHQsHruobnc/t7vf3N1ndHeP587q7hcleeRY7VZJnrCt5tvdr+vuTd296YADDthWwwAAAAAAAHA1tKNWuP1Kkp2T/CjJW7a2k+7+YJKPj8WHLrt84cz7XRd0s9t4vGBr5wEAAAAAAMA1144K3Ja2k/y77j5rYl+fGo83XXZ+9rlthyxov3Tt9InzAAAAAAAA4BpouwduVXXLJHceiwu3k5zoK0l6fH+rFeZyrSQ3H4snbcO5AAAAAAAA8BNqR6xwW1rd
9t0k79uA/u4yHr8xe7K7f5Bk81i874K2e4/vP7IBcwEAAAAAAOAaZrsGbuOKsl8fi3/d3ZeuUr9Wuf6AJL8wFj84p8rbxuNjq+rgOdePGo8ndPfJi8YCAAAAAACAedYVuFXV/kuvJPvOXNpn9toYrM1znyQ3GN+vZTvJZ1fVG6vqvlW158w8DqiqZyV513jq5CRvnNP+tUlOTbJnkg9U1WFj+z2r6iVJHjHWO3oNcwEAAAAAAIAr2Wmd9c9e4fwnlpVvkuSUOfWWtpP8cnd/eg3jXTfJE8dXV9X5GZ7Lts9MnS8meVh3X7K8cXdfVFUPz7Bd5B2SnDj2sUeGsLGTHN3dx69hLgAAAAAAAHAl6w3ctlpV7ZXkl8biWla3JcnfZJjjXZPcLMl+Sa6T5PQkn0vyziRvnRe2Lenuz1fV4Ul+P8lDMqywOzfJp5O8vLs9uw0AAAAAAICttq7ArbsXPlNtlbbnJ9ltnW1OTPKcrR1zpp8zkhw5vgAAAAAAAGDDrOsZbgAAAAAAAMAVCdwAAAAAAABgAoEbAAAAAAAATCBwAwAAAAAAgAkEbgAAAAAAADCBwA0AAAAAAAAmELgBAAAAAADABAI3AAAAAAAAmEDgBgAAAAAAABMI3AAAAAAAAGACgRsAAAAAAABMIHADAAAAAACACQRuAAAAAAAAMIHADQAAAAAAACYQuAEAAAAAAMAEAjcAAAAAAACYQOAGAAAAAAAAEwjcAAAAAAAAYAKBGwAAAAAAAEwgcAMAAAAAAIAJBG4AAAAAAAAwgcANAAAAAAAAJhC4AQAAAAAAwAQCNwAAAAAAAJhA4AYAAAAAAAATCNwAAAAAAABgAoEbAAAAAAAATCBwAwAAAAAAgAkEbgAAAAAAADCBwA0AAAAAAAAmELgBAAAAAADABAI3AAAAAAAAmEDgBgAAAAAAABMI3AAAAAAAAGACgRsAAAAAAABMIHADAAAAAACACQRuAAAAAAAAMIHADQAAAAAAACYQuAEAAAAAAMAEAjcAAAAAAACYQOAGAAAAAAAAEwjcAAAAAAAAYAKBGwAAAAAAAEwgcAMAAAAAAIAJBG4AAAAAAAAwgcANAAAAAAAAJhC4AQAAAAAAwARrDtyqas+qelhVPb+qPkCsMaUAACAASURBVFxV51RVj69brNK21/B61Cp9bKqqv66q06rq4qr6ZlW9oap+eg1z36uqXlBVX66qH1bVuVX1kdXGBAAAAAAAgNXstI66907y7onjnZPkxytcu3ilRlX1+CRvyDDfTnJ+khsmeVKSX6mqh3X3R1do+1NJPp7kJuOpC5LsleReSe5VVX/R3U/finsBAAAAAACAdW8peVaSDyV5XpIjtmK8O3X3QSu8PjCvQVXdJsnrM4Rtb01yYHfvk+TQJH+fZPckf1tVB8xpW0nemSFsOyXJ3bp7zyR7JnlWksuTPK2qnrIV9wIAAAAAAADrCtze390HdveDu/u5GcKu7eF/J7lOks1JHt/dZydJd5+a5BFJvpVknyTPntP24UnukiFY+6Xu/rex7cXd/dIkf7Y0RlXtvE3vAgAAAAAAgJ9Iaw7cunulrSC3maraJ8mDxuLLls+huy9I8pqx+KvjirZZjx2P/9Ddn5szxJ9k2KLyoAxbTAIAAAAAAMC6rHdLye3tv2RY3ZYkx69Q57jxeHCSWy67ds9lda6gu7+T5MSxKHADAAAAAABg3bZ34PaOqvpeVV1SVd+uqr+tqgcvqH/YeDyju89doc5Jc+qnqq6fZL+xeGJWttT+sAV1AAAAAAAAYK7tHbjdKcm1k1yW5AYZnsH2gap6xwrPUDt4PJ62UofdfVGS7y+rv/z9iu1nrh28oA4AAAAAAADMtb0Ct2OTPCDJvt29V3fvkWH7x78arz86yavmtNt9PF60Sv8/HI97zGm7Wvt5ba+gqo6oqs1Vtfnss89eZSoAAAAAAABck2yXwK27n9Ddx3X392fOfaW7/1uSl46nnlxVN98e81mv
7n5dd2/q7k0HHHDAjp4OAAAAAAAAVyHbe0vJeZ6XYQVaJXnIsmsXjsddV+ljt/F4wZy2q7Wf1xYAAAAAAADWZIcHbt19YZIvjcWbLru89Hy1Q1ZqX1W7JtlnLJ4+p+3C9jPXTl9QBwAAAAAAAOba4YHbKk4ajwdV1X4r1DlsTv1099lJzhmLt1owxlL7kxbUAQAAAAAAgLl2eOBWVbsnOXwsfmPZ5X9Jctn4/j4rdHG/8Xhaki8vu/aP4/G+K4x9g2wJ4z6ylvkCAAAAAADArG0euFVVrVLlf2Z4xlon+dDshe4+b+bcM6vqCvMdw7rfHItv7+5e1vfbxuP9quq2c8Z+ZoZnx52eLeEcAAAAAAAArNm6Areq2n/plWTfmUv7zF5bFoy9o6r+qKo2VdXOM33dvKpen+T3xlPHdve8bR3/MMMqtzsnOWYcO1V1oyTvSnKjJN9P8sdz2r43yafG+3x3Vf3c2Pa6VfX/JXnG0hjdfel6PgsAAAAAAABIkp3WWf/sFc5/Yln5JklOGd8fkORRSY5O8uOqOi/JdZPsPlP/ndmyUu0KuvvzVfWUJG9I8htJfr2qzk+y91jlwiSPHJ/ZtrxtV9Wjknx8nNMnquqCJLtky72/prtfv8J9AQAAAAAAwELb4xluL0zyyiSfSXJWhqDtWhme1/b2JPfv7kd39yUrddDdxyb5+STvSHJmhi0ov5XkjUlu190fXdD220luN87jKxmCth9k2ELyl7v7aVNvEAAAAAAAgGuuda1w6+7Vnsc2r83xSY5fb7s5/WxO8pitbHt+kj8YXwAAAAAAALBhtscKNwAAAAAAAPiJJXADAAAAAACACQRuAAAAAAAAMIHADQAAAAAAACYQuAEAAAAAAMAEAjcAAAAAAACYQOAGAAAAAAAAEwjcAAAAAAAAYAKBGwAAAAAAAEwgcAMAAAAAAIAJBG4AAAAAAAAwgcANAAAAAAAAJhC4AQAAAAAAwAQCNwAAAAAAAJhA4AYAAAAAAAATCNwAAAAAAABgAoEbAAAAAAAATCBwAwAAAAAAgAkEbgAAAAAAADCBwA0AAAAAAAAmELgBAAAAAADABAI3AAAAAAAAmEDgBgAAAAAAABMI3AAAAAAAAGACgRsAAAAAAABMIHADAAAAAACACQRuAAAAAAAAMIHADQAAAAAAACYQuAEAAAAAAMAEAjcAAAAAAACYQOAGAAAAAAAAEwjcAAAAAAAAYAKBGwAAAAAAAEwgcAMAAAAAAIAJBG4AAAAAAAAwgcANAAAAAAAAJhC4AQAAAAAAwAQCNwAAAAAAAJhA4AYAAAAAAAATCNwAAAAAAABgAoEbAAAAAAAATCBwAwAAAAAAgAkEbgAAAAAAADCBwA0AAAAAAAAmELgBAAAAAADABAI3AAAAAAAAmEDgBgAAAAAAABOsOXCrqj2r6mFV9fyq+nBVnVNVPb5usaDdLlX1yKp6Q1V9oaouqKpLquqbVfV/q+oeq4z7sZlxVnq9apU+DqqqV1TV16rq4qo6s6reX1X3Xuv9AwAAAAAAwDw7raPuvZO8eyvGeH+S+8yUL0lyWZIbjq9frqpXdPczVunn/CQXLbg2V1XdJslHk+w3U3f/JA9J8uCqOrq7X7zqXQAAAAAAAMAc691S8qwkH0ryvCRHrLHNdZJ8Ncmzktyyu3fp7j2S/HSSvxnrHFlVT1+lnyO7+6AVXkfPa1BVuyZ5X4aw7bNJDu/uvZPsm+RPk1SSF1bV/dZ4LwAAAAAAAHAF61nh9v7ufs9SoaoOXWO7o5N8qrt/PHuyu79WVY/JEIbdK8lRSV69jvmsxVOT3DjJBUke2t3fGcc+P8lRVXWzJL+Y5EVJjt/gsQEAAAAAALgGWPMKt+WB2Tra/dtKbbu7k7xpLN6kqq63NWMs8Njx+LalsG2Zl47HO1TVzTd4bAAAAAAAAK4B1rul5LZw7sz7a29Up1W1Z5I7jsXjVqj2ySTnje/vvVFjAwAAAAAAcM1xVQjc
7j4ez0xyzoJ6R1XVaVV1aVWdXVUfqaqnVdUuK9S/ZYZntCXJifMqdPflSU4ei4etd+IAAAAAAACwQwO3qrpBkt8ci8eMW0yu5FZJrpfkwiT7Z3ju26uTfLqqbjSn/sEz709b0O/StYMX1AEAAAAAAIC5dljgVlU7JXlrkj2SfDPJi1ao+rEkj8sQiO3a3fsmOTDJ0UkuSXLrJB+qqp2Xtdt95v1FC6byw/G4x4K5HlFVm6tq89lnn72gKwAAAAAAAK5pduQKt1dm2E7y0iS/1t3nzavU3c/t7jd39xlLK+C6+6zuflGSR47VbpXkCdtqot39uu7e1N2bDjjggG01DAAAAAAAAFdDOyRwq6oXZthK8sdJHtvd/7o1/XT3B5N8fCw+dNnlC2fe77qgm93G4wVbMwcAAAAAAACu2bZ74FZVf5Dk95N0kqd09zsndvmp8XjTZednn9t2yIL2S9dOnzgPAAAAAAAAroG2a+BWVb+b5AVj8cju/qttONxXMoR6ybDl5Lz5XCvJzcfiSdtwLgAAAAAAAPyE2m6BW1U9LcnLxuKzu/uVG9T1XcbjN2ZPdvcPkmwei/dd0Hbv8f1HNmg+AAAAAAAAXINsl8Ctqh6f5M/H4v/u7j9eY7ta5foDkvzCWPzgnCpvG4+PraqD51w/ajye0N0nr2VOAAAAAAAAMGtdgVtV7b/0SrLvzKV9Zq+NWzUutXlkkr9MUkle2t1/uI4hn11Vb6yq+1bVnjN9HlBVz0ryrvHUyUneOKf9a5OcmmTPJB+oqsPG9ntW1UuSPGKsd/Q65gQAAAAAAAD/aad11j97hfOfWFa+SZJTxvcvTXLt8f3jqupxC/p/RHf/20z5ukmeOL66qs7P8Fy2fWbqfDHJw7r7kuWddfdFVfXwDNtF3iHJiWMfe2QIGzvJ0d19/II5AQAAAAAAwIrWG7htjdlVdAeuUnfnZeW/yTDHuya5WZL9klwnyelJPpfknUneOi9sW9Ldn6+qw5P8fpKHJLlBknOTfDrJy7vbs9sAAAAAAADYausK3Lp74TPVVmhz6HrbzLQ9Mclztrb9TD9nJDlyfAEAAAAAAMCGWdcz3AAAAAAAAIArErgBAAAAAADABAI3AAAAAAAAmEDgBgAAAAAAABMI3AAAAAAAAGACgRsAAAAAAABMIHADAAAAAACACQRuAAAAAAAAMIHADQAAAAAAACYQuAEAAAAAAMAEAjcAAAAAAACYQOAGAAAAAAAAEwjcAAAAAAAAYAKBGwAAAAAAAEwgcAMAAAAAAIAJBG4AAAAAAAAwgcANAAAAAAAAJhC4AQAAAAAAwAQCNwAAAAAAAJhA4AYAAAAAAAATCNwAAAAAAABgAoEbAAAAAAAATCBwAwAAAAAAgAkEbgAAAAAAADCBwA0AAAAAAAAmELgBAAAAAADABAI3AAAAAAAAmEDgBgAAAAAAABMI3AAAAAAAAGACgRsAAAAAAABMIHADAAAAAACACQRuAAAAAAAAMIHADQAAAGCZQ5/9wR09BQAArkYEbgAAAAAAADCBwA0AAAAAAAAmELgBANdItokCAAAAYKMI3AAAAAAAAGACgRsAAAAAwE8Iu3kA7BgCNwAAAAAAAJhA4AYAAAAAAAATCNwAAAAAAABgAoEbAAAAAAAATCBwAwAAAACA/7+9e4+27qzrQ//9SYDcCZdIghVetKNCyICoQahS5d7KRVopKqADjtbUHnvAGyViO7wcBqLWC9h65FIUK9KjVuBgcICCClagRiRKgmiRiBoIgXDJmxuR/M4fa+6+m83e+917r9tca30+Y6wx11zzeebzzGc+87Lmb825AKYg4AYAAAAAAABTEHADAAAAAACAKQi4AQAAAAAAwBQE3AAAAAAAAGAKBw64VdVZVfV1VfV/V9VvVdVHq6qH1/0OkP/zquqSqnp7VX2iqm6oqj+pqudU1Z0OkP/iqvpvVXVNVd1SVR+sqpdX1T88QN6zq+r5VfXeqrqpqj5WVW+u
qn950OUHAAAAAACA3ZxyiLSPSvKaoxRSVXdM8tokjxs++nSSzyS5aHg9paoe2d3H98j/jCQvH+rbST6V5AuTfFuSb6qqr+vut+yR9x8keWuS+w4fHU9ydpJHJnlkVf0/3f1/HmW5AAAAAACAzXHs0sty9Qsfv+xqMEKHfaTkR5K8IckPJ7nkEPmen0mw7ZYkz0xyepIzkjwxyfVJHpzkJbtlrKoHJnlZJsG2VyW5Z3efk+RYkt8e5vPfq+rcXfJWkl/PJNh2dZKv6u6zkpyV5N8luT3Jv6mqbz/EsgAAAAAAAMD/dpiA2+u7+57d/fju/qFMgl0nVVXnJXn2MPrc7n5ld3+mJ34zybcO0546BNd2+pEkd0xyeZJndPd1SdLdf53k65P8TZJzkly6S94nJXlIJoG1f9HdfzjkvaW7fyLJi7fKOMhjLQEAAAAAAGCnAwfcuvszRyzjyUnunOSTSV66y3xfl+QvklSSp22fVlXn5MRjKH9qZx2GR1D+/DD61OGOtu2ePgx/p7vfvUvd/mMmj6g8L5NHTAIAAAAAAMChHPaRkkfxiGH41u6+ZY80bxqGO4NeD8vk7rbtaXZ64zA8P8n99yj7jdlFd/9dkiv3KBsAAAAAAABOahEBtwuG4ZX7pLlqGN5/x11qW3k/3N0fO0ne7elTVZ+f5O6HKPuCfdIAAAAAAADArhYRcDt/GF6zT5qtaWcOrwPn7e6bk3xiR/qd7w9S9vn7pAEAAAAAAIBdLSLgdsYwvHmfNDdte7894HaQvNvz75b3oGWfuVeCqrqkqi6vqsuvu+66k1QFAAAAAACATbKIgNvK6+6XdvfF3X3xueeeu+zqAAAAAAAAMCKLCLjdOAxP2yfN6dveHz9k3u35d8t70LKP75MGAAAAAAAAdrWIgNvWf6Tda580W9OOd/cNh8lbVaclOWcY/dAueQ9a9of2SQMAAAAAAAC7WkTA7aph+IB90lwwDN+7R97zquruJ8m7PX26+7okHz1E2VftkwYAAAAAAAB2tYiA2+8Ow39SVafukeYxw/DNOz7/gyS3De8fvUfexw7Da/K5Abutsh+TXVTVF+REMG5n2QAAAAAAAHBSiwi4/UaSWzN57OO/2jmxqp6Y5EuSdJJXb5/W3Z9M8oZh9Huq6vN25D0jyXcMo6/u7t4x+18Zho+tqgftUrfvSVKZPE7yd3eZDgAAAAAAAPs6VMCtqu6x9Upy122Tztk+bXtgrLs/nORFw+iPV9W3VNUdhvk9LskvDNNe3d1/ukuxP5jJXW5fkeQXh7JTVffOJJh37ySfSPJju+R9XZJ3Dsv5mqp66JD3zlX1vUm+a6uM7v70YdoCAAAAAAAAkuSUQ6a/bo/P375j/L5Jrt42/u+TXJjkcUl+KcnLquozSU4fpv9RTtyp9lm6+4qq+vYkL0/yLUm+uao+leQuQ5Ibkzx5+M+2nXm7qv5lkrcOdXp7VR1PcmpOLPvPd/fL9lguAAAAAAAA2NciHimZ7r4tyRMzCaq9I5NHTHaSdyd5bpKHdfcN++R/ZZJ/nORXk1yb5LQkf5PkFUku6u637JP3b5NclOQFSf48k0DbDZk8QvIbuvvfTLt8AAAAAAAAbK5D3eHW3XXUgrr79iQvGV5HyX95km88Yt5PJfmB4QUAAAAAAAAzs5A73AAAAAAAAGBdCbgBAAAAAADAFATcAAAAAAAAYAoCbgAAAAAAADAFATcAAAAAAACYgoAbAAAb6dilly27CgAAAMCaEHADAAAAAACAKQi4AQAAAAAAwBQE3AAAAAAAAGAKAm4AAPvwP1/zoV0BAACAdSLgBgAAAAAAAFMQcAMAAAAAAIApCLgBAAAAAADAFATcAAAAAAAAYAoCbgCwIMcuvWzZVQCWwLYPAAAA60/ADQAAAAAAAKYg4AYAAAAAAABTEHADAAAAAACAKQi4AQAAAAAAwBQE3AAAAAAAAGAKAm4AAAAAAAAwBQE3AAAAAAAAmIKAGwAAAAAA
AExBwA0AAAAAAACmIOAGAAAAAAAAUxBwAwAAAAAAgCkIuAEAAAAAAMAUBNwAAAAAAABgCgJuAAAAAAAAMAUBNwAAAAAAAJiCgBsAAAAAAABMQcANAAAAAAAApiDgBgAAAAAAAFMQcAMAAAAAAIApCLgBAAAAAMzIsUsvW3YVAFgCATcAAAAAAACYgoAbACvDrwQBAAAAgDEScAMAAAAAAIApCLgBAAAAAADAFATcAAAAmDmPggYAADaJgBsAAAAAAABMQcANAAAAAAAApiDgBgAAAAAAAFMQcAMAAAAAAIApCLgBAAAAAADAFATcAAAAAAAAYAoCbgAAAAAAADCFhQTcqqoP8fqaHXmPHTDfxSepw6Or6vVV9ZGquqWq3l9VL6qqe8536QEAAAAAAFhnpyyonGtPMv3sJKcl+XSS9xxxPrftNaGqfiDJ84fR25McT/JFSZ6V5KlV9cju3q9cAAAAAAAA2NVCAm7dfd5+06vq3UkelOQ3u/tjR53PHvN+XE4E234yyQ939w1V9YAkv5zkoiSvq6oLuvvWw84fAAAAAACAzbb0/3CrqosyCbYlySvnUMQLhuFruvv7uvuGJOnuK5M8MSfudrtkDmUDAAAAAACw5pYecEvyjGH4kSRvmOWMh7vYtoJ5P7Fzenf/bZJXD6NPn2XZAAAAAAAAbIalBtyq6pQkTxtGf6W7/37GRTxiGH4yyTv3SPPGYfgVVXXmjMsHAAAAAABgzS37DrevTfL5w/uTPk6yqt5eVZ+qqpur6gNV9ctV9bB9slwwDN/b3bfvkeaqrdknud+Bag0AAAAAAACDZQfcnjkMr+judx8g/UOTbAXOjmXyGMi3VdXPVFXtkv78YXjNPvPcPu38PVMBAAAAAADALpYWcKuquyV5wjC6391ttyT5uSRfneSs7j4nyelJvjzJ64c0z07y/bvkPWMY3rzP/G/a9n7XR0pW1SVVdXlVXX7dddftMysAAAAAAAA2zTLvcHtqkjsl+fskr9orUXd/uLu/s7vf1t3Hh8+6u9/V3V+X5NeGpM+rqnPmUdHufml3X9zdF5977rnzKAIAAAAAAIAVtcyA2zOG4W9190emmM9zh+EZSR61Y9qNw/C0ffKfvu398SnqAQAAAAAAwAZaSsCtqu6f5MHD6H6Pkzyp7v5Akq3nPH7Rjslb/892r31msX3ah6apC3Bwxy69bNlVAAAAAACAmVjWHW7PHIbX58T/sM3DVcPw/lW117JeMAw7yXvnWBcAAAAAAADW0MIDblV1hyTfPIy+urs/PeX87ptk64/VPrBj8u8Ow7vkxB11Oz12GL6zu2/cIw0AAAAAAADsahl3uD06Jx7jeNLHSVZVnSTJC4bhzUnesn1Cd1+V5Iph9Dm7zPteSZ46jL7qZHUBAAAAAACAnZYRcHvGMLyqu//oAOl/r6q+v6ouHO6OS018aVW9Jsk3Del+rLuv3yX/84bhk6vqx6vqrGEeF2TyOMuzkvxVkpcddYEAAAAAAADYXAsNuFXV2Un++TB60rvbBvfJ5C62P0tyc1V9NMmNSd61bV4/m+RHdsvc3W9I8h+G0eck+XhVfTLJlUm+LMlHkzypu2893NIAAGyeY5detuwqAAAAAIzOou9w+4YkpyW5PckvHzDPczK5++yKJNcnOXvI/74kr0jy0O5+Vnf3XjPo7ucneUySy5J8PMmdM7mr7cVJLuzu9xxpaQAAAAAAANh4pyyysO5+eZKXHzLPryX5tRmU/TtJfmfa+QAAAAAAAMB2y/gPNwAAVpxHSwIAAACcIOAGAAAAAAAAUxBwAwAAAAAAgCkIuAEAAAAAAMAUBNwAAAAAAABgCgJuAAAAAAAAMAUBNwAAAAAAAJiCgBsAAAAAAABMQcANAAAAAAAApiDgBgAAAAAAAFMQcAMAAAAAAIApCLgBAAAAAADAFATcAAAAAAAAYAoCbgAAAAAAADAFATcAAAAAAACYgoAbAAAAAAAATEHADQBgD8cu
vWzZVQAAANh4vpsBq0DADQAAAAAAAKYg4AYAAAAAAABTEHADAAAAgBnw2DsA2FwCbgAAAAAAADAFATcAAAAAAACYgoAbAAAAAAAATEHADQAAAAAAAKYg4AYAAAAAAABTEHADAAAAAACAKQi4AQAAAAAAwBQE3AAAAObo2KWXLbsKAAAAzJmAG6wIF2oAAAAAAGCcBNwAAAAAAABgCgJuAAAAAAAAMAUBNwAAAAAAAJiCgBsAAAAAAABMQcANAAAAAAAApiDgBgAAAAAwhWOXXrbsKgCwZAJuAAAAAAAAMAUBNwAAAAAAAJiCgBsAAAAAAABMQcANAAAAAAAApiDgBgAAAAAAAFMQcAMAAAAAAIApCLgBAAAAAADAFATcAAAAAAAAYAoCbgAAAAAAADAFATcAAAAAAACYgoAbAMCGO3bpZcuuAgAcmeMYAABjIOAGAAAAAAAAU1hIwK2qnllVfZLX8X3yf15VXVJVb6+qT1TVDVX1J1X1nKq60wHKv7iq/ltVXVNVt1TVB6vq5VX1D2e7pAAAAAAAAGyaUxZc3m1Jrt9j2o27fVhVd0zy2iSPGz76dJLPJLloeD2lqh7Z3bsG7KrqGUlensmydpJPJfnCJN+W5Juq6uu6+y1HWxwAAAAAAAA23aIfKfmH3X3eHq8v3iPP8zMJtt2S5JlJTk9yRpInZhK8e3CSl+yWsaoemORlmQTbXpXknt19TpJjSX57mM9/r6pzZ7WAAAAAAAAAbJZR/4dbVZ2X5NnD6HO7+5Xd/Zme+M0k3zpMe+oQXNvpR5LcMcnlSZ7R3dclSXf/dZKvT/I3Sc5Jcuk8lwMAAAAAAID1NeqAW5InJ7lzkk8meenOid39uiR/kaSSPG37tKo6JyceQ/lT3f2ZHXmPJ/n5YfSpVVWzrToAAAAAAACbYOwBt0cMw7d29y17pHnTMHzkjs8flsndbdvT7PTGYXh+kvsfqYYAAAAAAABstEUH3B5QVVdW1c1VdUNVvaeqfrqq7rtH+guG4ZX7zPOqYXj/HXepbeX9cHd/7CR5t6cHAAA20LFLL1t2FQAAAFhRiw643SOTO8luSnJqkgck+a4kV1bV03ZJf/4wvGafeW5NO3N4HThvd9+c5BM70gMAAAAAAMCBLSrgdk2SH0xyYZJTu/vumQTHHp/JXWanJXllVX31jnxnDMOb95n3Tdvebw+4HSTv9vxn7pWgqi6pqsur6vLrrrvuJLMDAACOwh1mAAAArKqFBNy6+03d/SPdfWV3f3r47NbufkOSr0zyv5KckuSFi6jPYXX3S7v74u6++Nxzz112dZgTF3gAAAAAAICjWPQjJT9Hd38yyQuG0YdW1T22Tb5xGJ62zyxO3/b++CHzbs9/fN9UAAAAAAAAsIulB9wG7xyGleS+2z7f+v+1e+2Td2va8e6+4TB5q+q0JOcMox86WFUBAAAAWDRPpgEAxmwsAbe9XDUMH7BPmguG4Xv3yHteVd39JHm3pwcAAAAAAIADG0vA7SHb3l+97f3vDsN/UlWn7pH3McPwzTs+/4Mktw3vH71H3scOw2vyuQE7AI7IL08BAAAAgE0y94BbVdVJpp+d5NJh9H9293XbJv9Gklszeezjv9ol7xOTfEmSTvLq7dOG/4Z7wzD6PVX1eTvynpHkO4bRV3d3H2iBAAAAFsgPWQAAAMZvEXe43aeq3lFV31ZV9976sKruVFX/LMn/SPKPktye5Pu3Z+zuDyd50TD641X1LVV1hyH/45L8wjDt1d39p7uU/YOZ3OX2FUl+saruMeS9dybBvHsn+USSH5vNogIAAMBmEAwGAIATTllQOQ8ZXqmqW5LcmOTsJHccpt+U5Du6+y275P33SS5M8rgkv5TkZVX1mSSnD9P/KCfuVPss3X1FVX17kpcn+ZYk31xVn0pylyHJjUmevOOuOgAAAAAAADiwRdzhdm2SZyX51STvyyS4dpdheHkmd5dd0N3/dbfM3X1bkidmElR7RyaPmOwk
707y3CQP6+4b9iq8u1+Z5B8P5V+b5LQkf5PkFUku2iPIBwAAHIE7XgAAYH/OmWE9zf0Ot+6+OcnP3dgJLQAAG1JJREFUDq+jzuP2JC8ZXkfJf3mSbzxq+QAAAADAyR279LJc/cLHL7saALBwi7jDDQAAAAAAANaWgBsAAAAAsCePvwOAkxNwA47MCTcAALAIvnsAADB2Am7AXPhCDAAAAADAphBwAwAApuKHNgDAOnKOA8BhCLgBAAArxwUwAABmyfklMC0BNwAAAAA2novtAMA0BNwAAAAAAABgCgJuAAAAAAAAMAUBNwAA1pLHQgEAAACLIuDGaLgoBsAWxwQAAAAAVomAGwAAAAAAAExBwA0AAAAAAACmIOAGAMyMR0FyVPoOAADAwfj+BOMk4AYAAAAAwK4EdwAORsANgH05sQYAAFhPi/y+57slAOtOwA0AAICFcLEVAGB5nIvBfAm4AQAAHIELFgAAAGwRcAMAAAAAAIApCLgBwMi4YwIAAAAAVouAGwAAAAAAAExBwA0AANaUO2YB4HM5PgIA8yDgBgAAAAAAK84PCmC5BNwAAAAAAABGRhB1tQi4AQAAAAAAwBQE3AAAAICF8mvtzWS9AwDrTMANAAAWxIVGAADYHM7/1591zHYCbgAAAAAAG2iewQKBCGDTCLgBsNac4AOryL4LAAB251wZGCsBN5bKARKwHwCWbdH7Ifs9AIDN4vwPYDMIuAEAcCQuHAAAAABMCLgBsHAu0gMAAAAA60TADQAA8GMIAObKcQYAWHcCbgAAALBAAg8ArArHLI5K32ETCbgBAKwwX2IAANg0zoEBGCMBN1gwJ4UAAADMiu+YJPoBs6U/ARyNgBuwMVbthHHV6gvAeDiGfC5tAgAAwDwJuAEAAADAIfkxB7DK7MNg9gTcAAAAYARc+AIAgNUl4AbA0rioBAAAADB+ruHAyQm4AcAKc8ILAAAAAMsn4AbA5xDEAQAYD+dme9M2q8c6AwDWlYAbAACj5+IcTMc2BAAAMF8CbjBDY7mQMZZ6AADrybkGAOB8AAA+m4AbsNZ8AQAAAAAAYN4E3ABgxQksAwAAsAp8fwXWmYAbG8HBHABYtjGfj4y5bgAArB/nnxyFfsPYLSTgVlX3rqrvqqrXV9UHq+rWqrqhqq6oqhdW1fl75DtWVX2A18UnKf/RQ9kfqapbqur9VfWiqrrnfJYYYLYWeULh5AUAAGB6vlttJusdYHPNPeBWVV+Y5OokP53kCUm+MMktSU5L8sAkz01yZVU94iSzunaf1237lP8DSX57KPvuSW5N8kVJnpXkz6rqwiMuGgD8b75UAQAAAMDmWsQdbncYhpcleUqSu3X3XZKcnuRxST6Q5K5JXltV5+01k+4+b5/XFbvlqarHJXn+MPqTSc4Zyr4wybuTnJvkdVV15xksJ6ycsQUIxlYfAGAcnCMAAAAwdosIuH08yZd29xO6+9e7++NJ0t2f7u7fyiTodkuSs5P86xmX/YJh+Jru/r7uvmEo+8okT0xyPJO73S6ZcbkAAAAAAKwxPwwDtpt7wK27P7nXHWjD9D9P8o5h9MtnVW5VPSDJg4bRn9il3L9N8uph9OmzKhdYjHU9oVnX5QIApuMcAQAAZsO5NfOyiDvcDuJjw/AO+6Y6nK3/hPtkknfukeaNw/ArqurMGZYNAACwFlyQ4KD0FYD5sp9dXdYdbIalB9yq6pQkXzWMvmefdG+vqk9V1c1V9YGq+uWqetg+s75gGL63u2/fI81VW7NPcr9DVRwAAADWmIuDMGFbAAAOYukBtyTfmeS8JLcneeU+6R46pEmSY5k8BvJtVfUzVVW7pD9/GF6zzzy3Tzt/z1QAAAAAALDG/MAAprPUgFtVPTDJjw6j/6m7r9qR5JYkP5fkq5Oc1d3nJDk9k/96e/2Q5tlJvn+X2Z8xDG/epwo3bXu/5yMlq+qSqrq8qi6/7rrr9pkdAADMhy+/nIw+wqbQ1wEAGKOlBdyq6vwk
r01yWpI/TvLcnWm6+8Pd/Z3d/bbuPj581t39ru7+uiS/NiR9XlWdM6+6dvdLu/vi7r743HPPnVcxkMSXR+ZPH4Nxs40CAAAArJ6lBNyq6m5J3pTkvkn+Msnju/uWI8xqK0h3RpJH7Zh24zA8bZ/8p297f/wI5QNrxoVuAJg/x1sAWC7HYoDNYH+/WAsPuFXVXZK8McmFST6Y5NHdfe1R5tXdH0iy9YzHL9oxeev/2e61zyy2T/vQUeoArCcHo81ifQNsLscAGD/bKQAAq2ChAbeqOiPJG5JcnOTDmQTbPjin4rb+D+7+VbXXcl4wDDvJe+dUD2DNuQAwLtYHAMCE8yIAAFichQXcquq0JK9P8pVJPpZJsO0vp5znfZNs/anaB3ZM/t1heJckD95jFo8dhu/s7hv3SAMAwAEc5MKui7+rw7oCAACAg1tIwK2q7pTkN5I8Isknkjy2u688QL46SZIXDMObk7xl+4TuvirJFcPoc3aZ972SPHUYfdXJ6gIAAACwrvzQgk2k3wPzYN+yueYecKuqOyT5lST/LMkNSb62u991wOy/V1XfX1UXDvNJTXxpVb0myTcN6X6su6/fJf/zhuGTq+rHq+qsYR4XZHK33VlJ/irJy460cMDcOUABh2Gfsd6sXwAAAGCsFnGH21clefLw/o5JXltVH97j9Uc78t4nk7vY/izJzVX10SQ3JnlXkn8+pPnZJD+yW8Hd/YYk/2EYfU6Sj1fVJ5NcmeTLknw0yZO6+9aZLCkAjIjgBAAAcFi+R8Di2N5gvSwi4La9jFOT3HOf17k78j4nk7vPrkhyfZKzk9ye5H1JXpHkod39rO7uvQrv7ucneUySy5J8PMmdM7mr7cVJLuzu90y5fIyEAxQAAAAAALAMcw+4dffvdXcd8HVsR95f6+5Luvui7j6vu+/U3Wd29/26+9u6+50HrMPvdPcTuvvc7j61u7+4u5/d3dfOZaEBAIBR2uQfaW3yssOqs/3C+rJ9w8nZTlgVi7jDDTaaAwIAy+D4A7AY9rcAHJRjBsB6E3ADAGBluEgBAMAYOC+FcbAtMiYCbgBztN9B3wkBAMB8Od+a0A4wbscuvcx2uqGs9/nSvsCiCbjBhnCSsV4Osz5Xcd2vYp2BCdsvAMBnc350cNoKgFUm4AYAwCht+gWXTV9+gFmxPz0c7cVe9I1xsl5Wl3W33qzfzSTgxtrZlJ3ZpiwnAADAIqz7d6xVXb5VrTfzsa79YVbLta7tA7AqBNwA1piT7fVhXS6GdgZgi2MCi6CfAQCz4JxiHATcgF3ZSQMAOCdaNu3P2OiTME62TQDGQMAN4JCcyLOp9H2AzWK/DwAsknMPYNUJuLExHLQXT5sDAMvgHIQxW/X+uer1B2DC/vzktBG70S/Yj4AbLIAdMcDmcgxgTPRHgPGzr94c1jXszfYBrCIBNwBGbRVOslehjqw//RBgeeyD2TT6PADA5xJwA2bCFy4AAACA2XGtZTm0O3BUAm4AAPGlCgCYLecWh6fNAIBVJuAGOzjBZ1Po62yyw/Z/2wswZvZRADB/jrew+mzHzJuAG0xh3XfS6758i7RJbblJy7rK5r2e9APWkX59NNqNTWcbYNPZBgCATSHgBkvkiwezpk8BsCocs07QFgAwf463AMybgBsAh7ZqX1RWrb4AAJvMuRurQD8dP+tovFZ93ax6/Xdat+Xh4Kz72RNwY+PZsQDA+Dg+A8zGzv2p/SsA68DxbP60MRyegBvAnDlBgfk6yDZmO9zdItvFOgAAgNlwbr1aZrG+dpuHfrCZrPdxE3CDGbPTGw/rAtiNfQOMj+2SVaGvzo+2ZTf6BfvRPwAYGwE3YKmcILNp9Pn1b4N1X77dbOIys1n0cdgctnc2lb4PsH7s2xdPwA0AgKXxBQAAWBfOa2A1LHtbXXb5wPwIuAEwN04iWWX6LwCbZNrjnuPmCccuvUx7MDf6FsDqOuw+3D5/9Qi4MQp2HsBO9gurY5brynpn0+jz
ALA4jrswLvPaJm3rbAL9fJwE3IBRcJA4OG0FwKZzLFwv1icAnOC4yGHpMzAeAm6sJAcSgOWxD2bMPKKDMdPfgGnYh8D8bNr2tWnLuwjaFEgE3NgADnge97aurAvGTh9lVW313YP2YX19vKybxdHWAACss2We7zrXXh0CbrBB7JxhdmxPzNPY+tfY6jONdVqWRdJucDTz3nZsm8uzqm1/lHqv6rLCothG4HDGts2MqT5jqgtHI+AGe1i1Hdwi6jttGavWpqtAm3IyJ+sj+hDzMk3f0i9ZhFXvZ6tef2Bc7FOARbCvmZ42hHETcANgZpz4AZti5/7O/m/5rAMAAJwTAssk4AawZo56cumkdLXbYJXrDtPS/xmjg/TLVeq786jrKi3/MqxK+6xKPVm8sfaNsdZrE1kX01uFNlyFOgLMioAbc7XXQdXBFj6bbQLWi22aedPHYJxsmwCLZb+7eNqc3egXe1uFvwFidgTcYMWNYYc6hjpwONYZADAt5xMc1FH6iv61ONoaTrA9ALNkn7J5BNyAteSAxl42tW+s23KPbXnGVp91M+b2HXPdVsH29tOWR6PdWGWb9n+Y6758sM5svyyCfsYi6W/zIeAGjNJBL8D5vzIYD49JAJgv+0AAgNXgvA02k4AbAByQE2aWTVDz8PwwY7Vt2nqY1fJuzWfT2g+Aw3GcGJcxrI8x1GGVaC9gJwE3Vpb/AYDlWub2JOgwsQp13HSrvI5Wue4shz7DOtGf4WDWaVtZp2Vh9e3WH/XR1WcdMhb64vwIuLGW7DSAnRa9X5hFefZlh6O95m9ebWzdATA2q3BsWoU6srexr7+x1w/W1di3vYPWb+zLsc60/XIJuAErz4EEWKSx73PGXj9gc7fTTV1uGCOPXF68ZbTdppQ5L+u0LCezScvKcvjxJptCwI2NNeYd8pjrNq11XrZlm0fbHmSe1imsjnXbXj1elnlYx3U+5mVaxTvQgdVj2wfYPPb9LIOAG6O2fcdoJwmradW23VWr7yrZr21n2e7WIcDe/JgGOCr7htnQjgDLZT/MPAm4AXAgTkhgeWx/e9M2wG5Wad+wSnWFdTX2/19e5f3EKtd9rDa9TTd9+YFxE3Bj9KY5kDoIL5ZHe8HBrFI/XqW6rivrAA5ujNvLGOu0Zcx1WyTtsDjaer6072c7dull2oS1sKx+fNBybWfrwXqE2RBwgzUz7QHSAZbD0F8A+4HFG0ubj6UeR7HKdZ+3dW6bgzzaeJ2XH4DpOU7AONgWGSsBNzgEO3OWQb+br2X+l81Y1+0Y6zXGOgHsZRX2Wcus41HKHkObjv2Rc2OyKsvpDqijGWubjbVeY7LINppHWdbxbGjHzWS9awMWQ8CNlWdnOZ0xtt8Y68T60c+WzzoYr3W/W9ojkJkX6301rNJ6GnNdx1y3eVj08h40GLhp64H506c4iDH2kzHVaUx1WQWb1l6btrybZmMCblV1XlW9qKreX1W3VNW1VfX6qnrUsuvG8tjBTWgHxkJfPGEZbbHs/waY9X92jr0/jb1+i7CMi5djmMcYbF+OdVkm2GSr9kOJZZ7nzOPxoY4vh7dpyztP69qWq7ZfG2sdNsU6tvWq3iW6jusCDmMjAm5V9cAk70nyrCRflOTWJPdI8oQkv11Vly6xeszQ2HfqY6/fqlvH9l2lZZpVXVdpmWfhMMu77IAYy2U9AMu2yGOWfd56GMN6HEMdDuOo9V215TyIky3TOi7zQdnHwvKNeTsac92226ueq1J/2M3aB9yq6rQk/1+Suyf5kyQXdvddktw1yU8mqSQvqKrHLq+WMH5+Cc+sreqvtVaBdmAnfeJgtBPT0H8WZ9PbetOXH2CVrNp/ps5jHmMo3392rodV+B9g/Yy1D7gl+ddJ7pPkeJIndveVSdLdn+ru70vy2kyCbj+6vCrC51qVHfRBH+O2zICdW+ant+7LN2/abz60K3vRN8Zh1sf+TV2vm7rcrJ8xn5PbznanXdiysy/Mu2/M49Gv
BymP6a1aW/px+cHsdyeadpudRe9rmY9NCLg9fRj+Snf/3S7Tf2IYfllVfcmC6rTRNmVnMY//BLDjnS8XA+drU+5oG2OdmL1lf/lflX62KvVcd4tcD5u4zld9mcdS/4PWY57nw6vWFsu0CnWcpWmPx2Nrr7HVZ2zWsX3WcZn2s6mPIz3oD6LnXf66tu8qWKe2P+yyzONRu+vUnizOWgfcquqsJF8+jL5xj2TvSPLJ4f2j5l4p1oZfwSye/686ulW4aH/UL0V7/aJqmmUbU7ss0ya0wyr8h9462NS2O2z/Ouq+envedWjrWS/DKrbJKtb5ZJbxw6ZZlbnsoMphvnfM87i27B+bTJuW+Vu19bHMgP0894mrth4WZZ3OlY5qUwORi+SHxtOlnZVV+D6xiPNGlmOtA25J7p/J4yKT5MrdEnT37UneN4xesIhKbapZ/Rp1bDuOWZ4oz7JNVum27lX6ZfK0gdZNPcmfxy+NZmEs890v/Tz2lbvtj5d9MXHZZR3VLH98scj+eJR6j/FC6xjmu1sZh23fMfX1dbgQM+vzqcOWs8htalbHj1meN87iBy/TrsPDbHfTlLUK28NRjfV4P+YLfAftU2P7Bf06BG3GXtdpvjcussxF2bmtHGR/ug4X7LfPc8zr52Rm+f1nv3kvYr5j+x60aJuy3cGirXvA7fxt76/ZJ93WtPP3ScMUVuXi2V4nP7P4Qj2rk+x5X5yZdXmzuOgya4dp12naapG/cly1C7rLMs06OcqF9EVfFFrGxbHD9r1ZbBdj2VaOejF72n3FQcubRZqxWlabzbP8WZQ9r741tosps6zPUfdJ87x4Opb22e/caFEXaGa1XS5rm5138H0e5w7zCL6s4gW7w7TDNNv2vI7XW9vpfucfh5nXNBYRAJ+XnW24/fN5lzlN/r0+m+f1k5OVcdDlGsv1jsMc5+bZf+dxfjq27WzLMs9bD7ONL6r9Zl2neX8Pn2Y/eZD9x2HqcpQ6HKVeszav6xAne3+U+bK/6u5l12FuquppSV41jN6xu/9+j3SvSvK0JG/q7n+6y/RLklwyjH5JTtwRBwAAAAAAwOa4T3efu/PDU5ZRk1XT3S9N8tJl1wMAAAAAAIDxWfdHSt647f1p+6Q7fRgen2NdAAAAAAAAWEPrHnDb/r9t99on3da0D82xLgAAAAAAAKyhdQ+4/XmSrT+pe8BuCarq8zL5X7YkuWoRlQIAAAAAAGB9rHXArbtvSHL5MPqYPZI9JMldhvdvnnulAAAAjqiqfqiquqp+cZdpVw/THr74ms1HVT18WKarj5h/7doEAAAYp7UOuA1+ZRg+varO32X69w3DP+7u9y2oTgAAAGutqi4aAoTPXHZdAAAA5m0TAm4vSfLXSc5K8ptVdUGSVNVZVfXjSb5+SPe8JdUPAABgFt6f5H1Jblp2RQYXJfnBJM+cYh43ZbJM759FhQAAAObllGVXYN66++aqelImj4v8siRXVtWnkpyZScCxkzyvu9+0xGoCAABMpbsftew6zFp3/88k91t2PQAAAE5mE+5wS3dfkeTCJC9O8ldJ7pzkY0kuS/KY7n7hEqsHAAAAAADACtuIgFuSdPeHu/vZ3f3F3X1qd39+dz+hu9+87LoBAACbq6ruX1U/X1V/UVU3VdUnqurPqurFVfXlh5jP1VXVVfXwPabfqar+bVW9raqur6pbq+qvq+oVVXX/PfL84jDPH6qqO1TVd1XVFUM9r6+q36yqi3fJ10l+YRj9mmEe21+71nGX+Tx8SH/1PmmeXlXvqKrjQ53eUlWPP8j8AQAAZmXtHykJAAAwVlX1fyX56SR3GD66MZPH3l84vB6Y5OEzKOf8JL+V5EHDR7cPZd07yf+R5KlV9fTu/o09ZnFKJk8I+adJbktya5K7Jnl8kkdV1SO7++3b0l+b5LQkZw/pr98xv09Pu0xJUlX/Kcl3DqO3D2U9PMkjqurZsygDAADgIDbmDjcAAIAxqaqnZPLY+zsk+fUk
F3T3md191yR3T/LNSf54BuXcMcnrMgm2vTnJVyY5tbvPTnKvJD+T5NQk/7WqvniP2Xxnkgcn+cYkZ3b3WcP83jPkfdH2xN19XpKtgNcfdvd5O15/OIPlenpOBNv+Y5K7D213fpJfGj47d9pyAAAADkLADQAAYMGGINhPD6Ov7u6ndPd7t6Z39/Xd/aru/t4ZFPeMTIJlb0vytd399u6+bSjnQ9393UlekuT0JN+9xzzOSfKk7v7V7v70kPdPkzxzmP7gqrr3DOp6IFVVSX54GH1ldz+nuz8x1OvaoV6/n8kyAQAAzJ2AGwAAwOI9KskXJPlMkufMuaxnDMMXbQXadvGqYfiYPaa/rbv/YOeH3f3HSf52GL3w6FU8tIuSbN2N96M7J3Z3J3nBAusDAABsOP/hBgAAsHgPHYZXdPffzauQqjolyVcMoy+pqv+8R9Kt/5D7wj2m/9E+xfxdkn+QyX+6LcqXDcNru/t9e6T5wyR/H997AQCABfDFAwAAYPHuOQw/OOdy7pbkTsP7ux8g/Wl7fH7DPnluGYZ3PGilZmDrv9mu2StBd99aVR9Nct5iqgQAAGwyj5QEAABYX9u/831pd9fJXkurKQAAwAoTcAMAAFi8a4fhfeZczscy+Z+4JLn3nMtapOuG4b32SlBVd0pyj8VUBwAA2HQCbgAAAIv3jmH4wKr6gnkV0t23Jbl8GP3aeZWzh9uH4TzumnvXMLxnVf2jPdJ8ZfyNAgAAsCACbgAAAIv35iR/l+QOSX5izmX94jB8ZlU9aL+EVXXXGZb7qWF4zgznueXdSf7X8P65OydWVSW5dA7lAgAA7ErADQAAYMGGO8++dxh9alX9alXdb2t6Vd2tqr69ql48g+L+SyZ31J2a5C3DfM/eVtZ5VfX0qvr9JM+eQXlbrhyGF1TVQ2Y433R3J/mhYfRbq+rHquqcJKmqeyZ5RZJHJrlpluUCAADsRcANAABgCbr7/80k6HZ7kqckeW9V3VBVH8/kv9demuSBMyjntiRPSvI/ktxtmO/Hq+pjVXU8yYeS/HKSr07S05a3rdy/TPLWTB7r+I6hvKuH10NnMP9XJfnPw+i/S/LRqro+k+V5ZpLvy4n/egMAAJgrATcAAIAl6e6fSvKlSX4hydVJ7phJ0OtPk7woyXfPqJyPJPmaJE9P8oZMAlFnDZP/PMkvJfmGJC+cRXnbfH2Sn0vygSRnJrnP8Dp1FjPv7n+b5JuTvDPJrZn8X9zvJ3lCd8/i7kAAAIADqcmTOAAAAAAAAICjcIcbAAAAAAAATEHADQAAAAAAAKYg4AYAAAAAAABTEHADAAAAAACAKQi4AQAAAAAAwBQE3AAAAAAAAGAKAm4AAAAAAAAwBQE3AAAAAAAAmIKAGwAAAAAAAExBwA0AAAAAAACm8P8DwkjYA+2hmj0AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 2160x720 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "PLOTFILENAME = \"Diary.png\"\n",
    "\n",
    "def showNumberOfDiaryEntriesPerClient():\n",
    "    \"\"\"Plot a bar chart with the number of diary entries per client.\n",
    "\n",
    "    Reads the global dataListAnonymized, groups its rows by the CLIENT\n",
    "    column, saves the figure to PLOTFILENAME and displays it.\n",
    "    \"\"\"\n",
    "    # size() yields the number of rows per client directly\n",
    "    entriesPerClient = pd.DataFrame(dataListAnonymized).groupby(CLIENT).size()\n",
    "    # plain lists keep plt.bar independent of how matplotlib treats views\n",
    "    x = entriesPerClient.index.tolist()\n",
    "    y = entriesPerClient.tolist()\n",
    "\n",
    "    # NOTE: this changes the global font size for all later figures as well\n",
    "    matplotlib.rc(\"font\",size=24)\n",
    "    plt.figure(figsize=(30,10))\n",
    "    plt.bar(x,y)\n",
    "    plt.title(\"diary entries per client\")\n",
    "    plt.xlabel(\"client id\")\n",
    "    plt.ylabel(\"number of diary entries\")\n",
    "    # one bar per client: hide the x tick marks and labels\n",
    "    plt.tick_params(axis='x',which='both',bottom=False,labelbottom=False)\n",
    "    plt.savefig(PLOTFILENAME)\n",
    "    plt.show()\n",
    "    \n",
    "showNumberOfDiaryEntriesPerClient()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1245 398 854 391\n"
     ]
    }
   ],
   "source": [
    "valueFrequencies = showValueFrequencies(pd.DataFrame(dataListAnonymized))\n",
    "# sums of the frequency table: all keys, keys < 10, keys < 100, keys >= 100\n",
    "print(sum(valueFrequencies[size] for size in valueFrequencies.keys()),\n",
    "      sum(valueFrequencies[size] for size in valueFrequencies.keys() if size < 10),\n",
    "      sum(valueFrequencies[size] for size in valueFrequencies.keys() if size < 100),\n",
    "      sum(valueFrequencies[size] for size in valueFrequencies.keys() if size >= 100))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 1983-1245 = 738 empty diaries\n",
    "* 398 non-empty diaries with fewer than 10 entries\n",
    "* 854-398 = 456 diaries with at least 10 but fewer than 100 entries\n",
    "* 391 diaries with 100 entries or more (max 1961)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'DiaryEntry': 122330,\n",
       " 'Date': 122330,\n",
       " 'Time': 122330,\n",
       " 'Urge': 122330,\n",
       " 'StandardUnits': 122330,\n",
       " 'Quantity': 82118,\n",
       " 'MeasurementUnitName': 72465,\n",
       " 'Snapshot': 18090,\n",
       " 'Situation': 18090,\n",
       " 'Location': 18090,\n",
       " 'Companion': 18090,\n",
       " 'Activity': 18090,\n",
       " 'Occasion': 18090,\n",
       " 'Thoughts': 18090,\n",
       " 'Feeling': 18090,\n",
       " 'Type': 18090,\n",
       " 'Emotion': 18090,\n",
       " 'Description': 18090,\n",
       " 'Behavior': 18090,\n",
       " 'BehaviorDetails': 18090,\n",
       " 'DayTarget': 9653,\n",
       " 'dayOfWeek': 9653,\n",
       " 'DiaryEntries': 1983,\n",
       " 'Targets': 1983,\n",
       " 'DateLastOpenedByClient': 1983,\n",
       " 'Target': 1379,\n",
       " 'StartDate': 1379,\n",
       " 'WeekTargetQuantity': 1379}"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "QUERYDIARY = \"./Diary\"\n",
    "\n",
    "def showDiaryTextFieldFrequencies():\n",
    "    \"\"\"Count how often each XML tag occurs below the Diary sections.\n",
    "\n",
    "    Scans every file in DATADIR whose name matches FILEPATTERN and returns\n",
    "    a dict that maps each tag name to its number of occurrences.\n",
    "    \"\"\"\n",
    "    tagCounts = {}\n",
    "    for fileName in sorted(os.listdir(DATADIR)):\n",
    "        if not re.search(FILEPATTERN,fileName):\n",
    "            continue\n",
    "        xmlRoot = readGzippedXmlFile(DATADIR+fileName)\n",
    "        for diary in xmlRoot.findall(QUERYDIARY):\n",
    "            # the .//* path selects every descendant element of the section\n",
    "            for element in diary.findall(\".//*\"):\n",
    "                tagCounts[element.tag] = tagCounts.get(element.tag,0)+1\n",
    "    return(tagCounts)\n",
    "\n",
    "tags = showDiaryTextFieldFrequencies()\n",
    "dict(sorted(tags.items(),key=lambda item:item[1],reverse=True))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Process tag Messages (emails.csv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "BODY = \"Body\"\n",
    "DATADIRANONYMIZED = \"../usb/releases/20200320/\"\n",
    "DATESENT = \"DateSent\"\n",
    "OUTFILENAME = \"emails.csv.gz\"\n",
    "QUERYMESSAGES = \"./Messages\"\n",
    "QUERYMESSAGESENTRY = \"./Message\"\n",
    "UNKNOWN = \"-\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "def readMessages():\n",
    "    \"\"\"Collect all messages from the anonymized client files.\n",
    "\n",
    "    Returns a list with one dict per Message element: the client id plus\n",
    "    the fields returned by getAllTextFields. Files are visited in sorted\n",
    "    name order; within a file the messages are sorted by DATESENT.\n",
    "    \"\"\"\n",
    "    allMessages = []\n",
    "    for fileName in sorted(os.listdir(DATADIRANONYMIZED)):\n",
    "        if not re.search(FILEPATTERN,fileName):\n",
    "            continue\n",
    "        xmlRoot = readGzippedXmlFile(DATADIRANONYMIZED+fileName)\n",
    "        clientMessages = []\n",
    "        for messagesSection in xmlRoot.findall(QUERYMESSAGES):\n",
    "            for messageElement in messagesSection.findall(QUERYMESSAGESENTRY):\n",
    "                record = {CLIENT:fileNameToId(fileName)}\n",
    "                record.update(getAllTextFields(messageElement))\n",
    "                clientMessages.append(record)\n",
    "        # stable sort per client, so equal dates keep their document order\n",
    "        clientMessages.sort(key=lambda record:record[DATESENT])\n",
    "        allMessages.extend(clientMessages)\n",
    "    return(allMessages)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`getAllTextFields` can be found in the Diary code while `saveAnswerDataDf` is in the Intake code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of entries in list: 45469\n"
     ]
    }
   ],
   "source": [
    "dataListAnonymized = readMessages()\n",
    "print(f\"Number of entries in list: {len(dataListAnonymized)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABs0AAAJlCAYAAACR2byAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAABJ1UlEQVR4nO3debg8Z10n7M+XhISsEEiQoMIPGDUmyGKiBhAkbDqsAi84wMyIziTqixsiElzGuKGMuKC4THgHQQVmggoKQUE2RQE1LFHCIiMEHAJJCJDkRxYCed4/uo5pOt19us/pPqe7676v61zV1VVP1VNrd59PPVXVWgsAAAAAAAD02S32uwIAAAAAAACw34RmAAAAAAAA9J7QDAAAAAAAgN4TmgEAAAAAANB7QjMAAAAAAAB6T2gGAAAAAABA7wnNAAAAYIeq6sVV1aqqTRh+ztbwqjqwx9UDAADmIDQDAAAAAACg94RmAAAAQJKkqp461DLugftdHwAA2EtCMwAAAFiS1to5rbXq/i7e7/oAAACTCc0AAAAAAADoPaEZAAAAAAAAvSc0AwCANTTuuUNV9ZSqelNVXVZVn6uqf6qqZ1bVESNlH1xVr6qqf62q66rqI1X1a1V12xnnfeeq+qWquqCqrqiqz1fVJ6rqtVX1nVV16DblT6mqF1TVP1bVVVV1Q1VdWlXvrao/qaqnVdWXL6HsV1XV07t6fqxb9mu79fDKqvqOqjpkxnVwz6p6SVf2+qq6pKpeU1WP7IbP/Fyobpl+o9ten+mm93+r6o+r6turqrYpf5+qelFVfaCqDg5tj3+sqpdV1XfPum3HTPvFW8vR9R9bVf+tqi6sqiur6uqqeldVnT26n02Z5o73nzH1Oaqqfqyq/q6qPtUN+/WdLGs3vftU1e9W1UVV9dlu//hwd1w9s6rusoNpnjO0LxzYZtwd7wtV9ZZuHhd3/UdX1bOr6t3dsXKw224/VVVHjyn/wG69/t7Q228eqvvW34vnXQcAALAuqrW233UAAADmVFVPzU3/3H5oku9N8vgJo78lySOSXJvkvyf50QnjfSjJN7fWLpsy32ckeU6Sw6ZU751JHtNa+/iY8v9vkt9Isl049VOttZ9fYNlvT/LKbcolyV8neWxr7dOTRqiqpyX59SSTwp3fSfL3uWn7nNFae8uY6dwig+3x9Ey/oPF1Sb6jtXblmGn8YpKzp5Td8p9aa384w3ij039xku/seu+S5C+T/LsJo38wyUNba/86ZXq73X+G63PXDNbNV42M9vzW2g9Pmf64eh2d5H8meeI2o17YWrvXpDq11m4WalXVOUl+uuu9y7jnmi1oX3hLkm9J8tEkD07y57n5utnyj0ke2Fr7zFD5ByZ585R5b3lJa+2pM4wHAABrZ+oVoAAAwFr4uSSnJ3lFkj9I8vEkd07y7CTfkOSBSZ6Z5OoMArM3JDk3yb8kuX2SH07yrRn8g/1XkvyncTMZ+ef/h5P8VpL3J7k0yYkZhHbfmeTUJH9RVae31j43VP7uuSn0uiLJ7yZ5a5LLk9wyyZ26+j56zLx3XLZzaJKDSV6bQTDwwSSfTXLbDMKXM7vyD0jyh0kePmEdPCbJC7re67s6vbab9skZrMvvS3LvCfUY9qLcFABdmME2+VC3fHfOYDs8NoNt80dV9a2ttRuH6vLvc1Ng9q9JfjuDwOlTSW6VQch1nySPmaEuszgvyd2SvCTJy7v53C3J9ye5f5KvSfL6qrp3a+260cK73X/GeGUG2+6FSf4kyWVJviLbh6qj9TosgzDqvt1bH8tgXf5dkisz2Ee+PoN962YttBZkV/vCiCOTnJ/ky5M8L8lfJPlMBtvqWRms33tkcKx/91C5f0jydRnsL1uh83d37w/7TAAAYENpaQYAAGtopKVZkpzdWnvuyDhHJ7kog0DpqiSHJ3lha+0HRsY7NMnbMgiNbkhyx9bap0bGuV8GIVUl+c0kP9Ja+8KY
ej02yR934/1ka+0Xhob9bJKf6nrv1Vq7cMry3Xa4tdduynbv3SHJNa21q6aUG57HA1prbx0ZflgGYc+XJ/l8kge31v5mZJxbJnl1BuHGlpu1NKuqJyV5Wdf7rCS/3Mb8OKuqH8qgVVuS/MfW2kuHhv1+BmHKNUm+qrV2yYTlOiTJMa21z45d8ClGWnYlyX9trf3PkXEqg6Dxyd1b57TWfmZknF3vP2Pq05J8e2vtz+ZdrpFp/kKSH+96X5vkCa21ayaMe6fW2scm1WknLc0WsS90w9+SQUuzZHC8P2D0OKmqIzMIVk/KYB++Y2vtipFxnpptWkkCAMCm8kwzAABYf/8wGpglSWvtYJIXd73HZtAq6xljxvtCBi23kkGrrfuMmcdPZBBkvD/J08cFHt20XplBq58kOWtk8B267memhV7ddEZvj7ibsmmtfXJaYNb5mQxaTyXjb3X5mAwCsyT5zdHArJvPDUn+SwaBxDRb4dxfttb++7iQpJve85Nc0PVOWp8fnBSYddP44k4CszH+YjQw66bfMmhdtxW+fN+Y55ItYv8Z9QcLCMyOTbIVIl+S5EmTArOufh+bNGwXFrEvjPpv446Tbtl+s+s9LDe1rgMAACI0AwCATfDyKcOG/3H+J621SWHOe4Ze33V4QNdi7aFd7ytaa1/cpj5v6bp3qqqvGHp/6xlVx1XV47aZxqjdlL2Zqjqsqr6iqr62qu7e3f7xa5P8326UcbdXfMjQ65dMmnb3LK7XT5n3Sd28kunbbstfdd3Tu1ZjW7bWySlVdfoM09mtmwVmW7pA8ryu98syuP1fkoXuP6P+YJvpzOKMJMd0r8+dIVhdqAXuC6NeOmXY3w+9vuvEsQAAoIeEZgAAsP4+OGXYZ3cw3rEjw74+Nz0P+b9VVZv2l5tasiSDZ1VteWkGzwFLkj+uqrdU1TOq6vSqOnxK3XZbNsmgVVFV/WRVvSfJ5zJ4Dtj7kvzT0N+9utGPHzOJr+u612Zw28tpRp8DNewbh16/aIb1udU68LAMnq+15fcyuEXhYUn+pqrOr6qnVdW9x7T0WoS/m2P4PYZeL2r/GfWemWs+2alDr9+8gOnNa1H7wrDLR2+vOmK4JebosQ4AAL0mNAMAgPU38XZySW7cwXijLVhuP3eNbnLk1ovW2v9J8tgkn+ze+pYkz0vy9iRXVtWbqup7quqI0YnspmySdC3J3p/k55LcMzeFONvWe8hWSPGp1tqNY4YPu3TKsEWtz79O8t1Jrsxgmz08yQuSvCvJZ7oQ7SkLDNAu22b4J4de327o9UKWd4zP7GK6W04Yej3xFpdLtIx1M+04T6Yf6wAA0GvLuPoQAADYLMO/G34qyavmKPuR4Z7W2p9X1V0zCMC+Lck3J7lLksMzuFXeGUl+vKoe01p7zyLKVtUtk/xRkjt2b/1Bkv+VQWuxy5JcvxWCVdVfJ7l/Bs/f2o1p5YfX53dnequ0UV8S7LTWXlxVr0ryhAxugXi/DJbz6AxCtIcneVZVPaq19tE55jPO2GdtDZm0zAvbf76kMtvf5nFe2y3fMixsXwAAAHZPaAYAAGzn8qHXt2qtvXc3E2utXZvkZd1fqurEDAKfMzMIwu6U5FVV9dWjz2DbYdkzknxN9/qXWmvPnlK946YM27qt3fFVdYttWpudMGXY8Po8ZAHr87NJXtj9parukuRbk3xfBrdJ/LoMQsL77GY+GTyr7F+nDB9uNXXF0OuF7j8LNly3L0/yL/s4/13vCwAAwO64PSMAALCdd+emW7p926In3lr7RGvt95M8IMn53dt3zgwhz4xlv27o9csnTauqjkly0pTZ/VPXPSLJydtU7RumDHvn0OtlrM+PtNZ+t6vDe7q3T6+qO+9y0t+4zfBvGnr9j0Ovl7r/7NIFQ6/P2If5L3Vf2KH9aHEHAAArQWgGAABM1Vr7dJK/6npPraql/HO/tdaSvH7orWmtteYpO3yHjWnPyDor0+/G8Yah1985aaSu9dvD
pkznwiQf7l5/e1WdMmXcHeta2r156K2Z1+cE3z1pQBc4PrHrvTRDodle7T879OYkV3Wvz6qqY/d4/nuyL8zpuqHXh+9bLQAAYB8IzQAAgFmck5taoLykqr5+2shV9dVV9R0j7z2uqm43pUzlS8OmDw8N23HZJP889Hps8FNV90/yc5Om3/nT3PQcqR+oqpu1hKuqQ5P8f5kSNnQB3zld7yFJXtk9q22iqjq1qh4+8t6TquroKWUOT/LArvfGJLt9ptnDq+qpY+ZTSX4ryfHdW7/TWvvCyGjnZJf7zzK01q5O8htd7x2TvLyqJgarVfWVC57/QvaFBfv40OuvXuJ8AABg5XimGQAAsK3W2l9X1U8l+fkMnl31jqp6aQa3RLy4G+32Se6Z5BFJ7pvBc8f+99BkfjDJy6rqLzNotXVRBs++OjLJXZN8V266Rd7bWmvvWlDZv0jyiSQnJjmzqm6b5PczCAdOSPLoJP81yWeTfCw3Pf9sdB18vqr+3ySvyiAUe1NV/UaS1yY5mMEtG384ydcneUeS07eKjpnWH1TVA7r5flWSf6yq38ugtdzHM/itdockp3b1u1eSX+jmteUXk5xbVa9N8pYkH0jymSTHZnCbye9Jcu9u3P/VWht+ftZO/H2SF3X1/l9JPpXkbkm+P4PbY6arw3PHLO8i9p9l+dkkD+rm+fAk76uq385gG16VwXPu7pXkMUlunZvW6UIsaF9YpHcn+VySo5L8WFVdluR9SW7ohl/ZWvvEkuYNAAD7SmgGAADMpLX2C1V1eZJfzeAf6k/t/ia5csx7hyd5ZPc3yQVJ/p9FlW2tXVtVT07y6iRHJ3l89zfs0iSPzSCIGhuaddP606r6/iTPT3KrJD/W/Q377QyeVbUVml2X8c7KIKT7qQzW5/d3f5OMW59HZ3BbxCeOGbblz5N875Ths3pikr/MIKD8rjHD/znJt7bWxi7vgvafhWut3VBV35rkJUkel8Ez8W4W/HUuXFI1FrEvLERr7XNV9csZtID7igwC0mEvyfTtBgAAa0toBgAAzKy1dm5V/XEGrWIelkHrqttmcPu/KzIITt6W5NWttb8bKf6EDFr0PCiD1lh3yKClV8sgtHpXklck+d+ttRsXWDattbdU1b2SPCvJQ5N8eQataT6aQZj2m621ywZ3Gtx2HfxWVf1Nkmdk0Lrt9kk+3dXhd1trr66qHxkqMjbg6G7N93Ndq6KzumX76gxaNt2Q5LIMWm69NcmfttbeOzKJb07y4K7c1yX5sm6d3JDBbST/IclLW2vnb7tQM2itfbSqTsugNd3jk9wlg1v+fyiDFmHPb61du800drP/LE1r7WCSx3ctvp6a5P4ZtEysDFopXpxBq7hXLGn+u90XFl2fn6mqf85gXdwzg210y2XOEwAAVkENvpsDAACwKFX1ogxaY92Q5JjW2vX7XKUdqaoXJ/nOJGmtbZ8oAgAArLFb7HcFAAAANklVHZ3k27ved65rYAYAANA3M4dmVXWnqvrhqnp1VX2sqq6vqqur6sKq+qWqOnFCuQNV1Wb4O22b+T+hqt5UVVdU1TVV9f6q+vmqOmbehQYAANipqvrqKcNumeRFGdxWL91rAAAA1sBMt2esqq/M4F7/w7fjuCqDBxQf0vV/JsnjW2tvHil7IMlHut5Lp8zmW1trYx+qXFXnJjmz6/1CBg/SPrrr/3CS+7fWLtl2QQAAAHapqj6d5D1J/jTJhUk+m+SYDJ61dlYGz+lKkguS3Le1dsPe13Ix3J4RAADok0NnHG8rGDs/yYuTvLG19pmqOiyDB0//VgYPgX5VVX1Na+2T4ybSWrvDvBWsqu/LIDC7MYOHdv9ma+36qrpvkpcluWuS8zJ4CDYAAMCy3SLJGd3fJP+Q5NHrHJgBAAD0zawtzW6d5MCUlmAnJXl3klslOae19jNDww6ka2k275WJVXV4ko8luX2SX2ut/cjI8HsneWcGLeAe3Vp79XbTPP7449uBAwfmqQYA
AMC/ueqqq3LllVfm4MGDueGGG/KFL3whSXLooYfmqKOOynHHHZfjjjsuVevfMOviiy/OFVdckSQ59dRT97k2AAAAu/fOd77zU621E8YNm6mlWWvtygxuOzJp+Aeq6h1JHphkkb+kHpJBYNaS/MqY+b67qt6Q5KFJnpJk29DswIEDueCCCxZYRQAAAAAAANZBVX100rBbLHA+V3TdQ6aONZ+t2528t7X28QnjvK7rPmiB8wUAAAAAAKBHFhKaVdWhSe7X9b53ynhvr6qrquraqvpIVf1hVU17FtnWA7QvmjLO+7ruCVV1/Oy1BgAAAAAAgIFFtTR7WpI7JLkxye9PGe/0bpwkOZDBLRXfWlW/XuNv+H9i171kyjSHh504cSwAAAAAAACYYNehWVXdI8lzut4XtNZGW4Vdl+S3kzwgyTGttdskOTKDZ59tPYPsh5I8e8zkj+q6106pwjVDr4+eUMezquqCqrrg8ssvnzIpAAAAAAAA+mhXoVlVnZjkVRmEYO9M8qzRcVprn2ytPa219tbW2sHuvdZae1dr7dFJXtGN+uNVdZvRWWxNZjf1bK2d21o7rbV22gknnLCbSQEAAAAAALCBdhyaVdVtk7w+yV2SfCjJI1pr1+1gUltB21FJHjwy7GDXPXJK+eFhByeOBQAAAAAAABPsKDSrqlsneV2Suyf5WJKHtNYu3cm0WmsfSbJ1z8S7jgzeel7ZHadMYnjYJ3ZSBwAAAAAAAPpt7tCsqo5K8tokpyX5ZAaB2cd2WY9Jt2F8X9c9ZUrZk7vu5a21T+2yHgAAAAAAAPTQXKFZVR2R5NVJ7pvkigwCsw/tpgJVdZckx3e9F48MfnPXPaWqJrU2e1jXfeNu6gEAAAAAAEB/zRyaVdVhSf4kyRlJPpvkYa21i2YoV9uM8pyue22SN40Me2OSy7p6/siYad8zyUO63pduVxcAAAAAAAAYZ6bQrKoOSfKyJN+W5Ook/7619q4Z5/GWqnp2Vd29m05q4N5V9cok/6Eb77mttU8PF2ytXZ/knK736VX1jKo6vJvGfZK8sluGv22tvWbG+gAAAAAAAMCXqNZGHyM2ZqSqByT5q673uiRXThn9X1tr3zBU9uIkd+56b0hyVZIjkxwxVOYFSX6wTahMVZ2b5MyhaVyf5Oiu/8NJ7t9au2TbBUly2mmntQsuuGCWUQEAAAAAANggVfXO1tpp44YdOuM0hluk3ar7m+S6kf5nJnlokm9Mcockt03y+SQfTPK3Sc5trf3dtJm31s6qqjck+d4k98ogcPtAkj/OoIXa1TMuBwAAAAAAANzMTKFZa+0tSbZ7Ntmksq9I8oqdlB2ZznlJztvtdAAAAAAAAGDUTM80AwAAAAAAgE0mNAMAAAAAAKD3hGYAAAAAAAD0ntAMAAAAAACA3hOaAQAAAAAA0HtCMwAAAAAAAHpPaAYAAAAAAEDvCc0AAAAAAADoPaEZAAAAAAAAvSc0AwAAAAAAoPeEZgAAQw6cff5+VwEAAACAfSA0AwAAAAAAoPeEZgAAAAAAAPSe0AwAAAAAAIDeE5oBAAAAAADQe0IzAAAAAAAAek9oBgAAAAAAQO8JzQAAAAAAAOg9oRkAAAAAAAC9JzQDAAAAAACg94RmAAAAAAAA9J7QDAAAAAAAgN4TmgEAAAAAANB7QjMAAAAAAAB6T2gGAAAAAABA7wnNAAAAAAAA6D2hGQAAAAAAAL0nNAMAAAAAAKD3hGYAAAAAAAD0ntAMAAAAAACA3hOaAQAAAAAA0HtCMwAAAAAAAHpPaAYAAAAAAEDvCc0AAAAAAADoPaEZAAAAAAAAvSc0AwAAAAAAoPeEZgAAAAAAAPSe0AwAAAAAAIDeE5oBAAAAAADQe0IzAAAAAAAAek9oBgAAAAAAQO8JzQAAAAAAAOg9oRkAAAAAAAC9JzQDAAAAAACg94RmAAAAAAAA9J7QDAAAAAAAgN4TmgEAAAAAANB7QjMAAAAAAAB6T2gGAAAAAABA
7wnNAAAAAAAA6D2hGQAAAAAAAL0nNAMAAAAAAKD3hGYAAAAAAAD0ntAMAAAAAACA3hOaAQAAAAAA0HtCMwAAAAAAAHpPaAYAAAAAAEDvCc0AAAAAAADoPaEZAAAAAAAAvSc0AwAAAAAAoPeEZgAAAAAAAPSe0AwAAAAAAIDeE5oBAAAAAADQe0IzAAAAAAAAek9oBgAAAAAAQO8JzQAAAAAAAOg9oRkAAAAAAAC9JzQDAAAAAACg94RmAAAAAAAA9J7QDAAAAAAAgN4TmgEAAAAAANB7QjMAAAAAAAB6T2gGAAAAAABA7wnNAAAAAAAA6D2hGQAAAAAAAL0nNAMAAAAAAKD3hGYAAAAAAAD0ntAMAAAAAACA3hOaAQAAAAAA0HtCMwAAAAAAAHpPaAYAAAAAAEDvCc0AAAAAAADoPaEZAAAAAAAAvSc0AwAAAAAAoPeEZgAAAAAAAPSe0AwAAAAAAIDeE5oBAAAAAADQe0IzAAAAAAAAek9oBgAAAAAAQO/NHJpV1Z2q6oer6tVV9bGqur6qrq6qC6vql6rqxG3KH1ZVP1ZV76mqg1X12ap6e1WdVVU1w/yfUFVvqqorquqaqnp/Vf18VR0z6zIAAAAAAADAOIfOMlJVfWWSi5MMh1tXJTkqyT26v7Oq6vGttTePKX9skjclObV765okRyQ5vft7VFU9trX2hQnzPzfJmV3vF5Jcl+SkJD+R5ElVdf/W2iWzLAsAAAAAAACMmrWl2SFd9/wkT0hy29barZMcmeThST6S5Lgkr6qqO4wp/8IMArNPJ3lUkqO7sk/NIAB7ZJKfGTfjqvq+DAKzG5M8M8nRrbVjktwvyUeT3DXJeTMuBwAAAAAAANzMrKHZZ5Lcu7X2yNbaH7XWPpMkrbXPt9b+PIPg7Lokxyb5nuGCVXXvJE/ser+rtfaaNvDF1tpLkpzdDXt6Vd1+pOzhSc7pep/fWntea+36bt5vS/LYJC3J/arqUbMvNgAAAAAAANxkptCstXZla+3CKcM/kOQdXe+pI4Of3HU/2Fr7szHFz01yZQa3a3zcyLCHJLl9BsHYr4yZ77uTvKHrfcq0ZQAAAAAAAIBJZm1pNosruu4hI++f0XVfP65Qa+3aJG/teh80oex7W2sfnzDf100oCwAAAAAAADNZSGhWVYdm8IyxJHnv0PuV5KSu96Ipk3hf1z155P2t/lnKnlBVx29fWwAAAAAAAPhSi2pp9rQkd0hyY5LfH3r/2CRHda8vmVJ+a9iJI++fODJ8Wtlx5ZMkVXVWVV1QVRdcfvnlUyYFAAAAAABAH+06NKuqeyR5Ttf7gtbacKuwo4ZeXztlMtd03aNH3t8qP0vZceWTJK21c1trp7XWTjvhhBOmTAoAAAAAAIA+2lVoVlUnJnlVkiOTvDPJs0ZHGXrddjKLXZQFAAAAAACAmew4NKuq2yZ5fZK7JPlQkke01q4bGe3g0Osjp0xua9jBkfcPjgyfVnZceQAAAAAAANjWjkKzqrp1ktcluXuSjyV5SGvt0jGjXpXkc93rO06Z5NawT4y8f8nI8Gllx5UHAAAAAACAbc0dmlXVUUlem+S0JJ/MIDD72LhxW2styfu73lOmTPbkrvu+kfe3+mcpe3lr7VNTxgMAAAAAAICx5grNquqIJK9Oct8kV2QQmH1om2Jv7roPnTDNWyW5f9f7xgllT6mqSa3NHjahLAAAAAAAAMxk5tCsqg5L8idJzkjy2SQPa61dNEPRl3fdk6rqkWOGn5nk1kmuTfLKkWFvTHJZV88fGVOneyZ5SNf70hnqAgAAAAAAADczU2hWVYckeVmSb0tydZJ/31p71yxlW2vvTnJe1/viqnr41jSr6j8neW437Ndaa5eNlL0+yTld79Or6hlVdXhX/j4ZhGy3SPK3rbXXzFIfAAAAAAAAGHXojOPdL8nju9e3TPKqqpo07r+21r5h5L0zk9wtyalJzq+qa5IckuTwbvhrkvz0uIm11n6nqu7dTeN5
SX6xqq5PcnQ3yoeTPHHG5QAAAAAAAICbmTU0G26Rdqvub5LrRt9orV1VVfdN8vQkT0ry75Jcn+TdSX4vyQtba23SBFtrZ1XVG5J8b5J7JTkiyQeS/HGS57bWrp5xOQAAAAAAAOBmZgrNWmtvSTKxadmM0/h8BrdifO52404of15uus0jAAAAAAAALMxMzzQDAAAAAACATSY0AwAAAAAAoPeEZgAAAAAAAPSe0AwAAAAAAIDeE5oBAAAAAADQe0IzAAAAAAAAek9oBgAAAAAAQO8JzQAAAAAAAOg9oRkAAAAAAAC9JzQDAAAAAACg94RmAAAAAAAA9J7QDAAAAAAAgN4TmgEAAAAAANB7QjMAAAAAAAB6T2gGAAAAAABA7wnNAAAAAAAA6D2hGQAAAAAAAL0nNAMAAAAAAKD3hGYAAAAAAAD0ntAMAAAAAACA3hOaAQAAAAAA0HtCMwAAAAAAAHpPaAYAAAAAAEDvCc0AAAAAAADoPaEZAAAAAAAAvSc0AwAAAAAAoPeEZgAAAAAAAPSe0AwAAAAAAIDeE5oBAAAAAADQe0IzAAAAAAAAek9oBgAAAAAAQO8JzQAAAAAAAOg9oRkAAAAAAAC9JzQDAAAAAACg94RmAAAAAAAA9J7QDAAAAAAAgN4TmgEAAAAAANB7QjMAAAAAAAB6T2gGAAAAAABA7wnNAAAAAAAA6D2hGQAAAAAAAL0nNAMAAAAAAKD3hGYAAAAAAAD0ntAMAAAAAACA3hOaAQAAAAAA0HtCMwAAAAAAAHpPaAYAAAAAAEDvCc0AAAAAAADoPaEZAAAAAAAAvSc0AwAAAAAAoPeEZgAAAAAAAPSe0AwAAAAAAIDeE5oBAAAAAADQe0IzAAAAAAAAek9oBgAAAAAAQO8JzQAAgH9z4Ozz97sKAAAAsC+EZgAAAAAAAPSe0AwAAAAAAIDeE5oBAAAAAADQe0IzAAAAAAAAek9oBgAAAAAAQO8JzQAAAAAAAOg9oRkAAAAAAAC9JzQDAAAAAACg94RmAAAAAAAA9J7QDAAAAAAAgN4TmgEAADlw9vn7XQUAAADYV0IzAAAAAAAAek9oBgAAAAAAQO8JzQAAAAAAAOg9oRkAAAAAAAC9JzQDAAAAAACg94RmAAAAAAAA9J7QDAAAAAAAgN4TmgEAAAAAANB7QjMAAAAAAAB6T2gGAAAAAABA7wnNAAAAAAAA6D2hGQAAAAAAAL0nNAMAAAAAAKD3hGYAAAAAAAD0ntAMAAAAAACA3hOaAQAAAAAA0HtCMwAAAAAAAHpPaAYAAAAAAEDvCc0AAAAAAADoPaEZAAAAAAAAvSc0AwAAAAAAoPeEZgAAAAAAAPTezKFZVR1TVY+uqp+rqj+vqk9VVev+TppS7sDQeNP+Tttm/k+oqjdV1RVVdU1Vvb+qfr6qjplngQEAAAAAAGDUoXOM++Akr9zl/C6dMuyGSQOq6twkZ3a9X0hyXZKTkvxEkidV1f1ba5fssm4AAAAAAAD01DyhWZJcluSCJP+Q5ONJzp2ncGvtDnPOL1X1fRkEZjcmeVaS32ytXV9V903ysiR3TXJekm+ed9oAAAAAAACQzBeavbq19qqtnqo6sPDajKiqw5Oc0/U+v7X2vK1hrbW3VdVjk7wzyf2q6lGttVcvu04AAAAAAABsnpmfadZa++IyKzLBQ5LcPklL8iujA1tr707yhq73KXtYLwAAAAAAADbIzKHZPjmj6763tfbxCeO8rus+aA/qAwAAAAAAwAba09Csqt5eVVdV1bVV9ZGq+sOqmvYsspO77kVTxnlf1z2hqo5fTE0BAAAAAADok71uaXZ6khu71wcyuKXiW6vq16uqxox/Yte9ZMo0h4edOG6Eqjqrqi6oqgsuv/zyOasMAAAAAADAptuL0Oy6JL+d5AFJjmmt3SbJkUlOTfLqbpwfSvLsMWWP6rrXTpn+NUOvjx43Qmvt3Nbaaa2100444YQ5qg4AAAAAAEAfLD00a619srX2
tNbaW1trB7v3WmvtXa21Ryd5RTfqj1fVbUaKb7U+a8uuJwAAm+PA2efvdxVYcfYRAAAARu317RnHeVbXPSrJg0eGHey6R04pPzzs4MSxAAAAAAAAYIJ9D81aax9JsvWgsbuODN56Xtkdp0xieNgnFlUvAAAAAAAA+mPfQ7POpNswvq/rnjKl7Mld9/LW2qcWWisAAAAAAAB6Yd9Ds6q6S5Lju96LRwa/ueueUlWTWps9rOu+ccFVAwAAAAAAoCeWHppVVW0zynO67rVJ3jQy7I1JLsugnj8yZtr3TPKQrvelu6gmAAAAAAAAPTZXaFZVx2/9JTluaNBthodV1fB031JVz66qu1fVId10qqruXVWvTPIfuvGe21r79PD8WmvXJzmn6316VT2jqg7vpnGfJK/sluFvW2uvmWdZAAAAAAAAYMuhc45/+YT33z7Sf5fcdKvFO2fQmuw5SW6oqquSHJnkiKHxX5DkZ8dNuLX2O1V17yRnJnlekl+squuTHN2N8uEkT5xvMQAAAAAAAOAme/FMs2cmeWGSC5N8OsmxSW5M8sEkL0pyemvtB1prbdIEWmtnJfmODJ5xdjCDsO8DSX4hyb1aa5csdQkAAAAAAADYaHO1NGutbfd8snFlXpHkFfOWGzOd85Kct9vpAAAAAAAAwKi9aGkGAAAAAAAAK01oBgAAAAAAQO8JzQAAAAAAAOg9oRkAAHviwNnn73cVAAAAACYSmgEAAAAAANB7QjMAAAAAAAB6T2gGAAAAAABA7wnNAAAAAAAA6D2hGQAAAAAAAL0nNAMAAAAAAKD3hGYAAAAAAAD0ntAMAAAAAACA3hOaAQAAAAAA0HtCMwAAAAAAAHpPaAYAAAAAAEDvCc0AAAAAAADoPaEZAAAAAAAAvSc0AwAAAAAAoPeEZgCslANnn7/fVQAAAAAAekhoBgAAAAAAQO8JzQAAAAAAAOg9oRkAAAAAAAC9JzQDAAAAAACg94RmAADA2jlw9vn7XQUAAAA2jNAMAAAAAACA3hOaAQAAAAAA0HtCMwAAAAAAAHpPaAYAS+a5OwAAAACw+oRmAAAAAAAA9J7QDAAAAAAAgN4TmgEAAAAAANB7QjMAAAAAAAB6T2gGAAAAAABA7wnNAAAAAAAA6D2hGQAAAAAAAL0nNAMAAAAAAKD3hGYAAEkOnH3+flcBAAAAgH0kNAMAAAAAAKD3hGYAAAAAAAD0ntAMAAAAAACA3hOaAQAAAAAA0HtCMwAAAAAAAHpPaAYAAAAAAEDvCc0AAAAAAADoPaEZAHM7cPb5+10FAAAAAICFEpoBAAAAAADQe0IzAAAAAAAAek9oBgAAAAAAQO8JzQAAAAAAAOg9oRkAAAAAAAC9JzQDAAAAAACg94RmACzcgbPP3+8q0HP2QQAAAADmJTQDAAAAAACg94RmABNoqQIAAAAA0B9CMwAAAAAAAHpPaAYAALAEWq0DAACsF6EZAAAAAAAAvSc0AwAAABZOa0sAANaN0AwAAAAAAIDeE5oBAAAAAADQe0IzAAAAAAAAek9oBgAAAAAAQO8JzQCAmzlw9vn7XQUAAAAA2FNCMwAAAAAAAHpPaAYAAMDG0FoaAADYKaEZAAAAAAAAvSc0AwAAAAAAoPeEZgAAAAAAAPSe0AyAjeEZJgAAAADATgnNAAAAAAAA6D2hGQAAAGxDi3YA1o3PLoD5Cc0AAAAAAADoPaEZAAAAAAAAvSc0AwAAAAAAoPeEZgAAAAAAAPSe0AwA1oAHOAMAAADAcgnNAAAAAAAA6D2hGQAAAAD/xl0OAIC+EpoBAAAAAADQe0IzAAAAAAAAek9oBgAAAAAAQO8JzWAXlnGfd/eOBwAAAACAvSc0AwAAAAAAoPeEZkDvaM0HAAAA0G/+PwSMIzQDAAAAAACg94RmAABAL7iaGAAAgGmEZgAAAAAAAPSe0AwAoOe0vgEAAAAQmgEAAAAAAMDsoVlVHVNVj66qn6uqP6+qT1VV6/5OmqH8YVX1
Y1X1nqo6WFWfraq3V9VZVVUzlH9CVb2pqq6oqmuq6v1V9fNVdcysywAAAAAAAADjzNPS7MFJ/jTJTyb5tiS3m7VgVR2b5G1JnpvknkkqyRFJTk/yP5L8WVUdOqX8uUnOS3JGkmOTfDHJSUl+Isl7quqOcywHAAAAAAA75Bbvy2X9wv6Z9/aMlyV5bZKfSXLWHOVemOTUJJ9O8qgkRyc5MslTk1yX5JHdNG+mqr4vyZlJbkzyzCRHt9aOSXK/JB9NctcMAjUAAAAAAADYkXlCs1e31r6stfaI1to5Sf5ylkJVde8kT+x6v6u19po28MXW2kuSnN0Ne3pV3X6k7OFJzul6n99ae15r7fokaa29Lcljk7Qk96uqR82xLAAAAAAAa2udWiOtU12Bfps5NGutfXGH83hy1/1ga+3Pxgw/N8mVGdyu8XEjwx6S5PYZBGO/MqZO707yhq73KTusHwAAAAAAAD037+0Zd+KMrvv6cQNba9cmeWvX+6AJZd/bWvv4hOm/bkJZAAD2iCtHAQAAgHW31NCsqirJSV3vRVNGfV/XPXnk/a3+WcqeUFXHz1dDAAAAAAAAWH5Ls2OTHNW9vmTKeFvDThx5/8SR4dPKjiufJKmqs6rqgqq64PLLL58yKWCdjLZq0MoBAABgNfh9xqrZpH1yk5YFmM7xvveWHZodNfT62injXdN1j55Qfpay48onSVpr57bWTmutnXbCCSdMmRQAAAAAAAB9tOzQrIZet12U30lZgKVxlQcAAAAAwGZZdmh2cOj1kVPG2xp2cOT9gyPDp5UdVx4AAAAAAAC2tezQ7Kokn+te33HKeFvDPjHy/iUjw6eVHVceAAAAAAAAtrXU0Ky11pK8v+s9ZcqoJ3fd9428v9U/S9nLW2ufmq+GAAAAAAAAsPyWZkny5q770HEDq+pWSe7f9b5xQtlTqmpSa7OHTSgLAAAAAAAAM9mL0OzlXfekqnrkmOFnJrl1kmuTvHJk2BuTXJZBPX9ktGBV3TPJQ7rely6ktgA9c+Ds8/e7CgAAS+X7DsDyOMcCsEnmCs2q6vitvyTHDQ26zfCwqvq36bbW3p3kvK73xVX18G5ah1TVf07y3G7Yr7XWLhueX2vt+iTndL1Pr6pnVNXhXfn7ZBCy3SLJ37bWXjPPsgAAAAAAAMCWeVuaXT70966h998+MuxOI+XOTPLOJLdLcn5VfS7J55K8JMkRSV6T5KfHzbC19jtJXtjV9XlJrq6qq5O8Lcldknw4yRPnXA4AAABgxWnBwiT2DQBgGfbi9oxprV2V5L5Jzk5yYZKW5Pok70jyPUke3Vr7wpTyZyX5jgyecXYwyaFJPpDkF5Lcq7V2yVIXAAAAAAAAgI02V2jWWqsZ/y4eU/bzrbXnttbu1Vo7urV269bafVpr57bW2gzzPq+19qDW2m1ba0e01r62tfaTrbWr51kGAAAAAFhFWtDtH+segGSPWpoBAAAAAADAKhOaAQAAAAAA0HtCMwAAAAAAAHpPaAYAAAAAsAI8W4152WdWm+2zfoRmAAAAAAAA9J7QDACAleNqPAAAAGCvCc0AAAAAAADoPaEZAAAAAAAAvSc0AwDm5tZ5AADAbvhNAcAqEpoBAAAAAADQe0IzAAD2hauL2ST2ZwAAgPUnNAMAAAAAAKD3hGZsvD5c9duHZWTz2G8BAADYa36LAjCN0AwAAAAAAIDeE5oBAAAAM9FCAzaX4xsAhGYAAAAAAAAgNGO9uOoJVofjEcC5kPVkv4XxHBsD1gMA0GdCMwAAAAAAAHpPaAYAAGwcLSUA4Ev5bFxNtsvqsC2ARGgGAAAAAAAAQjMAAAAAAAAQmgEAAAAAANB7QjMAAAAAAAB6T2gGAADsmAemA7BJfK4BQL8JzQAAAAAAAOg9oRnAErg6EWDzOdezqezbAABAXwnNAAAAAAAA6D2hGczBVbcAAAAA+B8RwGYSmgEAAAAAANB7QjMANp4rAAEAAACA7QjNAAAAAAAA6D2h
GQCwdrQeBPaDcw8A7D+fxwAsk9AMAAAAAACA3hOaAQAAAMCctHiC/eHYA5ZJaAYAAAAAAEDvCc0AAAAAAADoPaEZ9NSimrJrEg8AsH98FwPYGedPAGAcoRkAAAAAAAC9JzQDvoSr7Wa36HVl3QMAAKwvv+kAYP0JzQAAAAAAAOg9oRm74ioq2J7jBObnuAH6zDkQYLM5zwPA6hKaAQAAAAAA0HtCMwAAYOW5Kh8AFsNnKsBsnC/7SWgGAAAAAABA7wnNAABgjbjacXbWFQAAAPMQmgEAAAAAANB7QjMAgDWkBQ3AZnFeh51x7AAAiyQ0AwAAAAAAoPeEZqwkV4oBrBfnbWZlX+kH2xkA2ES+49B3jgH6QGgGAAAAAABA7wnNAAAAAAAA6D2hGRtB02DWgf0UABbP5+vyWLe7Zx0Ce8k5B8ab5dhw/ABbhGYAAAAAAAD0ntAM6BVXDgHQBz7vAAAA9pbfYZtBaAYAAAAAAEDvCc0AgJXnai0AAGCR/MZgndl/YXmEZgAAAAAAAPSe0AwAgJXk6snlm3cdL2KbrPp2XfX6AQDAuvDdmnUkNAMAAAAAAKD3hGbsG1caACyG8ykA9Ne47wG+GwCwn3wOAetMaAYAAAAAAEDvCc0AYMO5yg+AWfi82Hy28XqwnSazbpiVfYVlsn/BZhOaAQAAAAAA0HtCMwAAAAAAAHpPaAYAAACwodxGDABgdkIzAAAAAAAAek9oBhtuN1cVuiIRgD7zObj3rHMAABbB90r6wr6+eEIzAAAAAAAAek9oRm9I3QFYtr5/1vR9+VeF7QAAAAA7IzQDAAAAAACg94RmAADQ0UoLYGBdzofrUk+wrwLAehCaAQAAAAAA0HtCM1aeq7EAYDqflQDLs4nn2E1cJgAAWAShGQAAAAAAAL0nNAMAWGF9bA3Qx2UGWATnTzbZKuzfq1AH2E+OAfpq0fu+Y2m1Cc0AAAAAAADoPaEZAAAAAAAAvSc0AwAA2HBuAQPAfvEZxDLYr4BlEZoBAAAAAADQe0IzgBXjaimAyZwjV5dtA6wy56idsd5g9c1ynDqWAWYnNAMAAAAAAKD3hGYA7Jt1vdptXeu9Cqy7zWJ7Ls6Bs8+3PgEAesb3v+WzjoF5Cc0AAAAAAADoPaEZwD5z1RN7Zdn7mn0Z2A3nENh7jjsA+FI+GwGhGQAAAAAAAL0nNAMAmIMrDwEAANhrfouuFttjcwnNAAAAAAAA6D2hGQAAbKhVufpxVeoBAAAA0+xJaFZVT62qts3fwSnlD6uqH6uq91TVwar6bFW9varOqqrai2UAAAAAAABgc+11S7Mbklw65e9mqurYJG9L8twk90xSSY5IcnqS/5Hkz6rq0KXXHKBHtAhYPOt0+axj2J7jBPbeIo+7ZR7D63x+WOe6A0Df+Nxm1e11aPa21todJvzdbUKZFyY5NcmnkzwqydFJjkzy1CTXJXlkkp/Zg7oDAAAAAACwoVb6mWZVde8kT+x6v6u19po28MXW2kuSnN0Ne3pV3X5/agkAAAAAAMC6W+nQLMmTu+4HW2t/Nmb4uUmuzOB2jY/bs1oBAMAGcYsUVp19FABWk89oYNOsemh2Rtd9/biBrbVrk7y1633QntQIAAAAAACAjbPXodkpVXVRVV1bVVdX1Xur6teq6i6jI1ZVJTmp671oyjTf13VPXnRlAegvV8sBAAAAQL/sdWh2fJKvTXJNklslOSXJDye5qKqePDLusUmO6l5fMmWaW8NOXFw1AQAAAAAA6JO9Cs0uSfLTSe6e5FattdslOTrJIzJoKXZEkt+vqgcMlTlq6PW1U6Z9Tdc9etIIVXVWVV1QVRdcfvnlO6k/LJ1WLawL+yoALJ7P181m+7Joq7pPrWq9AGA/+FxcT3sSmrXWXt9a+9nW2kWttc93713fWnttkvsm+T9JDknyS0PFangSu5z/ua2101prp51wwgm7mRQAAAAAAAAbaK9vz3gzrbUrkzyn6z29qrZSrYNDox05
ZRJbww5OGQdgrbgSBQAAgFn4/bgebCeA9bDvoVnn77puJTnQvb4qyee613ecUnZr2CcWXy0AAAAAAAD6YFVCs5vdirG11pK8v3vvlCllT+6671tCvQAAgJ5yRfjiWJcArAOfV7Bcq3KMrUo9WE2rEpp949Drjw69fnPXfei4QlV1qyT373rfuIR6AQAAAAAA0ANLD82qqrYZfmySs7vev2+tXT40+OVd96SqeuSY4mcmuXWSa5O8crd1ZT4S+c1ie8JqcmwCAAAAwN7Yi5Zmd66qd1TVf6mqO229WVWHVdW3JfnbJF+d5MYkzx4u2Fp7d5Lzut4XV9XDu7KHVNV/TvLcbtivtdYuW/aCAAAAAAAAsJkO3aP5fFP3l6q6Lsnnkhyb5Jbd8GuSfG9r7U1jyp6Z5G5JTk1yflVdk+SQJId3w1+T5KeXV3UAAAAAAAA23V60NLs0yQ9m0GLsgxkEZLfuuhdk0Frs5NbaH4wr3Fq7Ksl9M7iF44VJWpLrk7wjyfckeXRr7QtLXgYAADaIW5/CanOMwt5wrAEAfKmltzRrrV2b5De7v51O4/MZhGvP3W5cAAAAAAAAmNdetDQDoMdcvbq3rG/WlX0XANhPvovA/nIMAqtCaAYAAAAAAEDvCc2AXduUq4E2ZTn2i/UHAAAAAMvn/3DLIzQDAAAAAACg94Rm0AOuPAAAAFgvfsfBfBwzACyC0AwAAAAAAIDeE5oBLImr3IBNtqxznHPn/rL+95b1DbAanI/Za/Y5Vp19lD4TmgEAAAAAANB7QjNWmqsa4CaOB/aD/Y51Zv+Fm3NcAADrZJ2+u6xTXYHJhGYAAAAAAAD0ntAMAAAAAACA3hOaAQCsALfy2DvWNevM/gsAwCx8b4SdEZoBAAAAAADQe0IzgBXmqiAAAABgi/8TACyX0AwAAAAAAIDeE5oB0GvDV+mt8xV761x3Nt+m75+bvnx9Z/vC3nG8AbDXfPast3XafutU174TmgEAAAAAANB7QjOWSoIOAAAsgt8WAIvjnAoA4wnNAAAAAAAA6D2hGfSQK8pYdfZRGHAsQP847gEAZuN7U/9s4jbfxGVad0IzAAAAAAAAek9oxlqSwFsHLIf9CuBLOS8CLMe6n1/Xvf4AAIwnNAMAAAAAAKD3hGZM5eo5AABgWfzeAAD4Ur4fwf4SmgEAAAAAANB7QjMAAAAAAAB6T2gG0FOa+7MK7IcAAAAkfh8Cq0FoBgAAAAAAQO8JzWCDuCIHAOg734eASdbp/LBOdQW255iG/eP4Y15CMwAAAAAAAHpPaMaekuyvnkVvE9t4PtYXzM9xA7AanI8BWCSfK5vPNl4dtgVMJjQDAAAAAACg94RmwEpwhQsAAAAA68L/smAzCc0AAAAAAADoPaEZsHCutAEAAIDF8TsboF+c9/eP0AwAAAAAAIDeE5oBAOwRV4oBANxcX78jbfpyb/ryLZr1xTj2C9h7QjMAAAAAAAB6T2gGAAAAAABA7wnNAICV4vYTsNocowDAfvAdBIC9IDQDAAAAAACg94RmAMC/cfUmAHvB5w2wV5xvAIB5CM0AAAAAAADoPaEZwAZw9SR7wX4GbDLnOAD2gs8bAFhtQjMAAAAAAAB6T2gGwESuggSAfvCZD1/KMQEAs1v056bP4dlYT8shNAMAAAAAAKD3hGbMTYK9fNYx9Nu6nAPWpZ7b2ZTlWCbrCGD9OZfDanOMro5lbwvbGmC1Cc0AAAAAAADoPaEZMBdXRLFT9h1g0ZxXYHkcX7D6HKdMY/8AWB7n2M0mNAMAAAAAAKD3hGYAAAAAAAD0ntAMekTTYVg9q3Bc7lcdVmHZAQBG+Y6yfmwzWE19OTb7spzQF0IzAAAAAAAAek9oBkASV0YBA84FzGrcvjL63rrtT+tWX4B14hzbb8vY/vYpYFmcX/pNaAYAAAAAAEDvCc1gB1xtAKyDTTxXbeIysTfsOwDMwucFm8Y+DQDzEZoBAAAAAADQe0IzWCGuAANg1fhs
Alg+59r1ZxvC3nLMwd5xvNE3QjMAAAAAAAB6T2gGAPSCq+OYlX2FvbZO+9w61RUARq3b59he13fd1s889mrZNnkdQl8IzQAAAAAAAOg9oRkbw5UcAACwWXzHZ172GWBT7eX5bZPPpfMs2yavh90aXjfWE5tGaAYAAAAAAEDvCc0g631FxDrXfRH6vvysF/srwOZwTgdgP6zS589O6rJK9d8rWuRsllXahrPWZZXqDOtAaAYAAAAAAEDvCc0AAAAAAADoPaEZe2ZTmwyvW33pL/vqarE9dm9V16F6Mclut8G08rYvwHjOj+vJdoPdcxyxCPYj+khoBgAAAAAAQO8JzVg6VySwn+x/m8X23H+2AQDs/efhpn3+btryAOvPeQlW26oeo8uq16oub18IzQAAAAAAAOg9oRkwlSsbYPE29RmPu9GnZQWYxLmQcewX6882hPXk2GVd2Xdhd4RmAAAAAAAA9J7QjF4ZvtLCVReTbeq6mWW59mvZN3Wds3z2nb1hPcPeWudjbp3rzvpZ9P5m/wX6ZifnvT6dK/u0rABbhGYAAAAAAAD0ntCMpVmHZ/a4YmZn9nq9rXILMea3iG01zzRGx7WvsGrsk5tt2va17XfPOmQTbbdfr+t+rzXHTcYt1yr+xgKYZDe/yTfNpi8f9JHQDAAAAAAAgN4TmjGzSVdOrNsVFbut77ot75Z1rTf9scqtU9ft+Fl2fddtfezGIpe1T+tt0VZt3e1FfVZtmcE+ufpWdRutar12YxOXaTcOnH2+dbIL1h2sB8cq89rLluX2z8USmgEAAAAAANB7QjMAAAAAAAB6T2jGntBEdDMNb9dN38Zby7fT5Vzm+tn0db8bq7i9Vllfl3tdLHr7bNL2XvdlWcf6r1qdV60+q2CdPwNX/XvTKqyjTdan9TvrbQ3XbZ2sW31XiXPU8ri99vrYOjfuZn2u+rZY1Ufg7Pf857Wo+q7bcrN8QjMAAAAAAAB6T2gGrJVZr8Rc16tEVuHKwnVdd4y3CvvUqtnrdTLv/FZhfW/afjNrXZbVgnqV1kWiPqs2//20X8u+6i3Jljm9RdrrVhOrvC6WbR0/y0fNsi1Xsd57bSfrwHpbrlVYv6tQh1W3l+todF62z+pbhW20CnXYrU1YhnUhNAMAAAAAAKD3hGbMZCdXaO8FCft6W6X79/f9SqVpy9u3dbFIq77uVr1+i9KX5dwEWsre3LTWAPt5RS8Dvvtuhv1qXbuqz9dZ1WVhfe3H/rEpv+9W4TfzJre+X4bRVp2r2lJ13Hz6sH12apO/K4yb5ybejQDmITQDAAAAAACg94RmrIRNuJJg3JUYW/2bsHx7bZWfp7MK23Nd962dXlm1Css5us63q9M6Pa9l2Xb6/K9FtHJe1av4d1NutzZlvvv9PJb9nn9frNL3gXXatqvQ6mAvt91+fiazOc89m3W/Xmar6FU65203rXX5rbWo1jTrvG8n61//7ez374FlWNd6z2Lcb+pZP8vX7c4Uq/x7dNlW4fso7JTQDAAAAAAAgN4TmrGRdnvV/zJbtoxOe12vclvXeu/Ebuq9n/cJ36tWHcsyy5Vm69bCpy/W4TlLi2glsd25fJlXvO7kqsxZ6rSO56dVPp538hmwDsfPflvFc/9Oro7eaSvcSe+tw1X2+93qb1Et17Yrt+jvjjsdb5H12Ivz2bzLNK2Oy97Wy5z2on5n7fS8sx+fQ4s+NyyiNfhuf4tMGj7Pvrkq5+4tq37uXca0FzX/ZU17kf+XmeVcsd0+vd/77KKPm0X9ZtvN78bdzHfe+aziutvN+Dv9brxf+/Skz7D9Pq76am1Cs6q6Q1U9v6r+paquq6pLq+rVVfXg/a4bAAAAAAAA620tQrOqukeS9yb5wSR3TXJ9kuOTPDLJX1bV2ftYPUbs5iqfWZL+ea4em+fKtXnrNO/VUju5SmK3V6bNM69ZyixiGba7CnLalUGj4+2mHpPqMu39adPfSbmdzGPe90bX6aKv9pn1SrNl
Xl24m+242yvlRvetWff5SdOZ9xjc6ysLdzK9ea5y3o+rNHd6FeW885112+72qubRaezXeX3a+Lutw07LLGJau5nvvOttJ9Pd7baYNP5ur9Kcdh7Y7nvBvPNd1JW1i9wvdrrf7GQd7eRzbRGfl8PvjftcnPXcNs9+Om3/mlSfWeo663xH6z1tvFnGn6fu8xq3rKPDdzLNnZRd1Pe23Xy+LvozZJ59dhHznrY9d7ptdzrebr6/znpuW+SxMK0+08adtcyiPttnKb+TY3DW7wnD3WnLPuk8vJPz6bi6zLqfzzPtecyzb+/083UZ9dluu00qs5t5z2I35/9l2Ov5zvMdbZZ1Nc8+N+2cOu+5ZLf7+rTPkZ1+t5g0vUnzm1an7eow6/fKWcvu5ty+X8dOX6x8aFZVRyT5syS3S/LuJHdvrd06yXFJfiVJJfnFqnrY/tUSAAAAAACAdbbyoVmS70ly5yQHkzyqtXZRkrTWrmqt/WiSV3Xj/eL+VK+fFnF14nbT3C+LuEJot1dWzTq9ea5U2a1FXumx2+kt0rKu+FvEfJd5JdZeX/W27PKj09jJcbzbfXxRFn1F8KTx9+MY3Ok8d3rl2k7mt9v1uig72Q/mvaJuN/biatRFTXPWqzZ3u0zL+vxdxLRX5TN3N9btc2va59AmbI9hs37vXda8lzXNvVqmTdsfkvm/h00af9nrZtZz626uoJ+3/LzT3kur9lm03+tjHvtZ153+7tzr73qr+Bt3r6e/F3y2zU5Ln9nM87tzN58dq/D/jHnmv8z67fey98U6hGZP6bova619fMzwX+66X19VJ+1RnQAAAAAAANggKx2aVdUxSU7tel83YbR3JLmye/2gpVcKAAAAAACAjbPSoVmSr83gmWVJctG4EVprNyb5YNd78l5UCgBg07ntAwCw6vr8faXPyw6wU86dzGLVQ7MTh15fMmW8rWEnThkHAAAAAAAAxqrW2n7XYaKqenKSl3a9t2ytfWHCeC9N8uQkr2+tfeuY4WclOavr/Zrc1DINAAAAAACA/rhza+2EcQMO3euazKm2H2V7rbVzk5y7iGkBAAAAAACweVb99owHh14fMWW8I8eMDwAAAAAAADNZ9dBs+Dlmd5wy3tawTyyxLgAAAAAAAGyoVQ/NPpBk66Frp4wboapukcFzypLkfXtRKQAAAAAAADbLSodmrbWrk1zQ9T50wmjflOTW3es3Lr1SAAAAO1RV51RVq6oXjxl2cTfsgXtesSWpqgd2y3TxDstv3DoBAABW10qHZp2Xdd2nVNWJY4b/aNd9Z2vtg3tUJwAAgI1WVffqQr6n7nddAAAA9sI6hGb/I8lHkxyT5DVVdXKSVNUxVfXfkzyuG+/H96l+AAAAi/AvST6Y5Jr9rkjnXkl+OslTdzGNazJYpn9ZQH0AAACW6tD9rsB2WmvXVtVjMrj14tcnuaiqrkpydAahX0vy46211+9jNQEAAHaltfbg/a7DorXW/j7JSftdDwAAgFmsQ0uztNYuTHL3JL+R5MNJDk9yRZLzkzy0tfZL+1g9AAAAAAAA1txahGZJ0lr7ZGvth1prd2ut3aq1dvvW2iNba2/c77oBAAD9VVVfW1W/W1X/XFWfq6rPVtU/VdVvVNWpc0zn4qpqVfXACcMPq6rvr6q3VtWnq+r6qvpoVb2oqr52QpkXd9M8p6oOqaofrqoLq+qabhqvqarTxpRrSX6v6/2WbhrDf2PrOGY6D+zGv3jKOE+pqndU1cGuTm+qqkfMMn0AAIBFWvnbMwIAAKyqqvqBJL+W5JDurc8lOSyDO2XcPck9kjxwAfM5McmfJ7ln99aN3bzulOS7kjypqp7SWvuTCZM4NMlrknxbkhuSXJ/kuCSPSPLgqnpQa+3tQ+NfmuSIJMd24396ZHqf3+0yJUlVvSDJ07reG7t5PTDJGVX1Q4uYBwAAwKzWpqUZAADAKqmqJ2RwC/lDkvxRkpNba0cnOSrJHZP8xyTvXMB8bpnkTzMI
zP46yQOSHNFaOzbJHZL8SpJbJfmDqrrbhMk8Lck3JvmOJEe31o7ppvferuzzh0durd0hyVZo9bbW2h1G/t62gOV6Sm4KzJ6X5HatteOSnJjk97v3TtjtfAAAAGYlNAMAAJhTF2T9atf78tbaE1pr70+SNvCJ1tpLW2vPWMDsvjPJNyT5hyQPa629tbX2+W5el7bWfjTJ7yQ5MsnTJ0zjNkke01o7b6jsPyZ5ajf8G6rqzguo60yqqpKc0/W+pLX2zNbaZ7t6XdrV668yWCYAAIA9ITQDAACY34OTfEWSLyZ55pLn9Z1d97daa9dPGOdlXfehE4a/tbX2N6NvttbemeT/dr2n7LyKc7tXkn/Xvf7F0YGttZbkOXtYHwAAAM80AwAA2IHTu+6FrbWPL2smVXVoBrdVTJJfrarnThh165lqXzlh+D9Mmc3HMwgAj5u/hjv29V33stbaByeM87YkX4jfrQAAwB7x4wMAAGB+X9Z1P7bk+dw2yWFDr7dzxIT3r55S5rque8tZK7UAW88qmxg4ttaur6pPZfDcNgAAgKVze0YAAID51R7NZ/g32z1ba7Xd3x7Va69s2vIAAAArTGgGAAAwv0923TsveT5XZPDctCQ5ecnz2kuXd907Thqhqg5Lcru9qQ4AAIDQDAAAYCfe0XXvUVVfvqyZtNZuSHJB1/u4Zc1nghu77jJae72r635ZVX31hHHuG48UAAAA9pDQDAAAYH5vzOB5XIck+eUlz+vFXffxVXXGtBGr6rgFzveqrnubBU5zy3uS/J/u9bNGB1ZVJTl7CfMFAACYSGgGAAAwp64F2DO63idV1XlVddLW8Ko6sarOrKrfWMDs/mcGLdtukeQ1VfVDVXXboXndvqqeVFVvSfJDC5jflou67slV9U0LnG5aay3JOV3vd1fVc6vqNklSVV+W5EVJHpTkmkXOFwAAYBqhGQAAwA601v53BsHZjUmekOT9VXV1VV2T5JIk5ya5xwLmc0OSxyT52yRHJvn1JJ+qqk9X1dVJLk3ysiTfkqTtdn5D8/1Qkr/O4BaJ76iqK6rq4u7v9AVM/6VJfqvr/bF0y5TkE0memuRHc9OzzwAAAJZOaAYAALBDrbVfTXLvJL+X5OIkt0xyXZJ/TPL8JE9f0HwuyyAUe0qS1ya5LMnRGTxv7AMZtEZ7eJLnLGJ+Qx6X5LeTfKSb3527v1stYuKtte9P8h+T/F2S6zNYnr9K8sjW2iJa6QEAAMysBnfFAAAAAAAAgP7S0gwAAAAAAIDeE5oBAAAAAADQe0IzAAAAAAAAek9oBgAAAAAAQO8JzQAAAAAAAOg9oRkAAAAAAAC9JzQDAAAAAACg94RmAAAAAAAA9J7QDAAAAAAAgN4TmgEAAAAAANB7/z8DHWc9S1DpmAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 2160x720 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "PLOTFILENAME = \"Messages.png\"\n",
    "\n",
    "groups = pd.DataFrame(dataListAnonymized).groupby(CLIENT).groups\n",
    "x = groups.keys()\n",
    "y = [len(groups[client]) for client in x]\n",
    "\n",
    "font = {\"size\":24}\n",
    "matplotlib.rc(\"font\",**font)\n",
    "plt.figure(figsize=(30,10))\n",
    "plt.bar(list(x),y)\n",
    "plt.title(\"messages per client\")\n",
    "plt.xlabel(\"client id\")\n",
    "plt.tick_params(axis='x',which='both',bottom=False,labelbottom=False)\n",
    "plt.savefig(PLOTFILENAME)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "valueFrequencies = showValueFrequencies(pd.DataFrame(dataListAnonymized))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1982 789 1595 387 240\n"
     ]
    }
   ],
   "source": [
    "print(sum([valueFrequencies[k] for k in list(valueFrequencies.keys())]),\n",
    "      sum([valueFrequencies[k] for k in list(valueFrequencies.keys()) if k < 5]),\n",
    "      sum([valueFrequencies[k] for k in list(valueFrequencies.keys()) if k < 50]),\n",
    "      sum([valueFrequencies[k] for k in list(valueFrequencies.keys()) if k >= 50]),\n",
    "      max([k for k in list(valueFrequencies.keys())]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 1983-1982 = 1 client with no mails\n",
    "* 789 more clients with fewer than 5 mails\n",
    "* 1595-789 = 806 more clients with fewer than 50 mails\n",
    "* 387 clients with 50 mails or more (max 240)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Message': 45469,\n",
       " 'Sender': 45469,\n",
       " 'Recipients': 45469,\n",
       " 'DateSent': 45469,\n",
       " 'Subject': 45469,\n",
       " 'Body': 45469,\n",
       " 'IsReplied': 45469,\n",
       " 'Location': 45469,\n",
       " 'TreatmentStep': 7017,\n",
       " 'Attachments': 1894}"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "QUERYMESSAGES = \"./Messages\"\n",
    "\n",
    "def showMessageTextFieldFrequencies():\n",
    "    inFileNames = sorted(os.listdir(DATADIRANONYMIZED))\n",
    "    tags= {}\n",
    "    for inFileName in inFileNames:\n",
    "        if re.search(FILEPATTERN,inFileName):\n",
    "            root = readGzippedXmlFile(DATADIRANONYMIZED+inFileName)\n",
    "            for section in root.findall(QUERYMESSAGES):\n",
    "                for tag in section.findall(\".//*\"):\n",
    "                    if not tag.tag in tags: tags[tag.tag] = 0\n",
    "                    tags[tag.tag] += 1\n",
    "    return(tags)\n",
    "\n",
    "tags = showMessageTextFieldFrequencies()\n",
    "{tag:tags[tag] for tag in sorted(tags.keys(),key=lambda t:tags[t],reverse=True)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataListAnonymized = [d for d in sorted(dataListAnonymized, key=lambda d:(d[\"client\"], d[\"DateSent\"]))]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "N = 20\n",
    "\n",
    "\n",
    "def makePhrase(wordList, index):\n",
    "    return(\" \".join(wordList[index:index+N]))\n",
    "\n",
    "\n",
    "def addPhraseToRefs(phraseRefs, phrase, msgId):\n",
    "    phraseRefs[phrase] = msgId\n",
    "\n",
    "    \n",
    "def countPhrases(phraseRefs, message, msgId):\n",
    "    words = message.split()\n",
    "    inDuplicate = False\n",
    "    duplicateStart = -1\n",
    "    duplicateEnd = -1\n",
    "    duplicates = []\n",
    "    for i in range(0,len(words)-N+1):\n",
    "        phrase = makePhrase(words, i)\n",
    "        if not phrase in phraseRefs:\n",
    "            addPhraseToRefs(phraseRefs, phrase, msgId)\n",
    "            if inDuplicate:\n",
    "                duplicates.append((duplicateStart, duplicateEnd))\n",
    "                inDuplicate = False\n",
    "                duplicateStart = -1\n",
    "                duplicateEnd = -1\n",
    "        elif phraseRefs[phrase] < msgId:\n",
    "            if inDuplicate:\n",
    "                duplicateEnd += 1\n",
    "            else:\n",
    "                inDuplicate = True\n",
    "                duplicateStart = i\n",
    "                duplicateEnd = i+N\n",
    "    if inDuplicate:\n",
    "        duplicates.append((duplicateStart, duplicateEnd))\n",
    "    return(duplicates)\n",
    "\n",
    "\n",
    "def prepareText(text):\n",
    "    text = re.sub(\"</*line>\",\" \",text)\n",
    "    text = re.sub(\">>+\",\" \",text)\n",
    "    text = \" \".join(word_tokenize(text))\n",
    "    return(text)\n",
    "\n",
    "\n",
    "def removeDuplicates(text, duplicates):\n",
    "    words = text.split()\n",
    "    for duplicateStart, duplicateEnd in list(reversed(duplicates)):\n",
    "        del(words[duplicateStart:duplicateEnd])\n",
    "    return(\" \".join(words))\n",
    "\n",
    "\n",
    "def processCorpus(messagesIn, client):\n",
    "    phraseRefs = {}\n",
    "    messagesOut = []\n",
    "    for msgId in range(0, len(messagesIn)):\n",
    "        try:\n",
    "            textIn = prepareText(messagesIn[msgId][BODY])\n",
    "        except:\n",
    "            textIn = \"\"\n",
    "        duplicates = countPhrases(phraseRefs, textIn, msgId)\n",
    "        messageOut = dict(messagesIn[msgId])\n",
    "        messageOut[BODY] = removeDuplicates(textIn, duplicates)\n",
    "        messagesOut.append(messageOut)\n",
    "    return(messagesOut)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AdB1987\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "45469"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clients = sorted(list(set([message[\"client\"] for message in dataListAnonymized])))\n",
    "\n",
    "dataListAnonymizedWithoutDuplicateText = []\n",
    "for client in clients:\n",
    "    squeal(client)\n",
    "    messagesFromClient = [message for message in dataListAnonymized if message[\"client\"] == client]\n",
    "    dataListAnonymizedWithoutDuplicateText.extend(processCorpus(messagesFromClient, client))\n",
    "\n",
    "len(dataListAnonymizedWithoutDuplicateText)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "saveAnswerDataDf(answerDataListToDf(dataListAnonymizedWithoutDuplicateText), outFileName=OUTFILENAME)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}