e-mental-health/data-processing

View on GitHub
tactus-visualize.ipynb

Summary

Maintainability
Test Coverage
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Visualizing English Tactus Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The first code block loads the visualization software (lines 1 and 2) and defines which of the English mails are written by the clients. The second step is necessary because we will only visualize client mails, and the English data do not contain labels for who wrote each mail."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import tactusVisualize\n",
    "from tactusVisualize import summarize, summarizeMail, summarizeFeature, visualize, visualizeDAAP, visualizeIndex"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Next, we visualize a selection of LIWC features of a given file. By default, the values of the selected features are stacked on top of each other in vertical bars. With the extra argument `format=\"split\"`, the bars of each separate feature start at the same height."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "INFILE = \"/home/erikt/projects/e-mental-health/usb/tmp/AdB0001.csv\"\n",
    "\n",
    "visualize(file=INFILE,features=[\"function\",\"number\"],format=\"split\",barwidth=1.0,diaries=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "INFILE = \"/home/erikt/projects/e-mental-health/usb/tmp/AdB0012-daapavg.csv\"\n",
    "\n",
    "visualize(file=INFILE,features=[\"daapavg\",\"daappos\",\"daapneg\"],format=\"split\",barwidth=3.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "INFILE = \"/home/erikt/projects/e-mental-health/usb/tmp/AdB0006-daap-internal.csv\"\n",
    "#INFILE = \"/home/erikt/projects/e-mental-health/usb/tactus_attachment/1-daap-internal.csv\"\n",
    "\n",
    "visualizeDAAP(file=INFILE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "visualize(file=\"1.txt\",features=[\"cogproc\",\"insight\",\"cause\",\"discrep\",\"tentat\",\"certain\"],format=\"split\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# ovk data\n",
    "INFILE = \"AS.csv\"\n",
    "visualizeIndex(file=INFILE,features=[\"cogproc\",\"social\",\"focuspast\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "summarize(file=\"1.txt\",target=\"COUNSELOR\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "visualize(file=\"AdB0015.txt\",features=[\"relig\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "visualize(file=\"AdB0016.txt\",features=[\"i\",\"relativ\",\"bio\",\"social\",\"cogproc\",\"family\"],format=\"split\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "summarize(file=INFILE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "summarizeMail(file=\"AdB0016.txt\",mail=7)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "summarizeFeature(file=\"1.txt\",feature=\"function\",target=\"COUNSELOR\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 20190108 test code for interactive visualization\n",
    "\n",
    "%matplotlib notebook\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import csv\n",
    "import re\n",
    "import sys\n",
    "\n",
    "PLOTWIDTH = 9\n",
    "PLOTHEIGHT = 4\n",
    "BARWIDTH = 1.0\n",
    "IMAGEFILE = \"tactus.png\"\n",
    "\n",
    "def readTexts(inFileName):\n",
    "    \"\"\"Return the \"text\" column of the CSV file that belongs to inFileName.\n",
    "\n",
    "    A trailing \".txt\" in inFileName is replaced by \".csv\" before the file\n",
    "    is opened. Reading is best effort: if the file is missing or cannot be\n",
    "    parsed, the texts collected so far (possibly none) are returned.\n",
    "    \"\"\"\n",
    "    inFileName = re.sub(r\"\\.txt$\",\".csv\",inFileName)\n",
    "    texts = []\n",
    "    try:\n",
    "        # the with block closes the file even when reading fails halfway;\n",
    "        # newline=\"\" lets the csv module handle line breaks inside quoted fields\n",
    "        with open(inFileName,\"r\",newline=\"\") as inFile:\n",
    "            csvReader = csv.DictReader(inFile,delimiter=\",\")\n",
    "            for row in csvReader:\n",
    "                if \"text\" in row: texts.append(row[\"text\"])\n",
    "    except (OSError,csv.Error,UnicodeDecodeError):\n",
    "        # best effort: a missing or unreadable file yields no (more) texts\n",
    "        pass\n",
    "    return(texts)\n",
    "\n",
    "def pickScatter(event):\n",
    "    \"\"\"Handle a matplotlib pick event on the current plot.\n",
    "\n",
    "    Shows the picked artist in the title of the current axes and appends the\n",
    "    index of every picked data point to the file \"tmp.txt\", one per line.\n",
    "    \"\"\"\n",
    "    plt.gca().set_title(\">>>\"+str(event.artist),fontsize=12)\n",
    "    # only some artists (for example scatter collections) attach the picked\n",
    "    # indices as event.ind; the bars drawn by plt.bar do not\n",
    "    pickedIndices = getattr(event,\"ind\",[])\n",
    "    if len(pickedIndices) == 0: return\n",
    "    # open the log once per event; the with block closes it even on errors\n",
    "    with open(\"tmp.txt\",\"a\") as outFile:\n",
    "        for pickedIndex in pickedIndices:\n",
    "            print(pickedIndex,file=outFile)\n",
    "\n",
    "def makePlot(fieldDataList,fieldNames,format,texts):\n",
    "    \"\"\"Draw one bar series per feature, save the figure and show it.\n",
    "\n",
    "    fieldDataList: list with one sequence of values per feature\n",
    "    fieldNames: feature names, used as legend entries\n",
    "    format: passed on to tactusVisualize.makeBottomValues\n",
    "    texts: labels for the bar series; may be shorter than fieldDataList,\n",
    "        for example when readTexts could not read the text file\n",
    "\n",
    "    The figure is written to IMAGEFILE and pick events on the figure are\n",
    "    handled by pickScatter.\n",
    "    \"\"\"\n",
    "    plt.figure(figsize=(PLOTWIDTH,PLOTHEIGHT))\n",
    "    xvalues = range(0,len(fieldDataList[0]))\n",
    "    barplots = []\n",
    "    for i in range(0,len(fieldDataList)):\n",
    "        bottomValues = tactusVisualize.makeBottomValues(fieldDataList,i,format)\n",
    "        # readTexts returns [] when the text file is missing, so there may be\n",
    "        # no text for this series: fall back to the feature name\n",
    "        label = texts[i] if i < len(texts) else fieldNames[i]\n",
    "        barplot = plt.bar(xvalues,fieldDataList[i],width=BARWIDTH,\n",
    "                          bottom=bottomValues,picker=1,label=label)\n",
    "        barplots.append(barplot)\n",
    "    # the legend uses the first bar of every series as its handle\n",
    "    plt.legend(tuple([b[0] for b in barplots]),tuple(fieldNames))\n",
    "    plt.xticks(xvalues,[x+1 for x in xvalues])\n",
    "    plt.gcf().canvas.mpl_connect(\"pick_event\",pickScatter)\n",
    "    plt.savefig(IMAGEFILE)\n",
    "    plt.show()\n",
    "\n",
    "def visualize2(file,features,format=\"\",target=\"CLIENT\"):\n",
    "    \"\"\"Plot the selected features of a data file as an interactive bar chart.\n",
    "\n",
    "    file: name of the data file, passed to tactusVisualize.readData\n",
    "    features: names of the features to plot\n",
    "    format: plot format, passed on to makePlot\n",
    "    target: passed to tactusVisualize.readData\n",
    "\n",
    "    Exits with the message \"no data!\" when readData returns an empty result.\n",
    "    \"\"\"\n",
    "    rows = tactusVisualize.readData(file,target)\n",
    "    if len(rows) == 0:\n",
    "        sys.exit(\"no data!\")\n",
    "    mailTexts = readTexts(file)\n",
    "    selectedData = tactusVisualize.selectData(rows,features)\n",
    "    makePlot(selectedData,features,format,mailTexts)\n",
    "\n",
    "visualize2(\"AdB0015.txt\",[\"i\",\"cogproc\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "tactusVisualize.readData(\"AdB0016.txt\",\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}