LucaCappelletti94/crr_labels

View on GitHub
Report tables.ipynb

Summary

Maintainability
Test Coverage
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from crr_labels import fantom, roadmap, roadmap_available_cell_lines, fantom_available_cell_lines\n",
    "from tqdm.auto import tqdm\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "fantom_enhancers, fantom_promoters = fantom(\n",
    "    fantom_available_cell_lines().cell_line,\n",
    "    window_size=200,\n",
    "    drop_always_inactive_rows=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0e5375ea641a464d8382b32a70c49574",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Cell lines', max=25.0, style=ProgressStyle(description_wi…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading to roadmap_data/18/I3.bed.gz', layout=Layout(…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/lucacappelletti/github/crr_labels/crr_labels/roadmap.py:99: UserWarning: Unable to retrieve the data relative to cell line I3\n",
      "  UserWarning\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading to roadmap_data/18/WA7.bed.gz', layout=Layout…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/lucacappelletti/github/crr_labels/crr_labels/roadmap.py:99: UserWarning: Unable to retrieve the data relative to cell line WA7\n",
      "  UserWarning\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading to roadmap_data/18/4STAR.bed.gz', layout=Layo…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/lucacappelletti/github/crr_labels/crr_labels/roadmap.py:99: UserWarning: Unable to retrieve the data relative to cell line 4STAR\n",
      "  UserWarning\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "enhancers_roadmap, promoters_roadmap = roadmap(\n",
    "    roadmap_available_cell_lines().cell_line,\n",
    "    window_size=200\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def report(enhancers, promoters)->pd.DataFrame:\n",
    "    fantom_report = [\n",
    "        {\n",
    "            (\"cell_line\", \"\"):cell_line,\n",
    "            (\"enhancers\", \"active\"):(enhancers[cell_line]==1).values.sum(),\n",
    "            (\"enhancers\", \"inactive\"):(enhancers[cell_line]==0).values.sum(),\n",
    "            (\"enhancers\", \"unbalance\"):(enhancers[cell_line]==1).values.sum()/(enhancers[cell_line]==0).values.sum(),\n",
    "            (\"promoters\", \"active\"):(promoters[cell_line]==1).values.sum(),\n",
    "            (\"promoters\", \"inactive\"):(promoters[cell_line]==0).values.sum(),\n",
    "            (\"promoters\", \"unbalance\"):(promoters[cell_line]==1).values.sum()/(promoters[cell_line]==0).values.sum(),\n",
    "        }\n",
    "        for cell_line in tqdm(enhancers.columns[4:])\n",
    "    ]\n",
    "    return pd.DataFrame({\n",
    "        key:list(values.values())\n",
    "        for key, values in pd.DataFrame(fantom_report).to_dict().items()\n",
    "    })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8c6c14542847424c978f42bdb54afc2a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "fantom_report = report(fantom_enhancers, fantom_promoters)\n",
    "fantom_report.to_latex(\"fantom_report.tex\", index=False)\n",
    "with open(\"fantom_report.md\", \"w\") as f:\n",
    "    fantom_report.to_markdown(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "00cbb9f8524f49feac20f23d1e33bb35",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "roadmap_report = report(enhancers_roadmap, promoters_roadmap)\n",
    "roadmap_report.to_latex(\"roadmap_report.tex\", index=False)\n",
    "with open(\"roadmap_report.md\", \"w\") as f:\n",
    "    roadmap_report.to_markdown(f)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}