Report tables.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from crr_labels import fantom, roadmap, roadmap_available_cell_lines, fantom_available_cell_lines\n",
"from tqdm.auto import tqdm\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"fantom_enhancers, fantom_promoters = fantom(\n",
" fantom_available_cell_lines().cell_line,\n",
" window_size=200,\n",
" drop_always_inactive_rows=False,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0e5375ea641a464d8382b32a70c49574",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='Cell lines', max=25.0, style=ProgressStyle(description_wi…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='Downloading to roadmap_data/18/I3.bed.gz', layout=Layout(…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/lucacappelletti/github/crr_labels/crr_labels/roadmap.py:99: UserWarning: Unable to retrieve the data relative to cell line I3\n",
" UserWarning\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='Downloading to roadmap_data/18/WA7.bed.gz', layout=Layout…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/lucacappelletti/github/crr_labels/crr_labels/roadmap.py:99: UserWarning: Unable to retrieve the data relative to cell line WA7\n",
" UserWarning\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='Downloading to roadmap_data/18/4STAR.bed.gz', layout=Layo…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/lucacappelletti/github/crr_labels/crr_labels/roadmap.py:99: UserWarning: Unable to retrieve the data relative to cell line 4STAR\n",
" UserWarning\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"enhancers_roadmap, promoters_roadmap = roadmap(\n",
" roadmap_available_cell_lines().cell_line,\n",
" window_size=200\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def report(enhancers, promoters)->pd.DataFrame:\n",
" fantom_report = [\n",
" {\n",
" (\"cell_line\", \"\"):cell_line,\n",
" (\"enhancers\", \"active\"):(enhancers[cell_line]==1).values.sum(),\n",
" (\"enhancers\", \"inactive\"):(enhancers[cell_line]==0).values.sum(),\n",
" (\"enhancers\", \"unbalance\"):(enhancers[cell_line]==1).values.sum()/(enhancers[cell_line]==0).values.sum(),\n",
" (\"promoters\", \"active\"):(promoters[cell_line]==1).values.sum(),\n",
" (\"promoters\", \"inactive\"):(promoters[cell_line]==0).values.sum(),\n",
" (\"promoters\", \"unbalance\"):(promoters[cell_line]==1).values.sum()/(promoters[cell_line]==0).values.sum(),\n",
" }\n",
" for cell_line in tqdm(enhancers.columns[4:])\n",
" ]\n",
" return pd.DataFrame({\n",
" key:list(values.values())\n",
" for key, values in pd.DataFrame(fantom_report).to_dict().items()\n",
" })"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8c6c14542847424c978f42bdb54afc2a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"fantom_report = report(fantom_enhancers, fantom_promoters)\n",
"fantom_report.to_latex(\"fantom_report.tex\", index=False)\n",
"with open(\"fantom_report.md\", \"w\") as f:\n",
" fantom_report.to_markdown(f)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "00cbb9f8524f49feac20f23d1e33bb35",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"roadmap_report = report(enhancers_roadmap, promoters_roadmap)\n",
"roadmap_report.to_latex(\"roadmap_report.tex\", index=False)\n",
"with open(\"roadmap_report.md\", \"w\") as f:\n",
" roadmap_report.to_markdown(f)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}