Updating labels to regression.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from crr_labels import fantom\n",
"from tqdm.auto import tqdm"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ad87780a9ea74c99881a2dddf52028e4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/5 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from epigenomic_dataset.utils import get_cell_lines\n",
"\n",
"# Window sizes for which the FANTOM5 labels are retrieved and saved.\n",
"window_sizes = [64, 128, 256, 512, 1024]\n",
"\n",
"# `tqdm` wraps only `window_sizes`, so the bar advances once for every\n",
"# (enhancers, promoters) pair taken from the `fantom` results.\n",
"for window_size, (enhancers_fantom_hg38, promoters_fantom_hg38) in zip(tqdm(window_sizes, desc=\"Window sizes\"), fantom(\n",
"    # We retrieve the labels for all the cell lines we are interested in,\n",
"    # whose epigenomic datasets are already made available from the `epigenomic_dataset`\n",
"    # package.\n",
"    # Since in FANTOM5 the cell line MCF-7 is encoded as MCF7, we need to\n",
"    # remove the hyphen from the cell lines names.\n",
"    cell_lines=[\n",
"        cell_line.replace(\"-\", \"\")\n",
"        for cell_line in get_cell_lines()\n",
"    ],\n",
"    # We request the labels for every window size listed above (not only 512)\n",
"    window_sizes=window_sizes,\n",
"    # The genome version we want to use\n",
"    genome=\"hg38\",\n",
"    # We will not, for now, binarize the tasks\n",
"    binarize=False,\n",
")):\n",
"    path = f\"preprocessed/fantom/hg38/{window_size}/{{region}}.bed.xz\"\n",
"    # Writing fails if the per-window-size directory is missing, so we create it\n",
"    # first; `exist_ok=True` keeps this cell safe to re-run.\n",
"    os.makedirs(os.path.dirname(path), exist_ok=True)\n",
"    # NOTE(review): presumably these are pandas DataFrames, in which case the `.xz`\n",
"    # suffix selects the compression of the written file - confirm.\n",
"    enhancers_fantom_hg38.to_csv(path.format(region=\"enhancers\"), sep=\"\\t\", index=False)\n",
"    promoters_fantom_hg38.to_csv(path.format(region=\"promoters\"), sep=\"\\t\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}