Untitled.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from keras_synthetic_genome_sequence.multivariate_gap_sequence import MultivariateGapSequence\n",
"from ucsc_genomes_downloader import Genome\n",
"from keras_synthetic_genome_sequence.utils import get_gaps_statistics\n",
"import numpy as np\n",
"from typing import Tuple\n",
"from numba import njit, jit"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Loading chromosomes for genome hg19', layout=Layout(flex='2')…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Rendering gaps in hg19', layout=Layout(flex='2'), max=3, styl…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Rendering sequences in hg19', layout=Layout(flex='2'), max=2,…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r"
]
}
],
"source": [
"# Load the hg19 assembly, restricted to chromosomes 1-3.\n",
"hg19 = Genome(\"hg19\", chromosomes=[\"chr1\", \"chr2\", \"chr3\"])\n",
"\n",
"# Only the last two returned values are used later; the first one is discarded.\n",
"# NOTE(review): 100 and 200 are passed positionally - confirm their meaning\n",
"# against the signature of get_gaps_statistics.\n",
"_, mean, covariance = get_gaps_statistics(hg19, 100, 200)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Rendering sequences in hg19', layout=Layout(flex='2'), max=1,…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Converting nucleotides to numeric classes', layout=Layout(fle…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Generating synthetic gaps', layout=Layout(flex='2'), max=1, s…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r"
]
}
],
"source": [
"# Build the sequence from the mean and covariance returned by get_gaps_statistics.\n",
"# The BED path is relative, so it resolves against the kernel's working directory.\n",
"gap_sequence = MultivariateGapSequence(\n",
"    assembly=hg19,\n",
"    bed=\"tests/utils/test.bed\",\n",
"    gaps_mean=mean,\n",
"    gaps_covariance=covariance,\n",
"    batch_size=32,\n",
")\n",
"# NOTE(review): presumably on_train_start() prepares the state needed before\n",
"# batches are requested - confirm against the library.\n",
"gap_sequence.on_train_start()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"@njit\n",
"def add_gaps2(gaps_coordinates: dict, indices: np.ndarray, y: np.ndarray):\n",
"    \"\"\"Return a copy of `y` with the selected entries overwritten by 0.25.\n",
"\n",
"    For every position `i` along the first axis of `indices`, the entries of\n",
"    row `i` of the copy that are addressed by `gaps_coordinates[indices[i]]`\n",
"    are set to 0.25. The input `y` is never modified.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    gaps_coordinates: dict,\n",
"        Lookup from the values stored in `indices` to the index used to\n",
"        select the entries of a row.\n",
"    indices: np.ndarray,\n",
"        Keys of `gaps_coordinates`, one for each row to edit.\n",
"    y: np.ndarray,\n",
"        Array to be copied and edited.\n",
"\n",
"    Returns\n",
"    -------\n",
"    The edited copy of `y`.\n",
"    \"\"\"\n",
"    # NOTE(review): 0.25 presumably encodes a uniform value over the four\n",
"    # nucleotides - confirm.\n",
"    masked = np.copy(y)\n",
"    for row in range(indices.shape[0]):\n",
"        positions = gaps_coordinates[indices[row]]\n",
"        masked[row][positions] = 0.25\n",
"    return masked"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def get4(self, idx: int) -> Tuple[np.ndarray, np.ndarray]:\n",
"    \"\"\"Return the `(masked, original)` pair for batch `idx`.\n",
"\n",
"    The original batch is whatever `self.__getitem__(idx)` returns. The masked\n",
"    batch is built by `add_gaps2` from `self._gaps_coordinates` and the\n",
"    entry of `self._gaps_index` for the same batch.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    idx: int,\n",
"        Index of the batch to retrieve.\n",
"\n",
"    Returns\n",
"    -------\n",
"    Tuple with the masked batch first and the original batch second.\n",
"    \"\"\"\n",
"    original = self.__getitem__(idx)\n",
"    batch_gap_keys = self._gaps_index[idx]\n",
"    masked = add_gaps2(self._gaps_coordinates, batch_gap_keys, original)\n",
"    return masked, original"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"51.2 µs ± 4.48 µs per loop (mean ± std. dev. of 7 runs, 20000 loops each)\n"
]
}
],
"source": [
"%%timeit -n 20000\n",
"get4(gap_sequence, 1)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"20.5 µs ± 222 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
]
}
],
"source": [
"%%timeit -n 10000\n",
"gap_sequence[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}