Untitled.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Read the tab-separated enhancer expression (TPM) matrix; lines starting with \"#\" are skipped.\n",
"enhancers = pd.read_csv(\n",
"    \"fantom_data/human_permissive_enhancers_phase_1_and_2_expression_tpm_matrix.txt\",\n",
"    comment=\"#\",\n",
"    sep=\"\\t\",\n",
"    low_memory=False,\n",
")\n",
"\n",
"# Each \"Id\" is split on \":\" and \"-\" (e.g. \"chr1:839741-840250\") into BED-style coordinate columns.\n",
"regions = enhancers[\"Id\"].str.split(r\"[:-]\", expand=True)\n",
"regions.columns = [\"chrom\", \"chromStart\", \"chromEnd\"]\n",
"# str.split yields strings; cast the coordinates so they sort and compare numerically.\n",
"regions = regions.astype({\"chromStart\": \"int64\", \"chromEnd\": \"int64\"})\n",
"\n",
"# Swap the \"Id\" column for the parsed coordinates without mutating the loaded frame in place.\n",
"enhancers = pd.concat([regions, enhancers.drop(columns=[\"Id\"])], axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>chrom</th>\n",
" <th>chromStart</th>\n",
" <th>chromEnd</th>\n",
" <th>CNhs14406</th>\n",
" <th>CNhs14407</th>\n",
" <th>CNhs14408</th>\n",
" <th>CNhs14410</th>\n",
" <th>CNhs14411</th>\n",
" <th>CNhs14412</th>\n",
" <th>CNhs14413</th>\n",
" <th>...</th>\n",
" <th>CNhs10735</th>\n",
" <th>CNhs11261</th>\n",
" <th>CNhs11828</th>\n",
" <th>CNhs11883</th>\n",
" <th>CNhs11765</th>\n",
" <th>CNhs11676</th>\n",
" <th>CNhs11763</th>\n",
" <th>CNhs12854</th>\n",
" <th>CNhs12844</th>\n",
" <th>CNhs11813</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>chr1</td>\n",
" <td>839741</td>\n",
" <td>840250</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.0</td>\n",
" <td>0.603657</td>\n",
" <td>0.000000</td>\n",
" <td>0.352629</td>\n",
" <td>0.149688</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.258875</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.395451</td>\n",
" <td>0.647647</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>chr1</td>\n",
" <td>840753</td>\n",
" <td>841210</td>\n",
" <td>0.000000</td>\n",
" <td>0.50808</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.149688</td>\n",
" <td>...</td>\n",
" <td>0.158908</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.086292</td>\n",
" <td>0.0</td>\n",
" <td>0.119996</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>chr1</td>\n",
" <td>845485</td>\n",
" <td>845678</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.176314</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>chr1</td>\n",
" <td>855764</td>\n",
" <td>856157</td>\n",
" <td>0.000000</td>\n",
" <td>0.16936</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.202142</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>chr1</td>\n",
" <td>856539</td>\n",
" <td>856757</td>\n",
" <td>0.335382</td>\n",
" <td>0.00000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.404285</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65418</th>\n",
" <td>chrY</td>\n",
" <td>58986643</td>\n",
" <td>58986891</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65419</th>\n",
" <td>chrY</td>\n",
" <td>58994460</td>\n",
" <td>58994662</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65420</th>\n",
" <td>chrY</td>\n",
" <td>59019347</td>\n",
" <td>59019376</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65421</th>\n",
" <td>chrY</td>\n",
" <td>59019812</td>\n",
" <td>59020026</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.149688</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65422</th>\n",
" <td>chrY</td>\n",
" <td>59025851</td>\n",
" <td>59026023</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>65423 rows × 1830 columns</p>\n",
"</div>"
],
"text/plain": [
" chrom chromStart chromEnd CNhs14406 CNhs14407 CNhs14408 CNhs14410 \\\n",
"0 chr1 839741 840250 0.000000 0.00000 0.0 0.603657 \n",
"1 chr1 840753 841210 0.000000 0.50808 0.0 0.000000 \n",
"2 chr1 845485 845678 0.000000 0.00000 0.0 0.000000 \n",
"3 chr1 855764 856157 0.000000 0.16936 0.0 0.000000 \n",
"4 chr1 856539 856757 0.335382 0.00000 0.0 0.000000 \n",
"... ... ... ... ... ... ... ... \n",
"65418 chrY 58986643 58986891 0.000000 0.00000 0.0 0.000000 \n",
"65419 chrY 58994460 58994662 0.000000 0.00000 0.0 0.000000 \n",
"65420 chrY 59019347 59019376 0.000000 0.00000 0.0 0.000000 \n",
"65421 chrY 59019812 59020026 0.000000 0.00000 0.0 0.000000 \n",
"65422 chrY 59025851 59026023 0.000000 0.00000 0.0 0.000000 \n",
"\n",
" CNhs14411 CNhs14412 CNhs14413 ... CNhs10735 CNhs11261 CNhs11828 \\\n",
"0 0.000000 0.352629 0.149688 ... 0.000000 0.0 0.0 \n",
"1 0.000000 0.000000 0.149688 ... 0.158908 0.0 0.0 \n",
"2 0.000000 0.176314 0.000000 ... 0.000000 0.0 0.0 \n",
"3 0.202142 0.000000 0.000000 ... 0.000000 0.0 0.0 \n",
"4 0.404285 0.000000 0.000000 ... 0.000000 0.0 0.0 \n",
"... ... ... ... ... ... ... ... \n",
"65418 0.000000 0.000000 0.000000 ... 0.000000 0.0 0.0 \n",
"65419 0.000000 0.000000 0.000000 ... 0.000000 0.0 0.0 \n",
"65420 0.000000 0.000000 0.000000 ... 0.000000 0.0 0.0 \n",
"65421 0.000000 0.000000 0.149688 ... 0.000000 0.0 0.0 \n",
"65422 0.000000 0.000000 0.000000 ... 0.000000 0.0 0.0 \n",
"\n",
" CNhs11883 CNhs11765 CNhs11676 CNhs11763 CNhs12854 CNhs12844 \\\n",
"0 0.258875 0.0 0.000000 0.395451 0.647647 0.0 \n",
"1 0.086292 0.0 0.119996 0.000000 0.000000 0.0 \n",
"2 0.000000 0.0 0.000000 0.000000 0.000000 0.0 \n",
"3 0.000000 0.0 0.000000 0.000000 0.000000 0.0 \n",
"4 0.000000 0.0 0.000000 0.000000 0.000000 0.0 \n",
"... ... ... ... ... ... ... \n",
"65418 0.000000 0.0 0.000000 0.000000 0.000000 0.0 \n",
"65419 0.000000 0.0 0.000000 0.000000 0.000000 0.0 \n",
"65420 0.000000 0.0 0.000000 0.000000 0.000000 0.0 \n",
"65421 0.000000 0.0 0.000000 0.000000 0.000000 0.0 \n",
"65422 0.000000 0.0 0.000000 0.000000 0.000000 0.0 \n",
"\n",
" CNhs11813 \n",
"0 0.0 \n",
"1 0.0 \n",
"2 0.0 \n",
"3 0.0 \n",
"4 0.0 \n",
"... ... \n",
"65418 0.0 \n",
"65419 0.0 \n",
"65420 0.0 \n",
"65421 0.0 \n",
"65422 0.0 \n",
"\n",
"[65423 rows x 1830 columns]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"enhancers"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}