LucaCappelletti94/crr_labels

View on GitHub
Untitled.ipynb

Summary

Maintainability
Test Coverage
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "enhancers = pd.read_csv(\n",
    "    \"fantom_data/human_permissive_enhancers_phase_1_and_2_expression_tpm_matrix.txt\",\n",
    "    comment=\"#\",\n",
    "    sep=\"\\t\",\n",
    "    low_memory=False\n",
    ")\n",
    "regions = enhancers[\"Id\"].str.split(r\"[:-]\", expand=True)\n",
    "regions.columns = [\"chrom\", \"chromStart\", \"chromEnd\"]\n",
    "enhancers.drop(columns=[\"Id\"], inplace=True)\n",
    "enhancers = pd.concat([\n",
    "    regions,\n",
    "    enhancers,\n",
    "], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>chrom</th>\n",
       "      <th>chromStart</th>\n",
       "      <th>chromEnd</th>\n",
       "      <th>CNhs14406</th>\n",
       "      <th>CNhs14407</th>\n",
       "      <th>CNhs14408</th>\n",
       "      <th>CNhs14410</th>\n",
       "      <th>CNhs14411</th>\n",
       "      <th>CNhs14412</th>\n",
       "      <th>CNhs14413</th>\n",
       "      <th>...</th>\n",
       "      <th>CNhs10735</th>\n",
       "      <th>CNhs11261</th>\n",
       "      <th>CNhs11828</th>\n",
       "      <th>CNhs11883</th>\n",
       "      <th>CNhs11765</th>\n",
       "      <th>CNhs11676</th>\n",
       "      <th>CNhs11763</th>\n",
       "      <th>CNhs12854</th>\n",
       "      <th>CNhs12844</th>\n",
       "      <th>CNhs11813</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>chr1</td>\n",
       "      <td>839741</td>\n",
       "      <td>840250</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.603657</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.352629</td>\n",
       "      <td>0.149688</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.258875</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.395451</td>\n",
       "      <td>0.647647</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>chr1</td>\n",
       "      <td>840753</td>\n",
       "      <td>841210</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.50808</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.149688</td>\n",
       "      <td>...</td>\n",
       "      <td>0.158908</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.086292</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.119996</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>chr1</td>\n",
       "      <td>845485</td>\n",
       "      <td>845678</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.176314</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>chr1</td>\n",
       "      <td>855764</td>\n",
       "      <td>856157</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.16936</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.202142</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>chr1</td>\n",
       "      <td>856539</td>\n",
       "      <td>856757</td>\n",
       "      <td>0.335382</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.404285</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65418</th>\n",
       "      <td>chrY</td>\n",
       "      <td>58986643</td>\n",
       "      <td>58986891</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65419</th>\n",
       "      <td>chrY</td>\n",
       "      <td>58994460</td>\n",
       "      <td>58994662</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65420</th>\n",
       "      <td>chrY</td>\n",
       "      <td>59019347</td>\n",
       "      <td>59019376</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65421</th>\n",
       "      <td>chrY</td>\n",
       "      <td>59019812</td>\n",
       "      <td>59020026</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.149688</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65422</th>\n",
       "      <td>chrY</td>\n",
       "      <td>59025851</td>\n",
       "      <td>59026023</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>65423 rows × 1830 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      chrom chromStart  chromEnd  CNhs14406  CNhs14407  CNhs14408  CNhs14410  \\\n",
       "0      chr1     839741    840250   0.000000    0.00000        0.0   0.603657   \n",
       "1      chr1     840753    841210   0.000000    0.50808        0.0   0.000000   \n",
       "2      chr1     845485    845678   0.000000    0.00000        0.0   0.000000   \n",
       "3      chr1     855764    856157   0.000000    0.16936        0.0   0.000000   \n",
       "4      chr1     856539    856757   0.335382    0.00000        0.0   0.000000   \n",
       "...     ...        ...       ...        ...        ...        ...        ...   \n",
       "65418  chrY   58986643  58986891   0.000000    0.00000        0.0   0.000000   \n",
       "65419  chrY   58994460  58994662   0.000000    0.00000        0.0   0.000000   \n",
       "65420  chrY   59019347  59019376   0.000000    0.00000        0.0   0.000000   \n",
       "65421  chrY   59019812  59020026   0.000000    0.00000        0.0   0.000000   \n",
       "65422  chrY   59025851  59026023   0.000000    0.00000        0.0   0.000000   \n",
       "\n",
       "       CNhs14411  CNhs14412  CNhs14413  ...  CNhs10735  CNhs11261  CNhs11828  \\\n",
       "0       0.000000   0.352629   0.149688  ...   0.000000        0.0        0.0   \n",
       "1       0.000000   0.000000   0.149688  ...   0.158908        0.0        0.0   \n",
       "2       0.000000   0.176314   0.000000  ...   0.000000        0.0        0.0   \n",
       "3       0.202142   0.000000   0.000000  ...   0.000000        0.0        0.0   \n",
       "4       0.404285   0.000000   0.000000  ...   0.000000        0.0        0.0   \n",
       "...          ...        ...        ...  ...        ...        ...        ...   \n",
       "65418   0.000000   0.000000   0.000000  ...   0.000000        0.0        0.0   \n",
       "65419   0.000000   0.000000   0.000000  ...   0.000000        0.0        0.0   \n",
       "65420   0.000000   0.000000   0.000000  ...   0.000000        0.0        0.0   \n",
       "65421   0.000000   0.000000   0.149688  ...   0.000000        0.0        0.0   \n",
       "65422   0.000000   0.000000   0.000000  ...   0.000000        0.0        0.0   \n",
       "\n",
       "       CNhs11883  CNhs11765  CNhs11676  CNhs11763  CNhs12854  CNhs12844  \\\n",
       "0       0.258875        0.0   0.000000   0.395451   0.647647        0.0   \n",
       "1       0.086292        0.0   0.119996   0.000000   0.000000        0.0   \n",
       "2       0.000000        0.0   0.000000   0.000000   0.000000        0.0   \n",
       "3       0.000000        0.0   0.000000   0.000000   0.000000        0.0   \n",
       "4       0.000000        0.0   0.000000   0.000000   0.000000        0.0   \n",
       "...          ...        ...        ...        ...        ...        ...   \n",
       "65418   0.000000        0.0   0.000000   0.000000   0.000000        0.0   \n",
       "65419   0.000000        0.0   0.000000   0.000000   0.000000        0.0   \n",
       "65420   0.000000        0.0   0.000000   0.000000   0.000000        0.0   \n",
       "65421   0.000000        0.0   0.000000   0.000000   0.000000        0.0   \n",
       "65422   0.000000        0.0   0.000000   0.000000   0.000000        0.0   \n",
       "\n",
       "       CNhs11813  \n",
       "0            0.0  \n",
       "1            0.0  \n",
       "2            0.0  \n",
       "3            0.0  \n",
       "4            0.0  \n",
       "...          ...  \n",
       "65418        0.0  \n",
       "65419        0.0  \n",
       "65420        0.0  \n",
       "65421        0.0  \n",
       "65422        0.0  \n",
       "\n",
       "[65423 rows x 1830 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "enhancers"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}