LucaCappelletti94/crr_labels

View on GitHub
crr_labels/utils/normalize_bed_file.py

Summary

Maintainability
A
0 mins
Test Coverage
A
100%
from typing import List
import pandas as pd


def normalize_bed_file(cell_lines: List[str], bed_file: pd.DataFrame) -> pd.DataFrame:
    """Return normalized bed file.

    Parameters
    ---------------------------
    cell_lines:List[str],
        The cell lines to be maintained in the bed file.
    bed_file:pd.DataFrame,
        The bed file to normalized.

    Returns
    ----------------------------
    The normalized bed file.
    """
    if "strand" not in bed_file:
        bed_file["strand"] = "."
    cell_lines = [
        cell_line
        for cell_line in cell_lines
        if cell_line in bed_file.columns
    ]
    df = bed_file[["chrom", "start",
                   "end", "strand", *sorted(cell_lines)]]
    return df.rename(
        columns={
            "start": "chromStart",
            "end": "chromEnd"
        }
    )