vanheeringen-lab/gimmemotifs

View on GitHub
gimmemotifs/tools/memew.py

Summary

Maintainability
B
4 hrs
Test Coverage
A
96%
import io
import os
import re
from subprocess import PIPE, Popen
from tempfile import NamedTemporaryFile

from gimmemotifs.motif import Motif

from .motifprogram import MotifProgram


class MemeW(MotifProgram):

    """
    Predict motifs using MEME

    Reference:
    """

    def __init__(self):
        self.name = "MEMEW"
        self.cmd = "meme"
        self.use_width = False

    def _run_program(self, bin, fastafile, params=None):
        """
        Run MEME and predict motifs from a FASTA file.

        Parameters
        ----------
        bin : str
            Command used to run the tool.

        fastafile : str
            Name of the FASTA input file.

        params : dict, optional
            Optional parameters. For some of the tools required parameters
            are passed using this dictionary.

        Returns
        -------
        motifs : list of Motif instances
            The predicted motifs.

        stdout : str
            Standard out of the tool.

        stderr : str
            Standard error of the tool.
        """
        default_params = {"single": False, "number": 5}
        if params is not None:
            default_params.update(params)

        tmp = NamedTemporaryFile(dir=self.tmpdir)

        strand = "-revcomp"
        number = default_params["number"]

        cmd = [
            bin,
            fastafile,
            "-text",
            "-dna",
            "-nostatus",
            "-mod",
            "zoops",
            "-nmotifs",
            str(number),
            "-minw",
            "6",
            "-maxw",
            "20",
            "-maxsize",
            "10000000",
        ]
        if not default_params["single"]:
            cmd.append(strand)

        # Fix to run in Docker
        env = os.environ.copy()
        env["OMPI_MCA_plm_rsh_agent"] = "sh"

        # sys.stderr.write(" ".join(cmd) + "\n")
        p = Popen(cmd, bufsize=1, stderr=PIPE, stdout=PIPE, env=env)
        stdout, stderr = p.communicate()

        motifs = []
        motifs = self.parse(io.StringIO(stdout.decode()))

        # Delete temporary files
        tmp.close()

        return motifs, stdout, stderr

    def parse(self, fo):
        """
        Convert MEME output to motifs

        Parameters
        ----------
        fo : file-like
            File object containing MEME output.

        Returns
        -------
        motifs : list
            List of Motif instances.
        """
        motifs = []
        nucs = {"A": 0, "C": 1, "G": 2, "T": 3}

        p = re.compile(r"MOTIF.+MEME-(\d+)\s*width\s*=\s*(\d+)\s+sites\s*=\s*(\d+)")
        pa = re.compile(r"\)\s+([A-Z]+)")
        line = fo.readline()
        while line:
            m = p.search(line)
            align = []
            pfm = None
            if m:
                # print(m.group(0))
                id = f"{self.name}_{m.group(1)}_w{m.group(2)}"
                while not line.startswith("//"):
                    ma = pa.search(line)
                    if ma:
                        # print(ma.group(0))
                        match = ma.group(1)
                        align.append(match)
                        if not pfm:
                            pfm = [[0 for x in range(4)] for x in range(len(match))]
                        for pos in range(len(match)):
                            if match[pos] in nucs:
                                pfm[pos][nucs[match[pos]]] += 1
                            else:
                                for i in range(4):
                                    pfm[pos][i] += 0.25

                    line = fo.readline()

                motifs.append(Motif(pfm[:]))
                motifs[-1].id = id
                motifs[-1].align = align[:]
            line = fo.readline()

        return motifs