stephensolis/kameris

View on GitHub
kameris/utils/file_formats.py

Summary

Maintainability
A
0 mins
Test Coverage
from __future__ import absolute_import, division, unicode_literals

import re
from six.moves import zip


# FASTA

def read_fasta(infile, include_other_letters=False, return_headers=False):
    """Parse FASTA-formatted lines into a list of sequence strings.

    Args:
        infile: An iterable of lines (for example an open file). Lines may
            be text or, on Python 3, bytes; bytes are decoded with the
            default codec.
        include_other_letters: When false (the default), every character
            other than uppercase ``A``, ``C``, ``G`` and ``T`` is removed
            from sequence lines.
        return_headers: When true, also return the header lines.

    Returns:
        The list of sequences, or a ``(sequences, headers)`` tuple when
        ``return_headers`` is true. Headers keep their leading ``>``.

    Note:
        A blank line ends the current sequence just like a header does.
        A header that is not followed by any sequence line adds no entry
        to ``sequences``, so the two returned lists can differ in length.
    """
    sequences = []
    headers = []

    currseq = []
    for line in infile:
        line = line.strip()
        if isinstance(line, bytes) and str != bytes:
            line = line.decode()

        # Slice rather than index so that an empty line is handled safely;
        # indexing a blank line used to raise IndexError when
        # return_headers was enabled.
        is_header = line[:1] == '>'

        if not line or is_header:
            if return_headers and is_header:
                headers.append(line)
            # Both blank lines and headers close the sequence in progress.
            if currseq:
                sequences.append(''.join(currseq))
                currseq = []
        else:
            if not include_other_letters:
                line = re.sub('[^ACGT]', '', line)
            currseq.append(line)

    # Flush the final sequence, which has no trailing header to close it.
    if currseq:
        sequences.append(''.join(currseq))

    if return_headers:
        return sequences, headers
    return sequences


def import_fasta(filename, **kwargs):
    """Open ``filename`` for reading in text mode and parse it as FASTA.

    All keyword arguments are forwarded unchanged to ``read_fasta``, and
    whatever ``read_fasta`` returns is returned to the caller.
    """
    with open(filename, 'r') as fasta_file:
        parsed = read_fasta(fasta_file, **kwargs)
    return parsed


def write_fasta(outfile, sequences, headers=iter(str, 0)):
    """Write sequences to ``outfile`` as FASTA records.

    Each record is a ``>``-prefixed header line followed by the sequence
    on a single line.

    Args:
        outfile: An object with a ``write`` method accepting text.
        sequences: Iterable of sequence strings.
        headers: Iterable of header strings, paired with ``sequences`` in
            order. The default, ``iter(str, 0)``, calls ``str()`` forever
            and therefore yields an endless stream of empty strings, so
            every record gets a bare ``>`` header line.

    Records are produced by zipping ``headers`` with ``sequences``, so
    output stops as soon as either one runs out.
    """
    for title, residues in zip(headers, sequences):
        header_line = ''.join(('>', title, '\n'))
        outfile.write(header_line)
        outfile.write(residues)
        outfile.write('\n')


def export_fasta(filename, sequences, **kwargs):
    """Open ``filename`` for writing in text mode and write FASTA to it.

    ``sequences`` and any keyword arguments are forwarded unchanged to
    ``write_fasta``. The file is opened with mode ``'w'``, so existing
    content is replaced.
    """
    with open(filename, 'w') as fasta_file:
        write_fasta(fasta_file, sequences, **kwargs)