KarrLab/bpforms

View on GitHub
bpforms/util.py

Summary

Maintainability
F
2 wks
Test Coverage
A
97%
""" Utilities for BpForms

:Author: Jonathan Karr <karr@mssm.edu>
:Date: 2019-02-05
:Copyright: 2019, Karr Lab
:License: MIT
"""

from . import core
from .alphabet import dna
from .alphabet import rna
from .alphabet import protein
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from wc_utils.util.chem import OpenBabelUtils
from wc_utils.util.chem.marvin import draw_molecule
import importlib
import jinja2
import math
import openbabel
import os
import pkg_resources
import shutil


def get_alphabets():
    """ Get a list of available alphabets

    Returns:
        :obj:`dict`: dictionary which maps the ids of alphabets to alphabets
    """
    alphabets = [
        dna.dna_alphabet,
        rna.rna_alphabet,
        protein.protein_alphabet,
        dna.canonical_dna_alphabet,
        rna.canonical_rna_alphabet,
        protein.canonical_protein_alphabet,
    ]
    return {alphabet.id: alphabet for alphabet in alphabets}


def get_alphabet(alphabet):
    """ Get an alphabet

    Args:
        alphabet (:obj:`str`): alphabet

    Returns:
        :obj:`core.Alphabet`: alphabet
    """
    alphabet_obj = get_alphabets().get(alphabet, None)
    if alphabet_obj is None:
        raise ValueError('Unknown alphabet "{}"'.format(alphabet))
    return alphabet_obj


def get_form(alphabet):
    """ Get a subclass of BpFrom

    Args:
        alphabet (:obj:`str`): alphabet

    Returns:
        :obj:`type`: subclass of BpForm
    """
    if alphabet == 'dna':
        return dna.DnaForm
    if alphabet == 'canonical_dna':
        return dna.CanonicalDnaForm

    if alphabet == 'rna':
        return rna.RnaForm
    if alphabet == 'canonical_rna':
        return rna.CanonicalRnaForm

    if alphabet == 'protein':
        return protein.ProteinForm
    if alphabet == 'canonical_protein':
        return protein.CanonicalProteinForm

    raise ValueError('Alphabet "{}" must be "dna", "rna", or "protein"'.format(alphabet))


def build_alphabets(ph=None, major_tautomer=False, dearomatize=False, _max_monomers=float('inf'), alphabets=None):
    """ Build DNA, RNA, and protein alphabets

    Args:
        ph (:obj:`float`, optional): pH at which calculate major protonation state of each monomeric form
        major_tautomer (:obj:`bool`, optional): if :obj:`True`, calculate the major tautomer
        dearomatize (:obj:`bool`, optional): if :obj:`True`, dearomatize molecule
        _max_monomers (:obj:`float`, optional): maximum number of monomeric forms to build; used for testing
        alphabets (:obj:`list` of :obj:`str` or :obj:`None`, optional): ids of alphabets to build. If :obj:`None`,
            build all alphabets
    """
    core.cache.clear()

    if alphabets is None or 'dna' in alphabets:
        dna.DnaAlphabetBuilder(_max_monomers=_max_monomers).run(ph=ph, major_tautomer=major_tautomer, dearomatize=dearomatize)
    if alphabets is None or 'rna' in alphabets:
        rna.RnaAlphabetBuilder(_max_monomers=_max_monomers).run(ph=ph, major_tautomer=major_tautomer, dearomatize=dearomatize)
    if alphabets is None or 'protein' in alphabets:
        protein.ProteinAlphabetBuilder(_max_monomers=_max_monomers).run(ph=ph, major_tautomer=major_tautomer, dearomatize=dearomatize)


def gen_html_viz_alphabet(bpform_type, filename):
    """ Create and save an HTML document with images of the monomeric forms in an alphabet

    Args:
        bpform_type (:obj:`type`): subclass of :obj:`core.BpForm`
        filename (:obj:`str`): path to save HTML document with images of monomeric forms
    """
    width = 400
    height = 400

    bpform = bpform_type()
    alphabet = bpform.alphabet

    doc = ''
    doc += '<html>\n'
    doc += '  <style type="text/css">\n'
    doc += '  table {width: 100%;}\n'
    doc += '  thead th {background: #ccc}\n'
    doc += '  tbody tr:nth-child(even) {background: #dedede}\n'
    doc += '  tr td, tr th {padding:5px; text-align:center;}\n'
    doc += '  tr td:first-child, tr tr:first-child {padding-left:10px;}\n'
    doc += '  tr td:last-child, tr tr:last-child {padding-right:10px;}\n'
    doc += '  </style>\n'
    doc += '  <body>\n'
    doc += '    <table cellpadding="0" cellspacing="0">\n'
    doc += '      <thead>\n'
    doc += '        <tr>\n'
    doc += '          <th>Code</th>\n'
    doc += '          <th>Monomeric form</th>\n'
    doc += '          <th>Dimer</th>\n'
    doc += '          <th>Canonical SMILES</th>\n'
    doc += '          <th>Monomeric form bond atoms</th>\n'
    doc += '          <th>Monomeric form displaced atoms</th>\n'
    doc += '          <th>Left bond atoms</th>\n'
    doc += '          <th>Left displaced atoms</th>\n'
    doc += '          <th>Right bond atoms</th>\n'
    doc += '          <th>Right displaced atoms</th>\n'
    doc += '        </tr>\n'
    doc += '      </thead>\n'
    for code, monomer in alphabet.monomers.items():
        doc += '        <tr>\n'
        doc += '          <td>{}</td>\n'.format(code)
        doc += '          <td>{}</td>\n'.format(monomer.get_image(width=width, height=height, include_xml_header=False))

        if monomer.structure and bpform.can_monomer_bond_left(monomer) and bpform.can_monomer_bond_right(monomer):
            dimer = bpform_type()
            dimer.seq.append(monomer)
            dimer.seq.append(monomer)
            doc += '          <td>{}</td>\n'.format(draw_molecule(dimer.export('cml'), 'cml',
                                                                  show_atom_nums=False, width=width, height=height))
        else:
            doc += '          <td>{}</td>\n'.format('')
        doc += '          <td>{}</td>\n'.format(monomer.export('smiles', options=('c',)))
        doc += '          <td>{}</td>\n'.format(', '.join('{}{}'.format(atom.element, atom.position)
                                                          for atom in monomer.backbone_bond_atoms))
        doc += '          <td>{}</td>\n'.format(', '.join('{}{}'.format(atom.element, atom.position)
                                                          for atom in monomer.backbone_displaced_atoms))
        doc += '          <td>{}</td>\n'.format(', '.join('{}{}'.format(atom.element, atom.position) for atom in monomer.r_bond_atoms))
        doc += '          <td>{}</td>\n'.format(', '.join('{}{}'.format(atom.element, atom.position)
                                                          for atom in monomer.r_displaced_atoms))
        doc += '          <td>{}</td>\n'.format(', '.join('{}{}'.format(atom.element, atom.position) for atom in monomer.l_bond_atoms))
        doc += '          <td>{}</td>\n'.format(', '.join('{}{}'.format(atom.element, atom.position)
                                                          for atom in monomer.l_displaced_atoms))
        doc += '        </tr>\n'
    doc += '    </table>\n'
    doc += '  </body>\n'
    doc += '</html>\n'

    with open(filename, 'w') as file:
        file.write(doc)


def validate_bpform_bonds(form_type):
    """ Validate bonds in alphabet

    Args:
        form_type (:obj:`type`): type of BpForm

    Raises:
        :obj:`ValueError`: if any of the bonds are invalid
    """

    form = form_type()

    element_table = openbabel.OBElementTable()

    errors = []

    # validate bonds to backbone
    atom_types = [
        ['backbone', 'monomer_bond_atoms'],
        ['backbone', 'monomer_displaced_atoms'],
        ['bond', 'l_bond_atoms'],
        ['bond', 'r_bond_atoms'],
        ['bond', 'l_displaced_atoms'],
        ['bond', 'r_displaced_atoms'],
    ]
    for molecule_md, atom_type in atom_types:
        molecule = getattr(form, molecule_md)
        selected_hydrogens = []
        for atom_md in getattr(molecule, atom_type):
            if atom_md.molecule == core.Backbone:
                if form.backbone.structure:
                    n_backbone_atoms = form.backbone.structure.NumAtoms()
                else:
                    n_backbone_atoms = 0
                if atom_md.position < 1 or atom_md.position > n_backbone_atoms:
                    errors.append('Invalid position {} for {}.{}'.format(atom_md.position, molecule_md, atom_type))
                    continue

                atom = form.backbone.structure.GetAtom(atom_md.position)
                if atom_md.element == 'H' and atom.GetAtomicNum() != 1:
                    atom = core.get_hydrogen_atom(atom, selected_hydrogens, None)
                    if atom is None:
                        continue

                if element_table.GetSymbol(atom.GetAtomicNum()) != atom_md.element:
                    errors.append('Invalid element {} != {} at position {} for {}.{}'.format(
                        element_table.GetSymbol(atom.GetAtomicNum()), atom_md.element,
                        atom_md.position, molecule_md, atom_type))

    # validate bonds to monomer
    atom_types = [
        'backbone_bond_atoms',
        'backbone_displaced_atoms',
        'r_bond_atoms',
        'l_bond_atoms',
        'r_displaced_atoms',
        'l_displaced_atoms',
    ]
    for i_monomer, monomer in enumerate(form.alphabet.monomers.values()):
        for atom_type in atom_types:
            selected_hydrogens = []
            for atom_md in getattr(monomer, atom_type):
                if atom_md.molecule == core.Monomer:
                    if atom_md.position < 1 or atom_md.position > monomer.structure.NumAtoms():
                        errors.append('Invalid position {} for monomeric form:{} {}'.format(atom_md.position, monomer.id, atom_type))
                        continue

                    atom = monomer.structure.GetAtom(atom_md.position)
                    if atom_md.element == 'H' and atom.GetAtomicNum() != 1:
                        atom = core.get_hydrogen_atom(atom, selected_hydrogens, i_monomer)
                        if atom is None:
                            continue

                    if element_table.GetSymbol(atom.GetAtomicNum()) != atom_md.element:
                        errors.append('Invalid element {} != {} at position {} for monomeric form:{} {}'.format(
                            element_table.GetSymbol(atom.GetAtomicNum()), atom_md.element,
                            atom_md.position, monomer.id, atom_type))

    # validate monomeric forms and dimers
    for monomer in form.alphabet.monomers.values():
        monomer_form = form_type(seq=[monomer])
        try:
            monomer_structure = monomer_form.get_structure()[0]
            if monomer_form.get_formula() != OpenBabelUtils.get_formula(monomer_structure):
                errors.append('Monomeric form of {} has incorrect formula: {} != {}'.format(
                    monomer.id, str(monomer_form.get_formula()), str(OpenBabelUtils.get_formula(monomer_structure))))
                continue
            if monomer_form.get_charge() != monomer_structure.GetTotalCharge():
                errors.append('Monomeric form of {} has incorrect charge: {} != {}'.format(
                    monomer.id, monomer_form.get_charge(), monomer_structure.GetTotalCharge()))
                continue
            OpenBabelUtils.export(monomer_structure, 'smiles')
            OpenBabelUtils.export(monomer_structure, 'inchi')
        except Exception as error:
            errors.append('Unable to create monomeric form of {}:\n    {}'.format(monomer.id, str(error)))

        if form.can_monomer_bond_left(monomer) and form.can_monomer_bond_right(monomer):
            dimer_form = form_type(seq=[monomer, monomer])
            try:
                dimer_structure = dimer_form.get_structure()[0]
                if dimer_form.get_formula() != OpenBabelUtils.get_formula(dimer_structure):
                    errors.append('Dimer of {} has incorrect formula: {} != {}'.format(
                        monomer.id, str(dimer_form.get_formula()), str(OpenBabelUtils.get_formula(dimer_structure))))
                    continue
                if dimer_form.get_charge() != dimer_structure.GetTotalCharge():
                    errors.append('Dimer of {} has incorrect charge: {} != {}'.format(
                        monomer.id, dimer_form.get_charge(), dimer_structure.GetTotalCharge()))
                    continue
                OpenBabelUtils.export(dimer_structure, 'smiles')
                OpenBabelUtils.export(dimer_structure, 'inchi')
            except Exception as error:
                errors.append('Unable to form dimer of {}:\n    {}'.format(monomer.id, str(error)))

    # report errors
    if errors:
        raise ValueError('BpForm {} is invalid:\n  {}'.format(form_type.__name__, '\n  '.join(errors)))


def write_to_fasta(forms, filename):
    """ Write BpForms to a FASTA-formatted file

    Args:
        forms (:obj:`dict`): dictionary which maps the ids of molecules to their BpForms-encoded
            sequences
        filename (:obj:`str`): path to FASTA-formatted file
    """
    seqs = (SeqRecord(id=id, seq=Seq(str(form))) for id, form in forms.items())
    SeqIO.write(seqs, filename, "fasta")


def read_from_fasta(filename, alphabet):
    """ Read BpForms from a FASTA-formatted file

    Args:
        filename (:obj:`str`): path to FASTA-formatted file
        alphabet (:obj:`str`): alphabet of BpForms in file

    Returns:
        :obj:`dict`: dictionary which maps the ids of molecules to their BpForms-encoded
            sequences
    """
    Form = get_form(alphabet)

    forms = {}
    for record in SeqIO.parse(filename, "fasta"):
        forms[record.id] = Form().from_str(str(record.seq))

    return forms


def gen_genomic_viz(polymers, inter_crosslinks=None, polymer_labels=None, seq_features=None,
                    width=800, cols=1, polymer_margin=25,
                    nt_per_track=100, track_sep=10,
                    polymer_label_font_size=15, seq_font_size=13, tick_label_font_size=10,
                    legend_font_size=13, tooltip_font_size=13,
                    x_link_stroke_width=2, x_link_radius=4,
                    nick_stroke_width=2,
                    axis_stroke_width=0.5,
                    seq_color='#000000', non_canonical_color='#e74624',
                    intra_x_link_color='#2daae1', inter_x_link_color='#90e227',
                    nick_color='#dabe2e',
                    axis_color='#000000', polymer_label_color='#000000'):
    """ Get a genomic visualization of the :obj:`BpForm`

    Args:
        polymers (:obj:`list` of :obj:`core.BpForm`): polymers
        inter_crosslinks (:obj:`list`): list of inter-polymer crosslinks
        polymer_labels (:obj:`dict`, optional): dictionary that maps polymers to their labels
        seq_features (:obj:`list` of :obj:`dict`, optional): list of features each
            represented by a dictionary with three keys

            * label (:obj:`str`): description of the type of feature
            * color (:obj:`str`): color
            * positions (:obj:`list` of :obj:`dict`): dictionary which maps 
              indices of polymers to a list of position ranges of the type 
              of feature
        width (:obj:`int`, optional): width
        cols (:obj:`int`, optional): number of columns of polymers
        polymer_margin (:obj:`int`, optional): horizontal and vertical spacing between polymers
        nt_per_track (:obj:`int`, optional): number of nucleotides per track
        track_sep (:obj:`int`, optional): vertical separation between tracks in pixels
        polymer_label_font_size (:obj:`float`, optional): font size of polymer label
        seq_font_size (:obj:`float`, optional): font size of sequence
        tick_label_font_size (:obj:`float`, optional): font size of tick labels
        legend_font_size (:obj:`float`, optional): font size of legend
        tooltip_font_size (:obj:`float`, optional): font size of tooltip
        x_link_stroke_width (:obj:`float`, optional): stroke width of crosslinks
        x_link_radius (:obj:`float`, optional): radius of crosslinks line
        nick_stroke_width (:obj:`float`, optional): stroke width of nicks
        axis_stroke_width (:obj:`float`, optional): stroke width of axis
        seq_color (:obj:`str`, optional): color of canonical monomers
        non_canonical_color (:obj:`str`, optional): color of non-canonical monomers
        intra_x_link_color (:obj:`str`, optional): colors of intrastrand crosslinks
        inter_x_link_color (:obj:`str`, optional): colors of interstrand crosslinks
        nick_color (:obj:`str`, optional): colors of nicks
        axis_color (:obj:`str`, optional): color of axis
        polymer_label_color (:obj:`str`, optional): color of polymer labels

    Returns:
        :obj:`str`: SVG image
    """
    import bpforms
    from bpforms.xlink.core import onto_crosslink_to_id

    inter_crosslinks = inter_crosslinks or []
    polymer_labels = polymer_labels or {}
    seq_features = seq_features or []

    nc_label_sep = 0.1 * seq_font_size
    axis_sep = 0.2 * tick_label_font_size
    tick_len = 0.4 * tick_label_font_size
    tick_label_sep = 0.6 * tick_label_font_size
    legend_label_sep = 1/3 * legend_font_size
    if not polymer_labels:
        polymer_label_font_size = 0
    polymer_label_sep = 0.5 * polymer_label_font_size

    has_nicks = False

    polymers_context = []
    for i_polymer, polymer in enumerate(polymers):
        if isinstance(polymer, dna.DnaForm):
            canonical_monomers = [dna.dna_alphabet.monomers.get(code) for code in 'ACGT']
        elif isinstance(polymer, rna.RnaForm):
            canonical_monomers = [rna.rna_alphabet.monomers.get(code) for code in 'ACGU']
        elif isinstance(polymer, protein.ProteinForm):
            canonical_monomers = [protein.protein_alphabet.monomers.get(code)
                                  for code in bpforms.canonical_protein_alphabet.monomers.keys()]
        else:
            raise ValueError('BpForm must be an instance of `DnaForm`, `RnaForm`, or `ProteinForm`')

        monomer_codes = {monomer: code for code, monomer in polymer.alphabet.monomers.items()}

        seq_tracks = []
        monomer_seq = polymer.seq
        canonical_seq = polymer.get_canonical_seq()

        n_tracks = math.ceil(len(monomer_seq) / nt_per_track)
        max_nc_label_len = 0
        has_non_canonical = False
        for i_track in range(n_tracks):
            seq_track = {
                'i_track': i_track,
                'min': i_track * nt_per_track + 1,
                'max': min((i_track + 1) * nt_per_track, len(monomer_seq)),
                'len': min((i_track + 1) * nt_per_track, len(monomer_seq)) - (i_track * nt_per_track + 1) + 1,
                'seq': [],
                'ticks': [],
            }
            seq_tracks.append(seq_track)

            # seq
            for i_monomer, (monomer, canonical_code) in enumerate(zip(monomer_seq[i_track * nt_per_track:(i_track + 1) * nt_per_track],
                                                                      canonical_seq[i_track * nt_per_track:(i_track + 1) * nt_per_track])):
                non_canonical_label = monomer_codes.get(monomer, None)
                if not non_canonical_label:
                    if monomer.id:
                        non_canonical_label = monomer.id
                    elif monomer.name:
                        non_canonical_label = monomer.name
                    elif monomer.synonyms:
                        non_canonical_label = list(monomer.synonyms)[0]
                    elif monomer.identifiers:
                        non_canonical_label = list(monomer.identifiers)[0].id
                    else:
                        non_canonical_label = 'Non-canonical'

                if monomer.name:
                    tooltip = monomer.name
                elif monomer.synonyms:
                    tooltip = list(monomer.synonyms)[0]
                elif monomer.identifiers:
                    tooltip = list(monomer.identifiers)[0].id
                elif monomer.id:
                    tooltip = monomer.id
                else:
                    tooltip = 'Non-canonical monomer'

                track_monomer = {
                    'i_monomer': i_monomer,
                    'canonical_code': canonical_code,
                    'canonical': monomer in canonical_monomers,
                    'non_canonical_label': non_canonical_label,
                    'tooltip': tooltip,
                }
                has_non_canonical = has_non_canonical or track_monomer['canonical']

                pos = i_track * nt_per_track + i_monomer + 1
                for feature in seq_features:
                    for position in feature['positions'].get(i_polymer, []):
                        if pos >= position[0] and pos <= position[1]:
                            track_monomer['color'] = feature['color']
                            break

                seq_track['seq'].append(track_monomer)

                max_nc_label_len = max(max_nc_label_len, len(non_canonical_label))

            # ticks
            seq_track['ticks'].append({'x': 1, 'label': i_track * nt_per_track + 1})
            for i_tick in range(math.floor((seq_track['max'] - seq_track['min'] + 1) / 10)):
                seq_track['ticks'].append({'x': (i_tick + 1) * 10, 'label': i_track * nt_per_track + (i_tick + 1) * 10})

        x_links = []
        for crosslink in polymer.crosslinks:
            l = crosslink.get_l_bond_atoms()[0].monomer
            r = crosslink.get_r_bond_atoms()[0].monomer
            l_track = math.floor((l - 1) / nt_per_track)
            r_track = math.floor((r - 1) / nt_per_track)
            l_pos = (l - 1) % nt_per_track + 1
            r_pos = (r - 1) % nt_per_track + 1

            if l_pos < r_pos:
                x_link = {
                    'l_track': l_track,
                    'r_track': r_track,
                    'l_pos': l_pos,
                    'r_pos': r_pos,
                }
            else:
                x_link = {
                    'l_track': r_track,
                    'r_track': l_track,
                    'l_pos': r_pos,
                    'r_pos': l_pos,
                }
            if isinstance(crosslink, core.OntoBond):
                x_link['tooltip'] = onto_crosslink_to_id[crosslink.type]
            else:
                x_link['tooltip'] = None
            x_links.append(x_link)

        if polymer.nicks:
            has_nicks = True
        nicks = []
        for nick in polymer.nicks:
            nicks.append({
                'pos': (nick.position - 1) % nt_per_track + 1,
                'track': math.floor((nick.position - 1) / nt_per_track),
                'tooltip': None,
            })

        polymers_context.append({
            'label': polymer_labels.get(i_polymer, None),
            'seq_tracks': seq_tracks,
            'x_links': x_links,
            'nicks': nicks,
        })

    inter_x_links_context = []
    for x_link in inter_crosslinks:
        l = int(float(x_link.get_l_bond_atoms()[0].subunit))
        r = int(float(x_link.get_r_bond_atoms()[0].subunit))
        l_polymer_row = math.floor(l / cols)
        l_polymer_col = l % cols
        r_polymer_row = math.floor(r / cols)
        r_polymer_col = r % cols

        l = x_link.get_l_bond_atoms()[0].monomer
        r = x_link.get_r_bond_atoms()[0].monomer
        l_track = math.floor((l - 1) / nt_per_track)
        r_track = math.floor((r - 1) / nt_per_track)
        l_pos = (l - 1) % nt_per_track + 1
        r_pos = (r - 1) % nt_per_track + 1

        if l_polymer_col > r_polymer_col \
            or (l_polymer_col == r_polymer_col and
                l_pos > r_pos):
            inter_x_link_context = {
                'r_polymer_row': l_polymer_row,
                'r_polymer_col': l_polymer_col,
                'r_track': l_track,
                'r_pos': l_pos,
                'l_polymer_row': r_polymer_row,
                'l_polymer_col': r_polymer_col,
                'l_track': r_track,
                'l_pos': r_pos,
            }
        else:
            inter_x_link_context = {
                'l_polymer_row': l_polymer_row,
                'l_polymer_col': l_polymer_col,
                'l_track': l_track,
                'l_pos': l_pos,
                'r_polymer_row': r_polymer_row,
                'r_polymer_col': r_polymer_col,
                'r_track': r_track,
                'r_pos': r_pos,
            }
        inter_x_link_context['tooltip'] = getattr(x_link, 'comments', None)
        inter_x_links_context.append(inter_x_link_context)

    all_x_links = list(inter_x_links_context)
    for i_polymer, polymer in enumerate(polymers_context):
        for x_link in polymer['x_links']:
            x_link['l_polymer_row'] = x_link['r_polymer_row'] = math.floor(i_polymer / cols)
            x_link['l_polymer_col'] = x_link['r_polymer_col'] = i_polymer % cols
            all_x_links.append(x_link)

    sorted_x_links = sorted(all_x_links, key=lambda x: (
        x['l_polymer_row'] == x['r_polymer_row'] and x['l_track'] == x['r_track'],
        x['l_polymer_row'], x['r_polymer_row'],
        x['l_track'], x['r_track'],
        x['l_polymer_col'], x['l_pos'],
        x['r_polymer_col'], x['r_pos']))
    offset = 0
    prev_horz = False
    prev_row = -1
    prev_col = -1
    prev_track = -1
    prev_pos = -1
    for x_link in sorted_x_links:
        if prev_horz and x_link['l_polymer_row'] == x_link['r_polymer_row'] and \
                x_link['l_track'] == x_link['r_track'] and \
                x_link['l_polymer_row'] == prev_row and \
                x_link['l_track'] == prev_track and \
                (x_link['l_polymer_col'] < prev_col or
                 (x_link['l_polymer_col'] == prev_col and x_link['l_pos'] <= prev_pos)):
            offset += 1
        else:
            offset = 0
            prev_horz = x_link['l_polymer_row'] == x_link['r_polymer_row'] and \
                x_link['l_track'] == x_link['r_track']
            prev_row = x_link['r_polymer_row']
            prev_col = x_link['r_polymer_col']
            prev_track = x_link['r_track']
            prev_pos = x_link['r_pos']
        x_link['v_offset'] = offset

    sorted_x_links = sorted(all_x_links, key=lambda x: (
        x['l_polymer_col'] == x['r_polymer_col'] and x['l_pos'] == x['r_pos'],
        x['l_polymer_col'], x['r_polymer_col'],
        x['l_pos'], x['r_pos'],
        x['l_polymer_row'], x['l_track'],
        x['r_polymer_row'], x['r_track'],))
    offset = 0
    prev_vert = False
    prev_row = -1
    prev_col = -1
    prev_track = -1
    prev_pos = -1
    for x_link in sorted_x_links:
        if prev_vert and x_link['l_polymer_col'] == x_link['r_polymer_col'] and \
                x_link['l_pos'] == x_link['r_pos'] and \
                x_link['l_polymer_col'] == prev_col and \
                x_link['l_pos'] == prev_pos and \
                (x_link['l_polymer_row'] < prev_row or
                 (x_link['l_polymer_row'] == prev_row and x_link['l_track'] <= prev_track)):
            offset += 1
        else:
            offset = 0
            prev_vert = x_link['l_polymer_col'] == x_link['r_polymer_col'] and \
                x_link['l_pos'] == x_link['r_pos']
            prev_row = x_link['r_polymer_row']
            prev_col = x_link['r_polymer_col']
            prev_track = x_link['r_track']
            prev_pos = x_link['r_pos']
        x_link['h_offset'] = offset

    # read template
    with open(pkg_resources.resource_filename('bpforms', 'genomic_viz.template.svg')) as file:
        template = jinja2.Template(file.read())

    # render template
    max_polymer_len = max(len(polymer.seq) for polymer in polymers)
    h_padding = tick_label_font_size * (math.floor(math.log10(max_polymer_len)) + 1) * 0.5 * 0.6
    code_h = seq_font_size
    nc_label_h = max_nc_label_len * math.sin(math.pi / 3) * seq_font_size * 0.65
    track_h = nc_label_h + nc_label_sep + code_h \
        + axis_sep + tick_len + tick_label_sep + tick_label_font_size * 0.25
    legend_sep = polymer_margin

    polymer_w = (width - (cols - 1) * polymer_margin) / cols
    max_n_tracks = math.ceil(max_polymer_len / nt_per_track)
    polymer_hs = [0] * math.ceil(len(polymers) / cols)
    for i_polymer, polymer in enumerate(polymers):
        n_tracks = math.ceil(len(polymer.seq) / nt_per_track)
        polymer_h = (polymer_label_font_size + polymer_label_sep) \
            + track_h * n_tracks + track_sep * (n_tracks - 1)
        i_row = math.floor(i_polymer / cols)
        polymer_hs[i_row] = max(polymer_h, polymer_hs[i_row])

    cum_polymer_h = []
    for i_row, polymer_h in enumerate(polymer_hs):
        cum_polymer_h.append(sum(polymer_hs[0:i_row]))

    legend_rows = []

    if has_non_canonical:
        legend_rows.append({
            'label': 'Non-canonical monomeric form',
            'color': non_canonical_color,
            'symbol': 'X',
        })
    for seq_feature in seq_features:
        legend_rows.append({
            'label': seq_feature['label'],
            'color': seq_feature['color'],
            'symbol': 'X',
        })
    if any(len(polymer.crosslinks) >= 1 for polymer in polymers):
        legend_rows.append({
            'label': 'Intrachain crosslink',
            'color': intra_x_link_color,
            'symbol': None,
            'stroke_width': x_link_stroke_width,
        })
    if inter_crosslinks:
        legend_rows.append({
            'label': 'Interchain crosslink',
            'color': inter_x_link_color,
            'symbol': None,
            'stroke_width': x_link_stroke_width,
        })
    if has_nicks:
        legend_rows.append({
            'label': 'Nick',
            'color': nick_color,
            'symbol': None,
            'stroke_width': nick_stroke_width,
        })

    context = {
        # BpForm
        'polymers': polymers_context,
        'inter_x_links': inter_x_links_context,

        # legend
        'legend_rows': legend_rows,

        # size
        'width': width,
        'height': sum(polymer_hs) + \
        polymer_margin * (math.ceil(len(polymers) / cols)-1) + \
        (len(legend_rows) >= 1) * (\
            legend_sep + \
            len(legend_rows) * legend_font_size + \
            (len(legend_rows) - 1) * legend_label_sep),
        'h_padding': h_padding,
        'cols': cols,
        'polymer_w': polymer_w,
        'cum_polymer_h': cum_polymer_h,
        'polymer_margin': polymer_margin,

        # track
        'nt_per_track': nt_per_track,
        'track_w': polymer_w - 2 * h_padding,
        'track_h': track_h,
        'px_per_nt': (polymer_w - 2 * h_padding) / (nt_per_track - 1),
        'nc_label_h': nc_label_h,
        'nc_label_sep': nc_label_sep,
        'code_h': code_h,
        'axis_sep': axis_sep,
        'tick_len': tick_len,
        'tick_label_sep': tick_label_sep,
        'track_sep': track_sep,
        'polymer_label_sep': polymer_label_sep,

        # crosslinks
        'x_link_radius': x_link_radius,

        # legend
        'legend_sep': legend_sep,
        'legend_label_sep': legend_label_sep,

        # font sizes
        'polymer_label_font_size': polymer_label_font_size,
        'seq_font_size': seq_font_size,
        'tick_label_font_size': tick_label_font_size,
        'legend_font_size': legend_font_size,

        # stroke widths
        'x_link_stroke_width': x_link_stroke_width,
        'nick_stroke_width': nick_stroke_width,
        'axis_stroke_width': axis_stroke_width,

        # colors
        'seq_color': seq_color,
        'non_canonical_color': non_canonical_color,
        'intra_x_link_color': intra_x_link_color,
        'inter_x_link_color': inter_x_link_color,
        'nick_color': nick_color,
        'axis_color': axis_color,
        'polymer_label_color': polymer_label_color,
    }

    return template.render(**context)


def export_ontos_to_obo(alphabets=None, filename=None, _max_monomers=None, _max_xlinks=None):
    """ Exports alphabets of residues and ontology of crosslinks to OBO format

    Args:
        alphabets (:obj:`list` of :obj:`core.Alphabet`, optional): alphabets to export        
        filename (:obj:`str`, optional): path to export alphabets
        _max_monomers (:obj:`int`, optional): maximum number of monomers to export
        _max_xlinks (:obj:`int`, optional): maximum number of crosslinks to export
    """
    from .xlink.core import crosslinks_onto
    import pronto

    if alphabets is None:
        alphabets = [dna.dna_alphabet, rna.rna_alphabet, protein.protein_alphabet]
    if filename is None:
        filename = pkg_resources.resource_filename('bpforms',
                                                   os.path.join('alphabet', 'onto.obo'))

    # create ontology
    onto = pronto.Ontology()

    onto.metadata.synonymtypedefs.add(pronto.SynonymType('structure', 'SMILES-encoded structure', 'EXACT'))
    onto.metadata.synonymtypedefs.add(pronto.SynonymType('backbone_bond_atoms', 'Backbone bond atoms', 'EXACT'))
    onto.metadata.synonymtypedefs.add(pronto.SynonymType('backbone_displaced_atoms', 'Backbone displaced atoms', 'EXACT'))
    onto.metadata.synonymtypedefs.add(pronto.SynonymType('l_bond_atoms', 'Left bond atoms', 'EXACT'))
    onto.metadata.synonymtypedefs.add(pronto.SynonymType('l_displaced_atoms', 'Left displaced atoms', 'EXACT'))
    onto.metadata.synonymtypedefs.add(pronto.SynonymType('r_bond_atoms', 'Right bond atoms', 'EXACT'))
    onto.metadata.synonymtypedefs.add(pronto.SynonymType('r_displaced_atoms', 'Right displaced atoms', 'EXACT'))
    onto.metadata.synonymtypedefs.add(pronto.SynonymType('delta_mass', 'Delta mass', 'EXACT'))
    onto.metadata.synonymtypedefs.add(pronto.SynonymType('delta_charge', 'Delta charge', 'EXACT'))
    onto.metadata.synonymtypedefs.add(pronto.SynonymType('bond_order', 'Bond order', 'EXACT'))
    onto.metadata.synonymtypedefs.add(pronto.SynonymType('bond_stereo', 'Bond stereochemistry', 'EXACT'))

    is_a_relationship = onto.get_relationship('is_a')
    part_of_relationship = onto.create_relationship('part_of')
    derives_from_relationship = onto.create_relationship('derives_from')
    l_monomer_relationship = onto.create_relationship('l_monomer')
    r_monomer_relationship = onto.create_relationship('r_monomer')

    # add terms for monomers to ontology
    alphabet_type_term = onto.create_term(id='BpForms:alphabet')
    alphabet_type_term.name = 'alphabet'

    monomer_type_term = onto.create_term(id='BpForms:monomer')
    monomer_type_term.name = 'monomeric form'
    monomer_type_term.add_synonym('residue', scope='BROAD')
    monomer_type_term.add_synonym('monomer', scope='BROAD')

    xlink_type_term = onto.create_term(id='BpForms:crosslink')
    xlink_type_term.name = 'crosslink'

    monomer_to_term = {}
    for alphabet in alphabets:
        alphabet_term = onto.create_term(id='BpForms:' + alphabet.id)
        alphabet_term.name = alphabet.name
        alphabet_term.desc = alphabet.description
        alphabet_term.relationships = {
            is_a_relationship: [alphabet_type_term],
        }

        for code, monomer in list(alphabet.monomers.items())[0:_max_monomers]:
            synonyms = []
            for syn in monomer.synonyms:
                synonyms.append(pronto.SynonymData(syn, scope='BROAD'))
            for identifier in monomer.identifiers:
                synonyms.append(pronto.SynonymData(
                    '{}: {}'.format(identifier.ns, identifier.id),
                    scope='EXACT'))

            relationships = {
                is_a_relationship: [monomer_type_term],
                part_of_relationship: [alphabet_term],
            }

            other = {'structure': [monomer.export('smiles')]}
            for atom_type in ['backbone_bond_atoms', 'backbone_displaced_atoms',
                              'l_bond_atoms', 'l_displaced_atoms',
                              'r_bond_atoms', 'r_displaced_atoms']:
                atoms = getattr(monomer, atom_type)
                if atoms:
                    other[atom_type] = [_atom_to_str(atom) for atom in atoms]
            if monomer.delta_mass:
                other['delta_mass'] = [monomer.delta_mass]
            if monomer.delta_charge:
                other['delta_charge'] = [monomer.delta_charge]

            term = onto.create_term(id='BpForms:{}_{}'.format(alphabet.id, code))
            term.name = monomer.name or None
            term.desc = monomer.comments or ''
            term.relationships = relationships
            term.other = other
            for syn in synonyms:
                term.add_synonym(syn.description, scope=syn.scope)

            monomer_to_term[monomer] = term

    for alphabet in alphabets:
        monomer_codes = {monomer: code for code, monomer in alphabet.monomers.items()}
        for code, monomer in list(alphabet.monomers.items())[0:_max_monomers]:
            monomer_term = onto['BpForms:{}_{}'.format(alphabet.id, code)]
            relationships = {}
            for base_monomer in monomer.base_monomers:
                base_monomer_term = onto.get('BpForms:{}_{}'.format(
                    alphabet.id, monomer_codes[base_monomer]), None)
                if base_monomer_term is not None:
                    relationships[derives_from_relationship] = [base_monomer_term]
            monomer_term.relationships = relationships

    # add terms for crosslinks to ontology
    for xlink in list(crosslinks_onto.values())[0:_max_xlinks]:
        relationships = {
            is_a_relationship: [xlink_type_term],
        }

        l_monomer_term = monomer_to_term.get(xlink.l_monomer, None)
        r_monomer_term = monomer_to_term.get(xlink.r_monomer, None)
        if l_monomer_term:
            relationships[l_monomer_relationship] = [l_monomer_term]
        if r_monomer_term:
            relationships[r_monomer_relationship] = [r_monomer_term]

        other = {}
        for atom_type in ['l_bond_atoms', 'r_bond_atoms', 'l_displaced_atoms', 'r_displaced_atoms']:
            other[atom_type] = [_atom_to_str(atom) for atom in getattr(xlink, atom_type)]

        other['bond_order'] = [xlink.order.name]
        if xlink.stereo:
            other['bond_stereo'] = [xlink.stereo.name]

        term = onto.create_term(id='BpForms:crosslink_{}'.format(xlink.id))
        term.name = xlink.name or None
        for syn in xlink.synonyms:
            term.add_synonym(syn, scope='BROAD')
        term.desc = xlink.comments or ''
        term.relationships = relationships
        term.other = other

    # export ontology
    with open(filename, 'wb') as file:
        onto.dump(file)


def _atom_to_str(atom):
    """ Get the string representation of an atom in a monomeric form in an
    alphabet

    Args:
        atom (:obj:`core.Atom`): atom

    Returns:
        :obj:`str`: string representation
    """
    if atom.charge:
        return '{0}{1}{2:+d}'.format(atom.element, atom.position, atom.charge)
    else:
        return '{0}{1}'.format(atom.element, atom.position)