bpforms/core.py
""" Classes to represent modified forms of DNA, RNA, and proteins
:Author: Jonathan Karr <karr@mssm.edu>
:Date: 2019-01-31
:Copyright: 2019, Karr Lab
:License: MIT
"""
from ruamel import yaml
from wc_utils.util.chem import EmpiricalFormula, OpenBabelUtils
from wc_utils.util.chem.marvin import get_major_micro_species, draw_molecule
import abc
import attrdict
import copy
import enum
import itertools
import lark
import openbabel
import os
import pkg_resources
import re
import urllib.parse
import warnings
import wc_utils.cache
# setup cache
cache_dir = os.path.expanduser('~/.cache/bpforms')
if not os.path.isdir(cache_dir):
os.makedirs(cache_dir)
cache = wc_utils.cache.Cache(directory=cache_dir)
class Identifier(object):
""" A identifier in a namespace for an external database
Attributes:
ns (:obj:`str`): namespace
id (:obj:`str`): id in namespace
"""
def __init__(self, ns, id):
"""
Args:
ns (:obj:`str`): namespace
id (:obj:`str`): id in namespace
"""
self.ns = ns
self.id = id
@property
def ns(self):
""" Get the namespace
Returns:
:obj:`str`: namespace
"""
return self._ns
@ns.setter
def ns(self, value):
""" Set the namespace
Args:
value (:obj:`str`): namespace
Raises:
:obj:`ValueError`: if the namespace is empty
"""
if not value:
raise ValueError('`ns` cannot be empty')
self._ns = value
@property
def id(self):
""" Get the id
Returns:
:obj:`str`: id
"""
return self._id
@id.setter
def id(self, value):
""" Set the id
Args:
value (:obj:`str`): id
Raises:
:obj:`ValueError`: if the id is empty
"""
if not value:
raise ValueError('`id` cannot be empty')
self._id = value
def __eq__(self, other):
""" Check if two identifiers are semantically equal
Args:
other (:obj:`Identifier`): another identifier
Returns:
:obj:`bool`: True, if the identifiers are semantically equal
"""
return self is other or (self.__class__ == other.__class__ and self.ns == other.ns and self.id == other.id)
def __hash__(self):
""" Generate a hash
Returns:
:obj:`int`: hash
"""
return hash((self.ns, self.id))
class IdentifierSet(set):
""" Set of identifiers """
def __init__(self, identifiers=None):
"""
Args:
identifiers (:obj:iterable of :obj:`Identifier`): iterable of identifiers
"""
super(IdentifierSet, self).__init__()
if identifiers is not None:
for identifier in identifiers:
self.add(identifier)
def add(self, identifier):
""" Add an identifier
Args:
identifier (:obj:`Identifier`): identifier
Raises:
:obj:`ValueError`: if the `identifier` is not an instance of `Indentifier`
"""
if not isinstance(identifier, Identifier):
raise ValueError('`identifier` must be an instance of `Indentifier`')
super(IdentifierSet, self).add(identifier)
def update(self, identifiers):
""" Add a set of identifiers
Args:
identifiers (iterable of :obj:`Identifier`): identifiers
"""
for identifier in identifiers:
self.add(identifier)
def symmetric_difference_update(self, other):
""" Remove common elements with other and add elements from other not in self
Args:
other (:obj:`IdentifierSet`): other set of identifiers
"""
if not isinstance(other, IdentifierSet):
other = IdentifierSet(other)
super(IdentifierSet, self).symmetric_difference_update(other)
class SynonymSet(set):
""" Set of synonyms """
def __init__(self, synonyms=None):
"""
Args:
synonyms (:obj:iterable of :obj:`str`): iterable of synonyms
Raises:
:obj:`ValueError`: if synonyms is not an iterable of string
"""
super(SynonymSet, self).__init__()
if isinstance(synonyms, str):
raise ValueError('synonyms must be an iterable of strings')
if synonyms is not None:
for synonym in synonyms:
self.add(synonym)
def add(self, synonym):
""" Add an synonym
Args:
synonym (:obj:`str`): synonym
Raises:
:obj:`ValueError`: if the `synonym` is not an instance of `Indentifier`
"""
if not synonym or not isinstance(synonym, str):
raise ValueError('`synonyms` must be a non-empty string')
super(SynonymSet, self).add(synonym)
def update(self, synonyms):
""" Add a set of synonyms
Args:
synonyms (iterable of :obj:`SynonymSet`): synonyms
"""
for synonym in synonyms:
self.add(synonym)
def symmetric_difference_update(self, other):
""" Remove common synonyms with other and add synonyms from other not in self
Args:
other (:obj:`SynonymSet`): other set of synonyms
"""
if not isinstance(other, SynonymSet):
other = SynonymSet(other)
super(SynonymSet, self).symmetric_difference_update(other)
class Monomer(object):
""" A monomeric form in a biopolymer
Attributes:
id (:obj:`str`): id
name (:obj:`str`): name
synonyms (:obj:`set` of :obj:`str`): synonyms
identifiers (:obj:`set` of :obj:`Identifier`, optional): identifiers in namespaces for external databases
structure (:obj:`openbabel.OBMol`): chemical structure
delta_mass (:obj:`float`): additional mass (Dalton) relative to structure
delta_charge (:obj:`int`): additional charge relative to structure
start_position (:obj:`tuple`): uncertainty in the location of the monomeric form
end_position (:obj:`tuple`): uncertainty in the location of the monomeric form
monomers_position (:obj:`set` of :obj:`Monomer`): originating monomers within :obj:`start_position` to
:obj:`end_position` where the monomeric form may be located
base_monomers (:obj:`set` of :obj:`Monomer`): monomers which this monomeric form is derived from
backbone_bond_atoms (:obj:`AtomList`): atoms from monomeric form that bond to backbone
backbone_displaced_atoms (:obj:`AtomList`): atoms from monomeric form displaced by bond to backbone
r_bond_atoms (:obj:`AtomList`): atoms that bond with right/suceeding/following/forward monomeric form
l_bond_atoms (:obj:`AtomList`): atoms that bond with left/preceding/previous/backward monomeric form
r_displaced_atoms (:obj:`AtomList`): atoms displaced by bond with right/suceeding/following/forward monomeric form
l_displaced_atoms (:obj:`AtomList`): atoms displaced by bond with left/preceding/previous/backward monomeric form
comments (:obj:`str`): comments
"""
def __init__(self, id=None, name=None, synonyms=None, identifiers=None, structure=None,
delta_mass=None, delta_charge=None,
start_position=None, end_position=None, monomers_position=None,
base_monomers=None,
backbone_bond_atoms=None, backbone_displaced_atoms=None,
r_bond_atoms=None, l_bond_atoms=None,
r_displaced_atoms=None, l_displaced_atoms=None,
comments=None):
"""
Attributes:
id (:obj:`str`, optional): id
name (:obj:`str`, optional): name
synonyms (:obj:`set` of :obj:`str`, optional): synonyms
identifiers (:obj:`set` of :obj:`Identifier`, optional): identifiers in namespaces for external databases
structure (:obj:`openbabel.OBMol` or :obj:`str`, optional): chemical structure
delta_mass (:obj:`float`, optional): additional mass (Dalton) relative to structure
delta_charge (:obj:`float`, optional): additional charge relative to structure
start_position (:obj:`int`, optional): uncertainty in the location of the monomeric form
end_position (:obj:`int`, optional): uncertainty in the location of the monomeric form
monomers_position (:obj:`set` of :obj:`Monomer`, optional): originating monomers within :obj:`start_position` to
:obj:`end_position` where the monomeric form may be located
base_monomers (:obj:`set` of :obj:`Monomer`, optional): monomers which this monomeric form is derived from
backbone_bond_atoms (:obj:`AtomList`, optional): atoms from monomeric form that bond to backbone
backbone_displaced_atoms (:obj:`AtomList`, optional): atoms from monomeric form displaced by bond to backbone
r_bond_atoms (:obj:`AtomList`, optional): atoms that bond with right/suceeding/following/forward monomeric form
l_bond_atoms (:obj:`AtomList`, optional): atoms that bond with left/preceding/previous/backward monomeric form
r_displaced_atoms (:obj:`AtomList`, optional): atoms displaced by bond with right/suceeding/following/forward monomeric form
l_displaced_atoms (:obj:`AtomList`, optional): atoms displaced by bond with left/preceding/previous/backward monomeric form
comments (:obj:`str`, optional): comments
"""
self.id = id
self.name = name
self.synonyms = synonyms or SynonymSet()
self.identifiers = identifiers or IdentifierSet()
self.structure = structure
self.delta_mass = delta_mass
self.delta_charge = delta_charge
self.start_position = start_position
self.end_position = end_position
self.monomers_position = monomers_position or set()
self.base_monomers = base_monomers or set()
self.backbone_bond_atoms = backbone_bond_atoms or AtomList()
self.backbone_displaced_atoms = backbone_displaced_atoms or AtomList()
self.r_bond_atoms = r_bond_atoms or AtomList()
self.l_bond_atoms = l_bond_atoms or AtomList()
self.r_displaced_atoms = r_displaced_atoms or AtomList()
self.l_displaced_atoms = l_displaced_atoms or AtomList()
self.comments = comments
@property
def id(self):
""" Get id
Returns:
:obj:`str`: id
"""
return self._id
@id.setter
def id(self, value):
""" Set id
Args:
value (:obj:`str`): id
Raises:
:obj:`ValueError`: if `value` is not a string or None
"""
if value and not isinstance(value, str):
raise ValueError('`id` must be a string or None')
self._id = value
@property
def name(self):
""" Get name
Returns:
:obj:`str`: name
"""
return self._name
@name.setter
def name(self, value):
""" Set name
Args:
value (:obj:`str`): name
Raises:
:obj:`ValueError`: if `value` is not a string or None
"""
if value and not isinstance(value, str):
raise ValueError('`name` must be a string or None')
self._name = value
@property
def synonyms(self):
""" Get synonyms
Returns:
:obj:`SynonymSet`: synonyms
"""
return self._synonyms
@synonyms.setter
def synonyms(self, value):
""" Set synonyms
Args:
value (:obj:`SynonymSet`): synonyms
Raises:
:obj:`ValueError`: if `synonyms` is not an instance of `SynonymSet`
"""
if value is None:
raise ValueError('`synonyms` must be an instance `SynonymSet`')
if not isinstance(value, SynonymSet):
value = SynonymSet(value)
self._synonyms = value
@property
def identifiers(self):
""" Get identifiers
Returns:
:obj:`IdentifierSet`: identifiers
"""
return self._identifiers
@identifiers.setter
def identifiers(self, value):
""" Set identifiers
Args:
value (:obj:`IdentifierSet`): identifiers
Raises:
:obj:`ValueError`: if `identifiers` is not an instance of `Indentifiers`
"""
if value is None:
raise ValueError('`identifiers` must be an instance `Indentifiers`')
if not isinstance(value, IdentifierSet):
value = IdentifierSet(value)
self._identifiers = value
@property
def structure(self):
""" Get structure
Returns:
:obj:`openbabel.OBMol`: structure
"""
return self._structure
@structure.setter
def structure(self, value):
""" Set structure
Args:
value (:obj:`openbabel.OBMol` or :obj:`str`): Open Babel molecule, canonical SMILES-encoded structure, or None
Raises:
:obj:`ValueError`: if value is not an Open Babel molecule, canonical SMILES-encoded structure, or None
"""
if value and not isinstance(value, openbabel.OBMol):
ob_mol = openbabel.OBMol()
conversion = openbabel.OBConversion()
assert conversion.SetInFormat('smi'), 'Unable to set format to SMILES'
if not conversion.ReadString(ob_mol, value):
raise ValueError('`structure` must be an Open Babel molecule, canonical SMILES-encoded structure, or None')
value = ob_mol
self._structure = value or None
@property
def delta_mass(self):
""" Get extra mass
Returns:
:obj:`float`: extra mass
"""
return self._delta_mass
@delta_mass.setter
def delta_mass(self, value):
""" Set extra mass
Args:
value (:obj:`float`): extra mass
Raises:
:obj:`ValueError`: if value is not a float or None
"""
if value is not None:
if not isinstance(value, (int, float)):
raise ValueError('`delta_mass` must be a float or None')
value = float(value)
self._delta_mass = value
@property
def delta_charge(self):
""" Get extra charge
Returns:
:obj:`int`: extra charge
"""
return self._delta_charge
@delta_charge.setter
def delta_charge(self, value):
""" Set extra charge
Args:
value (:obj:`int`): extra charge
Raises:
:obj:`ValueError`: if value is not an int or None
"""
if value is not None:
if not isinstance(value, (int, float)):
raise ValueError('`delta_charge` must be an integer or None')
if value != int(value):
raise ValueError('`delta_charge` must be an integer or None')
value = int(value)
self._delta_charge = value
@property
def start_position(self):
""" Get start position
Returns:
:obj:`int`: start position
"""
return self._start_position
@start_position.setter
def start_position(self, value):
""" Set start position
Args:
value (:obj:`float`): start position
Raises:
:obj:`ValueError`: if value is not an int or None
"""
if value is not None:
if not isinstance(value, (int, float)):
raise ValueError('`start_position` must be a positive integer or None')
if value != int(value) or value < 1:
raise ValueError('`start_position` must be a positive integer or None')
value = int(value)
self._start_position = value
@property
def end_position(self):
""" Get end position
Returns:
:obj:`int`: end position
"""
return self._end_position
@end_position.setter
def end_position(self, value):
""" Set end position
Args:
value (:obj:`float`): end position
Raises:
:obj:`ValueError`: if value is not an int or None
"""
if value is not None:
if not isinstance(value, (int, float)):
raise ValueError('`end_position` must be a positive integer or None')
if value != int(value) or value < 1:
raise ValueError('`end_position` must be a positive integer or None')
value = int(value)
self._end_position = value
@property
def monomers_position(self):
""" Get the originating monomers within :obj:`start_position` to
:obj:`end_position` where the monomeric form may be located
Returns:
:obj:`set` of :obj:`Monomer`: originating monomers within :obj:`start_position` to
:obj:`end_position` where the monomeric form may be located
"""
return self._monomers_position
@monomers_position.setter
def monomers_position(self, value):
""" Set the originating monomers within :obj:`start_position` to
:obj:`end_position` where the monomeric form may be located
Args:
value (:obj:`set` of :obj:`Monomer`): originating monomers within :obj:`start_position` to
:obj:`end_position` where the monomeric form may be located
Raises:
:obj:`ValueError`: if value is not an instance of :obj:`set`
"""
if isinstance(value, list):
value = set(value)
if not isinstance(value, set):
raise ValueError('`monomers_position` must be an instance of `set`')
self._monomers_position = value
@property
def base_monomers(self):
""" Get base monomeric forms
Returns:
:obj:`set` of :obj:`Monomer`: base monomeric forms
"""
return self._base_monomers
@base_monomers.setter
def base_monomers(self, value):
""" Set base monomeric forms
Args:
value (:obj:`set` of :obj:`Monomer`): base monomeric forms
Raises:
:obj:`ValueError`: if value is not an instance of :obj:`set`
"""
if isinstance(value, list):
value = set(value)
if not isinstance(value, set):
raise ValueError('`base_monomers` must be an instance of `set`')
self._base_monomers = value
@property
def backbone_bond_atoms(self):
""" Get the atoms from the monomeric form that bond to backbone
Returns:
:obj:`AtomList`: atoms from the monomeric form that bond to backbone
"""
return self._backbone_bond_atoms
@backbone_bond_atoms.setter
def backbone_bond_atoms(self, value):
""" Set the atoms from the monomeric form that bond to backbone
Args:
value (:obj:`AtomList`): atoms from the monomeric form that bond to backbone
Raises:
:obj:`ValueError`: if `backbone_bond_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`backbone_bond_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._backbone_bond_atoms = value
@property
def backbone_displaced_atoms(self):
""" Get the atoms from the monomeric form displaced by the bond to the backbone
Returns:
:obj:`AtomList`: atoms from the monomeric form displaced by the bond to the backbone
"""
return self._backbone_displaced_atoms
@backbone_displaced_atoms.setter
def backbone_displaced_atoms(self, value):
""" Set the atoms from the monomeric form displaced by the bond to the backbone
Args:
value (:obj:`AtomList`): atoms from the monomeric form displaced by the bond to the backbone
Raises:
:obj:`ValueError`: if `backbone_displaced_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`backbone_displaced_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._backbone_displaced_atoms = value
@property
def r_bond_atoms(self):
""" Get the left bond atoms
Returns:
:obj:`AtomList`: left bond atoms
"""
return self._r_bond_atoms
@r_bond_atoms.setter
def r_bond_atoms(self, value):
""" Set the left bond atoms
Args:
value (:obj:`AtomList`): left bond atoms
Raises:
:obj:`ValueError`: if `r_bond_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`r_bond_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._r_bond_atoms = value
@property
def l_bond_atoms(self):
""" Get the right bond atoms
Returns:
:obj:`AtomList`: right bond atoms
"""
return self._l_bond_atoms
@l_bond_atoms.setter
def l_bond_atoms(self, value):
""" Set the right bond atoms
Args:
value (:obj:`AtomList`): right bond atoms
Raises:
:obj:`ValueError`: if `l_bond_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`l_bond_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._l_bond_atoms = value
@property
def r_displaced_atoms(self):
""" Get the left displaced atoms
Returns:
:obj:`AtomList`: left displaced atoms
"""
return self._r_displaced_atoms
@r_displaced_atoms.setter
def r_displaced_atoms(self, value):
""" Set the left displaced atoms
Args:
value (:obj:`AtomList`): left displaced atoms
Raises:
:obj:`ValueError`: if `r_displaced_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`r_displaced_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._r_displaced_atoms = value
@property
def l_displaced_atoms(self):
""" Get the right displaced atoms
Returns:
:obj:`AtomList`: right displaced atoms
"""
return self._l_displaced_atoms
@l_displaced_atoms.setter
def l_displaced_atoms(self, value):
""" Set the right displaced atoms
Args:
value (:obj:`AtomList`): right displaced atoms
Raises:
:obj:`ValueError`: if `l_displaced_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`l_displaced_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._l_displaced_atoms = value
@property
def comments(self):
""" Get comments
Returns:
:obj:`str`: comments
"""
return self._comments
@comments.setter
def comments(self, value):
""" Set comments
Args:
value (:obj:`str`): comments
Raises:
:obj:`ValueError`: if value is not a str or None
"""
if value and not isinstance(value, str):
raise ValueError('`comments` must be a string or None')
self._comments = value
def get_root_monomers(self):
""" Get root monomeric forms
Returns:
:obj:`set` of :obj:`Monomer`: root monomeric forms
"""
if not self.base_monomers:
return set([self])
roots = set()
for base_monomer in self.base_monomers:
roots.update(base_monomer.get_root_monomers())
return roots
def get_major_micro_species(self, ph, major_tautomer=False, dearomatize=False):
""" Update to the major protonation and tautomerization state at the pH
Args:
ph (:obj:`float`): pH
major_tautomer (:obj:`bool`, optional): if :obj:`True`, calculate the major tautomer
dearomatize (:obj:`bool`, optional): if :obj:`True`, dearomatize molecule
"""
if self.structure:
structure = get_major_micro_species(self.export('smi', options=('c',)),
'smiles', 'smiles', ph=ph, major_tautomer=major_tautomer, dearomatize=dearomatize)
conv = openbabel.OBConversion()
assert conv.SetInFormat('smi')
assert conv.ReadString(self.structure, structure)
def export(self, format, options=()):
""" Export structure to format
Args:
format (:obj:`str`): format
options (:obj:`list` of :obj:`str`, optional): export options
Returns:
:obj:`str`: format representation of structure
"""
if self.structure:
return OpenBabelUtils.export(self.structure, format, options=options)
else:
return None
IMAGE_URL_PATTERN = ('https://cactus.nci.nih.gov/chemical/structure/{}/image'
'?format=gif'
'&bgcolor=transparent'
'&antialiasing=0')
def get_image_url(self):
""" Get URL for image of structure
Returns:
:obj:`str`: URL for image of structure
"""
smiles = self.export('smi', options=('c',))
if smiles:
return self.IMAGE_URL_PATTERN.format(urllib.parse.quote(smiles))
return None
def get_image(self, bond_label='', displaced_label='', bond_opacity=255, displaced_opacity=63,
backbone_bond_color=0xff0000, left_bond_color=0x00ff00, right_bond_color=0x0000ff,
include_all_hydrogens=True, show_atom_nums=False,
atom_label_font_size=0.6,
width=200, height=200, image_format='svg', include_xml_header=True):
""" Get image
Args:
bond_label (:obj:`str`, optional): label for atoms involved in bonds
displaced_label (:obj:`str`, optional): labels for atoms displaced by bond formation
bond_opacity (:obj:`int`, optional): opacity of atoms involved in bonds
displaced_opacity (:obj:`int`, optional): opacity of atoms dislaced by bond formation
backbone_bond_color (:obj:`int`, optional): color to paint atoms involved in bond with backbone
left_bond_color (:obj:`int`, optional): color to paint atoms involved in bond with monomeric form to left
right_bond_color (:obj:`int`, optional): color to paint atoms involved in bond with monomeric form to right
include_all_hydrogens (:obj:`bool`, optional): if :obj:`True`, show all hydrogens
show_atom_nums (:obj:`bool`, optional): if :obj:`True`, show the numbers of the atoms
atom_label_font_size (:obj:`float`, optional): relative atom label font size
width (:obj:`int`, optional): width in pixels
height (:obj:`int`, optional): height in pixels
image_format (:obj:`str`, optional): format of generated image {emf, eps, jpeg, msbmp, pdf, png, or svg}
include_xml_header (:obj:`bool`, optional): if :obj:`True`, include XML header at the beginning of the SVG
Returns:
:obj:`object`: image
"""
if not self.structure:
return None
atom_md_types = [
(self.backbone_bond_atoms, bond_label, self._blend_color_opacity(backbone_bond_color, bond_opacity)),
(self.backbone_displaced_atoms, displaced_label, self._blend_color_opacity(backbone_bond_color, displaced_opacity)),
(self.r_bond_atoms, bond_label, self._blend_color_opacity(left_bond_color, bond_opacity)),
(self.r_displaced_atoms, displaced_label, self._blend_color_opacity(left_bond_color, displaced_opacity)),
(self.l_bond_atoms, bond_label, self._blend_color_opacity(right_bond_color, bond_opacity)),
(self.l_displaced_atoms, displaced_label, self._blend_color_opacity(right_bond_color, displaced_opacity)),
]
atom_labels = []
atom_sets = {}
mol = openbabel.OBMol()
mol += self.structure
if include_all_hydrogens:
mol.AddHydrogens()
for atom_mds, label, color in atom_md_types:
bonding_hydrogens = []
for atom_md in atom_mds:
atom = mol.GetAtom(atom_md.position)
if atom_md.element == 'H' and atom.GetAtomicNum() != 1:
atom = get_hydrogen_atom(atom, bonding_hydrogens, 0)
if atom:
atom_labels.append({'position': atom.GetIdx(), 'element': atom_md.element, 'label': label, 'color': color})
if color not in atom_sets:
atom_sets[color] = {'positions': [], 'elements': [], 'color': color}
atom_sets[color]['positions'].append(atom.GetIdx())
atom_sets[color]['elements'].append(atom_md.element)
cml = OpenBabelUtils.export(mol, 'cml')
return draw_molecule(cml, 'cml', image_format=image_format,
atom_labels=atom_labels, atom_sets=atom_sets.values(),
show_atom_nums=show_atom_nums,
atom_label_font_size=atom_label_font_size,
width=width, height=height,
include_xml_header=include_xml_header)
@staticmethod
def _blend_color_opacity(color, opacity):
""" Blend color with white to simulate opacity
Args:
color (:obj:`int`): color (0-0xffffff)
opacity (:obj:`int`): opacity (0-0xff)
Returns:
:obj:`int`: blended color
"""
r = color >> 16
g = (color - r * 2**16) >> 8
b = color - r * 2**16 - g * 2**8
w = 0xff
r_opaque = round((r * opacity + w * (255 - opacity)) / 255)
g_opaque = round((g * opacity + w * (255 - opacity)) / 255)
b_opaque = round((b * opacity + w * (255 - opacity)) / 255)
return (r_opaque << 16) + (g_opaque << 8) + b_opaque
def get_formula(self):
""" Get the chemical formula
Returns:
:obj:`EmpiricalFormula`: chemical formula
"""
if not self.structure:
raise ValueError('A structure must be defined to calculate the formula')
return OpenBabelUtils.get_formula(self.structure)
def get_mol_wt(self):
""" Get the molecular weight
Returns:
:obj:`float`: molecular weight
"""
if self.structure:
return self.get_formula().get_molecular_weight() + (self.delta_mass or 0.)
return None
def get_charge(self):
""" Get the charge
Returns:
:obj:`int`: charge
"""
if not self.structure:
raise ValueError('A structure must be defined to calculate the charge')
return self.structure.GetTotalCharge() + (self.delta_charge or 0)
def to_dict(self, alphabet=None):
""" Get a dictionary representation of the monomeric form
Args:
alphabet (:obj:`Alphabet`, optional): alphabet
Returns:
:obj:`dict`: dictionary representation of the monomeric form
"""
dict = {}
attrs = ['id', 'name', 'delta_mass', 'delta_charge', 'start_position', 'end_position', 'comments']
for attr in attrs:
val = getattr(self, attr)
if val is not None:
dict[attr] = val
if self.synonyms:
dict['synonyms'] = list(self.synonyms)
if self.identifiers:
dict['identifiers'] = [{'ns': i.ns, 'id': i.id} for i in self.identifiers]
if self.structure:
dict['structure'] = self.export('smi', options=('c',))
if self.monomers_position and alphabet:
dict['monomers_position'] = []
for monomer in self.monomers_position:
monomer_code = alphabet.get_monomer_code(monomer)
dict['monomers_position'].append(monomer_code)
if self.base_monomers and alphabet:
dict['base_monomers'] = []
for monomer in self.base_monomers:
monomer_code = alphabet.get_monomer_code(monomer)
dict['base_monomers'].append(monomer_code)
attr_names = (
'backbone_bond_atoms', 'backbone_displaced_atoms',
'r_bond_atoms', 'l_bond_atoms',
'r_displaced_atoms', 'l_displaced_atoms')
for attr_name in attr_names:
atoms = getattr(self, attr_name)
if atoms:
dict[attr_name] = []
for atom in atoms:
dict[attr_name].append(atom.to_dict())
return dict
def from_dict(self, dict, alphabet=None):
""" Get a dictionary representation of the monomeric form
Args:
dict (:obj:`dict`): dictionary representation of the monomeric form
alphabet (:obj:`Alphabet`, optional): alphabet
Returns:
:obj:`Monomer`: monomeric form
"""
self.id = None
self.name = None
self.synonyms.clear()
self.identifiers.clear()
self.structure = None
self.delta_mass = None
self.delta_charge = None
self.start_position = None
self.end_position = None
self.monomers_position.clear()
self.base_monomers.clear()
self.comments = None
attrs = ['id', 'name', 'delta_mass', 'delta_charge', 'start_position', 'end_position', 'comments']
for attr in attrs:
val = dict.get(attr, None)
if val is not None:
setattr(self, attr, val)
synonyms = dict.get('synonyms', [])
if synonyms:
self.synonyms = SynonymSet(synonyms)
identifiers = dict.get('identifiers', [])
if identifiers:
self.identifiers = IdentifierSet([Identifier(i['ns'], i['id']) for i in identifiers])
structure = dict.get('structure', None)
if structure:
self.structure = structure
monomers_position_ids = dict.get('monomers_position', [])
if monomers_position_ids and alphabet:
self.monomers_position = set([alphabet.monomers.get(monomer_id) for monomer_id in monomers_position_ids])
base_monomer_ids = dict.get('base_monomers', [])
if base_monomer_ids and alphabet:
self.base_monomers = set([alphabet.monomers.get(monomer_id) for monomer_id in base_monomer_ids])
attr_names = (
'backbone_bond_atoms', 'backbone_displaced_atoms',
'r_bond_atoms', 'l_bond_atoms',
'r_displaced_atoms', 'l_displaced_atoms')
for attr_name in attr_names:
atoms = getattr(self, attr_name)
atoms.from_list(dict.get(attr_name, []))
return self
def __str__(self, alphabet=None):
""" Get a string representation of the monomeric form
Args:
alphabet (:obj:`Alphabet`, optional): alphabet
Returns:
:obj:`str`: string representation of the monomeric form
"""
els = []
if self.id:
els.append('id: "' + self.id + '"')
if self.name:
els.append('name: "' + self.name.replace('"', '\\"') + '"')
for synonym in self.synonyms:
els.append('synonym: "' + synonym.replace('"', '\\"') + '"')
for identifier in self.identifiers:
els.append('identifier: "' + identifier.id + '" @ "' + identifier.ns + '"')
if self.structure:
els.append('structure: "' + self.export('smi', options=('c',)) + '"')
atom_types = [
'backbone_bond_atoms', 'backbone_displaced_atoms',
'r_bond_atoms', 'r_displaced_atoms',
'l_bond_atoms', 'l_displaced_atoms',
]
for atom_type in atom_types:
for atom in getattr(self, atom_type):
if atom.charge > 0:
charge = '+' + str(atom.charge)
elif atom.charge == 0:
charge = ''
else:
charge = str(atom.charge)
els.append('{}: {}{}{}'.format(
atom_type[:-1].replace('_', '-'), atom.element, atom.position, charge))
if self.delta_mass is not None:
els.append('delta-mass: ' + str(self.delta_mass))
if self.delta_charge is not None:
els.append('delta-charge: ' + str(self.delta_charge))
if self.start_position is not None or self.end_position is not None:
el = 'position: {}-{}'.format(self.start_position or '', self.end_position or '')
if self.monomers_position and alphabet:
codes = sorted(alphabet.get_monomer_code(monomer) for monomer in self.monomers_position)
el += ' [{}]'.format(' | '.join(codes))
els.append(el)
if alphabet:
for monomer in self.base_monomers:
els.append('base-monomer: "{}"'.format(alphabet.get_monomer_code(monomer)))
if self.comments:
els.append('comments: "' + self.comments.replace('"', '\\"') + '"')
return '[' + ' | '.join(els) + ']'
def get_canonical_code(self, monomer_codes, default_code='?'):
""" Get IUPAC/IUBMB representation of a monomeric form using the character code
of its parent monomer (e.g. 'methyl-2-adenosine' is represented by 'A')
Args:
monomer_codes (:obj:`dict`): dictionary that maps monomeric forms to codes
default_code (:obj:`str`): default code
Returns:
:obj:`str`: IUPAC/IUBMB representation of monomeric form
"""
roots = self.get_root_monomers()
root_codes = list(set(monomer_codes.get(root, default_code) for root in roots))
if len(root_codes) == 1 and len(root_codes[0]) == 1:
return root_codes[0]
else:
return default_code
def is_equal(self, other):
""" Check if two monomeric forms are semantically equal
Args:
other (:obj:`Monomer`): another monomeric form
Returns:
:obj:`bool`: :obj:`True`, if the objects have the same structure
"""
if self is other:
return True
if self.__class__ != other.__class__:
return False
attrs = ['id', 'name', 'synonyms', 'identifiers',
'delta_mass', 'delta_charge', 'start_position', 'end_position',
'comments']
for attr in attrs:
if getattr(self, attr) != getattr(other, attr):
return False
if self.export('inchi') != other.export('inchi'):
return False
for attr in ['monomers_position', 'base_monomers']:
if len(getattr(self, attr)) != len(getattr(other, attr)):
return False
for base_monomer in getattr(self, attr):
has_equal = False
for other_base_monomer in getattr(other, attr):
if base_monomer.is_equal(other_base_monomer):
has_equal = True
break
if not has_equal:
return False
attr_names = (
'backbone_bond_atoms', 'backbone_displaced_atoms',
'r_bond_atoms', 'l_bond_atoms',
'r_displaced_atoms', 'l_displaced_atoms')
for attr_name in attr_names:
self_atoms = getattr(self, attr_name)
other_atoms = getattr(other, attr_name)
if not self_atoms.is_equal(other_atoms):
return False
return True
class MonomerSequence(list):
""" Sequence of monomeric forms """
def __init__(self, monomers=None):
"""
Args:
monomers (:obj:iterable of :obj:`Monomer`): iterable of monomeric forms
"""
super(MonomerSequence, self).__init__()
if monomers is not None:
for monomer in monomers:
self.append(monomer)
def append(self, monomer):
""" Add a monomeric form
Args:
monomer (:obj:`Monomer`): monomeric form
Raises:
:obj:`ValueError`: if the `monomer` is not an instance of `Monomer`
"""
if not isinstance(monomer, Monomer):
raise ValueError('`monomer` must be an instance of `Monomer`')
super(MonomerSequence, self).append(monomer)
def extend(self, monomers):
""" Add a list of monomeric forms
Args:
monomers (iterable of :obj:`Monomer`): iterable of monomeric forms
"""
for monomer in monomers:
self.append(monomer)
def insert(self, i, monomer):
""" Insert a monomeric form at a position
Args:
i (:obj:`int`): position to insert monomeric form
monomer (:obj:`Monomer`): monomeric form
"""
if not isinstance(monomer, Monomer):
raise ValueError('`monomer` must be an instance of `Monomer`')
super(MonomerSequence, self).insert(i, monomer)
def __setitem__(self, slice, monomer):
""" Set monomeric form(s) at slice
Args:
slice (:obj:`int` or :obj:`slice`): position(s) to set monomeric form
monomer (:obj:`Monomer` or :obj:`list` of :obj:`Monomer`): monomeric form(s)
"""
if isinstance(slice, int):
if not isinstance(monomer, Monomer):
raise ValueError('`monomer` must be a `Monomer`')
else:
for b in monomer:
if not isinstance(b, Monomer):
raise ValueError('`monomer` must be an iterable of `Monomer`')
super(MonomerSequence, self).__setitem__(slice, monomer)
def get_monomer_counts(self):
""" Get the frequency of each monomeric form within the sequence
Returns:
:obj:`dict`: dictionary that maps monomeric forms to their counts
"""
counts = {}
for monomer in self:
if monomer in counts:
counts[monomer] += 1
else:
counts[monomer] = 1
return counts
def is_equal(self, other):
""" Determine if two sequences of monomeric forms are semantically equal
Args:
other (:obj:`MonomerSequence`): other sequence
Returns:
:obj:`bool`: True, of the sequences are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__ or len(self) != len(other):
return False
for self_monomer, other_monomer in zip(self, other):
if not self_monomer.is_equal(other_monomer):
return False
return True
class MonomerDict(attrdict.AttrDict):
""" Dictionary for monomeric forms """
def __setitem__(self, code, monomer):
""" Set monomeric form with code
Args:
code (:obj:`str`): characters for monomeric form
monomer (:obj:`Monomer`): monomeric form
"""
pattern = (r'^([^\[\]\{\}":\| \t\f\r\n]|'
r'[^\[\]\{\}":\| \t\f\r\n][^\[\]\{\}":\|\t\f\r\n]*[^\[\]\{\}":\| \t\f\r\n])$')
if not re.match(pattern, code):
raise ValueError(f'`code` "{code}" must be at least one character, excluding '
'square brackets, curly brackets, double quotes, colons, and pipes '
'and must not begin or end with a white space')
super(MonomerDict, self).__setitem__(code, monomer)
class Alphabet(object):
""" Alphabet for monomeric forms
Attributes:
id (:obj:`str`): id
name (:obj:`str`): name
description (:obj:`str`): description
monomers (:obj:`dict`): monomeric forms
"""
def __init__(self, id=None, name=None, description=None, monomers=None):
"""
Args:
id (:obj:`str`, optional): id
name (:obj:`str`, optional): name
description (:obj:`str`, optional): description
monomers (:obj:`dict`, optional): monomeric forms
"""
self.id = id
self.name = name
self.description = description
self.monomers = monomers or MonomerDict()
@property
def monomers(self):
""" Get the monomeric forms
Returns:
:obj:`MonomerDict`: monomeric forms
"""
return self._monomers
@monomers.setter
def monomers(self, value):
""" Set the monomeric forms
Args:
value (:obj:`MonomerDict`): monomeric forms
Raises:
:obj:`ValueError`: if `monomers` is not an instance of `MonomerDict`
"""
if value is None:
raise ValueError('`monomers` must be an instance of `MonomerDict`')
if not isinstance(value, MonomerDict):
value = MonomerDict(value)
self._monomers = value
def get_monomer_code(self, monomer):
""" Get the code for a monomeric form in the alphabet
Args:
monomer (:obj:`Monomer`): monomeric form
Returns:
:obj:`str`: code for monomeric form
Raises:
:obj:`ValueError`: if monomeric form is not in alphabet
"""
for code, alph_monomer in self.monomers.items():
if monomer == alph_monomer:
return code
raise ValueError('Monomer {} is not in alphabet'.format(monomer.id))
def get_major_micro_species(self, ph, major_tautomer=False, dearomatize=False):
""" Calculate the major protonation and tautomerization of each monomeric form
Args:
ph (:obj:`float`): pH
major_tautomer (:obj:`bool`, optional): if :obj:`True`, calculate the major tautomer
dearomatize (:obj:`bool`, optional): if :obj:`True`, dearomatize molecule
"""
monomers = list(filter(lambda monomer: monomer.structure is not None, self.monomers.values()))
structures = []
for monomer in monomers:
structure = monomer.export('smi', options=('c',))
structures.append(structure)
new_structures = get_major_micro_species(structures, 'smiles', 'smiles',
ph=ph, major_tautomer=major_tautomer, dearomatize=dearomatize)
for monomer, new_structure in zip(monomers, new_structures):
conv = openbabel.OBConversion()
assert conv.SetInFormat('smi')
assert conv.ReadString(monomer.structure, new_structure)
def is_equal(self, other):
""" Determine two alphabets are semantically equal
Args:
other (:obj:`type`): other alphabet
Returns:
:obj:`bool`: True, if the alphabets are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__:
return False
for attr in ['id', 'name', 'description']:
if getattr(self, attr) != getattr(other, attr):
return False
if len(self.monomers) != len(other.monomers):
return False
for code, self_monomer in self.monomers.items():
if not self_monomer.is_equal(other.monomers.get(code, None)):
return False
return True
def to_dict(self):
""" Get dictionary representation of alphabet
Returns:
:obj:`dict`: dictionary representation of alphabet
"""
dict = {}
for attr in ['id', 'name', 'description']:
val = getattr(self, attr)
if val:
dict[attr] = val
dict['monomers'] = {}
for code, monomer in self.monomers.items():
dict['monomers'][code] = monomer.to_dict(alphabet=self)
return dict
def from_dict(self, dict):
""" Create alphabet from a dictionary representation
Args:
dict (:obj:`dict`): dictionary representation of alphabet
Returns:
:obj:`Alphabet`: alphabet
"""
for attr in ['id', 'name', 'description']:
val = dict.get(attr, None)
setattr(self, attr, val)
self.monomers.clear()
for code, monomer in dict['monomers'].items():
self.monomers[code] = Monomer().from_dict(monomer)
for code, monomer in dict['monomers'].items():
self.monomers[code].from_dict(monomer, alphabet=self)
return self
def to_yaml(self, path):
""" Save alphabet to YAML file
Args:
path (:obj:`str`): path to save alphabet in YAML format
"""
yaml_writer = yaml.YAML()
yaml_writer.default_flow_style = False
with open(path, 'wb') as file:
yaml_writer.dump(self.to_dict(), file)
def from_yaml(self, path):
""" Read alphabet from YAML file
Args:
path (:obj:`str`): path to YAML file which defines alphabet
Returns:
:obj:`Alphabet`: alphabet
"""
self.from_dict(parse_yaml(path))
return self
@cache.memoize(typed=False, expire=30 * 24 * 60 * 60, filename_args=[0])
def parse_yaml(path):
""" Read a YAML file
Args:
path (:obj:`str`): path to YAML file
Returns:
:obj:`object`: content of file
"""
yaml_reader = yaml.YAML()
with open(path, 'rb') as file:
return yaml_reader.load(file)
class AlphabetBuilder(abc.ABC):
""" Builder for alphabets
Attributes:
_max_monomers (:obj:`float`): maximum number of monomeric forms to build; used to limit length of tests
"""
def __init__(self, _max_monomers=float('inf')):
"""
Args:
_max_monomers (:obj:`float`, optional): maximum number of monomeric forms to build; used to limit length of tests
"""
self._max_monomers = _max_monomers
def run(self, ph=None, major_tautomer=False, dearomatize=False, path=None):
""" Build alphabet and, optionally, save to YAML file
Args:
ph (:obj:`float`, optional): pH at which to calculate the major protonation state of each monomeric form
major_tautomer (:obj:`bool`, optional): if :obj:`True`, calculate the major tautomer
dearomatize (:obj:`bool`, optional): if :obj:`True`, dearomatize molecule
path (:obj:`str`, optional): path to save alphabet
Returns:
:obj:`Alphabet`: alphabet
"""
alphabet = self.build(ph=ph, major_tautomer=major_tautomer, dearomatize=dearomatize)
if path:
self.save(alphabet, path)
return alphabet
@abc.abstractmethod
def build(self, ph=None, major_tautomer=False, dearomatize=False):
""" Build alphabet
Args:
ph (:obj:`float`, optional): pH at which to calculate the major protonation state of each monomeric form
major_tautomer (:obj:`bool`, optional): if :obj:`True`, calculate the major tautomer
dearomatize (:obj:`bool`, optional): if :obj:`True`, dearomatize molecule
Returns:
:obj:`Alphabet`: alphabet
"""
pass # pragma: no cover
def save(self, alphabet, path):
""" Save alphabet to YAML file
Args:
alphabet (:obj:`Alphabet`): alphabet
path (:obj:`str`): path to save alphabet
"""
alphabet.to_yaml(path)
class Atom(object):
""" An atom in a compound or bond
Attributes:
molecule (:obj:`type`): type of parent molecule
element (:obj:`str`): code for the element (e.g. 'H')
position (:obj:`int`): position of the atom within the molecule, which should use canonical SMILES atom numbers
charge (:obj:`int`): charge of the atom
monomer (:obj:`int`): index of parent monomeric form within sequence
"""
def __init__(self, molecule, element, position=None, charge=0, monomer=None):
"""
Args:
molecule (:obj:`type`): type of parent molecule
element (:obj:`str`, optional): code for the element (e.g. 'H')
position (:obj:`int`, optional): position of the atom within the molecule, which should use
canonical SMILES atom numbers
charge (:obj:`int`, optional): charge of the atom
monomer (:obj:`int`, optional): index of parent monomeric form within sequence
"""
self.molecule = molecule
self.element = element
self.position = position
self.charge = charge
self.monomer = monomer
@property
def molecule(self):
""" Get type of parent molecule
Returns:
:obj:`type`: type of parent molecule
"""
return self._molecule
@molecule.setter
def molecule(self, value):
""" Set the type of parent molecule
Args:
value (:obj:`type`): type of parent molecule
Raises:
:obj:`ValueError`: if `molecule` is not :obj:`None`, :obj:`Monomer`, or :obj:`Backbone`
"""
if value not in [None, Monomer, Backbone]:
raise ValueError('`molecule` must be `None`, `Monomer`, or `Backbone`')
self._molecule = value
@property
def element(self):
""" Get the element
Returns:
:obj:`str`: element
"""
return self._element
@element.setter
def element(self, value):
""" Set the element
Args:
value (:obj:`str`): element
"""
if not isinstance(value, str):
raise ValueError('`element` must be a string')
self._element = value
@property
def position(self):
""" Get the position
Returns:
:obj:`int`: position
"""
return self._position
@position.setter
def position(self, value):
""" Set the position
Args:
value (:obj:`int`): position
"""
if value is not None:
if (not isinstance(value, (int, float)) or value != int(value) or value < 1):
raise ValueError('`position` must be a positive integer or None')
value = int(value)
self._position = value
@property
def charge(self):
""" Get the charge
Returns:
:obj:`str`: charge
"""
return self._charge
@charge.setter
def charge(self, value):
""" Set the charge
Args:
value (:obj:`str`): charge
"""
if not isinstance(value, (float, int)) or int(value) != value:
raise ValueError('`charge` must be an integer')
value = int(value)
self._charge = value
@property
def monomer(self):
""" Get the index of the parent monomer within the sequence
Returns:
:obj:`int`: index of the parent monomer within the sequence
"""
return self._monomer
@monomer.setter
def monomer(self, value):
""" Set the index of the parent monomer within the sequence
Args:
value (:obj:`int`): index of the parent monomer within the sequence
"""
if value is not None:
if (not isinstance(value, (int, float)) or value != int(value) or value < 1):
raise ValueError('`monomer` must be a positive integer or None')
value = int(value)
self._monomer = value
def is_equal(self, other):
""" Determine if two atoms are semantically equal
Args:
other (:obj:`Atom`): other atom
Returns:
:obj:`bool`: obj:`True` if the atoms are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__:
return False
return self is other or (self.__class__ == other.__class__
and self.molecule == other.molecule
and self.element == other.element
and self.position == other.position
and self.charge == other.charge
and self.monomer == other.monomer)
def to_dict(self):
""" Get dictionary representation
Returns:
:obj:`dict`: dictionary representation
"""
dict = {}
if self.molecule:
dict['molecule'] = self.molecule.__name__
dict['element'] = self.element
if self.position is not None:
dict['position'] = self.position
if self.charge:
dict['charge'] = self.charge
if self.monomer is not None:
dict['monomer'] = self.monomer
return dict
def from_dict(self, dict):
""" Load from dictionary representation
Args:
dict (:obj:`dict`): dictionary representation
Returns:
:obj:`Atom`: atom
"""
molecule = dict.get('molecule', None)
if molecule == 'Monomer':
self.molecule = Monomer
elif molecule == 'Backbone':
self.molecule = Backbone
else:
self.molecule = None
self.element = dict['element']
self.position = dict.get('position', None)
self.charge = dict.get('charge', 0)
self.monomer = dict.get('monomer', None)
return self
class AtomList(list):
""" List of atoms """
def __init__(self, atoms=None):
"""
Args:
atoms (:obj:iterable of :obj:`Atom`): iterable of atoms
"""
super(AtomList, self).__init__()
if atoms is not None:
for atom in atoms:
self.append(atom)
def append(self, atom):
""" Add a atom
Args:
atom (:obj:`Atom`): atom
Raises:
:obj:`ValueError`: if the `atom` is not an instance of `Atom`
"""
if not isinstance(atom, Atom):
raise ValueError('`atom` must be an instance of `Atom`')
super(AtomList, self).append(atom)
def extend(self, atoms):
""" Add a list of atoms
Args:
atoms (iterable of :obj:`Atom`): iterable of atoms
"""
for atom in atoms:
self.append(atom)
def insert(self, i, atom):
""" Insert an atom at a position
Args:
i (:obj:`int`): position to insert atom
atom (:obj:`Atom`): atom
"""
if not isinstance(atom, Atom):
raise ValueError('`atom` must be an instance of `Atom`')
super(AtomList, self).insert(i, atom)
def __setitem__(self, slice, atom):
""" Set atom(s) at slice
Args:
slice (:obj:`int` or :obj:`slice`): position(s) to set atom
atom (:obj:`Atom` or :obj:`AtomList`): atom or atoms
"""
if isinstance(slice, int):
if not isinstance(atom, Atom):
raise ValueError('`atom` must be a `Atom`')
else:
for b in atom:
if not isinstance(b, Atom):
raise ValueError('`atom` must be an iterable of `Atom`')
super(AtomList, self).__setitem__(slice, atom)
def is_equal(self, other):
""" Determine if two lists of atoms are semantically equal
Args:
other (:obj:`AtomList`): other list of atoms
Returns:
:obj:`bool`: True, of the lists of atoms are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__ or len(self) != len(other):
return False
for self_atom, other_atom in zip(self, other):
if not self_atom.is_equal(other_atom):
return False
return True
def to_list(self):
""" Get list representation
Returns:
:obj:`list`: list representation
"""
list = []
for atom in self:
list.append(atom.to_dict())
return list
def from_list(self, list):
""" Load from list representation
Args:
list (:obj:`list`): list representation
Returns:
:obj:`AtomList`: atom list
"""
self.clear()
for atom in list:
self.append(Atom(None, '').from_dict(atom))
return self
class Backbone(object):
""" Backbone of a monomeric form
Attributes:
structure (:obj:`openbabel.OBMol`): chemical structure
monomer_bond_atoms (:obj:`AtomList`): atoms from backbone that bond to monomeric form
monomer_displaced_atoms (:obj:`AtomList`): atoms from backbone displaced by bond to monomeric form
"""
def __init__(self, structure=None, monomer_bond_atoms=None, monomer_displaced_atoms=None):
"""
Args:
structure (:obj:`str` or :obj:`openbabel.OBMol`, optional): chemical structure as canonical
SMILES-encoded string or Open Babel molecule
monomer_bond_atoms (:obj:`AtomList`, optional): atoms from backbone that bond to monomeric form
monomer_displaced_atoms (:obj:`AtomList`, optional): atoms from backbone displaced by bond to monomeric form
"""
self.structure = structure
self.monomer_bond_atoms = monomer_bond_atoms or AtomList()
self.monomer_displaced_atoms = monomer_displaced_atoms or AtomList()
@property
def structure(self):
""" Get the structure
Returns:
:obj:`openbabel.OBMol`: structure
"""
return self._structure
@structure.setter
def structure(self, value):
""" Set the structure
Args:
value (:obj:`str` or :obj:`openbabel.OBMol`): structure as canonical SMILES-encoded string or Open Babel molecule
"""
if value is not None and not isinstance(value, openbabel.OBMol):
ob_mol = openbabel.OBMol()
conversion = openbabel.OBConversion()
assert conversion.SetInFormat('smi'), 'Unable to set format to SMILES'
if not conversion.ReadString(ob_mol, value):
raise ValueError('`structure` must be an Open Babel molecule, canonical SMILES-encoded structure, or None')
value = ob_mol
self._structure = value
@property
def monomer_bond_atoms(self):
""" Get the backbone bond atoms
Returns:
:obj:`AtomList`: backbone bond atoms
"""
return self._monomer_bond_atoms
@monomer_bond_atoms.setter
def monomer_bond_atoms(self, value):
""" Set the backbone bond atoms
Args:
value (:obj:`AtomList`): backbone bond atoms
Raises:
:obj:`ValueError`: if `monomer_bond_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`monomer_bond_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._monomer_bond_atoms = value
@property
def monomer_displaced_atoms(self):
""" Get the backbone displaced atoms
Returns:
:obj:`AtomList`: backbone displaced atoms
"""
return self._monomer_displaced_atoms
@monomer_displaced_atoms.setter
def monomer_displaced_atoms(self, value):
""" Set the backbone displaced atoms
Args:
value (:obj:`AtomList`): backbone displaced atoms
Raises:
:obj:`ValueError`: if `monomer_displaced_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`monomer_displaced_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._monomer_displaced_atoms = value
def export(self, format, options=()):
""" Export structure to format
Args:
format (:obj:`str`): format
options (:obj:`list` of :obj:`str`, optional): export options
Returns:
:obj:`str`: format representation of structure
"""
if self.structure:
return OpenBabelUtils.export(self.structure, format, options=options)
return None
def get_formula(self):
""" Get the formula
Returns:
:obj:`EmpiricalFormula`: formula
"""
if self.structure:
formula = OpenBabelUtils.get_formula(self.structure)
else:
formula = EmpiricalFormula()
for atom in self.monomer_displaced_atoms:
formula[atom.element] -= 1
return formula
def get_mol_wt(self):
""" Get the molecular weight
Returns:
:obj:`float`: molecular weight
"""
return self.get_formula().get_molecular_weight()
def get_charge(self):
""" Get the charge
Returns:
:obj:`int`: charge
"""
if self.structure:
charge = self.structure.GetTotalCharge()
else:
charge = 0
for atom in self.monomer_bond_atoms:
charge -= atom.charge
for atom in self.monomer_displaced_atoms:
charge -= atom.charge
return charge
def is_equal(self, other):
""" Determine if two backbones are semantically equal
Args:
other (:obj:`Backbone`): other backbone
Returns:
:obj:`bool`: :obj:`True` if the backbones are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__:
return False
if self.export('inchi') != other.export('inchi'):
return False
if not self.monomer_bond_atoms.is_equal(other.monomer_bond_atoms)\
or not self.monomer_displaced_atoms.is_equal(other.monomer_displaced_atoms):
return False
return True
class BondOrder(int, enum.Enum):
""" Bond order """
single = 1
double = 2
triple = 3
aromatic = 4
class BondStereo(int, enum.Enum):
""" Bond stereochemistry """
wedge = 1
hash = 2
up = 3
down = 4
class BondBase(abc.ABC):
@abc.abstractmethod
def get_l_bond_atoms(self):
""" Get left bond atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
pass # pragma: no cover
@abc.abstractmethod
def get_r_bond_atoms(self):
""" Get right bond atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
pass # pragma: no cover
@abc.abstractmethod
def get_l_displaced_atoms(self):
""" Get left displaced atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
pass # pragma: no cover
@abc.abstractmethod
def get_r_displaced_atoms(self):
""" Get right displaced atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
pass # pragma: no cover
@abc.abstractmethod
def get_order(self):
""" Get the order
Returns:
:obj:`BondOrder`: order
"""
pass # pragma: no cover
@abc.abstractmethod
def get_stereo(self):
""" Get the stereochemistry
Returns:
:obj:`BondStereo`: stereochemistry
"""
pass # pragma: no cover
def get_formula(self, none_position=True):
""" Get the formula
Args:
none_position (:obj:`bool`, optional): include atoms whose position is :obj:`None`
Returns:
:obj:`EmpiricalFormula`: formula
"""
formula = EmpiricalFormula()
for atom in self.l_displaced_atoms:
if atom.position is not None or none_position:
formula[atom.element] -= 1
for atom in self.r_displaced_atoms:
if atom.position is not None or none_position:
formula[atom.element] -= 1
return formula
def get_mol_wt(self, none_position=True):
""" Get the molecular weight
Args:
none_position (:obj:`bool`, optional): include atoms whose position is :obj:`None`
Returns:
:obj:`float`: molecular weight
"""
return self.get_formula(none_position=none_position).get_molecular_weight()
def get_charge(self, none_position=True):
""" Get the charge
Args:
none_position (:obj:`bool`, optional): include atoms whose position is :obj:`None`
Returns:
:obj:`int`: charge
"""
charge = 0
for atom in self.l_bond_atoms:
if atom.position is not None or none_position:
charge -= atom.charge
for atom in self.l_displaced_atoms:
if atom.position is not None or none_position:
charge -= atom.charge
for atom in self.r_bond_atoms:
if atom.position is not None or none_position:
charge -= atom.charge
for atom in self.r_displaced_atoms:
if atom.position is not None or none_position:
charge -= atom.charge
return charge
class Bond(BondBase):
""" Bond between monomeric forms (inter-residue bond or crosslink)
Attributes:
id (:obj:`str`): id
name (:obj:`str`): name
synonyms (:obj:`SynonymSet`): synonyms
l_monomer (:obj:`Monomer`): left monomeric form
r_monomer (:obj:`Monomer`): right monomeric form
l_bond_atoms (:obj:`AtomList`): atoms from left monomeric form that bond with right monomeric form
r_bond_atoms (:obj:`AtomList`): atoms from right monomeric form that bond with left monomeric form
l_displaced_atoms (:obj:`AtomList`): atoms from left monomeric form displaced by bond
r_displaced_atoms (:obj:`AtomList`): atoms from right monomeric form displaced by bond
order (:obj:`BondOrder`): order
stereo (:obj:`BondStereo`): stereochemistry
comments (:obj:`str`): comments
"""
def __init__(self, id=None, name=None, synonyms=None,
l_monomer=None, r_monomer=None,
l_bond_atoms=None, r_bond_atoms=None,
l_displaced_atoms=None, r_displaced_atoms=None,
order=BondOrder.single, stereo=None,
comments=None):
"""
Args:
id (:obj:`str`, optional): id
name (:obj:`str`, optional): name
synonyms (:obj:`SynonymSet`, optional): synonyms
l_monomer (:obj:`Monomer`, optional): left monomeric form
r_monomer (:obj:`Monomer`, optional): right monomeric form
l_bond_atoms (:obj:`AtomList`, optional): atoms from left monomeric form that bond with right monomeric form
r_bond_atoms (:obj:`AtomList`, optional): atoms from right monomeric form that bond with left monomeric form
l_displaced_atoms (:obj:`AtomList`, optional): atoms from left monomeric form displaced by bond
r_displaced_atoms (:obj:`AtomList`, optional): atoms from right monomeric form displaced by bond
order (:obj:`BondOrder`, optional): order
stereo (:obj:`BondStereo`, optional): stereochemistry
comments (:obj:`str`, optional): comments
"""
self.id = id
self.name = name
self.synonyms = synonyms or SynonymSet()
self.l_monomer = l_monomer
self.r_monomer = r_monomer
self.l_bond_atoms = l_bond_atoms or AtomList()
self.r_bond_atoms = r_bond_atoms or AtomList()
self.l_displaced_atoms = l_displaced_atoms or AtomList()
self.r_displaced_atoms = r_displaced_atoms or AtomList()
self.order = order
self.stereo = stereo
self.comments = comments
@property
def id(self):
""" Get id
Returns:
:obj:`str`: id
"""
return self._id
@id.setter
def id(self, value):
""" Set id
Args:
value (:obj:`str`): id
Raises:
:obj:`ValueError`: if `value` is not a string or None
"""
if value is not None and not isinstance(value, str):
raise ValueError('`id` must be a string or None')
self._id = value
@property
def name(self):
""" Get name
Returns:
:obj:`str`: name
"""
return self._name
@name.setter
def name(self, value):
""" Set name
Args:
value (:obj:`str`): name
Raises:
:obj:`ValueError`: if `value` is not a string or None
"""
if value is not None and not isinstance(value, str):
raise ValueError('`name` must be a string or None')
self._name = value
@property
def synonyms(self):
""" Get synonyms
Returns:
:obj:`SynonymSet`: synonyms
"""
return self._synonyms
@synonyms.setter
def synonyms(self, value):
""" Set synonyms
Args:
value (:obj:`SynonymSet`): synonyms
Raises:
:obj:`ValueError`: if `synonyms` is not an instance of `SynonymSet`
"""
if value is None:
raise ValueError('`synonyms` must be an instance `SynonymSet`')
if not isinstance(value, SynonymSet):
value = SynonymSet(value)
self._synonyms = value
@property
def l_monomer(self):
""" Get the left monomeric form
Returns:
:obj:`Monomer`: left monomeric form
"""
return self._l_monomer
@l_monomer.setter
def l_monomer(self, value):
""" Set the left monomeric form
Args:
value (:obj:`Monomer`): left monomeric form
Raises:
:obj:`ValueError`: if `l_monomer` is not an instance of `Monomer` or `None`
"""
if not (isinstance(value, Monomer) or value is None):
raise ValueError('`l_monomer` must be an instance of `Monomer` or `None`')
self._l_monomer = value
@property
def r_monomer(self):
""" Get the right monomeric form
Returns:
:obj:`Monomer`: right monomeric form
"""
return self._r_monomer
@r_monomer.setter
def r_monomer(self, value):
""" Set the right monomeric form
Args:
value (:obj:`Monomer`): right monomeric form
Raises:
:obj:`ValueError`: if `r_monomer` is not an instance of `Monomer` or `None`
"""
if not (isinstance(value, Monomer) or value is None):
raise ValueError('`r_monomer` must be an instance of `Monomer` or `None`')
self._r_monomer = value
@property
def l_bond_atoms(self):
""" Get the left bond atoms
Returns:
:obj:`AtomList`: left bond atoms
"""
return self._l_bond_atoms
@l_bond_atoms.setter
def l_bond_atoms(self, value):
""" Set the left bond atoms
Args:
value (:obj:`AtomList`): left bond atoms
Raises:
:obj:`ValueError`: if `l_bond_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`l_bond_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._l_bond_atoms = value
@property
def r_bond_atoms(self):
""" Get the right bond atoms
Returns:
:obj:`AtomList`: right bond atoms
"""
return self._r_bond_atoms
@r_bond_atoms.setter
def r_bond_atoms(self, value):
""" Set the right bond atoms
Args:
value (:obj:`AtomList`): right bond atoms
Raises:
:obj:`ValueError`: if `r_bond_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`r_bond_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._r_bond_atoms = value
@property
def l_displaced_atoms(self):
""" Get the left displaced atoms
Returns:
:obj:`AtomList`: left displaced atoms
"""
return self._l_displaced_atoms
@l_displaced_atoms.setter
def l_displaced_atoms(self, value):
""" Set the left displaced atoms
Args:
value (:obj:`AtomList`): left displaced atoms
Raises:
:obj:`ValueError`: if `l_displaced_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`l_displaced_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._l_displaced_atoms = value
@property
def r_displaced_atoms(self):
""" Get the right displaced atoms
Returns:
:obj:`AtomList`: right displaced atoms
"""
return self._r_displaced_atoms
@r_displaced_atoms.setter
def r_displaced_atoms(self, value):
""" Set the right displaced atoms
Args:
value (:obj:`AtomList`): right displaced atoms
Raises:
:obj:`ValueError`: if `r_displaced_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`r_displaced_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._r_displaced_atoms = value
@property
def order(self):
""" Get the order
Returns:
:obj:`BondOrder`: order
"""
return self._order
@order.setter
def order(self, value):
""" Set the order
Args:
value (:obj:`BondOrder`): order
Raises:
:obj:`ValueError`: if `order` is not an instance of `BondOrder`
"""
if not isinstance(value, BondOrder):
raise ValueError('`order` must be an instance of `BondOrder`')
self._order = value
@property
def stereo(self):
""" Get the stereochemistry
Returns:
:obj:`BondStereo`: stereochemistry
"""
return self._stereo
@stereo.setter
def stereo(self, value):
""" Set the stereo
Args:
value (:obj:`BondStereo`): stereochemistry
Raises:
:obj:`ValueError`: if `stereo` is not an instance of `BondStereo`
"""
if value is not None and not isinstance(value, BondStereo):
raise ValueError('`stereo` must be an instance of `BondStereo` or `None`')
self._stereo = value
@property
def comments(self):
""" Get comments
Returns:
:obj:`str`: comments
"""
return self._comments
@comments.setter
def comments(self, value):
""" Set comments
Args:
value (:obj:`str`): comments
Raises:
:obj:`ValueError`: if value is not a str or None
"""
if value and not isinstance(value, str):
raise ValueError('`comments` must be a string or None')
self._comments = value
def get_l_bond_atoms(self):
""" Get left bond atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
return self.l_bond_atoms
def get_r_bond_atoms(self):
""" Get right bond atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
return self.r_bond_atoms
def get_l_displaced_atoms(self):
""" Get left displaced atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
return self.l_displaced_atoms
def get_r_displaced_atoms(self):
""" Get right displaced atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
return self.r_displaced_atoms
def get_order(self):
""" Get the order
Returns:
:obj:`BondOrder`: order
"""
return self.order
def get_stereo(self):
""" Get the stereochemistry
Returns:
:obj:`BondStereo`: stereochemistry
"""
return self.stereo
def is_equal(self, other):
""" Determine if two bonds are semantically equal
Args:
other (:obj:`Bond`): other bond
Returns:
:obj:`bool`: :obj:`True` if the bond are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__:
return False
if (self.l_monomer is None and other.l_monomer is not None) or \
(self.l_monomer is not None and not self.l_monomer.is_equal(other.l_monomer)):
return False
if (self.r_monomer is None and other.r_monomer is not None) or \
(self.r_monomer is not None and not self.r_monomer.is_equal(other.r_monomer)):
return False
if not self.l_bond_atoms.is_equal(other.l_bond_atoms) \
or not self.r_bond_atoms.is_equal(other.r_bond_atoms) \
or not self.l_displaced_atoms.is_equal(other.l_displaced_atoms) \
or not self.r_displaced_atoms.is_equal(other.r_displaced_atoms):
return False
if self.order != other.order:
return False
if self.stereo != other.stereo:
return False
if self.comments != other.comments:
return False
return True
def __str__(self):
""" Generate string representation of bond
Returns:
:obj:`str`: string representation of bond
"""
attrs = []
for atom_type in ['l_bond_atoms', 'r_bond_atoms',
'l_displaced_atoms', 'r_displaced_atoms']:
for atom in getattr(self, atom_type):
if atom.charge > 0:
charge = '+' + str(atom.charge)
elif atom.charge < 0:
charge = str(atom.charge)
else:
charge = ''
attrs.append('{}: {}{}{}{}'.format(atom_type[0:-1].replace('_', '-'),
atom.monomer or '',
atom.element,
atom.position or '',
charge))
if self.order != BondOrder.single:
attrs.append('order: "{}"'.format(self.order.name))
if self.stereo is not None:
attrs.append('stereo: "{}"'.format(self.stereo.name))
if self.comments:
attrs.append('comments: "{}"'.format(self.comments.replace('"', '\\"')))
return "[{}]".format(' | '.join(attrs))
class OntoBond(BondBase):
""" A crosslinking bond whose molecular details are defined in an
ontology of crosslinks
Attributes:
type (:obj:`Bond`): type of bond
l_monomer (:obj:`int`): location of left monomeric form
r_monomer (:obj:`int`): location of right monomeric form
"""
def __init__(self, type=None, l_monomer=None, r_monomer=None):
self.type = type
self.l_monomer = l_monomer
self.r_monomer = r_monomer
@property
def type(self):
""" Get type
Returns:
:obj:`Bond`: type
"""
return self._type
@type.setter
def type(self, value):
""" Set type
Args:
value (:obj:`Bond`): type
Raises:
:obj:`ValueError`: if value is not a Bond or None
"""
if value is not None:
if not isinstance(value, Bond):
raise ValueError('`type` must be an instance of `Bond` or `None`')
self._type = value
@property
def l_monomer(self):
""" Get location of left monomeric form
Returns:
:obj:`int`: location of left monomeric form
"""
return self._l_monomer
@l_monomer.setter
def l_monomer(self, value):
""" Set location of left monomeric form
Args:
value (:obj:`int`): location of left monomeric form
Raises:
:obj:`ValueError`: if value is not a positive integer or None
"""
if value is not None:
if not isinstance(value, (int, float)):
raise ValueError('`l_monomer` must be a positive integer or None')
if value != int(value) or value < 1:
raise ValueError('`l_monomer` must be a positive integer or None')
value = int(value)
self._l_monomer = value
@property
def r_monomer(self):
""" Get location of right monomeric form
Returns:
:obj:`int`: location of right monomeric form
"""
return self._r_monomer
@r_monomer.setter
def r_monomer(self, value):
""" Set location of right monomeric form
Args:
value (:obj:`int`): location of right monomeric form
Raises:
:obj:`ValueError`: if value is not a positive integer or None
"""
if value is not None:
if not isinstance(value, (int, float)):
raise ValueError('`r_monomer` must be a positive integer or None')
if value != int(value) or value < 1:
raise ValueError('`r_monomer` must be a positive integer or None')
value = int(value)
self._r_monomer = value
def get_l_bond_atoms(self):
""" Get left bond atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
atoms = []
for atom in self.type.l_bond_atoms:
atoms.append(Atom(atom.molecule, atom.element,
position=atom.position, charge=atom.charge, monomer=self.l_monomer))
return atoms
def get_r_bond_atoms(self):
""" Get right bond atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
atoms = []
for atom in self.type.r_bond_atoms:
atoms.append(Atom(atom.molecule, atom.element,
position=atom.position, charge=atom.charge, monomer=self.r_monomer))
return atoms
def get_l_displaced_atoms(self):
""" Get left displaced atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
atoms = []
for atom in self.type.l_displaced_atoms:
atoms.append(Atom(atom.molecule, atom.element,
position=atom.position, charge=atom.charge, monomer=self.l_monomer))
return atoms
def get_r_displaced_atoms(self):
""" Get right displaced atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
atoms = []
for atom in self.type.r_displaced_atoms:
atoms.append(Atom(atom.molecule, atom.element,
position=atom.position, charge=atom.charge, monomer=self.l_monomer))
return atoms
def get_order(self):
""" Get the order
Returns:
:obj:`BondOrder`: order
"""
return self.type.order
def get_stereo(self):
""" Get the stereochemistry
Returns:
:obj:`BondStereo`: stereochemistry
"""
return self.type.stereo
def __str__(self):
""" Generate string representation of bond
Returns:
:obj:`str`: string representation of bond
"""
attrs = []
if self.type:
attrs.append('type: "{}"'.format(onto_crosslink_to_id[self.type]))
if self.l_monomer:
attrs.append('l: {}'.format(self.l_monomer))
if self.r_monomer:
attrs.append('r: {}'.format(self.r_monomer))
return "[{}]".format(' | '.join(attrs))
def is_equal(self, other):
""" Determine if two bonds are semantically equal
Args:
other (:obj:`Bond`): other bond
Returns:
:obj:`bool`: :obj:`True` if the bond are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__:
return False
if (self.type is None and other.type is not None) or \
(self.type is not None and not self.type.is_equal(other.type)):
return False
if self.l_monomer != other.l_monomer or self.r_monomer != other.r_monomer:
return False
return True
class BondSet(set):
""" Set of bonds """
def add(self, bond):
""" Add a bond
Args:
bond (:obj:`BondBase`): bond
Raises:
:obj:`ValueError`: if the `bond` is not an instance of `Bond`
"""
if not isinstance(bond, BondBase):
raise ValueError('`bond` must be an instance of `BondBase`')
super(BondSet, self).add(bond)
def update(self, bonds):
""" Add a set of bonds
Args:
bonds (iterable of :obj:`BondBase`): bonds
"""
for bond in bonds:
self.add(bond)
def symmetric_difference_update(self, other):
""" Remove common elements with other and add elements from other not in self
Args:
other (:obj:`BondSet`): other set of bonds
"""
for o in other:
if o in self:
self.remove(o)
else:
self.add(o)
def is_equal(self, other):
""" Check if two sets of bonds are semantically equal
Args:
other (:obj:`BondSet`): other set of bonds
Returns:
:obj:`bool`: :obj:`True`, if the bond sets are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__:
return False
if len(self) != len(other):
return False
o_bonds = set(other)
for s_bond in self:
match = False
for o_bond in set(o_bonds):
if s_bond.is_equal(o_bond):
match = True
o_bonds.remove(o_bond)
break
if not match:
return False
return True
class Nick(object):
""" Nick between adjacent monomeric forms
Attributes:
position (:obj:`int`): position of nick (:math:`1 \ldots L-1`) where
:math:`1` indicates a nick between the first and second residues,
:math:`2` indicates a nick between the second and third residues, etc.
"""
def __init__(self, position=None):
"""
Args:
position (:obj:`int`, optional): position of nick
"""
self.position = position
@property
def position(self):
""" Get the position
Returns:
:obj:`int`: position
"""
return self._position
@position.setter
def position(self, value):
""" Set the position
Args:
value (:obj:`int`): position
"""
if value is not None:
if (not isinstance(value, (int, float)) or value != int(value) or value < 1):
raise ValueError('`position` must be a positive integer or None')
value = int(value)
self._position = value
def is_equal(self, other):
return self is other or (
self.__class__ is other.__class__ and
self.position == other.position)
class NickSet(set):
""" Set of nicks """
def add(self, nick):
""" Add a nick
Args:
nick (:obj:`Nick`): nick
Raises:
:obj:`ValueError`: if the `nick` is not an instance of `Nick`
"""
if not isinstance(nick, Nick):
raise ValueError('`nick` must be an instance of `Nick`')
super(NickSet, self).add(nick)
def update(self, nicks):
""" Add a set of nicks
Args:
nicks (iterable of :obj:`Nick`): nicks
"""
for nick in nicks:
self.add(nick)
def symmetric_difference_update(self, other):
""" Remove common elements with other and add elements from other not in self
Args:
other (:obj:`NickSet`): other set of nicks
"""
for o in other:
if o in self:
self.remove(o)
else:
self.add(o)
def is_equal(self, other):
""" Check if two sets of nicks are semantically equal
Args:
other (:obj:`NickSet`): other set of nicks
Returns:
:obj:`bool`: :obj:`True`, if the nicks sets are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__:
return False
if len(self) != len(other):
return False
o_nicks = set(other)
for s_nick in self:
match = False
for o_nick in set(o_nicks):
if s_nick.is_equal(o_nick):
match = True
o_nicks.remove(o_nick)
break
if not match:
return False
return True
class BpForm(object):
""" Biopolymer form
Attributes:
seq (:obj:`MonomerSequence`): sequence of monomeric forms of the biopolymer
alphabet (:obj:`Alphabet`): alphabet of monomeric forms
backbone (:obj:`Backbone`): backbone that connects monomeric forms
bond (:obj:`Bond`): bonds between (backbones of) monomeric forms
circular (:obj:`bool`): if :obj:`True`, indicates that the biopolymer is circular
crosslinks (:obj:`BondSet`): crosslinking intrachain bonds
nicks (:obj:`NickSet`): set of nicks
features (:obj:`BpFormFeatureSet`): set of features
_parser (:obj:`lark.Lark`): parser
"""
DEFAULT_FASTA_CODE = '?'
def __init__(self, seq=None, alphabet=None, backbone=None, bond=None, circular=False,
crosslinks=None, nicks=None):
"""
Args:
seq (:obj:`MonomerSequence`, optional): sequence of monomeric forms of the biopolymer
alphabet (:obj:`Alphabet`, optional): alphabet of monomeric forms
backbone (:obj:`Backbone`, optional): backbone that connects monomeric forms
bond (:obj:`Bond`, optional): bonds between (backbones of) monomeric forms
circular (:obj:`bool`, optional): if :obj:`True`, indicates that the biopolymer is circular
crosslinks (:obj:`BondSet`, optional): crosslinking intrachain bonds
nicks (:obj:`NickSet`, optional): set of nicks
"""
if alphabet is None:
alphabet = Alphabet()
if backbone is None:
backbone = Backbone()
if bond is None:
bond = Bond()
if crosslinks is None:
crosslinks = BondSet()
if nicks is None:
nicks = NickSet()
self.seq = seq or MonomerSequence()
self.alphabet = alphabet
self.backbone = backbone
self.bond = bond
self.circular = circular
self.crosslinks = crosslinks
self.nicks = nicks
self.features = BpFormFeatureSet(self)
@property
def seq(self):
""" Get the sequence of monomeric forms
Returns:
:obj:`MonomerSequence`: sequence of monomeric forms
"""
return self._monomer_seq
@seq.setter
def seq(self, value):
""" Set the sequence of monomeric forms
Args:
value (:obj:`MonomerSequence`): sequence of monomeric forms
Raises:
:obj:`ValueError`: if `seq` is not an instance of `MonomerSequence`
"""
if value is None:
raise ValueError('`seq` must be an instance of `MonomerSequence`')
if not isinstance(value, MonomerSequence):
value = MonomerSequence(value)
self._monomer_seq = value
@property
def alphabet(self):
""" Get the alphabet
Returns:
:obj:`Alphabet`: alphabet
"""
return self._alphabet
@alphabet.setter
def alphabet(self, value):
""" Set the sequence of monomeric forms
Args:
value (:obj:`Alphabet`): alphabet
Raises:
:obj:`ValueError`: if `alphabet` is not an instance of `Alphabet`
"""
if not isinstance(value, Alphabet):
raise ValueError('`alphabet` must be an instance of `Alphabet`')
self._alphabet = value
@property
def backbone(self):
""" Get the backbones
Returns:
:obj:`Backbone`: backbones
"""
return self._backbone
@backbone.setter
def backbone(self, value):
""" Set the backbones
Args:
value (:obj:`Backbone`): backbones
"""
if not isinstance(value, Backbone):
raise ValueError('`backbone` must be an instance of `Backbone`')
self._backbone = value
@property
def bond(self):
""" Get the bonds
Returns:
:obj:`Bond`: bonds
"""
return self._bond
@bond.setter
def bond(self, value):
""" Set the bonds
Args:
value (:obj:`Bond`): bonds
"""
if not isinstance(value, Bond):
raise ValueError('`bond` must be an instance of `Bond`')
self._bond = value
@property
def circular(self):
""" Get the circularity
Returns:
:obj:`bool`: circularity
"""
return self._circular
@circular.setter
def circular(self, value):
""" Set the circularity
Args:
value (:obj:`bool`): circularity
"""
if not isinstance(value, bool):
raise ValueError('`circular` must be an instance of `bool`')
self._circular = value
@property
def crosslinks(self):
""" Get the crosslinking intrachain bonds
Returns:
:obj:`BondSet`: crosslinking intrachain bonds
"""
return self._crosslinks
@crosslinks.setter
def crosslinks(self, value):
""" Set the crosslinking intrachain bonds
Args:
value (:obj:`list` of :obj:`BondSet`): crosslinking intrachain bonds
"""
if not isinstance(value, BondSet):
raise ValueError('`crosslinks` must be an instance of `BondSet`')
self._crosslinks = value
@property
def nicks(self):
""" Get the nicks
Returns:
:obj:`NickSet`: nicks
"""
return self._nicks
@nicks.setter
def nicks(self, value):
""" Set the nicks
Args:
value (:obj:`list` of :obj:`NickSet`): nicks
"""
if not isinstance(value, NickSet):
raise ValueError('`nicks` must be an instance of `NickSet`')
self._nicks = value
@property
def features(self):
""" Get the features
Returns:
:obj:`BpFormFeatureSet`: features
"""
return self._features
@features.setter
def features(self, value):
""" Set the features
Args:
value (:obj:`BpFormFeatureSet`): features
"""
if not isinstance(value, BpFormFeatureSet):
raise ValueError('`features` must be an instance of `BpFormFeatureSet`')
if hasattr(self, '_features'):
raise ValueError('`features` cannot be set outside constructor')
self._features = value
def is_equal(self, other):
""" Check if two biopolymer forms are semantically equal
Args:
other (:obj:`BpForm`): another biopolymer form
Returns:
:obj:`bool`: :obj:`True`, if the objects have the same structure
"""
return self is other or (self.__class__ == other.__class__
and self.seq.is_equal(other.seq)
and self.alphabet.is_equal(other.alphabet)
and self.backbone.is_equal(other.backbone)
and self.bond.is_equal(other.bond)
and self.circular == other.circular
and self.crosslinks.is_equal(other.crosslinks)
and self.nicks.is_equal(other.nicks))
def diff(self, other):
""" Determine the semantic difference between two biopolymer forms
Args:
other (:obj:`BpForm`): another biopolymer form
Returns:
:obj:`str`: description of the semantic difference between
two biopolymer forms
"""
diff = []
# class
if self.__class__ != other.__class__:
diff.append('{} != {}'.format(
self.__class__.__name__,
other.__class__.__name__))
if diff:
return '\n'.join(diff)
# alphabet, backbone, inter-monomeric form bond
if not self.alphabet.is_equal(other.alphabet):
diff.append('Forms have different alphabets')
if (self.backbone is None and other.backbone is not None) or \
(self.backbone is not None and not self.backbone.is_equal(other.backbone)):
diff.append('Forms have different backbones')
if (self.bond is None and self.bond is not None) or \
(self.bond is not None and not self.bond.is_equal(other.bond)):
diff.append('Forms have different inter-monomer bonds')
if diff:
return '\n'.join(diff)
# sequence, crosslinks, nicks, circularity
if len(self.seq) != len(other.seq):
diff.append('Length {} != {}'.format(
len(self.seq), len(other.seq)))
else:
for i_monomer, (monomer, o_monomer) in enumerate(zip(
self.seq, other.seq)):
if not monomer.is_equal(o_monomer):
diff.append('Monomeric form {} {} != {}'.format(
i_monomer + 1, monomer, o_monomer))
if len(self.crosslinks) != len(other.crosslinks):
diff.append('Number of crosslinks {} != {}'.format(
len(self.crosslinks), len(other.crosslinks)))
else:
o_crosslinks = list(other.crosslinks)
for link in self.crosslinks:
match = False
for o_link in list(o_crosslinks):
if link.is_equal(o_link):
match = True
o_crosslinks.remove(o_link)
break
if not match:
diff.append('Crosslink {} not in other'.format(link))
for o_link in o_crosslinks:
diff.append('Crosslink {} not in self'.format(o_link))
if len(self.nicks) != len(other.nicks):
diff.append('Number of nicks {} != {}'.format(
len(self.nicks), len(other.nicks)))
else:
o_nicks = list(other.nicks)
for nick in self.nicks:
match = False
for o_nick in list(o_nicks):
if nick.is_equal(o_nick):
match = True
o_nicks.remove(o_nick)
break
if not match:
diff.append('Nick {} not in other'.format(nick.position))
for o_nick in o_nicks:
diff.append('Nick {} not in self'.format(o_nick._position))
if self.circular != other.circular:
diff.append('Circularity {} != {}'.format(
self.circular, other.circular))
if diff:
return '\n'.join(diff)
return None
def __getitem__(self, slice):
""" Get monomeric form(s) at slice
Args:
slice (:obj:`int` or :obj:`slice`): position(s)
Returns:
:obj:`Monomer` or :obj:`Monomers`: monomeric form(s)
"""
return self.seq.__getitem__(slice)
def __setitem__(self, slice, monomer):
""" Set monomeric form(s) at slice
Args:
slice (:obj:`int` or :obj:`slice`): position(s)
monomer (:obj:`Monomer` or :obj:`Monomers`): monomeric forms(s)
"""
self.seq.__setitem__(slice, monomer)
def __delitem__(self, slice):
""" Delete monomeric form(s) at slice
Args:
slice (:obj:`int` or :obj:`slice`): position(s)
"""
self.seq.__delitem__(slice)
def __iter__(self):
""" Get iterator over sequence of monomeric forms
Returns:
:obj:`iterator` of :obj:`Monomer`: iterator of monomeric forms
"""
return self.seq.__iter__()
def __reversed__(self):
""" Get reverse iterator over sequence of monomeric forms
Returns:
:obj:`iterator` of :obj:`Monomer`: iterator of monomeric forms
"""
return self.seq.__reversed__()
def __contains__(self, monomer):
""" Determine if a monomeric form is in the biopolymer form
Args:
monomer (:obj:`Monomer`): monomeric form
Returns:
:obj:`bool`: true if the monomeric form is in the sequence
"""
return self.seq.__contains__(monomer)
def __len__(self):
""" Get the length of the sequence of the form
Returns:
:obj:`int`: length
"""
return len(self.seq)
def validate(self):
""" Check that the biopolymer form is valid and return any errors
* Check that monomeric forms :math:`1 \ldots L-1` can bond to the right (their right bonding attributes are set)
* Check that monomeric forms :math:`2 \ldots L` can bond to the left (their left bonding attributes are set)
* No atom is involved in multiple bonds
Returns:
:obj:`list` of :obj:`str`: list of errors, if any
"""
errors = []
bonding_backbone_hydrogens = []
bonding_monomer_hydrogens = []
el_table = openbabel.OBElementTable()
def check_atom(molecule, atom_type, i_monomer, i_atom, structure, atom_md, bonding_hydrogens, el_table, errors):
if structure is None:
errors.append('Structure cannot be None')
return
atom_obj = structure.GetAtom(atom_md.position)
if atom_md.element == 'H':
atom_obj = get_hydrogen_atom(atom_obj, bonding_hydrogens, i_monomer)
if atom_obj:
if el_table.GetSymbol(atom_obj.GetAtomicNum()) != atom_md.element:
errors.append("{} atom '{}[{}]' must be {}".format(molecule, atom_type, i_atom, atom_md.element))
# check that bond atoms are defined
atom_types = ['monomer_bond_atoms', 'monomer_displaced_atoms']
for atom_type in atom_types:
for i_atom, atom in enumerate(getattr(self.backbone, atom_type)):
if atom.molecule != Backbone or not atom.element or not atom.position:
errors.append("Backbone atom '{}[{}]' must have a defined element and position".format(atom_type, i_atom))
else:
check_atom('Backbone', atom_type, None, i_atom, self.backbone.structure, atom,
bonding_backbone_hydrogens, el_table, errors)
atom_types = ['l_bond_atoms', 'l_displaced_atoms',
'r_bond_atoms', 'r_displaced_atoms']
for atom_type in atom_types:
if len(set(atom.molecule for atom in getattr(self.bond, atom_type))) > 1:
errors.append("'{}' must have the same molecule type".format(atom_type))
for i_atom, atom in enumerate(getattr(self.bond, atom_type)):
if not atom.element or (atom.molecule == Backbone and not atom.position):
errors.append("Bond atom '{}[{}]' must have a defined element and position".format(atom_type, i_atom))
elif atom.molecule == Backbone:
check_atom('Bond', atom_type, None, i_atom, self.backbone.structure, atom,
bonding_backbone_hydrogens, el_table, errors)
for i_monomer, monomer in enumerate(self.seq):
if not monomer.structure:
errors.append('Monomer {} must have a defined structure'.format(i_monomer + 1))
continue
atom_types = ['backbone_bond_atoms', 'backbone_displaced_atoms']
for atom_type in atom_types:
for i_atom, atom in enumerate(getattr(monomer, atom_type)):
if atom.molecule != Monomer or not atom.element or not atom.position:
errors.append("'{}[{}]' of monomer {} must have a defined element and position".format(
atom_type, i_atom, i_monomer + 1))
else:
check_atom('Monomer {}'.format(i_monomer + 1), atom_type, i_monomer, i_atom,
monomer.structure, atom, bonding_monomer_hydrogens, el_table, errors)
atom_types = ['r_bond_atoms', 'r_displaced_atoms',
'l_bond_atoms', 'l_displaced_atoms']
for atom_type in atom_types:
for i_atom, atom in enumerate(getattr(monomer, atom_type)):
if atom.molecule != Monomer or not atom.element or not atom.position:
errors.append("'{}[{}]' of monomer {} must have a defined element and position".format(
atom_type, i_atom, i_monomer + 1))
else:
check_atom('Monomer {}'.format(i_monomer + 1), atom_type, i_monomer, i_atom,
monomer.structure, atom, bonding_monomer_hydrogens, el_table, errors)
# crosslinks
for i_crosslink, crosslink in enumerate(self.crosslinks):
if isinstance(crosslink, OntoBond):
if self.seq[crosslink.l_monomer - 1] != crosslink.type.l_monomer:
errors.append('Monomer {} must be equal to the left monomer of crosslink {}'.format(
crosslink.l_monomer, onto_crosslink_to_id[crosslink.type]))
if self.seq[crosslink.r_monomer - 1] != crosslink.type.r_monomer:
errors.append('Monomer {} must be equal to the right monomer of crosslink {}'.format(
crosslink.r_monomer, onto_crosslink_to_id[crosslink.type]))
else:
atom_types = ['r_bond_atoms', 'r_displaced_atoms',
'l_bond_atoms', 'l_displaced_atoms']
for atom_type in atom_types:
for i_atom, atom in enumerate(getattr(crosslink, 'get_' + atom_type)()):
if atom.molecule != Monomer or not atom.monomer or not atom.element or not atom.position:
errors.append("'{}[{}]' of crosslink {} must have a defined monomer, element, and position".format(
atom_type, i_atom, i_crosslink + 1))
else:
check_atom('Crosslink {} - monomer {}'.format(i_crosslink, atom.monomer), atom_type, atom.monomer - 1, i_atom,
self.seq[atom.monomer - 1].structure, atom, bonding_monomer_hydrogens,
el_table, errors)
# nicks
max_nick_pos = len(self.seq) - 1
nick_positions = set()
for nick in self.nicks:
if nick.position > max_nick_pos:
errors.append('Nick position {} must be less than or equal to {}'.format(
nick.position, max_nick_pos))
nick_positions.add(nick.position)
if len(nick_positions) < len(self.nicks):
errors.append('Nick positions be unique')
# check that monomers 1 .. L-1 can bond to right (except at locations of nicks)
for i_monomer, monomer in enumerate(self.seq[0:-1]):
if (i_monomer + 1) not in nick_positions and not self.can_monomer_bond_right(monomer):
errors.append('Monomeric form {} must be able to bond to the right'.format(i_monomer + 1))
if self.circular and not self.can_monomer_bond_right(self.seq[-1]):
errors.append('Monomeric form {} must be able to bond to the right'.format(len(self.seq)))
# check that monomers 2 .. L can bond to left (except at locations of nicks)
for i_monomer, monomer in enumerate(self.seq[1:]):
if (i_monomer + 1) not in nick_positions and not self.can_monomer_bond_left(monomer):
errors.append('Monomeric form {} must be able to bond to the left'.format(i_monomer + 2))
if self.circular and not self.can_monomer_bond_left(self.seq[0]):
errors.append('Monomeric form {} must be able to bond to the left'.format(1))
# left/right backbone/monomer atoms same length
if len(self.bond.l_bond_atoms) != len(self.bond.r_bond_atoms):
errors.append('Number or left and right bond atoms must be equal')
n_bond_left_atoms = sum([1 for atom in self.bond.l_bond_atoms if atom.molecule == Backbone])
n_bond_right_atoms = sum([1 for atom in self.bond.r_bond_atoms if atom.molecule == Backbone])
for i_monomer, monomer in enumerate(self.seq):
if len(self.backbone.monomer_bond_atoms) < len(monomer.backbone_bond_atoms):
errors.append(('Number of monomer-backbone atoms for monomer {} '
'must be less than or equal to the number of backbone-monomer atoms').format(i_monomer + 1))
for i_monomer, (monomer_l, monomer_r) in enumerate(zip(self.seq[0:-1], self.seq[1:])):
if len(monomer_l.r_bond_atoms) + n_bond_right_atoms != len(monomer_r.l_bond_atoms) + n_bond_left_atoms:
errors.append('Number of right and left bond atoms must be equal for monomers {}-{}'.format(i_monomer + 1, i_monomer + 2))
if self.circular and \
len(self.seq[-1].r_bond_atoms) + n_bond_right_atoms != len(self.seq[0].l_bond_atoms) + n_bond_left_atoms:
errors.append('Number of right and left bond atoms must be equal for monomers {}, {}'.format(len(self.seq), 1))
for i_crosslink, crosslink in enumerate(self.crosslinks):
if len(crosslink.get_l_bond_atoms()) != len(crosslink.get_r_bond_atoms()):
errors.append('Number of right and left bond atoms must be equal for crosslink {}'.format(i_crosslink + 1))
crosslinks = list(self.crosslinks)
for crosslink in crosslinks:
for other_crosslink in crosslinks[1:]:
if crosslink.is_equal(other_crosslink):
errors.append('Crosslink {} cannot be repeated'.format(str(crosslink)))
# uncertainty
for i_monomer, monomer in enumerate(self.seq):
if monomer.start_position is not None and monomer.start_position > len(self.seq):
errors.append('Start position for monomer {} must be less than the length of the sequence {}'.format(
i_monomer, len(self.seq)))
if monomer.end_position is not None and monomer.end_position > len(self.seq):
errors.append('End position for monomer {} must be less than the length of the sequence {}'.format(
i_monomer, len(self.seq)))
if monomer.start_position is not None and \
monomer.end_position is not None and \
monomer.start_position > monomer.end_position:
errors.append('Start position {} for monomer {} must be less than or equal to the end position {}'.format(
monomer.start_position, i_monomer, monomer.end_position))
# return errors
return errors
def can_monomer_bond_left(self, monomer):
""" Check if monomeric form can bond to the left
Args:
monomer (:obj:`Monomer`): monomeric form
Returns:
:obj:`bool`: :obj:`True`, if the monomeric form can bond to the left
"""
return len(monomer.l_bond_atoms) > 0 or \
(len(monomer.backbone_bond_atoms) > 0
and len(self.backbone.monomer_bond_atoms) > 0
and len(self.bond.l_bond_atoms) > 0
and self.bond.l_bond_atoms[0].molecule == Backbone)
def can_monomer_bond_right(self, monomer):
""" Check if monomeric form can bond to right
Args:
monomer (:obj:`Monomer`): monomeric form
Returns:
:obj:`bool`: :obj:`True`, if the monomeric form can bond to the right
"""
return len(monomer.r_bond_atoms) > 0 or \
(len(monomer.backbone_bond_atoms) > 0
and len(self.backbone.monomer_bond_atoms) > 0
and len(self.bond.r_bond_atoms) > 0
and self.bond.r_bond_atoms[0].molecule == Backbone)
def get_monomer_counts(self):
""" Get the frequency of each monomeric form within the biopolymer
Returns:
:obj:`dict`: dictionary that maps monomeric forms to their counts
"""
return self.seq.get_monomer_counts()
def get_major_micro_species(self, ph, major_tautomer=False, dearomatize=False):
""" Get the major protonation and tautomerization state
Args:
ph (:obj:`float`): pH
major_tautomer (:obj:`bool`, optional): if :obj:`True`, calculate the major tautomer
dearomatize (:obj:`bool`, optional): if :obj:`True`, dearomatize molecule
Returns:
:obj:`openbabel.OBMol`: major protonation and tautomerization state
"""
if not self.seq:
return None
smiles = self.export('smiles')
smiles = get_major_micro_species(smiles, 'smiles', 'smiles',
ph=ph, major_tautomer=major_tautomer, dearomatize=dearomatize)
mol = openbabel.OBMol()
conv = openbabel.OBConversion()
assert conv.SetInFormat('smiles')
assert conv.ReadString(mol, smiles)
return mol
def get_structure(self, include_all_hydrogens=False):
""" Get an Open Babel molecule of the structure
Args:
include_all_hydrogens (:obj:`bool`, optional): if :obj:`True`, explicitly include all
hydrogens
Returns:
:obj:`tuple`:
* :obj:`openbabel.OBMol`: Open Babel molecule of the structure
* :obj:`dict` of :obj:`dict`: dictionary which maps indices (1-based) of monomeric forms
to dictionaries which map types of components of monomeric forms ('monomer' or 'backbone')
to dictionaries which map indices (1-based) of atoms to atoms (instances of :obj:`openbabel.OBAtom`)
"""
if not self.seq:
return (None, None)
mol = openbabel.OBMol()
n_atom = 0
# join molecules and get bonded and displaced atoms
atom_map = {}
atoms = []
n_atoms = []
for i_monomer, monomer in enumerate(self.seq):
if monomer.structure is None:
monomer_structure = openbabel.OBMol()
else:
monomer_structure = openbabel.OBMol(monomer.structure)
if self.backbone.structure is None:
backbone_structure = openbabel.OBMol()
else:
backbone_structure = openbabel.OBMol(self.backbone.structure)
n_atoms.append(n_atom)
atom_map[i_monomer + 1] = {'monomer': {}, 'backbone': {}}
mol += monomer_structure
atom_map[i_monomer + 1]['monomer'] = {i_atom + 1: mol.GetAtom(n_atom + i_atom + 1)
for i_atom in range(monomer_structure.NumAtoms())}
if monomer.backbone_bond_atoms and backbone_structure:
mol += backbone_structure
atom_map[i_monomer + 1]['backbone'] = {i_atom + 1:
mol.GetAtom(n_atom + monomer_structure.NumAtoms() + i_atom + 1)
for i_atom in range(backbone_structure.NumAtoms())}
monomer_atom_attrs = ['monomer', [
['backbone_bond_atoms', monomer.backbone_bond_atoms],
['backbone_displaced_atoms', monomer.backbone_displaced_atoms],
]]
backbone_atom_attrs = ['backbone', [
['monomer_bond_atoms', self.backbone.monomer_bond_atoms],
['monomer_displaced_atoms', self.backbone.monomer_displaced_atoms]]]
if not monomer.backbone_bond_atoms:
backbone_atom_attrs[1][0][1] = []
backbone_atom_attrs[1][1][1] = []
left_atom_attrs = ['left', [
['l_bond_atoms', []],
['l_displaced_atoms', []]]]
if all(atom.position for atom in self.bond.l_bond_atoms):
left_atom_attrs[1][0][1] = self.bond.l_bond_atoms
if all(atom.position for atom in self.bond.l_displaced_atoms):
left_atom_attrs[1][1][1] = self.bond.l_displaced_atoms
if monomer.l_bond_atoms:
left_atom_attrs[1][0][1] = monomer.l_bond_atoms
if monomer.l_displaced_atoms:
left_atom_attrs[1][1][1] = monomer.l_displaced_atoms
right_atom_attrs = ['right', [
['r_bond_atoms', []],
['r_displaced_atoms', []]]]
if all(atom.position for atom in self.bond.r_bond_atoms):
right_atom_attrs[1][0][1] = self.bond.r_bond_atoms
if all(atom.position for atom in self.bond.r_displaced_atoms):
right_atom_attrs[1][1][1] = self.bond.r_displaced_atoms
if monomer.r_bond_atoms:
right_atom_attrs[1][0][1] = monomer.r_bond_atoms
if monomer.r_displaced_atoms:
right_atom_attrs[1][1][1] = monomer.r_displaced_atoms
subunit_atoms = {}
for type, attrs in [monomer_atom_attrs, backbone_atom_attrs,
left_atom_attrs, right_atom_attrs]:
subunit_atoms[type] = {}
for attr in attrs:
subunit_atoms[type][attr[0]] = []
for atom_md in attr[1]:
if atom_md.molecule == Monomer:
atom = mol.GetAtom(n_atom + atom_md.position)
else:
atom = mol.GetAtom(n_atom + atom_md.position + monomer_structure.NumAtoms())
subunit_atoms[type][attr[0]].append([atom, atom_md.molecule, atom_md.position, atom_md.element, atom_md.charge])
atoms.append(subunit_atoms)
n_atom += monomer_structure.NumAtoms()
if backbone_structure:
n_atom += backbone_structure.NumAtoms()
bonding_hydrogens = []
for i_monomer, subunit_atoms in enumerate(atoms):
for residue_atoms in subunit_atoms.values():
for type_atoms in residue_atoms.values():
for i_atom_el, atom_el in enumerate(type_atoms):
if atom_el[-2] == 'H':
type_atoms[i_atom_el] = (
get_hydrogen_atom(atom_el[0], bonding_hydrogens, i_monomer),
i_monomer + 1, atom_el[1], atom_el[2], atom_el[4])
else:
type_atoms[i_atom_el] = (
atom_el[0], i_monomer + 1, atom_el[1],
atom_el[2], atom_el[4])
crosslinks_atoms = []
for crosslink in self.crosslinks:
crosslink_atoms = {}
crosslinks_atoms.append(crosslink_atoms)
for atom_type in ['l_bond_atoms', 'r_bond_atoms', 'l_displaced_atoms', 'r_displaced_atoms']:
crosslink_atoms[atom_type] = []
for atom_md in getattr(crosslink, 'get_' + atom_type)():
atom = mol.GetAtom(n_atoms[atom_md.monomer - 1] + atom_md.position)
if atom_md.element == 'H':
atom = get_hydrogen_atom(atom, bonding_hydrogens, atom_md.monomer - 1)
crosslink_atoms[atom_type].append((atom, atom_md.monomer, atom_md.position))
# bond monomeric forms to backbones
for subunit_atoms in atoms:
self._bond_monomer_backbone(mol, subunit_atoms, atom_map)
# bond left/right pairs of subunits
nick_positions = set(nick.position for nick in self.nicks)
for i_position, (left_atoms, right_atoms) in enumerate(zip(atoms[0:-1], atoms[1:])):
if (i_position + 1) not in nick_positions:
self._bond_subunits(mol, left_atoms, right_atoms, atom_map)
# circularity
if self.circular:
self._bond_subunits(mol, atoms[-1], atoms[0], atom_map)
# crosslinks
i_crosslinks_bond_atoms = []
for crosslink, crosslink_atoms in zip(self.crosslinks, crosslinks_atoms):
i_crosslinks_bond_atoms.append(self._form_crosslink(mol, crosslink_atoms, atom_map,
crosslink.get_order(), crosslink.get_stereo()))
# include all hydrogens
if include_all_hydrogens:
mol.AddHydrogens()
# atom map
for monomer in atom_map.values():
for motif in monomer.keys():
for i_atom, atom in list(monomer[motif].items()):
monomer[motif][i_atom] = atom.GetIdx()
# return molecule
return (mol, atom_map)
def _bond_monomer_backbone(self, mol, subunit_atoms, atom_map):
""" Bond a monomeric form to a backbone
Args:
mol (:obj:`openbabel.OBMol`): molecule with a monomeric form and backbone
subunit_atoms (:obj:`dict`): dictionary of atoms in monomeric form and backbone to bond
atom_map (:obj:`dict`): dictionary which maps indices (1-based) of monomeric forms
to dictionaries which map types of components of monomeric forms ('monomer' or 'backbone')
to dictionaries which map indices (1-based) of atoms to atoms (instances of :obj:`openbabel.OBAtom`)
"""
for atom, i_monomer, monomer_type, i_atom, _ in subunit_atoms['monomer']['backbone_displaced_atoms']:
if atom is not None:
atom_2 = atom_map[i_monomer][monomer_type.__name__.lower()].get(i_atom, None)
if atom_2 and atom_2.GetIdx() == atom.GetIdx():
atom_map[i_monomer][monomer_type.__name__.lower()].pop(i_atom)
assert mol.DeleteAtom(atom, True)
for atom, i_monomer, monomer_type, i_atom, _ in subunit_atoms['backbone']['monomer_displaced_atoms']:
if atom is not None:
atom_2 = atom_map[i_monomer][monomer_type.__name__.lower()].get(i_atom, None)
if atom_2 and atom_2.GetIdx() == atom.GetIdx():
atom_map[i_monomer][monomer_type.__name__.lower()].pop(i_atom)
assert mol.DeleteAtom(atom, True)
for (monomer_atom, _, _, _, monomer_atom_charge), (backbone_atom, _, _, _, backbone_atom_charge) in zip(
subunit_atoms['monomer']['backbone_bond_atoms'],
subunit_atoms['backbone']['monomer_bond_atoms']):
bond = openbabel.OBBond()
bond.SetBegin(monomer_atom)
bond.SetEnd(backbone_atom)
bond.SetBondOrder(1)
assert mol.AddBond(bond)
if monomer_atom_charge:
monomer_atom.SetFormalCharge(monomer_atom.GetFormalCharge() + monomer_atom_charge)
if backbone_atom_charge:
backbone_atom.SetFormalCharge(backbone_atom.GetFormalCharge() + backbone_atom_charge)
def _bond_subunits(self, mol, left_atoms, right_atoms, atom_map):
""" Bond a left/right pair of subunits
Args:
mol (:obj:`openbabel.OBMol`): molecule with left and right subunits
left_atoms (:obj:`dict`): dictionary of atoms in left subunit to bond
right_atoms (:obj:`dict`): dictionary of atoms in right subunit to bond
atom_map (:obj:`dict`): dictionary which maps indices (1-based) of monomeric forms
to dictionaries which map types of components of monomeric forms ('monomer' or 'backbone')
to dictionaries which map indices (1-based) of atoms to atoms (instances of :obj:`openbabel.OBAtom`)
"""
for atom, i_monomer, monomer_type, i_atom, _ in left_atoms['right']['r_displaced_atoms']:
if atom is not None:
atom_2 = atom_map[i_monomer][monomer_type.__name__.lower()].get(i_atom, None)
if atom_2 and atom_2.GetIdx() == atom.GetIdx():
atom_map[i_monomer][monomer_type.__name__.lower()].pop(i_atom)
assert mol.DeleteAtom(atom, True)
for atom, i_monomer, monomer_type, i_atom, _ in right_atoms['left']['l_displaced_atoms']:
if atom is not None:
atom_2 = atom_map[i_monomer][monomer_type.__name__.lower()].get(i_atom, None)
if atom_2 and atom_2.GetIdx() == atom.GetIdx():
atom_map[i_monomer][monomer_type.__name__.lower()].pop(i_atom)
assert mol.DeleteAtom(atom, True)
for (l_atom, _, _, _, l_atom_charge), (r_atom, _, _, _, r_atom_charge) in \
zip(left_atoms['right']['r_bond_atoms'],
right_atoms['left']['l_bond_atoms']):
bond = openbabel.OBBond()
bond.SetBegin(l_atom)
bond.SetEnd(r_atom)
bond.SetBondOrder(1)
assert mol.AddBond(bond)
if l_atom_charge:
l_atom.SetFormalCharge(l_atom.GetFormalCharge() + l_atom_charge)
if r_atom_charge:
r_atom.SetFormalCharge(r_atom.GetFormalCharge() + r_atom_charge)
def _form_crosslink(self, mol, atoms, atom_map, order, stereo):
""" Form bond(s) for a crosslink
Args:
mol (:obj:`openbabel.OBMol`): molecule
atoms (:obj:`dict`): dictionary of atoms to bond and delete
atom_map (:obj:`dict`): dictionary which maps indices (1-based) of monomeric forms
to dictionaries which map types of components of monomeric forms ('monomer' or 'backbone')
to dictionaries which map indices (1-based) of atoms to atoms (instances of :obj:`openbabel.OBAtom`)
order (:obj:`BondOrder`): order
stereo (:obj:`BondStereo`): stereochemistry
"""
for (l_atom, _, _), (r_atom, _, _) in zip(atoms['l_bond_atoms'], atoms['r_bond_atoms']):
bond = openbabel.OBBond()
bond.SetBegin(l_atom)
bond.SetEnd(r_atom)
bond.SetBondOrder(order.value)
if stereo is None:
pass
elif stereo == BondStereo.wedge:
bond.SetWedge()
elif stereo == BondStereo.hash:
bond.SetHash()
elif stereo == BondStereo.up:
bond.SetUp()
elif stereo == BondStereo.down:
bond.SetDown()
assert mol.AddBond(bond)
for atom, i_monomer, i_atom in itertools.chain(atoms['l_displaced_atoms'], atoms['r_displaced_atoms']):
if atom is not None:
atom_2 = atom_map[i_monomer]['monomer'].get(i_atom, None)
if atom_2 and atom_2.GetIdx() == atom.GetIdx():
atom_map[i_monomer]['monomer'].pop(i_atom)
assert mol.DeleteAtom(atom, True)
def export(self, format, include_all_hydrogens=False, options=()):
""" Export structure to format
Args:
format (:obj:`str`): format
include_all_hydrogens (:obj:`bool`, optional): if :obj:`True`, explicitly
include all hydrogens
options (:obj:`list` of :obj:`str`, optional): export options
Returns:
:obj:`str`: format representation of structure
"""
structure = self.get_structure(include_all_hydrogens=include_all_hydrogens)[0]
if structure is None:
return None
else:
return OpenBabelUtils.export(structure, format, options=options)
def get_formula(self):
""" Get the chemical formula
Returns:
:obj:`EmpiricalFormula`: chemical formula
"""
backbone_formula = self.backbone.get_formula()
n_backbone = 0
formula = EmpiricalFormula()
for monomer, count in self.get_monomer_counts().items():
formula += monomer.get_formula() * count
if monomer.backbone_bond_atoms:
n_backbone += count
for atom in monomer.backbone_displaced_atoms:
formula[atom.element] -= count
nick_positions = set(nick.position for nick in self.nicks)
for i_monomer, monomer in enumerate(self.seq[0:-1]):
if (i_monomer + 1) not in nick_positions:
for atom in monomer.r_displaced_atoms:
formula[atom.element] -= 1
for i_monomer, monomer in enumerate(self.seq[1:]):
if (i_monomer + 1) not in nick_positions:
for atom in monomer.l_displaced_atoms:
formula[atom.element] -= 1
if self.circular:
for atom in self.seq[-1].r_displaced_atoms:
formula[atom.element] -= 1
for atom in self.seq[0].l_displaced_atoms:
formula[atom.element] -= 1
# crosslinks
for crosslink in self.crosslinks:
for atom in itertools.chain(crosslink.get_l_displaced_atoms(), crosslink.get_r_displaced_atoms()):
formula[atom.element] -= 1
return formula \
+ backbone_formula * n_backbone \
+ self.bond.get_formula(none_position=False) \
* (len(self.seq) - (1 - self.circular) - len(self.nicks))
def get_mol_wt(self):
""" Get the molecular weight
Returns:
:obj:`float`: molecular weight
"""
return self.get_formula().get_molecular_weight()
def get_charge(self):
""" Get the charge
Returns:
:obj:`int`: charge
"""
backbone_charge = self.backbone.get_charge()
n_backbone = 0
charge = 0
for monomer, count in self.get_monomer_counts().items():
charge += monomer.get_charge() * count
if monomer.backbone_bond_atoms:
n_backbone += count
for atom in monomer.backbone_bond_atoms:
charge -= atom.charge * count
for atom in monomer.backbone_displaced_atoms:
charge -= atom.charge * count
nick_positions = set(nick.position for nick in self.nicks)
for i_monomer, monomer in enumerate(self.seq[0:-1]):
if (i_monomer + 1) not in nick_positions:
for atom in monomer.r_displaced_atoms:
charge -= atom.charge
for i_monomer, monomer in enumerate(self.seq[1:]):
if (i_monomer + 1) not in nick_positions:
for atom in monomer.l_displaced_atoms:
charge -= atom.charge
if self.circular:
for atom in self.seq[-1].r_displaced_atoms:
charge -= atom.charge
for atom in self.seq[0].l_displaced_atoms:
charge -= atom.charge
# crosslinks
for crosslink in self.crosslinks:
for atom in itertools.chain(crosslink.get_l_displaced_atoms(), crosslink.get_r_displaced_atoms()):
charge -= atom.charge
return charge \
+ backbone_charge * n_backbone \
+ self.bond.get_charge(none_position=False) \
* (len(self.seq) - (1 - self.circular) - len(self.nicks))
def __str__(self):
""" Get a string representation of the biopolymer form
Returns:
:obj:`str`: string representation of the biopolymer form
"""
alphabet_monomers = {monomer: code for code, monomer in self.alphabet.monomers.items()}
nick_positions = set(nick.position for nick in self.nicks)
val = ''
for i_monomer, monomer in enumerate(self.seq):
code = alphabet_monomers.get(monomer, None)
if code:
if len(code) == 1:
val += code
else:
val += '{' + code + '}'
else:
val += monomer.__str__(alphabet=self.alphabet)
if (i_monomer + 1) in nick_positions:
val += ':'
# circular
if self.circular:
val += ' | circular'
# crosslinks
for crosslink in self.crosslinks:
val += ' | x-link: ' + str(crosslink)
return val
_grammar_filename = pkg_resources.resource_filename('bpforms', 'grammar.lark')
with open(_grammar_filename, 'r') as file:
_parser = lark.Lark(file.read())
def from_str(self, string):
""" Create biopolymer form its string representation
Args:
string (:obj:`str`): string representation of the biopolymer
Returns:
:obj:`BpForm`: biopolymer form
"""
class ParseTreeTransformer(lark.Transformer):
def __init__(self, form):
super(ParseTreeTransformer, self).__init__()
self.form = form
form.seq.clear()
form.crosslinks.clear()
form.nicks.clear()
form.circular = False
@lark.v_args(inline=True)
def start(self, *args):
return self.form
@lark.v_args(inline=True)
def seq(self, *args):
for arg in args:
if isinstance(arg, Monomer):
self.form.seq.append(arg)
elif isinstance(arg, lark.tree.Tree) and arg.data == 'nick':
self.form.nicks.add(Nick(len(self.form.seq)))
@lark.v_args(inline=True)
def alphabet_monomer(self, code):
code = code.value
if code[0] == '{' and code[-1] == '}':
code = code[1:-1]
monomer = self.form.alphabet.monomers.get(code, None)
if monomer is None:
raise ValueError('"{}" not in alphabet'.format(code))
return monomer
@lark.v_args(inline=True)
def inline_monomer(self, *args):
kwargs = {
'synonyms': SynonymSet(),
'identifiers': IdentifierSet(),
'monomers_position': set(),
'base_monomers': set(),
'backbone_bond_atoms': AtomList(),
'backbone_displaced_atoms': AtomList(),
'r_bond_atoms': AtomList(),
'r_displaced_atoms': AtomList(),
'l_bond_atoms': AtomList(),
'l_displaced_atoms': AtomList(),
}
for arg in args[1:-1:2]:
arg_name, arg_val = arg
if arg_name in ['id', 'name', 'structure', 'delta_mass', 'delta_charge', 'position', 'comments']:
if arg_name in kwargs:
raise ValueError('{} attribute cannot be repeated'.format(arg_name))
if arg_name == 'position':
kwargs['start_position'], kwargs['end_position'], kwargs['monomers_position'] = arg_val
else:
kwargs[arg_name] = arg_val
elif arg_name in ['synonyms', 'identifiers', 'monomers_position', 'base_monomers']:
kwargs[arg_name].add(arg_val)
elif arg_name in ['backbone_bond_atoms', 'backbone_displaced_atoms',
'r_bond_atoms', 'r_displaced_atoms',
'l_bond_atoms', 'l_displaced_atoms']:
kwargs[arg_name].append(arg_val)
else: # pragma: no cover # the grammar ensures this will never be reached
raise ValueError('Invalid attribute {}'.format(arg_name))
return Monomer(**kwargs)
@lark.v_args(inline=True)
def id(self, *args):
return ('id', args[-1].value[1:-1])
@lark.v_args(inline=True)
def name(self, *args):
return ('name', args[-1].value[1:-1])
@lark.v_args(inline=True)
def synonym(self, *args):
return ('synonyms', args[-1].value[1:-1])
@lark.v_args(inline=True)
def identifier(self, *args):
return ('identifiers', Identifier(args[-1].value[1:-1], args[-3].value[1:-1]))
@lark.v_args(inline=True)
def structure(self, *args):
return ('structure', args[-2].value)
@lark.v_args(inline=True)
def backbone_bond_atom(self, *args):
return ('backbone_bond_atoms', args[-1])
@lark.v_args(inline=True)
def backbone_displaced_atom(self, *args):
return ('backbone_displaced_atoms', args[-1])
@lark.v_args(inline=True)
def r_bond_atom(self, *args):
return ('r_bond_atoms', args[-1])
@lark.v_args(inline=True)
def r_displaced_atom(self, *args):
return ('r_displaced_atoms', args[-1])
@lark.v_args(inline=True)
def l_bond_atom(self, *args):
return ('l_bond_atoms', args[-1])
@lark.v_args(inline=True)
def l_displaced_atom(self, *args):
return ('l_displaced_atoms', args[-1])
@lark.v_args(inline=True)
def atom(self, *args):
atom = Atom(Monomer, args[0].value, position=int(float(args[1].value)))
if len(args) >= 3:
atom.charge = int(float(args[2].value))
return atom
@lark.v_args(inline=True)
def delta_mass(self, *args):
return ('delta_mass', float(args[-1].value))
@lark.v_args(inline=True)
def delta_charge(self, *args):
return ('delta_charge', int(float(args[-1].value)))
@lark.v_args(inline=True)
def position(self, *args):
start_position = None
end_position = None
monomers_position = []
before_dash = True
for arg in args:
if isinstance(arg, lark.lexer.Token):
if arg.type == 'DASH':
before_dash = False
if arg.type == 'INT':
if before_dash:
start_position = int(float(arg.value))
else:
end_position = int(float(arg.value))
elif isinstance(arg, list):
monomers_position = arg
return ('position', (start_position, end_position, monomers_position))
@lark.v_args(inline=True)
def monomers_position(self, *args):
monomers = []
for arg in args:
if arg.type in ['CHAR', 'CHARS']:
monomer = self.form.alphabet.monomers.get(arg.value, None)
if monomer is None:
raise ValueError('"{}" not in alphabet'.format(arg.value))
monomers.append(monomer)
return monomers
@lark.v_args(inline=True)
def base_monomer(self, *args):
code = args[-2].value
monomer = self.form.alphabet.monomers.get(code, None)
if monomer is None:
raise ValueError('"{}" not in alphabet'.format(code))
return ('base_monomers', monomer)
@lark.v_args(inline=True)
def comments(self, *args):
return ('comments', args[-1].value[1:-1])
@lark.v_args(inline=True)
def crosslink(self, *args):
for arg in args:
if isinstance(arg, BondBase):
for crosslink in self.form.crosslinks:
if arg.is_equal(crosslink):
raise ValueError('Crosslink {} cannot be repeated'.format(
args))
self.form.crosslinks.add(arg)
return
@lark.v_args(inline=True)
def onto_crosslink(self, *args):
bond = OntoBond()
for arg in args:
if isinstance(arg, lark.tree.Tree) and arg.data == 'onto_crosslink_attr':
attr, val = arg.children[0]
setattr(bond, attr, val)
return bond
@lark.v_args(inline=True)
def onto_crosslink_monomer(self, *args):
return (args[0], int(float(args[-1].value)))
@lark.v_args(inline=True)
def onto_crosslink_monomer_type(self, *args):
return args[0].value + '_monomer'
@lark.v_args(inline=True)
def onto_crosslink_type(self, *args):
for arg in args:
if arg.type == 'CHARS':
type_id = arg.value
type = crosslinks_onto.get(type_id, None)
if type is None:
raise ValueError('"{}" not in ontology'.format(type_id))
return ('type', type)
@lark.v_args(inline=True)
def inline_crosslink(self, *args):
bond = Bond()
for arg in args:
if isinstance(arg, lark.tree.Tree) and arg.data == 'inline_crosslink_attr':
attr, val = arg.children[0]
if attr in ['order', 'stereo', 'comments']:
setattr(bond, attr, val)
else:
attr_val_list = getattr(bond, attr)
attr_val_list.append(val)
return bond
@lark.v_args(inline=True)
def inline_crosslink_atom(self, *args):
atom_type = args[0]
monomer = int(float(args[2].value))
element = args[3].value
position = int(float(args[4].value))
if len(args) >= 6:
charge = int(float(args[5].value))
else:
charge = 0
return (atom_type, Atom(Monomer, monomer=monomer, element=element, position=position, charge=charge))
@lark.v_args(inline=True)
def inline_crosslink_atom_type(self, *args):
return args[0].value.replace('-', '_') + 's'
@lark.v_args(inline=True)
def inline_crosslink_order(self, *args):
return ('order', BondOrder[args[-2].value])
@lark.v_args(inline=True)
def inline_crosslink_stereo(self, *args):
return ('stereo', BondStereo[args[-2].value])
@lark.v_args(inline=True)
def inline_crosslink_comments(self, *args):
return ('comments', args[-1].value[1:-1])
@lark.v_args(inline=True)
def circular(self, *args):
self.form.circular = True
tree = self._parser.parse(string)
parse_tree_transformer = ParseTreeTransformer(self)
return parse_tree_transformer.transform(tree)
def get_canonical_seq(self, monomer_codes=None):
""" Get IUPAC/IUBMB representation of a polymer with bases represented by the character codes
of their parent monomers (e.g. methyl-2-adenosine is represented by 'A')
Args:
monomer_codes (:obj:`dict`, optional): dictionary that maps monomers to their codes
Returns:
:obj:`str`: IUPAC/IUBMB representation of a polymer
"""
if monomer_codes is None:
monomer_codes = {monomer: code for code, monomer in self.alphabet.monomers.items()}
seq = ''
for monomer in self.seq:
seq += monomer.get_canonical_code(default_code=self.DEFAULT_FASTA_CODE, monomer_codes=monomer_codes)
return seq
def get_image(self, monomer_color=0x000000, backbone_color=0xff0000,
left_right_bond_color=0x00ff00, crosslink_bond_color=0x0000ff,
include_all_hydrogens=True, show_atom_nums=False,
atom_label_font_size=0.6,
width=200, height=200, image_format='svg', include_xml_header=True):
""" Get molecular visualization
Args:
monomer_color (:obj:`int`, optional): color to paint atoms involved in monomeric forms
backbone_color (:obj:`int`, optional): color to paint atoms involved in backbones
left_right_bond_color (:obj:`int`, optional): color to paint atoms involved in bond with monomeric form to left
crosslink_bond_color (:obj:`int`, optional): color to paint atoms involved in crosslinks
include_all_hydrogens (:obj:`bool`, optional): if :obj:`True`, show all hydrogens
show_atom_nums (:obj:`bool`, optional): if :obj:`True`, show the numbers of the atoms
atom_label_font_size (:obj:`float`, optional): relative atom label font size
width (:obj:`int`, optional): width in pixels
height (:obj:`int`, optional): height in pixels
image_format (:obj:`str`, optional): format of generated image {emf, eps, jpeg, msbmp, pdf, png, or svg}
include_xml_header (:obj:`bool`, optional): if :obj:`True`, include XML header at the beginning of the SVG
Returns:
:obj:`object`: image
"""
mol, atom_map = self.get_structure(include_all_hydrogens=include_all_hydrogens)
el_table = openbabel.OBElementTable()
atom_labels = []
atom_sets = {}
atom_sets[backbone_color] = {'positions': [], 'elements': [], 'color': backbone_color}
bond_sets = {}
bond_sets[left_right_bond_color] = {'positions': [], 'elements': [], 'color': left_right_bond_color}
bond_sets[crosslink_bond_color] = {'positions': [], 'elements': [], 'color': crosslink_bond_color}
# label monomers
for i_monomer in atom_map.keys():
i_atoms = sorted(atom_map[i_monomer]['monomer'].keys())
if i_atoms:
i_atom = atom_map[i_monomer]['monomer'][i_atoms[0]]
atom = mol.GetAtom(i_atom)
atom_labels.append({'position': i_atom,
'element': el_table.GetSymbol(atom.GetAtomicNum()),
'label': str(i_monomer),
'color': monomer_color})
# color backbones
for i_monomer in atom_map.keys():
for i_atom in atom_map[i_monomer]['backbone'].values():
atom = mol.GetAtom(i_atom)
atom_sets[backbone_color]['positions'].append(i_atom)
atom_sets[backbone_color]['elements'].append(el_table.GetSymbol(atom.GetAtomicNum()))
# color left/right bonds
for i_monomer, (l_monomer, r_monomer) in enumerate(zip(self.seq[0:-1], self.seq[1:])):
l_bond_atoms = l_monomer.r_bond_atoms or self.bond.r_bond_atoms
r_bond_atoms = r_monomer.l_bond_atoms or self.bond.l_bond_atoms
self._add_bonds_to_set(mol,
l_bond_atoms,
r_bond_atoms,
atom_map, bond_sets[left_right_bond_color],
el_table,
i_monomer_1=i_monomer + 1,
i_monomer_2=i_monomer + 2)
# color crosslink bonds
for crosslink in self.crosslinks:
self._add_bonds_to_set(mol, crosslink.get_l_bond_atoms(),
crosslink.get_r_bond_atoms(),
atom_map, bond_sets[crosslink_bond_color],
el_table)
# remove unused atoms and bond sets
if not atom_sets[backbone_color]['positions']:
atom_sets.pop(backbone_color)
if not bond_sets[left_right_bond_color]['positions']:
bond_sets.pop(left_right_bond_color)
if not bond_sets[crosslink_bond_color]['positions']:
bond_sets.pop(crosslink_bond_color)
cml = OpenBabelUtils.export(mol, 'cml')
# draw molecule
return draw_molecule(cml, 'cml', image_format=image_format,
atom_labels=atom_labels, atom_sets=atom_sets.values(), bond_sets=bond_sets.values(),
show_atom_nums=show_atom_nums,
atom_label_font_size=atom_label_font_size,
width=width, height=height, include_xml_header=include_xml_header)
def _add_bonds_to_set(self, mol, r_bond_atoms, l_bond_atoms, atom_map, bond_set, el_table,
i_monomer_1=None, i_monomer_2=None):
for md_atom_1, md_atom_2 in zip(r_bond_atoms, l_bond_atoms):
if i_monomer_1 is None:
i_monomer_1 = md_atom_1.monomer
else:
i_monomer_1 = i_monomer_1
if i_monomer_2 is None:
i_monomer_2 = md_atom_2.monomer
else:
i_monomer_2 = i_monomer_2
type_1 = md_atom_1.molecule.__name__.lower()
type_2 = md_atom_2.molecule.__name__.lower()
i_atom_1 = atom_map[i_monomer_1][type_1].get(md_atom_1.position, None)
i_atom_2 = atom_map[i_monomer_2][type_2].get(md_atom_2.position, None)
atom_1 = mol.GetAtom(i_atom_1)
atom_2 = mol.GetAtom(i_atom_2)
bond_set['positions'].append(
[i_atom_1, i_atom_2])
bond_set['elements'].append(
[el_table.GetSymbol(atom_1.GetAtomicNum()),
el_table.GetSymbol(atom_2.GetAtomicNum())])
def get_genomic_image(self, label=None, seq_features=None, **kwargs):
""" Get a genomic visualization of the :obj:`BpForm`
Args:
label (:obj:`str`, optional): title
seq_features (:obj:`dict`): list of features each
represented by a dictionary with three keys
* label (:obj:`str`): description of the type of feature
* color (:obj:`str`): color
* positions (:obj:`list` of :obj:`list` of :obj:`int`): list of position
ranges of the type of feature
The method also accepts the same arguments as
:obj:`bpforms.util.gen_genomic_viz`.
Returns:
:obj:`str`: SVG image
"""
from bpforms.util import gen_genomic_viz
if label:
kwargs['polymer_labels'] = {0: label}
if seq_features:
kwargs['seq_features'] = copy.deepcopy(seq_features)
for feature in kwargs['seq_features']:
feature['positions'] = {0: feature['positions']}
return gen_genomic_viz([self], **kwargs)
class BpFormFeature(object):
""" A region (start and end positions) of a BpForm
Attributes:
form (:obj:`BpForm`): biopolymer form
start_position (:obj:`int`): start position (1-base)
end_position (:obj:`int`): end position (1-based)
"""
def __init__(self, form, start_position, end_position):
"""
Args:
form (:obj:`BpForm`): biopolymer form
start_position (:obj:`int`): start position (1-base)
end_position (:obj:`int`): end position (1-based)
"""
self.form = form
self.start_position = start_position
self.end_position = end_position
@property
def form(self):
""" Get the biopolymer form
Returns:
:obj:`BpForm`: biopolymer form
"""
return self._form
@form.setter
def form(self, value):
""" Set the biopolymer form
Args:
value (:obj:`BpForm`): biopolymer form
Raises:
:obj:`ValueError`: form is not an instance of :obj:`BpForm`
"""
if value is not None and not isinstance(value, BpForm):
raise ValueError('`form` must be an instance of `BpForm` or None')
if hasattr(self, '_form'):
if value != self._form:
if self._form is not None:
old_form = self._form
self._form = None
if self in old_form.features:
old_form.features.remove(self)
if value is not None:
self._form = value
if self not in value.features:
value.features.add(self)
else:
self._form = value
if value is not None:
if self not in value.features:
value.features.add(self)
@property
def start_position(self):
""" Get the start position
Returns:
:obj:`int`: start position
"""
return self._start_position
@start_position.setter
def start_position(self, value):
""" Set the start position
Args:
value (:obj:`int`): start position
Raises:
:obj:`ValueError`: start position is not a non-negative integer
"""
if int(value) != value or value < 0:
raise ValueError('`start_position` must be a non-negative integer')
self._start_position = int(value)
@property
def end_position(self):
""" Get the end position
Returns:
:obj:`int`: end position
"""
return self._end_position
@end_position.setter
def end_position(self, value):
""" Set the end position
Args:
value (:obj:`int`): end position
Raises:
:obj:`ValueError`: end position is not a non-negative integer
"""
if int(value) != value or value < 0:
raise ValueError('`end_position` must be a non-negative integer')
self._end_position = int(value)
class BpFormFeatureSet(set):
""" Set of features
Attributes:
form (:obj:`BpForm`): form
"""
def __init__(self, form):
"""
Args:
form (:obj:`BpForm`): form
"""
super(BpFormFeatureSet, self).__init__()
self.form = form
@property
def form(self):
""" Get the biopolymer form
Returns:
:obj:`BpForm`: biopolymer form
"""
return self._form
@form.setter
def form(self, value):
""" Set the biopolymer form
Args:
value (:obj:`BpForm`): biopolymer form
Raises:
:obj:`ValueError`: form is not an instance of :obj:`BpForm`
"""
if not isinstance(value, BpForm):
raise ValueError('`form` must be an instance of `BpForm`')
if hasattr(self, '_form'):
raise ValueError('`form` cannot be set outside constructor')
self._form = value
def add(self, feature):
""" Add a feature
Args:
feature (:obj:`BpFormFeature`): feature
Raises:
:obj:`ValueError`: if the `feature` is not an instance of `BpFormFeature`
"""
if not isinstance(feature, BpFormFeature):
raise ValueError('`feature` must be an instance of `BpFormFeature`')
if feature not in self:
super(BpFormFeatureSet, self).add(feature)
if feature.form != self.form:
feature.form = self.form
def remove(self, feature):
""" Remove a feature
Args:
feature (:obj:`BpFormFeature`): feature
"""
super(BpFormFeatureSet, self).remove(feature)
if feature.form != None:
feature.form = None
def update(self, features):
""" Add a set of features
Args:
features (iterable of :obj:`BpFormFeature`): features
"""
for feature in features:
self.add(feature)
def symmetric_difference_update(self, other):
""" Remove common elements with other and add elements from other not in self
Args:
other (:obj:`BpFormFeatureSet`): other set of features
"""
for o in other:
if o in self:
self.remove(o)
else:
self.add(o)
class BpFormsWarning(UserWarning):
""" BpForms warning """
pass
def get_hydrogen_atom(parent_atom, bonding_hydrogens, i_monomer):
""" Get a hydrogen atom attached to a parent atom
Args:
parent_atom (:obj:`openbabel.OBAtom`): parent atom
bonding_hydrogens (:obj:`list`): hydrogens that have already been gotten
i_monomer (:obj:`int`): index of parent monomer in sequence
Returns:
:obj:`openbabel.OBAtom`: hydrogen atom
"""
for other_atom in openbabel.OBAtomAtomIter(parent_atom):
if other_atom.GetAtomicNum() == 1:
tmp = (i_monomer, other_atom.GetIdx())
if tmp not in bonding_hydrogens: # hydrogen
bonding_hydrogens.append(tmp)
return other_atom
return None
from bpforms.xlink.core import crosslinks_onto, onto_crosslink_to_id