KarrLab/bcforms

View on GitHub
bcforms/core.py

Summary

Maintainability
F
1 mo
Test Coverage
A
96%
""" BcForms

:Author: Mike Zheng <xzheng20@colby.edu>
:Author: Jonathan Karr <karr@mssm.edu>
:Date: 2019-06-25
:Copyright: 2019, Karr Lab
:License: MIT
"""

from bpforms import BondOrder, BondStereo
from bpforms.util import gen_genomic_viz
from ruamel import yaml
from wc_utils.util.chem import EmpiricalFormula, OpenBabelUtils
from wc_utils.util.chem.marvin import draw_molecule
import abc
import bpforms
import itertools
import lark
import openbabel
import os
import pkg_resources
import wc_utils.cache

# setup cache
cache_dir = os.path.expanduser('~/.cache/bcforms')
if not os.path.isdir(cache_dir):
    os.makedirs(cache_dir)
cache = wc_utils.cache.Cache(directory=cache_dir)


class Subunit(object):
    """ Subunit in a BcForm macromolecular complex

    Attributes:
        id (:obj:`str`): id of the subunit
        stoichiometry (:obj:`int`): stoichiometry of the subunit
        structure (:obj:`bpforms.BpForm` or :obj:`openbabel.OBMol`, optional): structure of the subunit
        formula (:obj:`EmpiricalFormula`, optional): formula of the subunit
        mol_wt (:obj:`float`, optional): molecular weight of the subunit
        charge (:obj:`int`, optional): charge of the subunit
    """

    def __init__(self, id, stoichiometry, structure=None, formula=None, mol_wt=None, charge=None):
        """

        Args:
            id (:obj:`str`): id of the subunit
            stoichiometry (:obj:`int`): stoichiometry of the subunit
            structure (:obj:`bpforms.BpForm` or :obj:`openbabel.OBMol`, optional): structure of the subunit
            formula (:obj:`EmpiricalFormula`, optional): formula of the subunit
            mol_wt (:obj:`float`, optional): molecular weight of the subunit
            charge (:obj:`int`, optional): charge of the subunit
        """
        self.id = id
        self.stoichiometry = stoichiometry
        self.structure = structure
        if structure is None:
            self.formula = formula
            self.charge = charge
            if formula is None:
                self.mol_wt = mol_wt

    @property
    def id(self):
        """ Get the id of the subunit

        Returns:
            :obj:`str`: id of the subunit

        """
        return self._id

    @id.setter
    def id(self, value):
        """ Set the id of the subunit

        Args:
            value (:obj:`str`): id of the subunit

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`str`
        """
        if not isinstance(value, str):
            raise ValueError('`value` must be an instance of `str`')
        self._id = value

    @property
    def stoichiometry(self):
        """ Get the stoichiometry of the subunit

        Returns:
            :obj:`int`: stoichiometry of the subunit

        """
        return self._stoichiometry

    @stoichiometry.setter
    def stoichiometry(self, value):
        """ Set the stoichiometry of the subunit

        Args:
            value (:obj:`int`): stoichiometry of the subunit

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`int`
        """
        if not isinstance(value, int):
            raise ValueError('`value` must be an instance of `int`')
        self._stoichiometry = value

    @property
    def structure(self):
        """ Get the structure of the subunit

        Returns:
            :obj:`bpforms.BpForm` or :obj:`openbabel.OBMol` or None: structure of the subunit

        """
        return self._structure

    @structure.setter
    def structure(self, value):
        """ Set the structure of the subunit

        * setting structure will automaticall set formula, mol_wt, charge

        Args:
            value (:obj:`bpforms.BpForm` or :obj:`openbabel.OBMol` or :obj:`str` (SMILES-encoded string) or None): structure of the subunit

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`bpforms.BpForm` or :obj:`openbabel.OBMol` or None
            :obj:`ValueError`: unable to convert smiles-encoded string to structure
        """
        if not isinstance(value, bpforms.BpForm) and not isinstance(value, openbabel.OBMol) and not isinstance(value, str) and value is not None:
            raise ValueError('`value` must be an instance of `bpforms.BpForm` or `openbabel.OBMol` or None')

        if isinstance(value, str):
            ob_mol = openbabel.OBMol()
            conversion = openbabel.OBConversion()
            conversion.SetInFormat('smi')
            if not conversion.ReadString(ob_mol, value):
                raise ValueError('unable to convert smiles-encoded string to structure')
            self._structure = ob_mol
        else:
            self._structure = value

        if isinstance(self._structure, openbabel.OBMol):
            self._formula = OpenBabelUtils.get_formula(self._structure)
            self._mol_wt = self.formula.get_molecular_weight()
            self._charge = self._structure.GetTotalCharge()
        elif isinstance(self._structure, bpforms.BpForm):
            self._formula = self._structure.get_formula()
            self._mol_wt = self._structure.get_mol_wt()
            self._charge = self._structure.get_charge()

    @property
    def formula(self):
        """ Get the empirical formula of the subunit

        Returns:
            :obj:`EmpiricalFormula` or None: formula of the subunit

        """
        return self._formula

    @formula.setter
    def formula(self, value):
        """ Set the formula of the subunit

        Args:
            value (:obj:`EmpiricalFormula` or :obj:`str` (string representation of the formula) None): formula of the subunit

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`EmpiricalFormula` or None
            :obj:`ValueError`: if formula already set by setting structure attribute
        """
        if not isinstance(value, EmpiricalFormula) and not isinstance(value, str) and value is not None:
            raise ValueError(':obj:`value` is not an instance of :obj:`EmpiricalFormula` or :obj:`str` or None')

        if self.structure is not None:
            raise ValueError('formula already set by setting structure attribute')

        if isinstance(value, str):
            self._formula = EmpiricalFormula(value)
        else:
            self._formula = value

        if isinstance(self._formula, EmpiricalFormula):
            self._mol_wt = self._formula.get_molecular_weight()

    @property
    def mol_wt(self):
        """ Get the molecular weight of the subunit

        Returns:
            :obj:`float` or None: molecular weight of the subunit

        """
        return self._mol_wt

    @mol_wt.setter
    def mol_wt(self, value):
        """ Set the molecular weight of the subunit

        Args:
            value (:obj:`float` or :obj:`int` or :obj:`None`): molecular weight of the subunit

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`float` or :obj:`int` or None
            :obj:`ValueError`: if mol_wt already set by setting structure attribute or formula attribute
            :obj:`ValueError`: if mol_wt is not non-negative
        """
        if not isinstance(value, float) and not isinstance(value, int) and value is not None:
            raise ValueError(':obj:`value` is not an instance of :obj:`float` or :obj:`int` or None')

        if self.formula is not None:
            raise ValueError('mol_wt already set by setting structure attribute or formula attribute')

        if isinstance(value, int):
            value = float(value)

        if isinstance(value, float):
            if value < 0:
                raise ValueError('mol_wt must be non-negative')

        self._mol_wt = value

    @property
    def charge(self):
        """ Get the charge of the subunit

        Returns:
            :obj:`int` or None: charge of the subunit

        """
        return self._charge

    @charge.setter
    def charge(self, value):
        """ Set the charge of the subunit

        Args:
            value (:obj:`int` or :obj:`None`): charge of the subunit

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`int` or None
            :obj:`ValueError`: if charge already set by setting structure attribute
        """
        if not isinstance(value, int) and value is not None:
            raise ValueError(':obj:`value` is not an instance of :obj:`int` or None')

        if self.structure is not None:
            raise ValueError('charge already set by setting structure attribute')

        self._charge = value

    def __str__(self):
        return str(self.stoichiometry) + ' * ' + self.id

    def is_equal(self, other):
        """ Check if two Subunits are semantically equal

        * Check id and stoichiometry; do not check structure yet

        Args:
            other (:obj:`Subunit`): another Subunit

        Returns:
            :obj:`bool`: :obj:`True`, if the Subunits are semantically equal

        """
        if self is other:
            return True
        if self.__class__ != other.__class__:
            return False

        attrs = ['id', 'stoichiometry']

        for attr in attrs:
            if getattr(self, attr) != getattr(other, attr):
                return False

        return True

    def get_formula(self, formula=None):
        """ Get the empirical formula

        Args:
            formula (:obj:`EmpiricalFormula` or :obj:`None`): Subunit empirical formula per copy

        Returns:
            :obj:`EmpiricalFormula` or None: the empirical formula of the Subunit

        """

        if formula is not None:
            self.formula = formula

        if self.formula is not None:
            return self.formula * self.stoichiometry

        return None

    def get_mol_wt(self, mol_wt=None):
        """ Get the molecular weight

        Args:
            mol_wt (:obj:`float` or :obj:`None`): Subunit molecular weight per copy

        Returns:
            :obj:`float` or None: the molecular weight of the Subunit
        """

        if mol_wt is not None:
            self.mol_wt = mol_wt

        if self.mol_wt is not None:
            return self.mol_wt * self.stoichiometry

        return None

    def get_charge(self, charge=None):
        """ Get the total charge

        Args:
            charge (:obj:`int` or :obj:`None`): Subunit charge per copy

        Returns:
            :obj:`int` or None: the total charge of the Subunit
        """

        if charge is not None:
            self.charge = charge

        if self.charge is not None:
            return self.charge * self.stoichiometry

        return None

    def get_structure(self):
        """ Get an Open Babel molecule of the structure

        Returns:
            :obj:`tuple`:
                * :obj:`openbabel.OBMol`: Open Babel molecule of the structure
                * :obj:`dict` of obj:`dict`: dictionary which maps :obj:`subunit_idx` to
                    atom_maps

        Raises:
            :obj:`ValueError`: Subunit structure is :obj:`None`
        """

        if self.structure is None:
            raise ValueError('Structure is None')

        # join the subunits
        mol = openbabel.OBMol()

        subunit_atom_map = {}
        subunit_idx = 1
        for i in range(self.stoichiometry):

            # get structure
            atom_map = {}
            if isinstance(self.structure, openbabel.OBMol):
                structure = self.structure
                atom_map[1] = {}
                atom_map[1]['monomer'] = {}
                for i_atom in range(structure.NumAtoms()):
                    atom_map[1]['monomer'][i_atom+1] = i_atom+1
            else:
                # structure is a BpForm object
                structure, atom_map = self.structure.get_structure()

            num_atoms = structure.NumAtoms()
            total_atoms = sum(sum(len(y) for y in x.values()) for x in atom_map.values())
            # print(num_atoms, total_atoms)

            mol += structure
            for monomer in atom_map.values():
                for atom_type in monomer.values():
                    for i_atom, atom in atom_type.items():
                        atom_type[i_atom] = atom + num_atoms*(subunit_idx-1)

            subunit_atom_map[subunit_idx] = atom_map
            subunit_idx += 1

        return mol, subunit_atom_map

    def export(self, format='smiles', options=[]):
        """ Export the structure to string

        Args:
            format (:obj:`str`, optional): export format
            options (:obj:`list`, optional): export options

        Returns:
            :obj:`str`: exported string representation of the structure

        """
        if self.structure is None:
            return ''

        return OpenBabelUtils.export(self.get_structure()[0], format=format, options=options)


class Atom(object):
    """ Atom in a crosslink

    Attributes:
        subunit (:obj:`str`): id of subunit
        subunit_idx (:obj:`int`): index of the subunit for homomers
        element (:obj:`str`): code of the element
        position (:obj:`int`): SMILES position of the atom within the compound
        monomer (:obj:`int`): index of parent monomer
        charge (:obj:`int`): charge of the atom
        component_type (:obj:`str`): type of component the atom belongs to:
            either 'monomer' or 'backbone'

    """

    def __init__(self, subunit, element, position, monomer, charge=0, subunit_idx=None, component_type=None):
        """

        Args:
            subunit (:obj:`str`): id of subunit
            element (:obj:`str`): code of the element
            position (:obj:`int`): SMILES position of the atom within the compound
            monomer (:obj:`int`): index of parent monomer
            charge (:obj:`int`, optional): charge of the atom
            subunit_idx (:obj:`int`, optional): index of the subunit for homomers
            component_type (:obj:`str`, optional): type of component the atom belongs to:
                either 'monomer' or 'backbone'
        """

        self.subunit = subunit
        self.subunit_idx = subunit_idx
        self.element = element
        self.position = position
        self.monomer = monomer
        self.charge = charge
        if component_type == 'm':
            self.component_type = 'monomer'
        elif component_type == 'b':
            self.component_type = 'backbone'
        elif component_type is None:
            self.component_type = 'monomer'
        else:
            self.component_type = component_type

    @property
    def subunit(self):
        """ Get the subunit that the atom belongs to

        Returns:
            :obj:`str`: subunit

        """
        return self._subunit

    @subunit.setter
    def subunit(self, value):
        """ Set the subunit that the atom belongs to

        Args:
            value (:obj:`str`): subunit

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`str`
        """
        if not isinstance(value, str):
            raise ValueError('`value` must be an instance of `str`')
        self._subunit = value

    @property
    def subunit_idx(self):
        """ Get the index of the homomer of the subunit that the atom belongs to

        Returns:
            :obj:`int`: subunit_idx or None

        """
        return self._subunit_idx

    @subunit_idx.setter
    def subunit_idx(self, value):
        """ Set the index of the homomer of the subunit that the atom belongs to

        Args:
            value (:obj:`int`): subunit

        Raises:
            :obj:`ValueError`: if :obj:`value` is not None or a positive integer
        """
        if value is not None and (not isinstance(value, int) or value < 1):
            raise ValueError('`value` must be a None or a positive integer')
        self._subunit_idx = value

    @property
    def element(self):
        """ Get the element of the atom

        Returns:
            :obj:`str`: element

        """
        return self._element

    @element.setter
    def element(self, value):
        """ Set the element of the atom

        Args:
            value (:obj:`str`): element

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`str`
        """
        if not isinstance(value, str):
            raise ValueError('`value` must be an instance of `str`')
        self._element = value

    @property
    def position(self):
        """ Get the position of the atom in the compound

        Returns:
            :obj:`int`: position

        """
        return self._position

    @position.setter
    def position(self, value):
        """ Set the position of the atom in the compound

        Args:
            value (:obj:`int`): position

        Raises:
            :obj:`ValueError`: if :obj:`value` is not a positive :obj:`int`
        """
        if not isinstance(value, int) or value < 1:
            raise ValueError('`value` must be a positive integer')
        self._position = value

    @property
    def monomer(self):
        """ Get the position in the subunit of the monomer that the atom belongs to

        Returns:
            :obj:`int`: monomer position

        """
        return self._monomer

    @monomer.setter
    def monomer(self, value):
        """ Set the position in the subunit of the monomer that the atom belongs to

        Args:
            value (:obj:`int`): monomer position

        Raises:
            :obj:`ValueError`: if `value` is not a positive integer
        """
        if not isinstance(value, int) or value < 1:
            raise ValueError('`value` must be a positive integer')
        self._monomer = value

    @property
    def charge(self):
        """ Get the charge of the atom

        Returns:
            :obj:`int`: charge

        """
        return self._charge

    @charge.setter
    def charge(self, value):
        """ Set the charge of the atom

        Args:
            value (:obj:`int`): charge

        Raises:
            :obj:`ValueError`: if `value` is not an integer
        """
        if not isinstance(value, int):
            raise ValueError('`value` must be an integer')
        self._charge = value

    @property
    def component_type(self):
        """ Get the type of component the atom belongs to

        Returns:
            :obj:`str`: component type

        """
        return self._component_type

    @component_type.setter
    def component_type(self, value):
        """ Set the type of component the atom belongs to

        Raises:
            :obj:`ValueError`: component_type must be either 'monomer' or 'backbone'

        """
        if value not in ['monomer', 'backbone']:
            raise ValueError('`component_type` must be either "monomer" or "backbone"')
        else:
            self._component_type = value

    def __str__(self):
        """ Generate a string representation

        Returns:
            :obj:`str`: string representation
        """

        if self.charge == 0:
            charge = ''
        else:
            charge = '{:+d}'.format(self.charge)

        if self.subunit_idx is None:
            subunit_idx = ''
        else:
            subunit_idx = '(' + str(self.subunit_idx) + ')'
        return '{}{}-{}{}{}{}'.format(self.subunit, subunit_idx, self.monomer, self.element, self.position, charge)

    def is_equal(self, other):
        """ Check if two atoms are semantically equal (belong to the same subunit/monomer and
        have the same element, position, and charge)

        Args:
            other (:obj:`Atom`): another atom

        Returns:
            :obj:`bool`: :obj:`True`, if the atoms are semantically equal

        """
        if self is other:
            return True
        if self.__class__ != other.__class__:
            return False

        attrs = ['subunit', 'element', 'position', 'monomer', 'charge']

        for attr in attrs:
            if getattr(self, attr) != getattr(other, attr):
                return False

        self_subunit_idx = self.subunit_idx if self.subunit_idx is not None else 1
        other_subunit_idx = other.subunit_idx if other.subunit_idx is not None else 1
        if self_subunit_idx != other_subunit_idx:
            return False

        return True


class Crosslink(abc.ABC):
    """ Abstract class of a crosslink between subunits

    """

    @abc.abstractmethod
    def get_l_bond_atoms(self):
        """ Get the left bond atoms

        Returns:
            :obj:`list` of :obj:`Atom`: left bond atoms

        """
        pass

    @abc.abstractmethod
    def get_r_bond_atoms(self):
        """ Get the right bond atoms

        Returns:
            :obj:`list` of :obj:`Atom`: right bond atoms

        """
        pass

    @abc.abstractmethod
    def get_l_displaced_atoms(self):
        """ Get the left displaced atoms

        Returns:
            :obj:`list` of :obj:`Atom`: left displaced atoms

        """
        pass

    @abc.abstractmethod
    def get_r_displaced_atoms(self):
        """ Get the right displaced atoms

        Returns:
            :obj:`list` of :obj:`Atom`: right displaced atoms

        """
        pass

    @abc.abstractmethod
    def get_order(self):
        """ Get the order

        Returns:
            :obj:`BondOrder`: order

        """
        pass

    @abc.abstractmethod
    def get_stereo(self):
        """ Get the stereochemistry

        Returns:
            :obj:`BondStereo`: stereochemistry

        """
        pass

    @abc.abstractmethod
    def __str__(self):
        """Generate a string representation

        Returns:
            :obj:`str`: string representation
        """
        pass

    def is_equal(self, other):
        """ Check if two crosslinks are semantically equal (have the same bond atoms)

        Args:
            other (:obj:`Crosslink`): another crosslink

        Returns:
            :obj:`bool`: :obj:`True`, if the crosslinks are semantically equal

        """

        if self is other:
            return True
        if self.__class__ != other.__class__ and self.__class__.__bases__ != other.__class__.__bases__:
            return False

        attrs = ['l_bond_atoms', 'l_displaced_atoms', 'r_bond_atoms', 'r_displaced_atoms']

        for attr in attrs:
            self_atoms = getattr(self, 'get_'+attr)()
            other_atoms = getattr(other, 'get_'+attr)()
            if len(self_atoms) != len(other_atoms):
                return False
            for self_atom, other_atom in zip(self_atoms, other_atoms):
                if not self_atom.is_equal(other_atom):
                    return False

        if self.get_order() != other.get_order() or self.get_stereo() != other.get_stereo():
            return False

        return True


class InlineCrosslink(Crosslink):
    """ A crosslink between subunits defined inline

    Attributes:
        l_bond_atoms (:obj:`list` of :obj:`Atom`): atoms from the left subunit that bond with the right subunit
        r_bond_atoms (:obj:`list` of :obj:`Atom`): atoms from the right subunit that bond with the left subunit
        l_displaced_atoms (:obj:`list` of :obj:`Atom`): atoms from the left subunit displaced by the crosslink
        r_displaced_atoms (:obj:`list` of :obj:`Atom`): atoms from the right subunit displaced by the crosslink
        order (:obj:`BondOrder`): order
        stereo (:obj:`BondStereo`): stereochemistry
        comments (:obj:`str`): comments
    """

    def __init__(self, l_bond_atoms=None, r_bond_atoms=None, l_displaced_atoms=None, r_displaced_atoms=None,
                 order=BondOrder.single, stereo=None,
                 comments=None):
        """

        Args:
            l_bond_atoms (:obj:`list`): atoms from the left subunit that bond with the right subunit
            r_bond_atoms (:obj:`list`): atoms from the right subunit that bond with the left subunit
            l_displaced_atoms (:obj:`list`): atoms from the left subunit displaced by the crosslink
            r_displaced_atoms (:obj:`list`): atoms from the right subunit displaced by the crosslink
            order (:obj:`BondOrder`, optional): order
            stereo (:obj:`BondStereo`, optional): stereochemistry
            comments (:obj:`str`): comments
        """
        if l_bond_atoms is None:
            self.l_bond_atoms = []
        else:
            self.l_bond_atoms = l_bond_atoms

        if r_bond_atoms is None:
            self.r_bond_atoms = []
        else:
            self.r_bond_atoms = r_bond_atoms

        if l_displaced_atoms is None:
            self.l_displaced_atoms = []
        else:
            self.l_displaced_atoms = l_displaced_atoms

        if r_bond_atoms is None:
            self.r_displaced_atoms = []
        else:
            self.r_displaced_atoms = r_bond_atoms

        self.order = order
        self.stereo = stereo

        self.comments = comments

    @property
    def l_bond_atoms(self):
        """ Get the left bond atoms

        Returns:
            :obj:`list` of :obj:`Atom`: left bond atoms

        """
        return self._l_bond_atoms

    @l_bond_atoms.setter
    def l_bond_atoms(self, value):
        """ Set the left bond atoms

        Args:
            value (:obj:`list` of :obj:`Atom`): left bond atoms

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`list`

        """
        if not isinstance(value, list):
            raise ValueError('`value` must be an instance of `list`')
        self._l_bond_atoms = value

    @property
    def r_bond_atoms(self):
        """ Get the right bond atoms

        Returns:
            :obj:`list` of :obj:`Atom`: right bond atoms

        """
        return self._r_bond_atoms

    @r_bond_atoms.setter
    def r_bond_atoms(self, value):
        """ Set the right bond atoms

        Args:
            value (:obj:`list` of :obj:`Atom`): right bond atoms

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`list`

        """
        if not isinstance(value, list):
            raise ValueError('`value` must be an instance of `list`')
        self._r_bond_atoms = value

    @property
    def l_displaced_atoms(self):
        """ Get the left displaced atoms

        Returns:
            :obj:`list` of :obj:`Atom`: left displaced atoms

        """
        return self._l_displaced_atoms

    @l_displaced_atoms.setter
    def l_displaced_atoms(self, value):
        """ Set the left displaced atoms

        Args:
            value (:obj:`list` of :obj:`Atom`): left displaced atoms

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`list`

        """
        if not isinstance(value, list):
            raise ValueError('`value` must be an instance of `list`')
        self._l_displaced_atoms = value

    @property
    def r_displaced_atoms(self):
        """ Get the right displaced atoms

        Returns:
            :obj:`list` of :obj:`Atom`: right displaced atoms

        """
        return self._r_displaced_atoms

    @r_displaced_atoms.setter
    def r_displaced_atoms(self, value):
        """ Set the right displaced atoms

        Args:
            value (:obj:`list` of :obj:`Atom`): right displaced atoms

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`list`

        """
        if not isinstance(value, list):
            raise ValueError('`value` must be an instance of `list`')
        self._r_displaced_atoms = value

    @property
    def order(self):
        """ Get the order

        Returns:
            :obj:`BondOrder`: order
        """
        return self._order

    @order.setter
    def order(self, value):
        """ Set the order

        Args:
            value (:obj:`BondOrder`): order

        Raises:
            :obj:`ValueError`: if `order` is not an instance of `BondOrder`
        """
        if not isinstance(value, BondOrder):
            raise ValueError('`order` must be an instance of `BondOrder`')
        self._order = value

    @property
    def stereo(self):
        """ Get the stereochemistry

        Returns:
            :obj:`BondStereo`: stereochemistry
        """
        return self._stereo

    @stereo.setter
    def stereo(self, value):
        """ Set the stereo

        Args:
            value (:obj:`BondStereo`): stereochemistry

        Raises:
            :obj:`ValueError`: if `stereo` is not an instance of `BondStereo`
        """
        if value is not None and not isinstance(value, BondStereo):
            raise ValueError('`stereo` must be an instance of `BondStereo` or `None`')
        self._stereo = value

    @property
    def comments(self):
        """ Get comments

        Returns:
            :obj:`str`: comments
        """
        return self._comments

    @comments.setter
    def comments(self, value):
        """ Set comments

        Args:
            value (:obj:`str`): comments

        Raises:
            :obj:`ValueError`: if value is not a str or None
        """
        if value and not isinstance(value, str):
            raise ValueError('`comments` must be a string or None')
        self._comments = value

    def __str__(self):
        """Generate a string representation

        Returns:
            :obj:`str`: string representation
        """
        s = 'x-link: ['

        atom_types = ['l_bond_atoms', 'l_displaced_atoms', 'r_bond_atoms', 'r_displaced_atoms']
        for atom_type in atom_types:
            for atom in getattr(self, atom_type):
                s += ' {}: {} |'.format(atom_type[:-1].replace('_', '-'), str(atom))

        if self.order != BondOrder.single:
            s += ' order: "{}" |'.format(self.order.name)
        if self.stereo is not None:
            s += ' stereo: "{}" |'.format(self.stereo.name)

        if self.comments:
            s += ' comments: "{}" |'.format(self.comments.replace('"', '\\"'))

        s = s[:-1] + ']'
        return s

    def get_l_bond_atoms(self):
        """ Get the left bond atoms

        Returns:
            :obj:`list` of :obj:`Atom`: left bond atoms

        """
        return self.l_bond_atoms

    def get_r_bond_atoms(self):
        """ Get the right bond atoms

        Returns:
            :obj:`list` of :obj:`Atom`: right bond atoms

        """
        return self.r_bond_atoms

    def get_l_displaced_atoms(self):
        """ Get the left displaced atoms

        Returns:
            :obj:`list` of :obj:`Atom`: left displaced atoms

        """
        return self.l_displaced_atoms

    def get_r_displaced_atoms(self):
        """ Get the right displaced atoms

        Returns:
            :obj:`list` of :obj:`Atom`: right displaced atoms

        """
        return self.r_displaced_atoms

    def get_order(self):
        """ Get the order

        Returns:
            :obj:`BondOrder`: order
        """
        return self.order

    def get_stereo(self):
        """ Get the stereochemistry

        Returns:
            :obj:`BondStereo`: stereochemistry
        """
        return self.stereo


_xlink_filename = pkg_resources.resource_filename('bpforms', 'xlink/xlink.yml')


@cache.memoize(typed=False, expire=30 * 24 * 60 * 60, filename_args=[0])
def parse_yaml(path):
    """ Read a YAML file

    Args:
        path (:obj:`str`): path to YAML file which defines alphabet

    Returns:
        :obj:`object`: content of file
    """
    yaml_reader = yaml.YAML()
    with open(path, 'rb') as file:
        return yaml_reader.load(file)


class OntologyCrosslink(Crosslink):
    """ A pre-defined crosslink between subunits

    Attributes:
        type (:obj:`str`): type of the pre-defined crosslink
        l_subunit (:obj:`str`): name of the left subunit
        l_subunit_idx (:obj:`int`, optional): index of the left subunit, optional if only one copy of the subunit
        l_monomer (:obj:`int`): index of the monomer from the left subunit
        r_subunit (:obj:`str`): name of the left subunit
        r_subunit_idx (:obj:`int`, optional): index of the left subunit, optional if only one copy of the subunit
        r_monomer (:obj:`int`): index of the monomer from the right subunit
        xlink_details (:obj:`tuple`): detailed information about the abstracted crosslink

        _xlink_atom_parser (:obj:`lark.Lark`): lark grammar parser used to parse atom strings

    """

    def __init__(self, type, l_subunit, l_monomer, r_subunit, r_monomer, l_subunit_idx=None, r_subunit_idx=None):
        """

        Args:
            type (:obj:`str`): type of the pre-defined crosslink
            l_subunit (:obj:`str`): name of the left subunit
            l_subunit_idx (:obj:`int`, optional): index of the left subunit, optional if only one copy of the subunit
            l_monomer (:obj:`int`): index of the monomer from the left subunit
            r_subunit (:obj:`str`): name of the left subunit
            r_subunit_idx (:obj:`int`, optional): index of the left subunit, optional if only one copy of the subunit
            r_monomer (:obj:`int`): index of the monomer from the right subunit

        """

        self.type = type
        self.l_subunit = l_subunit
        self.l_subunit_idx = l_subunit_idx
        self.l_monomer = l_monomer
        self.r_subunit = r_subunit
        self.r_subunit_idx = r_subunit_idx
        self.r_monomer = r_monomer
        self.xlink_details = self.get_details()

    @property
    def type(self):
        """ Get the type of the abstracted crosslink

        Returns:
            :obj:`str`: type of the crosslink

        """
        return self._type

    @type.setter
    def type(self, value):
        """ Set the type of the abstracted crosslink

        Args:
            value (:obj:`str`): type of the crosslink

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`str`

        """
        if not isinstance(value, str):
            raise ValueError('`value` must be an instance of `str`')
        self._type = value

    @property
    def l_subunit(self):
        """ Get the name of the left subunit in the crosslink

        Returns:
            :obj:`str`: name of the left subunit

        """
        return self._l_subunit

    @l_subunit.setter
    def l_subunit(self, value):
        """ Set the name of the left subunit in the crosslink

        Args:
            value (:obj:`str`): name of the left subunit

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`str`

        """
        if not isinstance(value, str):
            raise ValueError('`value` must be an instance of `str`')
        self._l_subunit = value

    @property
    def l_subunit_idx(self):
        """ Get the index of the left subunit in the crosslink

        Returns:
            :obj:`int`: index of the left subunit

        """
        return self._l_subunit_idx

    @l_subunit_idx.setter
    def l_subunit_idx(self, value):
        """ Set the index of the left subunit in the crosslink

        Args:
            value (:obj:`int` or None): index of the left subunit

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`int` or None

        """
        if value is not None and (not isinstance(value, int) or value < 1):
            raise ValueError('`value` must be an instance of `int` or None')
        self._l_subunit_idx = value

    @property
    def l_monomer(self):
        """ Get the index of the left monomer in the crosslink

        Returns:
            :obj:`int`: index of the left monomer

        """
        return self._l_monomer

    @l_monomer.setter
    def l_monomer(self, value):
        """ Set the index of the left monomer in the crosslink

        Args:
            value (:obj:`int`): index of the left monomer

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`int`

        """
        if not isinstance(value, int):
            raise ValueError('`value` must be an instance of `int`')
        self._l_monomer = value

    @property
    def r_subunit(self):
        """ Get the name of the right subunit in the crosslink

        Returns:
            :obj:`str`: name of the right subunit

        """
        return self._r_subunit

    @r_subunit.setter
    def r_subunit(self, value):
        """ Set the name of the right subunit in the crosslink

        Args:
            value (:obj:`str`): name of the right subunit

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`str`

        """
        if not isinstance(value, str):
            raise ValueError('`value` must be an instance of `str`')
        self._r_subunit = value

    @property
    def r_subunit_idx(self):
        """ Get the index of the right subunit in the crosslink

        Returns:
            :obj:`int`: index of the right subunit

        """
        return self._r_subunit_idx

    @r_subunit_idx.setter
    def r_subunit_idx(self, value):
        """ Set the index of the right subunit in the crosslink

        Args:
            value (:obj:`int` or None): index of the right subunit

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`int` or None

        """
        if value is not None and (not isinstance(value, int) or value < 1):
            raise ValueError('`value` must be an instance of `int` or None')
        self._r_subunit_idx = value

    @property
    def r_monomer(self):
        """ Get the index of the right monomer in the crosslink

        Returns:
            :obj:`int`: index of the right monomer

        """
        return self._r_monomer

    @r_monomer.setter
    def r_monomer(self, value):
        """ Set the index of the right monomer in the crosslink

        Args:
            value (:obj:`int`): index of the right monomer

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`int`

        """
        if not isinstance(value, int):
            raise ValueError('`value` must be an instance of `int`')
        self._r_monomer = value

    _xlink_atom_parser = lark.Lark("""
        start: atom_element atom_position atom_charge?
        atom_element: /[A-Z][a-z]?/
        atom_position: /[0-9]+/
        atom_charge: /[\+\-][0-9]+/
    """)

    class ParseTreeTransformer(lark.Transformer):
        # Class that processes the parsetree

        @lark.v_args(inline=True)
        def start(self, *args):
            atom_element = args[0][1]
            atom_position = args[1][1]
            if len(args) > 2:
                atom_charge = args[2][1]
            else:
                atom_charge = 0
            return atom_element, atom_position, atom_charge

        @lark.v_args(inline=True)
        def atom_element(self, *args):
            return ('atom_element', args[0].value)

        @lark.v_args(inline=True)
        def atom_position(self, *args):
            return ('atom_position', int(args[0].value))

        @lark.v_args(inline=True)
        def atom_charge(self, *args):
            return ('atom_charge', int(args[0].value))

    def get_l_bond_atoms(self):
        """ Get the left bond atoms

        Returns:
            :obj:`list` of :obj:`Atom`: left bond atoms

        """
        atoms = []
        for atom in self.xlink_details[1]['l_bond_atoms']:
            tree = self._xlink_atom_parser.parse(atom)
            parse_tree_transformer = self.ParseTreeTransformer()
            element, position, charge = parse_tree_transformer.transform(tree)
            atom = Atom(subunit=self.l_subunit, element=element, position=position,
                        monomer=self.l_monomer, charge=charge, subunit_idx=self.l_subunit_idx)
            atoms.append(atom)
        return atoms

    def get_r_bond_atoms(self):
        """ Get the right bond atoms

        Returns:
            :obj:`list` of :obj:`Atom`: right bond atoms

        """
        atoms = []
        for atom in self.xlink_details[1]['r_bond_atoms']:
            tree = self._xlink_atom_parser.parse(atom)
            parse_tree_transformer = self.ParseTreeTransformer()
            element, position, charge = parse_tree_transformer.transform(tree)
            atom = Atom(subunit=self.r_subunit, element=element, position=position,
                        monomer=self.r_monomer, charge=charge, subunit_idx=self.r_subunit_idx)
            atoms.append(atom)
        return atoms

    def get_l_displaced_atoms(self):
        """ Get the left displaced atoms

        Returns:
            :obj:`list` of :obj:`Atom`: left displaced atoms

        """
        atoms = []
        for atom in self.xlink_details[1]['l_displaced_atoms']:
            tree = self._xlink_atom_parser.parse(atom)
            parse_tree_transformer = self.ParseTreeTransformer()
            element, position, charge = parse_tree_transformer.transform(tree)
            atom = Atom(subunit=self.l_subunit, element=element, position=position,
                        monomer=self.l_monomer, charge=charge, subunit_idx=self.l_subunit_idx)
            atoms.append(atom)
        return atoms

    def get_r_displaced_atoms(self):
        """ Get the right displaced atoms

        Returns:
            :obj:`list` of :obj:`Atom`: right displaced atoms

        """
        atoms = []
        for atom in self.xlink_details[1]['r_displaced_atoms']:
            tree = self._xlink_atom_parser.parse(atom)
            parse_tree_transformer = self.ParseTreeTransformer()
            element, position, charge = parse_tree_transformer.transform(tree)
            atom = Atom(subunit=self.r_subunit, element=element, position=position,
                        monomer=self.r_monomer, charge=charge, subunit_idx=self.r_subunit_idx)
            atoms.append(atom)
        return atoms

    def get_order(self):
        """ Get the order

        Returns:
            :obj:`BondOrder`: order
        """
        return BondOrder[self.xlink_details[1].get('order' , 'single')]

    def get_stereo(self):
        """ Get the stereochemistry

        Returns:
            :obj:`BondStereo`: stereochemistry
        """
        val = self.xlink_details[1].get('stereo', None)
        if val is None:
            return None
        else:
            return BondStereo[val]

    def __str__(self):
        """Generate a string representation

        Returns:
            :obj:`str`: string representation
        """
        s = 'x-link: [ type: {} |'.format(self.type)

        if self.l_subunit_idx is None:
            str_l_subunit_idx = ''
        else:
            str_l_subunit_idx = '({})'.format(self.l_subunit_idx)
        s += ' l: {}{}-{} |'.format(self.l_subunit, str_l_subunit_idx, self.l_monomer)

        if self.r_subunit_idx is None:
            str_r_subunit_idx = ''
        else:
            str_r_subunit_idx = '({})'.format(self.r_subunit_idx)
        s += ' r: {}{}-{} |'.format(self.r_subunit, str_r_subunit_idx, self.r_monomer)

        s = s[:-1]+']'
        return s

    def get_details(self):
        """ Get the full details of the crosslink in a dictionary

        Returns:
            :obj:`dict`: detailed information of the crosslink

        Raises:
            :obj:`KeyError`: Unknown abstracted crosslink type

        """
        xlink_detail_dict = parse_yaml(_xlink_filename)
        for xlink_name, xlink_details in xlink_detail_dict.items():
            if self.type == xlink_name:
                return (xlink_name, xlink_details)
            if self.type in xlink_details['synonyms']:
                return (xlink_name, xlink_details)
            if 'name' in xlink_details and self.type == xlink_details['name']:
                return (xlink_name, xlink_details)

        raise KeyError('Unknown abstracted crosslink type')


class BcForm(object):
    """ A form of a macromolecular complex

    Attributes:
        subunits (:obj:`list` of :obj:`Subunit`): subunit composition of the complex
        crosslinks (:obj:`list` :obj:`Crosslink`): crosslinks in the complex

    """

    def __init__(self, subunits=None, crosslinks=None):
        """

        Args:
            subunits (:obj:`list` of :obj:`Subunit` or :obj:`BcForm`, optional): subunit composition of the complex
            crosslinks (:obj:`list` of :obj:`Crosslink`, optional): crosslinks in the complex

            _parser (:obj:`lark.Lark`): lark grammar parser used in `from_str`
        """
        if subunits is None:
            self.subunits = []
        else:
            self.subunits = subunits

        if crosslinks is None:
            self.crosslinks = []
        else:
            self.crosslinks = crosslinks

    @property
    def subunits(self):
        """ Get the subunits

        Returns:
            :obj:`list` of :obj:`Subunit` or :obj:`BcForm`: subunits

        """
        return self._subunits

    @subunits.setter
    def subunits(self, value):
        """ Set the subunits

        Args:
            value (:obj:`list` of :obj:`Subunit` or :obj`BcForm`): subunits

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`list`

        """
        if not isinstance(value, list):
            raise ValueError('`value` must be an instance of `list`')
        self._subunits = value

    @property
    def crosslinks(self):
        """ Get the crosslinks

        Returns:
            :obj:`list` of :obj:`Crosslink`: crosslinks

        """
        return self._crosslinks

    @crosslinks.setter
    def crosslinks(self, value):
        """ Set the crosslinks

        Args:
            value (:obj:`list` of :obj:`Crosslink`): crosslinks

        Raises:
            :obj:`ValueError`: if :obj:`value` is not an instance of :obj:`list`

        """
        if not isinstance(value, list):
            raise ValueError('`value` must be an instance of `list`')
        self._crosslinks = value

    def __str__(self):
        """ Generate a string representation

        Returns:
            :obj:`str`: string representation of complex
        """
        s = ''

        # subunits
        for subunit in self.subunits:
            s += str(subunit) + ' + '
        s = s[:-3]

        # crosslinks
        for crosslink in self.crosslinks:
            s += ' | ' + str(crosslink)

        # return string representation
        return s

    # read the grammar file
    _grammar_filename = pkg_resources.resource_filename('bcforms', 'grammar.lark')

    with open(_grammar_filename, 'r') as file:
        _parser = lark.Lark(file.read())

    def from_str(self, string):
        """ Set a complex from a string representation

        Args:
            string (:obj:`str`): string representation of a complex

        Returns:
            :obj:`BcForm`: structured BcForm representation of the string
        """

        class ParseTreeTransformer(lark.Transformer):
            # Class that processes the parsetree

            def __init__(self, bc_form):
                super(ParseTreeTransformer, self).__init__()
                self.bc_form = bc_form

            @lark.v_args(inline=True)
            def start(self, *args):
                self.bc_form.subunits = args[0]
                self.bc_form.crosslinks = []
                if len(args) > 2:
                    # exists global attr (crosslink)
                    self.bc_form.crosslinks = list(args[2::2])
                return self.bc_form

            # complex
            @lark.v_args(inline=True)
            def complex(self, *args):
                return [Subunit(id=x['id'], stoichiometry=x['stoichiometry']) for x in args if type(x) == dict]

            @lark.v_args(inline=True)
            def component(self, *args):
                component_dict = {}
                if len(args) < 2:
                    # handle the case where no explicit coefficient
                    component_dict['stoichiometry'] = 1
                    component_dict[args[0][0]] = args[0][1]
                else:
                    # handle the case where optional coefficient is explicitly put
                    component_dict[args[0][0]] = args[0][1]
                    component_dict[args[1][0]] = args[1][1]

                return component_dict

            @lark.v_args(inline=True)
            def coefficient(self, *args):
                return ('stoichiometry', int(args[0].value))

            @lark.v_args(inline=True)
            def subunit(self, *args):
                return ('id', args[0].value)

            # crosslinks
            @lark.v_args(inline=True)
            def global_attr(self, *args):
                return args[0]

            @lark.v_args(inline=True)
            def crosslink(self, *args):
                for arg in args:
                    if isinstance(arg, Crosslink):
                        return arg

            # ontology-defined crosslink
            @lark.v_args(inline=True)
            def onto_crosslink(self, *args):
                for arg in args:
                    if isinstance(arg, lark.tree.Tree):
                        attr = arg.children[0][0]
                        val = arg.children[0][1]
                        if attr == 'type':
                            type = val
                        elif attr == 'l_monomer':
                            l_subunit = val[0]
                            l_subunit_idx = val[1]
                            l_monomer = val[2]
                        elif attr == 'r_monomer':
                            r_subunit = val[0]
                            r_subunit_idx = val[1]
                            r_monomer = val[2]
                bond = OntologyCrosslink(type=type,
                                         l_subunit=l_subunit, l_subunit_idx=l_subunit_idx, l_monomer=l_monomer,
                                         r_subunit=r_subunit, r_subunit_idx=r_subunit_idx, r_monomer=r_monomer)
                return bond

            @lark.v_args(inline=True)
            def onto_crosslink_type(self, *args):
                return ('type', args[1].value)

            @lark.v_args(inline=True)
            def onto_crosslink_monomer(self, *args):
                num_optional_args = 0
                monomer_type = args[0][1]
                subunit = args[2][1]
                if args[3][0] == 'subunit_idx':
                    subunit_idx = int(args[3][1])
                else:
                    subunit_idx = None
                    num_optional_args += 1
                monomer = int(args[4 - num_optional_args][1])
                return (monomer_type, (subunit, subunit_idx, monomer))

            @lark.v_args(inline=True)
            def onto_crosslink_monomer_type(self, *args):
                return ('onto_crosslink_monomer_type', args[0] + '_monomer')

            # inline crosslink
            @lark.v_args(inline=True)
            def inline_crosslink(self, *args):
                bond = InlineCrosslink()
                for arg in args:
                    if isinstance(arg, lark.tree.Tree):
                        attr, val = arg.children[0]
                        if attr in ['order', 'stereo', 'comments']:
                            setattr(bond, attr, val)
                        else:
                            attr_val_list = getattr(bond, attr + "s")
                            attr_val_list.append(val)
                return bond

            @lark.v_args(inline=True)
            def inline_crosslink_atom(self, *args):
                num_optional_args = 0
                atom_type = args[0][1]
                subunit = args[2][1]
                if args[3][0] == 'subunit_idx':
                    subunit_idx = int(args[3][1])
                else:
                    subunit_idx = None
                    num_optional_args += 1
                monomer = int(args[4-num_optional_args][1])
                element = args[5-num_optional_args][1]
                position = int(args[6-num_optional_args][1])
                if len(args) > 7-num_optional_args:
                    if args[7-num_optional_args][0] == 'atom_component_type':
                        atom_component_type = args[7-num_optional_args][1]
                    else:
                        atom_component_type = None
                        num_optional_args += 1
                else:
                    atom_component_type = None
                if len(args) > 8-num_optional_args:
                    if args[8-num_optional_args][0] == 'atom_charge':
                        charge = int(args[8-num_optional_args][1])
                    else:
                        charge = 0
                else:
                    charge = 0

                return (atom_type, Atom(subunit=subunit, subunit_idx=subunit_idx, element=element,
                                        position=position, monomer=monomer, charge=charge, component_type=atom_component_type))

            @lark.v_args(inline=True)
            def inline_crosslink_atom_type(self, *args):
                return ('inline_crosslink_atom_type', args[0].value + '_' + args[1].value + '_atom')

            @lark.v_args(inline=True)
            def inline_crosslink_order(self, *args):
                return ('order', BondOrder[args[-2].value])

            @lark.v_args(inline=True)
            def inline_crosslink_stereo(self, *args):
                return ('stereo', BondStereo[args[-2].value])

            @lark.v_args(inline=True)
            def inline_crosslink_comments(self, *args):
                return ('comments', args[1].value[1:-1])

            @lark.v_args(inline=True)
            def monomer_position(self, *args):
                return ('monomer_position', int(args[0].value))

            @lark.v_args(inline=True)
            def subunit_idx(self, *args):
                return ('subunit_idx', int(args[0].value[1:-1]))

            @lark.v_args(inline=True)
            def atom_element(self, *args):
                return ('atom_element', args[0].value)

            @lark.v_args(inline=True)
            def atom_position(self, *args):
                return ('atom_position', int(args[0].value))

            @lark.v_args(inline=True)
            def atom_charge(self, *args):
                return ('atom_charge', args[0].value)

            @lark.v_args(inline=True)
            def atom_component_type(self, *args):
                return ('atom_component_type', args[0].value)

        tree = self._parser.parse(string)
        # print(tree.pretty())
        parse_tree_transformer = ParseTreeTransformer(self)
        bc_form = parse_tree_transformer.transform(tree)
        bc_form.clean()
        return bc_form

    def from_set(self, subunits):
        """ Set the subunits from a list of subunits

        Note: this method does not support crosslinks

        Args:
            subunits: (:obj:`list`): list representation of a complex. For example::

                [
                    {'id': 'ABC_A', 'stoichiometry': 2},
                    {'id': 'ABC_B', 'stoichiometry': 3},
                ]

        Returns:
            :obj:`BcForm`: this complex

        Raises:
            :obj:`ValueError`: subunit has no 'id' key
            :obj:`ValueError`: subunit has no 'stoichiometry' key
        """
        self.subunits = []
        self.crosslinks = []

        for subunit in subunits:
            new_subunit = {}

            # process id of subunit
            if 'id' in subunit:
                new_subunit['id'] = subunit['id']
            else:
                raise ValueError('`subunit` has no `id`')

            # process stoichiometry of subunit
            if 'stoichiometry' in subunit:
                new_subunit['stoichiometry'] = subunit['stoichiometry']
            else:
                raise ValueError('`subunit` has no `stoichiometry`')

            self.subunits.append(Subunit(id=new_subunit['id'], stoichiometry=new_subunit['stoichiometry']))

        self.clean()

        return self

    def clean(self):
        """ Clean up the subunits and the crosslinks

        For example, convert `1 * a + 1 * a` to `2 * a`

        """
        subunits_cleaned = []
        subunit_unique_ids = []
        for subunit in self.subunits:
            if isinstance(subunit, Subunit):
                id = subunit.id
                if id not in subunit_unique_ids:
                    subunit_unique_ids.append(id)
                    subunits_cleaned.append(subunit)
                else:
                    next(subunit_cleaned for subunit_cleaned in subunits_cleaned if subunit_cleaned.id == id).stoichiometry += subunit.stoichiometry
            elif isinstance(subunit, BcForm):
                subunit.clean()
                subunits_cleaned.append(subunit)

        self.subunits = subunits_cleaned

    def get_formula(self, subunit_formulas=None):
        """ Get the empirical formula

        * If user wants to calculate formula of nested BcForm, where some subunits
          are BcForm objects, then the subunit BcForms must be able to calculate
          its own formula through structure

        Args:
            subunit_formulas (:obj:`dict` or :obj:`None`): dictionary of subunit ids and empirical formulas

        Returns:
            :obj:`EmpiricalFormula`: the empirical formula of the BcForm

        Raises:
            :obj:`ValueError`: subunit formulas does not include all subunits
            :obj:`ValueError`: Not all subunits have defined formula
        """

        formula = EmpiricalFormula()

        # subunits
        if subunit_formulas is None:
            for subunit in self.subunits:
                if subunit.get_formula() is None:
                    raise ValueError('Not all subunits have defined formula')
                formula += subunit.get_formula()
        else:
            for subunit in self.subunits:
                if isinstance(subunit, BcForm):
                    formula += subunit.get_formula()
                else:
                    if subunit.id not in subunit_formulas:
                        raise ValueError('subunit_formulas must include all subunits')
                    else:
                        formula += subunit.get_formula(formula=subunit_formulas[subunit.id])

        # crosslinks
        for crosslink in self.crosslinks:
            for atom in itertools.chain(crosslink.get_l_displaced_atoms(), crosslink.get_r_displaced_atoms()):
                formula[atom.element] -= 1
        return formula

    def get_mol_wt(self, subunit_mol_wts=None):
        """ Get the molecular weight

        * If user wants to calculate molecular weight of nested BcForm, where
          some subunits are BcForm objects, then the subunit BcForms must be able
          to calculate its own molecular weight through structure

        Args:
            subunit_formulas (:obj:`dict` or :obj:`None`): dictionary of subunit ids and molecular weights

        Returns:
            :obj:`float`: the molecular weight of the BcForm

        Raises:
            :obj:`ValueError`: subunit_mol_wts does not include all subunits
            :obj:`ValueError`: Not all subunits have defined molecular weight
        """
        mol_wt = 0.0

        # subunits
        if subunit_mol_wts is None:
            for subunit in self.subunits:
                if subunit.get_mol_wt() is None:
                    raise ValueError('Not all subunits have defined molecular weight')
                mol_wt += subunit.get_mol_wt()
        else:
            for subunit in self.subunits:
                if isinstance(subunit, BcForm):
                    mol_wt += subunit.get_mol_wt()
                else:
                    if subunit.id not in subunit_mol_wts:
                        raise ValueError('subunit_mol_wts must include all subunits')
                    else:
                        mol_wt += subunit.get_mol_wt(mol_wt=subunit_mol_wts[subunit.id])

        # crosslinks
        for crosslink in self.crosslinks:
            for atom in itertools.chain(crosslink.get_l_displaced_atoms(), crosslink.get_r_displaced_atoms()):
                mol_wt -= EmpiricalFormula(atom.element).get_molecular_weight()

        return mol_wt

    def get_charge(self, subunit_charges=None):
        """ Get the total charge

        * If user wants to calculate charge of nested BcForm, where
          some subunits are BcForm objects, then the subunit BcForms must be able
          to calculate its own charge through structure

        Args:
            subunit_formulas (:obj:`dict` or :obj:`None`): dictionary of subunit ids and charges

        Returns:
            :obj:`int`: the total charge of the BcForm

        Raises:
            :obj:`ValueError`: subunit_charges does not include all subunits
            :obj:`ValueError`: Not all subunits have defined charge
        """
        charge = 0

        # subunits
        if subunit_charges is None:
            for subunit in self.subunits:
                if subunit.get_charge() is None:
                    raise ValueError('Not all subunits have defined charge')
                charge += subunit.get_charge()
        else:
            for subunit in self.subunits:
                if isinstance(subunit, BcForm):
                    charge += subunit.get_charge()
                else:
                    if subunit.id not in subunit_charges:
                        raise ValueError('subunit_charges must include all subunits')
                    else:
                        charge += subunit.get_charge(charge=subunit_charges[subunit.id])

        # crosslinks
        for crosslink in self.crosslinks:
            for atom in itertools.chain(crosslink.get_l_displaced_atoms(), crosslink.get_r_displaced_atoms()):
                charge -= atom.charge

        # return the total charge
        return charge

    def validate(self):
        """ Check if the BcForm is valid

        * Check if the crosslinking subunit is in the subunit list and if the `subunit_idx` is valid

        Returns:
            :obj:`list` of :obj:`str`: list of errors, if any

        """
        errors = []

        # crosslinks
        self_subunits_subunits = [subunit for subunit in self.subunits if isinstance(subunit, Subunit)]
        self_subunits_bcforms = [subunit for subunit in self.subunits if isinstance(subunit, BcForm)]

        atom_types = ['l_bond_atoms', 'l_displaced_atoms', 'r_bond_atoms', 'r_displaced_atoms']
        for i_crosslink, crosslink in enumerate(self.crosslinks):
            for atom_type in atom_types:
                for i_atom, atom in enumerate(getattr(crosslink, 'get_'+atom_type)()):
                    # check if subunit is present
                    if atom.subunit not in [subunit.id for subunit in self_subunits_subunits]:
                        errors.append("'{}[{}]' of crosslink {} must belong to a subunit in self.subunits".format(
                            atom_type, i_atom, i_crosslink + 1))
                    # check subunit index
                    elif atom.subunit_idx is None:
                        if next(subunit for subunit in self_subunits_subunits if subunit.id == atom.subunit).stoichiometry > 1:
                            errors.append("crosslink {} contains multiple subunit '{}', so the subunit_idx of atom '{}[{}]' cannot be None".format(
                                i_crosslink + 1, atom.subunit, atom_type, i_atom))
                    elif atom.subunit_idx > next(subunit for subunit in self_subunits_subunits if subunit.id == atom.subunit).stoichiometry:
                        errors.append("'{}[{}]' of crosslink {} must belong to a subunit whose index is "
                                      "valid in terms of the stoichiometry of the subunit".format(
                                          atom_type, i_atom, i_crosslink + 1))

        for self_subunits_bcform in self_subunits_bcforms:
            errors.extend(self_subunits_bcform.validate())

        return errors

    def is_equal(self, other):
        """ Check if two complexes are semantically equal (same subunits and crosslinks)

        Args:
            other (:obj:`BcForm`): another complex

        Returns:
            :obj:`bool`: :obj:`True`, if the complexes are semantically equal

        """

        if self is other:
            return True
        if self.__class__ != other.__class__:
            return False

        # test subunits
        if len(self.subunits) != len(other.subunits):
            return False
        for self_subunit in self.subunits:
            found = False
            for other_subunit in other.subunits:
                if self_subunit.is_equal(other_subunit):
                    found = True
                    break
            if not found:
                return False

        # test crosslinks
        if len(self.crosslinks) != len(other.crosslinks):
            return False
        for self_crosslink in self.crosslinks:
            found = False
            for other_crosslink in other.crosslinks:
                if self_crosslink.is_equal(other_crosslink):
                    found = True
                    break
            if not found:
                return False

        return True

    def get_subunit_attribute(self, subunit_id, attribute):
        """ Set attribute (stoichiometry, structure) of subunit by id

        Args:
            subunit_id (:obj:`str`): id of subunit
            attribute (:obj:`str`): attribute to set

        Returns:
            :obj:`int` for stoichiometry, :obj:`bpforms.BpForm`, :obj:`openbabel.OBMol`, or None for structure

        Raises:
            :obj:`ValueError`: No Subunit with subunit_id
            :obj:`ValueError`: Invalid attribute
        """

        subunit = next((subunit for subunit in self.subunits if isinstance(subunit, Subunit) and subunit.id == subunit_id), None)
        if subunit is None:
            raise ValueError('No Subunit with subunit_id')

        if attribute not in ['stoichiometry', 'structure', 'formula', 'mol_wt', 'charge']:
            raise ValueError('Invalid attribute')

        return getattr(subunit, attribute)

    def set_subunit_attribute(self, subunit_id, attribute, value):
        """ Set attribute (stoichiometry, structure) of subunit by id

        Args:
            subunit_id (:obj:`str`): id of subunit
            attribute (:obj:`str`): attribute to set
            value (:obj:`int` for stoichiometry, :obj:`bpforms.BpForm`, :obj:`openbabel.OBMol`, or None for structure): value

        Raises:
            :obj:`ValueError`: No Subunit with subunit_id
            :obj:`ValueError`: Invalid attribute
        """

        subunit = next((subunit for subunit in self.subunits if isinstance(subunit, Subunit) and subunit.id == subunit_id), None)
        if subunit is None:
            raise ValueError('No Subunit with subunit_id')

        if attribute not in ['stoichiometry', 'structure', 'formula', 'mol_wt', 'charge']:
            raise ValueError('Invalid attribute')

        setattr(subunit, attribute, value)

    def get_structure(self):
        """ Get an Open Babel molecule of the structure

        Returns:
            :obj:`openbabel.OBMol`: Open Babel molecule of the structure
        """
        mol = openbabel.OBMol()

        atom_maps = []
        n_atoms = [0]

        # subunits
        for i_subunit, subunit in enumerate(self.subunits):
            structure, atom_map = subunit.get_structure()
            mol += structure

            n_atoms.append(n_atoms[-1]+structure.NumAtoms())

            for subunit_map in atom_map.values():
                for monomer in subunit_map.values():
                    for atom_type in monomer.values():
                        for i_atom, atom in atom_type.items():
                            atom_type[i_atom] = atom+n_atoms[i_subunit]
            atom_maps.append(atom_map)

        # print(atom_maps)

        for atom_map in atom_maps:
            for subunit_map in atom_map.values():
                for monomer in subunit_map.values():
                    for atom_type in monomer.values():
                        for i_atom, atom in atom_type.items():
                            atom_type[i_atom] = mol.GetAtom(atom)

        # mol.AddHydrogens()

        # print(atom_maps)
        # for i in range(mol.NumAtoms()):
        #     print(mol.GetAtom(i+1), mol.GetAtom(i+1).GetAtomicNum())

        bonding_hydrogens = []
        # crosslinks
        # get the atoms
        crosslinks_atoms = []
        for crosslink in self.crosslinks:
            crosslink_atoms = {}
            crosslinks_atoms.append(crosslink_atoms)
            for atom_type in ['l_bond_atoms', 'r_bond_atoms', 'l_displaced_atoms', 'r_displaced_atoms']:
                crosslink_atoms[atom_type] = []
                for atom_md in getattr(crosslink, 'get_'+atom_type)():
                    i_subunit = [i for i in range(len(self.subunits)) if self.subunits[i].id == atom_md.subunit][0]
                    subunit_idx = 1 if atom_md.subunit_idx is None else atom_md.subunit_idx
                    atom = atom_maps[i_subunit][subunit_idx][atom_md.monomer][atom_md.component_type][atom_md.position]
                    if atom_md.element == 'H' and atom.GetAtomicNum() != 1:
                        atom = get_hydrogen_atom(atom, bonding_hydrogens, (i_subunit, subunit_idx -
                                                                           1, atom_md.monomer-1, atom_md.component_type))
                    crosslink_atoms[atom_type].append((atom, i_subunit, subunit_idx, atom_md.monomer, atom_md.position, atom_md.charge))

        # print(OpenBabelUtils.export(mol, format='smiles', options=[]))

        # make the crosslink bonds
        for crosslink, atoms in zip(self.crosslinks, crosslinks_atoms):

            for atom, i_subunit, subunit_idx, i_monomer, i_position, atom_charge in itertools.chain(atoms['l_displaced_atoms'], atoms['r_displaced_atoms']):
                if atom:
                    atom_2 = atom_maps[i_subunit][subunit_idx][i_monomer]['monomer'].get(i_position, None)
                    if atom_2 and atom_2.GetIdx() == atom.GetIdx():
                        atom_maps[i_subunit][subunit_idx][i_monomer]['monomer'].pop(i_position)
                    assert mol.DeleteAtom(atom, True)

            for (l_atom, _, _, _, _, l_atom_charge), (r_atom, _, _, _, _, r_atom_charge) in zip(atoms['l_bond_atoms'], atoms['r_bond_atoms']):
                bond = openbabel.OBBond()
                bond.SetBegin(l_atom)
                bond.SetEnd(r_atom)
                bond.SetBondOrder(crosslink.get_order().value)
                stereo = crosslink.get_stereo()
                if stereo is None:
                    pass
                elif stereo == BondStereo.wedge:
                    bond.SetWedge()
                elif stereo == BondStereo.hash:
                    bond.SetHash()
                elif stereo == BondStereo.up:
                    bond.SetUp()
                elif stereo == BondStereo.down:
                    bond.SetDown()
                assert mol.AddBond(bond)

                if l_atom_charge:
                    l_atom.SetFormalCharge(l_atom.GetFormalCharge() + l_atom_charge)

                if r_atom_charge:
                    r_atom.SetFormalCharge(r_atom.GetFormalCharge() + r_atom_charge)

        for atom_map in atom_maps:
            for subunit_map in atom_map.values():
                for monomer in subunit_map.values():
                    for atom_type in monomer.values():
                        for i_atom, atom in atom_type.items():
                            if atom is not None:
                                atom_type[i_atom] = atom.GetIdx()

        return mol, atom_maps

    def export(self, format='smiles', options=[]):
        """ Export the structure to string

        Args:
            format (:obj:`str`, optional): export format
            options (:obj:`list`, optional): export options

        Returns:
            :obj:`str`: exported string representation of the structure

        """
        return OpenBabelUtils.export(self.get_structure()[0], format=format, options=options)

    def get_genomic_image(self, seq_features=None, width=1200, cols=2, nt_per_track=80, **kwargs):
        """ Get a genomic visualization of the :obj:`BpForm`

        Args:
            seq_features (:obj:`dict`): list of features each
                represented by a dictionary with three keys

                * label (:obj:`str`): description of the type of feature
                * color (:obj:`str`): color
                * positions (:obj:`list` of :obj:`list` of :obj:`int`): list of position
                  ranges of the type of feature
            width (:obj:`int`, optional): width
            cols (:obj:`int`, optional): number of columns of polymers
            nt_per_track (:obj:`int`, optional): number of nucleotides per track

        The method also accepts the same arguments as 
            :obj:`bpforms.util.gen_genomic_viz`.

        Returns:
            :obj:`str`: SVG image
        """
        polymers = []
        polymer_labels = {}
        polymer_idxs = {}
        i_tot_subunit = 0
        for subunit in self.subunits:
            if isinstance(subunit.structure, bpforms.BpForm):
                for i_repeat in range(subunit.stoichiometry):
                    polymers.append(subunit.structure)
                    if subunit.stoichiometry == 1:
                        polymer_labels[i_tot_subunit] = subunit.id
                    else:
                        polymer_labels[i_tot_subunit] = '{} ({})'.format(subunit.id, i_repeat + 1)
                    polymer_idxs[(subunit.id, i_repeat)] = len(polymer_idxs)
                    i_tot_subunit += 1

        inter_crosslinks = []
        for crosslink in self.crosslinks:
            if len(crosslink.get_l_bond_atoms()) >= 1:
                l = crosslink.get_l_bond_atoms()[0]
                r = crosslink.get_r_bond_atoms()[0]

                l_subunit = polymer_idxs[(l.subunit, l.subunit_idx - 1)]
                r_subunit = polymer_idxs[(r.subunit, r.subunit_idx - 1)]

                if isinstance(crosslink, OntologyCrosslink):
                    tooltip = crosslink.type
                else:
                    tooltip = None

                inter_crosslinks.append(InlineCrosslink(
                    l_bond_atoms=[Atom(str(l_subunit), l.element, l.position, l.monomer)],
                    r_bond_atoms=[Atom(str(r_subunit), r.element, r.position, r.monomer)],
                    comments=tooltip))

        seq_features = seq_features or []
        flat_seq_features = []
        for seq_feature in seq_features:
            flat_seq_feature = {
                'label': seq_feature['label'],
                'color': seq_feature['color'],
                'positions': {},
            }
            flat_seq_features.append(flat_seq_feature)
            for subunit in self.subunits:
                if subunit.id in seq_feature['positions']:
                    for i_repeat in range(subunit.stoichiometry):
                        flat_seq_feature['positions'][polymer_idxs[(subunit.id, i_repeat)]] = \
                            seq_feature['positions'][subunit.id]

        return gen_genomic_viz(polymers, inter_crosslinks=inter_crosslinks,
                               polymer_labels=polymer_labels, seq_features=flat_seq_features,
                               width=width, cols=cols, nt_per_track=nt_per_track,
                               **kwargs)


def get_hydrogen_atom(parent_atom, bonding_hydrogens, i_monomer):
    """ Get a hydrogen atom attached to a parent atom
    Args:
        parent_atom (:obj:`openbabel.OBAtom`): parent atom
        bonding_hydrogens (:obj:`list`): hydrogens that have already been gotten
        i_monomer (:obj:`int`): index of parent monomer in sequence
    Returns:
        :obj:`openbabel.OBAtom`: hydrogen atom
    """
    for other_atom in openbabel.OBAtomAtomIter(parent_atom):
        if other_atom.GetAtomicNum() == 1:
            tmp = (i_monomer, other_atom.GetIdx())
            if tmp not in bonding_hydrogens:  # hydrogen
                bonding_hydrogens.append(tmp)
                return other_atom
    return None


def draw_xlink(xlink_name, include_all_hydrogens=False, remove_hydrogens=True, show_atom_nums=False,
               l_color=0x00ea4e, r_color=0x00adef, bond_color=0xea4200,
               width=300, height=200, atom_label_font_size=0.6,
               image_format='png', include_xml_header=False):
    """ Generate an image of a crosslink

    Args:
        xlink_name (:obj:`str`): name of xlink
        include_all_hydrogens (:obj:`bool`, optional): if :obj:`True`, show all hydrogens
        remove_hydrogens (:obj:`bool`, optional): if :obj:`True`, remove all hydrogens
        show_atom_nums (:obj:`bool`, optional): if :obj:`True`, show atom numbers
        l_color (:obj:`int`, optional): color of left monomer
        r_color (:obj:`int`, optional): color of right monomer
        bond_color (:obj:`int`, optional): color of crosslinking bond
        width (:obj:`int`, optional): width
        height (:obj:`int`, optional): height
        atom_label_font_size (:obj:`float`, optional): relative font size of atom labels
        image_format (:obj:`str`, optional): format of image
        include_xml_header (:obj:`bool`, optional): if :obj:`True`, include XML header for SVG image

    Returns:
        :obj:`object`: image
    Raises:
        :obj:`KeyError`: Unknown crosslink id
        :obj:`ValueError`: Unknown monomer alphabet
    """
    xlink_detail_dict = parse_yaml(_xlink_filename)
    if xlink_name in xlink_detail_dict:
        xlink_details = xlink_detail_dict[xlink_name]
    else:
        raise KeyError('Unknown crosslink id')

    # create the bcform
    form = BcForm()

    l_monomer_alphabet = xlink_details['l_monomer_alphabet']
    if l_monomer_alphabet == 'protein':
        l_monomer = bpforms.ProteinForm().from_str(xlink_details['l_monomer'])
    elif l_monomer_alphabet == 'dna':
        l_monomer = bpforms.DnaForm().from_str(xlink_details['l_monomer'])
    elif l_monomer_alphabet == 'rna':
        l_monomer = bpforms.RnaForm().from_str(xlink_details['l_monomer'])
    else:
        raise ValueError('Unknown monomer alphabet')

    r_monomer_alphabet = xlink_details['r_monomer_alphabet']
    if r_monomer_alphabet == 'protein':
        r_monomer = bpforms.ProteinForm().from_str(xlink_details['r_monomer'])
    elif r_monomer_alphabet == 'dna':
        r_monomer = bpforms.DnaForm().from_str(xlink_details['r_monomer'])
    elif r_monomer_alphabet == 'rna':
        r_monomer = bpforms.RnaForm().from_str(xlink_details['r_monomer'])
    else:
        raise ValueError('Unknown monomer alphabet')

    form.subunits.append(Subunit(id='l', stoichiometry=1, structure=l_monomer))
    form.subunits.append(Subunit(id='r', stoichiometry=1, structure=r_monomer))
    form.crosslinks.append(OntologyCrosslink(type=xlink_name, l_subunit='l', l_monomer=1, r_subunit='r', r_monomer=1))

    structure, atom_maps = form.get_structure()

    el_table = openbabel.OBElementTable()

    atom_labels = []
    for i_atom in atom_maps[0][1][1]['monomer'].values():
        if structure.GetAtom(i_atom).GetAtomicNum() > 1:
            break
    atom_labels.append({
        'position': i_atom,
        'element': el_table.GetSymbol(structure.GetAtom(i_atom).GetAtomicNum()),
        'label': xlink_details['l_monomer'],
        'color': l_color})

    for i_atom in atom_maps[1][1][1]['monomer'].values():
        if structure.GetAtom(i_atom).GetAtomicNum() > 1:
            break
    atom_labels.append({
        'position': i_atom,
        'element': el_table.GetSymbol(structure.GetAtom(i_atom).GetAtomicNum()),
        'label': xlink_details['r_monomer'],
        'color': r_color})

    atom_sets = []
    for monomer_atom_map, color in zip(atom_maps, [l_color, r_color]):
        positions = []
        elements = []
        for i_atom in monomer_atom_map[1][1]['monomer'].values():
            atom = structure.GetAtom(i_atom)
            if atom:
                positions.append(i_atom)
                elements.append(el_table.GetSymbol(structure.GetAtom(i_atom).GetAtomicNum()))
        atom_sets.append({'positions': positions, 'elements': elements, 'color': color})

    i_l_atom = atom_maps[0][1][1]['monomer'][form.crosslinks[0].get_l_bond_atoms()[0].position]
    i_r_atom = atom_maps[1][1][1]['monomer'][form.crosslinks[0].get_r_bond_atoms()[0].position]

    bond_sets = [{
        'positions': [[i_l_atom, i_r_atom]],
        'elements': [[
            el_table.GetSymbol(structure.GetAtom(i_l_atom).GetAtomicNum()),
            el_table.GetSymbol(structure.GetAtom(i_r_atom).GetAtomicNum()),
        ]],
        'color': bond_color,
    }]

    if include_all_hydrogens:
        structure.AddHydrogens()

    if remove_hydrogens:
        atom_refs = {}
        for i_atom in range(1, structure.NumAtoms() + 1):
            atom_refs[i_atom] = structure.GetAtom(i_atom)

        structure.DeleteHydrogens()

        atoms = [atom for atom in openbabel.OBMolAtomIter(structure)]

        for label in atom_labels:
            label['position'] = atom_refs[label['position']].GetIdx()

        for atom_set in atom_sets:
            atom_set['positions'] = [atom_refs[position].GetIdx() for position in atom_set['positions'] if atom_refs[position] in atoms]
            atom_set['elements'] = [el_table.GetSymbol(structure.GetAtom(position).GetAtomicNum()) for position in atom_set['positions']]

        for bond_set in bond_sets:
            bond_set['positions'] = [[atom_refs[position[0]].GetIdx(), atom_refs[position[1]].GetIdx()]
                                     for position in bond_set['positions']]

    cml = OpenBabelUtils.export(structure, 'cml')

    if not draw_molecule:
        raise ImportError("ChemAxon Marvin must be installed")
    return draw_molecule(cml, 'cml', image_format=image_format,
                         atom_labels=atom_labels, atom_label_font_size=atom_label_font_size,
                         atom_sets=atom_sets, bond_sets=bond_sets,
                         show_atom_nums=show_atom_nums,
                         width=width, height=height, include_xml_header=include_xml_header)