choderalab/protons

View on GitHub
protons/app/forcefield_generators.py

Summary

Maintainability
D
2 days
Test Coverage
#!/usr/bin/env python
"""
OpenMM ForceField residue template generators.

Adapted from openmoltools: https://github.com/choderalab/openmoltools.

Original license and copyright:
Copyright 2017 Kyle A. Beauchamp, John D. Chodera, David L. Mobley

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

"""

from __future__ import absolute_import

import numpy as np
import os, os.path, sys
from simtk.openmm.app import ForceField
from .amber import run_antechamber
from .openeye import get_charges, assignELF10charges
from simtk.openmm.app.element import Element
import parmed

if sys.version_info >= (3, 0):
    from io import StringIO
    from subprocess import getstatusoutput
else:
    from cStringIO import StringIO
    from commands import getstatusoutput


def generateTopologyFromOEMol(molecule):
    """
    Generate an OpenMM Topology object from an OEMol molecule.

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
        The molecule from which a Topology object is to be generated.

    Returns
    -------
    topology : simtk.openmm.app.Topology
        The Topology object generated from `molecule`.

    """
    # Create a Topology object with one Chain and one Residue.
    from simtk.openmm.app import Topology

    topology = Topology()
    chain = topology.addChain()
    resname = molecule.GetTitle()
    residue = topology.addResidue(resname, chain)

    # Create atoms in the residue.
    for atom in molecule.GetAtoms():
        name = atom.GetName()
        element = Element.getByAtomicNumber(atom.GetAtomicNum())
        atom = topology.addAtom(name, element, residue)

    # Create bonds.
    atoms = {atom.name: atom for atom in topology.atoms()}
    for bond in molecule.GetBonds():
        topology.addBond(atoms[bond.GetBgn().GetName()], atoms[bond.GetEnd().GetName()])

    return topology


def _ensureUniqueAtomNames(molecule):
    """
    Ensure all atom names are unique and not blank.
    If any atom names are degenerate or blank, Tripos atom names are assigned to all atoms.

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
        The molecule to be modified

    """
    from openeye import oechem

    atom_names = set()
    atom_names_are_unique = True
    for atom in molecule.GetAtoms():
        atom_name = atom.GetName()
        if (atom_name in atom_names) or (atom_name == ""):
            atom_names_are_unique = False
        atom_names.add(atom_name)
    if not atom_names_are_unique:
        oechem.OETriposAtomNames(molecule)


def generateOEMolFromTopologyResidue(residue, geometry=False, tripos_atom_names=False):
    """
    Generate an OpenEye OEMol molecule from an OpenMM Topology Residue.

    Parameters
    ----------
    residue : simtk.openmm.app.topology.Residue
        The topology Residue from which an OEMol is to be created.
        An Exception will be thrown if this residue has external bonds.
    geometry : bool, optional, default=False
        If True, will generate a single configuration with OEOmega.
        Note that stereochemistry will be *random*.
    tripos_atom_names : bool, optional, default=False
        If True, will generate and assign Tripos atom names.

    Returns
    -------
    molecule : openeye.oechem.OEMol
        The OEMol molecule corresponding to the topology.
        Atom order will be preserved and bond orders assigned.

    The Antechamber `bondtype` program will be used to assign bond orders, and these
    will be converted back into OEMol bond type assignments.

    Note that there is no way to preserve stereochemistry since `Residue` does
    not note stereochemistry in any way.

    """
    # Raise an Exception if this residue has external bonds.
    if len(list(residue.external_bonds())) > 0:
        raise Exception(
            "Cannot generate an OEMol from residue '%s' because it has external bonds."
            % residue.name
        )

    from openeye import oechem

    # Create OEMol where all atoms have bond order 1.
    molecule = oechem.OEMol()
    molecule.SetTitle(residue.name)  # name molecule after first residue
    for atom in residue.atoms():
        oeatom = molecule.NewAtom(atom.element.atomic_number)
        oeatom.SetName(atom.name)
        oeatom.AddData("topology_index", atom.index)
    oeatoms = {oeatom.GetName(): oeatom for oeatom in molecule.GetAtoms()}
    for (atom1, atom2) in residue.bonds():
        order = 1
        molecule.NewBond(oeatoms[atom1.name], oeatoms[atom2.name], order)

    # Write out a mol2 file without altering molecule.
    import tempfile

    tmpdir = tempfile.mkdtemp()
    mol2_input_filename = os.path.join(tmpdir, "molecule-before-bond-perception.mol2")
    ac_output_filename = os.path.join(tmpdir, "molecule-after-bond-perception.ac")
    ofs = oechem.oemolostream(mol2_input_filename)
    m2h = True
    substruct = False
    oechem.OEWriteMol2File(ofs, molecule, m2h, substruct)
    ofs.close()
    # Run Antechamber bondtype
    import subprocess

    # command = 'bondtype -i %s -o %s -f mol2 -j full' % (mol2_input_filename, ac_output_filename)
    command = "antechamber -i %s -fi mol2 -o %s -fo ac -j 2" % (
        mol2_input_filename,
        ac_output_filename,
    )
    [status, output] = getstatusoutput(command)

    # Define mapping from GAFF bond orders to OpenEye bond orders.
    order_map = {1: 1, 2: 2, 3: 3, 7: 1, 8: 2, 9: 5, 10: 5}
    # Read bonds.
    infile = open(ac_output_filename)
    lines = infile.readlines()
    infile.close()
    antechamber_bond_types = list()
    for line in lines:
        elements = line.split()
        if elements[0] == "BOND":
            antechamber_bond_types.append(int(elements[4]))
    oechem.OEClearAromaticFlags(molecule)
    for (bond, antechamber_bond_type) in zip(
        molecule.GetBonds(), antechamber_bond_types
    ):
        # bond.SetOrder(order_map[antechamber_bond_type])
        bond.SetIntType(order_map[antechamber_bond_type])
    oechem.OEFindRingAtomsAndBonds(molecule)
    oechem.OEKekulize(molecule)
    oechem.OEAssignFormalCharges(molecule)
    oechem.OEAssignAromaticFlags(molecule, oechem.OEAroModelOpenEye)

    # Clean up.
    os.unlink(mol2_input_filename)
    os.unlink(ac_output_filename)
    os.rmdir(tmpdir)

    # Generate Tripos atom names if requested.
    if tripos_atom_names:
        oechem.OETriposAtomNames(molecule)

    # Assign geometry
    if geometry:
        from openeye import oeomega

        omega = oeomega.OEOmega()
        omega.SetMaxConfs(1)
        omega.SetIncludeInput(False)
        omega.SetStrictStereo(False)
        omega(molecule)

    return molecule


def _computeNetCharge(molecule):
    """
    Compute the net formal charge on the molecule.
    Formal charges are assigned by this function.

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
        The molecule for which a net formal charge is to be computed

    Returns
    -------
    net_charge : float
        The net formal charge on the molecule

    """
    from openeye import oechem

    oechem.OEAssignFormalCharges(molecule)
    charges = [atom.GetFormalCharge() for atom in molecule.GetAtoms()]
    net_charge = np.array(charges).sum()
    return net_charge


def _writeMolecule(molecule, output_filename, standardize=True):
    """
    Write the molecule to a file.

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
        The molecule to write (will be modified by writer).
    output_filename : str
        The filename of file to be written; type is autodetected by extension.
    standardize : bool, optional, default=True
        Standardize molecular properties such as atom names in the output file.

    """
    from .openeye import molecule_to_mol2

    molecule_to_mol2(
        molecule,
        tripos_mol2_filename=output_filename,
        conformer=0,
        residue_name=molecule.GetTitle(),
        standardize=standardize,
    )
    # from openeye import oechem
    # ofs = oechem.oemolostream(output_filename)
    # oechem.OEWriteMolecule(ofs, molecule)
    # ofs.close()


def generateResidueTemplate(
    molecule, residue_atoms=None, normalize=True, gaff_version="gaff"
):
    """
    Generate an residue template for simtk.openmm.app.ForceField using GAFF/AM1-BCC.

    This requires the OpenEye toolkit.

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
        The molecule to be parameterized.
        The molecule must have explicit hydrogens.
        Net charge will be inferred from the net formal charge on each molecule.
        Partial charges will be determined automatically using oequacpac and canonical AM1-BCC charging rules.
    residue_atomset : set of OEAtom, optional, default=None
        If not None, only the atoms in this set will be used to construct the residue template
    normalize : bool, optional, default=True
        If True, normalize the molecule by checking aromaticity, adding
        explicit hydrogens, and renaming by IUPAC name.
    gaff_version : str, default = 'gaff'
        One of ['gaff', 'gaff2']; selects which atom types to use.


    Returns
    -------
    template : simtk.openmm.app.forcefield._TemplateData
        Residue template for ForceField using atom types and parameters from `gaff.xml` or `gaff2.xml`.
    additional_parameters_ffxml : str
        Contents of ForceField `ffxml` file defining additional parameters from parmchk(2).

    Notes
    -----
    The residue template will be named after the molecule title.
    This method preserves stereochemistry during AM1-BCC charge parameterization.
    Atom names in molecules will be assigned Tripos atom names if any are blank or not unique.

    """
    # Set the template name based on the molecule title plus a globally unique UUID.
    from uuid import uuid4

    template_name = molecule.GetTitle() + "-" + str(uuid4())

    # If any atom names are not unique, atom names
    _ensureUniqueAtomNames(molecule)

    # Compute net formal charge.
    net_charge = _computeNetCharge(molecule)

    # Generate canonical AM1-BCC charges and a reference conformation.
    molecule = get_charges(
        molecule, strictStereo=False, keep_confs=1, normalize=normalize
    )

    # DEBUG: This may be necessary.
    molecule.SetTitle("MOL")

    # Create temporary directory for running antechamber.
    import tempfile

    tmpdir = tempfile.mkdtemp()
    prefix = "molecule"
    input_mol2_filename = os.path.join(tmpdir, prefix + ".tripos.mol2")
    gaff_mol2_filename = os.path.join(tmpdir, prefix + ".gaff.mol2")
    frcmod_filename = os.path.join(tmpdir, prefix + ".frcmod")

    # Write Tripos mol2 file as antechamber input.
    _writeMolecule(molecule, input_mol2_filename, standardize=normalize)

    # Parameterize the molecule with antechamber.
    run_antechamber(
        template_name,
        input_mol2_filename,
        charge_method=None,
        net_charge=net_charge,
        gaff_mol2_filename=gaff_mol2_filename,
        frcmod_filename=frcmod_filename,
        gaff_version=gaff_version,
    )

    # Read the resulting GAFF mol2 file as a ParmEd structure.
    from openeye import oechem

    ifs = oechem.oemolistream(gaff_mol2_filename)
    ifs.SetFlavor(
        oechem.OEFormat_MOL2,
        oechem.OEIFlavor_MOL2_DEFAULT
        | oechem.OEIFlavor_MOL2_M2H
        | oechem.OEIFlavor_MOL2_Forcefield,
    )
    m2h = True
    oechem.OEReadMolecule(ifs, molecule)
    ifs.close()

    # If residue_atoms = None, add all atoms to the residues
    if residue_atoms == None:
        residue_atoms = [atom for atom in molecule.GetAtoms()]

    # Modify partial charges so that charge on residue atoms is integral.
    residue_charge = 0.0
    sum_of_absolute_charge = 0.0
    for atom in residue_atoms:
        charge = atom.GetPartialCharge()
        residue_charge += charge
        sum_of_absolute_charge += abs(charge)
    excess_charge = residue_charge - net_charge
    if sum_of_absolute_charge == 0.0:
        sum_of_absolute_charge = 1.0
    for atom in residue_atoms:
        charge = atom.GetPartialCharge()
        atom.SetPartialCharge(
            charge + excess_charge * (abs(charge) / sum_of_absolute_charge)
        )

    # Create residue template.
    template = ForceField._TemplateData(template_name)
    for (index, atom) in enumerate(molecule.GetAtoms()):
        atomname = atom.GetName()
        typename = atom.GetType()
        element = Element.getByAtomicNumber(atom.GetAtomicNum())
        charge = atom.GetPartialCharge()
        parameters = {"charge": charge}
        atom_template = ForceField._TemplateAtomData(
            atomname, typename, element, parameters
        )
        template.atoms.append(atom_template)
    for bond in molecule.GetBonds():
        if (bond.GetBgn() in residue_atoms) and (bond.GetEnd() in residue_atoms):
            template.addBondByName(bond.GetBgn().GetName(), bond.GetEnd().GetName())
        elif (bond.GetBgn() in residue_atoms) and (bond.GetEnd() not in residue_atoms):
            template.addExternalBondByName(bond.GetBgn().GetName())
        elif (bond.GetBgn() not in residue_atoms) and (bond.GetEnd() in residue_atoms):
            template.addExternalBondByName(bond.GetEnd().GetName())

    # Generate ffxml file contents for parmchk-generated frcmod output.
    leaprc = StringIO("parm = loadamberparams %s" % frcmod_filename)
    params = parmed.amber.AmberParameterSet.from_leaprc(leaprc)
    params = parmed.openmm.OpenMMParameterSet.from_parameterset(params)
    ffxml = StringIO()
    params.write(ffxml)

    return template, ffxml.getvalue()


def generateForceFieldFromMolecules(
    molecules,
    ignoreFailures=False,
    generateUniqueNames=False,
    normalize=True,
    gaff_version="gaff",
    use_recommended: bool = True,
    omega_max_confs: int = 200,
):
    """
    Generate ffxml file containing additional parameters and residue templates for simtk.openmm.app.ForceField using GAFF/AM1-BCC.

    This requires the OpenEye toolkit.

    Parameters
    ----------
    molecules : list of openeye.oechem.OEMol
        The molecules to be parameterized.
        All molecules must have explicit hydrogens.
        Net charge will be inferred from the net formal charge on each molecule.
        Partial charges will be determined automatically using oequacpac and canonical AM1-BCC charging rules.
    ignoreFailures: bool, optional, default=False
        Determines whether to add a failed molecule to the list of failed molecules (True),
        or raise an Exception (False).
    generateUniqueNames : bool, optional, default=False
        If True, will generate globally unique names for templates.
    normalize : bool, optional, default=True
        If True, normalize the molecule by checking aromaticity, adding
        explicit hydrogens, and renaming by IUPAC name.
    gaff_version : str, default = 'gaff'
        One of ['gaff', 'gaff2']; selects which atom types to use.
    use_recommended : Use the recommended Elf10 charge method.
    omega_max_confs: Max number of conformers for omega.
        Set to -1 use dense conformer mode (for now only works with ELF10 method) helps with AM1BCC convergence issues.

    Returns
    -------
    ffxml : str
        Contents of ForceField `ffxml` file defining additional parameters from parmchk(2) and residue templates.
    failed_molecule_list : list of openeye.oechem.OEMol
        List of the oemols that could not be parameterized. Only returned if ignoreFailures=True

    Notes
    -----
    This method preserves stereochemistry during AM1-BCC charge parameterization.
    Residue template names will be set from molecule names.
    Atom names in molecules will be assigned Tripos atom names if any are blank or not unique.

    """
    if not generateUniqueNames:
        # Check template names are unique.
        template_names = set()
        for molecule in molecules:
            template_name = molecule.GetTitle()
            if template_name == "<0>":
                raise Exception("Molecule '%s' has invalid name" % template_name)
            if template_name in template_names:
                raise Exception(
                    "Molecule '%s' has template name collision." % template_name
                )
            template_names.add(template_name)

    nconf = omega_max_confs

    # Process molecules.
    import tempfile

    tmpdir = tempfile.mkdtemp()
    olddir = os.getcwd()
    os.chdir(tmpdir)
    leaprc = ""
    failed_molecule_list = []
    for (molecule_index, molecule) in enumerate(molecules):
        # Set the template name based on the molecule title.
        if generateUniqueNames:
            from uuid import uuid4

            template_name = molecule.GetTitle() + "-" + str(uuid4())
        else:
            template_name = molecule.GetTitle()

        # If any atom names are not unique, atom names
        _ensureUniqueAtomNames(molecule)

        # Compute net formal charge.
        net_charge = _computeNetCharge(molecule)

        # Generate canonical AM1-BCC charges and a reference conformation.
        if not ignoreFailures:
            if not use_recommended:
                molecule = get_charges(
                    molecule, strictStereo=False, keep_confs=1, normalize=normalize
                )
            else:
                molecule = assignELF10charges(
                    molecule, strictStereo=False, max_confs=nconf
                )
        else:
            try:
                if not use_recommended:
                    molecule = get_charges(
                        molecule, strictStereo=False, keep_confs=1, normalize=normalize
                    )
                else:
                    molecule = assignELF10charges(
                        molecule, strictStereo=False, max_confs=nconf
                    )
            except:
                failed_molecule_list.append(molecule)

        # Create a unique prefix.
        prefix = "molecule%010d" % molecule_index

        # Create temporary directory for running antechamber.
        input_mol2_filename = prefix + ".tripos.mol2"
        gaff_mol2_filename = prefix + ".gaff.mol2"
        frcmod_filename = prefix + ".frcmod"

        # Write Tripos mol2 file as antechamber input.
        _writeMolecule(molecule, input_mol2_filename, standardize=normalize)

        # Parameterize the molecule with antechamber.
        run_antechamber(
            prefix,
            input_mol2_filename,
            charge_method=None,
            net_charge=net_charge,
            gaff_mol2_filename=gaff_mol2_filename,
            frcmod_filename=frcmod_filename,
            gaff_version=gaff_version,
        )

        # Append to leaprc input for parmed.
        leaprc += "%s = loadmol2 %s\n" % (prefix, gaff_mol2_filename)
        leaprc += "loadamberparams %s\n" % frcmod_filename

    # Generate ffxml file contents for parmchk-generated frcmod output.
    leaprc = StringIO(leaprc)
    params = parmed.amber.AmberParameterSet.from_leaprc(leaprc)
    params = parmed.openmm.OpenMMParameterSet.from_parameterset(params)
    ffxml = StringIO()
    params.write(ffxml)

    # TODO: Clean up temporary directory.
    os.chdir(olddir)

    if ignoreFailures:
        return ffxml.getvalue(), failed_molecule_list
    else:
        return ffxml.getvalue()


def createStructureFromResidue(residue):
    # Create ParmEd structure for residue.
    structure = parmed.Structure()
    for a in residue.atoms():
        if a.element is None:
            atom = parmed.ExtraPoint(name=a.name)
        else:
            atom = parmed.Atom(
                atomic_number=a.element.atomic_number, name=a.name, mass=a.element.mass
            )
        structure.add_atom(atom, residue.name, residue.index, "A")
        atommap[a] = atom
    for a1, a2 in topology.bonds():
        structure.bonds.append(Bond(atommap[a1], atommap[a2]))

    return structure


def gaffTemplateGenerator(forcefield, residue, structure=None):
    """
    OpenMM ForceField residue template generator for GAFF/AM1-BCC.

    NOTE: This implementation currently only handles small molecules, not polymeric residues.
    NOTE: We presume we have already loaded the `gaff.xml` force definitions into ForceField.

    Parameters
    ----------
    forcefield : simtk.openmm.app.ForceField
        The ForceField object to which residue templates and/or parameters are to be added.
    residue : simtk.openmm.app.Topology.Residue
        The residue topology for which a template is to be generated.

    Returns
    -------
    success : bool
        If the generator is able to successfully parameterize the residue, `True` is returned.
        If the generator cannot parameterize the residue, it should return `False` and not modify `forcefield`.

    Note that there is no way to preserve stereochemistry since `Residue` does not specify stereochemistry in any way.
    Charge fitting is therefore performed on an indeterminate stereo form.

    """
    # Get a list of external bonds.
    external_bonds = [bond for bond in residue.external_bonds()]
    if len(external_bonds) > 0:
        # We can't parameterize residues with external bonds right now.
        return False

    # Generate an OpenEye OEMol molecule from the Topology Residue.
    molecule = generateOEMolFromTopologyResidue(residue)

    # Generate template and parameters.
    [template, ffxml] = generateResidueTemplate(molecule)

    # Register the template.
    forcefield.registerResidueTemplate(template)

    # Add the parameters.
    forcefield.loadFile(StringIO(ffxml))

    # Signal that we have successfully parameterized the residue.
    return True


class SystemGenerator(object):
    """
    Utility factory to generate OpenMM Systems from Topology objects.

    Parameters
    ----------
    forcefields_to_use : list of string
        List of the names of ffxml files that will be used in system creation.
    forcefield_kwargs : dict of arguments to createSystem, optional
        Allows specification of various aspects of system creation.
    use_gaff : bool, optional, default=True
        If True, will add the GAFF residue template generator.

    Examples
    --------
    >>> from simtk.openmm import app
    >>> forcefield_kwargs={ 'nonbondedMethod' : app.NoCutoff, 'implicitSolvent' : None, 'constraints' : None }
    >>> system_generator = SystemGenerator(['amber99sbildn.xml'], forcefield_kwargs=forcefield_kwargs)
    >>> from openmmtools.testsystems import AlanineDipeptideVacuum
    >>> testsystem = AlanineDipeptideVacuum()
    >>> system = system_generator.createSystem(testsystem.topology)
    """

    def __init__(self, forcefields_to_use, forcefield_kwargs=None, use_gaff=True):
        self._forcefield_xmls = forcefields_to_use
        self._forcefield_kwargs = (
            forcefield_kwargs if forcefield_kwargs is not None else {}
        )
        from simtk.openmm.app import ForceField

        self._forcefield = ForceField(*self._forcefield_xmls)
        if use_gaff:
            self._forcefield.registerTemplateGenerator(gaffTemplateGenerator)

    def getForceField(self):
        """
        Return the associated ForceField object.

        Returns
        -------
        forcefield : simtk.openmm.app.ForceField
            The current ForceField object.
        """
        return self._forcefield

    def createSystem(self, topology):
        """
        Build a system from specified topology object.

        Parameters
        ----------
        topology : simtk.openmm.app.Topology object
            The topology of the system to construct.

        Returns
        -------
        system : openmm.System
            A system object generated from the topology
        """
        system = self._forcefield.createSystem(topology, **self._forcefield_kwargs)
        return system

    @property
    def ffxmls(self):
        return self._forcefield_xmls

    @property
    def forcefield(self):
        return self._forcefield