choderalab/protons

View on GitHub
protons/app/openeye.py

Summary

Maintainability
D
2 days
Test Coverage
"""Adapted from openmoltools: https://github.com/choderalab/openmoltools.

Original license and copyright:
Copyright 2017 Kyle A. Beauchamp, John D. Chodera, David L. Mobley

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

"""
import shutil
import os
import mdtraj as md
from mdtraj.utils import enter_temp_directory
from mdtraj.utils.delay_import import import_
from .amber import run_antechamber
from . import amber_parser
import logging

logger = logging.getLogger(__name__)


def create_ffxml_file(
    gaff_mol2_filenames,
    frcmod_filenames,
    ffxml_filename=None,
    override_mol2_residue_name=None,
):
    """Process multiple gaff mol2 files and frcmod files using the XML conversion and write to an XML file.
    Parameters
    ----------
    gaff_mol2_filenames : list of str
        The names of the gaff mol2 files
    frcmod_filenames : str
        The names of the gaff frcmod files
    ffxml_filename : str, optional, default=None
        Optional name of output ffxml file to generate.  If None, no file
        will be generated.
    override_mol2_residue_name : str, default=None
            If given, use this name to override mol2 residue names.
    Returns
    -------
    ffxml_stringio : str
        StringIO representation of ffxml file containing residue entries for each molecule.
    """

    # Generate ffxml file.
    parser = amber_parser.AmberParser(
        override_mol2_residue_name=override_mol2_residue_name
    )

    amber = import_("openmoltools.amber")
    GAFF_DAT_FILENAME = amber.find_gaff_dat()
    filenames = [GAFF_DAT_FILENAME]
    filenames.extend([filename for filename in gaff_mol2_filenames])
    filenames.extend([filename for filename in frcmod_filenames])

    parser.parse_filenames(filenames)

    ffxml_stream = parser.generate_xml()

    if ffxml_filename is not None:
        outfile = open(ffxml_filename, "w")
        outfile.write(ffxml_stream.read())
        outfile.close()
        ffxml_stream.seek(0)

    return ffxml_stream


# Note: We recommend having every function return *copies* of input, to avoid headaches associated with in-place changes


def assignELF10charges(molecule, max_confs: int = -1, strictStereo=True):
    """
     This function computes atomic partial charges for an OEMol by
     using the ELF10 method

    Parameters:
    -----------
    molecule : OEMol object
        The molecule that needs to be charged
    max_confs : integer
        The max number of conformers used to calculate the atomic partial charges.
        Select -1 to use dense conformers. 
    strictStereo : bool
        a flag used to check if atoms need to have assigned stereo chemistry or not

    Return:
    -------
    mol_copy : OEMol
        a copy of the original molecule with assigned atomic partial charges
    """

    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise (ImportError("Need License for OEChem!"))
    oequacpac = import_("openeye.oequacpac")
    if not oequacpac.OEQuacPacIsLicensed():
        raise (ImportError("Need License for oequacpac!"))

    oeomega = import_("openeye.oeomega")

    mol_copy = molecule.CreateCopy()
    if max_confs < 0:
        omegaOpts = oeomega.OEOmegaOptions(oeomega.OEOmegaSampling_Dense)
        omega = oeomega.OEOmega(omegaOpts)
        omega.SetStrictStereo(strictStereo)
        if not omega(mol_copy):
            raise Exception("Omega failed.")
    else:
        if not mol_copy.GetMaxConfIdx() > max_confs:
            # Generate up to max_confs conformers
            mol_copy = generate_conformers(
                mol_copy, max_confs=max_confs, strictStereo=strictStereo
            )

    # Assign MMFF Atom types
    if not oechem.OEMMFFAtomTypes(mol_copy):
        raise RuntimeError("MMFF atom type assignment returned errors")

    # ELF10 charges
    status = oequacpac.OEAssignCharges(mol_copy, oequacpac.OEAM1BCCELF10Charges())

    if not status:
        raise RuntimeError("OEAssignCharges returned error code %d" % status)

    return mol_copy


def get_charges(
    molecule,
    max_confs=800,
    strictStereo=True,
    normalize=True,
    keep_confs=None,
    legacy=True,
    assign_formal_charges: bool = False,
):
    """Generate charges for an OpenEye OEMol molecule.

    Parameters
    ----------
    molecule : OEMol
        Molecule for which to generate conformers.
        Omega will be used to generate max_confs conformations.
    max_confs : int, optional, default=800
        Max number of conformers to generate
    strictStereo : bool, optional, default=True
        If False, permits smiles strings with unspecified stereochemistry.
        See https://docs.eyesopen.com/omega/usage.html
    normalize : bool, optional, default=True
        If True, normalize the molecule by checking aromaticity, adding
        explicit hydrogens, and renaming by IUPAC name.
    keep_confs : int, optional, default=None
        If None, apply the charges to the provided conformation and return
        this conformation, unless no conformation is present.
        Otherwise, return some or all of the generated
        conformations. If -1, all generated conformations are returned.
        Otherwise, keep_confs = N will return an OEMol with up to N
        generated conformations.  Multiple conformations are still used to
        *determine* the charges.
    legacy : bool, default=True
        If False, uses the new OpenEye charging engine.
        See https://docs.eyesopen.com/toolkits/python/quacpactk/OEProtonFunctions/OEAssignCharges.html#

    assign_formal_charges : default False,
        (Re)assign formal charges for atoms in the molecule.

    Returns
    -------
    charged_copy : OEMol
        A molecule with OpenEye's recommended AM1BCC charge selection scheme.

    Notes
    -----
    Roughly follows
    http://docs.eyesopen.com/toolkits/cookbook/python/modeling/am1-bcc.html
    """

    # If there is no geometry, return at least one conformation.
    if molecule.GetConfs() == 0:
        keep_confs = 1

    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise (ImportError("Need License for OEChem!"))
    oequacpac = import_("openeye.oequacpac")
    if not oequacpac.OEQuacPacIsLicensed():
        raise (ImportError("Need License for oequacpac!"))

    if normalize:
        molecule = normalize_molecule(molecule)
    else:
        molecule = oechem.OEMol(molecule)

    if assign_formal_charges:
        # modifies molecule in place
        oechem.OEAssignFormalCharges(molecule)
        print("")

    charged_copy = generate_conformers(
        molecule, max_confs=max_confs, strictStereo=strictStereo
    )  # Generate up to max_confs conformers

    if not legacy:
        # 2017.2.1 OEToolkits new charging function
        status = oequacpac.OEAssignCharges(charged_copy, oequacpac.OEAM1BCCCharges())
        if not status:
            raise (RuntimeError("OEAssignCharges failed."))
    else:
        # AM1BCCSym recommended by Chris Bayly to KAB+JDC, Oct. 20 2014.
        status = oequacpac.OEAssignPartialCharges(
            charged_copy, oequacpac.OECharges_AM1BCCSym
        )
        if not status:
            raise (
                RuntimeError("OEAssignPartialCharges returned error code %d" % status)
            )

    # Determine conformations to return
    if keep_confs == None:
        # If returning original conformation
        original = molecule.GetCoords()
        # Delete conformers over 1
        for k, conf in enumerate(charged_copy.GetConfs()):
            if k > 0:
                charged_copy.DeleteConf(conf)
        # Copy coordinates to single conformer
        charged_copy.SetCoords(original)
    elif keep_confs > 0:
        logger.debug(
            "keep_confs was set to %s. Molecule positions will be reset." % keep_confs
        )

        # Otherwise if a number is provided, return this many confs if available
        for k, conf in enumerate(charged_copy.GetConfs()):
            if k > keep_confs - 1:
                charged_copy.DeleteConf(conf)
    elif keep_confs == -1:
        # If we want all conformations, continue
        pass
    else:
        # Not a valid option to keep_confs
        raise (ValueError("Not a valid option to keep_confs in get_charges."))

    return charged_copy


def normalize_molecule(molecule):
    """
    Normalize a copy of the molecule by checking aromaticity, adding explicit hydrogens, and
    (if possible) renaming by IUPAC name.

    Parameters
    ----------
    molecule : OEMol
        the molecule to be normalized.

    Returns
    -------
    molcopy : OEMol
        A (copied) version of the normalized molecule

    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise (ImportError("Need License for OEChem!"))
    oeiupac = import_("openeye.oeiupac")
    has_iupac = oeiupac.OEIUPACIsLicensed()

    molcopy = oechem.OEMol(molecule)

    # Assign aromaticity.
    oechem.OEAssignAromaticFlags(molcopy, oechem.OEAroModelOpenEye)

    # Add hydrogens.
    oechem.OEAddExplicitHydrogens(molcopy)

    # Set title to IUPAC name.
    if has_iupac:
        name = oeiupac.OECreateIUPACName(molcopy)
        molcopy.SetTitle(name)

    # Check for any missing atom names, if found reassign all of them.
    if any([atom.GetName() == "" for atom in molcopy.GetAtoms()]):
        oechem.OETriposAtomNames(molcopy)

    return molcopy


def iupac_to_oemol(iupac_name):
    """Create a OEMolBuilder from a iupac name.

    Parameters
    ----------
    iupac_name : str
        IUPAC name of desired molecule.

    Returns
    -------
    molecule : OEMol
        A normalized molecule with desired iupac name.

    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise (ImportError("Need License for OEChem!"))
    oeiupac = import_("openeye.oeiupac")
    if not oeiupac.OEIUPACIsLicensed():
        raise (ImportError("Need License for OEIupac!"))

    # Create an OEMol molecule from IUPAC name.
    molecule = oechem.OEMol()  # create a molecule

    # Populate the MoleCule from the IUPAC name
    if not oeiupac.OEParseIUPACName(molecule, iupac_name):
        raise ValueError(
            "The supplied IUPAC name '%s' could not be parsed." % iupac_name
        )

    molecule = normalize_molecule(molecule)

    return molecule


def smiles_to_oemol(smiles):
    """Create a OEMolBuilder from a smiles string.

    Parameters
    ----------
    smiles : str
        SMILES representation of desired molecule.

    Returns
    -------
    molecule : OEMol
        A normalized molecule with desired smiles string.

    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise (ImportError("Need License for OEChem!"))

    molecule = oechem.OEMol()
    if not oechem.OEParseSmiles(molecule, smiles):
        raise ValueError("The supplied SMILES '%s' could not be parsed." % smiles)

    molecule = normalize_molecule(molecule)

    return molecule


def generate_conformers(
    molecule,
    max_confs=800,
    strictStereo=True,
    ewindow=15.0,
    rms_threshold=1.0,
    strictTypes=True,
):
    """Generate conformations for the supplied molecule

    Parameters
    ----------
    molecule : OEMol
        Molecule for which to generate conformers
    max_confs : int, optional, default=800
        Max number of conformers to generate.  If None, use default OE Value.
    strictStereo : bool, optional, default=True
        If False, permits smiles strings with unspecified stereochemistry.
    strictTypes : bool, optional, default=True
        If True, requires that Omega have exact MMFF types for atoms in molecule; otherwise, allows the closest atom type of the same element to be used.

    Returns
    -------
    molcopy : OEMol
        A multi-conformer molecule with up to max_confs conformers.

    Notes
    -----
    Roughly follows
    http://docs.eyesopen.com/toolkits/cookbook/python/modeling/am1-bcc.html

    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise (ImportError("Need License for OEChem!"))
    oeomega = import_("openeye.oeomega")
    if not oeomega.OEOmegaIsLicensed():
        raise (ImportError("Need License for OEOmega!"))

    molcopy = oechem.OEMol(molecule)
    omega = oeomega.OEOmega()

    # These parameters were chosen to match http://docs.eyesopen.com/toolkits/cookbook/python/modeling/am1-bcc.html
    omega.SetMaxConfs(max_confs)
    omega.SetIncludeInput(True)
    omega.SetCanonOrder(False)

    omega.SetSampleHydrogens(
        True
    )  # Word to the wise: skipping this step can lead to significantly different charges!
    omega.SetEnergyWindow(ewindow)
    omega.SetRMSThreshold(
        rms_threshold
    )  # Word to the wise: skipping this step can lead to significantly different charges!

    omega.SetStrictStereo(strictStereo)
    omega.SetStrictAtomTypes(strictTypes)

    omega.SetIncludeInput(False)  # don't include input
    if max_confs is not None:
        omega.SetMaxConfs(max_confs)

    status = omega(molcopy)  # generate conformation
    if not status:
        raise (RuntimeError("omega returned error code %d" % status))

    return molcopy


def get_names_to_charges(molecule):
    """Return a dictionary of atom names and partial charges, as well as a string representation.

    Parameters
    ----------
    molecule : OEMol
        Molecule for which to grab charges

    Returns
    -------
    data : dictionary
        A dictinoary whose (key, val) pairs are the atom names and partial
        charges, respectively.
    molrepr : str
        A string representation of data
    """

    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise (ImportError("Need License for oechem!"))
    molcopy = oechem.OEMol(molecule)
    molrepr = ""
    data = {}
    for atom in molcopy.GetAtoms():
        name = atom.GetName()
        charge = atom.GetPartialCharge()
        data[name] = charge
        molrepr += "%s %f \n" % (name, charge)
    return data, molrepr


def molecule_to_mol2(
    molecule,
    tripos_mol2_filename=None,
    conformer=0,
    residue_name="MOL",
    standardize=True,
):
    """Convert OE molecule to tripos mol2 file.

    Parameters
    ----------
    molecule : openeye.oechem.OEGraphMol
        The molecule to be converted.
    tripos_mol2_filename : str, optional, default=None
        Output filename.  If None, will create a filename similar to
        name.tripos.mol2, where name is the name of the OE molecule.
    conformer : int, optional, default=0
        Save this frame
    residue_name : str, optional, default="MOL"
        OpenEye writes mol2 files with <0> as the residue / ligand name.
        This chokes many mol2 parsers, so we replace it with a string of
        your choosing.
    standardize: bool, optional, default=True
        Use a high-level writer, which will standardize the molecular properties.
        Set this to false if you wish to retain things such as atom names.
        In this case, a low-level writer will be used.

    Returns
    -------
    tripos_mol2_filename : str
        Filename of output tripos mol2 file

    """

    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise (ImportError("Need License for oechem!"))

    # Get molecule name.
    molecule_name = molecule.GetTitle()
    logger.debug(molecule_name)

    # Write molecule as Tripos mol2.
    if tripos_mol2_filename is None:
        tripos_mol2_filename = molecule_name + ".tripos.mol2"

    ofs = oechem.oemolostream(tripos_mol2_filename)
    ofs.SetFormat(oechem.OEFormat_MOL2H)
    for k, mol in enumerate(molecule.GetConfs()):
        if k == conformer:
            # Standardize will override molecular properties(atom names etc.)
            if standardize:
                oechem.OEWriteMolecule(ofs, mol)
            else:
                oechem.OEWriteMol2File(ofs, mol)

    ofs.close()

    # Replace <0> substructure names with valid text.
    infile = open(tripos_mol2_filename, "r")
    lines = infile.readlines()
    infile.close()
    newlines = [line.replace("<0>", residue_name) for line in lines]
    outfile = open(tripos_mol2_filename, "w")
    outfile.writelines(newlines)
    outfile.close()

    return molecule_name, tripos_mol2_filename


def oemols_to_ffxml(molecules, base_molecule_name="lig"):
    """Generate an OpenMM ffxml object and MDTraj trajectories from multiple OEMols

    Parameters
    ----------
    molecules : list(OEMole)
        Molecules WITH CHARGES.  Each can have multiple conformations.
        WILL GIVE UNDEFINED RESULTS IF NOT CHARGED.
    base_molecule_name : str, optional, default='lig'
        Base name of molecule to use inside parameter files.

    Returns
    -------
    trajectories : list(mdtraj.Trajectory)
        List of MDTraj Trajectories for molecule.  May contain multiple frames
    ffxml : StringIO
        StringIO representation of ffxml file.

    Notes
    -----
    We allow multiple different molecules at once so that they can all be
    included in a single ffxml file, which is currently the only recommended
    way to simulate multiple GAFF molecules in a single simulation.  For most
    applications, you will have just a single molecule:
    e.g. molecules = [my_oemol]
    The resulting ffxml StringIO object can be directly input to OpenMM e.g.
    `forcefield = app.ForceField(ffxml)`

    This will generate a lot of temporary files, so you may want to use
    utils.enter_temp_directory() to avoid clutter.
    """
    all_trajectories = []
    gaff_mol2_filenames = []
    frcmod_filenames = []

    print(os.getcwd())
    for i, molecule in enumerate(molecules):
        trajectories = []
        for j in range(molecule.NumConfs()):
            molecule_name = "%s-%d-%d" % (base_molecule_name, i, j)
            mol2_filename = "./%s.mol2" % molecule_name
            _unused = molecule_to_mol2(molecule, mol2_filename, conformer=j)
            gaff_mol2_filename, frcmod_filename = run_antechamber(
                molecule_name, mol2_filename, charge_method=None
            )  # It's redundant to run antechamber on each frame, fix me later.

            traj = md.load(gaff_mol2_filename)
            trajectories.append(traj)

            if j == 0:  # Only need 1 frame of forcefield files
                gaff_mol2_filenames.append(gaff_mol2_filename)
                frcmod_filenames.append(frcmod_filename)

        # Create a trajectory with all frames of the current molecule
        traj = trajectories[0].join(trajectories[1:])
        all_trajectories.append(traj)

    ffxml = create_ffxml_file(
        gaff_mol2_filenames,
        frcmod_filenames,
        override_mol2_residue_name=base_molecule_name,
    )

    return all_trajectories, ffxml


def smiles_to_antechamber(
    smiles_string,
    gaff_mol2_filename,
    frcmod_filename,
    residue_name="MOL",
    strictStereo=False,
):
    """Build a molecule from a smiles string and run antechamber,
    generating GAFF mol2 and frcmod files from a smiles string.  Charges
    will be generated using the OpenEye QuacPac AM1-BCC implementation.

    Parameters
    ----------
    smiles_string : str
        Smiles string of molecule to construct and charge
    gaff_mol2_filename : str
        Filename of mol2 file output of antechamber, with charges
        created from openeye
    frcmod_filename : str
        Filename of frcmod file output of antechamber.  Most likely
        this file will be almost empty, at least for typical molecules.
    residue_name : str, optional, default="MOL"
        OpenEye writes mol2 files with <0> as the residue / ligand name.
        This chokes many mol2 parsers, so we replace it with a string of
        your choosing.  This might be useful for downstream applications
        if the residue names are required to be unique.
    strictStereo : bool, optional, default=False
        If False, permits smiles strings with unspecified stereochemistry.
        See https://docs.eyesopen.com/omega/usage.html
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise (ImportError("Need License for oechem!"))

    # Get the absolute path so we can find these filenames from inside a temporary directory.
    gaff_mol2_filename = os.path.abspath(gaff_mol2_filename)
    frcmod_filename = os.path.abspath(frcmod_filename)

    m = smiles_to_oemol(smiles_string)
    m = get_charges(m, strictStereo=strictStereo, keep_confs=1)

    with enter_temp_directory():  # Avoid dumping 50 antechamber files in local directory.
        _unused = molecule_to_mol2(m, "./tmp.mol2", residue_name=residue_name)
        net_charge = oechem.OENetCharge(m)
        tmp_gaff_mol2_filename, tmp_frcmod_filename = run_antechamber(
            "tmp", "./tmp.mol2", charge_method=None, net_charge=net_charge
        )  # USE OE AM1BCC charges!
        shutil.copy(tmp_gaff_mol2_filename, gaff_mol2_filename)
        shutil.copy(tmp_frcmod_filename, frcmod_filename)