protons/app/modeller.py from choderalab/protons

protons/app/modeller.py
Summary

Maintainability

5 days
Test Coverage

Issues
# coding=utf-8
"""Pre-configured OpenMM Modeller object for use with the default protons forcefield. Modified from original source code by Peter Eastman."""

from .topology import Topology
from simtk.openmm.app import modeller
from simtk.openmm.vec3 import Vec3
from simtk.openmm import (
    System,
    Context,
    CustomNonbondedForce,
    HarmonicBondForce,
    HarmonicAngleForce,
    VerletIntegrator,
    LocalEnergyMinimizer,
)
from simtk.unit import nanometer, degree, acos, dot, norm
import os
from . import element as elem
import random
from copy import deepcopy


PACKAGE_ROOT = os.path.abspath(os.path.dirname(__file__))
# Topology.loadBondDefinitions(os.path.join(PACKAGE_ROOT, 'data', 'bonds-amber10-constph.xml'))


class Modeller(modeller.Modeller):
    """A modification of the simtk.openmm.app Modeller class.


    Credits
    -------
    The original class was written by Peter Eastman.
    """

    @classmethod
    def unloadHydrogenDefinitions(cls):
        """Unloading hydrogen definitions."""
        cls._residueHydrogens = {}
        cls._hasLoadedStandardHydrogens = False

    def addHydrogens(self, forcefield=None, pH=None, variants=None, platform=None):
        """Add missing hydrogens to the model.

        This function automatically changes compatible residues into their constant-pH variant if no variant is specified.:

        Aspartic acid:
            AS4: Form with a 2 hydrogens on each one of the delta oxygens (syn,anti)
                It has 5 titration states.

            Alternative:
            AS2: Has 2 hydrogens (syn, anti) on one of the delta oxygens
                It has 3 titration states.

        Cysteine:
            CYS: Neutral form with a hydrogen on the sulfur
            CYX: No hydrogen on the sulfur (either negatively charged, or part of a disulfide bond)

        Glutamic acid:
            GL4: Form with a 2 hydrogens on each one of the epsilon oxygens (syn,anti)
                It has 5 titration states.

        Histidine:
            HIP: Positively charged form with hydrogens on both ND1 and NE2
                It has 3 titration states.

        The variant to use for each residue is determined by the following rules:

        1. Any Cysteine that participates in a disulfide bond uses the CYX variant regardless of pH.
        2. Other residues are all set to maximally protonated state, which can be updated using a proton drive

        You can override these rules by explicitly specifying a variant for any residue.  To do that, provide a list for the
        'variants' parameter, and set the corresponding element to the name of the variant to use.

        A special case is when the model already contains a hydrogen that should not be present in the desired variant.
        If you explicitly specify a variant using the 'variants' parameter, the residue will be modified to match the
        desired variant, removing hydrogens if necessary.  On the other hand, for residues whose variant is selected
        automatically, this function will only add hydrogens.  It will never remove ones that are already present in the
        model.

        Definitions for standard amino acids and nucleotides are built in.  You can call loadHydrogenDefinitions() to load
        additional definitions for other residue types.

        Parameters
        ----------
        forcefield : ForceField=None
            the ForceField to use for determining the positions of hydrogens.
            If this is None, positions will be picked which are generally
            reasonable but not optimized for any particular ForceField.
        pH : None,
            Kept for compatibility reasons. Has no effect.
        variants : list=None
            an optional list of variants to use.  If this is specified, its
            length must equal the number of residues in the model.  variants[i]
            is the name of the variant to use for residue i (indexed starting at
            0). If an element is None, the standard rules will be followed to
            select a variant for that residue.
        platform : Platform=None
            the Platform to use when computing the hydrogen atom positions.  If
            this is None, the default Platform will be used.

        Returns
        -------
        list
             a list of what variant was actually selected for each residue,
             in the same format as the variants parameter

        Notes
        -----

        This function does not use a pH specification. The argument is kept for compatibility reasons.

        """
        # Check the list of variants.

        if pH is not None:
            print("Ignored pH argument provided for constant-pH residues.")

        residues = list(self.topology.residues())
        if variants is not None:
            if len(variants) != len(residues):
                raise ValueError(
                    "The length of the variants list must equal the number of residues"
                )
        else:
            variants = [None] * len(residues)
        actualVariants = [None] * len(residues)

        # Load the residue specifications.

        if not Modeller._hasLoadedStandardHydrogens:
            Modeller.loadHydrogenDefinitions(
                os.path.join(
                    os.path.dirname(__file__), "data", "hydrogens-amber10-constph.xml"
                )
            )

        # Make a list of atoms bonded to each atom.

        bonded = {}
        for atom in self.topology.atoms():
            bonded[atom] = []
        for atom1, atom2 in self.topology.bonds():
            bonded[atom1].append(atom2)
            bonded[atom2].append(atom1)

        # Define a function that decides whether a set of atoms form a hydrogen bond, using fairly tolerant criteria.

        def isHbond(d, h, a):
            if norm(d - a) > 0.35 * nanometer:
                return False
            deltaDH = h - d
            deltaHA = a - h
            deltaDH /= norm(deltaDH)
            deltaHA /= norm(deltaHA)
            return acos(dot(deltaDH, deltaHA)) < 50 * degree

        # Loop over residues.

        newTopology = Topology()
        newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
        newAtoms = {}
        newPositions = [] * nanometer
        newIndices = []
        acceptors = [
            atom
            for atom in self.topology.atoms()
            if atom.element in (elem.oxygen, elem.nitrogen)
        ]
        for chain in self.topology.chains():
            newChain = newTopology.addChain(chain.id)
            for residue in chain.residues():
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
                isNTerminal = residue == chain._residues[0]
                isCTerminal = residue == chain._residues[-1]
                if residue.name in Modeller._residueHydrogens:
                    # Add hydrogens.  First select which variant to use.

                    spec = Modeller._residueHydrogens[residue.name]
                    variant = variants[residue.index]
                    if variant is None:
                        if residue.name == "CYS":
                            # If this is part of a disulfide, use CYX.

                            sulfur = [
                                atom
                                for atom in residue.atoms()
                                if atom.element == elem.sulfur
                            ]
                            if len(sulfur) == 1 and any(
                                (atom.residue != residue for atom in bonded[sulfur[0]])
                            ):
                                variant = "CYX"
                        if residue.name == "HIS":
                            variant = "HIP"
                        if residue.name == "GLU":
                            variant = "GL4"
                        if residue.name == "ASP":
                            variant = "AS4"
                    if variant is not None and variant not in spec.variants:
                        raise ValueError(
                            "Illegal variant for %s residue: %s"
                            % (residue.name, variant)
                        )
                    actualVariants[residue.index] = variant
                    removeExtraHydrogens = variants[residue.index] is not None

                    # Make a list of hydrogens that should be present in the residue.

                    parents = [
                        atom
                        for atom in residue.atoms()
                        if atom.element != elem.hydrogen
                    ]
                    parentNames = [atom.name for atom in parents]
                    hydrogens = [
                        h
                        for h in spec.hydrogens
                        if (variant is None)
                        or (h.variants is None)
                        or (h.variants is not None and variant in h.variants)
                    ]
                    hydrogens = [
                        h
                        for h in hydrogens
                        if h.terminal is None
                        or (isNTerminal and h.terminal == "N")
                        or (isCTerminal and h.terminal == "C")
                    ]
                    hydrogens = [h for h in hydrogens if h.parent in parentNames]

                    # Loop over atoms in the residue, adding them to the new topology along with required hydrogens.

                    for parent in residue.atoms():
                        # Check whether this is a hydrogen that should be removed.

                        if (
                            removeExtraHydrogens
                            and parent.element == elem.hydrogen
                            and not any(parent.name == h.name for h in hydrogens)
                        ):
                            continue

                        # Add the atom.

                        newAtom = newTopology.addAtom(
                            parent.name, parent.element, newResidue
                        )
                        newAtoms[parent] = newAtom
                        newPositions.append(deepcopy(self.positions[parent.index]))
                        if parent in parents:
                            # Match expected hydrogens with existing ones and find which ones need to be added.

                            existing = [
                                atom
                                for atom in bonded[parent]
                                if atom.element == elem.hydrogen
                            ]
                            expected = [h for h in hydrogens if h.parent == parent.name]
                            if len(existing) < len(expected):
                                # Try to match up existing hydrogens to expected ones.

                                matches = []
                                for e in existing:
                                    match = [h for h in expected if h.name == e.name]
                                    if len(match) > 0:
                                        matches.append(match[0])
                                        expected.remove(match[0])
                                    else:
                                        matches.append(None)

                                # If any hydrogens couldn't be matched by name, just match them arbitrarily.

                                for i in range(len(matches)):
                                    if matches[i] is None:
                                        matches[i] = expected[-1]
                                        expected.remove(expected[-1])

                                # Add the missing hydrogens.

                                for h in expected:
                                    newH = newTopology.addAtom(
                                        h.name, elem.hydrogen, newResidue
                                    )
                                    newIndices.append(newH.index)
                                    delta = Vec3(0, 0, 0) * nanometer
                                    if len(bonded[parent]) > 0:
                                        for other in bonded[parent]:
                                            delta += (
                                                self.positions[parent.index]
                                                - self.positions[other.index]
                                            )
                                    else:
                                        delta = (
                                            Vec3(
                                                random.random(),
                                                random.random(),
                                                random.random(),
                                            )
                                            * nanometer
                                        )
                                    delta *= 0.1 * nanometer / norm(delta)
                                    delta += (
                                        0.05
                                        * Vec3(
                                            random.random(),
                                            random.random(),
                                            random.random(),
                                        )
                                        * nanometer
                                    )
                                    delta *= 0.1 * nanometer / norm(delta)
                                    newPositions.append(
                                        self.positions[parent.index] + delta
                                    )
                                    newTopology.addBond(newAtom, newH)
                else:
                    # Just copy over the residue.

                    for atom in residue.atoms():
                        newAtom = newTopology.addAtom(
                            atom.name, atom.element, newResidue
                        )
                        newAtoms[atom] = newAtom
                        newPositions.append(deepcopy(self.positions[atom.index]))
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
                newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])

        # The hydrogens were added at random positions.  Now perform an energy minimization to fix them up.

        if forcefield is not None:
            # Use the ForceField the user specified.

            system = forcefield.createSystem(newTopology, rigidWater=False)
            atoms = list(newTopology.atoms())
            for i in range(system.getNumParticles()):
                if atoms[i].element != elem.hydrogen:
                    # This is a heavy atom, so make it immobile.
                    system.setParticleMass(i, 0)
        else:
            # Create a System that restrains the distance of each hydrogen from its parent atom
            # and causes hydrogens to spread out evenly.

            system = System()
            nonbonded = CustomNonbondedForce("100/((r/0.1)^4+1)")
            bonds = HarmonicBondForce()
            angles = HarmonicAngleForce()
            system.addForce(nonbonded)
            system.addForce(bonds)
            system.addForce(angles)
            bondedTo = []
            for atom in newTopology.atoms():
                nonbonded.addParticle([])
                if atom.element != elem.hydrogen:
                    system.addParticle(0.0)
                else:
                    system.addParticle(1.0)
                bondedTo.append([])
            for atom1, atom2 in newTopology.bonds():
                if atom1.element == elem.hydrogen or atom2.element == elem.hydrogen:
                    bonds.addBond(atom1.index, atom2.index, 0.1, 100_000.0)
                bondedTo[atom1.index].append(atom2)
                bondedTo[atom2.index].append(atom1)
            for residue in newTopology.residues():
                if residue.name == "HOH":
                    # Add an angle term to make the water geometry correct.

                    atoms = list(residue.atoms())
                    oindex = [
                        i for i in range(len(atoms)) if atoms[i].element == elem.oxygen
                    ]
                    if len(atoms) == 3 and len(oindex) == 1:
                        hindex = list(set([0, 1, 2]) - set(oindex))
                        angles.addAngle(
                            atoms[hindex[0]].index,
                            atoms[oindex[0]].index,
                            atoms[hindex[1]].index,
                            1.824,
                            836.8,
                        )
                else:
                    # Add angle terms for any hydroxyls.

                    for atom in residue.atoms():
                        index = atom.index
                        if (
                            atom.element == elem.oxygen
                            and len(bondedTo[index]) == 2
                            and elem.hydrogen in (a.element for a in bondedTo[index])
                        ):
                            angles.addAngle(
                                bondedTo[index][0].index,
                                index,
                                bondedTo[index][1].index,
                                1.894,
                                460.24,
                            )

        if platform is None:
            context = Context(system, VerletIntegrator(0.0))
        else:
            context = Context(system, VerletIntegrator(0.0), platform)
        context.setPositions(newPositions)
        LocalEnergyMinimizer.minimize(context, 1.0, 50)
        self.topology = newTopology
        self.positions = context.getState(getPositions=True).getPositions()
        del context
        return actualVariants


Modeller.loadHydrogenDefinitions(
    os.path.join(PACKAGE_ROOT, "data", "hydrogens-amber10-constph.xml")
)