choderalab/yank

View on GitHub
docs/yank-yaml-cookbook/combinatorial-experiment.yaml

Summary

Maintainability
Test Coverage
# This example shows how to set up a combinatorial experiment. We have
# 2 receptors (each in 2 different conformations) and 2 ligands and we
# want to measure the binding affinities for all possible combinations
# of these molecules in both explicit solvent and vacuum.

# Three dashes start a YAML script
---

# See the file "all-options.yaml" for the meaning of these options.
# -----------------------------------------------------------------
options:
  # General settings for the run. Quantities with units are written as
  # "<value>*<unit>". Booleans are written as true/false throughout.
  verbose: true
  output_dir: test_kinase
  # The "experiments" section at the end of this file also sets the
  # temperature in its own "options" block.
  temperature: 310*kelvin
  pressure: 1*atmosphere
  constraints: HBonds
  default_number_of_iterations: 200
  minimize: true


# In the "molecules" section we specify how we want our molecules
# to be prepared (i.e. their initial structure and which force
# field parameters to use for them).
# ------------------------------------------------------------------
molecules:
  abl:                                              # Each molecule is identified by an ID, "abl" in this case.
                                                    #
    filepath: !Combinatorial [2HYY.pdb, 3CS9.pdb]   # Lists preceded by "!Combinatorial" here will be expanded
                                                    # combinatorially, so the section "abl" actually specifies
                                                    # two different starting configurations of the kinase Abl.
                                                    #
    strip_protons: true                             # Let tleap re-add all hydrogen atoms. This is useful if
                                                    # the PDB contains atom names for hydrogen that AMBER does
                                                    # not recognize. The default for this parameter is "false".
  src:                                              # Second receptor, also given as two starting structures.
    filepath: !Combinatorial [2OIQ_A.pdb, 3EL7_A.pdb]
    strip_protons: true

  imatinib:
    filepath: clinical-kinase-inhibitors.csv        # Yank supports pdb, mol2, sdf and csv files. The last one
                                                    # must contain one row for each molecule. The second column
                                                    # must be the SMILES description of the molecule (the first
                                                    # column if the csv file has only one). Yank can use the
                                                    # OpenEye toolkits to generate a molecule from its name or
                                                    # its SMILES description. Instead of using "filepath", you
                                                    # can directly use "smiles" or "name". For example, to
                                                    # specify a benzene molecule you can use either:
                                                    #
                                                    # molecules:
                                                    #   benzene_smiles:
                                                    #     smiles: c1ccccc1
                                                    #     antechamber:
                                                    #       charge_method: bcc
                                                    #   benzene_name:
                                                    #     name: benzene
                                                    #     antechamber:
                                                    #       charge_method: bcc
                                                    #
    select: 0                                       # Select the first row of the csv file which contains the
                                                    # SMILES description of imatinib. Note that the list syntax
                                                    # works everywhere within the "molecules" section, so you
                                                    # could specify both imatinib and bosutinib using
                                                    #
                                                    # molecules:
                                                    #   ligand:
                                                    #     filepath: clinical-kinase-inhibitors.csv
                                                    #     antechamber:
                                                    #       charge_method: bcc
                                                    #     select: !Combinatorial [0, 3]
                                                    #
                                                    # The "select" keyword works the same way if you specify a
                                                    # pdb, mol2, or an sdf file containing multiple structures.
                                                    # You can alternatively specify "select: all" which includes
                                                    # all the molecules in the given file. This is also the
                                                    # default value.
                                                    #
    antechamber:                                    # Let antechamber parametrize the molecule using GAFF and
      charge_method: bcc                            # AM1-BCC charges. See antechamber manual for supported
                                                    # charge methods. If OpenEye Toolkits are installed, it is
                                                    # possible to determine the charge with the OpenEye
                                                    # recommended scheme with
                                                    #
                                                    # molecules:
                                                    #   imatinib:
                                                    #     filepath: imatinib.mol2
                                                    #     openeye:
                                                    #       quacpac: am1-bcc
                                                    #     antechamber:
                                                    #       charge_method: null
                                                    #
                                                    # Notice that charge_method is set to null to keep the
                                                    # partial charges determined by openeye.
                                                    #
    epik:                                           # Run Schrodinger's tool Epik with default parameters and
      select: 0                                     # select the most likely protonation state for the molecule
      tautomerize: false                            # (in solution). More control over epik parameters is
      ph: 7.6                                       # possible (see YAML documentation).
      ph_tolerance: 3.0

  bosutinib:
    filepath: molecule_dir/bosutinib.mol2
    leap:                                           # It is possible to optionally include molecule-specific
      parameters: [bosutinib.frcmod, bosutinib.off] # parameter files to import in tleap before creating the
                                                    # system. General parameter files (e.g. leaprc.ff14SB,
                                                    # leaprc.gaff) are generally specified in the system
                                                    # (see below). To import a single parameter file (i.e. not a
                                                    # list), square brackets are not required.


# Here we specify the parameters of our solvents. The list of
# options shown here is not exhaustive. In general, all parameters
# of simtk.openmm.app.amberprmtopfile.AmberPrmtopFile.createSystem()
# can be specified in this section.
# -----------------------------------------------------------------
solvents:
  RF:                                               # Arbitrary ID for this solvent ("RF" in this case).
    nonbonded_method: CutoffPeriodic                # This specifies an explicit solvent using reaction field.
    nonbonded_cutoff: 1*nanometer                   # Cutoff for interactions is set at 1 nm.
    clearance: 10*angstroms                         # The edge of the solvation box will be at a 10 angstroms
                                                    # distance from any atom of the receptor and ligand.
    positive_ion: Na+                               # Neutralizing ions to use.
    negative_ion: Cl-
  vacuum:                                           # No solvent. Referenced, together with "RF", by the
    nonbonded_method: NoCutoff                      # "systems" section below.

  # We will not use the next two solvents in the experiment,
  # but here are two examples of how to define an implicit
  # and a PME solvent with some advanced options supported
  # by OpenMM.
  GBSA:
    nonbonded_method: NoCutoff                      # Implicit solvent using GBSA/OBC2 model.
    implicit_solvent: OBC2
    implicit_solvent_salt_conc: 1.0*mole/liter
    solute_dielectric: 1.5
    solvent_dielectric: 80.0
  PME:                                              # Periodic solvent box (clearance and ions as in "RF") with
    nonbonded_method: PME                           # PME as the nonbonded method.
    nonbonded_cutoff: 1*nanometer
    switch_distance: 0.9*nanometer                  # NOTE(review): presumably forwarded to OpenMM's
    ewald_error_tolerance: 0.0003                   # createSystem() like the other advanced options; confirm.
    clearance: 10*angstroms
    positive_ion: Na+
    negative_ion: Cl-


# Here we describe the system for the calculation in terms of the
# components that form the system. Just as in the section
# "molecules", lists preceded by "!Combinatorial" in the section
# "systems" will be expanded combinatorially. Below we have
# 2x2x2x2=16 calculations to run (2 receptors x 2 initial receptor
# structures x 2 ligands x 2 solvent models).
# -----------------------------------------------------------------
systems:                                                 # If you don't need combinations but you only want to run a
  kinase-inhibitor:                                      # single calculation, don't use combinatorial lists. Example:
    receptor: !Combinatorial [abl, src]                  #   receptor: abl
    ligand: !Combinatorial [imatinib, bosutinib]         #   ligand: imatinib
    solvent: !Combinatorial [RF, vacuum]                 #   solvent: RF
    pack: true                                           # If the ligand is far away from the receptor or if there
                                                         # are clashing atoms (defined as closer than 1.5 angstroms),
                                                         # Yank will randomly translate and rotate the ligand until
                                                         # this is solved. Set this to "false" if you don't want to
                                                         # modify the initial position of the ligand as defined in
                                                         # your input file.
    leap:
      parameters: [oldff/leaprc.ff99SBildn, leaprc.gaff] # All our calculations will use the parameters in AMBER
                                                         # ff99SBildn force field and GAFF.

  imatinib-hydration:                                    # System for hydration free energy of imatinib
    solute: imatinib
    solvent1: RF
    solvent2: vacuum
    leap:
      parameters: [leaprc.ff14SB, leaprc.gaff]           # General parameter files for water and imatinib

  # It is also possible to skip the automatic system setup
  # and provide your own system in AMBER or Gromacs format
  amber-system:
    phase1_path: [complex.prmtop, complex.inpcrd]        # System files for the complex phase.
    phase2_path: [solvent.prmtop, solvent.inpcrd]        # System files for the solvent phase.
    ligand_dsl: resname MOL                              # MDTraj DSL string to select the ligand.
    solvent: RF                                          # Specify how to model the solvent.

  gromacs-system:
    phase1_path: [complex.top, complex.gro]
    phase2_path: [solvent.top, solvent.gro]
    gromacs_include_dir: include/                        # Optional path to the directory containing the files
                                                         # included in .top files.
    ligand_dsl: resname MOL
    solvent: RF


# Here we specify the alchemical path of our simulation.
# -----------------------------------------------------------------
protocols:
  absolute-binding:
    # Both phases use the same 20-state schedule, written as two rows of 10
    # so the two lists can be compared entry by entry:
    #   states  1-10: electrostatics go 1.0 -> 0.1 while sterics stay at 1.0
    #   states 11-20: electrostatics stay at 0.0 while sterics go 0.9 -> 0.0
    # The two lists in a phase are meant to have one entry per state, so they
    # must have the same length.
    complex:
      alchemical_path:
        lambda_electrostatics: [1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1,
                                0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        lambda_sterics: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                         0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.0]
    solvent:
      alchemical_path:
        lambda_electrostatics: [1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1,
                                0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        lambda_sterics: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                         0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.0]


# Here we specify which experiments to set up and run. Again, as in
# the sections "molecules" and "systems", every list preceded by
# !Combinatorial will generate multiple experiments. The example
# below would generate 4 experiments (assuming that my-system1 and
# my-system2 were not combinatorial systems as well).
#
#   experiments:
#     system: !Combinatorial [my-system1, my-system2]
#     protocol: !Combinatorial [protocol1, protocol2]
#
# -----------------------------------------------------------------
experiments:
  # Runs the "absolute-binding" protocol on the "kinase-inhibitor" system.
  # The two combinatorial lists below (2 restraint types x 2 temperatures)
  # further multiply the number of experiments generated from that system.
  system: kinase-inhibitor
  protocol: absolute-binding
  restraint:                                                 # Optionally, apply a restraint to the ligand to keep
    type: !Combinatorial [Harmonic, FlatBottom]              # it close to the receptor. Possible types are null,
                                                             # Harmonic, FlatBottom, Boresch, and PeriodicTorsionBoresch.
  options:                                                   # All options that can be specified in the "options"
    temperature: !Combinatorial [298.0*kelvin, 307.0*kelvin] # section can be included here.


# It is also possible to specify a sequence of experiments with the syntax
# my-experiment1:
#   system: kinase-inhibitor
#   protocol: absolute-binding
#
# my-experiment2:
#   system: imatinib-hydration
#   protocol: hydration-protocol
#
# experiments: [my-experiment1, my-experiment2]