fragmenstein/victor/_victor_base.py
########################################################################################################################
__doc__ = \
"""
Base methods
"""
__author__ = "Matteo Ferla. [Github](https://github.com/matteoferla)"
__email__ = "matteo.ferla@gmail.com"
__date__ = "2020 A.D."
__license__ = "MIT"
__citation__ = ""
from ..version import __version__
########################################################################################################################
import logging, warnings
import re
import os
import time
from typing import List, Union, Optional, Callable, Dict
from rdkit import Chem
from ..m_rmsd import mRMSD
from ..monster import Monster # will become Victor.Monster
from ..settings import default_settings, default_settings_yaml
class _VictorBase:
    """
    Base of Victor: holds the class-level configuration (warhead and covalent residue
    definitions, logger, work path, Monster class) and the constructor that stores the
    user input and initialises the placeholders for calculated values.
    """
    uses_pyrosetta = True
    quick_reanimation = False  # thorough reanimation?

    # These will be deprecated
    monster_average_position = default_settings['monster_average_position']  # default False
    monster_throw_on_discard = default_settings['monster_throw_on_discard']  # default False
    monster_mmff_minisation = default_settings['ff_minisation']  # default True

    # These will likely stay
    constraint_function_type = 'FLAT_HARMONIC'
    work_path = default_settings['work_path']
    journal = logging.getLogger('Fragmenstein')
    journal.setLevel(logging.DEBUG)

    # here for ease of subclassing
    Monster = Monster

    covalent_definitions = [{'residue': 'CYS', 'smiles': '*SC', 'atomnames': ['CONN3', 'SG', 'CB']}]
    warhead_definitions = [
        {'name': 'acrylamide',
         'covalent': 'C(=O)CC*',  # the N may be secondary etc. so best not do mad substitutions.
         'covalent_atomnames': ['CZ', 'OZ', 'CY', 'CX', 'CONN1'],
         # OZ needs to tautomerise & h-bond happily.
         'noncovalent': 'C(=O)C=C',
         'noncovalent_atomnames': ['CZ', 'OZ', 'CY', 'CX'],
         },
        {'name': 'chloroacetamide',
         'covalent': 'C(=O)C*',  # the N may be secondary etc. so best not do mad substitutions.
         'covalent_atomnames': ['CY', 'OY', 'CX', 'CONN1'],
         # OY needs to tautomerise & h-bond happily.
         'noncovalent': 'C(=O)C[Cl]',
         'noncovalent_atomnames': ['CY', 'OY', 'CX', 'CLX'],
         },
        {'name': 'nitrile',
         'covalent': 'C(=N)*',  # zeroth atom is attached to the rest
         'covalent_atomnames': ['CX', 'NX', 'CONN1'],
         'noncovalent': 'C(#N)',  # zeroth atom is attached to the rest
         'noncovalent_atomnames': ['CX', 'NX'],
         },
        {'name': 'vinylsulfonamide',
         'covalent': 'S(=O)(=O)CC*',  # the N may be secondary etc. so best not do mad substitutions.
         'covalent_atomnames': ['SZ', 'OZ1', 'OZ2', 'CY', 'CX', 'CONN1'],  # OZ tauto
         'noncovalent': 'S(=O)(=O)C=C',
         'noncovalent_atomnames': ['SZ', 'OZ1', 'OZ2', 'CY', 'CX'],
         },
        {'name': 'bromoalkyne',
         'covalent': 'C(=C)*',
         'covalent_atomnames': ['CX', 'CY', 'CONN1'],
         'noncovalent': 'C#C[Br]',
         'noncovalent_atomnames': ['CX', 'CY', 'BRX'],
         },
    ]

    # these may be wrong and need checking.
    possible_definitions = [
        {'name': 'aurothiol',  # gold salt
         'covalent': 'S[Au]*',
         'covalent_atomnames': ['SY', 'AUX', 'CONN1'],
         'noncovalent': 'S[Au]P(CC)(CC)CC',
         'noncovalent_atomnames': ['SY', 'AUX', 'PL', 'CL1', 'CL2', 'CL3', 'CL4', 'CL5', 'CL6'],
         },
        {'name': 'aldehyde',
         'covalent': 'C(O)*',
         'covalent_atomnames': ['CX', 'OX', 'CONN1'],
         # NOTE(review): '[C:H1]' is an unusual atom expression -- confirm it parses as intended.
         'noncovalent': '[C:H1]=O',
         'noncovalent_atomnames': ['CX', 'OX'],
         },
    ]

    _connected_names = ('CONN', 'LOWE', 'UPPE', 'CONN1', 'CONN2', 'CONN3', 'LOWER', 'UPPER')
    error_to_catch = ()  # Exception
    remove_other_hetatms = True  # remove all protein heteroatoms that are not water or ligand when loaded from PDB

    # ================== Init ==========================================================================================

    def __init__(self,
                 hits: List[Chem.Mol],
                 pdb_filename: Optional[str] = None,
                 pdb_block: Optional[str] = None,
                 ligand_resn: str = 'LIG',
                 ligand_resi: Union[int, str, None] = None,
                 covalent_resn: str = 'CYS',  # no other option is accepted.
                 covalent_resi: Optional[Union[int, str]] = None,
                 extra_protein_constraint: Optional[str] = None,
                 pose_fx: Optional[Callable] = None,
                 monster_random_seed: Optional[int] = None,
                 **settings
                 ) -> None:
        """
        Initialise Victor in order to allow either combinations (merging/linking without a given aimed for molecule)
        or placements (using a given aimed for molecule).

        :param hits: list of rdkit molecules
        :param pdb_filename: file of apo structure. Takes precedence over ``pdb_block`` if both are given.
        :param pdb_block: alternative for above: a string of apo structure
        :param ligand_resn: 3 letter code of your choice (stored upper-cased)
        :param ligand_resi: Rosetta-style pose(int) or pdb(str)
        :param covalent_resn: only CYS accepted. if smiles has no * it is ignored
        :param covalent_resi: Rosetta-style pose(int) or pdb(str)
        :param extra_protein_constraint: multiline string of constraints relevant to the protein
        :param pose_fx: a function to call with pose to tweak or change something before minimising.
        :param monster_random_seed: a random seed for rdkit Embedding
        :param settings: merged over ``default_settings`` into ``self.settings``
        :raises ValueError: if neither ``pdb_filename`` nor ``pdb_block`` is provided
        """
        # ## Store
        # entry attributes
        if pdb_filename:
            with open(pdb_filename) as fh:
                self.apo_pdbblock = fh.read()
        elif pdb_block:
            self.apo_pdbblock = pdb_block
        else:
            raise ValueError('Provide a pdb_filename or pdb_block of the template')
        self.hits = hits
        self.ligand_resn: str = ligand_resn.upper()
        # a falsy ligand_resi triggers the fallback, which is defined outside this block
        self.ligand_resi: Union[int, str] = ligand_resi if ligand_resi else self._get_empty_resi()
        self.covalent_resn = covalent_resn.upper()
        self.covalent_resi = covalent_resi
        self._correct_covalent_resi()  # noqa defined in plonk. todo: split into covalent and anchor residue.
        self.extra_constraint = extra_protein_constraint
        self.pose_fx = pose_fx
        self.random_seed = monster_random_seed
        # user supplied settings override the defaults
        self.settings = {**default_settings, **settings}
        self._process_settings()
        # this is readied in case user wants to change it:
        self.monster = self.Monster(hits,
                                    average_position=self.monster_average_position,
                                    random_seed=self.random_seed)
        self.monster.throw_on_discard = self.monster_throw_on_discard
        self.monster.linker_element = self.settings.get('linker_element', 'N')
        # ## Fill by place and combine differently
        self.long_name: str = 'ligand'
        self.smiles = None
        # ## Filled by place
        self.merging_mode = "not_set"
        self.custom_map = {}  # this will be fixed in monster by `.fix_custom_map`
        # ## Filled by combine
        self.joining_cutoff = None
        # ## Calculated
        self.is_covalent = None
        self.params = None  # this will be the rdkit_to_params.Params instance
        self.mol = None  # this will be the unminimised mol
        self.constraint = None  # Rosetta string-form constraint defs
        self.modifications = {}
        self.unminimized_pdbblock = None
        self.igor = None
        self.unbound_pose = None
        self.minimized_pdbblock = None
        self.minimized_mol = None
        self.reference_mol = None  # filled only for validate
        # buffers etc.
        self._warned = warnings.catch_warnings()  # new one will be made. here for clarity
        self.energy_score = {'bound': {'total_score': float('nan')},
                             'unbound': {'total_score': float('nan')}}
        self.mrmsd = mRMSD.mock()
        self.ddG = float('nan')
        # for debug purposes
        self.tick = time.time()
        self.tock = float('inf')
        self.error_msg = ''

    # ----------------- init called methods ----------------------------------------------------------------------------

    @classmethod
    def slugify(cls, name: str) -> str:
        """
        Replace every run of non-word characters, underscores, dots and dashes in ``name`` with a single dash.

        :param name: the text to slugify
        :return: the slugified text
        """
        return re.sub(r'[\W_.-]+', '-', name)

    def _process_settings(self):
        """
        Hook called by ``__init__`` once ``self.settings`` is set. Does nothing in this base version.
        """
        # what settings.py are valid?
        pass
# Append the externally definable settings (as YAML) to the constructor's docstring.
# ``__doc__`` is None when Python runs with -OO (docstrings stripped), so fall back to ''
# instead of raising a TypeError at import time.
_VictorBase.__init__.__doc__ = ((_VictorBase.__init__.__doc__ or '')
                                + '\n Some arguments can be defined externally: \n'
                                + default_settings_yaml)