matteoferla/Fragmenstein

View on GitHub
fragmenstein/victor/_victor_common.py

Summary

Maintainability
D
2 days
Test Coverage
from ._victor_igor import _VictorIgor
from .minimalPDB import MinimalPDBParser
import os, re
from typing import *
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit_to_params import Constraints

# ----------------------------------------------------------------------------------------------------------------------

class _VictorCommon(_VictorIgor):

    def make_output_folder(self):
        os.makedirs(self.work_path, exist_ok=True)
        path = os.path.join(self.work_path, self.long_name)
        if not os.path.exists(path):
            os.mkdir(path)
        else:
            self.journal.warning(f'{self.long_name} - Folder {path} exists.')

    def _save_prerequisites(self):
        self._log_warnings()
        #  saving params
        self.journal.debug(f'{self.long_name} - saving params')
        params_file = os.path.join(self.work_path, self.long_name, self.long_name + '.params')
        self.params.dump(params_file)
        # saving holo
        self.journal.debug(f'{self.long_name} - saving holo (unminised)')
        holo_file = os.path.join(self.work_path, self.long_name, self.long_name + '.holo_unminimised.pdb')
        with open(holo_file, 'w') as w:
            w.write(self.unminimized_pdbblock)
        # saving constraint
        if self.constraint is not None:
            self.journal.debug(f'{self.long_name} - saving constraint')
            constraint_file = os.path.join(self.work_path, self.long_name, self.long_name + '.con')
            self.constraint.dump(constraint_file)
        else:  # basically impossible.
            constraint_file = ''
        return params_file, holo_file, constraint_file

    # =================== Constraint & attachment ======================================================================

    def _get_constraint(self, extra_constraint: Optional[str] = None) -> Union[Constraints, None]:
        # deal with covalent and non covalent separately
        if self.is_covalent:
            self.journal.debug(f'{self.long_name} - is covalent.')
            constraint = self._fix_covalent()
            if extra_constraint:
                constraint.custom_constraint += self.extra_constraint
            return constraint
        else:
            self.journal.debug(f'{self.long_name} - is not covalent.')
            constraint = self._fix_uncovalent()
            if extra_constraint:
                constraint.custom_constraint += self.extra_constraint
            return constraint

    def _fix_uncovalent(self):
        return Constraints.mock()

    def _fix_covalent(self):
        self.journal.debug(f'{self.long_name} - fixing for covalent')
        # to make life easier for analysis, CX is the attachment atom, CY is the one before it.
        war_def = self._get_war_def()
        warhead = Chem.MolFromSmiles(war_def['covalent'])
        self.params.rename_by_substructure(warhead, war_def['covalent_atomnames'])
        cov_def = [d for d in self.covalent_definitions if d['residue'] == self.covalent_resn][0]
        self.journal.debug(f'{self.long_name} - has a {war_def["name"]}')
        cons = Constraints(smiles=(war_def['covalent'], cov_def['smiles']),
                           names=[*war_def['covalent_atomnames'], *cov_def['atomnames']],
                           ligand_res=self.ligand_resi,
                           target_res=self.covalent_resi)
        # user added constraint
        if 'constraint' in war_def:
            cons.custom_constraint = war_def['constraint']
        return cons

    def add_extra_constraint(self, new_constraint:Union[str]=None):
        if new_constraint is None:
            return # do nothing
        new_constraint = new_constraint.strip()
        if self.extra_constraint is None:
            self.extra_constraint = new_constraint
        self.extra_constraint = self.extra_constraint.strip() + '\n' + new_constraint.strip()

    def make_coordinate_constraints_for_placement(self,
                                    mol: Optional[Chem.Mol] = None,
                                    origins: Optional[List[List[str]]] = None,
                                    std: Optional[List[float]] = None,
                                    mx: Optional[List[float]] = None) -> str:
        """
        See also ``make_coordinate_constraints_for_combination`` in combine.
        This is the normal function and uses the origin data,
        while the other constrains based on lack of novel attribute.

        :param mol: self.monster.positioned_mol if ommitted
        :param origins: self.monster.origin_from_mol(self.monster.positioned_mol)  if ommitted
            list of list of names of hit atoms used as original position
        :param std: self.monster.stdev_from_mol(mol)(self.monster.positioned_mol)  if omitted
            list of standard devs
        :param mx: elf.monster.max_from_mol(self.monster.positioned_mol)  if omitted
            list of maximum euclidean distance
        :return:
        """
        lines = []
        if mol is None:
            mol = self.monster.positioned_mol
        if origins is None:
            origins = self.monster.origin_from_mol(mol)
        if std is None:
            std = self.monster.stdev_from_mol(mol)
        if mx is None:
            mx = self.monster.max_from_mol(mol)
        conf = self.monster.positioned_mol.GetConformer()
        # Calculate
        for i in range(mol.GetNumAtoms()):
            if len(origins[i]) > 0:
                atom: Chem.Atom = mol.GetAtomWithIdx(i)
                if atom.GetAtomicNum() < 2:  # noqa
                    # zahl of 0 is *, 1 is H
                    continue
                elif atom.GetPDBResidueInfo() is None:
                    self.journal.critical(f'Atom {i} ({atom.GetSymbol()}) has no name!')
                    continue
                pos = conf.GetAtomPosition(i)
                if self.constraint_function_type.upper() == 'HARMONIC':
                    fxn = f'HARMONIC 0 {std[i] + 1}'
                elif self.constraint_function_type.upper() == 'FLAT_HARMONIC':
                    if len(origins[i]) > 1:
                        fxn = f'FLAT_HARMONIC 0 1.0 {mx[i]}'
                    else:
                        fxn = f'HARMONIC 0 1.0'
                elif self.constraint_function_type.upper() == 'BOUNDED':
                    fxn = f'BOUNDED 0 {mx[i]} 1 0.5 TAG'
                else:
                    raise ValueError(f'{self.constraint_function_type} is not HARMONIC or FADE or BOUNDED')
                atomname = atom.GetPDBResidueInfo().GetName()
                line = f'CoordinateConstraint {atomname} {self.ligand_resi} ' + \
                             f'CA {self.covalent_resi} ' + \
                             f'{pos.x} {pos.y} {pos.z} {fxn}\n'
                if 'nan' in line:
                    # todo: This is serious.
                    self.journal.warning(f'{atomname} lacks coordinates')
                    continue
                lines.append(line)
        return ''.join(lines)

    def make_coordinate_constraints_for_combination(self):
        """
        See also ``cls.make_coordinate_constraints_for_placement``.
        This operates based on ``atom.HasProp('_Novel')``, not origins!
        :return:
        """
        lines = []
        conf: Chem.Conformer = self.monster.positioned_mol.GetConformer()
        atom: Chem.Atom
        for i, atom in enumerate(self.monster.positioned_mol.GetAtoms()):
            if atom.GetAtomicNum() < 2:  # noqa
                # zahl of 0 is *, 1 is H
                continue
            elif atom.HasProp('_Novel') and atom.GetBoolProp('_Novel'):
                continue  # novels
            elif atom.GetPDBResidueInfo() is None:
                self.journal.critical(f'Atom {i} ({atom.GetSymbol()}) has no name!')
                continue
            else:
                pos = conf.GetAtomPosition(i)
                fxn = f'HARMONIC 0 1'  # the other do not make sense here.

                line = f'CoordinateConstraint {atom.GetPDBResidueInfo().GetName()} {self.ligand_resi} ' + \
                             f'CA {self.covalent_resi} ' + \
                             f'{pos.x} {pos.y} {pos.z} {fxn}\n'
                if 'nan' in line:
                    # todo: This is serious.
                    self.journal.warning(f'Atom {i} lacks coordinates')
                    continue
                lines.append(line)
        return ''.join(lines)

    # ------------------------------------------------------------------------------------------------------------------

    def _get_attachment_from_pdbblock_via_pymol(self) -> Union[None, Chem.Mol]:
        """
        DEPRACATED. FORMER CODE FOR ``_get_attachment_from_pdbblock``

        Yes, yes, I see the madness in using pymol to get an atom for rdkit to make a pose for pyrosetta.
        Hence why `find_attachment` will replace it.
        """
        import pymol2

        self.journal.debug(f'{self.long_name} - getting attachemnt atom')
        if not self.covalent_resn:
            return None
        else:
            if isinstance(self.covalent_resi, str):
                resi, chain = re.match('(\d+)(\w)', self.covalent_resi).groups()
                resi = int(resi)
            else:
                resi = self.covalent_resi
                chain = None
            with pymol2.PyMOL() as pymol:
                pymol.cmd.read_pdbstr(self.apo_pdbblock, 'prot')
                if self.covalent_resn == 'CYS':
                    name = 'SG'
                else:
                    raise NotImplementedError('only done for cys atm')
                try:
                    if chain is not None:
                        pdb = pymol.cmd.get_pdbstr(f'resi {resi} and name {name} and chain {chain}')
                    else:
                        pdb = pymol.cmd.get_pdbstr(f'resi {resi} and name {name}')
                except KeyboardInterrupt as err:
                    raise err
                except:
                    pdb = pymol.cmd.get_pdbstr(f'resi {resi} and name {name}')
                return Chem.MolFromPDBBlock(pdb)

    def _get_attachment_from_pdbblock(self) -> Union[None, Chem.Mol]:
        """
        ``find_attachment`` finds the actual attachment atom not the stated one.
        The historic code for this is now ``_get_attachment_from_pdbblock_via_pymol``
        :return:
        """
        if not self.covalent_resn:
            return None
        if self.covalent_resn == 'CYS':
            name = 'SG'
        else:
            raise NotImplementedError('only done for cys atm')
        if isinstance(self.covalent_resi, str):
            resi, chain = re.match('(\d+)(\w)', self.covalent_resi).groups()
            resi = int(resi)
        else:
            resi = self.covalent_resi
            chain = None
        parser = MinimalPDBParser(self.apo_pdbblock)
        for atom_row in parser.coordinates:
            if parser.get_residue_index(atom_row) != resi:
                continue
            if chain is not None and parser.get_chain(atom_row) != chain:
                continue
            if parser.get_atomname(atom_row) == name.strip():
                return Chem.MolFromPDBBlock(atom_row)
        return None

    def _get_war_def(self):
        for war_def in self.warhead_definitions:
            warhead = Chem.MolFromSmiles(war_def['covalent'])
            if self.mol.HasSubstructMatch(warhead):
                return war_def
        else:
            if self.mol.HasSubstructMatch(Chem.MolFromSmiles('*C')):
                self.journal.warning('Unknown type of covalent')
                return {'name': 'unknown',
                        'covalent': 'C~C*',
                        'covalent_atomnames': ['CY', 'CX', 'CONN1'],
                        'noncovalent': 'C~[C+]',  # clearly not
                        'noncovalent_atomnames': ['CY', 'CX']
                        }
            else:
                raise ValueError(f'{self.long_name} - Unsure what the warhead is {self.smiles}.')

    @classmethod
    def inventorize_warheads(cls, hits: List[Chem.Mol], covalent_form: bool = True) -> List[str]:
        """
        Get the warhead types of the list of hits

        :param hits:
        :param covalent_form: Are the hits already covalent (with *)
        :return: list of non-covalent, chloroacetimide, etc.
        """
        inventory = ['noncovalent'] * len(hits)
        for war_def in cls.warhead_definitions:
            wh = cls._get_warhead_from_definition(war_def, covalent_form)
            for i, hit in enumerate(hits):
                if hit.HasSubstructMatch(wh):
                    inventory[i] = war_def['name']
        return inventory

    @classmethod
    def _get_warhead_from_name(cls, warhead_name: str, covalent_form: bool) -> Chem.Mol:
        """
        get_warhead_definition returns a definition, this retursn a mol.

        :param warhead_name:
        :param covalent_form:
        :return:
        """
        war_def = cls.get_warhead_definition(warhead_name)
        wh = cls._get_warhead_from_definition(war_def, covalent_form)
        return wh

    @classmethod
    def _get_warhead_from_definition(cls, war_def: dict, covalent_form: bool):
        if covalent_form:
            wh = Chem.MolFromSmiles(war_def['covalent'])
        else:
            wh = Chem.MolFromSmiles(war_def['noncovalent'])
        return wh

    @classmethod
    def get_warhead_definition(cls, warhead_name: str):
        return cls._get_warhead_definitions(warhead_name)[0]

    @classmethod
    def _get_warhead_definitions(cls, warhead_name: str):
        """
        It is unlikely that alternative definitions are present. hence why hidden method.

        :param warhead_name:
        :return:
        """
        options = [wd for wd in cls.warhead_definitions if wd['name'] == warhead_name.lower()]
        if len(options) == 0:
            raise ValueError(f'{warhead_name} is not valid.')
        else:
            return options

    @classmethod
    def make_all_warhead_combinations(cls, smiles: str, warhead_name: str, canonical=True) -> Union[dict, None]:
        """
        Convert a unreacted warhead to a reacted one in the SMILES

        :param smiles: unreacted SMILES
        :param warhead_name: name in the definitions
        :param canonical: the SMILES canonical? (makes sense...)
        :return: dictionary of SMILES
        """
        mol = Chem.MolFromSmiles(smiles)
        war_def = cls.get_warhead_definition(warhead_name)
        ncv = Chem.MolFromSmiles(war_def['noncovalent'])
        if mol.HasSubstructMatch(ncv):
            combinations = {}
            for wd in cls.warhead_definitions:
                x = Chem.ReplaceSubstructs(mol, ncv, Chem.MolFromSmiles(wd['covalent']),
                                           replacementConnectionPoint=0)
                combinations[wd['name'] + '_covalent'] = Chem.MolToSmiles(x[0], canonical=canonical)
                x = Chem.ReplaceSubstructs(mol, ncv, Chem.MolFromSmiles(wd['noncovalent']),
                                           replacementConnectionPoint=0)
                combinations[wd['name'] + '_noncovalent'] = Chem.MolToSmiles(x[0], canonical=canonical)
            return combinations
        else:
            return None

    def harmonize_warheads(self, hits, warhead_harmonisation, covalent_form=True):
        """
        Harmonises and marks the atoms with `_Warhead` Prop.

        :param hits:
        :param warhead_harmonisation:
        :param covalent_form:
        :return:
        """
        inventory = self.inventorize_warheads(hits, covalent_form)
        # mark warhead atoms.
        for hit, warhead_name in zip(hits, inventory):
            if warhead_name != 'noncovalent':
                wh = self._get_warhead_from_name(warhead_name, covalent_form)
                match = hit.GetSubstructMatch(wh)
                if match == ():
                    self.journal.warning(f'{self.long_name} - failed to mark warhead. What is it??')
                else:
                    for i in match:
                        hit.GetAtomWithIdx(i).SetBoolProp('_Warhead', True)
        # harmonise
        if warhead_harmonisation == 'keep':
            return hits
        elif warhead_harmonisation == 'strip':
            new_hits = []
            for hit, warhead_name in zip(hits, inventory):
                if warhead_name != 'noncovalent':
                    wh = self._get_warhead_from_name(warhead_name, covalent_form)
                    nhit = AllChem.DeleteSubstructs(hit, wh)
                    Chem.SanitizeMol(nhit)
                    new_hits.append(nhit)
                else:
                    new_hits.append(hit)

            return new_hits
        elif warhead_harmonisation == 'first':
            if len(set(inventory) - {'noncovalent'}) <= 1:
                return hits
            else:
                first = None
                new_hits = []
                for hit, warhead_name in zip(hits, inventory):
                    if warhead_name != 'noncovalent':
                        if first is None:
                            first = warhead_name
                            new_hits.append(hit)
                        elif warhead_name == first:
                            new_hits.append(hit)
                        else:
                            wh = self._get_warhead_from_name(warhead_name, covalent_form)
                            nhit = AllChem.DeleteSubstructs(hit, wh)
                            Chem.SanitizeMol(nhit)
                            new_hits.append(nhit)
                    else:
                        new_hits.append(hit)
                return new_hits
        else: # it is a warhead name.
            raise NotImplementedError

    def MMFF_score(self, mol: Optional[Chem.Mol] = None, delta: bool = False) -> float:
        if mol is None:
            mol = self.igor.mol_from_pose()
        return self.monster.MMFF_score(mol, delta=delta)