KarrLab/wc_utils

View on GitHub
wc_utils/util/chem/core.py

Summary

Maintainability
A
1 hr
Test Coverage
C
76%
""" Chemistry utilities

:Author: Jonathan Karr <jonrkarr@gmail.com>
:Date: 2018-02-07
:Copyright: 2018, Karr Lab
:License: MIT
"""

import attrdict
import mendeleev
import re

try:
    import openbabel
except ModuleNotFoundError:  # pragma: no cover
    pass  # pragma: no cover


class EmpiricalFormula(attrdict.AttrDefault):
    """ An empirical formula

    Maps element symbols (e.g. ``'C'``, ``'Na'``) to float coefficients. Elements
    without a coefficient read as ``0.`` (the default factory is :obj:`float`), and
    elements whose coefficient is set to zero are removed from the mapping.
    """

    def __init__(self, value=''):
        """
        Args:
            value (:obj:`dict` or :obj:`str`): dictionary or string representation of the formula

        Raises:
            :obj:`ValueError`: if :obj:`value` is not a valid formula
        """
        super(EmpiricalFormula, self).__init__(float)

        if isinstance(value, (dict, attrdict.AttrDict, attrdict.AttrDefault)):
            for element, coefficient in value.items():
                self[element] = coefficient
        else:
            # The exponent requires at least one digit so that a dangling `e`
            # (e.g. "C1e") is rejected here with the formula error message rather
            # than failing later inside `float()`
            if not re.match(r'^(([A-Z][a-z]?)(\-?[0-9]+(\.?[0-9]*)?(e[\-\+]?[0-9]+)?)?)*$', value):
                raise ValueError('"{}" is not a valid formula'.format(value))

            # Repeated elements accumulate (e.g. "CHC" has two carbons); a missing
            # coefficient means one
            for element, coefficient, _, _ in re.findall(r'([A-Z][a-z]?)(\-?[0-9]+(\.?[0-9]*)?(e[\-\+]?[0-9]+)?)?', value):
                self[element] += float(coefficient or '1')

    def __setitem__(self, element, coefficient):
        """ Set the count of an element

        Args:
            element (:obj:`str`): element symbol
            coefficient (:obj:`float`): element coefficient

        Raises:
            :obj:`ValueError`: if the element is not a one or two letter symbol or
                the coefficient cannot be converted to a float
        """
        if not re.match(r'^[A-Z][a-z]?$', element):
            raise ValueError('Element must be a one or two letter string')

        try:
            coefficient = float(coefficient)
        except (TypeError, ValueError):
            # `float(None)` and similar raise `TypeError`; report every
            # unconvertible coefficient with the documented `ValueError`
            raise ValueError('Coefficient must be a float')

        super(EmpiricalFormula, self).__setitem__(element, coefficient)
        if coefficient == 0.:
            # keep the mapping free of elements with zero coefficients
            self.pop(element)

    def get_molecular_weight(self):
        """ Get the molecular weight

        Returns:
            :obj:`float`: molecular weight
        """
        return sum(
            (mendeleev.element(element).atomic_weight * coefficient
             for element, coefficient in self.items()),
            0.)

    def __str__(self):
        """ Generate a string representation of the formula

        Elements are concatenated in sorted order. Coefficients equal to one are
        omitted and integer-valued coefficients are printed without a decimal point.

        Returns:
            :obj:`str`: string representation of the formula
        """
        vals = []
        for element, coefficient in self.items():
            if coefficient == 0.:
                continue  # pragma: no cover # unreachable due to `__setitem__`
            if coefficient == 1.:
                vals.append(element)
            elif coefficient.is_integer():
                # `is_integer` is false for `inf` and `nan`, for which `int()` would raise
                vals.append(element + str(int(coefficient)))
            else:
                vals.append(element + str(coefficient))
        return ''.join(sorted(vals))

    def __contains__(self, element):
        """ Test whether :obj:`element` is a syntactically valid element symbol

        Note that this does not test whether a coefficient has been set for the
        element: every one or two letter symbol is reported as contained.
        NOTE(review): presumably this is so that attribute-style access to elements
        without a coefficient returns the default of zero -- confirm against `attrdict`.

        Args:
            element (:obj:`str`): element symbol

        Returns:
            :obj:`bool`: :obj:`True` if :obj:`element` is an uppercase letter
                optionally followed by a lowercase letter
        """
        return re.match(r'^[A-Z][a-z]?$', element) is not None

    def __add__(self, other):
        """ Add two empirical formulae

        Args:
            other (:obj:`EmpiricalFormula` or :obj:`str`): another empirical formula

        Returns:
            :obj:`EmpiricalFormula`: sum of the empirical formulae
        """
        if isinstance(other, str):
            other = EmpiricalFormula(other)

        total = EmpiricalFormula(self)
        for element, coefficient in other.items():
            total[element] += coefficient

        return total

    def __sub__(self, other):
        """ Subtract two empirical formulae

        Args:
            other (:obj:`EmpiricalFormula` or :obj:`str`): another empirical formula

        Returns:
            :obj:`EmpiricalFormula`: difference of the empirical formulae
        """
        if isinstance(other, str):
            other = EmpiricalFormula(other)

        diff = EmpiricalFormula(self)
        for element, coefficient in other.items():
            diff[element] -= coefficient

        return diff

    def __mul__(self, quantity):
        """ Multiply an empirical formula by a scalar

        Args:
            quantity (:obj:`float`): factor to multiply each coefficient by

        Returns:
            :obj:`EmpiricalFormula`: multiplication of the empirical formula by :obj:`quantity`
        """
        result = EmpiricalFormula()
        for element, coefficient in self.items():
            result[element] = quantity * coefficient

        return result

    def __rmul__(self, quantity):
        """ Multiply an empirical formula by a scalar on the left (``quantity * formula``)

        Args:
            quantity (:obj:`float`): factor to multiply each coefficient by

        Returns:
            :obj:`EmpiricalFormula`: multiplication of the empirical formula by :obj:`quantity`
        """
        return self.__mul__(quantity)

    def __div__(self, quantity):
        """ Divide an empirical formula by a scalar (for Python 2)

        Args:
            quantity (:obj:`float`): value to divide each coefficient by

        Returns:
            :obj:`EmpiricalFormula`: division of the empirical formula by :obj:`quantity`
        """
        return self.__truediv__(quantity)  # pragma: no cover # only used in Python 2

    def __truediv__(self, quantity):
        """ Divide an empirical formula by a scalar

        Args:
            quantity (:obj:`float`): value to divide each coefficient by

        Returns:
            :obj:`EmpiricalFormula`: division of the empirical formula by :obj:`quantity`
        """
        result = EmpiricalFormula()
        for element, coefficient in self.items():
            result[element] = coefficient / quantity

        return result

    def __hash__(self):
        """ Generate a hash from the string representation of the formula

        The hash changes when the formula is modified, so a formula should not be
        modified while it is used as a dictionary key or set member.

        Returns:
            :obj:`int`: hash
        """
        return hash(str(self))


class OpenBabelUtils(object):
    """ Utilities for getting the formula of, and exporting, OpenBabel molecules """

    @staticmethod
    def get_formula(mol):
        """ Get the formula of an OpenBabel molecule

        Args:
            mol (:obj:`openbabel.OBMol`): molecule

        Returns:
            :obj:`EmpiricalFormula`: formula
        """
        # Remove `-` and `+` characters from both ends of the formula string
        # (presumably the charge signs that OpenBabel appends -- TODO confirm)
        return EmpiricalFormula(mol.GetFormula().strip('-+'))

    @classmethod
    def _get_conversion(cls, format, options, format_name=None):
        """ Create an OpenBabel conversion configured for an output format

        Args:
            format (:obj:`str`): OpenBabel output format
            options (:obj:`list` of :obj:`str`): export options
            format_name (:obj:`str`, optional): name of the format to use in the
                error message; defaults to :obj:`format`

        Returns:
            :obj:`openbabel.OBConversion`: conversion with the output format and options set

        Raises:
            :obj:`AssertionError`: if the output format cannot be set
        """
        conversion = openbabel.OBConversion()

        # `SetOutFormat` must be called outside of an `assert` statement; otherwise
        # it would be skipped when Python runs with `-O`, which removes assertions.
        # `AssertionError` is kept so that the exception type seen by callers is unchanged.
        if not conversion.SetOutFormat(format):
            raise AssertionError('Unable to set format to {}'.format(
                format if format_name is None else format_name))

        for option in options:
            conversion.SetOptions(option, conversion.OUTOPTIONS)
        return conversion

    @classmethod
    def get_inchi(cls, mol, options=('r', 'F',)):
        """ Get the InChI-encoded structure of an OpenBabel molecule

        Args:
            mol (:obj:`openbabel.OBMol`): molecule
            options (:obj:`list` of :obj:`str`, optional): export options

        Returns:
            :obj:`str`: InChI-encoded structure

        Raises:
            :obj:`AssertionError`: if the output format cannot be set to InChI
        """
        conversion = cls._get_conversion('inchi', options, format_name='InChI')
        inchi = conversion.WriteString(mol, True)
        # rewrite the `InChI=1/` prefix as `InChI=1S/` and drop everything from the
        # first `/f` onwards
        inchi = inchi.replace('InChI=1/', 'InChI=1S/')
        inchi = inchi.partition('/f')[0]
        return inchi

    @classmethod
    def get_smiles(cls, mol, options=()):
        """ Get the SMILES-encoded structure of an OpenBabel molecule

        Args:
            mol (:obj:`openbabel.OBMol`): molecule
            options (:obj:`list` of :obj:`str`, optional): export options

        Returns:
            :obj:`str`: SMILES-encoded structure

        Raises:
            :obj:`AssertionError`: if the output format cannot be set to SMILES
        """
        conversion = cls._get_conversion('smiles', options, format_name='Daylight SMILES')
        # keep only the text before the first tab, without surrounding whitespace
        return conversion.WriteString(mol).partition('\t')[0].strip()

    @classmethod
    def export(cls, mol, format, options=()):
        """ Export an OpenBabel molecule to format

        InChI and SMILES exports are delegated to :obj:`get_inchi` and
        :obj:`get_smiles` with the given :obj:`options`.

        Args:
            mol (:obj:`openbabel.OBMol`): molecule
            format (:obj:`str`): format
            options (:obj:`list` of :obj:`str`, optional): export options

        Returns:
            :obj:`str`: format representation of molecule

        Raises:
            :obj:`AssertionError`: if the output format cannot be set to :obj:`format`
        """
        if format == 'inchi':
            return cls.get_inchi(mol, options=options)
        if format in ['smi', 'smiles']:
            return cls.get_smiles(mol, options=options)

        conversion = cls._get_conversion(format, options)
        return conversion.WriteString(mol, True)