KarrLab/wc_kb

View on GitHub
wc_kb/core.py

Summary

Maintainability
F
1 wk
Test Coverage
A
96%
""" Core schema to represent a knowledge base to build models

:Author: Balazs Szigeti <balazs.szigeti@mssm.edu>
:Author: Jonathan Karr <karr@mssm.edu>
:Author: Bilal Shaikh  <bilal.shaikh@columbia.edu>
:Author: Arthur Goldberg <Arthur.Goldberg@mssm.edu>
:Author: Yin Hoon Chew <yinhoon.chew@mssm.edu>
:Date: 2018-02-07
:Copyright: 2018, Karr Lab
:License: MIT
"""

from natsort import natsorted, ns
from math import floor
from pyfaidx import Fasta
from wc_utils.util import chem
from wc_utils.util.chem.core import OpenBabelUtils
from wc_utils.util.chem.marvin import get_major_micro_species
from wc_utils.util.units import unit_registry
from wc_onto import onto as kbOnt
import abc
import Bio.Alphabet
import Bio.Seq
import enum
import math
import obj_tables
import obj_tables.abstract
import obj_tables.sci.onto
import obj_tables.sci.units
import openbabel
import re
import token
from obj_tables import (BooleanAttribute, EnumAttribute, FloatAttribute, IntegerAttribute,
                       RegexAttribute, SlugAttribute, StringAttribute, LongStringAttribute,
                       OneToOneAttribute, ManyToOneAttribute, ManyToManyAttribute,
                       InvalidAttribute, TableFormat)
from obj_tables.math.expression import (ManyToOneExpressionAttribute,
                                  ExpressionExpressionTermMeta, Expression)
from wc_utils.util.enumerate import CaseInsensitiveEnum
from wc_utils.util.types import get_subclasses
from wc_utils.util.ontology import are_terms_equivalent
import os

from ._version import __version__ as wc_kb_version


#####################
#####################
# Enumeration classes

# Polymer strand. '+' and '-' carry the same values as 'positive' and
# 'negative', so `enum` registers them as aliases of those members.
PolymerStrand = enum.Enum(
    'PolymerStrand',
    names=(
        ('positive', 1),
        ('+', 1),
        ('negative', -1),
        ('-', -1),
    ),
)

# Polymer direction
PolymerDirection = enum.Enum(
    'PolymerDirection',
    names=(
        ('forward', 1),
        ('reverse', -1),
    ),
)


#####################
#####################
# Attributes

class SubunitAttribute(ManyToManyAttribute):
    """ Many-to-many attribute whose related objects are `SpeciesTypeCoefficient` subunits """

    def __init__(self, related_name='', verbose_name='', verbose_related_name='', description=''):
        """
        Args:
            related_name (:obj:`str`, optional): name of related attribute on `related_class`
            verbose_name (:obj:`str`, optional): verbose name
            verbose_related_name (:obj:`str`, optional): verbose related name
            description (:obj:`str`, optional): description
        """
        # The related class is given by name; it is not referenced as an
        # object here
        super().__init__(
            'SpeciesTypeCoefficient',
            related_name=related_name,
            verbose_name=verbose_name,
            verbose_related_name=verbose_related_name,
            description=description,
        )

    def serialize(self, subunits, encoded=None):
        """ Serialize subunits as their individual serializations joined by ``' + '``

        Args:
            subunits (:obj:`list` of :obj:`SpeciesTypeCoefficient`): Python representation of subunits
            encoded (:obj:`dict`, optional): dictionary of objects that have already been encoded

        Returns:
            :obj:`str`: simple Python representation; empty string if there are no subunits
        """
        if not subunits:
            return ''

        # Natural, case-insensitive ordering by species type id
        ordered = natsorted(
            subunits,
            key=lambda subunit: subunit.species_type.id,
            alg=ns.IGNORECASE)

        return ' + '.join(subunit.serialize() for subunit in ordered)

    def deserialize(self, value, objects, decoded=None):
        """ Deserialize value by delegating to `SpeciesTypeCoefficient.deserialize`

        Args:
            value (:obj:`str`): String representation
            objects (:obj:`dict`): dictionary of objects, grouped by model
            decoded (:obj:`dict`, optional): dictionary of objects that have already been decoded

        Returns:
            :obj:`tuple` of `object`, `InvalidAttribute` or `None`: tuple of cleaned value and cleaning error
        """
        result = SpeciesTypeCoefficient.deserialize(self, value, objects)
        return result


class OneToOneSpeciesAttribute(OneToOneAttribute):
    """ One-to-one attribute whose related object is a `Species` """

    def __init__(self, related_name='', verbose_name='', verbose_related_name='', description=''):
        """
        Args:
            related_name (:obj:`str`, optional): name of related attribute on `related_class`
            verbose_name (:obj:`str`, optional): verbose name
            verbose_related_name (:obj:`str`, optional): verbose related name
            description (:obj:`str`, optional): description
        """
        super().__init__(
            'Species',
            related_name=related_name,
            min_related=1,
            min_related_rev=0,
            verbose_name=verbose_name,
            verbose_related_name=verbose_related_name,
            description=description,
        )

    def serialize(self, value, encoded=None):
        """ Serialize the related species by calling its own `serialize` method

        Args:
            value (:obj:`Model`): Python representation
            encoded (:obj:`dict`, optional): dictionary of objects that have already been encoded

        Returns:
            :obj:`str`: simple Python representation
        """
        serialized = value.serialize()
        return serialized

    def deserialize(self, value, objects, decoded=None):
        """ Deserialize value by delegating to `Species.deserialize`

        Args:
            value (:obj:`str`): String representation
            objects (:obj:`dict`): dictionary of objects, grouped by model
            decoded (:obj:`dict`, optional): dictionary of objects that have already been decoded

        Returns:
            :obj:`tuple` of :obj:`Species`, :obj:`InvalidAttribute` or :obj:`None`: :obj:`tuple` of cleaned value
                and cleaning error
        """
        result = Species.deserialize(self, value, objects)
        return result


class IdentifierAttribute(ManyToManyAttribute):
    """ Identifier attribute

    Serialized as a comma-separated list of ``namespace:id`` pairs.
    """

    def __init__(self, related_name='', verbose_name='', verbose_related_name='', description=''):
        """
        Args:
            related_name (:obj:`str`, optional): name of related attribute on `related_class`
            verbose_name (:obj:`str`, optional): verbose name
            verbose_related_name (:obj:`str`, optional): verbose related name
            description (:obj:`str`, optional): description
        """
        super(IdentifierAttribute, self).__init__(Identifier,
                                                  related_name=related_name, min_related=0, min_related_rev=0,
                                                  verbose_name=verbose_name, verbose_related_name=verbose_related_name,
                                                  description=description)

    def serialize(self, identifiers, encoded=None):
        """ Serialize related objects

        Args:
            identifiers (:obj:`list` of :obj:`Model`): a list of instances of Identifier Python representation
            encoded (:obj:`dict`, optional): dictionary of objects that have already been encoded

        Returns:
            :obj:`str`: serialized identifiers joined by ``', '``; empty string if there are none
        """
        if not identifiers:
            return ''

        # The loop variable must not be called `obj_tables`: that would shadow
        # the module of the same name inside the generator.
        return ', '.join(identifier.serialize() for identifier in identifiers)

    def deserialize(self, value, objects, decoded=None):
        """ Deserialize value

        The whole string must be a well-formed list; trailing malformed text is
        reported as an error rather than silently dropped. Identifiers that are
        not yet present in ``objects`` are created and registered there.

        Args:
            value (:obj:`str`): String representation
            objects (:obj:`dict`): dictionary of objects, grouped by model
            decoded (:obj:`dict`, optional): dictionary of objects that have already been decoded

        Returns:
            :obj:`tuple` of :obj:`list` of :obj:`Identifier`, :obj:`InvalidAttribute` or :obj:`None`: :obj:`tuple` of cleaned value
                and cleaning error
        """
        if not value:
            return ([], None)

        # `[1:-1]` strips the leading `^` and trailing `$` anchors so the
        # attribute patterns can be embedded in a larger expression.
        obj_pattern = r'({}) *\: *({})'.format(Identifier.namespace.pattern[1:-1], Identifier.id.pattern[1:-1])
        # Trailing spaces are tolerated; anything else after the last
        # identifier makes the list invalid.
        lst_pattern = obj_pattern + r'( *, *{})* *'.format(obj_pattern)

        # `fullmatch` (not `match`) so that the entire value is validated
        if not re.fullmatch(lst_pattern, value, flags=re.I):
            return (None, InvalidAttribute(self, ['Incorrectly formatted list of identifiers: {}'.format(value)]))

        known = objects.setdefault(self.related_class, {})
        objs = []
        for namespace_name, data_id in re.findall(obj_pattern, value, flags=re.I):
            # `_serialize` is a static method; no instance is needed to build the key
            serialized_value = self.related_class._serialize(namespace=namespace_name, id=data_id)
            if serialized_value in known:
                obj = known[serialized_value]
            else:
                obj = self.related_class(namespace=namespace_name, id=data_id)
                known[serialized_value] = obj
            objs.append(obj)
        return (objs, None)


class ReactionParticipantAttribute(ManyToManyAttribute):
    """ Reaction participants

    Participants are serialized as a reaction equation. When every participant
    is in the same compartment the compartment is written once as a prefix
    (``[c]: A + (2) B ==> C``); otherwise each species carries its own
    compartment (``A[c] + (2) B[c] ==> C[e]``).
    """

    def __init__(self, related_name='', verbose_name='', verbose_related_name='', description=''):
        """
        Args:
            related_name (:obj:`str`, optional): name of related attribute on `related_class`
            verbose_name (:obj:`str`, optional): verbose name
            verbose_related_name (:obj:`str`, optional): verbose related name
            description (:obj:`str`, optional): description
        """
        super(ReactionParticipantAttribute, self).__init__('SpeciesCoefficient', related_name=related_name,
                                                           verbose_name=verbose_name,
                                                           verbose_related_name=verbose_related_name,
                                                           description=description)

    def serialize(self, participants, encoded=None):
        """ Serialize related object

        Participants with a negative coefficient go on the left-hand side, those
        with a positive coefficient on the right-hand side; participants with a
        zero coefficient are omitted.

        Args:
            participants (:obj:`list` of :obj:`SpeciesCoefficient`): Python representation of reaction participants
            encoded (:obj:`dict`, optional): dictionary of objects that have already been encoded

        Returns:
            :obj:`str`: simple Python representation
        """
        if not participants:
            return ''

        # A single shared compartment is factored out as a global prefix
        comps = set(part.species.compartment for part in participants)
        if len(comps) == 1:
            global_comp = comps.pop()
        else:
            global_comp = None

        if global_comp:
            participants = natsorted(
                participants, lambda part: part.species.species_type.id, alg=ns.IGNORECASE)
        else:
            participants = natsorted(participants, lambda part: (
                part.species.species_type.id, part.species.compartment.id), alg=ns.IGNORECASE)

        show_compartment = global_comp is None
        lhs = []
        rhs = []
        for part in participants:
            if part.coefficient < 0:
                lhs.append(part.serialize(
                    show_compartment=show_compartment, show_coefficient_sign=False))
            elif part.coefficient > 0:
                rhs.append(part.serialize(
                    show_compartment=show_compartment, show_coefficient_sign=False))

        if global_comp:
            return '[{}]: {} ==> {}'.format(global_comp.get_primary_attribute(), ' + '.join(lhs), ' + '.join(rhs))
        else:
            return '{} ==> {}'.format(' + '.join(lhs), ' + '.join(rhs))

    @staticmethod
    def _get_global_compartment(comp_id, objects, errors):
        """ Look up the global compartment of a reaction equation

        Args:
            comp_id (:obj:`str`): compartment id parsed from the equation
            objects (:obj:`dict`): dictionary of objects, grouped by model
            errors (:obj:`list` of :obj:`str`): list to which an error message is
                appended if the compartment is not defined

        Returns:
            :obj:`Compartment`: compartment, or :obj:`None` if it is not defined
        """
        # `objects` may have no entry for `Compartment` at all; treat that the
        # same as "compartment not defined" instead of raising `KeyError`.
        compartments = objects.get(Compartment, {})
        if comp_id in compartments:
            return compartments[comp_id]
        errors.append('Undefined compartment "{}"'.format(comp_id))
        return None

    def deserialize(self, value, objects, decoded=None):
        """ Deserialize value

        Four equation shapes are recognised: a global-compartment equation, an
        equation with per-species compartments, and two one-sided forms with a
        global compartment and a single species type (``[c]: ==> A`` and
        ``[c]: A ==> ``).

        Args:
            value (:obj:`str`): String representation
            objects (:obj:`dict`): dictionary of objects, grouped by model
            decoded (:obj:`dict`, optional): dictionary of objects that have already been decoded

        Returns:
            :obj:`tuple` of `list` of `SpeciesCoefficient`, `InvalidAttribute` or `None`: tuple of cleaned value
                and cleaning error
        """
        errors = []

        # `[1:-1]` strips the `^`/`$` anchors of the id patterns so they can be embedded
        st_id = SpeciesType.id.pattern[1:-1]
        comp_id = Compartment.id.pattern[1:-1]
        stoch = r'\(((\d*\.?\d+|\d+\.)(e[\-\+]?\d+)?)\)'
        gbl_part = r'({} )*({})'.format(stoch, st_id)
        lcl_part = r'({} )*({}\[{}\])'.format(stoch, st_id, comp_id)
        gbl_side = r'{}( \+ {})*'.format(gbl_part, gbl_part)
        lcl_side = r'{}( \+ {})*'.format(lcl_part, lcl_part)
        gbl_pattern = r'^\[({})\]: ({}) ==> ({})$'.format(
            comp_id, gbl_side, gbl_side)
        lcl_pattern = r'^({}) ==> ({})$'.format(lcl_side, lcl_side)

        import_pattern = r'^\[({})\]: ==> ({})$'.format(comp_id, st_id)
        export_pattern = r'^\[({})\]: ({}) ==> $'.format(comp_id, st_id)

        global_match = re.match(gbl_pattern, value, flags=re.I)
        local_match = re.match(lcl_pattern, value, flags=re.I)
        import_match = re.match(import_pattern, value, flags=re.I)
        export_match = re.match(export_pattern, value, flags=re.I)

        # NOTE(review): the group numbers below (11, 41, 49) are hard-coded and
        # depend on how many capture groups the species type and compartment id
        # patterns contain -- confirm whenever those patterns change.
        if global_match:
            global_comp = self._get_global_compartment(
                global_match.group(1), objects, errors)
            lhs = global_match.group(11)
            rhs = global_match.group(41)

        elif local_match:
            global_comp = None
            lhs = local_match.group(1)
            rhs = local_match.group(49)

        elif import_match:
            global_comp = self._get_global_compartment(
                import_match.group(1), objects, errors)
            lhs = None
            rhs = import_match.group(11)  # todo (carried over from the original code)

        elif export_match:
            global_comp = self._get_global_compartment(
                export_match.group(1), objects, errors)
            lhs = export_match.group(11)  # todo (carried over from the original code)
            rhs = None

        else:
            return (None, InvalidAttribute(self, ['Incorrectly formatted participants: {}'.format(value)]))

        lhs_parts = []
        rhs_parts = []
        if lhs:
            lhs_parts, lhs_errors = self.deserialize_side(
                -1., lhs, objects, global_comp)
            errors.extend(lhs_errors)
        if rhs:
            rhs_parts, rhs_errors = self.deserialize_side(
                1., rhs, objects, global_comp)
            errors.extend(rhs_errors)
        parts = lhs_parts + rhs_parts

        if errors:
            return (None, InvalidAttribute(self, errors))
        return (parts, None)

    def deserialize_side(self, direction, value, objects, global_comp):
        """ Deserialize the LHS or RHS of a reaction equation

        Species and species coefficients that are not yet present in ``objects``
        are created and registered there.

        Args:
            direction (:obj:`float`): -1. indicates LHS, +1. indicates RHS
            value (:obj:`str`): String representation
            objects (:obj:`dict`): dictionary of objects, grouped by model
            global_comp (:obj:`Compartment`): global compartment of the reaction

        Returns:
            :obj:`tuple`:
                * :obj:`list` of :obj:`SpeciesCoefficient`: list of species coefficients
                * :obj:`list` of :obj:`str`: list of error messages

        Raises:
            :obj:`ValueError`: if a species cannot be deserialized even though its
                species type and compartment were both found
        """
        parts = []
        errors = []

        st_id = SpeciesType.id.pattern[1:-1]
        comp_id = Compartment.id.pattern[1:-1]
        pattern = r'(\(((\d*\.?\d+|\d+\.)(e[\-\+]?\d+)?)\) )*({})(\[({})\])*'.format(st_id, comp_id)
        # Indices into the tuples returned by `re.findall`.
        # NOTE(review): like the group numbers in `deserialize`, these depend on
        # the number of capture groups in the species type id pattern.
        i_st = 4
        i_comp = 15

        # Guard against `objects` having no `Compartment` entry, as
        # `Species.deserialize` does, instead of raising `KeyError`.
        compartments = objects.get(Compartment, {})

        for part in re.findall(pattern, value, flags=re.I):
            part_errors = []

            species_type = None
            for species_type_cls in get_subclasses(SpeciesType):
                if species_type_cls in objects and part[i_st] in objects[species_type_cls]:
                    species_type = objects[species_type_cls][part[i_st]]
                    break
            if not species_type:
                part_errors.append(
                    'Undefined species type "{}"'.format(part[i_st]))

            if global_comp:
                compartment = global_comp
            elif part[i_comp] in compartments:
                compartment = compartments[part[i_comp]]
            else:
                part_errors.append(
                    'Undefined compartment "{}"'.format(part[i_comp]))

            # A missing coefficient means 1; the sign comes from the side
            coefficient = direction * float(part[1] or 1.)

            if part_errors:
                errors += part_errors
            else:
                spec_primary_attribute = Species.gen_id(species_type.get_primary_attribute(),
                                                        compartment.get_primary_attribute())
                species, error = Species.deserialize(
                    self, spec_primary_attribute, objects)
                if error:
                    raise ValueError('Invalid species "{}"'.format(
                        spec_primary_attribute))
                    # pragma: no cover # unreachable due to error checking above

                # Zero-coefficient participants are dropped
                if coefficient != 0:
                    if SpeciesCoefficient not in objects:
                        objects[SpeciesCoefficient] = {}
                    serialized_value = SpeciesCoefficient._serialize(
                        species, coefficient)
                    if serialized_value in objects[SpeciesCoefficient]:
                        rxn_part = objects[SpeciesCoefficient][serialized_value]
                    else:
                        rxn_part = SpeciesCoefficient(
                            species=species, coefficient=coefficient)
                        objects[SpeciesCoefficient][serialized_value] = rxn_part
                    parts.append(rxn_part)

        return (parts, errors)


#####################
#####################
# Base classes


class Identifier(obj_tables.Model):
    """ Reference to an entity in an external namespace

    The string form of an identifier is ``namespace:id``.

    Attributes:
        namespace (:obj:`str`): namespace
        id (:obj:`str`): identifier within the namespace

    Related attributes:
        compartments (:obj:`list` of :obj:`Compartment`): compartments
        species_types (:obj:`list` of :obj:`SpeciesType`): species_types
        concentrations (:obj:`list` of :obj:`Concentration`): concentrations
        loci (:obj:`list` of :obj:`PolymerLocus`): loci
        properties (:obj:`list` of :obj:`SpeciesTypeProperty`): species type properties
        reactions (:obj:`list` of :obj:`Reaction`): reactions
        rate_laws (:obj:`list` of :obj:`RateLaw`): rate_laws
        observables (:obj:`list` of :obj:`Observable`): observables
    """
    # Both parts must be non-empty and may not contain a space, colon, or comma
    namespace = obj_tables.RegexAttribute(pattern=r'^[^ \:,]+$')
    id = obj_tables.RegexAttribute(pattern=r'^[^ \:,]+$')

    class Meta(obj_tables.Model.Meta):
        attribute_order = ('namespace', 'id')
        table_format = TableFormat.cell
        ordering = ('namespace', 'id')

    @staticmethod
    def _serialize(namespace, id):
        """ Generate the string representation ``namespace:id``

        Args:
            namespace (:obj:`str`): namespace
            id (:obj:`str`): identifier within the namespace

        Returns:
            :obj:`str`: the namespace and id joined by a colon
        """
        return '{}:{}'.format(namespace, id)

    def serialize(self):
        """ Generate the string representation of this identifier

        Returns:
            :obj:`str`: this object's namespace and id joined by a colon
        """
        return self._serialize(self.namespace, self.id)


class KnowledgeBaseObject(obj_tables.Model):
    """ Knowledge of a biological entity

    Attributes:
        id (:obj:`str`): identifier
        name (:obj:`str`): name
        synonyms (:obj:`str`): synonyms
        comments (:obj:`str`): comments
    """

    id = obj_tables.StringAttribute(primary=True, unique=True)
    name = obj_tables.StringAttribute()
    synonyms = obj_tables.StringAttribute()
    comments = obj_tables.LongStringAttribute()

    def get_nested_metadata(self):
        """ Collect the references, identifiers and comments attached to this object,
        to its properties (if any), and to the evidence and experiments of both

        Returns:
            :obj:`dict`: dictionary with the keys ``self.id``, :obj:`SpeciesTypeProperty`,
                :obj:`Evidence` and :obj:`Experiment`; each value is a list of the
                references, identifiers and comments found for that category
        """

        metadataObjs = {self.id: [], SpeciesTypeProperty: [], Evidence: [], Experiment: []}
        metadataObjs = self._append_metadata_entries(key=self.id, metadataObjs=metadataObjs)
        metadataObjs = self._parse_EviNExperiment(metadataObjs)

        if hasattr(self, 'properties') and self.properties is not None:
            for prop in self.properties:
                metadataObjs = prop._append_metadata_entries(key=SpeciesTypeProperty, metadataObjs=metadataObjs)
                metadataObjs = prop._parse_EviNExperiment(metadataObjs)

        # Returned unconditionally so that objects without `properties` also
        # get their metadata back rather than `None`.
        return metadataObjs

    def _parse_EviNExperiment(self, metadataObjs):
        """ Add the metadata of this object's evidence, and of the experiment behind
        each piece of evidence, to `metadataObjs`

        Args:
            metadataObjs (:obj:`dict`): dictionary of metadata lists; must contain the
                keys :obj:`Evidence` and :obj:`Experiment`

        Returns:
            :obj:`dict`: the same dictionary, updated in place
        """
        if hasattr(self, 'evidence'):
            for evidence in self.evidence:
                metadataObjs = evidence._append_metadata_entries(key=Evidence, metadataObjs=metadataObjs)
                if evidence.experiment is not None:
                    metadataObjs = evidence.experiment._append_metadata_entries(key=Experiment, metadataObjs=metadataObjs)

        return metadataObjs

    def _append_metadata_entries(self, key, metadataObjs):
        """ Append this object's references, identifiers and (non-empty) comments
        to the list stored under `key` in `metadataObjs`

        This reads ``self.references`` and ``self.identifiers``, which are not
        declared on this base class, so it must be called on an object whose
        class provides them.

        Args:
            key (:obj:`object`): key of `metadataObjs` whose list is extended
            metadataObjs (:obj:`dict`): dictionary of metadata lists

        Returns:
            :obj:`dict`: the same dictionary, updated in place
        """
        entries = metadataObjs[key]
        entries.extend(self.references)
        entries.extend(self.identifiers)

        if self.comments != '':
            entries.append(self.comments)

        return metadataObjs


class KnowledgeBase(KnowledgeBaseObject):
    """ A knowledge base

    Attributes:
        translation_table (:obj:`int`): translation table
        version (:obj:`str`): version of the KB
        url (:obj:`str`): url of the KB Git repository
        branch (:obj:`str`): branch of the KB Git repository
        revision (:obj:`str`): revision of the KB Git repository
        wc_kb_version (:obj:`str`): version of ``wc_kb``

    Related attributes:
        cell (:obj:`Cell`): cell
    """
    translation_table = obj_tables.IntegerAttribute()
    # Versions are `major.minor.patch` with an optional alphanumeric suffix.
    # Each numeric part may have several digits (`[0-9]+`, not the character
    # class `[0-9+]`, which would allow exactly one character).
    version = RegexAttribute(
        min_length=1, pattern=r'^[0-9]+\.[0-9]+\.[0-9]+[0-9a-z]*$', flags=re.I)
    url = obj_tables.StringAttribute(verbose_name='URL')
    branch = obj_tables.StringAttribute()
    revision = obj_tables.StringAttribute()
    # The default on the right-hand side is the module-level `wc_kb_version`
    # imported from `._version`; the class attribute of the same name is only
    # bound after this statement has been evaluated.
    wc_kb_version = RegexAttribute(min_length=1, pattern=r'^[0-9]+\.[0-9]+\.[0-9]+[0-9a-z]*$', flags=re.I,
                                   default=wc_kb_version, verbose_name='wc_kb version')

    class Meta(obj_tables.Model.Meta):
        verbose_name = 'KB'
        description = 'Knowledge base'
        attribute_order = ('id', 'name', 'translation_table', 'version',
                           'url', 'branch', 'revision', 'wc_kb_version', 'comments')
        table_format = obj_tables.TableFormat.column


class Cell(KnowledgeBaseObject):
    """ Knowledge of a cell

    Attributes:
        knowledge_base (:obj:`KnowledgeBase`): knowledge base
        taxon (:obj:`int`): NCBI taxon identifier

    Related attributes:
        references (:obj:`list` of :obj:`Reference`): references
        compartments (:obj:`list` of :obj:`Compartment`): compartments
        species_types (:obj:`list` of :obj:`SpeciesType`): species types
        concentrations (:obj:`list` of :obj:`Concentration`): concentrations
        observables (:obj:`list` of :obj:`Observable`): observables
        loci (:obj:`list` of :obj:`PolymerLocus`): locus
        reactions (:obj:`list` of :obj:`Reaction`): reactions
    """
    # The reverse side of this relation is exposed as `KnowledgeBase.cell`
    knowledge_base = obj_tables.OneToOneAttribute(
        KnowledgeBase, related_name='cell')
    taxon = obj_tables.IntegerAttribute()

    class Meta(obj_tables.Model.Meta):
        # `knowledge_base` is not listed in the attribute order
        attribute_order = ('id', 'name', 'taxon', 'comments')
        table_format = obj_tables.TableFormat.column


class Reference(obj_tables.Model):
    """ Reference to the literature

    Attributes:
        id (:obj:`str`): identifier
        name (:obj:`str`): name
        authors (:obj:`str`): authors
        title (:obj:`str`): title
        volume (:obj:`str`): volume
        issue (:obj:`str`): issue
        journal (:obj:`str`): journal
        pages (:obj:`str`): pages
        year (:obj:`int`): year
        cell (:obj:`Cell`) : cell
        identifiers (:obj:`list` of :obj:`Identifier`): identifiers
        comments (:obj:`str`): comments
        type (:obj:`pronto`): type of reference

    Related attributes:
        compartments (:obj:`list` of :obj:`Compartment`): compartments
        species_types (:obj:`list` of :obj:`SpeciesType`): species_types
        concentrations (:obj:`list` of :obj:`Concentration`): concentrations
        loci (:obj:`list` of :obj:`PolymerLocus`): loci
        properties (:obj:`list` of :obj:`SpeciesTypeProperty`): species type properties
        reactions (:obj:`list` of :obj:`Reaction`): reactions
        rate_laws (:obj:`list` of :obj:`RateLaw`): rate_laws
        observables (:obj:`list` of :obj:`Observable`): observables
    """

    id = obj_tables.SlugAttribute(primary=True, unique=True)
    name = obj_tables.StringAttribute()
    authors = obj_tables.LongStringAttribute()
    title = obj_tables.LongStringAttribute()
    volume = obj_tables.StringAttribute()
    issue = obj_tables.StringAttribute()
    journal = obj_tables.StringAttribute()
    pages = obj_tables.StringAttribute()
    year = obj_tables.IntegerAttribute()
    cell = obj_tables.ManyToOneAttribute(Cell, related_name='references')
    identifiers = IdentifierAttribute(related_name='references')
    comments = obj_tables.LongStringAttribute()
    # Allowed terms are the subclasses of the ontology term 'WC:reference';
    # the default is 'WC:article', and `none=True` is passed as well.
    type = obj_tables.sci.onto.OntoTermAttribute(kbOnt,
                                  terms = kbOnt['WC:reference'].subclasses(),
                                  default = kbOnt['WC:article'],
                                  none=True)

    class Meta(obj_tables.Model.Meta):
        # `cell` is not listed in the attribute order
        attribute_order = ('id', 'name', 'type', 'title', 'authors', 'journal', 'volume', 'issue', 'pages', 'year', 'identifiers', 'comments')


class Compartment(KnowledgeBaseObject):
    """ Knowledge of a subcellular compartment

    Attributes:
        cell (:obj:`Cell`): cell
        volumetric_fraction (:obj:`float`): average volumetric fraction relative to the cell volume
        references (:obj:`list` of :obj:`Reference`): references
        identifiers (:obj:`list` of :obj:`Identifier`): identifiers

    Related attributes:
        reaction_participants (:obj:`list` of :obj:`ReactionParticipant`): reaction participants
    """
    # Redeclares the inherited `id` as a slug
    id = obj_tables.SlugAttribute(primary=True, unique=True)
    cell = obj_tables.ManyToOneAttribute(Cell, related_name='compartments')
    # Constrained to the closed interval [0, 1]
    volumetric_fraction = obj_tables.FloatAttribute(min=0., max=1.)
    references = obj_tables.ManyToManyAttribute(Reference, related_name='compartments')
    identifiers = IdentifierAttribute(related_name='compartments')

    class Meta(obj_tables.Model.Meta):
        # `cell` is not listed in the attribute order
        attribute_order = ('id', 'name', 'volumetric_fraction', 'identifiers', 'references', 'comments')


class SpeciesType(KnowledgeBaseObject, metaclass=obj_tables.abstract.AbstractModelMeta):
    """ Knowledge of a molecular species

    Abstract base class: subclasses must implement :obj:`get_empirical_formula`,
    :obj:`get_charge` and :obj:`get_mol_wt`.

    Attributes:
        cell (:obj:`Cell`): cell
        references (:obj:`list` of :obj:`Reference`): references
        identifiers (:obj:`list` of :obj:`Identifier`): identifiers

    Related attributes:
        reaction_participants (:obj:`list` of :obj:`ReactionParticipant`): reaction participants
    """

    # Redeclares the inherited `id` as a slug
    id = obj_tables.SlugAttribute(primary=True, unique=True)
    cell = obj_tables.ManyToOneAttribute(Cell, related_name='species_types')
    references = obj_tables.ManyToManyAttribute(Reference, related_name='species_types')
    identifiers = IdentifierAttribute(related_name='species_types')

    class Meta(obj_tables.Model.Meta):
        # `cell` is not listed in the attribute order
        attribute_order = ('id', 'name', 'comments', 'references', 'identifiers')

    @abc.abstractmethod
    def get_empirical_formula(self):
        """ Get the empirical formula

        Must be implemented by subclasses.

        Returns:
            :obj:`chem.EmpiricalFormula`: empirical formula
        """
        pass  # pragma: no cover

    @abc.abstractmethod
    def get_charge(self):
        """ Get the charge

        Must be implemented by subclasses.

        Returns:
            :obj:`int`: charge
        """
        pass  # pragma: no cover

    @abc.abstractmethod
    def get_mol_wt(self):
        """ Get the molecular weight

        Must be implemented by subclasses.

        Returns:
            :obj:`float`: molecular weight
        """
        pass  # pragma: no cover


class Species(obj_tables.Model):
    """ Species (tuple of species type, compartment)

    The string form of a species is ``species_type_id[compartment_id]``.

    Attributes:
        species_type (:obj:`SpeciesType`): species type
        compartment (:obj:`Compartment`): compartment

    Related attributes:
        concentration (:obj:`Concentration`): concentration
        species_coefficients (:obj:`list` of :obj:`SpeciesCoefficient`): participations in reactions
        rate_law_expressions (:obj:`list` of :obj:`RateLawExpression`): participations in the evaluation of rates
        observable_expressions (:obj:`list` of :obj:`ObservableExpression`): participations in observables
    """

    id = obj_tables.StringAttribute(primary=True, unique=True)
    species_type = ManyToOneAttribute(
        SpeciesType, related_name='species', min_related=1)
    compartment = ManyToOneAttribute(
        Compartment, related_name='species', min_related=1)

    class Meta(obj_tables.Model.Meta):
        attribute_order = ('id', 'species_type', 'compartment')
        frozen_columns = 1
        table_format = TableFormat.cell
        unique_together = (('species_type', 'compartment', ), )
        ordering = ('species_type', 'compartment')
        # Token sequence NAME [ NAME ], i.e. the shape of `species_type_id[compartment_id]`
        expression_term_token_pattern = (token.NAME, token.LSQB, token.NAME, token.RSQB)

    @staticmethod
    def gen_id(species_type, compartment):
        """ Generate a Species' primary identifier

        Args:
            species_type (:obj:`object`): a `SpeciesType`, or its id
            compartment (:obj:`object`): a `Compartment`, or its id

        Returns:
            :obj:`str`: canonical identifier for a specie in a compartment, 'species_type_id[compartment_id]'

        Raises:
            :obj:`ValueError`: if `species_type` is neither a `SpeciesType` nor a string, or
                `compartment` is neither a `Compartment` nor a string
        """
        if isinstance(species_type, SpeciesType):
            species_type_id = species_type.get_primary_attribute()
        elif isinstance(species_type, str):
            species_type_id = species_type
        else:
            raise ValueError(
                "gen_id: incorrect species type: {}".format(species_type))

        if isinstance(compartment, Compartment):
            compartment_id = compartment.get_primary_attribute()
        elif isinstance(compartment, str):
            compartment_id = compartment
        else:
            raise ValueError(
                "gen_id: incorrect compartment type: {}".format(compartment))

        return '{}[{}]'.format(species_type_id, compartment_id)

    # NOTE(review): this `def` rebinds the class-level name `id`, which was
    # assigned a `StringAttribute` at the top of the class body -- confirm
    # which of the two bindings obj_tables is expected to see.
    def id(self):
        """ Provide a Species' primary identifier

        Returns:
            :obj:`str`: canonical identifier for a specie in a compartment, 'specie_id[compartment_id]'
        """
        return self.serialize()

    def serialize(self):
        """ Provide a Species' primary identifier

        Returns:
            :obj:`str`: canonical identifier for a specie in a compartment, 'specie_id[compartment_id]'
        """
        return self.gen_id(self.species_type, self.compartment)

    @classmethod
    def deserialize(cls, attribute, value, objects):
        """ Deserialize value

        Returns the species already registered in `objects` under `value`, if any.
        Otherwise `value` is parsed as ``species_type_id[compartment_id]``, the
        species type and compartment are looked up in `objects`, and a new species
        is created and registered in `objects`.

        Args:
            attribute (:obj:`Attribute`): attribute
            value (:obj:`str`): String representation
            objects (:obj:`dict`): dictionary of objects, grouped by model

        Returns:
            :obj:`tuple` of `object`, `InvalidAttribute` or `None`: tuple of cleaned value and cleaning error
        """
        if cls in objects and value in objects[cls]:
            return (objects[cls][value], None)

        # `[1:-1]` strips the `^`/`$` anchors of the id patterns so they can be embedded
        pattern = r'^({})\[({})\]$'.format(SpeciesType.id.pattern[1:-1], Compartment.id.pattern[1:-1])
        match = re.match(pattern, value, flags=re.I)
        if match:
            errors = []

            # The species type may be registered under any subclass of `SpeciesType`
            species_type = None
            for species_type_cls in get_subclasses(SpeciesType):
                if species_type_cls in objects and match.group(1) in objects[species_type_cls]:
                    species_type = objects[species_type_cls][match.group(1)]
                    break
            if not species_type:
                errors.append(
                    'Species type "{}" is not defined'.format(match.group(1)))

            # NOTE(review): group 11 is used as the compartment id; this index
            # depends on the number of capture groups inside the species type
            # id pattern -- confirm whenever that pattern changes.
            if Compartment in objects and match.group(11) in objects[Compartment]:
                compartment = objects[Compartment][match.group(11)]
            else:
                errors.append(
                    'Compartment "{}" is not defined'.format(match.group(11)))

            if errors:
                return (None, InvalidAttribute(attribute, errors))
            else:
                obj = cls(species_type=species_type, compartment=compartment)
                if cls not in objects:
                    objects[cls] = {}
                objects[cls][obj.serialize()] = obj
                return (obj, None)

        return (None, InvalidAttribute(attribute, ['Invalid species']))


class Concentration(KnowledgeBaseObject):
    """ Species concentration

    Attributes:
        cell (:obj:`Cell`): cell
        species (:obj:`Species`): species
        medium (:obj:`str`): medium
        value (:obj:`float`): value
        units (:obj:`unit_registry.Unit`): units; default units is 'M'
        evidence (:obj:`list` of :obj:`Evidence`): evidence
        comments (:obj:`str`): comments
        references (:obj:`list` of :obj:`Reference`): references
        identifiers (:obj:`list` of :obj:`Identifier`): identifiers
    """

    cell = obj_tables.ManyToOneAttribute(Cell, related_name='concentrations')
    species = OneToOneSpeciesAttribute(related_name='concentration')
    medium = obj_tables.StringAttribute()
    value = FloatAttribute(min=0)
    # 'M' is included in the choices because it is the default unit; without it the
    # default value would not be one of the permitted units
    units = obj_tables.sci.units.UnitAttribute(unit_registry,
                          choices=(
                              unit_registry.parse_units('molecule'),
                              unit_registry.parse_units('M'),
                              unit_registry.parse_units('mM'),
                              unit_registry.parse_units('uM'),
                              unit_registry.parse_units('nM'),
                              unit_registry.parse_units('pM'),
                              unit_registry.parse_units('fM'),
                              unit_registry.parse_units('aM')),
                          default=unit_registry.parse_units('M'))
    evidence = obj_tables.OneToManyAttribute('Evidence', related_name='concentrations')
    references = ManyToManyAttribute(Reference, related_name='concentrations')
    identifiers = IdentifierAttribute(related_name='concentrations')

    class Meta(obj_tables.Model.Meta):
        attribute_order = ('id', 'species', 'value', 'units', 'evidence', 'identifiers', 'references', 'comments')
        unique_together = (('species', ), )
        ordering = ('species',)
        frozen_columns = 1

    def serialize(self):
        """ Generate string representation

        Returns:
            :obj:`str`: 'CONC[...]' wrapped around the serialized species
        """
        return 'CONC[{}]'.format(self.species.serialize())


class SpeciesTypeCoefficient(obj_tables.Model):
    """ A tuple of a species type and a coefficient

    Attributes:
        species_type (:obj:`SpeciesType`): species_type
        coefficient (:obj:`float`): coefficient

    Related attributes:
        complex (:obj:`ComplexSpeciesType`): complex
    """

    species_type = ManyToOneAttribute(SpeciesType, related_name='species_type_coefficients')
    coefficient = FloatAttribute(min=0.)

    class Meta(obj_tables.Model.Meta):
        attribute_order = ('species_type', 'coefficient')
        frozen_columns = 1
        table_format = TableFormat.cell
        ordering = ('species_type',)

    def serialize(self):
        """ Serialize related object

        Returns:
            :obj:`str`: string representation of a species type and a coefficient
        """
        return self._serialize(self.species_type, self.coefficient)

    @staticmethod
    def _serialize(species_type, coefficient):
        """ Serialize values

        A coefficient of 1 is omitted, whole coefficients below 1000 are written
        without decimals, and every other coefficient uses scientific notation.

        Args:
            species_type (:obj:`SpeciesType`): species_type
            coefficient (:obj:`float`): coefficient

        Returns:
            :obj:`str`: string representation of a species type and a coefficient
        """
        coefficient = float(coefficient)

        if coefficient == 1:
            coefficient_str = ''
        elif coefficient % 1 == 0 and abs(coefficient) < 1000:
            coefficient_str = '({:.0f}) '.format(coefficient)
        else:
            coefficient_str = '({:e}) '.format(coefficient)

        return '{}{}'.format(coefficient_str, species_type.get_primary_attribute())

    @classmethod
    def deserialize(cls, attribute, value, objects):
        """ Deserialize value

        The value is a ' + '-separated list of parts, each an optional
        parenthesized coefficient followed by a species type id,
        e.g. '(2) subunit_a + subunit_b'.

        Args:
            attribute (:obj:`Attribute`): attribute
            value (:obj:`str`): String representation
            objects (:obj:`dict`): dictionary of objects, grouped by model

        Returns:
            :obj:`tuple` of `list` of `SpeciesTypeCoefficient`, `InvalidAttribute` or `None`: tuple of cleaned value
                and cleaning error
        """
        parts = []
        errors = []
        st_id = SpeciesType.id.pattern[1:-1]
        stoch = r'\(((\d*\.?\d+|\d+\.)(e[\-\+]?\d+)?)\)'
        gbl_part = r'({} )*({})'.format(stoch, st_id)
        gbl_side = r'{}( \+ {})*'.format(gbl_part, gbl_part)
        gbl_pattern = r'^({})$'.format(gbl_side)

        # errors are reported against the species type attribute of this model
        error_attr = cls.Meta.attributes['species_type']

        global_match = re.match(gbl_pattern, value, flags=re.I)
        if not global_match:
            return (None, InvalidAttribute(error_attr, ['Incorrectly formatted participants: {}'.format(value)]))
        subunits_str = global_match.group(1)

        # in each tuple returned by findall, index 1 holds the numeric coefficient
        # (empty when omitted) and index 4 holds the species type id
        for part in re.findall(gbl_part, subunits_str, flags=re.I):
            species_type_id = part[4]

            # the species type can be an instance of any subclass of SpeciesType
            species_type = None
            for species_type_cls in get_subclasses(SpeciesType):
                if species_type_cls in objects and species_type_id in objects[species_type_cls]:
                    species_type = objects[species_type_cls][species_type_id]
                    break

            if not species_type:
                errors.append('Undefined species type "{}"'.format(species_type_id))

            coefficient = float(part[1] or 1.)

            # once any error has been found, keep collecting errors but stop creating objects
            if not errors:
                if cls not in objects:
                    objects[cls] = {}
                serialized_value = cls._serialize(species_type, coefficient)
                if serialized_value in objects[cls]:
                    subunit_part = objects[cls][serialized_value]
                else:
                    subunit_part = cls(species_type=species_type, coefficient=coefficient)
                    objects[cls][serialized_value] = subunit_part
                parts.append(subunit_part)

        if errors:
            # report against an attribute (not the model class), like the format error above
            return (None, InvalidAttribute(error_attr, errors))
        return (parts, None)


class SpeciesCoefficient(obj_tables.Model):
    """ Pairs a species with a (possibly negative) coefficient

    Attributes:
        species (:obj:`Species`): species
        coefficient (:obj:`float`): coefficient

    Related attributes:
        reaction (:obj:`Reaction`): reaction
    """

    species = ManyToOneAttribute(Species, related_name='species_coefficients')
    coefficient = FloatAttribute(nan=False)

    class Meta(obj_tables.Model.Meta):
        attribute_order = ('species', 'coefficient')
        frozen_columns = 1
        table_format = TableFormat.cell
        ordering = ('species',)

    def serialize(self, show_compartment=True, show_coefficient_sign=True):
        """ Generate the string representation of this species coefficient

        Args:
            show_compartment (:obj:`bool`, optional): if true, show compartment
            show_coefficient_sign (:obj:`bool`, optional): if true, show coefficient sign

        Returns:
            :obj:`str`: string representation of a species and a coefficient
        """
        return self._serialize(
            self.species, self.coefficient,
            show_compartment=show_compartment,
            show_coefficient_sign=show_coefficient_sign)

    @staticmethod
    def _serialize(species, coefficient, show_compartment=True, show_coefficient_sign=True):
        """ Generate the string representation of a species and a coefficient

        A coefficient of 1 is omitted, whole coefficients with magnitude below 1000
        are written without decimals, and all others use scientific notation.

        Args:
            species (:obj:`Species`): species
            coefficient (:obj:`float`): coefficient
            show_compartment (:obj:`bool`, optional): if true, show compartment
            show_coefficient_sign (:obj:`bool`, optional): if true, show coefficient sign

        Returns:
            :obj:`str`: string representation of a species and a coefficient
        """
        amount = float(coefficient)
        if not show_coefficient_sign:
            amount = abs(amount)

        if amount == 1:
            prefix = ''
        elif amount % 1 == 0 and abs(amount) < 1000:
            prefix = '({:.0f}) '.format(amount)
        else:
            prefix = '({:e}) '.format(amount)

        if show_compartment:
            label = species.serialize()
        else:
            label = species.species_type.get_primary_attribute()
        return '{}{}'.format(prefix, label)

    @classmethod
    def deserialize(cls, attribute, value, objects, compartment=None):
        """ Decode a species coefficient from its string representation

        When `compartment` is given, `value` contains only a species type id and the
        species is looked up in that compartment; otherwise `value` must contain the
        full 'species_type_id[compartment_id]' form.

        Args:
            attribute (:obj:`Attribute`): attribute
            value (:obj:`str`): String representation
            objects (:obj:`dict`): dictionary of objects, grouped by model
            compartment (:obj:`Compartment`, optional): compartment

        Returns:
            :obj:`tuple` of `SpeciesCoefficient`, `InvalidAttribute` or `None`: tuple of cleaned value
                and cleaning error
        """
        st_id = SpeciesType.id.pattern[1:-1]
        comp_id = Compartment.id.pattern[1:-1]
        if compartment:
            pattern = r'^(\(((\-?\d*\.?\d+|\d+\.)(e[\-\+]?\d+)?)\) )*({})$'.format(st_id)
        else:
            pattern = r'^(\(((\-?\d*\.?\d+|\d+\.)(e[\-\+]?\d+)?)\) )*({}\[{}\])$'.format(st_id, comp_id)

        parsed = re.match(pattern, value, flags=re.I)
        if not parsed:
            return (None, InvalidAttribute(cls.Meta.attributes['species'], ['Invalid species coefficient']))

        # group 2 is the numeric coefficient (absent means 1); group 5 is the species text
        coefficient = float(parsed.group(2) or 1.)
        species_id = parsed.group(5)
        if compartment:
            species_id = Species.gen_id(species_id, compartment.get_primary_attribute())

        species, error = Species.deserialize(attribute, species_id, objects)
        if error:
            return (None, error)

        # reuse a previously decoded species coefficient
        key = cls._serialize(species, coefficient)
        if cls in objects and key in objects[cls]:
            return (objects[cls][key], None)

        species_coefficient = cls(species=species, coefficient=coefficient)
        objects.setdefault(cls, {})[species_coefficient.serialize()] = species_coefficient
        return (species_coefficient, None)


class PolymerSpeciesType(SpeciesType):
    """ Knowledge of a polymer

    Attributes:
        circular (:obj:`bool`): is the polymer circular
        double_stranded (:obj:`bool`): is the polymer double stranded

    Related attributes:
        loci (:obj:`list` of :obj:`PolymerLocus`): loci
    """
    circular = BooleanAttribute()
    double_stranded = BooleanAttribute()

    class Meta(obj_tables.Model.Meta):
        attribute_order = ('id', 'name', 'circular', 'double_stranded',
                           'comments', 'references', 'identifiers')

    @abc.abstractmethod
    def get_seq(self):
        """ Get the sequence of the polymer; implemented by subclasses

        Returns:
            :obj:`Bio.Seq.Seq`: sequence
        """
        pass  # pragma: no cover

    def get_len(self):
        """ Get the number of residues in the polymer sequence

        Returns:
            :obj:`int`: length
        """
        sequence = self.get_seq()
        return len(sequence)

    def get_subseq(self, start, end, strand=PolymerStrand.positive):
        """ Get the subsequence between two coordinates

        For circular polymers the coordinates may run past the end of the sequence,
        in which case the subsequence wraps around the origin.

        Args:
            start (:obj:`int`): start coordinate (1-indexed)
            end (:obj:`int`): end coordinate (1-indexed)
            strand (:obj:`PolymerStrand`, optional): strand

        Returns:
            :obj:`Bio.Seq.Seq`: sequence; reverse complemented unless the strand is positive

        Raises:
            :obj:`ValueError`: if the polymer is linear and the start or end coordinates
                are less than 1 or greater than the length of the sequence
        """
        full_seq = self.get_seq()
        n_residues = len(full_seq)

        # zero-based, half-open window
        first = start - 1
        last = end

        if self.circular:
            # shift the window so that it begins within the first copy of the sequence
            n_wrap = int(math.floor(first / n_residues))
            first -= n_residues * n_wrap
            last -= n_residues * n_wrap
        elif first < 0 or last > n_residues:
            raise ValueError('Start and end coordinates for linear polymers must be at '
                             'least 1 and less than the length of the sequence')

        if last > n_residues:
            # tail of the first copy + whole intermediate copies + head of the final copy
            n_whole_copies = int(math.floor(last / n_residues)) - 1
            window = full_seq[first:] + \
                str(full_seq) * n_whole_copies + \
                full_seq[0:last % n_residues]
        else:
            window = full_seq[first:last]

        if strand != PolymerStrand.positive:
            return window.reverse_complement()
        return window


class PolymerLocus(KnowledgeBaseObject):
    """ Knowledge about a locus of a polymer

    Attributes:
        cell (:obj:`Cell`): cell
        polymer (:obj:`PolymerSpeciesType`): polymer
        start (:obj:`int`): start position
        end (:obj:`int`): end position
        strand (:obj:`PolymerStrand`): strand
        references (:obj:`list` of :obj:`Reference`): references
        identifiers (:obj:`list` of :obj:`Identifier`): identifiers
    """

    cell = ManyToOneAttribute(Cell, related_name='loci')
    polymer = ManyToOneAttribute(PolymerSpeciesType, related_name='loci')
    start = IntegerAttribute()
    end = IntegerAttribute()
    references = ManyToManyAttribute(Reference, related_name='loci')
    identifiers = IdentifierAttribute(related_name='loci')
    strand = EnumAttribute(PolymerStrand, default=PolymerStrand.positive)

    class Meta(obj_tables.Model.Meta):
        attribute_order = ('id', 'name', 'polymer', 'strand', 'start', 'end', 'identifiers', 'references', 'comments')

    def get_seq(self):
        """ Get the sequence of the locus from its polymer

        Returns:
            :obj:`Bio.Seq.Seq`: sequence
        """
        polymer = self.polymer
        return polymer.get_subseq(self.start, self.end, strand=self.strand)

    def get_len(self):
        """ Get the number of positions spanned by the locus, both ends included

        Returns:
            :obj:`int`: length
        """
        span = abs(self.start - self.end)
        return span + 1

    def get_direction(self):
        """ Get the direction of the locus implied by its strand and start/end coordinates

        The direction is forward when the coordinates ascend on the positive strand or
        descend on the negative strand, and reverse otherwise.

        Returns:
            :obj:`PolymerDirection`: direction (in ['forward', 'reverse'])

        Raises:
            :obj:`ValueError`: start and end coordinate of chromosome feature can not be the same
            :obj:`Exception`: strand is not member of PolymerStrand
        """
        # compare the coordinates first; equal coordinates are rejected before the strand is inspected
        if self.start < self.end:
            ascending = True
        elif self.start > self.end:
            ascending = False
        elif self.start == self.end:
            raise ValueError('Start and end position of chromosome feature can not be the same (Chrom feature id: {}).'.format(self.id))
        else:
            return None

        if self.strand == PolymerStrand.positive:
            on_positive_strand = True
        elif self.strand == PolymerStrand.negative:
            on_positive_strand = False
        else:
            raise Exception('Unrecognized polymer strand ({}) found for {}.'.format(self.strand, self.id))

        if ascending == on_positive_strand:
            return PolymerDirection.forward
        return PolymerDirection.reverse


class ObservableExpression(obj_tables.Model, Expression):
    """ A mathematical expression of Observables and Species

    The expression used by a `Observable`.

    Attributes:
        expression (:obj:`str`): mathematical expression for an Observable
        species (:obj:`list` of :obj:`Species`): Species used by this Observable expression
        observables (:obj:`list` of :obj:`Observable`): other Observables used by this Observable expression

    Related attributes:
        observable (:obj:`Observable`): observable
    """

    # the expression text is the primary, unique key of this model
    expression = LongStringAttribute(primary=True, unique=True, default='')
    species = ManyToManyAttribute(Species, related_name='observable_expressions')
    # 'Observable' is referenced by name because the class is defined later in the module
    observables = ManyToManyAttribute('Observable', related_name='observable_expressions')

    class Meta(obj_tables.Model.Meta, Expression.Meta):
        table_format = TableFormat.cell
        # models whose instances may appear as terms in the expression
        expression_term_models = ('Species', 'Observable')
        expression_is_linear = True
        expression_unit_registry = unit_registry

    def serialize(self):
        """ Generate string representation

        Delegates to `Expression.serialize`.

        Returns:
            :obj:`str`: string representation
        """
        return Expression.serialize(self)

    @classmethod
    def deserialize(cls, value, objects):
        """ Deserialize value

        Delegates to `Expression.deserialize`, passing this class as the model.

        Args:
            value (:obj:`str`): String representation
            objects (:obj:`dict`): dictionary of objects, grouped by model

        Returns:
            :obj:`tuple` of :obj:`ObservableExpression`, `InvalidAttribute` or `None`:
                tuple of cleaned value and cleaning error
        """
        return Expression.deserialize(cls, value, objects)


class Observable(KnowledgeBaseObject):
    """ Observable: a linear function of other Observables and Species

    Attributes:
        cell (:obj:`Cell`): cell
        expression (:obj:`ObservableExpression`): mathematical expression for an Observable
        units (:obj:`unit_registry.Unit`): units of expression
        references (:obj:`list` of :obj:`Reference`): references
        identifiers (:obj:`list` of :obj:`Identifier`): identifiers

    Related attributes:
        observable_expressions (:obj:`list` of :obj:`ObservableExpression`): observable expressions
        rate_law_expressions (:obj:`list` of :obj:`RateLawExpression`): rate law expressions
    """

    cell = ManyToOneAttribute(Cell, related_name='observables')
    expression = ManyToOneExpressionAttribute(ObservableExpression, related_name='observable',
                                              min_related=1, min_related_rev=1)
    # 'molecule' is the only permitted unit
    units = obj_tables.sci.units.UnitAttribute(unit_registry,
                          choices=(unit_registry.parse_units('molecule'),),
                          default=unit_registry.parse_units('molecule'))
    references = obj_tables.ManyToManyAttribute(Reference, related_name='observables')
    identifiers = IdentifierAttribute(related_name='observables')

    class Meta(obj_tables.Model.Meta, ExpressionExpressionTermMeta):
        attribute_order = ('id', 'name', 'expression', 'units', 'identifiers', 'references', 'comments')
        expression_term_model = ObservableExpression
        expression_term_units = 'units'

    def deserialize(self, value, objects, decoded=None):
        """ Deserialize value

        Decodes `value` as an :obj:`ObservableExpression`.

        NOTE(review): this is an instance method; confirm that no caller expects a
        classmethod with the signature `deserialize(value, objects)`.

        Args:
            value (:obj:`str`): String representation
            objects (:obj:`dict`): dictionary of objects, grouped by model
            decoded (:obj:`dict`, optional): dictionary of objects that have already been decoded;
                accepted for interface compatibility and not used

        Returns:
            :obj:`tuple` of :obj:`ObservableExpression`, `InvalidAttribute` or `None`:
                tuple of cleaned value and cleaning error
        """
        # a bare `expression` is not in scope inside a method (class attributes are not
        # visible as local names), so delegate explicitly to the expression model
        return ObservableExpression.deserialize(value, objects)


class Parameter(KnowledgeBaseObject):
    """ Knowledge of parameters

    Attributes:
        cell (:obj:`Cell`): cell
        value (:obj:`float`): value
        error (:obj:`float`): measurement error
        units (:obj:`unit_registry.Unit`): units of value
        evidence (:obj:`list` of :obj:`Evidence`): evidence
        references (:obj:`list` of :obj:`Reference`): references
        identifiers (:obj:`list` of :obj:`Identifier`): identifiers

    Related attributes:
        rate_law_expressions (:obj:`list` of :obj:`RateLawExpression`): rate law expressions that use a Parameter
    """

    cell = obj_tables.ManyToOneAttribute(Cell, related_name='parameters')
    # both the value and its measurement error must be non-negative
    value = FloatAttribute(min=0)
    error = FloatAttribute(min=0)
    # units may be left unset
    units = obj_tables.sci.units.UnitAttribute(unit_registry, none=True)
    references = obj_tables.ManyToManyAttribute(Reference, related_name='parameters')
    evidence = obj_tables.OneToManyAttribute('Evidence', related_name='parameters')
    identifiers = IdentifierAttribute(related_name='parameters')

    class Meta(obj_tables.Model.Meta):
        attribute_order = ('id', 'name', 'synonyms', 'value', 'units', 'evidence', 'identifiers', 'references', 'comments')
        # parameters are referenced in expressions by a name token
        expression_term_token_pattern = (token.NAME, )


class Validator(obj_tables.Validator):
    """ Validator for knowledge bases; delegates to :obj:`obj_tables.Validator` """

    def run(self, knowledge_base, get_related=True):
        """ Validate a knowledge_base and return its errors

        Args:
            knowledge_base (:obj:`KnowledgeBase`): knowledge base
            get_related (:obj:`bool`, optional): if true, get all related objects

        Returns:
            :obj:`InvalidObjectSet` or `None`: list of invalid objects/models and their errors
        """
        # no additional checks are performed here; the parent validator does the work
        return super(Validator, self).run(knowledge_base, get_related=get_related)


#####################
#####################
# Species types


class MetaboliteSpeciesType(SpeciesType):
    """ Knowledge of a metabolite

    Attributes:
        synonyms (:obj:`str`): synonyms
        type (:obj:`pronto`): type

    """
    synonyms = LongStringAttribute()
    type = obj_tables.sci.onto.OntoTermAttribute(
        kbOnt,
        terms=kbOnt['WC:metabolite'].subclasses(),
        none=True)

    class Meta(obj_tables.Model.Meta):
        verbose_name = 'Metabolite'
        attribute_order = ('id', 'name', 'synonyms', 'type', 'identifiers', 'references', 'comments')

    def get_structure(self):
        """ Get the structure stored in the 'structure' property

        Returns:
            :obj:`str`: InChI or SMILES structure

        Raises:
            :obj:`ValueError`: if structure has not been provided
        """
        structure_prop = self.properties.get_one(property='structure')
        if not structure_prop:
            raise ValueError('The structure of {} has not been provided'.format(self.id))
        return structure_prop.get_value()

    def calc_structure(self, ph=7.4, major_tautomer=False, keep_hydrogens=False, dearomatize=False):
        """ Calculate the major microspecies of the stored structure

        Args:
            ph (:obj:`float`, optional): pH, default is 7.4
            major_tautomer (:obj:`bool`, optional): if :obj:`True`, use the major tautomeric in the calculation
            keep_hydrogens (:obj:`bool`, optional): if :obj:`True`, keep explicity defined hydrogens
            dearomatize (:obj:`bool`, optional): if :obj:`True`, dearomatize molecule

        Returns:
            :obj:`str`: structure of the major microspecies; the same format ('inchi' or
                'smiles') is requested for the output as is detected for the input
        """
        structure_str = self.get_structure()
        # structures containing 'InChI=' are treated as InChI, everything else as SMILES
        structure_format = 'inchi' if 'InChI=' in structure_str else 'smiles'
        return get_major_micro_species(
            structure_str, structure_format, structure_format,
            ph=ph, major_tautomer=major_tautomer,
            keep_hydrogens=keep_hydrogens, dearomatize=dearomatize)

    def to_openbabel_mol(self):
        """ Build an Open Babel molecule from the stored structure

        Returns:
            :obj:`openbabel.OBMol`: Open Babel molecule
        """
        structure_str = self.get_structure()
        if 'InChI=' in structure_str:
            input_format = 'inchi'
        else:
            input_format = 'smi'

        molecule = openbabel.OBMol()
        converter = openbabel.OBConversion()
        converter.SetInFormat(input_format)
        converter.ReadString(molecule, structure_str)
        return molecule

    def get_empirical_formula(self):
        """ Get the empirical formula

        The 'empirical_formula' property is used when present; otherwise the formula
        is calculated from the structure.

        Returns:
            :obj:`chem.EmpiricalFormula`: empirical formula
        """
        formula_prop = self.properties.get_one(property='empirical_formula')
        if not formula_prop:
            return self.calc_empirical_formula()
        return chem.EmpiricalFormula(formula_prop.get_value())

    def calc_empirical_formula(self):
        """ Calculate the empirical formula from the structure

        Returns:
            :obj:`chem.EmpiricalFormula`: empirical formula
        """
        return OpenBabelUtils.get_formula(self.to_openbabel_mol())

    def get_charge(self):
        """ Get the charge

        The 'charge' property is used when present; otherwise the charge is
        calculated from the structure.

        Returns:
            :obj:`int`: charge
        """
        charge_prop = self.properties.get_one(property='charge')
        if not charge_prop:
            return self.calc_charge()
        return charge_prop.get_value()

    def calc_charge(self):
        """ Calculate the charge from the structure

        Returns:
            :obj:`int`: charge
        """
        return self.to_openbabel_mol().GetTotalCharge()

    def get_mol_wt(self):
        """ Get the molecular weight

        The 'empirical_formula' property is preferred; the structure is used as a fallback.

        Returns:
            :obj:`float`: molecular weight

        Raises:
            :obj:`ValueError`: if there is not enough information to calculate molecular weight
        """
        formula_prop = self.properties.get_one(property='empirical_formula')
        if formula_prop:
            formula = chem.EmpiricalFormula(formula_prop.get_value())
            return formula.get_molecular_weight()

        if self.properties.get_one(property='structure'):
            return self.to_openbabel_mol().GetMolWt()

        raise ValueError('Molecular weight cannot be calculated because no structure or '
            'empirical formula has been provided for {}'.format(self.id))
      

class DnaSpeciesType(PolymerSpeciesType):
    """ Knowledge of a DNA species

    Attributes:
        sequence_path (:obj:`str`): path to sequence fasta file
        ploidy (:obj:`int`): ploidy
    """

    sequence_path = obj_tables.StringAttribute()
    ploidy = obj_tables.IntegerAttribute(min=0)

    class Meta(obj_tables.Model.Meta):
        verbose_name = 'Chromosome'
        attribute_order = ('id', 'name', 'sequence_path', 'circular', 'double_stranded',
                           'ploidy', 'identifiers', 'references', 'comments')

    def get_seq(self, start=None, end=None):
        """ Get the sequence

        The sequence is read from the FASTA file at `sequence_path`, from the
        record whose name equals the id of this species type.

        Args:
            start (:obj:`int`, optional): start coordinate of the queried subsequence,
                default is the start of the full sequence
            end (:obj:`int`, optional): end coordinate of the queried subsequence,
                default is the end of the full sequence

        Returns:
            :obj:`Bio.Seq.Seq`: structure
        """

        seq_idx = Fasta(self.sequence_path, as_raw=True)
        # falsy coordinates (None or 0) select the corresponding end of the full sequence
        start = start or 1
        end = end or len(seq_idx[self.id][:])

        # convert the 1-indexed, inclusive coordinates to a zero-based slice
        seq = seq_idx[self.id][start-1:end]

        return Bio.Seq.Seq(seq, alphabet=Bio.Alphabet.DNAAlphabet())

    def get_empirical_formula(self):
        """ Get the empirical formula for a DNA molecule with

        * 5' monophosphate (for linear molecules)
        * Deprotonated phosphate oxygens

        * Linear DNA

            :math:`N_A * dAMP + N_C * dCMP + N_G * dGMP + N_T * dTMP - (L - 1) * OH`

        * Circular DNA

            :math:`N_A * dAMP + N_C * dCMP + N_G * dGMP + N_T * dTMP - L * OH`

        N's in the sequence will be distributed into the four bases by preserving the original ratio

        Returns:
           :obj:`chem.EmpiricalFormula`: empirical formula

        Raises:
            :obj:`ZeroDivisionError`: if the sequence contains no A, C, G or T
        """
        seq = self.get_seq()

        # upper-case the sequence once rather than once per base
        upper_seq = seq.upper()
        n_a = upper_seq.count('A')
        n_c = upper_seq.count('C')
        n_g = upper_seq.count('G')
        n_t = upper_seq.count('T')
        n_n = upper_seq.count('N')

        l = len(seq)
        known_bases = n_a + n_c + n_g + n_t
        n_a += round(n_a / known_bases * n_n)
        n_c += round(n_c / known_bases * n_n)
        n_g += round(n_g / known_bases * n_n)
        # T takes the remainder so that the four counts add up to the sequence length
        n_t = l - (n_a + n_c + n_g)

        if self.double_stranded:
            # add the complementary strand: A pairs with T and C pairs with G
            n_a = n_a + n_t
            n_t = n_a
            n_c = n_c + n_g
            n_g = n_c

        formula = chem.EmpiricalFormula()
        formula.C = 10 * n_a + 9 * n_c + 10 * n_g + 10 * n_t
        formula.H = 12 * n_a + 12 * n_c + 12 * n_g + 13 * n_t - \
            (l - 1 + self.circular) * (1 + self.double_stranded)
        formula.N = 5 * n_a + 3 * n_c + 5 * n_g + 2 * n_t
        formula.O = 6 * n_a + 7 * n_c + 7 * n_g + 8 * n_t - \
            (l - 1 + self.circular) * (1 + self.double_stranded)
        formula.P = n_a + n_c + n_g + n_t

        return formula

    def get_charge(self):
        """ Get the charge for a DNA molecule with

        * 5' monophosphate (for linear molecules)
        * Deprotonated phosphate oxygens

        * Linear DNA

            :math:`-L - 1`

        * Circular DNA

            :math:`-L`

        The value is doubled for double-stranded molecules.

        Returns:
            :obj:`int`: charge
        """
        return (-self.get_len() - 1 + self.circular) * (1 + self.double_stranded)

    def get_mol_wt(self):
        """ Get the molecular weight for a DNA molecule with

        * 5' monophosphate (for linear molecules)
        * Deprotonated phosphate oxygens

        * Linear DNA

            :math:`N_A * MW_{dAMP} + N_C * MW_{dCMP} + N_G * MW_{dGMP} + N_T * MW_{dTMP} - (L - 1) * MW_{OH}`

        * Circular DNA

            :math:`N_A * MW_{dAMP} + N_C * MW_{dCMP} + N_G * MW_{dGMP} + N_T * MW_{dTMP} - L * MW_{OH}`

        Returns:
            :obj:`float`: molecular weight
        """
        return self.get_empirical_formula().get_molecular_weight()


class ComplexSpeciesType(SpeciesType):
    """ Knowledge of a protein complex

    Attributes:
        formation_process (:obj:`pronto`): type of formation process
        subunits (:obj:`list` of :obj:`SpeciesTypeCoefficient`): subunits
        type (:obj:`pronto`): type of complex formation

    """

    subunits = SubunitAttribute(related_name='complexes')
    type = obj_tables.sci.onto.OntoTermAttribute(kbOnt,
                                  terms = kbOnt['WC:complex'].subclasses(),
                                  none=True)
    formation_process  = obj_tables.sci.onto.OntoTermAttribute(kbOnt,
                                  terms = kbOnt['WC:complexFormation'].subclasses(),
                                  none=True)

    class Meta(obj_tables.Model.Meta):
        verbose_name = 'Complex'
        attribute_order = ('id', 'name', 'synonyms', 'type', 'formation_process', 'subunits',
                           'identifiers', 'references', 'comments')

    def get_empirical_formula(self):
        """ Get the empirical formula

        The formula of each subunit is added once per copy, where the number of
        copies is the absolute value of the subunit coefficient truncated to an integer.

        Returns:
            :obj:`chem.EmpiricalFormula`: empirical formula
        """
        formula = chem.EmpiricalFormula()
        for subunit in self.subunits:
            n_copies = abs(int(subunit.coefficient))
            if n_copies == 0:
                # nothing to add, so the subunit's formula is not needed
                continue

            # compute the subunit's formula once per subunit rather than once per copy;
            # the calculation can be costly (e.g. reading a sequence file)
            subunit_formula = subunit.species_type.get_empirical_formula()
            for _ in range(n_copies):
                formula = formula + subunit_formula

        return formula

    def get_charge(self):
        """ Get the charge at physiological pH

        Returns:
            :obj:`int`: sum of the subunit charges, each weighted by the absolute
                value of its coefficient
        """
        charge = 0
        for subunit in self.subunits:
            charge += abs(subunit.coefficient)*subunit.species_type.get_charge()

        return charge

    def get_mol_wt(self):
        """ Get the molecular weight

        Returns:
            :obj:`float`: sum of the subunit molecular weights, each weighted by the
                absolute value of its coefficient
        """
        weight = 0
        for subunit in self.subunits:
            weight += abs(subunit.coefficient)*subunit.species_type.get_mol_wt()

        return weight


#####################
#####################
# Reactions and related classes

class RateLawDirection(int, CaseInsensitiveEnum):
    """ Rate law directions

    Integer-valued enumeration: -1 for backward and 1 for forward.
    """
    backward = -1
    forward = 1


class RateLawExpression(obj_tables.Model, Expression):
    """ Rate law expression

    Serialization and deserialization are delegated to
    :obj:`obj_tables.math.expression.Expression`.

    Attributes:
        expression (:obj:`str`): mathematical expression of the rate law
        parameters (:obj:`list` of :obj:`Parameter`): parameters whose values are used in the rate law
        species (:obj:`list` of :obj:`Species`): species whose dynamic concentrations are used in the rate law
        observables (:obj:`list` of :obj:`Observable`): observables whose values are used in the rate law

    Related attributes:
        rate_law (:obj:`RateLaw`): the `RateLaw` which uses this `RateLawExpression`
    """
    # The expression text is the primary, unique key of the model
    expression = LongStringAttribute(primary=True, unique=True, default='')
    parameters = ManyToManyAttribute('Parameter', related_name='rate_law_expressions')
    species = ManyToManyAttribute(Species, related_name='rate_law_expressions')
    observables = ManyToManyAttribute(Observable, related_name='rate_law_expressions')

    class Meta(obj_tables.Model.Meta, Expression.Meta):
        attribute_order = ('expression', 'parameters', 'species', 'observables')
        # presumably the expression is stored inline in a cell of the parent table -- TODO confirm
        table_format = TableFormat.cell
        ordering = ('expression',)
        # Names of the models whose instances may appear as terms in the expression
        expression_term_models = ('Parameter', 'Species', 'Observable')
        expression_unit_registry = unit_registry

    def serialize(self):
        """ Generate string representation

        Returns:
            :obj:`str`: value of primary attribute
        """
        return Expression.serialize(self)

    @classmethod
    def deserialize(cls, value, objects):
        """ Deserialize value

        Args:
            value (:obj:`str`): String representation
            objects (:obj:`dict`): dictionary of objects, grouped by model

        Returns:
            :obj:`tuple` of :obj:`RateLawExpression`, `InvalidAttribute` or `None`: tuple of cleaned value
                and cleaning error
        """
        return Expression.deserialize(cls, value, objects)


class RateLaw(KnowledgeBaseObject):
    """ Rate law

    Attributes:
        reaction (:obj:`Reaction`): reaction
        direction (:obj:`RateLawDirection`): direction
        expression (:obj:`RateLawExpression`): expression
        units (:obj:`unit_registry.Unit`): units
        references (:obj:`list` of :obj:`Reference`): references
        identifiers (:obj:`list` of :obj:`Identifier`): identifiers
    """
    reaction = ManyToOneAttribute('Reaction', related_name='rate_laws')
    expression = ManyToOneExpressionAttribute(RateLawExpression, min_related=1, min_related_rev=1, related_name='rate_laws')
    # Only s^-1 is accepted as the unit of a rate law
    units = obj_tables.sci.units.UnitAttribute(unit_registry,
                          choices=(unit_registry.parse_units('s^-1'),),
                          default=unit_registry.parse_units('s^-1'))
    references = obj_tables.ManyToManyAttribute(Reference, related_name='rate_laws')
    identifiers = IdentifierAttribute(related_name='rate_laws')
    direction = EnumAttribute(RateLawDirection, default=RateLawDirection.forward)

    class Meta(obj_tables.Model.Meta, ExpressionExpressionTermMeta):
        attribute_order = ('id', 'reaction', 'direction', 'expression', 'units', 'identifiers', 'references', 'comments')
        expression_term_model = RateLawExpression
        expression_term_units = 'units'

    def gen_id(self):
        """ Generate identifier

        The identifier is the id of the reaction and the name of the direction,
        joined by an underscore.

        Returns:
            :obj:`str`: identifier
        """
        return '{}_{}'.format(self.reaction.id, self.direction.name)

    def deserialize(self, value, objects, decoded=None):
        """ Deserialize value

        Args:
            value (:obj:`str`): String representation
            objects (:obj:`dict`): dictionary of objects, grouped by model
            decoded (:obj:`dict`, optional): dictionary of objects that have already been decoded

        Returns:
            :obj:`tuple` of :obj:`RateLawExpression`, `InvalidAttribute` or `None`:
                tuple of cleaned value and cleaning error
        """
        # Class-level attributes are not visible as bare names inside a method,
        # so the expression model is referenced explicitly. `decoded` is accepted
        # only to keep the signature stable; the expression model's
        # `deserialize` takes just the value and the objects.
        # NOTE(review): this instance method shadows any `deserialize` inherited
        # from the base model -- confirm that the override is intended.
        return RateLawExpression.deserialize(value, objects)


class Reaction(KnowledgeBaseObject):
    """ Knowledge of reactions

    Attributes:
        cell (:obj:`Cell`): cell
        participants (:obj:`list` of :obj:`SpeciesCoefficient`): participants
        reversible (:obj:`boolean`): denotes whether reaction is reversible
        references (:obj:`list` of :obj:`Reference`): references
        identifiers (:obj:`list` of :obj:`Identifier`): identifiers
        evidence (:obj:`list` of :obj:`Evidence`): evidence
        enzymes (:obj:`list` of :obj:`SpeciesType`): enzymes
        coenzymes (:obj:`list` of :obj:`SpeciesType`): coenzymes
        spontaneous (:obj:`bool`): spontaneity
        parameters (:obj:`list` of :obj:`Parameter`): parameters
        type (:obj:`pronto`): type

    Related attributes:
        rate_laws (:obj:`list` of :obj:`RateLaw`): rate laws; if present, rate_laws[0] is the forward
            rate law, and rate_laws[1] is the backward rate law
    """

    cell = obj_tables.ManyToOneAttribute(Cell, related_name='reactions')
    participants = ReactionParticipantAttribute(related_name='reactions')
    reversible = obj_tables.BooleanAttribute()
    references = obj_tables.ManyToManyAttribute(Reference, related_name='reactions')
    identifiers = IdentifierAttribute(related_name='reactions')
    evidence = obj_tables.OneToManyAttribute('Evidence', related_name='reactions')
    # NOTE(review): `enzymes` and `coenzymes` both target SpeciesType with
    # related_name='reactions' -- confirm which of the two `SpeciesType.reactions`
    # resolves to, or whether distinct reverse names were intended.
    enzymes = obj_tables.ManyToManyAttribute(SpeciesType, related_name='reactions')
    coenzymes = obj_tables.ManyToManyAttribute(SpeciesType, related_name='reactions')
    spontaneous = obj_tables.BooleanAttribute()
    parameters = obj_tables.OneToManyAttribute('Parameter', related_name='reactions')
    # Ontology term restricted to the subclasses of `WC:reaction`
    type = obj_tables.sci.onto.OntoTermAttribute(kbOnt,
                                  terms = kbOnt['WC:reaction'].subclasses(),
                                  none=True)

    class Meta(obj_tables.Model.Meta):
        # NOTE(review): `cell` is declared above but is not listed here -- confirm this is intended
        attribute_order = ('id', 'name', 'synonyms', 'type', 'participants', 'enzymes', 'coenzymes',
                           'reversible', 'spontaneous', 'parameters', 'evidence', 'identifiers', 'references', 'comments')

#####################
#####################
# Expansion classes

class ChromosomeFeature(PolymerLocus):
    """ Knowledge of chromosome features

    Attributes:
        coordinate (:obj:`int`): coordinate
        start (:obj:`int`): start position
        end (:obj:`int`): end position
        intensity (:obj:`float`): intensity
        unit (:obj:`unit_registry.Unit`): unit
        polymer (:obj:`DnaSpeciesType`): polymer on which the feature is located
        evidence (:obj:`list` of :obj:`Evidence`): evidence
        identifiers (:obj:`list` of :obj:`Identifier`): identifiers
        references (:obj:`list` of :obj:`Reference`): references
        type (:obj:`pronto`): type
    """

    coordinate = obj_tables.IntegerAttribute(min=0)
    start = obj_tables.IntegerAttribute(min=0)
    end = obj_tables.IntegerAttribute(min=0)
    intensity = obj_tables.FloatAttribute(min=0)
    unit = obj_tables.sci.units.UnitAttribute(unit_registry, none=True)
    polymer = obj_tables.ManyToOneAttribute('DnaSpeciesType', related_name='chromosome_features')
    evidence = obj_tables.OneToManyAttribute('Evidence', related_name='chromosome_features')
    identifiers = IdentifierAttribute(related_name='chromosome_features')
    references = obj_tables.ManyToManyAttribute('Reference', related_name='chromosome_features')
    type = obj_tables.sci.onto.OntoTermAttribute(
        kbOnt,
        terms=kbOnt['WC:chromosomeFeature'].subclasses(),
        none=True)

    class Meta(obj_tables.Model.Meta):
        attribute_order = (
            'id', 'name', 'type', 'polymer', 'start', 'end',
            'intensity', 'unit', 'evidence', 'identifiers', 'references', 'comments',
        )
        expression_term_token_pattern = (token.NAME, )

    def get_direction(self):
        """ Determine the direction of the chromosome feature from its start and end positions

        Returns:
            :obj:`PolymerDirection`: `forward` if the start precedes the end and
                `reverse` if the start follows the end

        Raises:
            :obj:`ValueError`: if the start and end positions are equal
        """
        begin, finish = self.start, self.end

        if begin < finish:
            return PolymerDirection.forward

        if begin > finish:
            return PolymerDirection.reverse

        if begin == finish:
            raise ValueError('Start and end position of chromosome feature can not be the same (Chrom feature id: {}).'.format(self.id))

        # Positions that satisfy none of the comparisons above yield no direction
        return None


class Evidence(KnowledgeBaseObject):
    """ Represents the measurement / observation of a property

        Attributes:
            id (:obj:`str`): identifier
            cell (:obj:`Cell`): cell
            object (:obj:`str`): object
            property (:obj:`str`): property
            value (:obj:`float`): value
            units (:obj:`Units`): units
            identifiers (:obj:`list` of :obj:`Identifier`): identifiers
            references (:obj:`list` of :obj:`Reference`): references
            experiment (:obj:`Experiment`): experiment
            comments (:obj:`str`): comments
    """

    cell = obj_tables.ManyToOneAttribute('Cell', related_name='evidence')
    object   =  obj_tables.StringAttribute()
    property = obj_tables.StringAttribute()
    value = obj_tables.FloatAttribute()
    # none=True: presumably permits the units to be left unset (None) -- confirm against obj_tables
    units = obj_tables.sci.units.UnitAttribute(unit_registry, none=True)
    identifiers = IdentifierAttribute(related_name='evidence')
    references = obj_tables.ManyToManyAttribute('Reference', related_name='evidence')
    experiment = obj_tables.ManyToOneAttribute('Experiment', related_name ='evidence')
    comments = obj_tables.LongStringAttribute()

    class Meta(obj_tables.Model.Meta):
        attribute_order = ('id', 'cell', 'object', 'property', 'value', 'units', 'experiment', 'identifiers', 'references', 'comments')


class Experiment(KnowledgeBaseObject):
    """ Represents an experiment in which a property was measured

        Attributes:
            id (:obj:`str`): identifier
            species (:obj:`str`): species
            genetic_variant (:obj:`str`): genetic_variant
            external_media (:obj:`str`): external_media
            temperature (:obj:`float`): temperature
            temperature_units (:obj:`Units`): temperature_units
            ph (:obj:`float`): pH
            experiment_design (:obj:`str`): experimental design
            measurement_technology (:obj:`str`): measurement technology
            analysis_type (:obj:`str`): analysis type
            identifiers (:obj:`list` of :obj:`Identifier`): identifiers
            references (:obj:`list` of :obj:`Reference`): references
            comments (:obj:`str`): comments

        Related attributes:
            evidence (:obj:`list` of :obj:`Evidence`): evidence obtained in the experiment
    """

    species = obj_tables.StringAttribute()
    genetic_variant = obj_tables.StringAttribute()
    external_media  = obj_tables.StringAttribute()
    temperature    = obj_tables.FloatAttribute()
    # NOTE(review): in a stock pint registry 'F' and 'C' parse as farad and
    # coulomb rather than temperature scales -- confirm that `unit_registry`
    # maps these strings to degrees Fahrenheit and Celsius.
    temperature_units = obj_tables.sci.units.UnitAttribute(unit_registry,
                        choices=(unit_registry.parse_units('F'),
                                 unit_registry.parse_units('C'),
                                 unit_registry.parse_units('K')),
                        default= unit_registry.parse_units('C'))
    ph = obj_tables.FloatAttribute()
    experiment_design = obj_tables.StringAttribute()
    measurement_technology = obj_tables.StringAttribute()
    analysis_type = obj_tables.StringAttribute()
    identifiers = IdentifierAttribute(related_name='experiments')
    # NOTE(review): the reverse name here is singular ('experiment') while
    # `identifiers` uses the plural 'experiments' -- confirm this is intended.
    references = obj_tables.ManyToManyAttribute('Reference', related_name='experiment')
    comments = obj_tables.LongStringAttribute()

    class Meta(obj_tables.Model.Meta):
        attribute_order = ('id', 'experiment_design', 'measurement_technology', 'analysis_type', 'species', 'genetic_variant', 'external_media',
                           'temperature', 'temperature_units', 'ph', 'identifiers', 'references', 'comments')


class SpeciesTypeProperty(KnowledgeBaseObject):
    """ Knowledge of the properties of species types

        Attributes:
            species_type (:obj:`SpeciesType`): species type
            property (:obj:`str`): name of property
            units (:obj:`unit_registry`): units
            value (:obj:`str`): value, stored as a string
            identifiers (:obj:`list` of :obj:`Identifier`): identifiers
            references (:obj:`list` of :obj:`Reference`): references
            evidence (:obj:`list` of :obj:`Evidence`): evidence
            value_type (:obj:`pronto`): value type
    """
    species_type = ManyToOneAttribute(SpeciesType, related_name='properties') #Do we want min_related=1?
    property = obj_tables.StringAttribute()
    units = obj_tables.sci.units.UnitAttribute(unit_registry, none=True)
    value = obj_tables.LongStringAttribute()
    identifiers = IdentifierAttribute(related_name='properties')
    references = ManyToManyAttribute(Reference, related_name='properties')
    evidence = obj_tables.OneToManyAttribute(Evidence, related_name='properties')
    # Ontology term restricted to the subclasses of `WC:valueType`; defaults to `WC:float`
    value_type = obj_tables.sci.onto.OntoTermAttribute(kbOnt,
                                terms = kbOnt['WC:valueType'].subclasses(),
                                default = kbOnt['WC:float'],
                                none=False)

    class Meta(obj_tables.Model.Meta):
        verbose_name_plural = 'Species type properties'
        unique_together = (('species_type', 'property', ), )
        attribute_order = ('id', 'species_type', 'property', 'value', 'value_type', 'units', 'evidence',
                           'identifiers', 'references', 'comments')

    def gen_id(self):
        """ Generate id

        Returns:
            :obj:`str`: identifier of the form `PROP(<species type id>:<property>)`
        """
        return 'PROP({}:{})'.format(self.species_type.id, self.property)

    def get_value(self):
        """ Get the value converted to the Python type indicated by `value_type`

        Values are stored as strings. Boolean values are parsed from their text
        (case-insensitively, ignoring surrounding whitespace): `true`, `1` and
        `yes` map to :obj:`True`; `false`, `0` and `no` map to :obj:`False`.

        Returns:
            :obj:`bool`, :obj:`str`, :obj:`int`, :obj:`float` or :obj:`None`: converted value,
                or :obj:`None` if the stored value is the empty string

        Raises:
            :obj:`ValueError`: if the value type is not boolean, string, integer or float,
                or if the stored value cannot be converted to the indicated type
        """
        if self.value == '':
            return None

        if are_terms_equivalent(self.value_type, kbOnt['WC:boolean']):
            # `bool()` of any non-empty string is True, so the text has to be
            # interpreted explicitly for values such as "False" or "0".
            text = str(self.value).strip().lower()
            if text in ('true', '1', 'yes'):
                return True
            if text in ('false', '0', 'no'):
                return False
            raise ValueError('SpeciesTypeProperty "{}" has invalid boolean value "{}".'.format(self.id, self.value))
        elif are_terms_equivalent(self.value_type, kbOnt['WC:string']):
            return self.value
        elif are_terms_equivalent(self.value_type, kbOnt['WC:integer']):
            return int(self.value)
        elif are_terms_equivalent(self.value_type, kbOnt['WC:float']):
            return float(self.value)
        else:
            raise ValueError('SpeciesTypeProperty "{}" has unexpected value type "{}".'.format(self.id, self.value_type))