wc_model_gen/prokaryote/metabolism.py from KarrLab/wc_model_gen

wc_model_gen/prokaryote/metabolism.py
Summary

Maintainability

5 days
Test Coverage

86%
Issues
Coverage
""" Generator for metabolism submodels based on KBs for random in silico organisms

:Author: Balazs Szigeti <balazs.szigeti@mssm.edu>
:Author: Jonathan Karr <karr@mssm.edu>
:Author: Ashwin Srinivasan <ashwins@mit.edu>
:Author: Arthur Goldberg <Arthur.Goldberg@mssm.edu>
:Date: 2018-06-11
:Copyright: 2018, Karr Lab
:License: MIT

TODO: improve terminology to better distinguish this and the metabolism species generation
"""

from scipy.constants import Avogadro
from wc_onto import onto as wc_ontology
from wc_utils.util.units import unit_registry
import numpy
import wc_lang
import wc_kb
import wc_model_gen


class MetabolismSubmodelGenerator(wc_model_gen.SubmodelGenerator):
    """ Generator for metabolism submodel """

    # self.reaction_scale is the number of molecules transfered / converted with each reaction.
    # Increasing the number reduces the number of metabolic reactions, significantly reducing simulation time
    reaction_scale = 100

    def gen_reactions(self):
        """ Generate reactions assocated with min model

        Raises:
            :obj:`ValueError:` if any phosphate species are missing from the model
        """

        # Get species, model components
        cell = self.knowledge_base.cell
        model = self.model
        submodel = model.submodels.get_one(id='metabolism')
        c = model.compartments.get_one(id='c')
        e = model.compartments.get_one(id='e')
        h_type = model.species_types.get_one(id='h')
        h2o = model.species_types.get_one(id='h2o').species.get_one(compartment=c)
        h = model.species_types.get_one(id='h').species.get_one(compartment=c)
        pi = model.species_types.get_one(id='pi').species.get_one(compartment=c)
        adp = model.species_types.get_one(id='adp').species.get_one(compartment=c)

        # Get species involved in reaction
        mpps = {}
        for id in ['ump', 'amp', 'cmp', 'gmp']:
            mpps[id] = model.species_types.get_one(id=id)

        dpps = {}
        for id in ['udp', 'adp', 'cdp', 'gdp']:
            dpps[id] = model.species_types.get_one(id=id)

        tpps = {}
        for id in ['utp', 'atp', 'ctp', 'gtp']:
            tpps[id] = model.species_types.get_one(id=id)

        # Confirm that all phosphate species were found
        missing = []
        for d in [tpps, dpps, mpps]:
            for id, pps_type in d.items():
                if pps_type is None:
                    missing.append(id)
        if missing:
            raise ValueError("'{}' not found in model.species".format(', '.join(missing)))

        # Generate reactions associated with nucleophosphate maintenance
        for dpp, tripp in zip(dpps.values(), tpps.values()):

            # get/create species
            dpp_e = model.species.get_or_create(species_type=dpp,   compartment=e)
            dpp_c = model.species.get_or_create(species_type=dpp,   compartment=c)
            tripp_c = model.species.get_or_create(species_type=tripp, compartment=c)
            dpp_e.id = dpp_e.gen_id()
            dpp_c.id = dpp_c.gen_id()
            tripp_c.id = tripp_c.gen_id()

            # Create transfer reaction
            rxn = model.reactions.get_or_create(submodel=submodel, id='transfer_'+dpp.id)
            rxn.participants = []
            rxn.participants.add(dpp_e.species_coefficients.get_or_create(coefficient=-self.reaction_scale))
            rxn.participants.add(dpp_c.species_coefficients.get_or_create(coefficient=self.reaction_scale))

            # Create conversion reactions
            rxn = model.reactions.get_or_create(submodel=submodel, id='conversion_'+dpp.id+'_'+tripp.id)
            rxn.participants = []
            rxn.participants.add(dpp_c.species_coefficients.get_or_create(coefficient=-self.reaction_scale))
            rxn.participants.add(pi.species_coefficients.get_or_create(coefficient=-self.reaction_scale))
            # rxn.participants.add(h.species_coefficients.get_or_create(coefficient=-self.reaction_scale))

            rxn.participants.add(tripp_c.species_coefficients.get_or_create(coefficient=self.reaction_scale))
            # rxn.participants.add(h.species_coefficients.get_or_create(coefficient=self.reaction_scale))
            rxn.participants.add(h2o.species_coefficients.get_or_create(coefficient=self.reaction_scale))

        # Generate reactions associated with tRna/AA maintenece
        for observable_kb in cell.observables:
            if observable_kb.id[0:5] == 'tRNA_':
                rxn = model.reactions.get_or_create(submodel=submodel, id='transfer_'+observable_kb.name)
                rxn.participants = []

                for tRNA_specie_kb in observable_kb.expression.species:
                    tRNA_specie_type_model = model.species_types.get_one(id=tRNA_specie_kb.species_type.id)
                    tRNA_model = tRNA_specie_type_model.species.get_one(compartment=c)

                    trna_e = model.species.get_or_create(
                        species_type=tRNA_model.species_type,
                        compartment=e)
                    trna_c = model.species.get_or_create(
                        species_type=tRNA_model.species_type,
                        compartment=c)
                    trna_e.id = trna_e.gen_id()
                    trna_c.id = trna_c.gen_id()
                    rxn.participants.add(trna_e.species_coefficients.get_or_create(coefficient=-self.reaction_scale))
                    rxn.participants.add(trna_c.species_coefficients.get_or_create(coefficient=self.reaction_scale))

        # Generate reactions associated with H maintenece
        rxn = model.reactions.get_or_create(submodel=submodel, id='transfer_h')
        rxn.participants = []

        h_e = model.species.get_or_create(species_type=h_type, compartment=e)
        h_c = model.species.get_or_create(species_type=h_type, compartment=c)
        h_e.id = h_e.gen_id()
        h_c.id = h_c.gen_id()
        rxn.participants.add(h_e.species_coefficients.get_or_create(coefficient=-self.reaction_scale))
        rxn.participants.add(h_c.species_coefficients.get_or_create(coefficient=self.reaction_scale))

    def gen_rate_laws(self):
        """ Generate rate laws associated with min metabolism model

            Raises:
                :obj:`ValueError:` if any phosphate species are missing from the model
                :obj:`Exception:` if there is a reaction with unexpected ID
        """
        cell = self.knowledge_base.cell
        model = self.model
        submodels = self.model.submodels
        submodel = model.submodels.get_one(id='metabolism')
        c = model.compartments.get_one(id='c')
        e = model.compartments.get_one(id='e')
        volume = cell.parameters.get_one(id='mean_volume').value
        cc_length = cell.parameters.get_one(id='mean_doubling_time').value

        # Calculate rates
        mpp_transfer_rate = self.calc_mpp_transfer_rate()
        mpp_conversion_rate = self.calc_mpp_conversion_rate() + mpp_transfer_rate
        aa_transfer_rate = self.calc_aa_transfer_rate()
        h_transfer_rate = self.calc_h_transfer_rate()

        # Loop through reactions and set rate laws
        for rxn in submodel.reactions:

            # Reactions imported from KB have rate laws defined
            if rxn.id.endswith('_kb'):
                continue

            rate_law = model.rate_laws.create(
                reaction=rxn,
                direction=wc_lang.RateLawDirection.forward)
            rate_law.id = rate_law.gen_id()

            param = model.parameters.create(id='k_cat_{}'.format(rxn.id),
                                            type=wc_ontology['WC:k_cat'],
                                            units=unit_registry.parse_units('s^-1'))

            # Rates for transfer reactions from extracellular space
            if rxn.id[0:9] == 'transfer_':
                # tRNA transfer
                if rxn.id[-8:-4] == 'tRNA':
                    param.value = 0.0000000000589232

                # Monophosphate reaction
                elif rxn.id[-2:] == 'dp':
                    param.value = mpp_transfer_rate

                # Hydrogen transfer
                elif rxn.id[-1:] == 'h':
                    param.value = h_transfer_rate

                else:
                    raise Exception('{}: invalid reaction id, no associated rate law is defined.'.format(rxn.id))

            elif rxn.id[0:11] == 'conversion_':
                param.value = mpp_conversion_rate

            else:
                raise Exception('{}: invalid reaction id, no associated rate law is defined.'.format(rxn.id))

            rate_law.expression, error = wc_lang.RateLawExpression.deserialize(param.id, {
                wc_lang.Parameter: {param.id: param},
            })
            assert error is None, str(error)

        # Apply correction terms if translation submodel is present
        # if model.submodels.get_one(id='translation'):
        #    gtp_corr_rate = self.calc_gtp_corr_rate()
        #    model.parameters.get_one(id='k_cat_transfer_gmp').value += gtp_corr_rate
        #    model.parameters.get_one(id='k_cat_conversion_gmp_gtp').value += gtp_corr_rate

    """ Auxiliary functions """

    def calc_h_transfer_rate(self):
        """ Calculates the rate of H transfer from the extracellular space """
        kb = self.knowledge_base

        # Calculate # of H needed on avg per transcription reaction
        avg_H_per_transcription = self.calc_h_per_transcript()

        # Calculate avg # of transcription reactions over CC = # of initial RNAs + # of degrad rxns
        rnas_kb = kb.cell.species_types.get(__type=wc_kb.prokaryote.RnaSpeciesType)
        n_degrad_rxns = self.calc_rna_degrad_rxns()
        n_rnas = self.calc_rna_copy_num() * len(rnas_kb)
        n_transcription_rxns = n_rnas + n_degrad_rxns

        # Calculate the # of H molecules needed to be transfered over the CC
        n_h_transfer = (n_transcription_rxns - n_degrad_rxns) * avg_H_per_transcription

        # Each transfer reactions transports 10 TPs, thus the final /10
        cc_length = kb.cell.parameters.get_one(id='mean_doubling_time').value
        h_transfer_rate = n_h_transfer / cc_length / self.reaction_scale
        return h_transfer_rate

    def calc_mpp_transfer_rate(self):
        """ Calculates the rate of monophosphate transfer from the extracellular space """

        # Calculate # of new TPP molecules needed = # of TPP molecules tied up in RNAs at T=0
        avg_tpp_per_rna = self.calc_tpp_per_rna()
        avg_rna_copy_num = self.calc_rna_copy_num()

        rnas_kb = self.knowledge_base.cell.species_types.get(__type=wc_kb.prokaryote.RnaSpeciesType)
        tpp_in_cell = avg_tpp_per_rna*avg_rna_copy_num*len(rnas_kb)

        cc_length = self.knowledge_base.cell.parameters.get_one(id='mean_doubling_time').value
        mpp_transfer_rate = (tpp_in_cell/cc_length/self.reaction_scale)

        return mpp_transfer_rate

    def calc_aa_transfer_rate(self):
        """ Calculates the rate of monophosphate transfer from the extracellular space """

        # Calculate # of new AA molecules needed = # of AA molecules tied up in prots at t=0
        avg_aa_per_prot = self.calc_aa_per_prot()
        avg_prot_copy_num = self.calc_prot_copy_num()

        prots_kb = self.knowledge_base.cell.species_types.get(__type=wc_kb.prokaryote.ProteinSpeciesType)
        aa_in_cell = avg_aa_per_prot*avg_prot_copy_num*len(prots_kb)

        cc_length = self.knowledge_base.cell.parameters.get_one(id='mean_doubling_time').value
        aa_transfer_rate = (aa_in_cell/cc_length/self.reaction_scale)

        return aa_transfer_rate

    def calc_mpp_conversion_rate(self):
        """ Calculates the rate of conversion from mono- to triphosphate molecules """
        # Calculate expected # of degradation reactions over CC
        n_rna_deg_rxns = self.calc_rna_degrad_rxns()

        # Calculate avg # of mpps produced per degrad rxns; same as TPP content of RNAs
        avg_mpp_per_deg_rxn = self.calc_tpp_per_rna()

        # Calculate # of mpps needed to be converted
        n_mpp_to_convert = avg_mpp_per_deg_rxn*n_rna_deg_rxns

        cc_length = self.knowledge_base.cell.parameters.get_one(id='mean_doubling_time').value
        mpp_conversion_rate = n_mpp_to_convert/cc_length/self.reaction_scale

        return mpp_conversion_rate  # This is only the rate from degradation, needs to add new mpp conversion!

    def calc_tpp_per_rna(self):
        """ Calculates the average triphosphate content of RNAs """

        cell = self.knowledge_base.cell
        model = self.model

        n_tpp = 0
        rnas_kb = cell.species_types.get(__type=wc_kb.prokaryote.RnaSpeciesType)
        for rna_kb in rnas_kb:
            n_tpp += rna_kb.get_seq().count('A')
            n_tpp += rna_kb.get_seq().count('C')
            n_tpp += rna_kb.get_seq().count('G')
            n_tpp += rna_kb.get_seq().count('U')

        avg_tpp_per_rna = n_tpp/4/len(rnas_kb)

        return avg_tpp_per_rna

    def calc_aa_per_prot(self):
        """ Calculates the average amino acid content of proteins """
        cell = self.knowledge_base.cell
        model = self.model

        proteins_kb = cell.species_types.get(__type=wc_kb.prokaryote.ProteinSpeciesType)
        n_aa = 0
        for protein_kb in proteins_kb:
            n_aa += len(protein_kb.get_seq())

        observables_kb = cell.observables
        n_trnas = 0  # we need to count n of TRNAs, since in reduced models there are only 4
        for observable in observables_kb:
            if observable.id[0:5] == 'tRNA_':
                n_trnas += 1

        avg_aa_per_prot = n_aa/(n_trnas*len(proteins_kb))

        return avg_aa_per_prot

    def calc_h_per_transcript(self):
        """ Calculates the average H needed for a transcription reaction """
        submodel = self.model.submodels.get_one(id='transcription')

        h_per_transcript = []
        for rxn in submodel.reactions:
            n_h = 0
            for part in rxn.participants:
                if part.species.species_type.id == 'h':
                    n_h += part.coefficient
            h_per_transcript.append(n_h)

        avg_H_per_transcript = numpy.mean(h_per_transcript)

        return avg_H_per_transcript

    def calc_gtp_per_translate(self):
        """ Calculates the average GTP needed for a translation reaction """
        submodel = self.model.submodels.get_one(id='translation')

        gtp_per_translation = []
        for rxn in submodel.reactions:
            for part in rxn.participants:
                if part.species.species_type.id == 'gtp':
                    gtp_per_translation.append(abs(part.coefficient))

        avg_gtp_per_translate = numpy.mean(gtp_per_translation)

        return avg_gtp_per_translate

    def calc_rna_degrad_rxns(self):
        """ Calculates the expected # of RNA degradation reactions over the CC"""

        cytosol_lang = self.model.compartments.get_one(id='c')
        cytosol_kb = self.knowledge_base.cell.compartments.get_one(id='c')
        rnas_lang = self.model.species_types.get(type=wc_ontology['WC:RNA'])  # RNA
        cc_length = self.knowledge_base.cell.parameters.get_one(id='mean_doubling_time').value
        volume = self.knowledge_base.cell.parameters.get_one(id='mean_volume').value

        # Calculate the # of degradation reactions over CC
        # Each Rna degrad reaction fires: (cc_length/rna.half_life)*rna_copy_number(t=0)
        n_rna_deg_rxns = 0
        for rna_lang in rnas_lang:
            rna_kb = self.knowledge_base.cell.species_types.get_one(id=rna_lang.id)
            half_life = rna_kb.properties.get_one(property='half_life').get_value()

            conc = rna_lang.species.get_one(compartment=cytosol_lang) \
                .distribution_init_concentration.mean
            rna_copy_num = round(conc * volume * Avogadro)
            n_rna_deg_rxns += ((cc_length / half_life) * rna_copy_num)

        return n_rna_deg_rxns

    def calc_prot_degrad_rxns(self):
        """ Calculates the expected # of RNA degradation reactions over the CC"""

        cytosol_lang = self.model.compartments.get_one(id='c')
        cytosol_kb = self.knowledge_base.cell.compartments.get_one(id='c')
        prots_lang = self.model.species_types.get(type=wc_ontology['WC:protein'])  # protein
        cc_length = self.knowledge_base.cell.parameters.get_one(id='mean_doubling_time').value
        volume = self.knowledge_base.cell.parameters.get_one(id='mean_volume').value

        # Calculate the # of degradation reactions over CC
        # Each prot degrad reaction fires: (cc_length/prot.half_life)*prot_copy_number(t=0)
        n_prot_deg_rxns = 0
        for prot_lang in prots_lang:
            prot_kb = self.knowledge_base.cell.species_types.get_one(id=prot_lang.id)
            if isinstance(prot_kb, wc_kb.core.ComplexSpeciesType):
                continue
            half_life = prot_kb.properties.get_one(property='half_life').get_value()

            conc = prot_lang.species.get_one(compartment=cytosol_lang)\
                .distribution_init_concentration.mean
            prot_copy_num = round(conc * volume * Avogadro)
            n_prot_deg_rxns += ((cc_length / half_life) * prot_copy_num)

        #n_prot_deg_rxns = round(n_prot_deg_rxns)
        return n_prot_deg_rxns

    def calc_rna_copy_num(self):
        """ Calculates the # of RNA molecules at t=0 """

        volume = self.knowledge_base.cell.parameters.get_one(id='mean_volume').value
        cytosol_kb = self.knowledge_base.cell.compartments.get_one(id='c')
        rnas_kb = self.knowledge_base.cell.species_types.get(__type=wc_kb.prokaryote.RnaSpeciesType)

        rna_copy_num = []
        for rna in rnas_kb:
            conc = rna.species.get_one(compartment=cytosol_kb).concentration.value
            rna_copy_num.append(round(conc * volume * Avogadro))

        avg_rna_copy_num = numpy.mean(rna_copy_num)

        return avg_rna_copy_num

    def calc_prot_copy_num(self):
        """ Calculates the # of RNA molecules at t=0 """
        # TODO: check if conc is in CN already

        volume = self.knowledge_base.cell.parameters.get_one(id='mean_volume').value
        cytosol_kb = self.knowledge_base.cell.compartments.get_one(id='c')
        prots_kb = self.knowledge_base.cell.species_types.get(__type=wc_kb.prokaryote.ProteinSpeciesType)

        prot_copy_num = []
        for prot in prots_kb:
            conc = prot.species.get_one(compartment=cytosol_kb).concentration.value
            prot_copy_num.append(round(conc*volume*Avogadro))

        avg_prot_copy_num = numpy.mean(prot_copy_num)

        return avg_prot_copy_num

    def calc_gtp_corr_rate(self):
        """ Calculates the extra amount of GTP needed if model has translation subunit """
        cell = self.knowledge_base.cell
        cc_length = cell.parameters.get_one(id='mean_doubling_time').value

        avg_gtp_per_translate = self.calc_gtp_per_translate()
        avg_prot_copy_num = self.calc_prot_copy_num()  # Number of new proteins needed
        n_prot_degrad_rxns = self.calc_prot_degrad_rxns()
        total_translation_gtp = avg_gtp_per_translate * (avg_prot_copy_num + n_prot_degrad_rxns)

        gtp_corr_rate = (total_translation_gtp/cc_length/self.reaction_scale)

        return gtp_corr_rate