diff --git a/etfl/core/genes.py b/etfl/core/genes.py index 29e5584..239a2a6 100644 --- a/etfl/core/genes.py +++ b/etfl/core/genes.py @@ -14,6 +14,7 @@ from cobra import Gene from Bio.Seq import Seq from Bio.Alphabet import DNAAlphabet, RNAAlphabet, ProteinAlphabet +from ..optim.constraints import SynthesisConstraint def make_sequence(sequence, seq_type): @@ -31,7 +32,7 @@ def make_sequence(sequence, seq_type): else: raise TypeError('The type of the sequence argument should be either ' 'string or a Bio.Seq') - + return typed_sequence class ExpressedGene(Gene): @@ -76,8 +77,27 @@ def transcribed_by(self): @transcribed_by.setter def transcribed_by(self,value): - # TODO: Make this a setter that rewrites the adequate constraints - raise NotImplementedError() + if value != self._transcribed_by: + if self.model is None: + # Easy + self._transcribed_by = value + else: + mod_id = self.id + '_transcription' + cstr = self.model.get_constraints_of_type(SynthesisConstraint) + cstr_exist = True # an indicator to show if the gene is transcribed + try: + cstr.get_by_id(mod_id) + except KeyError: + cstr_exist = False + if cstr_exist: + # trancription constraint already exists + # TODO: We need to make the model change the corresponding constraints + raise NotImplementedError() + else: + self._transcribed_by = value + else: + # Nothing to do here :) + pass @property def translated_by(self): @@ -85,8 +105,28 @@ def translated_by(self): @translated_by.setter def translated_by(self,value): - # TODO: Make this a setter that rewrites the adequate constraints - raise NotImplementedError() + if value != self._translated_by: + if self.model is None: + # Easy + self._translated_by = value + else: + # We need to make the model change the corresponding cstr + mod_id = self.id + '_translation' + cstr = self.model.get_constraints_of_type(SynthesisConstraint) + cstr_exist = True # an indicator to show if the gene is translated + try: + cstr.get_by_id(mod_id) + except KeyError: + cstr_exist = False + if cstr_exist: + # translation constraint already exists + # TODO: We need to make the model change the corresponding constraints + raise NotImplementedError() + else: + self._translated_by = value + else: + # Nothing to do here :) + pass @property diff --git a/etfl/core/memodel.py b/etfl/core/memodel.py index d596be5..02326ed 100644 --- a/etfl/core/memodel.py +++ b/etfl/core/memodel.py @@ -54,6 +54,11 @@ from pytfa.utils.str import camel2underscores from pytfa.optim.utils import copy_solver_configuration +def id_maker_rib_rnap(the_set): + # for a set of ribosomes or RNAPs makes an id + id_ = ''.join([x for x in the_set]) + return id_ + class MEModel(LCSBModel, Model): def __init__(self, model=Model(), name = None, @@ -259,17 +264,37 @@ def add_nucleotide_sequences(self, sequences): :param sequences: :return: """ - for gene_id, seq in sequences.items(): if gene_id in self.genes: new = replace_by_me_gene(self, gene_id, seq) - + else: self.logger.warning('Model has no gene {}, Adding it'.format(gene_id)) new = ExpressedGene(id= gene_id, name = gene_id, sequence=seq) self.add_genes([new]) self._make_peptide_from_gene(gene_id) + + + def add_transcription_by(self, transcription_dict): + + for gene_id, transcribed_by in transcription_dict.items(): + # transcribed_by is a list of rnap(s) + try: + self.genes.get_by_id(gene_id).transcribed_by = transcribed_by + except KeyError: + # the gene is not in the model + continue + + def add_translation_by(self, translation_dict): + + for gene_id, translated_by in translation_dict.items(): + # translated_by is a list of rnap(s) + try: + self.genes.get_by_id(gene_id).translated_by = translated_by + except KeyError: + # the gene is not in the model + continue def _make_peptide_from_gene(self, gene_id): free_pep = Peptide(id=gene_id, @@ -277,6 +302,17 @@ def _make_peptide_from_gene(self, gene_id): gene_id=gene_id) free_pep._model = self self.peptides += [free_pep] + + def add_peptide_sequences(self, aa_sequences): + + for pep_id, seq in aa_sequences.items(): + if pep_id in self.peptides: + self.peptides.get_by_id(pep_id).peptide = seq + else: + self.logger.warning('Model has no peptide {}'.format(pep_id)) + continue + + def add_dummies(self, nt_ratios, mrna_kdeg, mrna_length, aa_ratios, enzyme_kdeg, peptide_length): @@ -1404,16 +1440,47 @@ def _populate_rnap(self): # 3 -> Add RNAP capacity constraint self.regenerate_variables() - - usage = self._get_rnap_total_capacity() - - # usage = (sum_RMs + self.RNAPf[this_rnap.id].unscaled - this_rnap.concentration) / \ - # this_rnap.scaling_factor - - # Create the capacity constraint + + transcription_dict = self._sort_rnap_assignment() + gene_list_tot = [] # we need to have a list of all genes + for the_combination, gene_list in transcription_dict.items(): + # CATCH : This is summing ~1500+ variable objects, and for a reason + # sympy does not like it. Let's cut it in smaller chunks and sum + # afterwards + # sum_RPs = sum(self.get_variables_of_type(RibosomeUsage)) + + for gene_id in gene_list: + if gene_id not in gene_list_tot: + gene_list_tot.append(gene_id) + + if len(the_combination) != len(self.rnap.keys()): + # at least one of the rnaps isn't assigned to these genes + # per each such combination adds an inequality to make sure + # that at least we have enough rnap for these genes + # For nondimensionalization + # usage = (sum_RMs + self.RNAPf[this_rnap.id].unscaled - this_rnap.concentration) / \ + # this_rnap.scaling_factor + + # Create the capacity constraint + usage = self._get_rnap_total_capacity(rnap_ids = the_combination, + genes = gene_list) + + + self.add_constraint(kind=TotalCapacity, + hook=self, + id_ = id_maker_rib_rnap(the_combination), + expr=usage, + lb = -self.big_M, + ub = 0, + ) + # Finally add an equlaity constraint to make sure that rnaps + # assigned to all the genes is equal to sum of all rnap usages + rnap_set = frozenset([x for x in self.rnap.keys()]) # a set of rnap ids + usage = self._get_rnap_total_capacity(rnap_ids = rnap_set, + genes = gene_list_tot) self.add_constraint(kind=TotalCapacity, hook=self, - id_ = 'rnap', + id_=id_maker_rib_rnap(rnap_set), expr=usage, lb = 0, ub = 0, @@ -1422,19 +1489,48 @@ def _populate_rnap(self): # update variable and constraints attributes self.regenerate_constraints() self.regenerate_variables() - - def _get_rnap_total_capacity(self): + + def _sort_rnap_assignment(self): + # a function to sort genes based on their type of rnap assignment + # 2 types exist: None or at least one rnap assigned to this gene + all_rnap_usage = self.get_variables_of_type(RNAPUsage) + rnap_set = frozenset([x for x in self.rnap.keys()]) # a set of rnap ids + transcription_dict = dict() # a dict to keep different types; the keys are rnap sets and values are genes + for var in all_rnap_usage: + if var.hook.transcribed_by is None: + # the 1st type which is handeled by assigning all rnaps to this gene + this_type = rnap_set + try: + transcription_dict[this_type].append(var.hook.id) + except KeyError: + # the first time encountering this type + transcription_dict[this_type] = [var.hook.id] + else: + # the 2nd type + this_type = frozenset(var.hook.transcribed_by) + try: + transcription_dict[this_type].append(var.hook.id) + except KeyError: + # the first time encountering this type + transcription_dict[this_type] = [var.hook.id] + return transcription_dict + + def _get_rnap_total_capacity(self, rnap_ids, genes): all_rnap_usage = self.get_variables_of_type(RNAPUsage) - sum_RMs = symbol_sum([x.unscaled for x in all_rnap_usage]) - free_rnap = [self.get_variables_of_type(FreeEnzyme).get_by_id(x) - for x in self.rnap] + # only for genes trascribed by thiscombination of rnaps + sum_RMs = symbol_sum([x.unscaled for x in all_rnap_usage \ + if x.hook.id in genes]) + free_rnap = [self.get_variables_of_type(FreeEnzyme).get_by_id(rnap_id) \ + for rnap_id in rnap_ids] + # The total RNAP capacity constraint looks like # ΣRMi + Σ(free RNAPj) = Σ(Total RNAPj) usage = sum_RMs \ + sum([x.unscaled for x in free_rnap]) \ - - sum([x.concentration for x in self.rnap.values()]) - usage /= min([x.scaling_factor for x in self.rnap.values()]) + - sum([self.rnap[rnap_id].concentration for rnap_id in rnap_ids]) + usage /= min([self.rnap[rnap_id].scaling_factor for rnap_id in rnap_ids]) return usage + def apply_rnap_catalytic_constraint(self, reaction, queue): """ @@ -1615,31 +1711,44 @@ def _populate_ribosomes(self): # 3 -> Add ribosomal capacity constraint self.regenerate_variables() - free_ribosome = [self.get_variables_of_type(FreeEnzyme).get_by_id(x) - for x in self.ribosome] - # CATCH : This is summing ~1500+ variable objects, and for a reason - # sympy does not like it. Let's cut it in smaller chunks and sum - # afterwards - # sum_RPs = sum(self.get_variables_of_type(RibosomeUsage)) - all_ribosome_usage = self.get_variables_of_type(RibosomeUsage) - - # sum_RPs = chunk_sum(all_ribosome_usage) - sum_RPs = symbol_sum([x.unscaled for x in all_ribosome_usage]) - - # The total RNAP capacity constraint looks like - # ΣRMi + Σ(free RNAPj) = Σ(Total RNAPj) - ribo_usage = sum_RPs \ - + sum([x.unscaled for x in free_ribosome]) \ - - sum([x.concentration for x in self.ribosome.values()]) - ribo_usage /= min([x.scaling_factor for x in self.ribosome.values()]) - # For nondimensionalization - # ribo_usage = (sum_RPs + self.Rf.unscaled - self.ribosome.concentration) \ - # / self.ribosome.scaling_factor - - # Create the capacity constraint + translation_dict = self._sort_rib_assignment() + gene_list_tot = [] # we need to have a list of all genes + for the_combination, gene_list in translation_dict.items(): + # CATCH : This is summing ~1500+ variable objects, and for a reason + # sympy does not like it. Let's cut it in smaller chunks and sum + # afterwards + # sum_RPs = sum(self.get_variables_of_type(RibosomeUsage)) + + for gene_id in gene_list: + if gene_id not in gene_list_tot: + gene_list_tot.append(gene_id) + + if len(the_combination) != len(self.ribosome.keys()): + # at least one of the ribosomes isn't assigned to these genes + # per each such combination adds an inequality to make sure + # that at least we have enough ribosome for these genes + # For nondimensionalization + # ribo_usage = (sum_RPs + self.Rf.unscaled - self.ribosome.concentration) \ + # / self.ribosome.scaling_factor + + # Create the capacity constraint + ribo_usage = self._get_rib_total_capacity(rib_ids = the_combination, + genes = gene_list) + self.add_constraint(kind=TotalCapacity, + hook=self, + id_=id_maker_rib_rnap(the_combination), + expr=ribo_usage, + lb = -self.big_M, + ub = 0, + ) + # Finally add an equlaity constraint to make sure that ribosomes + # assigned to all the genes is equal to sum of all ribosome usages + rib_set = frozenset([x for x in self.ribosome.keys()]) # a set of rib ids + ribo_usage = self._get_rib_total_capacity(rib_ids = rib_set, + genes = gene_list_tot) self.add_constraint(kind=TotalCapacity, hook=self, - id_='rib', + id_=id_maker_rib_rnap(rib_set), expr=ribo_usage, lb = 0, ub = 0, @@ -1648,8 +1757,48 @@ def _populate_ribosomes(self): # update variable and constraints attributes self.regenerate_constraints() self.regenerate_variables() + + def _sort_rib_assignment(self): + # a function to sort genes based on their type of ribosome assignment + # 2 types exist: None or at least one ribosome assigned to this gene + all_ribosome_usage = self.get_variables_of_type(RibosomeUsage) + rib_set = frozenset([x for x in self.ribosome.keys()]) # a set of rib ids + translation_dict = dict() # a dict to keep different types; the keys are ribosome sets and values are genes + for var in all_ribosome_usage: + if var.hook.translated_by is None: + # the 1st type which is handeled by assigning all ribosomes to this gene + this_type = rib_set + try: + translation_dict[this_type].append(var.hook.id) + except KeyError: + # the first time encountering this type + translation_dict[this_type] = [var.hook.id] + else: + # the 2nd type + this_type = frozenset(var.hook.translated_by) + try: + translation_dict[this_type].append(var.hook.id) + except KeyError: + # the first time encountering this type + translation_dict[this_type] = [var.hook.id] + return translation_dict + + def _get_rib_total_capacity(self, rib_ids, genes): + all_ribosome_usage = self.get_variables_of_type(RibosomeUsage) + # only for genes traslated by this combination of ribosomes + sum_RPs = symbol_sum([x.unscaled for x in all_ribosome_usage \ + if x.hook.id in genes]) + free_ribosome = [self.get_variables_of_type(FreeEnzyme).get_by_id(rib_id) \ + for rib_id in rib_ids] - + # The total RNAP capacity constraint looks like + # ΣRMi + Σ(free RNAPj) = Σ(Total RNAPj) + usage = sum_RPs \ + + sum([x.unscaled for x in free_ribosome]) \ + - sum([self.ribosome[rib_id].concentration for rib_id in rib_ids]) + usage /= min([self.ribosome[rib_id].scaling_factor for rib_id in rib_ids]) + return usage + def apply_ribosomal_catalytic_constraint(self, reaction): """ Given a translation reaction, apply the constraint that links it with @@ -1716,7 +1865,6 @@ def _add_gene(self, gene): gene._model = self self.genes.append(gene) - #-------------------------------------------------------------------------# def sanitize_varnames(self): @@ -1782,3 +1930,4 @@ def copy(self): copy_solver_configuration(self, new) return new + \ No newline at end of file diff --git a/etfl/io/dict.py b/etfl/io/dict.py index 176bf17..f8e7a3d 100644 --- a/etfl/io/dict.py +++ b/etfl/io/dict.py @@ -12,6 +12,9 @@ from tqdm import tqdm +from Bio.Seq import Seq +from Bio.Alphabet import DNAAlphabet, RNAAlphabet, ProteinAlphabet + import cobra.io.dict as cbd from cobra.exceptions import SolverNotFound from optlang.util import expr_to_json, parse_expr @@ -51,10 +54,18 @@ def expressed_gene_to_dict(gene): obj['rna'] = str(gene.rna) obj['peptide'] = str(gene.peptide) obj['copy_number'] = str(gene.copy_number) - obj['transcribed_by'] = [x.id for x in gene.transcribed_by] \ - if gene.transcribed_by is not None else None - obj['translated_by'] = [x.id for x in gene.translated_by]\ - if gene.translated_by is not None else None + try: + obj['transcribed_by'] = [x.id for x in gene.transcribed_by] \ + if gene.transcribed_by is not None else None + except AttributeError: + obj['transcribed_by'] = [x for x in gene.transcribed_by] \ + if gene.transcribed_by is not None else None + try: + obj['translated_by'] = [x.id for x in gene.translated_by]\ + if gene.translated_by is not None else None + except AttributeError: + obj['translated_by'] = [x for x in gene.translated_by] \ + if gene.translated_by is not None else None return obj @@ -537,7 +548,7 @@ def init_me_model_from_dict(new, obj): new.growth_reaction = obj['growth_reaction'] # Populate enzymes - # new.coupling_dict = rebuild_coupling_dict(new, obj['coupling_dict']) + # new.coupling_dict = rebuild_coupling_dict(, obj['coupling_dict']) new.add_enzymes([enzyme_from_dict(x) for x in obj['enzymes']], prep=False) # Make RNAP @@ -892,8 +903,8 @@ def find_genes_from_dict(new, obj): g = replace_by_me_gene(new, gene_dict['id'], str(sequence)) if key == 'expressed_genes': # Newer models - g._rna = gene_dict['rna'] - g._peptide = gene_dict['peptide'] + g._rna = Seq(gene_dict['rna'], alphabet=RNAAlphabet()) + g._peptide = Seq(gene_dict['peptide'], alphabet=ProteinAlphabet()) g._copy_number = gene_dict['copy_number'] g._transcribed_by = [new.enzymes.get_by_id(e) for e in gene_dict['transcribed_by']] \